From 601e6a0b2524c1ff9726be7bde265e6b4ff1bec1 Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Mon, 24 Jun 2019 17:15:53 +0300 Subject: [PATCH] Add support for SECCOMP (v2) The patch is added for testing before publishing on linux-riscv. Signed-off-by: David Abdurachmanov --- config | 201 ------------------ configs/fedora/generic/riscv/CONFIG_SECCOMP | 1 - .../generic/riscv/CONFIG_SECCOMP_FILTER | 1 - kernel-riscv64-debug.config | 1 - kernel-riscv64.config | 1 - kernel.spec | 5 +- riscv_seccomp_v2.patch | 174 +++++++++++++++ 7 files changed, 178 insertions(+), 206 deletions(-) delete mode 100644 config delete mode 100644 configs/fedora/generic/riscv/CONFIG_SECCOMP delete mode 100644 configs/fedora/generic/riscv/CONFIG_SECCOMP_FILTER create mode 100644 riscv_seccomp_v2.patch diff --git a/config b/config deleted file mode 100644 index f3503b6b1..000000000 --- a/config +++ /dev/null @@ -1,201 +0,0 @@ -# https://github.com/riscv/riscv-qemu/commit/039dbd521277bc0aab672203a1a199e4519094da -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_OF_PLATFORM=y - -# https://github.com/riscv/riscv-qemu/commit/3446cee04256753a29c45b033d643fcdea24fc72 -CONFIG_RISCV_PLIC=y -CONFIG_HVC_RISCV_SBI=y -CONFIG_VIRTIO=y -CONFIG_VIRTIO_MMIO=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_SCSI_VIRTIO=y -CONFIG_DRM_VIRTIO_GPU=y - -# Loopback lets us build the stage4 disk. -CONFIG_BLK_DEV=y -CONFIG_BLK_DEV_LOOP=y - -# The stage4 root filesystem is ext4. -CONFIG_EXT4_FS=y - -# Needed to avoid cap_set_file errors when installing RPMs. -CONFIG_EXT4_FS_SECURITY=y - -CONFIG_FILE_LOCKING=y -CONFIG_NET_CORE=y -CONFIG_NETDEVICES=y - -# iptables supported, needed for mock. 
-CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_NETFILTER_XTABLES=y -CONFIG_NETFILTER_XT_MATCH_STATE=y -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y -CONFIG_NF_IPTABLES=y -CONFIG_NF_NAT=y -CONFIG_NF_NAT_IPV4=y -CONFIG_NF_NAT_IPV6=y -CONFIG_NF_CONNTRACK=y -CONFIG_NF_CONNTRACK_IPV4=y -CONFIG_NF_CONNTRACK_IPV6=y -CONFIG_NF_SOCKET_IPV4=y -CONFIG_NF_SOCKET_IPV6=y -CONFIG_IP_NF_NAT=y -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_IPTABLES=y -CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_IP_NF_RAW=y -CONFIG_IP6_NF_NAT=y -CONFIG_IP6_NF_FILTER=y -CONFIG_IP6_NF_IPTABLES=y -CONFIG_IP6_NF_TARGET_REJECT=y -CONFIG_IP6_NF_RAW=y - -# For systemd: -# https://cgit.freedesktop.org/systemd/systemd/tree/README -CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_SYSFS=y -CONFIG_DEVTMPFS=y -CONFIG_CGROUPS=y -CONFIG_INOTIFY_USER=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EPOLL=y -CONFIG_NET=y -CONFIG_PROC_FS=y -CONFIG_FHANDLE=y -CONFIG_SYSFS_DEPRECATED=n -CONFIG_UEVENT_HELPER_PATH="" -CONFIG_FW_LOADER_USER_HELPER=n -CONFIG_DMIID=y -CONFIG_BLK_DEV_BSG=y -CONFIG_NET_NS=y -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y -CONFIG_AUTOFS4_FS=y -CONFIG_TMPFS_XATTR=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_EXT4_POSIX_ACL=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_BTRFS_FS_POSIX_ACL=y -CONFIG_SECCOMP=y -CONFIG_SECCOMP_FILTER=y -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_CGROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -CONFIG_EFIVAR_FS=y -CONFIG_EFI_PARTITION=y -CONFIG_RT_GROUP_SCHED=n -CONFIG_AUDIT=n - -# Networking: -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_BRIDGE=y -CONFIG_IPV6=y - -# NFS client. -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_NFS_V4_1=y -CONFIG_NFS_V4_2=y -CONFIG_NFS_FSCACHE=y -CONFIG_ROOT_NFS=y - -# SysV IPC needed by GnuPG. -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y - -# Add XFS for running xfstests (to test libaio). 
-CONFIG_XFS_FS=y -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y - -# For mock, avoids: -# WARNING: tcmsg: [Errno 2] No such file or directory: '/proc/net/psched' -# WARNING: the tc subsystem functionality is limited -CONFIG_NET_SCHED=y - -# For NBD booting. -CONFIG_BLK_DEV_NBD=y - -# Debugging. -CONFIG_STACKTRACE=y - -# Device mapper support. -CONFIG_MD=y -CONFIG_MD_AUTODETECT=y -CONFIG_BLK_DEV_DM=y -CONFIG_BLK_DEV_DM_BUILTIN=y - -CONFIG_DM_DEBUG=y -CONFIG_DM_BUFIO=y -CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING=y -CONFIG_DM_BIO_PRISON=y -CONFIG_DM_PERSISTENT_DATA=y -CONFIG_DM_UNSTRIPED=y -CONFIG_DM_CRYPT=y -CONFIG_DM_SNAPSHOT=y -CONFIG_DM_THIN_PROVISIONING=y -CONFIG_DM_CACHE=y -CONFIG_DM_CACHE_SMQ=y -CONFIG_DM_MIRROR=y -CONFIG_DM_LOG_USERSPACE=y -CONFIG_DM_RAID=y -CONFIG_DM_ZERO=y -CONFIG_DM_MULTIPATH=y -CONFIG_DM_MULTIPATH_QL=y -CONFIG_DM_MULTIPATH_ST=y -CONFIG_DM_DELAY=y -CONFIG_DM_UEVENT=y -CONFIG_DM_FLAKEY=y -CONFIG_DM_VERITY=y -CONFIG_DM_VERITY_FEC=y -CONFIG_DM_SWITCH=y -CONFIG_DM_LOG_WRITES=y -CONFIG_DM_INTEGRITY=y -CONFIG_DM_ZONED=y - -# Magic SysRQ key. -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x0 -CONFIG_MAGIC_SYSRQ_SERIAL=y - -# Crypto user API. -CONFIG_CRYPTO_USER=y -CONFIG_CRYPTO_USER_API=y -CONFIG_CRYPTO_USER_API_HASH=y -CONFIG_CRYPTO_USER_API_SKCIPHER=y -CONFIG_CRYPTO_USER_API_RNG=y -CONFIG_CRYPTO_USER_API_AEAD=y - -# Enable remaining PCIe features (NB: some of these are specific to -# the microsemi PCIe hardware and not used by qemu). 
-CONFIG_HOTPLUG_PCI_PCIE=y -CONFIG_HOTPLUG_PCI=y -CONFIG_MEDIA_PCI_SUPPORT=y -CONFIG_PCI_ATS=y -CONFIG_PCI_DEBUG=y -CONFIG_PCIEAER=y -CONFIG_PCIEASPM_DEFAULT=y -CONFIG_PCIEASPM=y -CONFIG_PCI_ECAM=y -CONFIG_PCIE_MICROSEMI=y -CONFIG_PCI_ENDPOINT=y -CONFIG_PCIEPORTBUS=y -CONFIG_PCI_HOST_COMMON=y -CONFIG_PCI_HOST_GENERIC=y -CONFIG_PCI_IOV=y -CONFIG_PCI_PASID=y -CONFIG_PCI_PRI=y -CONFIG_PCI_SW_SWITCHTEC=y -CONFIG_USB_BDC_PCI=y diff --git a/configs/fedora/generic/riscv/CONFIG_SECCOMP b/configs/fedora/generic/riscv/CONFIG_SECCOMP deleted file mode 100644 index eb9e15092..000000000 --- a/configs/fedora/generic/riscv/CONFIG_SECCOMP +++ /dev/null @@ -1 +0,0 @@ -CONFIG_SECCOMP=y diff --git a/configs/fedora/generic/riscv/CONFIG_SECCOMP_FILTER b/configs/fedora/generic/riscv/CONFIG_SECCOMP_FILTER deleted file mode 100644 index 0814ba30a..000000000 --- a/configs/fedora/generic/riscv/CONFIG_SECCOMP_FILTER +++ /dev/null @@ -1 +0,0 @@ -CONFIG_SECCOMP_FILTER=y diff --git a/kernel-riscv64-debug.config b/kernel-riscv64-debug.config index 32d5350ea..49c096195 100644 --- a/kernel-riscv64-debug.config +++ b/kernel-riscv64-debug.config @@ -4417,7 +4417,6 @@ CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y # CONFIG_SD_ADC_MODULATOR is not set CONFIG_SDIO_UART=m -CONFIG_SECCOMP_FILTER=y CONFIG_SECCOMP=y CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y diff --git a/kernel-riscv64.config b/kernel-riscv64.config index 54029eb84..65a78ed7f 100644 --- a/kernel-riscv64.config +++ b/kernel-riscv64.config @@ -4395,7 +4395,6 @@ CONFIG_SCTP_COOKIE_HMAC_SHA1=y CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y # CONFIG_SD_ADC_MODULATOR is not set CONFIG_SDIO_UART=m -CONFIG_SECCOMP_FILTER=y CONFIG_SECCOMP=y CONFIG_SECONDARY_TRUSTED_KEYRING=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y diff --git a/kernel.spec b/kernel.spec index 3a0d88f3f..1f03c73ec 100644 --- a/kernel.spec +++ b/kernel.spec @@ -142,7 +142,7 @@ Summary: The Linux kernel # pkg_release is what we'll fill in for the 
rpm Release: field %if 0%{?released_kernel} -%define pkg_release %{fedora_build}%{?buildid}.1.riscv64%{?dist} +%define pkg_release %{fedora_build}%{?buildid}.2.riscv64%{?dist} %else @@ -598,6 +598,9 @@ Patch534: 0001-HID-logitech-dj-Fix-forwarding-of-very-long-HID-repo.patch # Fix for broken bluetooth, reverted in upstream stable, not yet in Torvald's tree Patch535: Revert-Bluetooth-Align-minimum-encryption-key-size.patch +# RISC-V SECCOMP support v2 (not posted on linux-riscv yet) +Patch550: riscv_seccomp_v2.patch + # END OF PATCH DEFINITIONS %endif diff --git a/riscv_seccomp_v2.patch b/riscv_seccomp_v2.patch new file mode 100644 index 000000000..7c646bf32 --- /dev/null +++ b/riscv_seccomp_v2.patch @@ -0,0 +1,174 @@ +diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig +index 0c4b12205632..6f89a83c1e9c 100644 +--- a/arch/riscv/Kconfig ++++ b/arch/riscv/Kconfig +@@ -30,6 +30,7 @@ config RISCV + select GENERIC_SMP_IDLE_THREAD + select GENERIC_ATOMIC64 if !64BIT + select HAVE_ARCH_AUDITSYSCALL ++ select HAVE_ARCH_SECCOMP_FILTER + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_DMA_CONTIGUOUS + select HAVE_FUTEX_CMPXCHG if FUTEX +@@ -223,6 +224,19 @@ menu "Kernel features" + + source "kernel/Kconfig.hz" + ++config SECCOMP ++ bool "Enable seccomp to safely compute untrusted bytecode" ++ help ++ This kernel feature is useful for number crunching applications ++ that may need to compute untrusted bytecode during their ++ execution. By using pipes or other transports made available to ++ the process as file descriptors supporting the read/write ++ syscalls, it's possible to isolate those applications in ++ their own address space using seccomp. Once seccomp is ++ enabled via prctl(PR_SET_SECCOMP), it cannot be disabled ++ and the task is only allowed to execute a few safe syscalls ++ defined by each seccomp mode. 
++ + endmenu + + menu "Boot options" +diff --git a/arch/riscv/include/asm/seccomp.h b/arch/riscv/include/asm/seccomp.h +new file mode 100644 +index 000000000000..bf7744ee3b3d +--- /dev/null ++++ b/arch/riscv/include/asm/seccomp.h +@@ -0,0 +1,10 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++ ++#ifndef _ASM_SECCOMP_H ++#define _ASM_SECCOMP_H ++ ++#include <asm/unistd.h> ++ ++#include <asm-generic/seccomp.h> ++ ++#endif /* _ASM_SECCOMP_H */ +diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h +index 905372d7eeb8..8c7585a4610c 100644 +--- a/arch/riscv/include/asm/thread_info.h ++++ b/arch/riscv/include/asm/thread_info.h +@@ -75,6 +75,7 @@ struct thread_info { + #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ + #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ ++#define TIF_SECCOMP 8 /* syscall secure computing */ + + #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) + #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +@@ -82,11 +83,13 @@ struct thread_info { + #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) + #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) + #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) ++#define _TIF_SECCOMP (1 << TIF_SECCOMP) + + #define _TIF_WORK_MASK \ + (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) + + #define _TIF_SYSCALL_WORK \ +- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) ++ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \ ++ _TIF_SECCOMP ) + + #endif /* _ASM_RISCV_THREAD_INFO_H */ +diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S +index bc7a56e1ca6f..0bbedfa3e47d 100644 +--- a/arch/riscv/kernel/entry.S ++++ b/arch/riscv/kernel/entry.S +@@ -203,8 +203,25 @@ check_syscall_nr: + /* Check to make sure we don't jump to a bogus syscall number. 
*/ + li t0, __NR_syscalls + la s0, sys_ni_syscall +- /* Syscall number held in a7 */ +- bgeu a7, t0, 1f ++ /* ++ * The tracer can change syscall number to valid/invalid value. ++ * We use syscall_set_nr helper in syscall_trace_enter thus we ++ * cannot trust the current value in a7 and have to reload from ++ * the current task pt_regs. ++ */ ++ REG_L a7, PT_A7(sp) ++ /* ++ * Syscall number held in a7. ++ * If syscall number is above allowed value, redirect to ni_syscall. ++ */ ++ bge a7, t0, 1f ++ /* ++ * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. ++ * If yes, we pretend it was executed. ++ */ ++ li t1, -1 ++ beq a7, t1, ret_from_syscall_rejected ++ /* Call syscall */ + la s0, sys_call_table + slli t0, a7, RISCV_LGPTR + add s0, s0, t0 +@@ -215,6 +232,12 @@ check_syscall_nr: + ret_from_syscall: + /* Set user a0 to kernel a0 */ + REG_S a0, PT_A0(sp) ++ /* ++ * We didn't execute the actual syscall. ++ * Seccomp already set return value for the current task pt_regs. ++ * (If it was configured with SECCOMP_RET_ERRNO/TRACE) ++ */ ++ret_from_syscall_rejected: + /* Trace syscalls, but only if requested by the user. */ + REG_L t0, TASK_TI_FLAGS(tp) + andi t0, t0, _TIF_SYSCALL_WORK +diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c +index 368751438366..63e47c9f85f0 100644 +--- a/arch/riscv/kernel/ptrace.c ++++ b/arch/riscv/kernel/ptrace.c +@@ -154,6 +154,16 @@ void do_syscall_trace_enter(struct pt_regs *regs) + if (tracehook_report_syscall_entry(regs)) + syscall_set_nr(current, regs, -1); + ++ /* ++ * Do the secure computing after ptrace; failures should be fast. ++ * If this fails we might have return value in a0 from seccomp ++ * (via SECCOMP_RET_ERRNO/TRACE). 
++ */ ++ if (secure_computing(NULL) == -1) { ++ syscall_set_nr(current, regs, -1); ++ return; ++ } ++ + #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, syscall_get_nr(current, regs)); +diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c +index dc66fe852768..e30864b25fb5 100644 +--- a/tools/testing/selftests/seccomp/seccomp_bpf.c ++++ b/tools/testing/selftests/seccomp/seccomp_bpf.c +@@ -112,6 +112,8 @@ struct seccomp_data { + # define __NR_seccomp 383 + # elif defined(__aarch64__) + # define __NR_seccomp 277 ++# elif defined(__riscv) ++# define __NR_seccomp 277 + # elif defined(__hppa__) + # define __NR_seccomp 338 + # elif defined(__powerpc__) +@@ -1582,6 +1584,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) + # define ARCH_REGS struct user_pt_regs + # define SYSCALL_NUM regs[8] + # define SYSCALL_RET regs[0] ++#elif defined(__riscv) && __riscv_xlen == 64 ++# define ARCH_REGS struct user_regs_struct ++# define SYSCALL_NUM a7 ++# define SYSCALL_RET a0 + #elif defined(__hppa__) + # define ARCH_REGS struct user_regs_struct + # define SYSCALL_NUM gr[20] +@@ -1671,7 +1677,7 @@ void change_syscall(struct __test_metadata *_metadata, + EXPECT_EQ(0, ret) {} + + #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ +- defined(__s390__) || defined(__hppa__) ++ defined(__s390__) || defined(__hppa__) || defined(__riscv) + { + regs.SYSCALL_NUM = syscall; + }