Add support for SECCOMP (v2)
The patch is added for testing before publishing on linux-riscv. Signed-off-by: David Abdurachmanov <david.abdurachmanov@sifive.com>
This commit is contained in:
parent
394552ce0a
commit
601e6a0b25
201
config
201
config
|
@ -1,201 +0,0 @@
|
|||
# https://github.com/riscv/riscv-qemu/commit/039dbd521277bc0aab672203a1a199e4519094da
|
||||
CONFIG_SERIAL_8250=y
|
||||
CONFIG_SERIAL_8250_CONSOLE=y
|
||||
CONFIG_SERIAL_OF_PLATFORM=y
|
||||
|
||||
# https://github.com/riscv/riscv-qemu/commit/3446cee04256753a29c45b033d643fcdea24fc72
|
||||
CONFIG_RISCV_PLIC=y
|
||||
CONFIG_HVC_RISCV_SBI=y
|
||||
CONFIG_VIRTIO=y
|
||||
CONFIG_VIRTIO_MMIO=y
|
||||
CONFIG_VIRTIO_PCI=y
|
||||
CONFIG_VIRTIO_BLK=y
|
||||
CONFIG_VIRTIO_NET=y
|
||||
CONFIG_VIRTIO_CONSOLE=y
|
||||
CONFIG_SCSI_VIRTIO=y
|
||||
CONFIG_DRM_VIRTIO_GPU=y
|
||||
|
||||
# Loopback lets us build the stage4 disk.
|
||||
CONFIG_BLK_DEV=y
|
||||
CONFIG_BLK_DEV_LOOP=y
|
||||
|
||||
# The stage4 root filesystem is ext4.
|
||||
CONFIG_EXT4_FS=y
|
||||
|
||||
# Needed to avoid cap_set_file errors when installing RPMs.
|
||||
CONFIG_EXT4_FS_SECURITY=y
|
||||
|
||||
CONFIG_FILE_LOCKING=y
|
||||
CONFIG_NET_CORE=y
|
||||
CONFIG_NETDEVICES=y
|
||||
|
||||
# iptables supported, needed for mock.
|
||||
CONFIG_NETFILTER=y
|
||||
CONFIG_NETFILTER_ADVANCED=y
|
||||
CONFIG_NETFILTER_XTABLES=y
|
||||
CONFIG_NETFILTER_XT_MATCH_STATE=y
|
||||
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
|
||||
CONFIG_NF_IPTABLES=y
|
||||
CONFIG_NF_NAT=y
|
||||
CONFIG_NF_NAT_IPV4=y
|
||||
CONFIG_NF_NAT_IPV6=y
|
||||
CONFIG_NF_CONNTRACK=y
|
||||
CONFIG_NF_CONNTRACK_IPV4=y
|
||||
CONFIG_NF_CONNTRACK_IPV6=y
|
||||
CONFIG_NF_SOCKET_IPV4=y
|
||||
CONFIG_NF_SOCKET_IPV6=y
|
||||
CONFIG_IP_NF_NAT=y
|
||||
CONFIG_IP_NF_FILTER=y
|
||||
CONFIG_IP_NF_IPTABLES=y
|
||||
CONFIG_IP_NF_TARGET_REJECT=y
|
||||
CONFIG_IP_NF_RAW=y
|
||||
CONFIG_IP6_NF_NAT=y
|
||||
CONFIG_IP6_NF_FILTER=y
|
||||
CONFIG_IP6_NF_IPTABLES=y
|
||||
CONFIG_IP6_NF_TARGET_REJECT=y
|
||||
CONFIG_IP6_NF_RAW=y
|
||||
|
||||
# For systemd:
|
||||
# https://cgit.freedesktop.org/systemd/systemd/tree/README
|
||||
CONFIG_TMPFS=y
|
||||
CONFIG_TMPFS_XATTR=y
|
||||
CONFIG_SYSFS=y
|
||||
CONFIG_DEVTMPFS=y
|
||||
CONFIG_CGROUPS=y
|
||||
CONFIG_INOTIFY_USER=y
|
||||
CONFIG_SIGNALFD=y
|
||||
CONFIG_TIMERFD=y
|
||||
CONFIG_EPOLL=y
|
||||
CONFIG_NET=y
|
||||
CONFIG_PROC_FS=y
|
||||
CONFIG_FHANDLE=y
|
||||
CONFIG_SYSFS_DEPRECATED=n
|
||||
CONFIG_UEVENT_HELPER_PATH=""
|
||||
CONFIG_FW_LOADER_USER_HELPER=n
|
||||
CONFIG_DMIID=y
|
||||
CONFIG_BLK_DEV_BSG=y
|
||||
CONFIG_NET_NS=y
|
||||
CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
|
||||
CONFIG_AUTOFS4_FS=y
|
||||
CONFIG_TMPFS_XATTR=y
|
||||
CONFIG_TMPFS_POSIX_ACL=y
|
||||
CONFIG_EXT4_POSIX_ACL=y
|
||||
CONFIG_XFS_POSIX_ACL=y
|
||||
CONFIG_BTRFS_FS_POSIX_ACL=y
|
||||
CONFIG_SECCOMP=y
|
||||
CONFIG_SECCOMP_FILTER=y
|
||||
CONFIG_CHECKPOINT_RESTORE=y
|
||||
CONFIG_CGROUP_SCHED=y
|
||||
CONFIG_FAIR_GROUP_SCHED=y
|
||||
CONFIG_CFS_BANDWIDTH=y
|
||||
CONFIG_EFIVAR_FS=y
|
||||
CONFIG_EFI_PARTITION=y
|
||||
CONFIG_RT_GROUP_SCHED=n
|
||||
CONFIG_AUDIT=n
|
||||
|
||||
# Networking:
|
||||
CONFIG_NET=y
|
||||
CONFIG_PACKET=y
|
||||
CONFIG_UNIX=y
|
||||
CONFIG_INET=y
|
||||
CONFIG_BRIDGE=y
|
||||
CONFIG_IPV6=y
|
||||
|
||||
# NFS client.
|
||||
CONFIG_NFS_FS=y
|
||||
CONFIG_NFS_V3=y
|
||||
CONFIG_NFS_V3_ACL=y
|
||||
CONFIG_NFS_V4=y
|
||||
CONFIG_NFS_V4_1=y
|
||||
CONFIG_NFS_V4_2=y
|
||||
CONFIG_NFS_FSCACHE=y
|
||||
CONFIG_ROOT_NFS=y
|
||||
|
||||
# SysV IPC needed by GnuPG.
|
||||
CONFIG_SYSVIPC=y
|
||||
CONFIG_SYSVIPC_SYSCTL=y
|
||||
|
||||
# Add XFS for running xfstests (to test libaio).
|
||||
CONFIG_XFS_FS=y
|
||||
CONFIG_XFS_QUOTA=y
|
||||
CONFIG_XFS_POSIX_ACL=y
|
||||
|
||||
# For mock, avoids:
|
||||
# WARNING: tcmsg: [Errno 2] No such file or directory: '/proc/net/psched'
|
||||
# WARNING: the tc subsystem functionality is limited
|
||||
CONFIG_NET_SCHED=y
|
||||
|
||||
# For NBD booting.
|
||||
CONFIG_BLK_DEV_NBD=y
|
||||
|
||||
# Debugging.
|
||||
CONFIG_STACKTRACE=y
|
||||
|
||||
# Device mapper support.
|
||||
CONFIG_MD=y
|
||||
CONFIG_MD_AUTODETECT=y
|
||||
CONFIG_BLK_DEV_DM=y
|
||||
CONFIG_BLK_DEV_DM_BUILTIN=y
|
||||
|
||||
CONFIG_DM_DEBUG=y
|
||||
CONFIG_DM_BUFIO=y
|
||||
CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING=y
|
||||
CONFIG_DM_BIO_PRISON=y
|
||||
CONFIG_DM_PERSISTENT_DATA=y
|
||||
CONFIG_DM_UNSTRIPED=y
|
||||
CONFIG_DM_CRYPT=y
|
||||
CONFIG_DM_SNAPSHOT=y
|
||||
CONFIG_DM_THIN_PROVISIONING=y
|
||||
CONFIG_DM_CACHE=y
|
||||
CONFIG_DM_CACHE_SMQ=y
|
||||
CONFIG_DM_MIRROR=y
|
||||
CONFIG_DM_LOG_USERSPACE=y
|
||||
CONFIG_DM_RAID=y
|
||||
CONFIG_DM_ZERO=y
|
||||
CONFIG_DM_MULTIPATH=y
|
||||
CONFIG_DM_MULTIPATH_QL=y
|
||||
CONFIG_DM_MULTIPATH_ST=y
|
||||
CONFIG_DM_DELAY=y
|
||||
CONFIG_DM_UEVENT=y
|
||||
CONFIG_DM_FLAKEY=y
|
||||
CONFIG_DM_VERITY=y
|
||||
CONFIG_DM_VERITY_FEC=y
|
||||
CONFIG_DM_SWITCH=y
|
||||
CONFIG_DM_LOG_WRITES=y
|
||||
CONFIG_DM_INTEGRITY=y
|
||||
CONFIG_DM_ZONED=y
|
||||
|
||||
# Magic SysRQ key.
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x0
|
||||
CONFIG_MAGIC_SYSRQ_SERIAL=y
|
||||
|
||||
# Crypto user API.
|
||||
CONFIG_CRYPTO_USER=y
|
||||
CONFIG_CRYPTO_USER_API=y
|
||||
CONFIG_CRYPTO_USER_API_HASH=y
|
||||
CONFIG_CRYPTO_USER_API_SKCIPHER=y
|
||||
CONFIG_CRYPTO_USER_API_RNG=y
|
||||
CONFIG_CRYPTO_USER_API_AEAD=y
|
||||
|
||||
# Enable remaining PCIe features (NB: some of these are specific to
|
||||
# the microsemi PCIe hardware and not used by qemu).
|
||||
CONFIG_HOTPLUG_PCI_PCIE=y
|
||||
CONFIG_HOTPLUG_PCI=y
|
||||
CONFIG_MEDIA_PCI_SUPPORT=y
|
||||
CONFIG_PCI_ATS=y
|
||||
CONFIG_PCI_DEBUG=y
|
||||
CONFIG_PCIEAER=y
|
||||
CONFIG_PCIEASPM_DEFAULT=y
|
||||
CONFIG_PCIEASPM=y
|
||||
CONFIG_PCI_ECAM=y
|
||||
CONFIG_PCIE_MICROSEMI=y
|
||||
CONFIG_PCI_ENDPOINT=y
|
||||
CONFIG_PCIEPORTBUS=y
|
||||
CONFIG_PCI_HOST_COMMON=y
|
||||
CONFIG_PCI_HOST_GENERIC=y
|
||||
CONFIG_PCI_IOV=y
|
||||
CONFIG_PCI_PASID=y
|
||||
CONFIG_PCI_PRI=y
|
||||
CONFIG_PCI_SW_SWITCHTEC=y
|
||||
CONFIG_USB_BDC_PCI=y
|
|
@ -1 +0,0 @@
|
|||
CONFIG_SECCOMP=y
|
|
@ -1 +0,0 @@
|
|||
CONFIG_SECCOMP_FILTER=y
|
|
@ -4417,7 +4417,6 @@ CONFIG_SCTP_COOKIE_HMAC_SHA1=y
|
|||
CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y
|
||||
# CONFIG_SD_ADC_MODULATOR is not set
|
||||
CONFIG_SDIO_UART=m
|
||||
CONFIG_SECCOMP_FILTER=y
|
||||
CONFIG_SECCOMP=y
|
||||
CONFIG_SECONDARY_TRUSTED_KEYRING=y
|
||||
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
|
||||
|
|
|
@ -4395,7 +4395,6 @@ CONFIG_SCTP_COOKIE_HMAC_SHA1=y
|
|||
CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1=y
|
||||
# CONFIG_SD_ADC_MODULATOR is not set
|
||||
CONFIG_SDIO_UART=m
|
||||
CONFIG_SECCOMP_FILTER=y
|
||||
CONFIG_SECCOMP=y
|
||||
CONFIG_SECONDARY_TRUSTED_KEYRING=y
|
||||
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
|
||||
|
|
|
@ -142,7 +142,7 @@ Summary: The Linux kernel
|
|||
# pkg_release is what we'll fill in for the rpm Release: field
|
||||
%if 0%{?released_kernel}
|
||||
|
||||
%define pkg_release %{fedora_build}%{?buildid}.1.riscv64%{?dist}
|
||||
%define pkg_release %{fedora_build}%{?buildid}.2.riscv64%{?dist}
|
||||
|
||||
%else
|
||||
|
||||
|
@ -598,6 +598,9 @@ Patch534: 0001-HID-logitech-dj-Fix-forwarding-of-very-long-HID-repo.patch
|
|||
# Fix for broken bluetooth, reverted in upstream stable, not yet in Torvalds' tree
|
||||
Patch535: Revert-Bluetooth-Align-minimum-encryption-key-size.patch
|
||||
|
||||
# RISC-V SECCOMP support v2 (not posted on linux-riscv yet)
|
||||
Patch550: riscv_seccomp_v2.patch
|
||||
|
||||
# END OF PATCH DEFINITIONS
|
||||
|
||||
%endif
|
||||
|
|
|
@ -0,0 +1,174 @@
|
|||
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
|
||||
index 0c4b12205632..6f89a83c1e9c 100644
|
||||
--- a/arch/riscv/Kconfig
|
||||
+++ b/arch/riscv/Kconfig
|
||||
@@ -30,6 +30,7 @@ config RISCV
|
||||
select GENERIC_SMP_IDLE_THREAD
|
||||
select GENERIC_ATOMIC64 if !64BIT
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
+ select HAVE_ARCH_SECCOMP_FILTER
|
||||
select HAVE_MEMBLOCK_NODE_MAP
|
||||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_FUTEX_CMPXCHG if FUTEX
|
||||
@@ -223,6 +224,19 @@ menu "Kernel features"
|
||||
|
||||
source "kernel/Kconfig.hz"
|
||||
|
||||
+config SECCOMP
|
||||
+ bool "Enable seccomp to safely compute untrusted bytecode"
|
||||
+ help
|
||||
+ This kernel feature is useful for number crunching applications
|
||||
+ that may need to compute untrusted bytecode during their
|
||||
+ execution. By using pipes or other transports made available to
|
||||
+ the process as file descriptors supporting the read/write
|
||||
+ syscalls, it's possible to isolate those applications in
|
||||
+ their own address space using seccomp. Once seccomp is
|
||||
+ enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
|
||||
+ and the task is only allowed to execute a few safe syscalls
|
||||
+ defined by each seccomp mode.
|
||||
+
|
||||
endmenu
|
||||
|
||||
menu "Boot options"
|
||||
diff --git a/arch/riscv/include/asm/seccomp.h b/arch/riscv/include/asm/seccomp.h
|
||||
new file mode 100644
|
||||
index 000000000000..bf7744ee3b3d
|
||||
--- /dev/null
|
||||
+++ b/arch/riscv/include/asm/seccomp.h
|
||||
@@ -0,0 +1,10 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 */
|
||||
+
|
||||
+#ifndef _ASM_SECCOMP_H
|
||||
+#define _ASM_SECCOMP_H
|
||||
+
|
||||
+#include <asm/unistd.h>
|
||||
+
|
||||
+#include <asm-generic/seccomp.h>
|
||||
+
|
||||
+#endif /* _ASM_SECCOMP_H */
|
||||
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
|
||||
index 905372d7eeb8..8c7585a4610c 100644
|
||||
--- a/arch/riscv/include/asm/thread_info.h
|
||||
+++ b/arch/riscv/include/asm/thread_info.h
|
||||
@@ -75,6 +75,7 @@ struct thread_info {
|
||||
#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
|
||||
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
|
||||
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
|
||||
+#define TIF_SECCOMP 8 /* syscall secure computing */
|
||||
|
||||
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
|
||||
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
|
||||
@@ -82,11 +83,13 @@ struct thread_info {
|
||||
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
|
||||
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
|
||||
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
|
||||
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)
|
||||
|
||||
#define _TIF_WORK_MASK \
|
||||
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED)
|
||||
|
||||
#define _TIF_SYSCALL_WORK \
|
||||
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
|
||||
+ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \
|
||||
+ _TIF_SECCOMP )
|
||||
|
||||
#endif /* _ASM_RISCV_THREAD_INFO_H */
|
||||
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
|
||||
index bc7a56e1ca6f..0bbedfa3e47d 100644
|
||||
--- a/arch/riscv/kernel/entry.S
|
||||
+++ b/arch/riscv/kernel/entry.S
|
||||
@@ -203,8 +203,25 @@ check_syscall_nr:
|
||||
/* Check to make sure we don't jump to a bogus syscall number. */
|
||||
li t0, __NR_syscalls
|
||||
la s0, sys_ni_syscall
|
||||
- /* Syscall number held in a7 */
|
||||
- bgeu a7, t0, 1f
|
||||
+ /*
|
||||
+ * The tracer can change syscall number to valid/invalid value.
|
||||
+ * We use syscall_set_nr helper in syscall_trace_enter thus we
|
||||
+ * cannot trust the current value in a7 and have to reload from
|
||||
+ * the current task pt_regs.
|
||||
+ */
|
||||
+ REG_L a7, PT_A7(sp)
|
||||
+ /*
|
||||
+ * Syscall number held in a7.
|
||||
+ * If syscall number is above allowed value, redirect to ni_syscall.
|
||||
+ */
|
||||
+ bge a7, t0, 1f
|
||||
+ /*
|
||||
+ * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
|
||||
+ * If yes, we pretend it was executed.
|
||||
+ */
|
||||
+ li t1, -1
|
||||
+ beq a7, t1, ret_from_syscall_rejected
|
||||
+ /* Call syscall */
|
||||
la s0, sys_call_table
|
||||
slli t0, a7, RISCV_LGPTR
|
||||
add s0, s0, t0
|
||||
@@ -215,6 +232,12 @@ check_syscall_nr:
|
||||
ret_from_syscall:
|
||||
/* Set user a0 to kernel a0 */
|
||||
REG_S a0, PT_A0(sp)
|
||||
+ /*
|
||||
+ * We didn't execute the actual syscall.
|
||||
+ * Seccomp already set return value for the current task pt_regs.
|
||||
+ * (If it was configured with SECCOMP_RET_ERRNO/TRACE)
|
||||
+ */
|
||||
+ret_from_syscall_rejected:
|
||||
/* Trace syscalls, but only if requested by the user. */
|
||||
REG_L t0, TASK_TI_FLAGS(tp)
|
||||
andi t0, t0, _TIF_SYSCALL_WORK
|
||||
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
|
||||
index 368751438366..63e47c9f85f0 100644
|
||||
--- a/arch/riscv/kernel/ptrace.c
|
||||
+++ b/arch/riscv/kernel/ptrace.c
|
||||
@@ -154,6 +154,16 @@ void do_syscall_trace_enter(struct pt_regs *regs)
|
||||
if (tracehook_report_syscall_entry(regs))
|
||||
syscall_set_nr(current, regs, -1);
|
||||
|
||||
+ /*
|
||||
+ * Do the secure computing after ptrace; failures should be fast.
|
||||
+ * If this fails we might have return value in a0 from seccomp
|
||||
+ * (via SECCOMP_RET_ERRNO/TRACE).
|
||||
+ */
|
||||
+ if (secure_computing(NULL) == -1) {
|
||||
+ syscall_set_nr(current, regs, -1);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
|
||||
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
|
||||
trace_sys_enter(regs, syscall_get_nr(current, regs));
|
||||
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
|
||||
index dc66fe852768..e30864b25fb5 100644
|
||||
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
|
||||
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
|
||||
@@ -112,6 +112,8 @@ struct seccomp_data {
|
||||
# define __NR_seccomp 383
|
||||
# elif defined(__aarch64__)
|
||||
# define __NR_seccomp 277
|
||||
+# elif defined(__riscv)
|
||||
+# define __NR_seccomp 277
|
||||
# elif defined(__hppa__)
|
||||
# define __NR_seccomp 338
|
||||
# elif defined(__powerpc__)
|
||||
@@ -1582,6 +1584,10 @@ TEST_F(TRACE_poke, getpid_runs_normally)
|
||||
# define ARCH_REGS struct user_pt_regs
|
||||
# define SYSCALL_NUM regs[8]
|
||||
# define SYSCALL_RET regs[0]
|
||||
+#elif defined(__riscv) && __riscv_xlen == 64
|
||||
+# define ARCH_REGS struct user_regs_struct
|
||||
+# define SYSCALL_NUM a7
|
||||
+# define SYSCALL_RET a0
|
||||
#elif defined(__hppa__)
|
||||
# define ARCH_REGS struct user_regs_struct
|
||||
# define SYSCALL_NUM gr[20]
|
||||
@@ -1671,7 +1677,7 @@ void change_syscall(struct __test_metadata *_metadata,
|
||||
EXPECT_EQ(0, ret) {}
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
|
||||
- defined(__s390__) || defined(__hppa__)
|
||||
+ defined(__s390__) || defined(__hppa__) || defined(__riscv)
|
||||
{
|
||||
regs.SYSCALL_NUM = syscall;
|
||||
}
|
Loading…
Reference in New Issue