From abc158d4678f71b23f9b00e1408c92f62be97080 Mon Sep 17 00:00:00 2001 From: "Justin M. Forbes" Date: Tue, 18 Dec 2018 16:46:25 -0600 Subject: [PATCH] Linux v4.20-rc7-6-gddfbab46539f --- bpf-fix-bpf_jit_limit-knob.patch | 173 +++++++++++++++++++++++++++++++ gitrev | 2 +- kernel.spec | 11 +- sources | 1 + 4 files changed, 184 insertions(+), 3 deletions(-) create mode 100644 bpf-fix-bpf_jit_limit-knob.patch diff --git a/bpf-fix-bpf_jit_limit-knob.patch b/bpf-fix-bpf_jit_limit-knob.patch new file mode 100644 index 000000000..68c9d967c --- /dev/null +++ b/bpf-fix-bpf_jit_limit-knob.patch @@ -0,0 +1,173 @@ +From fdadd04931c2d7cd294dc5b2b342863f94be53a3 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Tue, 11 Dec 2018 12:14:12 +0100 +Subject: bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K + +Michael and Sandipan report: + + Commit ede95a63b5 introduced a bpf_jit_limit tuneable to limit BPF + JIT allocations. At compile time it defaults to PAGE_SIZE * 40000, + and is adjusted again at init time if MODULES_VADDR is defined. + + For ppc64 kernels, MODULES_VADDR isn't defined, so we're stuck with + the compile-time default at boot-time, which is 0x9c400000 when + using 64K page size. This overflows the signed 32-bit bpf_jit_limit + value: + + root@ubuntu:/tmp# cat /proc/sys/net/core/bpf_jit_limit + -1673527296 + + and can cause various unexpected failures throughout the network + stack. In one case `strace dhclient eth0` reported: + + setsockopt(5, SOL_SOCKET, SO_ATTACH_FILTER, {len=11, filter=0x105dd27f8}, + 16) = -1 ENOTSUPP (Unknown error 524) + + and similar failures can be seen with tools like tcpdump. This doesn't + always reproduce however, and I'm not sure why. The more consistent + failure I've seen is an Ubuntu 18.04 KVM guest booted on a POWER9 + host would time out on systemd/netplan configuring a virtio-net NIC + with no noticeable errors in the logs. + +Given this and also given that in near future some architectures like +arm64 will have a custom area for BPF JIT image allocations we should +get rid of the BPF_JIT_LIMIT_DEFAULT fallback / default entirely. For +4.21, we have an overridable bpf_jit_alloc_exec(), bpf_jit_free_exec() +so therefore add another overridable bpf_jit_alloc_exec_limit() helper +function which returns the possible size of the memory area for deriving +the default heuristic in bpf_jit_charge_init(). + +Like bpf_jit_alloc_exec() and bpf_jit_free_exec(), the new +bpf_jit_alloc_exec_limit() assumes that module_alloc() is the default +JIT memory provider, and therefore in case archs implement their custom +module_alloc() we use MODULES_{END,_VADDR} for limits and otherwise for +vmalloc_exec() cases like on ppc64 we use VMALLOC_{END,_START}. + +Additionally, for archs supporting large page sizes, we should change +the sysctl to be handled as long to not run into sysctl restrictions +in future. + +Fixes: ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations") +Reported-by: Sandipan Das +Reported-by: Michael Roth +Signed-off-by: Daniel Borkmann +Tested-by: Michael Roth +Signed-off-by: Alexei Starovoitov +--- + include/linux/filter.h | 2 +- + kernel/bpf/core.c | 21 +++++++++++++++------ + net/core/sysctl_net_core.c | 20 +++++++++++++++++--- + 3 files changed, 33 insertions(+), 10 deletions(-) + +diff --git a/include/linux/filter.h b/include/linux/filter.h +index 795ff0b869bb..a8b9d90a8042 100644 +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -861,7 +861,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, + extern int bpf_jit_enable; + extern int bpf_jit_harden; + extern int bpf_jit_kallsyms; +-extern int bpf_jit_limit; ++extern long bpf_jit_limit; + + typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); + +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index b1a3545d0ec8..b2890c268cb3 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -365,13 +365,11 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) + } + + #ifdef CONFIG_BPF_JIT +-# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) +- + /* All BPF JIT sysctl knobs here. */ + int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); + int bpf_jit_harden __read_mostly; + int bpf_jit_kallsyms __read_mostly; +-int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; ++long bpf_jit_limit __read_mostly; + + static __always_inline void + bpf_get_prog_addr_region(const struct bpf_prog *prog, +@@ -580,16 +578,27 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + + static atomic_long_t bpf_jit_current; + ++/* Can be overridden by an arch's JIT compiler if it has a custom, ++ * dedicated BPF backend memory area, or if neither of the two ++ * below apply. ++ */ ++u64 __weak bpf_jit_alloc_exec_limit(void) ++{ + #if defined(MODULES_VADDR) ++ return MODULES_END - MODULES_VADDR; ++#else ++ return VMALLOC_END - VMALLOC_START; ++#endif ++} ++ + static int __init bpf_jit_charge_init(void) + { + /* Only used as heuristic here to derive limit. */ +- bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, +- PAGE_SIZE), INT_MAX); ++ bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, ++ PAGE_SIZE), LONG_MAX); + return 0; + } + pure_initcall(bpf_jit_charge_init); +-#endif + + static int bpf_jit_charge_modmem(u32 pages) + { +diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c +index 37b4667128a3..d67ec17f2cc8 100644 +--- a/net/core/sysctl_net_core.c ++++ b/net/core/sysctl_net_core.c +@@ -28,6 +28,8 @@ static int two __maybe_unused = 2; + static int min_sndbuf = SOCK_MIN_SNDBUF; + static int min_rcvbuf = SOCK_MIN_RCVBUF; + static int max_skb_frags = MAX_SKB_FRAGS; ++static long long_one __maybe_unused = 1; ++static long long_max __maybe_unused = LONG_MAX; + + static int net_msg_warn; /* Unused, but still a sysctl */ + +@@ -289,6 +291,17 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, + + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); + } ++ ++static int ++proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, ++ void __user *buffer, size_t *lenp, ++ loff_t *ppos) ++{ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); ++} + #endif + + static struct ctl_table net_core_table[] = { +@@ -398,10 +411,11 @@ static struct ctl_table net_core_table[] = { + { + .procname = "bpf_jit_limit", + .data = &bpf_jit_limit, +- .maxlen = sizeof(int), ++ .maxlen = sizeof(long), + .mode = 0600, +- .proc_handler = proc_dointvec_minmax_bpf_restricted, +- .extra1 = &one, ++ .proc_handler = proc_dolongvec_minmax_bpf_restricted, ++ .extra1 = &long_one, ++ .extra2 = &long_max, + }, + #endif + { +-- +cgit 1.2-0.3.lf.el7 + diff --git a/gitrev b/gitrev index fab6f201d..ff20bbe6e 100644 --- a/gitrev +++ b/gitrev @@ -1 +1 @@ -7566ec393f4161572ba6f11ad5171fd5d59b0fbd +ddfbab46539f2d37a9e9d357b054486b51f7dc27 diff --git a/kernel.spec b/kernel.spec index 32b63997b..13308c6a0 100644 --- a/kernel.spec +++ b/kernel.spec @@ -69,7 +69,7 @@ Summary: The Linux kernel # The rc snapshot level %global rcrev 7 # The git snapshot level -%define gitrev 0 +%define gitrev 1 # Set rpm version accordingly %define rpmversion 4.%{upstream_sublevel}.0 %endif @@ -122,7 +122,7 @@ Summary: The Linux kernel # Set debugbuildsenabled to 1 for production (build separate debug kernels) # and 0 for rawhide (all kernels are debug kernels). # See also 'make debug' and 'make release'. -%define debugbuildsenabled 1 +%define debugbuildsenabled 0 # Kernel headers are being split out into a separate package %if 0%{?fedora} @@ -612,6 +612,9 @@ Patch504: iio-accel-kxcjk1013-Add-more-hardware-ids.patch # rhbz 1645070 patch queued upstream for merging into 4.21 Patch505: asus-fx503-keyb.patch +# rhbz 1647947 +Patch506: bpf-fix-bpf_jit_limit-knob.patch + # END OF PATCH DEFINITIONS %endif @@ -1884,6 +1887,10 @@ fi # # %changelog +* Tue Dec 18 2018 Justin M. Forbes - 4.20.0-0.rc7.git1.1 +- Linux v4.20-rc7-6-gddfbab46539f +- Reenable debugging options. + * Mon Dec 17 2018 Justin M. Forbes - 4.20.0-0.rc7.git0.1 - Linux v4.20-rc7 diff --git a/sources b/sources index 431cf579f..7674e69d3 100644 --- a/sources +++ b/sources @@ -1,2 +1,3 @@ SHA512 (linux-4.19.tar.xz) = ab67cc746b375a8b135e8b23e35e1d6787930d19b3c26b2679787d62951cbdbc3bb66f8ededeb9b890e5008b2459397f9018f1a6772fdef67780b06a4cb9f6f4 SHA512 (patch-4.20-rc7.xz) = 84c35b95f08454f3920b1400e6fee8c6f30ebfdcc9a32f447d2124867b22a17da87c0d1496dd22512ddb4d6c0ce9457acddb6d6167e8c673d44b3f2a585486bd +SHA512 (patch-4.20-rc7-git1.xz) = 1be1f4d521267a23b3682926dd7b6cf638d8bd1073dd14575007b7736714668229fd2e0b6532e50d9ff07a3079210741e3bd37c52ecab9706435db546e495f51