diff --git a/include/linux/bpf.h b/include/linux/bpf.h index de18227b3d95..a2132e09dc1c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -16,6 +16,7 @@ #include #include #include +#include struct bpf_verifier_env; struct perf_event; @@ -340,6 +341,12 @@ enum bpf_cgroup_storage_type { #define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX +struct bpf_prog_stats { + u64 cnt; + u64 nsecs; + struct u64_stats_sync syncp; +}; + struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -389,6 +396,7 @@ struct bpf_prog_aux { * main prog always has linfo_idx == 0 */ u32 linfo_idx; + struct bpf_prog_stats __percpu *stats; union { struct work_struct work; struct rcu_head rcu; @@ -559,6 +567,7 @@ void bpf_map_area_free(void *base); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); extern int sysctl_unprivileged_bpf_disabled; +extern int sysctl_bpf_stats_enabled; int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); diff --git a/include/linux/filter.h b/include/linux/filter.h index f32b3eca5a04..7e5e3db11106 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -533,7 +533,24 @@ struct sk_filter { struct bpf_prog *prog; }; -#define BPF_PROG_RUN(filter, ctx) ({ cant_sleep(); (*(filter)->bpf_func)(ctx, (filter)->insnsi); }) +DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); + +#define BPF_PROG_RUN(prog, ctx) ({ \ + u32 ret; \ + cant_sleep(); \ + if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ + struct bpf_prog_stats *stats; \ + u64 start = sched_clock(); \ + ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \ + stats = this_cpu_ptr(prog->aux->stats); \ + u64_stats_update_begin(&stats->syncp); \ + stats->cnt++; \ + stats->nsecs += sched_clock() - start; \ + u64_stats_update_end(&stats->syncp); \ + } else { \ + ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \ + } \ + ret; }) #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN @@ -764,6 +781,7 @@ void bpf_prog_free_jited_linfo(struct bpf_prog *prog); void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); +struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); void __bpf_prog_free(struct bpf_prog *fp); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ef88b167959d..1c14c347f3cf 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -78,7 +78,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } -struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) +struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags) { gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog_aux *aux; @@ -104,6 +104,26 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) return fp; } + +struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; + struct bpf_prog *prog; + + prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags); + if (!prog) + return NULL; + + prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags); + if (!prog->aux->stats) { + kfree(prog->aux); + vfree(prog); + return NULL; + } + + u64_stats_init(&prog->aux->stats->syncp); + return prog; +} EXPORT_SYMBOL_GPL(bpf_prog_alloc); int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog) @@ -231,7 +251,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, void __bpf_prog_free(struct bpf_prog *fp) { - kfree(fp->aux); + if (fp->aux) { + free_percpu(fp->aux->stats); + kfree(fp->aux); + } vfree(fp); } @@ -2069,6 +2092,10 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, return -EFAULT; } +DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); +EXPORT_SYMBOL(bpf_stats_enabled_key); +int sysctl_bpf_stats_enabled __read_mostly; + /* All definitions of tracepoints related to BPF. */ #define CREATE_TRACE_POINTS #include diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ec7c552af76b..31cf66fc3f5c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1283,24 +1283,54 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) return 0; } +static void bpf_prog_get_stats(const struct bpf_prog *prog, + struct bpf_prog_stats *stats) +{ + u64 nsecs = 0, cnt = 0; + int cpu; + + for_each_possible_cpu(cpu) { + const struct bpf_prog_stats *st; + unsigned int start; + u64 tnsecs, tcnt; + + st = per_cpu_ptr(prog->aux->stats, cpu); + do { + start = u64_stats_fetch_begin_irq(&st->syncp); + tnsecs = st->nsecs; + tcnt = st->cnt; + } while (u64_stats_fetch_retry_irq(&st->syncp, start)); + nsecs += tnsecs; + cnt += tcnt; + } + stats->nsecs = nsecs; + stats->cnt = cnt; +} + #ifdef CONFIG_PROC_FS static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_prog *prog = filp->private_data; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; + struct bpf_prog_stats stats; + bpf_prog_get_stats(prog, &stats); bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, "prog_type:\t%u\n" "prog_jited:\t%u\n" "prog_tag:\t%s\n" "memlock:\t%llu\n" - "prog_id:\t%u\n", + "prog_id:\t%u\n" + "run_time_ns:\t%llu\n" + "run_cnt:\t%llu\n", prog->type, prog->jited, prog_tag, prog->pages * 1ULL << PAGE_SHIFT, - prog->aux->id); + prog->aux->id, + stats.nsecs, + stats.cnt); } #endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1b9496c41383..0e4edd7e3c5f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7320,7 +7320,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) subprog_end = env->subprog_info[i + 1].start; len = subprog_end - subprog_start; - func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); + /* BPF_PROG_RUN doesn't call subprogs directly, + * hence main prog stats include the runtime of subprogs. + * subprogs don't have IDs and not reachable via prog_get_next_id + * func[i]->aux->stats will never be accessed and stays NULL + */ + func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER); if (!func[i]) goto out_free; memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ba4d9e85feb8..86e0771352f2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -224,6 +224,9 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, #endif static int proc_dopipe_max_size(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses its own private copy */ @@ -1230,6 +1233,15 @@ static struct ctl_table kern_table[] = { .extra2 = &one, }, #endif + { + .procname = "bpf_stats_enabled", + .data = &sysctl_bpf_stats_enabled, + .maxlen = sizeof(sysctl_bpf_stats_enabled), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_bpf_stats, + .extra1 = &zero, + .extra2 = &one, + }, #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) { .procname = "panic_on_rcu_stall", @@ -3260,6 +3272,28 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, #endif /* CONFIG_PROC_SYSCTL */ +static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret, bpf_stats = *(int *)table->data; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &bpf_stats; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + *(int *)table->data = bpf_stats; + if (bpf_stats) + static_branch_enable(&bpf_stats_enabled_key); + else + static_branch_disable(&bpf_stats_enabled_key); + } + return ret; +} + /* * No sense putting this after each symbol definition, twice, * exception granted :-)