From 271de525e1d7f564e88a9d212c50998b49a54476 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:16 -0700 Subject: bpf: Remove prog->active check for bpf_lsm and bpf_iter The commit 64696c40d03c ("bpf: Add __bpf_prog_{enter,exit}_struct_ops for struct_ops trampoline") removed prog->active check for struct_ops prog. The bpf_lsm and bpf_iter is also using trampoline. Like struct_ops, the bpf_lsm and bpf_iter have fixed hooks for the prog to attach. The kernel does not call the same hook in a recursive way. This patch also removes the prog->active check for bpf_lsm and bpf_iter. A later patch has a test to reproduce the recursion issue for a sleepable bpf_lsm program. This patch appends the '_recur' naming to the existing enter and exit functions that track the prog->active counter. New __bpf_prog_{enter,exit}[_sleepable] function are added to skip the prog->active tracking. The '_struct_ops' version is also removed. It also moves the decision on picking the enter and exit function to the new bpf_trampoline_{enter,exit}(). It returns the '_recur' ones for all tracing progs to use. For bpf_lsm, bpf_iter, struct_ops (no prog->active tracking after 64696c40d03c), and bpf_lsm_cgroup (no prog->active tracking after 69fd337a975c7), it will return the functions that don't track the prog->active. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 5 ++-- kernel/bpf/trampoline.c | 80 +++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 70 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7b373a5e861f..a0b4266196a8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5133,13 +5133,14 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size) run_ctx.bpf_cookie = 0; run_ctx.saved_run_ctx = NULL; - if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) { + if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) { /* recursion detected */ bpf_prog_put(prog); return -EBUSY; } attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in); - __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx); + __bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */, + &run_ctx); bpf_prog_put(prog); return 0; #endif diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index bf0906e1e2b9..d6395215b849 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -864,7 +864,7 @@ static __always_inline u64 notrace bpf_prog_start_time(void) * [2..MAX_U64] - execute bpf prog and record execution time. * This is start time. */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) +static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) __acquires(RCU) { rcu_read_lock(); @@ -901,7 +901,8 @@ static void notrace update_prog_stats(struct bpf_prog *prog, } } -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx) +static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) __releases(RCU) { bpf_reset_run_ctx(run_ctx->saved_run_ctx); @@ -912,8 +913,8 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_ rcu_read_unlock(); } -u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx) +static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) __acquires(RCU) { /* Runtime stats are exported via actual BPF_LSM_CGROUP @@ -927,8 +928,8 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, return NO_START_TIME; } -void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx) +static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) __releases(RCU) { bpf_reset_run_ctx(run_ctx->saved_run_ctx); @@ -937,7 +938,8 @@ void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, rcu_read_unlock(); } -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) +u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) { rcu_read_lock_trace(); migrate_disable(); @@ -953,8 +955,8 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_r return bpf_prog_start_time(); } -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx) +void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) { bpf_reset_run_ctx(run_ctx->saved_run_ctx); @@ -964,8 +966,30 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, rcu_read_unlock_trace(); } -u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx) +static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) +{ + rcu_read_lock_trace(); + migrate_disable(); + might_fault(); + + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); + + return bpf_prog_start_time(); +} + +static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) +{ + bpf_reset_run_ctx(run_ctx->saved_run_ctx); + + update_prog_stats(prog, start); + migrate_enable(); + rcu_read_unlock_trace(); +} + +static u64 notrace __bpf_prog_enter(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) __acquires(RCU) { rcu_read_lock(); @@ -976,8 +1000,8 @@ u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, return bpf_prog_start_time(); } -void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx) +static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) __releases(RCU) { bpf_reset_run_ctx(run_ctx->saved_run_ctx); @@ -997,6 +1021,36 @@ void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr) percpu_ref_put(&tr->pcref); } +bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog) +{ + bool sleepable = prog->aux->sleepable; + + if (bpf_prog_check_recur(prog)) + return sleepable ? __bpf_prog_enter_sleepable_recur : + __bpf_prog_enter_recur; + + if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM && + prog->expected_attach_type == BPF_LSM_CGROUP) + return __bpf_prog_enter_lsm_cgroup; + + return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter; +} + +bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog) +{ + bool sleepable = prog->aux->sleepable; + + if (bpf_prog_check_recur(prog)) + return sleepable ? __bpf_prog_exit_sleepable_recur : + __bpf_prog_exit_recur; + + if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM && + prog->expected_attach_type == BPF_LSM_CGROUP) + return __bpf_prog_exit_lsm_cgroup; + + return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit; +} + int __weak arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, -- cgit From 0593dd34e53489557569d5e6d27371b49aa9b41f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:17 -0700 Subject: bpf: Append _recur naming to the bpf_task_storage helper proto This patch adds the "_recur" naming to the bpf_task_storage_{get,delete} proto. In a latter patch, they will only be used by the tracing programs that requires a deadlock detection because a tracing prog may use bpf_task_storage_{get,delete} recursively and cause a deadlock. Another following patch will add a different helper proto for the non tracing programs because they do not need the deadlock prevention. This patch does this rename to prepare for this future proto additions. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-3-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_task_storage.c | 12 ++++++------ kernel/trace/bpf_trace.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 6f290623347e..bce50ae03f42 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -228,7 +228,7 @@ out: } /* *gfp_flags* is a hidden argument provided by the verifier */ -BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, +BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, task, void *, value, u64, flags, gfp_t, gfp_flags) { struct bpf_local_storage_data *sdata; @@ -260,7 +260,7 @@ unlock: (unsigned long)sdata->data; } -BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, +BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, task) { int ret; @@ -322,8 +322,8 @@ const struct bpf_map_ops task_storage_map_ops = { .map_owner_storage_ptr = task_storage_ptr, }; -const struct bpf_func_proto bpf_task_storage_get_proto = { - .func = bpf_task_storage_get, +const struct bpf_func_proto bpf_task_storage_get_recur_proto = { + .func = bpf_task_storage_get_recur, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, @@ -333,8 +333,8 @@ const struct bpf_func_proto bpf_task_storage_get_proto = { .arg4_type = ARG_ANYTHING, }; -const struct bpf_func_proto bpf_task_storage_delete_proto = { - .func = bpf_task_storage_delete, +const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { + .func = bpf_task_storage_delete_recur, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2460e38f75ff..83b9b9afe235 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1488,9 +1488,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; case BPF_FUNC_task_storage_get: - return &bpf_task_storage_get_proto; + return &bpf_task_storage_get_recur_proto; case BPF_FUNC_task_storage_delete: - return &bpf_task_storage_delete_proto; + return &bpf_task_storage_delete_recur_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; case BPF_FUNC_snprintf: -- cgit From 6d65500c34d897329ed1be0fd3c4014ec52cd473 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:18 -0700 Subject: bpf: Refactor the core bpf_task_storage_get logic into a new function This patch creates a new function __bpf_task_storage_get() and moves the core logic of the existing bpf_task_storage_get() into this new function. This new function will be shared by another new helper proto in the latter patch. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-4-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_task_storage.c | 44 +++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index bce50ae03f42..2726435e3eda 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -227,37 +227,45 @@ out: return err; } -/* *gfp_flags* is a hidden argument provided by the verifier */ -BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, - task, void *, value, u64, flags, gfp_t, gfp_flags) +/* Called by bpf_task_storage_get*() helpers */ +static void *__bpf_task_storage_get(struct bpf_map *map, + struct task_struct *task, void *value, + u64 flags, gfp_t gfp_flags) { struct bpf_local_storage_data *sdata; - WARN_ON_ONCE(!bpf_rcu_lock_held()); - if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) - return (unsigned long)NULL; - - if (!task) - return (unsigned long)NULL; - - if (!bpf_task_storage_trylock()) - return (unsigned long)NULL; - sdata = task_storage_lookup(task, map, true); if (sdata) - goto unlock; + return sdata->data; /* only allocate new storage, when the task is refcounted */ if (refcount_read(&task->usage) && - (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) { sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, gfp_flags); + return IS_ERR(sdata) ? NULL : sdata->data; + } -unlock: + return NULL; +} + +/* *gfp_flags* is a hidden argument provided by the verifier */ +BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags, gfp_t, gfp_flags) +{ + void *data; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) + return (unsigned long)NULL; + + if (!bpf_task_storage_trylock()) + return (unsigned long)NULL; + data = __bpf_task_storage_get(map, task, value, flags, + gfp_flags); bpf_task_storage_unlock(); - return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL : - (unsigned long)sdata->data; + return (unsigned long)data; } BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, -- cgit From e8b02296a6b8d07de752d6157d863a642117bcd3 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:19 -0700 Subject: bpf: Avoid taking spinlock in bpf_task_storage_get if potential deadlock is detected bpf_task_storage_get() does a lookup and optionally inserts new data if BPF_LOCAL_STORAGE_GET_F_CREATE is present. During lookup, it will cache the lookup result and caching requires to acquire a spinlock. When potential deadlock is detected (by the bpf_task_storage_busy pcpu-counter added in commit bc235cdb423a ("bpf: Prevent deadlock from recursive bpf_task_storage_[get|delete]")), the current behavior is returning NULL immediately to avoid deadlock. It is too pessimistic. This patch will go ahead to do a lookup (which is a lockless operation) but it will avoid caching it in order to avoid acquiring the spinlock. When lookup fails to find the data and BPF_LOCAL_STORAGE_GET_F_CREATE is set, an insertion is needed and this requires acquiring a spinlock. This patch will still return NULL when a potential deadlock is detected. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-5-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 1 + kernel/bpf/bpf_task_storage.c | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 9dc6de1cf185..781d14167140 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -242,6 +242,7 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) __bpf_selem_unlink_storage(selem, use_trace_rcu); } +/* If cacheit_lockit is false, this lookup function is lockless */ struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 2726435e3eda..bc52bc8b59f7 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -230,17 +230,17 @@ out: /* Called by bpf_task_storage_get*() helpers */ static void *__bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, - u64 flags, gfp_t gfp_flags) + u64 flags, gfp_t gfp_flags, bool nobusy) { struct bpf_local_storage_data *sdata; - sdata = task_storage_lookup(task, map, true); + sdata = task_storage_lookup(task, map, nobusy); if (sdata) return sdata->data; /* only allocate new storage, when the task is refcounted */ if (refcount_read(&task->usage) && - (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) { + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) { sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, gfp_flags); @@ -254,17 +254,18 @@ static void *__bpf_task_storage_get(struct bpf_map *map, BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, task, void *, value, u64, flags, gfp_t, gfp_flags) { + bool nobusy; void *data; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) return (unsigned long)NULL; - if (!bpf_task_storage_trylock()) - return (unsigned long)NULL; + nobusy = bpf_task_storage_trylock(); data = __bpf_task_storage_get(map, task, value, flags, - gfp_flags); - bpf_task_storage_unlock(); + gfp_flags, nobusy); + if (nobusy) + bpf_task_storage_unlock(); return (unsigned long)data; } -- cgit From 4279adb094a17132423f1271c3d11b593fc2327e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:20 -0700 Subject: bpf: Add new bpf_task_storage_get proto with no deadlock detection The bpf_lsm and bpf_iter do not recur that will cause a deadlock. The situation is similar to the bpf_pid_task_storage_lookup_elem() which is called from the syscall map_lookup_elem. It does not need deadlock detection. Otherwise, it will cause unnecessary failure when calling the bpf_task_storage_get() helper. This patch adds bpf_task_storage_get proto that does not do deadlock detection. It will be used by bpf_lsm and bpf_iter programs. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-6-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_task_storage.c | 28 ++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 5 ++++- 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index bc52bc8b59f7..c3a841be438f 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -269,6 +269,23 @@ BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct return (unsigned long)data; } +/* *gfp_flags* is a hidden argument provided by the verifier */ +BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags, gfp_t, gfp_flags) +{ + void *data; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) + return (unsigned long)NULL; + + bpf_task_storage_lock(); + data = __bpf_task_storage_get(map, task, value, flags, + gfp_flags, true); + bpf_task_storage_unlock(); + return (unsigned long)data; +} + BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, task) { @@ -342,6 +359,17 @@ const struct bpf_func_proto bpf_task_storage_get_recur_proto = { .arg4_type = ARG_ANYTHING, }; +const struct bpf_func_proto bpf_task_storage_get_proto = { + .func = bpf_task_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { .func = bpf_task_storage_delete_recur, .gpl_only = false, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 83b9b9afe235..e9759b0f7199 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1488,7 +1489,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; case BPF_FUNC_task_storage_get: - return &bpf_task_storage_get_recur_proto; + if (bpf_prog_check_recur(prog)) + return &bpf_task_storage_get_recur_proto; + return &bpf_task_storage_get_proto; case BPF_FUNC_task_storage_delete: return &bpf_task_storage_delete_recur_proto; case BPF_FUNC_for_each_map_elem: -- cgit From fda64ae0bb3e37b5a4292625c6931cb156224d0f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:21 -0700 Subject: bpf: bpf_task_storage_delete_recur does lookup first before the deadlock check Similar to the earlier change in bpf_task_storage_get_recur. This patch changes bpf_task_storage_delete_recur such that it does the lookup first. It only returns -EBUSY if it needs to take the spinlock to do the deletion when potential deadlock is detected. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-7-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_task_storage.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index c3a841be438f..f3f79b618a68 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -184,7 +184,8 @@ out: return err; } -static int task_storage_delete(struct task_struct *task, struct bpf_map *map) +static int task_storage_delete(struct task_struct *task, struct bpf_map *map, + bool nobusy) { struct bpf_local_storage_data *sdata; @@ -192,6 +193,9 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map) if (!sdata) return -ENOENT; + if (!nobusy) + return -EBUSY; + bpf_selem_unlink(SELEM(sdata), true); return 0; @@ -220,7 +224,7 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) } bpf_task_storage_lock(); - err = task_storage_delete(task, map); + err = task_storage_delete(task, map, true); bpf_task_storage_unlock(); out: put_pid(pid); @@ -289,21 +293,21 @@ BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, task) { + bool nobusy; int ret; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; - if (!bpf_task_storage_trylock()) - return -EBUSY; - + nobusy = bpf_task_storage_trylock(); /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. */ - ret = task_storage_delete(task, map); - bpf_task_storage_unlock(); + ret = task_storage_delete(task, map, nobusy); + if (nobusy) + bpf_task_storage_unlock(); return ret; } -- cgit From 8a7dac37f27a3dfbd814bf29a73d6417db2c81d9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Oct 2022 11:45:22 -0700 Subject: bpf: Add new bpf_task_storage_delete proto with no deadlock detection The bpf_lsm and bpf_iter do not recur that will cause a deadlock. The situation is similar to the bpf_pid_task_storage_delete_elem() which is called from the syscall map_delete_elem. It does not need deadlock detection. Otherwise, it will cause unnecessary failure when calling the bpf_task_storage_delete() helper. This patch adds bpf_task_storage_delete proto that does not do deadlock detection. It will be used by bpf_lsm and bpf_iter program. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221025184524.3526117-8-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_task_storage.c | 28 ++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 4 +++- 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index f3f79b618a68..ba3fe72d1fa5 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -311,6 +311,25 @@ BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_str return ret; } +BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, + task) +{ + int ret; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (!task) + return -EINVAL; + + bpf_task_storage_lock(); + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + ret = task_storage_delete(task, map, true); + bpf_task_storage_unlock(); + return ret; +} + static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) { return -ENOTSUPP; @@ -382,3 +401,12 @@ const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { .arg2_type = ARG_PTR_TO_BTF_ID, .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], }; + +const struct bpf_func_proto bpf_task_storage_delete_proto = { + .func = bpf_task_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], +}; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e9759b0f7199..eed1bd952c3a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1493,7 +1493,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_task_storage_get_recur_proto; return &bpf_task_storage_get_proto; case BPF_FUNC_task_storage_delete: - return &bpf_task_storage_delete_recur_proto; + if (bpf_prog_check_recur(prog)) + return &bpf_task_storage_delete_recur_proto; + return &bpf_task_storage_delete_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; case BPF_FUNC_snprintf: -- cgit