diff options
| author | Andrii Nakryiko <andrii@kernel.org> | 2025-06-13 10:29:46 -0700 |
|---|---|---|
| committer | Andrii Nakryiko <andrii@kernel.org> | 2025-06-13 10:44:17 -0700 |
| commit | e4c8f96adeb29a98f7848ee793085765cdede64c (patch) | |
| tree | a15bfe787ed40376e958dbae96939b0598409308 | |
| parent | ccefa19335a0b81f11b0856e951ca909445b3783 (diff) | |
| parent | 67cdcc405b46c13446d6d220a108daa2f8de3436 (diff) | |
Merge branch 'veristat-memory-accounting-for-bpf-programs'
Eduard Zingerman says:
====================
veristat: memory accounting for bpf programs
When working on the verifier, it is sometimes interesting to know how a
particular change affects memory consumption. This patch-set modifies
veristat to provide such information. As a collateral, kernel needs an
update to make allocations reachable from BPF program load accountable
in memcg statistics.
Here is a sample output:
Program Peak states Peak memory (MiB)
--------------- ----------- -----------------
lavd_select_cpu 2153 43
lavd_enqueue 1982 41
lavd_dispatch 3480 28
Technically, this is implemented by creating and entering a new cgroup
at the start of veristat execution. The difference between values from
cgroup "memory.peak" file before and after bpf_object__load() is used
as a metric.
To minimize measurements jitter data is collected in megabytes.
Changelog:
v2: https://lore.kernel.org/bpf/20250612130835.2478649-1-eddyz87@gmail.com/
v2 -> v3:
- bpf_verifier_state->jmp_history and
bpf_verifier_env->explored_states allocations are switched from
GFP_USER to GFP_KERNEL_ACCOUNT (Andrii, Alexei);
- veristat.c:STR macro removed, PATH_MAX-1 == 4095 is hard-coded in
scanf format strings (Andrii);
- env->{orig,stat}_cgroup size changed to PATH_MAX (Andrii);
- snprintf_trunc() is removed, flag -Wno-format-truncation
is added to CFLAGS for veristat.o when compiled with gcc;
v1: https://lore.kernel.org/bpf/20250605230609.1444980-1-eddyz87@gmail.com/
v1 -> v2:
- a single cgroup, created at the beginning of execution, is now used
for measurements (Andrii, Mykyta);
- cgroup namespace is not created, as it turned out to be useless
(Andrii);
- veristat no longer mounts cgroup fs or changes subtree_control,
instead it looks for an existing mount point and reports an error if
memory.peak file can't be opened (Andrii, Alexei);
- if 'mem_peak' statistics is not enabled, veristat skips cgroup
setup;
- code sharing with cgroup_helpers.c was considered but was decided
against to simplify veristat github sync.
====================
Link: https://patch.msgid.link/20250613072147.3938139-1-eddyz87@gmail.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
| -rw-r--r-- | kernel/bpf/btf.c | 15 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 69 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/Makefile | 5 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/veristat.c | 248 |
4 files changed, 289 insertions, 48 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 1d2cf898e21e..682acb1ed234 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3443,7 +3443,8 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt, node_field_name = strstr(value_type, ":"); if (!node_field_name) return -EINVAL; - value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN); + value_type = kstrndup(value_type, node_field_name - value_type, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!value_type) return -ENOMEM; id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT); @@ -3958,7 +3959,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type /* This needs to be kzalloc to zero out padding and unused fields, see * comment in btf_record_equal. */ - rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL | __GFP_NOWARN); + rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!rec) return ERR_PTR(-ENOMEM); @@ -9019,7 +9020,7 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, bpf_free_cands_from_cache(*cc); *cc = NULL; } - new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL); + new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL_ACCOUNT); if (!new_cands) { bpf_free_cands(cands); return ERR_PTR(-ENOMEM); @@ -9027,7 +9028,7 @@ static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, /* strdup the name, since it will stay in cache. * the cands->name points to strings in prog's BTF and the prog can be unloaded. */ - new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL); + new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL_ACCOUNT); bpf_free_cands(cands); if (!new_cands->name) { kfree(new_cands); @@ -9111,7 +9112,7 @@ bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf, continue; /* most of the time there is only one candidate for a given kind+name pair */ - new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL); + new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL_ACCOUNT); if (!new_cands) { bpf_free_cands(cands); return ERR_PTR(-ENOMEM); @@ -9228,7 +9229,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, /* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" * into arrays of btf_ids of struct fields and array indices. */ - specs = kcalloc(3, sizeof(*specs), GFP_KERNEL); + specs = kcalloc(3, sizeof(*specs), GFP_KERNEL_ACCOUNT); if (!specs) return -ENOMEM; @@ -9253,7 +9254,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, goto out; } if (cc->cnt) { - cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL); + cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL_ACCOUNT); if (!cands.cands) { err = -ENOMEM; goto out; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c378074516cf..fd81cff11617 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1404,7 +1404,7 @@ static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) goto out; alloc_size = kmalloc_size_roundup(size_mul(new_n, size)); - new_arr = krealloc(arr, alloc_size, GFP_KERNEL); + new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT); if (!new_arr) { kfree(arr); return NULL; @@ -1421,7 +1421,7 @@ out: static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src) { dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, - sizeof(struct bpf_reference_state), GFP_KERNEL); + sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT); if (!dst->refs) return -ENOMEM; @@ -1440,7 +1440,7 @@ static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_st size_t n = src->allocated_stack / BPF_REG_SIZE; dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!dst->stack) return -ENOMEM; @@ -1731,7 +1731,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, src->jmp_history_cnt, sizeof(*dst_state->jmp_history), - GFP_USER); + GFP_KERNEL_ACCOUNT); if (!dst_state->jmp_history) return -ENOMEM; dst_state->jmp_history_cnt = src->jmp_history_cnt; @@ -1760,7 +1760,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { - dst = kzalloc(sizeof(*dst), GFP_KERNEL); + dst = kzalloc(sizeof(*dst), GFP_KERNEL_ACCOUNT); if (!dst) return -ENOMEM; dst_state->frame[i] = dst; @@ -1874,7 +1874,7 @@ static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env, info = env->scc_info[scc]; num_visits = info ? info->num_visits : 0; new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1); - info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL); + info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT); if (!info) return NULL; env->scc_info[scc] = info; @@ -2095,7 +2095,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, struct bpf_verifier_stack_elem *elem; int err; - elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); + elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT); if (!elem) goto err; @@ -2862,7 +2862,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, struct bpf_verifier_stack_elem *elem; struct bpf_func_state *frame; - elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); + elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT); if (!elem) goto err; @@ -2885,7 +2885,7 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, */ elem->st.branches = 1; elem->st.in_sleepable = is_sleepable; - frame = kzalloc(sizeof(*frame), GFP_KERNEL); + frame = kzalloc(sizeof(*frame), GFP_KERNEL_ACCOUNT); if (!frame) goto err; init_func_state(env, frame, @@ -3237,7 +3237,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return -EINVAL; } - tab = kzalloc(sizeof(*tab), GFP_KERNEL); + tab = kzalloc(sizeof(*tab), GFP_KERNEL_ACCOUNT); if (!tab) return -ENOMEM; prog_aux->kfunc_tab = tab; @@ -3253,7 +3253,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return 0; if (!btf_tab && offset) { - btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL); + btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL_ACCOUNT); if (!btf_tab) return -ENOMEM; prog_aux->kfunc_btf_tab = btf_tab; @@ -3939,7 +3939,7 @@ static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_st cnt++; alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); - p = krealloc(cur->jmp_history, alloc_size, GFP_USER); + p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT); if (!p) return -ENOMEM; cur->jmp_history = p; @@ -10356,7 +10356,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls } caller = state->frame[state->curframe]; - callee = kzalloc(sizeof(*callee), GFP_KERNEL); + callee = kzalloc(sizeof(*callee), GFP_KERNEL_ACCOUNT); if (!callee) return -ENOMEM; state->frame[state->curframe + 1] = callee; @@ -17693,17 +17693,18 @@ static int check_cfg(struct bpf_verifier_env *env) int *insn_stack, *insn_state, *insn_postorder; int ex_insn_beg, i, ret = 0; - insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); if (!insn_state) return -ENOMEM; - insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); if (!insn_stack) { kvfree(insn_state); return -ENOMEM; } - insn_postorder = env->cfg.insn_postorder = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_postorder = env->cfg.insn_postorder = + kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); if (!insn_postorder) { kvfree(insn_state); kvfree(insn_stack); @@ -17837,7 +17838,7 @@ static int check_btf_func_early(struct bpf_verifier_env *env, urecord = make_bpfptr(attr->func_info, uattr.is_kernel); min_size = min_t(u32, krec_size, urec_size); - krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); + krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!krecord) return -ENOMEM; @@ -17937,7 +17938,7 @@ static int check_btf_func(struct bpf_verifier_env *env, urecord = make_bpfptr(attr->func_info, uattr.is_kernel); krecord = prog->aux->func_info; - info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN); + info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!info_aux) return -ENOMEM; @@ -18023,7 +18024,7 @@ static int check_btf_line(struct bpf_verifier_env *env, * pass in a smaller bpf_line_info object. */ linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info), - GFP_KERNEL | __GFP_NOWARN); + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!linfo) return -ENOMEM; @@ -19408,7 +19409,7 @@ hit: if (loop) { struct bpf_scc_backedge *backedge; - backedge = kzalloc(sizeof(*backedge), GFP_KERNEL); + backedge = kzalloc(sizeof(*backedge), GFP_KERNEL_ACCOUNT); if (!backedge) return -ENOMEM; err = copy_verifier_state(&backedge->state, cur); @@ -19472,7 +19473,7 @@ miss: * When looping the sl->state.branches will be > 0 and this state * will not be considered for equivalence until branches == 0. */ - new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); + new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL_ACCOUNT); if (!new_sl) return -ENOMEM; env->total_states++; @@ -22976,13 +22977,13 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) env->prev_linfo = NULL; env->pass_cnt++; - state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); + state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL_ACCOUNT); if (!state) return -ENOMEM; state->curframe = 0; state->speculative = false; state->branches = 1; - state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); + state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL_ACCOUNT); if (!state->frame[0]) { kfree(state); return -ENOMEM; @@ -23208,7 +23209,7 @@ static void print_verification_stats(struct bpf_verifier_env *env) int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, const struct bpf_ctx_arg_aux *info, u32 cnt) { - prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL); + prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT); prog->aux->ctx_arg_info_size = cnt; return prog->aux->ctx_arg_info ? 0 : -ENOMEM; @@ -24152,7 +24153,7 @@ static int compute_live_registers(struct bpf_verifier_env *env) * - repeat the computation while {in,out} fields changes for * any instruction. */ - state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL); + state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL_ACCOUNT); if (!state) { err = -ENOMEM; goto out; @@ -24244,10 +24245,10 @@ static int compute_scc(struct bpf_verifier_env *env) * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n'; * - 'dfs' DFS traversal stack, used to emulate explicit recursion. */ - stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); - pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); - low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); - dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL); + stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); + pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); + low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT); + dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT); if (!stack || !pre || !low || !dfs) { err = -ENOMEM; goto exit; @@ -24381,7 +24382,7 @@ dfs_continue: dfs_sz--; } } - env->scc_info = kvcalloc(next_scc_id, sizeof(*env->scc_info), GFP_KERNEL); + env->scc_info = kvcalloc(next_scc_id, sizeof(*env->scc_info), GFP_KERNEL_ACCOUNT); if (!env->scc_info) { err = -ENOMEM; goto exit; @@ -24409,7 +24410,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ - env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL_ACCOUNT); if (!env) return -ENOMEM; @@ -24472,7 +24473,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct list_head), - GFP_USER); + GFP_KERNEL_ACCOUNT); ret = -ENOMEM; if (!env->explored_states) goto skip_full_check; @@ -24603,7 +24604,7 @@ skip_full_check: /* if program passed verifier, update used_maps in bpf_prog_info */ env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt, sizeof(env->used_maps[0]), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!env->prog->aux->used_maps) { ret = -ENOMEM; @@ -24618,7 +24619,7 @@ skip_full_check: /* if program passed verifier, update used_btfs in bpf_prog_aux */ env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt, sizeof(env->used_btfs[0]), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!env->prog->aux->used_btfs) { ret = -ENOMEM; goto err_release_maps; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index cf5ed3bee573..97013c49920b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -841,6 +841,11 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ +# This works around GCC warning about snprintf truncating strings like: +# +# char a[PATH_MAX], b[PATH_MAX]; +# snprintf(a, "%s/foo", b); // triggers -Wformat-truncation +$(OUTPUT)/veristat.o: CFLAGS += -Wno-format-truncation $(OUTPUT)/veristat.o: $(BPFOBJ) $(OUTPUT)/veristat: $(OUTPUT)/veristat.o $(call msg,BINARY,,$@) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 70cdabd88c50..56771650ee8c 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -49,6 +49,7 @@ enum stat_id { STACK, PROG_TYPE, ATTACH_TYPE, + MEMORY_PEAK, FILE_NAME, PROG_NAME, @@ -208,6 +209,9 @@ static struct env { int top_src_lines; struct var_preset *presets; int npresets; + char orig_cgroup[PATH_MAX]; + char stat_cgroup[PATH_MAX]; + int memory_peak_fd; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -219,6 +223,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va return vfprintf(stderr, format, args); } +#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__) + +__printf(3, 4) +static int log_errno_aux(const char *file, int line, const char *fmt, ...) +{ + int err = -errno; + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "%s:%d: ", file, line); + vfprintf(stderr, fmt, ap); + fprintf(stderr, " failed with error '%s'.\n", strerror(errno)); + va_end(ap); + return err; +} + #ifndef VERISTAT_VERSION #define VERISTAT_VERSION "<kernel>" #endif @@ -734,13 +754,13 @@ cleanup: } static const struct stat_specs default_csv_output_spec = { - .spec_cnt = 14, + .spec_cnt = 15, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE, - STACK, + STACK, MEMORY_PEAK, }, }; @@ -781,6 +801,7 @@ static struct stat_def { [STACK] = {"Stack depth", {"stack_depth", "stack"}, }, [PROG_TYPE] = { "Program type", {"prog_type"}, }, [ATTACH_TYPE] = { "Attach type", {"attach_type", }, }, + [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, }, }; static bool parse_stat_id_var(const char *name, size_t len, int *id, @@ -1279,16 +1300,214 @@ static int max_verifier_log_size(void) return log_size; } +static bool output_stat_enabled(int id) +{ + int i; + + for (i = 0; i < env.output_spec.spec_cnt; i++) + if (env.output_spec.ids[i] == id) + return true; + return false; +} + +__printf(2, 3) +static int write_one_line(const char *file, const char *fmt, ...) +{ + int err, saved_errno; + va_list ap; + FILE *f; + + f = fopen(file, "w"); + if (!f) + return -1; + + va_start(ap, fmt); + errno = 0; + err = vfprintf(f, fmt, ap); + saved_errno = errno; + va_end(ap); + fclose(f); + errno = saved_errno; + return err < 0 ? -1 : 0; +} + +__scanf(3, 4) +static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...) +{ + int res = 0, saved_errno = 0; + char *line = NULL; + size_t line_len; + va_list ap; + FILE *f; + + f = fopen(file, "r"); + if (!f) + return -1; + + va_start(ap, fmt); + while (getline(&line, &line_len, f) > 0) { + res = vsscanf(line, fmt, ap); + if (res == fields_expected) + goto out; + } + if (ferror(f)) { + saved_errno = errno; + res = -1; + } + +out: + va_end(ap); + free(line); + fclose(f); + errno = saved_errno; + return res; +} + +static void destroy_stat_cgroup(void) +{ + char buf[PATH_MAX]; + int err; + + close(env.memory_peak_fd); + + if (env.orig_cgroup[0]) { + snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.orig_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) + log_errno("moving self to original cgroup %s\n", env.orig_cgroup); + } + + if (env.stat_cgroup[0]) { + err = rmdir(env.stat_cgroup); + if (err < 0) + log_errno("deletion of cgroup %s", env.stat_cgroup); + } + + env.memory_peak_fd = -1; + env.orig_cgroup[0] = 0; + env.stat_cgroup[0] = 0; +} + +/* + * Creates a cgroup at /sys/fs/cgroup/veristat-accounting-<pid>, + * moves current process to this cgroup. + */ +static void create_stat_cgroup(void) +{ + char cgroup_fs_mount[4096]; + char buf[4096]; + int err; + + env.memory_peak_fd = -1; + + if (!output_stat_enabled(MEMORY_PEAK)) + return; + + err = scanf_one_line("/proc/self/mounts", 2, "%*s %4095s cgroup2 %s", + cgroup_fs_mount, buf); + if (err != 2) { + if (err < 0) + log_errno("reading /proc/self/mounts"); + else if (!env.quiet) + fprintf(stderr, "Can't find cgroupfs v2 mount point.\n"); + goto err_out; + } + + /* cgroup-v2.rst promises the line "0::<group>" for cgroups v2 */ + err = scanf_one_line("/proc/self/cgroup", 1, "0::%4095s", buf); + if (err != 1) { + if (err < 0) + log_errno("reading /proc/self/cgroup"); + else if (!env.quiet) + fprintf(stderr, "Can't infer veristat process cgroup."); + goto err_out; + } + + snprintf(env.orig_cgroup, sizeof(env.orig_cgroup), "%s/%s", cgroup_fs_mount, buf); + + snprintf(buf, sizeof(buf), "%s/veristat-accounting-%d", cgroup_fs_mount, getpid()); + err = mkdir(buf, 0777); + if (err < 0) { + log_errno("creation of cgroup %s", buf); + goto err_out; + } + strcpy(env.stat_cgroup, buf); + + snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) { + log_errno("entering cgroup %s", buf); + goto err_out; + } + + snprintf(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup); + env.memory_peak_fd = open(buf, O_RDWR | O_APPEND); + if (env.memory_peak_fd < 0) { + log_errno("opening %s", buf); + goto err_out; + } + + return; + +err_out: + if (!env.quiet) + fprintf(stderr, "Memory usage metric unavailable.\n"); + destroy_stat_cgroup(); +} + +/* Current value of /sys/fs/cgroup/veristat-accounting-<pid>/memory.peak */ +static long cgroup_memory_peak(void) +{ + long err, memory_peak; + char buf[32]; + + if (env.memory_peak_fd < 0) + return -1; + + err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0); + if (err <= 0) { + log_errno("pread(%s/memory.peak)", env.stat_cgroup); + return -1; + } + + buf[err] = 0; + errno = 0; + memory_peak = strtoll(buf, NULL, 10); + if (errno) { + log_errno("%s/memory.peak:strtoll(%s)", env.stat_cgroup, buf); + return -1; + } + + return memory_peak; +} + +static int reset_stat_cgroup(void) +{ + char buf[] = "r\n"; + int err; + + if (env.memory_peak_fd < 0) + return -1; + + err = pwrite(env.memory_peak_fd, buf, sizeof(buf), 0); + if (err <= 0) { + log_errno("pwrite(%s/memory.peak)", env.stat_cgroup); + return -1; + } + return 0; +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); const char *prog_name = bpf_program__name(prog); + long mem_peak_a, mem_peak_b, mem_peak = -1; char *buf; int buf_sz, log_level; struct verif_stats *stats; struct bpf_prog_info info; __u32 info_len = sizeof(info); - int err = 0; + int err = 0, cgroup_err; void *tmp; int fd; @@ -1333,7 +1552,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_reg_invariants) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); - err = bpf_object__load(obj); + err = bpf_object__prepare(obj); + if (!err) { + cgroup_err = reset_stat_cgroup(); + mem_peak_a = cgroup_memory_peak(); + err = bpf_object__load(obj); + mem_peak_b = cgroup_memory_peak(); + if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0) + mem_peak = mem_peak_b - mem_peak_a; + } env.progs_processed++; stats->file_name = strdup(base_filename); @@ -1342,6 +1569,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats->stats[SIZE] = bpf_program__insn_cnt(prog); stats->stats[PROG_TYPE] = bpf_program__type(prog); stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog); + stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024); memset(&info, 0, info_len); fd = bpf_program__fd(prog); @@ -1825,6 +2053,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, case TOTAL_STATES: case PEAK_STATES: case MAX_STATES_PER_INSN: + case MEMORY_PEAK: case MARK_READ_MAX_LEN: { long v1 = s1->stats[id]; long v2 = s2->stats[id]; @@ -2054,6 +2283,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, case STACK: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: *val = s ? s->stats[id] : 0; break; default: @@ -2140,6 +2370,7 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats case MARK_READ_MAX_LEN: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: case STACK: { long val; int err, n; @@ -2777,7 +3008,7 @@ static void output_prog_stats(void) static int handle_verif_mode(void) { - int i, err; + int i, err = 0; if (env.filename_cnt == 0) { fprintf(stderr, "Please provide path to BPF object file!\n\n"); @@ -2785,11 +3016,12 @@ static int handle_verif_mode(void) return -EINVAL; } + create_stat_cgroup(); for (i = 0; i < env.filename_cnt; i++) { err = process_obj(env.filenames[i]); if (err) { fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); - return err; + goto out; } } @@ -2797,7 +3029,9 @@ static int handle_verif_mode(void) output_prog_stats(); - return 0; +out: + destroy_stat_cgroup(); + return err; } static int handle_replay_mode(void) |
