summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/net/bpf_jit_comp.c97
-rw-r--r--include/linux/bpf.h39
-rw-r--r--include/linux/bpf_verifier.h8
-rw-r--r--include/linux/btf.h1
-rw-r--r--include/linux/extable.h10
-rw-r--r--include/linux/filter.h6
-rw-r--r--include/trace/bpf_probe.h3
-rw-r--r--include/uapi/linux/bpf.h28
-rw-r--r--kernel/bpf/btf.c329
-rw-r--r--kernel/bpf/core.c39
-rw-r--r--kernel/bpf/syscall.c88
-rw-r--r--kernel/bpf/verifier.c161
-rw-r--r--kernel/extable.c2
-rw-r--r--kernel/trace/bpf_trace.c6
-rw-r--r--net/core/filter.c15
-rw-r--r--tools/include/uapi/linux/bpf.h28
-rw-r--r--tools/lib/bpf/bpf.c3
-rw-r--r--tools/lib/bpf/libbpf.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c89
-rw-r--r--tools/testing/selftests/bpf/progs/kfree_skb.c103
20 files changed, 1023 insertions, 70 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 3ad2ba1ad855..8cd23d8309bf 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,7 +9,7 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
-
+#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
@@ -123,6 +123,19 @@ static const int reg2hex[] = {
[AUX_REG] = 3, /* R11 temp register */
};
+static const int reg2pt_regs[] = {
+ [BPF_REG_0] = offsetof(struct pt_regs, ax),
+ [BPF_REG_1] = offsetof(struct pt_regs, di),
+ [BPF_REG_2] = offsetof(struct pt_regs, si),
+ [BPF_REG_3] = offsetof(struct pt_regs, dx),
+ [BPF_REG_4] = offsetof(struct pt_regs, cx),
+ [BPF_REG_5] = offsetof(struct pt_regs, r8),
+ [BPF_REG_6] = offsetof(struct pt_regs, bx),
+ [BPF_REG_7] = offsetof(struct pt_regs, r13),
+ [BPF_REG_8] = offsetof(struct pt_regs, r14),
+ [BPF_REG_9] = offsetof(struct pt_regs, r15),
+};
+
/*
* is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
* which need extra byte of encoding.
@@ -377,6 +390,19 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
*pprog = prog;
}
+
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code, unsigned long fault_addr)
+{
+ u32 reg = x->fixup >> 8;
+
+ /* jump over faulting load and clear dest register */
+ *(unsigned long *)((void *)regs + reg) = 0;
+ regs->ip += x->fixup & 0xff;
+ return true;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -384,7 +410,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int insn_cnt = bpf_prog->len;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
- int i, cnt = 0;
+ int i, cnt = 0, excnt = 0;
int proglen = 0;
u8 *prog = temp;
@@ -778,14 +804,17 @@ stx: if (is_imm8(insn->off))
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
/* Emit 'movzx rax, byte ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* Emit 'movzx rax, word ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* Emit 'mov eax, dword ptr [rax+0x14]' */
if (is_ereg(dst_reg) || is_ereg(src_reg))
EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
@@ -793,6 +822,7 @@ stx: if (is_imm8(insn->off))
EMIT1(0x8B);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
/* Emit 'mov rax, qword ptr [rax+0x14]' */
EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx: /*
@@ -805,6 +835,48 @@ ldx: /*
else
EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
insn->off);
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+ struct exception_table_entry *ex;
+ u8 *_insn = image + proglen;
+ s64 delta;
+
+ if (!bpf_prog->aux->extable)
+ break;
+
+ if (excnt >= bpf_prog->aux->num_exentries) {
+ pr_err("ex gen bug\n");
+ return -EFAULT;
+ }
+ ex = &bpf_prog->aux->extable[excnt++];
+
+ delta = _insn - (u8 *)&ex->insn;
+ if (!is_simm32(delta)) {
+ pr_err("extable->insn doesn't fit into 32-bit\n");
+ return -EFAULT;
+ }
+ ex->insn = delta;
+
+ delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+ if (!is_simm32(delta)) {
+ pr_err("extable->handler doesn't fit into 32-bit\n");
+ return -EFAULT;
+ }
+ ex->handler = delta;
+
+ if (dst_reg > BPF_REG_9) {
+ pr_err("verifier error\n");
+ return -EFAULT;
+ }
+ /*
+ * Compute size of x86 insn and its target dest x86 register.
+ * ex_handler_bpf() will use lower 8 bits to adjust
+ * pt_regs->ip to jump over this x86 instruction
+ * and upper bits to figure out which pt_regs to zero out.
+ * End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
+ * of 4 bytes will be ignored and rbx will be zero inited.
+ */
+ ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8);
+ }
break;
/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
@@ -1058,6 +1130,11 @@ emit_jmp:
addrs[i] = proglen;
prog = temp;
}
+
+ if (image && excnt != bpf_prog->aux->num_exentries) {
+ pr_err("extable is not populated\n");
+ return -EFAULT;
+ }
return proglen;
}
@@ -1158,12 +1235,24 @@ out_image:
break;
}
if (proglen == oldproglen) {
- header = bpf_jit_binary_alloc(proglen, &image,
- 1, jit_fill_hole);
+ /*
+ * The number of entries in extable is the number of BPF_LDX
+ * insns that access kernel memory via "pointer to BTF type".
+ * The verifier changed their opcode from LDX|MEM|size
+ * to LDX|PROBE_MEM|size to make JITing easier.
+ */
+ u32 align = __alignof__(struct exception_table_entry);
+ u32 extable_size = prog->aux->num_exentries *
+ sizeof(struct exception_table_entry);
+
+ /* allocate module memory for x86 insns and extable */
+ header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
+ &image, align, jit_fill_hole);
if (!header) {
prog = orig_prog;
goto out_addrs;
}
+ prog->aux->extable = (void *) image + roundup(proglen, align);
}
oldproglen = proglen;
cond_resched();
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 282e28bf41ec..2c2c29b49845 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -16,6 +16,7 @@
#include <linux/u64_stats_sync.h>
struct bpf_verifier_env;
+struct bpf_verifier_log;
struct perf_event;
struct bpf_prog;
struct bpf_map;
@@ -23,6 +24,7 @@ struct sock;
struct seq_file;
struct btf;
struct btf_type;
+struct exception_table_entry;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -211,6 +213,7 @@ enum bpf_arg_type {
ARG_PTR_TO_INT, /* pointer to int */
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
+ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
};
/* type of values returned from helper functions */
@@ -233,11 +236,17 @@ struct bpf_func_proto {
bool gpl_only;
bool pkt_access;
enum bpf_return_type ret_type;
- enum bpf_arg_type arg1_type;
- enum bpf_arg_type arg2_type;
- enum bpf_arg_type arg3_type;
- enum bpf_arg_type arg4_type;
- enum bpf_arg_type arg5_type;
+ union {
+ struct {
+ enum bpf_arg_type arg1_type;
+ enum bpf_arg_type arg2_type;
+ enum bpf_arg_type arg3_type;
+ enum bpf_arg_type arg4_type;
+ enum bpf_arg_type arg5_type;
+ };
+ enum bpf_arg_type arg_type[5];
+ };
+ u32 *btf_id; /* BTF ids of arguments */
};
/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
@@ -281,6 +290,7 @@ enum bpf_reg_type {
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
+ PTR_TO_BTF_ID, /* reg points to kernel struct */
};
/* The information passed from prog-specific *_is_valid_access
@@ -288,7 +298,11 @@ enum bpf_reg_type {
*/
struct bpf_insn_access_aux {
enum bpf_reg_type reg_type;
- int ctx_field_size;
+ union {
+ int ctx_field_size;
+ u32 btf_id;
+ };
+ struct bpf_verifier_log *log; /* for verbose logs */
};
static inline void
@@ -375,6 +389,7 @@ struct bpf_prog_aux {
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
+ u32 attach_btf_id; /* in-kernel BTF type id to attach to */
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
bool offload_requested;
struct bpf_prog **func;
@@ -416,6 +431,8 @@ struct bpf_prog_aux {
* main prog always has linfo_idx == 0
*/
u32 linfo_idx;
+ u32 num_exentries;
+ struct exception_table_entry *extable;
struct bpf_prog_stats __percpu *stats;
union {
struct work_struct work;
@@ -482,6 +499,7 @@ struct bpf_event_entry {
bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
int bpf_prog_calc_tag(struct bpf_prog *fp);
+const char *kernel_type_name(u32 btf_type_id);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
@@ -747,6 +765,15 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info);
+int btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf_type *t, int off, int size,
+ enum bpf_access_type atype,
+ u32 *next_btf_id);
+u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int);
+
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 26a6d58ca78c..6e7284ea1468 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -52,6 +52,8 @@ struct bpf_reg_state {
*/
struct bpf_map *map_ptr;
+ u32 btf_id; /* for PTR_TO_BTF_ID */
+
/* Max size from any of the above. */
unsigned long raw;
};
@@ -330,10 +332,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
#define BPF_LOG_STATS 4
#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS)
+#define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */
static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
- return log->level && log->ubuf && !bpf_verifier_log_full(log);
+ return (log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
+ log->level == BPF_LOG_KERNEL;
}
#define BPF_MAX_SUBPROGS 256
@@ -397,6 +401,8 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
const char *fmt, va_list args);
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...);
+__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
+ const char *fmt, ...);
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 64cdf2a23d42..55d43bc856be 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -56,6 +56,7 @@ bool btf_type_is_void(const struct btf_type *t);
#ifdef CONFIG_BPF_SYSCALL
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
const char *btf_name_by_offset(const struct btf *btf, u32 offset);
+struct btf *btf_parse_vmlinux(void);
#else
static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
u32 type_id)
diff --git a/include/linux/extable.h b/include/linux/extable.h
index 81ecfaa83ad3..4ab9e78f313b 100644
--- a/include/linux/extable.h
+++ b/include/linux/extable.h
@@ -33,4 +33,14 @@ search_module_extables(unsigned long addr)
}
#endif /*CONFIG_MODULES*/
+#ifdef CONFIG_BPF_JIT
+const struct exception_table_entry *search_bpf_extables(unsigned long addr);
+#else
+static inline const struct exception_table_entry *
+search_bpf_extables(unsigned long addr)
+{
+ return NULL;
+}
+#endif
+
#endif /* _LINUX_EXTABLE_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2ce57645f3cd..22ebea2e64ea 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -65,6 +65,9 @@ struct ctl_table_header;
/* unused opcode to mark special call to bpf_tail_call() helper */
#define BPF_TAIL_CALL 0xf0
+/* unused opcode to mark special load instruction. Same as BPF_ABS */
+#define BPF_PROBE_MEM 0x20
+
/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0
@@ -464,10 +467,11 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
#define BPF_CALL_x(x, name, ...) \
static __always_inline \
u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
+ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \
u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \
u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \
{ \
- return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
+ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
} \
static __always_inline \
u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index d6e556c0a085..b04c29270973 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -74,11 +74,12 @@ static inline void bpf_test_probe_##call(void) \
{ \
check_trace_callback_type_##call(__bpf_trace_##template); \
} \
+typedef void (*btf_trace_##call)(void *__data, proto); \
static struct bpf_raw_event_map __used \
__attribute__((section("__bpf_raw_tp_map"))) \
__bpf_trace_tp_map_##call = { \
.tp = &__tracepoint_##call, \
- .bpf_func = (void *)__bpf_trace_##template, \
+ .bpf_func = (void *)(btf_trace_##call)__bpf_trace_##template, \
.num_args = COUNT_ARGS(args), \
.writable_size = size, \
};
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a65c3b0c6935..4af8b0819a32 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -420,6 +420,7 @@ union bpf_attr {
__u32 line_info_rec_size; /* userspace bpf_line_info size */
__aligned_u64 line_info; /* line info */
__u32 line_info_cnt; /* number of bpf_line_info records */
+ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2750,6 +2751,30 @@ union bpf_attr {
* **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
*
* **-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * *ctx* is a pointer to in-kernel struct sk_buff.
+ *
+ * This helper is similar to **bpf_perf_event_output**\ () but
+ * restricted to raw_tracepoint bpf programs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2862,7 +2887,8 @@ union bpf_attr {
FN(sk_storage_get), \
FN(sk_storage_delete), \
FN(send_signal), \
- FN(tcp_gen_syncookie),
+ FN(tcp_gen_syncookie), \
+ FN(skb_output),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 29c7c06c6bd6..f7557af39756 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -698,6 +698,13 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
if (!bpf_verifier_log_needed(log))
return;
+ /* btf verifier prints all types it is processing via
+ * btf_verifier_log_type(..., fmt = NULL).
+ * Skip those prints for in-kernel BTF verification.
+ */
+ if (log->level == BPF_LOG_KERNEL && !fmt)
+ return;
+
__btf_verifier_log(log, "[%u] %s %s%s",
env->log_type_id,
btf_kind_str[kind],
@@ -735,6 +742,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
if (!bpf_verifier_log_needed(log))
return;
+ if (log->level == BPF_LOG_KERNEL && !fmt)
+ return;
/* The CHECK_META phase already did a btf dump.
*
* If member is logged again, it must hit an error in
@@ -777,6 +786,8 @@ static void btf_verifier_log_vsi(struct btf_verifier_env *env,
if (!bpf_verifier_log_needed(log))
return;
+ if (log->level == BPF_LOG_KERNEL && !fmt)
+ return;
if (env->phase != CHECK_META)
btf_verifier_log_type(env, datasec_type, NULL);
@@ -802,6 +813,8 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env,
if (!bpf_verifier_log_needed(log))
return;
+ if (log->level == BPF_LOG_KERNEL)
+ return;
hdr = &btf->hdr;
__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
__btf_verifier_log(log, "version: %u\n", hdr->version);
@@ -2405,7 +2418,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
-
+ if (env->log.level == BPF_LOG_KERNEL)
+ continue;
btf_verifier_log(env, "\t%s val=%d\n",
__btf_name_by_offset(btf, enums[i].name_off),
enums[i].val);
@@ -3367,6 +3381,319 @@ errout:
return ERR_PTR(err);
}
+extern char __weak _binary__btf_vmlinux_bin_start[];
+extern char __weak _binary__btf_vmlinux_bin_end[];
+
+struct btf *btf_parse_vmlinux(void)
+{
+ struct btf_verifier_env *env = NULL;
+ struct bpf_verifier_log *log;
+ struct btf *btf = NULL;
+ int err;
+
+ env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
+ if (!env)
+ return ERR_PTR(-ENOMEM);
+
+ log = &env->log;
+ log->level = BPF_LOG_KERNEL;
+
+ btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
+ if (!btf) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ env->btf = btf;
+
+ btf->data = _binary__btf_vmlinux_bin_start;
+ btf->data_size = _binary__btf_vmlinux_bin_end -
+ _binary__btf_vmlinux_bin_start;
+
+ err = btf_parse_hdr(env);
+ if (err)
+ goto errout;
+
+ btf->nohdr_data = btf->data + btf->hdr.hdr_len;
+
+ err = btf_parse_str_sec(env);
+ if (err)
+ goto errout;
+
+ err = btf_check_all_metas(env);
+ if (err)
+ goto errout;
+
+ btf_verifier_env_free(env);
+ refcount_set(&btf->refcnt, 1);
+ return btf;
+
+errout:
+ btf_verifier_env_free(env);
+ if (btf) {
+ kvfree(btf->types);
+ kfree(btf);
+ }
+ return ERR_PTR(err);
+}
+
+extern struct btf *btf_vmlinux;
+
+bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ struct bpf_verifier_log *log = info->log;
+ u32 btf_id = prog->aux->attach_btf_id;
+ const struct btf_param *args;
+ const struct btf_type *t;
+ const char prefix[] = "btf_trace_";
+ const char *tname;
+ u32 nr_args, arg;
+
+ if (!btf_id)
+ return true;
+
+ if (IS_ERR(btf_vmlinux)) {
+ bpf_log(log, "btf_vmlinux is malformed\n");
+ return false;
+ }
+
+ t = btf_type_by_id(btf_vmlinux, btf_id);
+ if (!t || BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) {
+ bpf_log(log, "btf_id is invalid\n");
+ return false;
+ }
+
+ tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
+ if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
+ bpf_log(log, "btf_id points to wrong type name %s\n", tname);
+ return false;
+ }
+ tname += sizeof(prefix) - 1;
+
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ if (!btf_type_is_ptr(t))
+ return false;
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ if (!btf_type_is_func_proto(t))
+ return false;
+
+ if (off % 8) {
+ bpf_log(log, "raw_tp '%s' offset %d is not multiple of 8\n",
+ tname, off);
+ return false;
+ }
+ arg = off / 8;
+ args = (const struct btf_param *)(t + 1);
+ /* skip first 'void *__data' argument in btf_trace_##name typedef */
+ args++;
+ nr_args = btf_type_vlen(t) - 1;
+ if (arg >= nr_args) {
+ bpf_log(log, "raw_tp '%s' doesn't have %d-th argument\n",
+ tname, arg);
+ return false;
+ }
+
+ t = btf_type_by_id(btf_vmlinux, args[arg].type);
+ /* skip modifiers */
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ if (btf_type_is_int(t))
+ /* accessing a scalar */
+ return true;
+ if (!btf_type_is_ptr(t)) {
+ bpf_log(log,
+ "raw_tp '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
+ tname, arg,
+ __btf_name_by_offset(btf_vmlinux, t->name_off),
+ btf_kind_str[BTF_INFO_KIND(t->info)]);
+ return false;
+ }
+ if (t->type == 0)
+ /* This is a pointer to void.
+ * It is the same as scalar from the verifier safety pov.
+ * No further pointer walking is allowed.
+ */
+ return true;
+
+ /* this is a pointer to another type */
+ info->reg_type = PTR_TO_BTF_ID;
+ info->btf_id = t->type;
+
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ /* skip modifiers */
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ if (!btf_type_is_struct(t)) {
+ bpf_log(log,
+ "raw_tp '%s' arg%d type %s is not a struct\n",
+ tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]);
+ return false;
+ }
+ bpf_log(log, "raw_tp '%s' arg%d has btf_id %d type %s '%s'\n",
+ tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)],
+ __btf_name_by_offset(btf_vmlinux, t->name_off));
+ return true;
+}
+
+int btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf_type *t, int off, int size,
+ enum bpf_access_type atype,
+ u32 *next_btf_id)
+{
+ const struct btf_member *member;
+ const struct btf_type *mtype;
+ const char *tname, *mname;
+ int i, moff = 0, msize;
+
+again:
+ tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
+ if (!btf_type_is_struct(t)) {
+ bpf_log(log, "Type '%s' is not a struct", tname);
+ return -EINVAL;
+ }
+
+ for_each_member(i, t, member) {
+ /* offset of the field in bits */
+ moff = btf_member_bit_offset(t, member);
+
+ if (btf_member_bitfield_size(t, member))
+ /* bitfields are not supported yet */
+ continue;
+
+ if (off + size <= moff / 8)
+ /* won't find anything, field is already too far */
+ break;
+
+ /* type of the field */
+ mtype = btf_type_by_id(btf_vmlinux, member->type);
+ mname = __btf_name_by_offset(btf_vmlinux, member->name_off);
+
+ /* skip modifiers */
+ while (btf_type_is_modifier(mtype))
+ mtype = btf_type_by_id(btf_vmlinux, mtype->type);
+
+ if (btf_type_is_array(mtype))
+ /* array deref is not supported yet */
+ continue;
+
+ if (!btf_type_has_size(mtype) && !btf_type_is_ptr(mtype)) {
+ bpf_log(log, "field %s doesn't have size\n", mname);
+ return -EFAULT;
+ }
+ if (btf_type_is_ptr(mtype))
+ msize = 8;
+ else
+ msize = mtype->size;
+ if (off >= moff / 8 + msize)
+ /* no overlap with member, keep iterating */
+ continue;
+ /* the 'off' we're looking for is either equal to start
+ * of this field or inside of this struct
+ */
+ if (btf_type_is_struct(mtype)) {
+ /* our field must be inside that union or struct */
+ t = mtype;
+
+ /* adjust offset we're looking for */
+ off -= moff / 8;
+ goto again;
+ }
+ if (msize != size) {
+ /* field access size doesn't match */
+ bpf_log(log,
+ "cannot access %d bytes in struct %s field %s that has size %d\n",
+ size, tname, mname, msize);
+ return -EACCES;
+ }
+
+ if (btf_type_is_ptr(mtype)) {
+ const struct btf_type *stype;
+
+ stype = btf_type_by_id(btf_vmlinux, mtype->type);
+ /* skip modifiers */
+ while (btf_type_is_modifier(stype))
+ stype = btf_type_by_id(btf_vmlinux, stype->type);
+ if (btf_type_is_struct(stype)) {
+ *next_btf_id = mtype->type;
+ return PTR_TO_BTF_ID;
+ }
+ }
+ /* all other fields are treated as scalars */
+ return SCALAR_VALUE;
+ }
+ bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off);
+ return -EINVAL;
+}
+
+u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg)
+{
+ char fnname[KSYM_SYMBOL_LEN + 4] = "btf_";
+ const struct btf_param *args;
+ const struct btf_type *t;
+ const char *tname, *sym;
+ u32 btf_id, i;
+
+ if (IS_ERR(btf_vmlinux)) {
+ bpf_log(log, "btf_vmlinux is malformed\n");
+ return -EINVAL;
+ }
+
+ sym = kallsyms_lookup((long)fn, NULL, NULL, NULL, fnname + 4);
+ if (!sym) {
+ bpf_log(log, "kernel doesn't have kallsyms\n");
+ return -EFAULT;
+ }
+
+ for (i = 1; i <= btf_vmlinux->nr_types; i++) {
+ t = btf_type_by_id(btf_vmlinux, i);
+ if (BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF)
+ continue;
+ tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
+ if (!strcmp(tname, fnname))
+ break;
+ }
+ if (i > btf_vmlinux->nr_types) {
+ bpf_log(log, "helper %s type is not found\n", fnname);
+ return -ENOENT;
+ }
+
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ if (!btf_type_is_ptr(t))
+ return -EFAULT;
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ if (!btf_type_is_func_proto(t))
+ return -EFAULT;
+
+ args = (const struct btf_param *)(t + 1);
+ if (arg >= btf_type_vlen(t)) {
+ bpf_log(log, "bpf helper %s doesn't have %d-th argument\n",
+ fnname, arg);
+ return -EINVAL;
+ }
+
+ t = btf_type_by_id(btf_vmlinux, args[arg].type);
+ if (!btf_type_is_ptr(t) || !t->type) {
+ /* anything but the pointer to struct is a helper config bug */
+ bpf_log(log, "ARG_PTR_TO_BTF is misconfigured\n");
+ return -EFAULT;
+ }
+ btf_id = t->type;
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ /* skip modifiers */
+ while (btf_type_is_modifier(t)) {
+ btf_id = t->type;
+ t = btf_type_by_id(btf_vmlinux, t->type);
+ }
+ if (!btf_type_is_struct(t)) {
+ bpf_log(log, "ARG_PTR_TO_BTF is not a struct\n");
+ return -EFAULT;
+ }
+ bpf_log(log, "helper %s arg%d has btf_id %d struct %s\n", fnname + 4,
+ arg, btf_id, __btf_name_by_offset(btf_vmlinux, t->name_off));
+ return btf_id;
+}
+
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
struct seq_file *m)
{
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 66088a9e9b9e..673f5d40a93e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -30,7 +30,7 @@
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/perf_event.h>
-
+#include <linux/extable.h>
#include <asm/unaligned.h>
/* Registers */
@@ -712,6 +712,24 @@ bool is_bpf_text_address(unsigned long addr)
return ret;
}
+const struct exception_table_entry *search_bpf_extables(unsigned long addr)
+{
+ const struct exception_table_entry *e = NULL;
+ struct bpf_prog *prog;
+
+ rcu_read_lock();
+ prog = bpf_prog_kallsyms_find(addr);
+ if (!prog)
+ goto out;
+ if (!prog->aux->num_exentries)
+ goto out;
+
+ e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr);
+out:
+ rcu_read_unlock();
+ return e;
+}
+
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
char *sym)
{
@@ -1291,6 +1309,11 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+u64 __weak bpf_probe_read(void * dst, u32 size, const void * unsafe_ptr)
+{
+ memset(dst, 0, size);
+ return -EFAULT;
+}
/**
* __bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -1310,6 +1333,10 @@ static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u6
/* Non-UAPI available opcodes. */
[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
+ [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
+ [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
+ [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
+ [BPF_LDX | BPF_PROBE_MEM | BPF_DW] = &&LDX_PROBE_MEM_DW,
};
#undef BPF_INSN_3_LBL
#undef BPF_INSN_2_LBL
@@ -1542,6 +1569,16 @@ out:
LDST(W, u32)
LDST(DW, u64)
#undef LDST
+#define LDX_PROBE(SIZEOP, SIZE) \
+ LDX_PROBE_MEM_##SIZEOP: \
+ bpf_probe_read(&DST, SIZE, (const void *)(long) SRC); \
+ CONT;
+ LDX_PROBE(B, 1)
+ LDX_PROBE(H, 2)
+ LDX_PROBE(W, 4)
+ LDX_PROBE(DW, 8)
+#undef LDX_PROBE
+
STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
atomic_add((u32) SRC, (atomic_t *)(unsigned long)
(DST + insn->off));
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 82eabd4e38ad..523e3ac15a08 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -23,6 +23,7 @@
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
+#include <uapi/linux/btf.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -1565,8 +1566,9 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
}
static int
-bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
- enum bpf_attach_type expected_attach_type)
+bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
+ enum bpf_attach_type expected_attach_type,
+ u32 btf_id)
{
switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK:
@@ -1608,13 +1610,19 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ if (btf_id > BTF_MAX_TYPE)
+ return -EINVAL;
+ return 0;
default:
+ if (btf_id)
+ return -EINVAL;
return 0;
}
}
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD line_info_cnt
+#define BPF_PROG_LOAD_LAST_FIELD attach_btf_id
static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
{
@@ -1656,7 +1664,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
return -EPERM;
bpf_prog_load_fixup_attach_type(attr);
- if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
+ if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
+ attr->attach_btf_id))
return -EINVAL;
/* plain bpf_prog allocation */
@@ -1665,6 +1674,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
return -ENOMEM;
prog->expected_attach_type = attr->expected_attach_type;
+ prog->aux->attach_btf_id = attr->attach_btf_id;
prog->aux->offload_requested = !!attr->prog_ifindex;
@@ -1806,17 +1816,52 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
struct bpf_raw_tracepoint *raw_tp;
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
- char tp_name[128];
+ const char *tp_name;
+ char buf[128];
int tp_fd, err;
- if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
- sizeof(tp_name) - 1) < 0)
- return -EFAULT;
- tp_name[sizeof(tp_name) - 1] = 0;
+ if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
+ return -EINVAL;
+
+ prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
+ prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
+ err = -EINVAL;
+ goto out_put_prog;
+ }
+
+ if (prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT &&
+ prog->aux->attach_btf_id) {
+ if (attr->raw_tracepoint.name) {
+ /* raw_tp name should not be specified in raw_tp
+ * programs that were verified via in-kernel BTF info
+ */
+ err = -EINVAL;
+ goto out_put_prog;
+ }
+ /* raw_tp name is taken from type name instead */
+ tp_name = kernel_type_name(prog->aux->attach_btf_id);
+ /* skip the prefix */
+ tp_name += sizeof("btf_trace_") - 1;
+ } else {
+ if (strncpy_from_user(buf,
+ u64_to_user_ptr(attr->raw_tracepoint.name),
+ sizeof(buf) - 1) < 0) {
+ err = -EFAULT;
+ goto out_put_prog;
+ }
+ buf[sizeof(buf) - 1] = 0;
+ tp_name = buf;
+ }
btp = bpf_get_raw_tracepoint(tp_name);
- if (!btp)
- return -ENOENT;
+ if (!btp) {
+ err = -ENOENT;
+ goto out_put_prog;
+ }
raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
if (!raw_tp) {
@@ -1824,38 +1869,27 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
goto out_put_btp;
}
raw_tp->btp = btp;
-
- prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
- if (IS_ERR(prog)) {
- err = PTR_ERR(prog);
- goto out_free_tp;
- }
- if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
- prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
- err = -EINVAL;
- goto out_put_prog;
- }
+ raw_tp->prog = prog;
err = bpf_probe_register(raw_tp->btp, prog);
if (err)
- goto out_put_prog;
+ goto out_free_tp;
- raw_tp->prog = prog;
tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
O_CLOEXEC);
if (tp_fd < 0) {
bpf_probe_unregister(raw_tp->btp, prog);
err = tp_fd;
- goto out_put_prog;
+ goto out_free_tp;
}
return tp_fd;
-out_put_prog:
- bpf_prog_put(prog);
out_free_tp:
kfree(raw_tp);
out_put_btp:
bpf_put_raw_tracepoint(btp);
+out_put_prog:
+ bpf_prog_put(prog);
return err;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d3446f018b9a..556e82f8869b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -205,8 +205,11 @@ struct bpf_call_arg_meta {
u64 msize_umax_value;
int ref_obj_id;
int func_id;
+ u32 btf_id;
};
+struct btf *btf_vmlinux;
+
static DEFINE_MUTEX(bpf_verifier_lock);
static const struct bpf_line_info *
@@ -243,6 +246,10 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
n = min(log->len_total - log->len_used - 1, n);
log->kbuf[n] = '\0';
+ if (log->level == BPF_LOG_KERNEL) {
+ pr_err("BPF:%s\n", log->kbuf);
+ return;
+ }
if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
log->len_used += n;
else
@@ -280,6 +287,19 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
va_end(args);
}
+__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+}
+
static const char *ltrim(const char *s)
{
while (isspace(*s))
@@ -400,6 +420,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
[PTR_TO_TP_BUFFER] = "tp_buffer",
[PTR_TO_XDP_SOCK] = "xdp_sock",
+ [PTR_TO_BTF_ID] = "ptr_",
};
static char slot_type_char[] = {
@@ -430,6 +451,12 @@ static struct bpf_func_state *func(struct bpf_verifier_env *env,
return cur->frame[reg->frameno];
}
+const char *kernel_type_name(u32 id)
+{
+ return btf_name_by_offset(btf_vmlinux,
+ btf_type_by_id(btf_vmlinux, id)->name_off);
+}
+
static void print_verifier_state(struct bpf_verifier_env *env,
const struct bpf_func_state *state)
{
@@ -454,6 +481,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
+ if (t == PTR_TO_BTF_ID)
+ verbose(env, "%s", kernel_type_name(reg->btf_id));
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
@@ -2331,10 +2360,12 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
- enum bpf_access_type t, enum bpf_reg_type *reg_type)
+ enum bpf_access_type t, enum bpf_reg_type *reg_type,
+ u32 *btf_id)
{
struct bpf_insn_access_aux info = {
.reg_type = *reg_type,
+ .log = &env->log,
};
if (env->ops->is_valid_access &&
@@ -2348,7 +2379,10 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
*/
*reg_type = info.reg_type;
- env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+ if (*reg_type == PTR_TO_BTF_ID)
+ *btf_id = info.btf_id;
+ else
+ env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
/* remember the offset of last byte accessed in ctx */
if (env->prog->aux->max_ctx_offset < off + size)
env->prog->aux->max_ctx_offset = off + size;
@@ -2774,6 +2808,53 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
return 0;
}
+static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs,
+ int regno, int off, int size,
+ enum bpf_access_type atype,
+ int value_regno)
+{
+ struct bpf_reg_state *reg = regs + regno;
+ const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
+ const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ u32 btf_id;
+ int ret;
+
+ if (atype != BPF_READ) {
+ verbose(env, "only read is supported\n");
+ return -EACCES;
+ }
+
+ if (off < 0) {
+ verbose(env,
+ "R%d is ptr_%s invalid negative access: off=%d\n",
+ regno, tname, off);
+ return -EACCES;
+ }
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env,
+ "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
+ regno, tname, off, tn_buf);
+ return -EACCES;
+ }
+
+ ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
+ if (ret < 0)
+ return ret;
+
+ if (ret == SCALAR_VALUE) {
+ mark_reg_unknown(env, regs, value_regno);
+ return 0;
+ }
+ mark_reg_known_zero(env, regs, value_regno);
+ regs[value_regno].type = PTR_TO_BTF_ID;
+ regs[value_regno].btf_id = btf_id;
+ return 0;
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -2834,6 +2915,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
}
} else if (reg->type == PTR_TO_CTX) {
enum bpf_reg_type reg_type = SCALAR_VALUE;
+ u32 btf_id = 0;
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
@@ -2845,7 +2927,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err < 0)
return err;
- err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
+ err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
+ if (err)
+ verbose_linfo(env, insn_idx, "; ");
if (!err && t == BPF_READ && value_regno >= 0) {
/* ctx access returns either a scalar, or a
* PTR_TO_PACKET[_META,_END]. In the latter
@@ -2864,6 +2948,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
* a sub-register.
*/
regs[value_regno].subreg_def = DEF_NOT_SUBREG;
+ if (reg_type == PTR_TO_BTF_ID)
+ regs[value_regno].btf_id = btf_id;
}
regs[value_regno].type = reg_type;
}
@@ -2923,6 +3009,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_tp_buffer_access(env, reg, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_BTF_ID) {
+ err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
+ value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
@@ -3351,6 +3440,22 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
expected_type = PTR_TO_SOCKET;
if (type != expected_type)
goto err_type;
+ } else if (arg_type == ARG_PTR_TO_BTF_ID) {
+ expected_type = PTR_TO_BTF_ID;
+ if (type != expected_type)
+ goto err_type;
+ if (reg->btf_id != meta->btf_id) {
+ verbose(env, "Helper has type %s got %s in R%d\n",
+ kernel_type_name(meta->btf_id),
+ kernel_type_name(reg->btf_id), regno);
+
+ return -EACCES;
+ }
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) {
+ verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
+ regno);
+ return -EACCES;
+ }
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
@@ -3498,6 +3603,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
if (func_id != BPF_FUNC_perf_event_read &&
func_id != BPF_FUNC_perf_event_output &&
+ func_id != BPF_FUNC_skb_output &&
func_id != BPF_FUNC_perf_event_read_value)
goto error;
break;
@@ -3585,6 +3691,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_perf_event_read:
case BPF_FUNC_perf_event_output:
case BPF_FUNC_perf_event_read_value:
+ case BPF_FUNC_skb_output:
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
goto error;
break;
@@ -4039,21 +4146,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
- err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
- if (err)
- return err;
+ for (i = 0; i < 5; i++) {
+ if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID) {
+ if (!fn->btf_id[i])
+ fn->btf_id[i] = btf_resolve_helper_id(&env->log, fn->func, i);
+ meta.btf_id = fn->btf_id[i];
+ }
+ err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta);
+ if (err)
+ return err;
+ }
err = record_func_map(env, &meta, func_id, insn_idx);
if (err)
@@ -7493,6 +7595,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
+ case PTR_TO_BTF_ID:
return false;
default:
return true;
@@ -8634,6 +8737,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
case PTR_TO_XDP_SOCK:
convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
break;
+ case PTR_TO_BTF_ID:
+ if (type == BPF_WRITE) {
+ verbose(env, "Writes through BTF pointers are not allowed\n");
+ return -EINVAL;
+ }
+ insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
+ env->prog->aux->num_exentries++;
+ continue;
default:
continue;
}
@@ -9294,6 +9405,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
env->ops = bpf_verifier_ops[env->prog->type];
is_priv = capable(CAP_SYS_ADMIN);
+ if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+ mutex_lock(&bpf_verifier_lock);
+ if (!btf_vmlinux)
+ btf_vmlinux = btf_parse_vmlinux();
+ mutex_unlock(&bpf_verifier_lock);
+ }
+
/* grab the mutex to protect few globals used by verifier */
if (!is_priv)
mutex_lock(&bpf_verifier_lock);
@@ -9313,6 +9431,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
goto err_unlock;
}
+ if (IS_ERR(btf_vmlinux)) {
+ /* Either gcc or pahole or kernel are broken. */
+ verbose(env, "in-kernel BTF is malformed\n");
+ ret = PTR_ERR(btf_vmlinux);
+ goto err_unlock;
+ }
+
env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
env->strict_alignment = true;
diff --git a/kernel/extable.c b/kernel/extable.c
index f6c9406eec7d..f6920a11e28a 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -56,6 +56,8 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
e = search_kernel_exception_table(addr);
if (!e)
e = search_module_extables(addr);
+ if (!e)
+ e = search_bpf_extables(addr);
return e;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 44bd08f2443b..52f7e9d8c29b 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -995,6 +995,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+extern const struct bpf_func_proto bpf_skb_output_proto;
+
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
{
@@ -1053,6 +1055,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
switch (func_id) {
case BPF_FUNC_perf_event_output:
return &bpf_perf_event_output_proto_raw_tp;
+ case BPF_FUNC_skb_output:
+ return &bpf_skb_output_proto;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_raw_tp;
case BPF_FUNC_get_stack:
@@ -1074,7 +1078,7 @@ static bool raw_tp_prog_is_valid_access(int off, int size,
return false;
if (off % size != 0)
return false;
- return true;
+ return btf_ctx_access(off, size, type, prog, info);
}
const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
diff --git a/net/core/filter.c b/net/core/filter.c
index 46196e212413..728ba6203c1f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3798,7 +3798,7 @@ BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- if (unlikely(skb_size > skb->len))
+ if (unlikely(!skb || skb_size > skb->len))
return -EFAULT;
return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
@@ -3816,6 +3816,19 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+static u32 bpf_skb_output_btf_ids[5];
+const struct bpf_func_proto bpf_skb_output_proto = {
+ .func = bpf_skb_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ .btf_id = bpf_skb_output_btf_ids,
+};
+
static unsigned short bpf_tunnel_key_af(u64 flags)
{
return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a65c3b0c6935..4af8b0819a32 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -420,6 +420,7 @@ union bpf_attr {
__u32 line_info_rec_size; /* userspace bpf_line_info size */
__aligned_u64 line_info; /* line info */
__u32 line_info_cnt; /* number of bpf_line_info records */
+ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2750,6 +2751,30 @@ union bpf_attr {
* **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
*
* **-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * *ctx* is a pointer to in-kernel struct sk_buff.
+ *
+ * This helper is similar to **bpf_perf_event_output**\ () but
+ * restricted to raw_tracepoint bpf programs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2862,7 +2887,8 @@ union bpf_attr {
FN(sk_storage_get), \
FN(sk_storage_delete), \
FN(send_signal), \
- FN(tcp_gen_syncookie),
+ FN(tcp_gen_syncookie), \
+ FN(skb_output),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index cbb933532981..79046067720f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -228,6 +228,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
memset(&attr, 0, sizeof(attr));
attr.prog_type = load_attr->prog_type;
attr.expected_attach_type = load_attr->expected_attach_type;
+ if (attr.prog_type == BPF_PROG_TYPE_RAW_TRACEPOINT)
+ /* expected_attach_type is ignored for tracing progs */
+ attr.attach_btf_id = attr.expected_attach_type;
attr.insn_cnt = (__u32)load_attr->insns_cnt;
attr.insns = ptr_to_u64(load_attr->insns);
attr.license = ptr_to_u64(load_attr->license);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8d565590ce05..22bf3b189947 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4489,19 +4489,22 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
prog->expected_attach_type = type;
}
-#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \
- { string, sizeof(string) - 1, ptype, eatype, is_attachable, atype }
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \
+ { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype }
/* Programs that can NOT be attached. */
-#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0)
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
/* Programs that can be attached. */
#define BPF_APROG_SEC(string, ptype, atype) \
- BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype)
+ BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype)
/* Programs that must specify expected attach type at load time. */
#define BPF_EAPROG_SEC(string, ptype, eatype) \
- BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype)
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype)
+
+/* Programs that use BTF to identify attach point */
+#define BPF_PROG_BTF(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 1, 0)
/* Programs that can be attached but attach type can't be identified by section
* name. Kept for backward compatibility.
@@ -4513,7 +4516,8 @@ static const struct {
size_t len;
enum bpf_prog_type prog_type;
enum bpf_attach_type expected_attach_type;
- int is_attachable;
+ bool is_attachable;
+ bool is_attach_btf;
enum bpf_attach_type attach_type;
} section_names[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
@@ -4523,6 +4527,7 @@ static const struct {
BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
+ BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_RAW_TRACEPOINT),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -4627,6 +4632,27 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
continue;
*prog_type = section_names[i].prog_type;
*expected_attach_type = section_names[i].expected_attach_type;
+ if (section_names[i].is_attach_btf) {
+ struct btf *btf = bpf_core_find_kernel_btf();
+ char raw_tp_btf_name[128] = "btf_trace_";
+ char *dst = raw_tp_btf_name + sizeof("btf_trace_") - 1;
+ int ret;
+
+ if (IS_ERR(btf)) {
+ pr_warning("vmlinux BTF is not found\n");
+ return -EINVAL;
+ }
+ /* prepend "btf_trace_" prefix per kernel convention */
+ strncat(dst, name + section_names[i].len,
+ sizeof(raw_tp_btf_name) - (dst - raw_tp_btf_name));
+ ret = btf__find_by_name(btf, raw_tp_btf_name);
+ btf__free(btf);
+ if (ret <= 0) {
+ pr_warning("%s is not found in vmlinux BTF\n", dst);
+ return -EINVAL;
+ }
+ *expected_attach_type = ret;
+ }
return 0;
}
pr_warning("failed to guess program type based on ELF section name '%s'\n", name);
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
new file mode 100644
index 000000000000..430b50de1583
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+ int ifindex = *(int *)data, duration = 0;
+ struct ipv6_packet *pkt_v6 = data + 4;
+
+ if (ifindex != 1)
+ /* spurious kfree_skb not on loopback device */
+ return;
+ if (CHECK(size != 76, "check_size", "size %u != 76\n", size))
+ return;
+ if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth",
+ "h_proto %x\n", pkt_v6->eth.h_proto))
+ return;
+ if (CHECK(pkt_v6->iph.nexthdr != 6, "check_ip",
+ "iph.nexthdr %x\n", pkt_v6->iph.nexthdr))
+ return;
+ if (CHECK(pkt_v6->tcp.doff != 5, "check_tcp",
+ "tcp.doff %x\n", pkt_v6->tcp.doff))
+ return;
+
+ *(bool *)ctx = true;
+}
+
+void test_kfree_skb(void)
+{
+ struct bpf_prog_load_attr attr = {
+ .file = "./kfree_skb.o",
+ };
+
+ struct bpf_object *obj, *obj2 = NULL;
+ struct perf_buffer_opts pb_opts = {};
+ struct perf_buffer *pb = NULL;
+ struct bpf_link *link = NULL;
+ struct bpf_map *perf_buf_map;
+ struct bpf_program *prog;
+ __u32 duration, retval;
+ int err, pkt_fd, kfree_skb_fd;
+ bool passed = false;
+
+ err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &pkt_fd);
+ if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ return;
+
+ err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd);
+ if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb");
+ if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n"))
+ goto close_prog;
+ link = bpf_program__attach_raw_tracepoint(prog, NULL);
+ if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ goto close_prog;
+
+ perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
+ if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
+ goto close_prog;
+
+ /* set up perf buffer */
+ pb_opts.sample_cb = on_sample;
+ pb_opts.ctx = &passed;
+ pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
+ if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ goto close_prog;
+
+ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ /* read perf buffer */
+ err = perf_buffer__poll(pb, 100);
+ if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+ goto close_prog;
+ /* make sure kfree_skb program was triggered
+ * and it sent expected skb into ring buffer
+ */
+ CHECK_FAIL(!passed);
+close_prog:
+ perf_buffer__free(pb);
+ if (!IS_ERR_OR_NULL(link))
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
+ bpf_object__close(obj2);
+}
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
new file mode 100644
index 000000000000..89af8a921ee4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+char _license[] SEC("license") = "GPL";
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
+#define _(P) (__builtin_preserve_access_index(P))
+
+/* define few struct-s that bpf program needs to access */
+struct callback_head {
+ struct callback_head *next;
+ void (*func)(struct callback_head *head);
+};
+struct dev_ifalias {
+ struct callback_head rcuhead;
+};
+
+struct net_device /* same as kernel's struct net_device */ {
+ int ifindex;
+ struct dev_ifalias *ifalias;
+};
+
+typedef struct {
+ int counter;
+} atomic_t;
+typedef struct refcount_struct {
+ atomic_t refs;
+} refcount_t;
+
+struct sk_buff {
+ /* field names and sizes should match to those in the kernel */
+ unsigned int len, data_len;
+ __u16 mac_len, hdr_len, queue_mapping;
+ struct net_device *dev;
+ /* order of the fields doesn't matter */
+ refcount_t users;
+ unsigned char *data;
+ char __pkt_type_offset[0];
+};
+
+/* copy arguments from
+ * include/trace/events/skb.h:
+ * TRACE_EVENT(kfree_skb,
+ * TP_PROTO(struct sk_buff *skb, void *location),
+ *
+ * into struct below:
+ */
+struct trace_kfree_skb {
+ struct sk_buff *skb;
+ void *location;
+};
+
+SEC("tp_btf/kfree_skb")
+int trace_kfree_skb(struct trace_kfree_skb *ctx)
+{
+ struct sk_buff *skb = ctx->skb;
+ struct net_device *dev;
+ int ifindex;
+ struct callback_head *ptr;
+ void *func;
+ int users;
+ unsigned char *data;
+ unsigned short pkt_data;
+ char pkt_type;
+
+ __builtin_preserve_access_index(({
+ users = skb->users.refs.counter;
+ data = skb->data;
+ dev = skb->dev;
+ ifindex = dev->ifindex;
+ ptr = dev->ifalias->rcuhead.next;
+ func = ptr->func;
+ }));
+
+ bpf_probe_read(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
+ pkt_type &= 7;
+
+ /* read eth proto */
+ bpf_probe_read(&pkt_data, sizeof(pkt_data), data + 12);
+
+ bpf_printk("rcuhead.next %llx func %llx\n", ptr, func);
+ bpf_printk("skb->len %d users %d pkt_type %x\n",
+ _(skb->len), users, pkt_type);
+ bpf_printk("skb->queue_mapping %d\n", _(skb->queue_mapping));
+ bpf_printk("dev->ifindex %d data %llx pkt_data %x\n",
+ ifindex, data, pkt_data);
+
+ if (users != 1 || pkt_data != bpf_htons(0x86dd) || ifindex != 1)
+ /* raw tp ignores return value */
+ return 0;
+
+ /* send first 72 byte of the packet to user space */
+ bpf_skb_output(skb, &perf_buf_map, (72ull << 32) | BPF_F_CURRENT_CPU,
+ &ifindex, sizeof(ifindex));
+ return 0;
+}