diff options
Diffstat (limited to 'tools')
113 files changed, 5169 insertions, 892 deletions
diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index b6c5ece4fdee..690b6ebd118f 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -117,6 +117,7 @@ #define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31) +/* Data cache zero operations */ #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4) #define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6) @@ -153,11 +154,13 @@ #define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3) #define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5) -/* Data cache zero operations */ #define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1) #define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3) #define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4) +#define SYS_DC_CIVAPS sys_insn(1, 0, 7, 15, 1) +#define SYS_DC_CIGDVAPS sys_insn(1, 0, 7, 15, 5) + /* * Automatically generated definitions for system registers, the * manual encodings below are in the process of being converted to @@ -475,6 +478,7 @@ #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) #define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1) +#define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2) #define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) #define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) @@ -482,23 +486,36 @@ #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) +#define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0) #define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1) #define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2) #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) #define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0) +#define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0) #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) #define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0) +#define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0) #define __PMEV_op2(n) ((n) & 0x7) #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) +#define SYS_PMEVCNTSVRn_EL1(n) sys_reg(2, 0, 14, __CNTR_CRm(n), __PMEV_op2(n)) #define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) #define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3)) #define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n)) #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) +#define SYS_SPMCGCRn_EL1(n) sys_reg(2, 0, 9, 13, ((n) & 1)) + +#define __SPMEV_op2(n) ((n) & 0x7) +#define __SPMEV_crm(p, n) ((((p) & 7) << 1) | (((n) >> 3) & 1)) +#define SYS_SPMEVCNTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b000, n), __SPMEV_op2(n)) +#define SYS_SPMEVFILT2Rn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b011, n), __SPMEV_op2(n)) +#define SYS_SPMEVFILTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b010, n), __SPMEV_op2(n)) +#define SYS_SPMEVTYPERn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b001, n), __SPMEV_op2(n)) + #define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0) #define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5) @@ -518,7 +535,6 @@ #define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) #define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0) -#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) #define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0) @@ -604,28 +620,18 @@ /* VHE encodings for architectural EL0/1 system registers */ #define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) -#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) -#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) -#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3) -#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) -#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1) -#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) -#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) -#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3) #define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) #define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) #define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) #define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) -#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) #define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) #define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) -#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) #define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7) #define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) #define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) @@ -1028,8 +1034,11 @@ #define PIE_RX UL(0xa) #define PIE_RW UL(0xc) #define PIE_RWX UL(0xe) +#define PIE_MASK UL(0xf) -#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) +#define PIRx_ELx_BITS_PER_IDX 4 +#define PIRx_ELx_PERM_SHIFT(idx) ((idx) * PIRx_ELx_BITS_PER_IDX) +#define PIRx_ELx_PERM_PREP(idx, perm) (((perm) & PIE_MASK) << PIRx_ELx_PERM_SHIFT(idx)) /* * Permission Overlay Extension (POE) permission encodings. @@ -1040,12 +1049,34 @@ #define POE_RX UL(0x3) #define POE_W UL(0x4) #define POE_RW UL(0x5) -#define POE_XW UL(0x6) -#define POE_RXW UL(0x7) +#define POE_WX UL(0x6) +#define POE_RWX UL(0x7) #define POE_MASK UL(0xf) -/* Initial value for Permission Overlay Extension for EL0 */ -#define POR_EL0_INIT POE_RXW +#define POR_ELx_BITS_PER_IDX 4 +#define POR_ELx_PERM_SHIFT(idx) ((idx) * POR_ELx_BITS_PER_IDX) +#define POR_ELx_PERM_GET(idx, reg) (((reg) >> POR_ELx_PERM_SHIFT(idx)) & POE_MASK) +#define POR_ELx_PERM_PREP(idx, perm) (((perm) & POE_MASK) << POR_ELx_PERM_SHIFT(idx)) + +/* + * Definitions for Guarded Control Stack + */ + +#define GCS_CAP_ADDR_MASK GENMASK(63, 12) +#define GCS_CAP_ADDR_SHIFT 12 +#define GCS_CAP_ADDR_WIDTH 52 +#define GCS_CAP_ADDR(x) FIELD_GET(GCS_CAP_ADDR_MASK, x) + +#define GCS_CAP_TOKEN_MASK GENMASK(11, 0) +#define GCS_CAP_TOKEN_SHIFT 0 +#define GCS_CAP_TOKEN_WIDTH 12 +#define GCS_CAP_TOKEN(x) FIELD_GET(GCS_CAP_TOKEN_MASK, x) + +#define GCS_CAP_VALID_TOKEN 0x1 +#define GCS_CAP_IN_PROGRESS_TOKEN 0x5 + +#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \ + GCS_CAP_VALID_TOKEN) #define ARM64_FEATURE_FIELD_BITS 4 diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index d6304e01afe0..da3152c16228 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -31,7 +31,7 @@ PROG COMMANDS | **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }] | **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] +| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] [**kernel_btf** *BTF_FILE*] | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** @@ -127,7 +127,7 @@ bpftool prog pin *PROG* *FILE* Note: *FILE* must be located in *bpffs* mount. It must not contain a dot character ('.'), which is reserved for future extensions of *bpffs*. -bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | name *NAME* } *MAP*] [{ offload_dev | xdpmeta_dev } *NAME*] [pinmaps *MAP_DIR*] [autoattach] +bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | name *NAME* } *MAP*] [{ offload_dev | xdpmeta_dev } *NAME*] [pinmaps *MAP_DIR*] [autoattach] [kernel_btf *BTF_FILE*] Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog loadall** pins all programs from the *OBJ* under *PATH* directory. **type** @@ -153,6 +153,12 @@ bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | na program does not support autoattach, bpftool falls back to regular pinning for that program instead. + The **kernel_btf** option allows specifying an external BTF file to replace + the system's own vmlinux BTF file for CO-RE relocations. Note that any + other feature relying on BTF (such as fentry/fexit programs, struct_ops) + requires the BTF file for the actual kernel running on the host, often + exposed at /sys/kernel/btf/vmlinux. + Note: *PATH* must be located in *bpffs* mount. It must not contain a dot character ('.'), which is reserved for future extensions of *bpffs*. diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 1ce409a6cbd9..27512feb5c70 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -505,13 +505,13 @@ _bpftool() _bpftool_get_map_names return 0 ;; - pinned|pinmaps) + pinned|pinmaps|kernel_btf) _filedir return 0 ;; *) COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) - _bpftool_once_attr 'type pinmaps autoattach' + _bpftool_once_attr 'type pinmaps autoattach kernel_btf' _bpftool_one_of_list 'offload_dev xdpmeta_dev' return 0 ;; diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 93b139bfb988..944ebe21a216 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -221,7 +221,7 @@ static int cgroup_has_attached_progs(int cgroup_fd) for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) { int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]); - if (count < 0) + if (count < 0 && errno != EINVAL) return -1; if (count > 0) { @@ -318,11 +318,11 @@ static int show_bpf_progs(int cgroup_fd, enum bpf_attach_type type, static int do_show(int argc, char **argv) { - enum bpf_attach_type type; int has_attached_progs; const char *path; int cgroup_fd; int ret = -1; + unsigned int i; query_flags = 0; @@ -370,14 +370,14 @@ static int do_show(int argc, char **argv) "AttachFlags", "Name"); btf_vmlinux = libbpf_find_kernel_btf(); - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) { /* * Not all attach types may be supported, so it's expected, * that some requests will fail. * If we were able to get the show for at least one * attach type, let's return 0. */ - if (show_bpf_progs(cgroup_fd, type, 0) == 0) + if (show_bpf_progs(cgroup_fd, cgroup_attach_types[i], 0) == 0) ret = 0; } @@ -400,9 +400,9 @@ exit: static int do_show_tree_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftw) { - enum bpf_attach_type type; int has_attached_progs; int cgroup_fd; + unsigned int i; if (typeflag != FTW_D) return 0; @@ -434,8 +434,8 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, } btf_vmlinux = libbpf_find_kernel_btf(); - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) - show_bpf_progs(cgroup_fd, type, ftw->level); + for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) + show_bpf_progs(cgroup_fd, cgroup_attach_types[i], ftw->level); if (errno == EINVAL) /* Last attach type does not support query. diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 52fd2c9fac56..3535afc80a49 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -380,6 +380,7 @@ show_perf_event_uprobe_json(struct bpf_link_info *info, json_writer_t *wtr) u64_to_ptr(info->perf_event.uprobe.file_name)); jsonw_uint_field(wtr, "offset", info->perf_event.uprobe.offset); jsonw_uint_field(wtr, "cookie", info->perf_event.uprobe.cookie); + jsonw_uint_field(wtr, "ref_ctr_offset", info->perf_event.uprobe.ref_ctr_offset); } static void @@ -823,6 +824,8 @@ static void show_perf_event_uprobe_plain(struct bpf_link_info *info) printf("%s+%#x ", buf, info->perf_event.uprobe.offset); if (info->perf_event.uprobe.cookie) printf("cookie %llu ", info->perf_event.uprobe.cookie); + if (info->perf_event.uprobe.ref_ctr_offset) + printf("ref_ctr_offset 0x%llx ", info->perf_event.uprobe.ref_ctr_offset); } static void show_perf_event_tracepoint_plain(struct bpf_link_info *info) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f010295350be..96eea8a67225 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1681,8 +1681,17 @@ offload_dev: } else if (is_prefix(*argv, "autoattach")) { auto_attach = true; NEXT_ARG(); + } else if (is_prefix(*argv, "kernel_btf")) { + NEXT_ARG(); + + if (!REQ_ARGS(1)) + goto err_free_reuse_maps; + + open_opts.btf_custom_path = GET_ARG(); } else { - p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?", + p_err("expected no more arguments, " + "'type', 'map', 'offload_dev', 'xdpmeta_dev', 'pinmaps', " + "'autoattach', or 'kernel_btf', got: '%s'?", *argv); goto err_free_reuse_maps; } @@ -2474,6 +2483,7 @@ static int do_help(int argc, char **argv) " [map { idx IDX | name NAME } MAP]\\\n" " [pinmaps MAP_DIR]\n" " [autoattach]\n" + " [kernel_btf BTF_FILE]\n" " %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n" " %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n" " %1$s %2$s run PROG \\\n" diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index fd404729b115..85180e4aaa5a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1506,7 +1506,7 @@ union bpf_attr { __s32 map_token_fd; }; - struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ __u32 map_fd; __aligned_u64 key; union { @@ -1995,11 +1995,15 @@ union bpf_attr { * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). + * associated to *skb*, at *offset*. The *flags* are a combination + * of the following values: + * + * **BPF_F_RECOMPUTE_CSUM** + * Automatically update *skb*\ **->csum** after storing the + * bytes. + * **BPF_F_INVALIDATE_HASH** + * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\ + * **->l4hash** to 0. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers @@ -2051,7 +2055,7 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -6723,6 +6727,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from file_name */ __u64 cookie; + __u64 ref_ctr_offset; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index c0e13cdf9660..b997c68bd945 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -388,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; #define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j #define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) +#if defined(__clang__) && (__clang_major__ >= 19) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#elif defined(__GNUC__) && (__GNUC__ >= 14) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#else #define ___type(...) typeof(___arrow(__VA_ARGS__)) +#endif #define ___read(read_fn, dst, src_type, src, accessor) \ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 686824b8b413..a50773d4616e 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -15,6 +15,14 @@ #define __array(name, val) typeof(val) *name[] #define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name +#ifndef likely +#define likely(x) (__builtin_expect(!!(x), 1)) +#endif + +#ifndef unlikely +#define unlikely(x) (__builtin_expect(!!(x), 0)) +#endif + /* * Helper macro to place programs, maps, license in * different sections in elf_bpf file. Section names diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 38bc6b14b066..f1d495dc66bb 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -12,6 +12,7 @@ #include <sys/utsname.h> #include <sys/param.h> #include <sys/stat.h> +#include <sys/mman.h> #include <linux/kernel.h> #include <linux/err.h> #include <linux/btf.h> @@ -120,6 +121,9 @@ struct btf { /* whether base_btf should be freed in btf_free for this instance */ bool owns_base; + /* whether raw_data is a (read-only) mmap */ + bool raw_data_is_mmap; + /* BTF object FD, if loaded into kernel */ int fd; @@ -951,6 +955,17 @@ static bool btf_is_modifiable(const struct btf *btf) return (void *)btf->hdr != btf->raw_data; } +static void btf_free_raw_data(struct btf *btf) +{ + if (btf->raw_data_is_mmap) { + munmap(btf->raw_data, btf->raw_size); + btf->raw_data_is_mmap = false; + } else { + free(btf->raw_data); + } + btf->raw_data = NULL; +} + void btf__free(struct btf *btf) { if (IS_ERR_OR_NULL(btf)) @@ -970,7 +985,7 @@ void btf__free(struct btf *btf) free(btf->types_data); strset__free(btf->strs_set); } - free(btf->raw_data); + btf_free_raw_data(btf); free(btf->raw_data_swapped); free(btf->type_offs); if (btf->owns_base) @@ -996,7 +1011,7 @@ static struct btf *btf_new_empty(struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len; + btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; btf->swapped_endian = base_btf->swapped_endian; } @@ -1030,7 +1045,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf) return libbpf_ptr(btf_new_empty(base_btf)); } -static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) +static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap) { struct btf *btf; int err; @@ -1050,12 +1065,18 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) btf->start_str_off = base_btf->hdr->str_len; } - btf->raw_data = malloc(size); - if (!btf->raw_data) { - err = -ENOMEM; - goto done; + if (is_mmap) { + btf->raw_data = (void *)data; + btf->raw_data_is_mmap = true; + } else { + btf->raw_data = malloc(size); + if (!btf->raw_data) { + err = -ENOMEM; + goto done; + } + memcpy(btf->raw_data, data, size); } - memcpy(btf->raw_data, data, size); + btf->raw_size = size; btf->hdr = btf->raw_data; @@ -1083,12 +1104,12 @@ done: struct btf *btf__new(const void *data, __u32 size) { - return libbpf_ptr(btf_new(data, size, NULL)); + return libbpf_ptr(btf_new(data, size, NULL, false)); } struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf) { - return libbpf_ptr(btf_new(data, size, base_btf)); + return libbpf_ptr(btf_new(data, size, base_btf, false)); } struct btf_elf_secs { @@ -1148,6 +1169,12 @@ static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs else continue; + if (sh.sh_type != SHT_PROGBITS) { + pr_warn("unexpected section type (%d) of section(%d, %s) from %s\n", + sh.sh_type, idx, name, path); + goto err; + } + data = elf_getdata(scn, 0); if (!data) { pr_warn("failed to get section(%d, %s) data from %s\n", @@ -1203,7 +1230,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, if (secs.btf_base_data) { dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size, - NULL); + NULL, false); if (IS_ERR(dist_base_btf)) { err = PTR_ERR(dist_base_btf); dist_base_btf = NULL; @@ -1212,7 +1239,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, } btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size, - dist_base_btf ?: base_btf); + dist_base_btf ?: base_btf, false); if (IS_ERR(btf)) { err = PTR_ERR(btf); goto done; @@ -1329,7 +1356,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) } /* finally parse BTF data */ - btf = btf_new(data, sz, base_btf); + btf = btf_new(data, sz, base_btf, false); err_out: free(data); @@ -1348,6 +1375,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf) return libbpf_ptr(btf_parse_raw(path, base_btf)); } +static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) +{ + struct stat st; + void *data; + struct btf *btf; + int fd, err; + + fd = open(path, O_RDONLY); + if (fd < 0) + return libbpf_err_ptr(-errno); + + if (fstat(fd, &st) < 0) { + err = -errno; + close(fd); + return libbpf_err_ptr(err); + } + + data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + err = -errno; + close(fd); + + if (data == MAP_FAILED) + return libbpf_err_ptr(err); + + btf = btf_new(data, st.st_size, base_btf, true); + if (IS_ERR(btf)) + munmap(data, st.st_size); + + return btf; +} + static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) { struct btf *btf; @@ -1612,7 +1670,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) goto exit_free; } - btf = btf_new(ptr, btf_info.btf_size, base_btf); + btf = btf_new(ptr, btf_info.btf_size, base_btf, false); exit_free: free(ptr); @@ -1652,10 +1710,8 @@ struct btf *btf__load_from_kernel_by_id(__u32 id) static void btf_invalidate_raw_data(struct btf *btf) { - if (btf->raw_data) { - free(btf->raw_data); - btf->raw_data = NULL; - } + if (btf->raw_data) + btf_free_raw_data(btf); if (btf->raw_data_swapped) { free(btf->raw_data_swapped); btf->raw_data_swapped = NULL; @@ -4350,46 +4406,109 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT; } -/* Check if given two types are identical ARRAY definitions */ -static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, int depth) { struct btf_type *t1, *t2; + int k1, k2; +recur: + if (depth <= 0) + return false; t1 = btf_type_by_id(d->btf, id1); t2 = btf_type_by_id(d->btf, id2); - if (!btf_is_array(t1) || !btf_is_array(t2)) + + k1 = btf_kind(t1); + k2 = btf_kind(t2); + if (k1 != k2) return false; - return btf_equal_array(t1, t2); -} + switch (k1) { + case BTF_KIND_UNKN: /* VOID */ + return true; + case BTF_KIND_INT: + return btf_equal_int_tag(t1, t2); + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + return btf_compat_enum(t1, t2); + case BTF_KIND_FWD: + case BTF_KIND_FLOAT: + return btf_equal_common(t1, t2); + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_TYPE_TAG: + if (t1->info != t2->info || t1->name_off != t2->name_off) + return false; + id1 = t1->type; + id2 = t2->type; + goto recur; + case BTF_KIND_ARRAY: { + struct btf_array *a1, *a2; -/* Check if given two types are identical STRUCT/UNION definitions */ -static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) -{ - const struct btf_member *m1, *m2; - struct btf_type *t1, *t2; - int n, i; + if (!btf_compat_array(t1, t2)) + return false; - t1 = btf_type_by_id(d->btf, id1); - t2 = btf_type_by_id(d->btf, id2); + a1 = btf_array(t1); + a2 = btf_array(t1); - if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) - return false; + if (a1->index_type != a2->index_type && + !btf_dedup_identical_types(d, a1->index_type, a2->index_type, depth - 1)) + return false; - if (!btf_shallow_equal_struct(t1, t2)) - return false; + if (a1->type != a2->type && + !btf_dedup_identical_types(d, a1->type, a2->type, depth - 1)) + return false; - m1 = btf_members(t1); - m2 = btf_members(t2); - for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { - if (m1->type != m2->type && - !btf_dedup_identical_arrays(d, m1->type, m2->type) && - !btf_dedup_identical_structs(d, m1->type, m2->type)) + return true; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m1, *m2; + int i, n; + + if (!btf_shallow_equal_struct(t1, t2)) return false; + + m1 = btf_members(t1); + m2 = btf_members(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { + if (m1->type == m2->type) + continue; + if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1)) + return false; + } + return true; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p1, *p2; + int i, n; + + if (!btf_compat_fnproto(t1, t2)) + return false; + + if (t1->type != t2->type && + !btf_dedup_identical_types(d, t1->type, t2->type, depth - 1)) + return false; + + p1 = btf_params(t1); + p2 = btf_params(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, p1++, p2++) { + if (p1->type == p2->type) + continue; + if (!btf_dedup_identical_types(d, p1->type, p2->type, depth - 1)) + return false; + } + return true; + } + default: + return false; } - return true; } + /* * Check equivalence of BTF type graph formed by candidate struct/union (we'll * call it "candidate graph" in this description for brevity) to a type graph @@ -4508,19 +4627,13 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, * different fields within the *same* struct. This breaks type * equivalence check, which makes an assumption that candidate * types sub-graph has a consistent and deduped-by-compiler - * types within a single CU. So work around that by explicitly - * allowing identical array types here. + * types within a single CU. And similar situation can happen + * with struct/union sometimes, and event with pointers. + * So accommodate cases like this doing a structural + * comparison recursively, but avoiding being stuck in endless + * loops by limiting the depth up to which we check. */ - if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) - return 1; - /* It turns out that similar situation can happen with - * struct/union sometimes, sigh... Handle the case where - * structs/unions are exactly the same, down to the referenced - * type IDs. Anything more complicated (e.g., if referenced - * types are different, but equivalent) is *way more* - * complicated and requires a many-to-many equivalence mapping. - */ - if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) + if (btf_dedup_identical_types(d, hypot_type_id, cand_id, 16)) return 1; return 0; } @@ -5268,7 +5381,10 @@ struct btf *btf__load_vmlinux_btf(void) pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n", sysfs_btf_path); } else { - btf = btf__parse(sysfs_btf_path, NULL); + btf = btf_parse_raw_mmap(sysfs_btf_path, NULL); + if (IS_ERR(btf)) + btf = btf__parse(sysfs_btf_path, NULL); + if (!btf) { err = -errno; pr_warn("failed to read kernel BTF from '%s': %s\n", diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6b85060f07b3..e9c641a2fb20 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -60,6 +60,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define MAX_EVENT_NAME_LEN 64 + #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" #define BPF_INSN_SZ (sizeof(struct bpf_insn)) @@ -284,7 +286,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) old_errno = errno; va_start(args, format); - __libbpf_pr(level, format, args); + print_fn(level, format, args); va_end(args); errno = old_errno; @@ -896,7 +898,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_off + prog_sz > sec_sz) { + if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) { pr_warn("sec '%s': program at offset %zu crosses section boundary\n", sec_name, sec_off); return -LIBBPF_ERRNO__FORMAT; @@ -1725,15 +1727,6 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam return ERR_PTR(-ENOENT); } -/* Some versions of Android don't provide memfd_create() in their libc - * implementation, so avoid complications and just go straight to Linux - * syscall. - */ -static int sys_memfd_create(const char *name, unsigned flags) -{ - return syscall(__NR_memfd_create, name, flags); -} - #ifndef MFD_CLOEXEC #define MFD_CLOEXEC 0x0001U #endif @@ -9455,6 +9448,30 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log return 0; } +struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog) +{ + if (prog->func_info_rec_size != sizeof(struct bpf_func_info)) + return libbpf_err_ptr(-EOPNOTSUPP); + return prog->func_info; +} + +__u32 bpf_program__func_info_cnt(const struct bpf_program *prog) +{ + return prog->func_info_cnt; +} + +struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog) +{ + if (prog->line_info_rec_size != sizeof(struct bpf_line_info)) + return libbpf_err_ptr(-EOPNOTSUPP); + return prog->line_info; +} + +__u32 bpf_program__line_info_cnt(const struct bpf_program *prog) +{ + return prog->line_info_cnt; +} + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ .sec = (char *)sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ @@ -11121,16 +11138,16 @@ static const char *tracefs_available_filter_functions_addrs(void) : TRACEFS"/available_filter_functions_addrs"; } -static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *kfunc_name, size_t offset) +static void gen_probe_legacy_event_name(char *buf, size_t buf_sz, + const char *name, size_t offset) { static int index = 0; int i; - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, - __sync_fetch_and_add(&index, 1)); + snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(), + __sync_fetch_and_add(&index, 1), name, offset); - /* sanitize binary_path in the probe name */ + /* sanitize name in the probe name */ for (i = 0; buf[i]; i++) { if (!isalnum(buf[i])) buf[i] = '_'; @@ -11255,9 +11272,9 @@ int probe_kern_syscall_wrapper(int token_fd) return pfd >= 0 ? 1 : 0; } else { /* legacy mode */ - char probe_name[128]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) return 0; @@ -11313,10 +11330,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, func_name, offset, -1 /* pid */, 0 /* ref_ctr_off */); } else { - char probe_name[256]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), - func_name, offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -11860,20 +11877,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru return ret; } -static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *binary_path, uint64_t offset) -{ - int i; - - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); - - /* sanitize binary_path in the probe name */ - for (i = 0; buf[i]; i++) { - if (!isalnum(buf[i])) - buf[i] = '_'; - } -} - static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { @@ -12297,13 +12300,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[PATH_MAX + 64]; + char probe_name[MAX_EVENT_NAME_LEN]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); - gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), - binary_path, func_offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + strrchr(binary_path, '/') ? : binary_path, + func_offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -13371,7 +13375,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = sample_period; attr.wakeup_events = sample_period; p.attr = &attr; @@ -14099,6 +14102,12 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) } link = map_skel->link; + if (!link) { + pr_warn("map '%s': BPF map skeleton link is uninitialized\n", + bpf_map__name(map)); + continue; + } + if (*link) continue; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index fdcee6a71e0f..1137e7d2e1b5 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -940,6 +940,12 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); +LIBBPF_API struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog); +LIBBPF_API __u32 bpf_program__func_info_cnt(const struct bpf_program *prog); + +LIBBPF_API struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog); +LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog); + /** * @brief **bpf_program__set_attach_target()** sets BTF-based attach target * for supported BPF program types: diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index d8b71f22f197..1205f9a4fe04 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -437,6 +437,10 @@ LIBBPF_1.6.0 { bpf_linker__add_fd; bpf_linker__new_fd; bpf_object__prepare; + bpf_program__func_info; + bpf_program__func_info_cnt; + bpf_program__line_info; + bpf_program__line_info_cnt; btf__add_decl_attr; btf__add_type_attr; } LIBBPF_1.5.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 76669c73dcd1..477a3b3389a0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -667,6 +667,15 @@ static inline int sys_dup3(int oldfd, int newfd, int flags) return syscall(__NR_dup3, oldfd, newfd, flags); } +/* Some versions of Android don't provide memfd_create() in their libc + * implementation, so avoid complications and just go straight to Linux + * syscall. + */ +static inline int sys_memfd_create(const char *name, unsigned flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + /* Point *fixed_fd* to the same file that *tmp_fd* points to. * Regardless of success, *tmp_fd* is closed. * Whatever *fixed_fd* pointed to is closed silently. diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 800e0ef09c37..a469e5d4fee7 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -573,7 +573,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz); - fd = memfd_create(filename, 0); + fd = sys_memfd_create(filename, 0); if (fd < 0) { ret = -errno; pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret)); @@ -1376,7 +1376,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj } else { if (!secs_match(dst_sec, src_sec)) { pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* "license" and "version" sections are deduped */ @@ -2223,7 +2223,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob } } else if (!secs_match(dst_sec, src_sec)) { pr_warn("sections %s are not compatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* shdr->sh_link points to SYMTAB */ diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 975e265eab3b..06663f9ea581 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype, minlen = nla_attr_minlen[pt->type]; if (libbpf_nla_len(nla) < minlen) - return -1; + return -EINVAL; if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) - return -1; + return -EINVAL; if (pt->type == LIBBPF_NLA_STRING) { char *data = libbpf_nla_data(nla); if (data[libbpf_nla_len(nla) - 1] != '\0') - return -1; + return -EINVAL; } return 0; @@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, if (policy) { err = validate_nla(nla, maxtype, policy); if (err < 0) - goto errout; + return err; } - if (tb[type]) + if (tb[type]) { pr_warn("Attribute of type %#x found multiple times in message, " "previous attribute is being ignored.\n", type); + } tb[type] = nla; } - err = 0; -errout: - return err; + return 0; } /** diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index eaa420ac848d..0ce251b8d466 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -16,7 +16,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.22"; +static const char *version_str = "v1.23"; static const int supported_api_ver = 3; static struct isst_if_platform_info isst_platform_info; @@ -26,6 +26,7 @@ static FILE *outf; static int cpu_model; static int cpu_stepping; +static int extended_family; #define MAX_CPUS_IN_ONE_REQ 512 static short max_target_cpus; @@ -143,6 +144,14 @@ int is_icx_platform(void) return 0; } +static int is_dmr_plus_platform(void) +{ + if (extended_family == 0x04) + return 1; + + return 0; +} + static int update_cpu_model(void) { unsigned int ebx, ecx, edx; @@ -150,6 +159,7 @@ static int update_cpu_model(void) __cpuid(1, fms, ebx, ecx, edx); family = (fms >> 8) & 0xf; + extended_family = (fms >> 20) & 0x0f; cpu_model = (fms >> 4) & 0xf; if (family == 6 || family == 0xf) cpu_model += ((fms >> 16) & 0xf) << 4; @@ -1517,7 +1527,8 @@ display_result: usleep(2000); /* Adjusting uncore freq */ - isst_adjust_uncore_freq(id, tdp_level, &ctdp_level); + if (!is_dmr_plus_platform()) + isst_adjust_uncore_freq(id, tdp_level, &ctdp_level); fprintf(stderr, "Option is set to online/offline\n"); ctdp_level.core_cpumask_size = diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c index da53aaa27fc9..4f389e1c0525 100644 --- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c +++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c @@ -227,6 +227,7 @@ static int tpmi_get_ctdp_control(struct isst_id *id, int config_index, static int tpmi_get_tdp_info(struct isst_id *id, int config_index, struct isst_pkg_ctdp_level_info *ctdp_level) { + struct isst_perf_level_fabric_info fabric_info; struct isst_perf_level_data_info info; int ret; @@ -253,6 +254,17 @@ static int tpmi_get_tdp_info(struct isst_id *id, int config_index, ctdp_level->uncore_p1 = info.p1_fabric_freq_mhz; ctdp_level->uncore_pm = info.pm_fabric_freq_mhz; + fabric_info.socket_id = id->pkg; + fabric_info.power_domain_id = id->punit; + fabric_info.level = config_index; + + ret = tpmi_process_ioctl(ISST_IF_GET_PERF_LEVEL_FABRIC_INFO, &fabric_info); + if (ret != -1) { + ctdp_level->uncore1_p0 = fabric_info.p0_fabric_freq_mhz[1]; + ctdp_level->uncore1_p1 = fabric_info.p1_fabric_freq_mhz[1]; + ctdp_level->uncore1_pm = fabric_info.pm_fabric_freq_mhz[1]; + } + debug_printf ("cpu:%d ctdp:%d CONFIG_TDP_GET_TDP_INFO tdp_ratio:%d pkg_tdp:%d ctdp_level->t_proc_hot:%d\n", id->cpu, config_index, ctdp_level->tdp_ratio, ctdp_level->pkg_tdp, diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index da5a59a4c545..e4884eb02837 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -460,6 +460,26 @@ void isst_ctdp_display_information(struct isst_id *id, FILE *outf, int tdp_level format_and_print(outf, level + 2, header, value); } + if (ctdp_level->uncore1_p1) { + snprintf(header, sizeof(header), "uncore-1-frequency-base(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore1_p1 * isst_get_disp_freq_multiplier()); + format_and_print(outf, level + 2, header, value); + } + if (ctdp_level->uncore1_pm) { + snprintf(header, sizeof(header), "uncore-1-frequency-min(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore1_pm * isst_get_disp_freq_multiplier()); + format_and_print(outf, level + 2, header, value); + } + + if (ctdp_level->uncore1_p0) { + snprintf(header, sizeof(header), "uncore-1-frequency-max(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore1_p0 * isst_get_disp_freq_multiplier()); + format_and_print(outf, level + 2, header, value); + } + if (ctdp_level->mem_freq) { snprintf(header, sizeof(header), "max-mem-frequency(MHz)"); snprintf(value, sizeof(value), "%d", diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 39ee75677c2c..960f647cfc2d 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -147,6 +147,9 @@ struct isst_pkg_ctdp_level_info { int uncore_p0; int uncore_p1; int uncore_pm; + int uncore1_p0; + int uncore1_p1; + int uncore1_pm; int sse_p1; int avx2_p1; int avx512_p1; diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST index f748f2c33b22..1789a61d0a9b 100644 --- a/tools/testing/selftests/bpf/DENYLIST +++ b/tools/testing/selftests/bpf/DENYLIST @@ -1,5 +1,6 @@ # TEMPORARY # Alphabetical order +dynptr/test_probe_read_user_str_dynptr # disabled until https://patchwork.kernel.org/project/linux-mm/patch/20250422131449.57177-1-mykyta.yatsenko5@gmail.com/ makes it into the bpf-next get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge stacktrace_build_id stacktrace_build_id_nmi diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 6d8feda27ce9..12e99c0277a8 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,3 +1 @@ -fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 -fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 66bb50356be0..cf5ed3bee573 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -34,6 +34,9 @@ OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0) LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null) LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) +SKIP_DOCS ?= +SKIP_LLVM ?= + ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) @@ -172,6 +175,7 @@ override OUTPUT := $(patsubst %/,%,$(OUTPUT)) endif endif +ifneq ($(SKIP_LLVM),1) ifeq ($(feature-llvm),1) LLVM_CFLAGS += -DHAVE_LLVM_SUPPORT LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets @@ -180,13 +184,14 @@ ifeq ($(feature-llvm),1) # Prefer linking statically if it's available, otherwise fallback to shared ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static) LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS)) - LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LLVM_LDLIBS += $(filter-out -lxml2,$(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))) LLVM_LDLIBS += -lstdc++ else LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS)) endif LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags) endif +endif SCRATCH_DIR := $(OUTPUT)/tools BUILD_DIR := $(SCRATCH_DIR)/build @@ -358,7 +363,9 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin endif +ifneq ($(SKIP_DOCS),1) all: docs +endif docs: $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \ @@ -673,9 +680,6 @@ ifneq ($2:$(OUTPUT),:$(shell pwd)) $(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/ endif -$(OUTPUT)/$(TRUNNER_BINARY): LDLIBS += $$(LLVM_LDLIBS) -$(OUTPUT)/$(TRUNNER_BINARY): LDFLAGS += $$(LLVM_LDFLAGS) - # some X.test.o files have runtime dependencies on Y.bpf.o files $(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS) @@ -686,7 +690,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(OUTPUT)/veristat \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) - $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@ + $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@ $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@ $(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \ $(OUTPUT)/$(if $2,$2/)bpftool @@ -811,6 +815,7 @@ $(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.ske $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h $(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h $(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h +$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -831,6 +836,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_local_storage_create.o \ $(OUTPUT)/bench_htab_mem.o \ $(OUTPUT)/bench_bpf_crypto.o \ + $(OUTPUT)/bench_sockmap.o \ # $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 0fd8c9b0d38f..ddd73d06a1eb 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -283,6 +283,7 @@ extern struct argp bench_local_storage_create_argp; extern struct argp bench_htab_mem_argp; extern struct argp bench_trigger_batch_argp; extern struct argp bench_crypto_argp; +extern struct argp bench_sockmap_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, @@ -297,6 +298,7 @@ static const struct argp_child bench_parsers[] = { { &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 }, { &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 }, { &bench_crypto_argp, 0, "bpf crypto benchmark", 0 }, + { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 }, {}, }; @@ -555,6 +557,7 @@ extern const struct bench bench_local_storage_create; extern const struct bench bench_htab_mem; extern const struct bench bench_crypto_encrypt; extern const struct bench bench_crypto_decrypt; +extern const struct bench bench_sockmap; static const struct bench *benchs[] = { &bench_count_global, @@ -621,6 +624,7 @@ static const struct bench *benchs[] = { &bench_htab_mem, &bench_crypto_encrypt, &bench_crypto_decrypt, + &bench_sockmap, }; static void find_benchmark(void) diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c index 926ee822143e..297e32390cd1 100644 --- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c +++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c @@ -279,6 +279,7 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value) } got = read(fd, buf, sizeof(buf) - 1); + close(fd); if (got <= 0) { *value = 0; return; @@ -286,8 +287,6 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value) buf[got] = 0; *value = strtoull(buf, NULL, 0); - - close(fd); } static void htab_mem_measure(struct bench_res *res) diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c new file mode 100644 index 000000000000..8ebf563a67a2 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <error.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <sys/sendfile.h> +#include <arpa/inet.h> +#include <fcntl.h> +#include <argp.h> +#include "bench.h" +#include "bench_sockmap_prog.skel.h" + +#define FILE_SIZE (128 * 1024) +#define DATA_REPEAT_SIZE 10 + +static const char snd_data[DATA_REPEAT_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + +/* c1 <-> [p1, p2] <-> c2 + * RX bench(BPF_SK_SKB_STREAM_VERDICT): + * ARG_FW_RX_PASS: + * send(p2) -> recv(c2) -> bpf skb passthrough -> recv(c2) + * ARG_FW_RX_VERDICT_EGRESS: + * send(c1) -> verdict skb to tx queuec of p2 -> recv(c2) + * ARG_FW_RX_VERDICT_INGRESS: + * send(c1) -> verdict skb to rx queuec of c2 -> recv(c2) + * + * TX bench(BPF_SK_MSG_VERDIC): + * ARG_FW_TX_PASS: + * send(p2) -> bpf msg passthrough -> send(p2) -> recv(c2) + * ARG_FW_TX_VERDICT_INGRESS: + * send(p2) -> verdict msg to rx queue of c2 -> recv(c2) + * ARG_FW_TX_VERDICT_EGRESS: + * send(p1) -> verdict msg to tx queue of p2 -> recv(c2) + */ +enum SOCKMAP_ARG_FLAG { + ARG_FW_RX_NORMAL = 11000, + ARG_FW_RX_PASS, + ARG_FW_RX_VERDICT_EGRESS, + ARG_FW_RX_VERDICT_INGRESS, + ARG_FW_TX_NORMAL, + ARG_FW_TX_PASS, + ARG_FW_TX_VERDICT_INGRESS, + ARG_FW_TX_VERDICT_EGRESS, + ARG_CTL_RX_STRP, + ARG_CONSUMER_DELAY_TIME, + ARG_PRODUCER_DURATION, +}; + +#define TXMODE_NORMAL() \ + ((ctx.mode) == ARG_FW_TX_NORMAL) + +#define TXMODE_BPF_INGRESS() \ + ((ctx.mode) == ARG_FW_TX_VERDICT_INGRESS) + +#define TXMODE_BPF_EGRESS() \ + ((ctx.mode) == ARG_FW_TX_VERDICT_EGRESS) + +#define TXMODE_BPF_PASS() \ + ((ctx.mode) == ARG_FW_TX_PASS) + +#define TXMODE_BPF() ( \ + TXMODE_BPF_PASS() || \ + TXMODE_BPF_INGRESS() || \ + TXMODE_BPF_EGRESS()) + +#define TXMODE() ( \ + TXMODE_NORMAL() || \ + TXMODE_BPF()) + +#define RXMODE_NORMAL() \ + ((ctx.mode) == ARG_FW_RX_NORMAL) + +#define RXMODE_BPF_PASS() \ + ((ctx.mode) == ARG_FW_RX_PASS) + +#define RXMODE_BPF_VERDICT_EGRESS() \ + ((ctx.mode) == ARG_FW_RX_VERDICT_EGRESS) + +#define RXMODE_BPF_VERDICT_INGRESS() \ + ((ctx.mode) == ARG_FW_RX_VERDICT_INGRESS) + +#define RXMODE_BPF_VERDICT() ( \ + RXMODE_BPF_VERDICT_INGRESS() || \ + RXMODE_BPF_VERDICT_EGRESS()) + +#define RXMODE_BPF() ( \ + RXMODE_BPF_PASS() || \ + RXMODE_BPF_VERDICT()) + +#define RXMODE() ( \ + RXMODE_NORMAL() || \ + RXMODE_BPF()) + +static struct socmap_ctx { + struct bench_sockmap_prog *skel; + enum SOCKMAP_ARG_FLAG mode; + #define c1 fds[0] + #define p1 fds[1] + #define c2 fds[2] + #define p2 fds[3] + #define sfd fds[4] + int fds[5]; + long send_calls; + long read_calls; + long prod_send; + long user_read; + int file_size; + int delay_consumer; + int prod_run_time; + int strp_size; +} ctx = { + .prod_send = 0, + .user_read = 0, + .file_size = FILE_SIZE, + .mode = ARG_FW_RX_VERDICT_EGRESS, + .fds = {0}, + .delay_consumer = 0, + .prod_run_time = 0, + .strp_size = 0, +}; + +static void bench_sockmap_prog_destroy(void) +{ + int i; + + for (i = 0; i < sizeof(ctx.fds); i++) { + if (ctx.fds[0] > 0) + close(ctx.fds[i]); + } + + bench_sockmap_prog__destroy(ctx.skel); +} + +static void init_addr(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); + + addr4->sin_family = AF_INET; + addr4->sin_port = 0; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + *len = sizeof(*addr4); +} + +static bool set_non_block(int fd, bool blocking) +{ + int flags = fcntl(fd, F_GETFL, 0); + + if (flags == -1) + return false; + flags = blocking ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK); + return (fcntl(fd, F_SETFL, flags) == 0); +} + +static int create_pair(int *c, int *p, int type) +{ + struct sockaddr_storage addr; + int err, cfd, pfd; + socklen_t addr_len = sizeof(struct sockaddr_storage); + + err = getsockname(ctx.sfd, (struct sockaddr *)&addr, &addr_len); + if (err) { + fprintf(stderr, "getsockname error %d\n", errno); + return err; + } + cfd = socket(AF_INET, type, 0); + if (cfd < 0) { + fprintf(stderr, "socket error %d\n", errno); + return err; + } + + err = connect(cfd, (struct sockaddr *)&addr, addr_len); + if (err && errno != EINPROGRESS) { + fprintf(stderr, "connect error %d\n", errno); + return err; + } + + pfd = accept(ctx.sfd, NULL, NULL); + if (pfd < 0) { + fprintf(stderr, "accept error %d\n", errno); + return err; + } + *c = cfd; + *p = pfd; + return 0; +} + +static int create_sockets(void) +{ + struct sockaddr_storage addr; + int err, one = 1; + socklen_t addr_len; + + init_addr(&addr, &addr_len); + ctx.sfd = socket(AF_INET, SOCK_STREAM, 0); + if (ctx.sfd < 0) { + fprintf(stderr, "socket error:%d\n", errno); + return ctx.sfd; + } + err = setsockopt(ctx.sfd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + if (err) { + fprintf(stderr, "setsockopt error:%d\n", errno); + return err; + } + + err = bind(ctx.sfd, (struct sockaddr *)&addr, addr_len); + if (err) { + fprintf(stderr, "bind error:%d\n", errno); + return err; + } + + err = listen(ctx.sfd, SOMAXCONN); + if (err) { + fprintf(stderr, "listen error:%d\n", errno); + return err; + } + + err = create_pair(&ctx.c1, &ctx.p1, SOCK_STREAM); + if (err) { + fprintf(stderr, "create_pair 1 error\n"); + return err; + } + + err = create_pair(&ctx.c2, &ctx.p2, SOCK_STREAM); + if (err) { + fprintf(stderr, "create_pair 2 error\n"); + return err; + } + printf("create socket fd c1:%d p1:%d c2:%d p2:%d\n", + ctx.c1, ctx.p1, ctx.c2, ctx.p2); + return 0; +} + +static void validate(void) +{ + if (env.consumer_cnt != 2 || env.producer_cnt != 1 || + !env.affinity) + goto err; + return; +err: + fprintf(stderr, "argument '-c 2 -p 1 -a' is necessary"); + exit(1); +} + +static int setup_rx_sockmap(void) +{ + int verdict, pass, parser, map; + int zero = 0, one = 1; + int err; + + parser = bpf_program__fd(ctx.skel->progs.prog_skb_parser); + verdict = bpf_program__fd(ctx.skel->progs.prog_skb_verdict); + pass = bpf_program__fd(ctx.skel->progs.prog_skb_pass); + map = bpf_map__fd(ctx.skel->maps.sock_map_rx); + + if (ctx.strp_size != 0) { + ctx.skel->bss->pkt_size = ctx.strp_size; + err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0); + if (err) + return err; + } + + if (RXMODE_BPF_VERDICT()) + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + else if (RXMODE_BPF_PASS()) + err = bpf_prog_attach(pass, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) + return err; + + if (RXMODE_BPF_PASS()) + return bpf_map_update_elem(map, &zero, &ctx.c2, BPF_NOEXIST); + + err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST); + if (err < 0) + return err; + + if (RXMODE_BPF_VERDICT_INGRESS()) { + ctx.skel->bss->verdict_dir = BPF_F_INGRESS; + err = bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST); + } else { + err = bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST); + } + if (err < 0) + return err; + + return 0; +} + +static int setup_tx_sockmap(void) +{ + int zero = 0, one = 1; + int prog, map; + int err; + + map = bpf_map__fd(ctx.skel->maps.sock_map_tx); + prog = TXMODE_BPF_PASS() ? + bpf_program__fd(ctx.skel->progs.prog_skmsg_pass) : + bpf_program__fd(ctx.skel->progs.prog_skmsg_verdict); + + err = bpf_prog_attach(prog, map, BPF_SK_MSG_VERDICT, 0); + if (err) + return err; + + if (TXMODE_BPF_EGRESS()) { + err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST); + err |= bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST); + } else { + ctx.skel->bss->verdict_dir = BPF_F_INGRESS; + err = bpf_map_update_elem(map, &zero, &ctx.p2, BPF_NOEXIST); + err |= bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST); + } + + if (err < 0) + return err; + + return 0; +} + +static void setup(void) +{ + int err; + + ctx.skel = bench_sockmap_prog__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "error loading skel\n"); + exit(1); + } + + if (create_sockets()) { + fprintf(stderr, "create_net_mode error\n"); + goto err; + } + + if (RXMODE_BPF()) { + err = setup_rx_sockmap(); + if (err) { + fprintf(stderr, "setup_rx_sockmap error:%d\n", err); + goto err; + } + } else if (TXMODE_BPF()) { + err = setup_tx_sockmap(); + if (err) { + fprintf(stderr, "setup_tx_sockmap error:%d\n", err); + goto err; + } + } else { + fprintf(stderr, "unknown sockmap bench mode: %d\n", ctx.mode); + goto err; + } + + return; + +err: + bench_sockmap_prog_destroy(); + exit(1); +} + +static void measure(struct bench_res *res) +{ + res->drops = atomic_swap(&ctx.prod_send, 0); + res->hits = atomic_swap(&ctx.skel->bss->process_byte, 0); + res->false_hits = atomic_swap(&ctx.user_read, 0); + res->important_hits = atomic_swap(&ctx.send_calls, 0); + res->important_hits |= atomic_swap(&ctx.read_calls, 0) << 32; +} + +static void verify_data(int *check_pos, char *buf, int rcv) +{ + for (int i = 0 ; i < rcv; i++) { + if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) { + fprintf(stderr, "verify data fail"); + exit(1); + } + (*check_pos)++; + if (*check_pos >= FILE_SIZE) + *check_pos = 0; + } +} + +static void *consumer(void *input) +{ + int rcv, sent; + int check_pos = 0; + int tid = (long)input; + int recv_buf_size = FILE_SIZE; + char *buf = malloc(recv_buf_size); + int delay_read = ctx.delay_consumer; + + if (!buf) { + fprintf(stderr, "fail to init read buffer"); + return NULL; + } + + while (true) { + if (tid == 1) { + /* consumer 1 is unused for tx test and stream verdict test */ + if (RXMODE_BPF() || TXMODE()) + return NULL; + /* it's only for RX_NORMAL which service as reserve-proxy mode */ + rcv = read(ctx.p1, buf, recv_buf_size); + if (rcv < 0) { + fprintf(stderr, "fail to read p1"); + return NULL; + } + + sent = send(ctx.p2, buf, recv_buf_size, 0); + if (sent < 0) { + fprintf(stderr, "fail to send p2"); + return NULL; + } + } else { + if (delay_read != 0) { + if (delay_read < 0) + return NULL; + sleep(delay_read); + delay_read = 0; + } + /* read real endpoint by consumer 0 */ + atomic_inc(&ctx.read_calls); + rcv = read(ctx.c2, buf, recv_buf_size); + if (rcv < 0 && errno != EAGAIN) { + fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno); + return NULL; + } + verify_data(&check_pos, buf, rcv); + atomic_add(&ctx.user_read, rcv); + } + } + + return NULL; +} + +static void *producer(void *input) +{ + int off = 0, fp, need_sent, sent; + int file_size = ctx.file_size; + struct timespec ts1, ts2; + int target; + FILE *file; + + file = tmpfile(); + if (!file) { + fprintf(stderr, "create file for sendfile"); + return NULL; + } + + /* we need simple verify */ + for (int i = 0; i < file_size; i++) { + if (fwrite(&snd_data[off], sizeof(char), 1, file) != 1) { + fprintf(stderr, "init tmpfile error"); + return NULL; + } + if (++off >= sizeof(snd_data)) + off = 0; + } + fflush(file); + fseek(file, 0, SEEK_SET); + + fp = fileno(file); + need_sent = file_size; + clock_gettime(CLOCK_MONOTONIC, &ts1); + + if (RXMODE_BPF_VERDICT()) + target = ctx.c1; + else if (TXMODE_BPF_EGRESS()) + target = ctx.p1; + else + target = ctx.p2; + set_non_block(target, true); + while (true) { + if (ctx.prod_run_time) { + clock_gettime(CLOCK_MONOTONIC, &ts2); + if (ts2.tv_sec - ts1.tv_sec > ctx.prod_run_time) + return NULL; + } + + errno = 0; + atomic_inc(&ctx.send_calls); + sent = sendfile(target, fp, NULL, need_sent); + if (sent < 0) { + if (errno != EAGAIN && errno != ENOMEM && errno != ENOBUFS) { + fprintf(stderr, "sendfile return %d, errorno %d:%s\n", + sent, errno, strerror(errno)); + return NULL; + } + continue; + } else if (sent < need_sent) { + need_sent -= sent; + atomic_add(&ctx.prod_send, sent); + continue; + } + atomic_add(&ctx.prod_send, need_sent); + need_sent = file_size; + lseek(fp, 0, SEEK_SET); + } + + return NULL; +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double speed_mbs, prod_mbs, bpf_mbs, send_hz, read_hz; + + prod_mbs = res->drops / 1000000.0 / (delta_ns / 1000000000.0); + speed_mbs = res->false_hits / 1000000.0 / (delta_ns / 1000000000.0); + bpf_mbs = res->hits / 1000000.0 / (delta_ns / 1000000000.0); + send_hz = (res->important_hits & 0xFFFFFFFF) / (delta_ns / 1000000000.0); + read_hz = (res->important_hits >> 32) / (delta_ns / 1000000000.0); + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + printf("Send Speed %8.3lf MB/s (%8.3lf calls/s), BPF Speed %8.3lf MB/s, " + "Rcv Speed %8.3lf MB/s (%8.3lf calls/s)\n", + prod_mbs, send_hz, bpf_mbs, speed_mbs, read_hz); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + double verdict_mbs_mean = 0.0; + long verdict_total = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + verdict_mbs_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); + verdict_total += res[i].hits / 1000000.0; + } + + printf("Summary: total trans %8.3lu MB \u00B1 %5.3lf MB/s\n", + verdict_total, verdict_mbs_mean); +} + +static const struct argp_option opts[] = { + { "rx-normal", ARG_FW_RX_NORMAL, NULL, 0, + "simple reserve-proxy mode, no bfp enabled"}, + { "rx-pass", ARG_FW_RX_PASS, NULL, 0, + "run bpf prog but no redir applied"}, + { "rx-strp", ARG_CTL_RX_STRP, "Byte", 0, + "enable strparser and set the encapsulation size"}, + { "rx-verdict-egress", ARG_FW_RX_VERDICT_EGRESS, NULL, 0, + "forward data with bpf(stream verdict)"}, + { "rx-verdict-ingress", ARG_FW_RX_VERDICT_INGRESS, NULL, 0, + "forward data with bpf(stream verdict)"}, + { "tx-normal", ARG_FW_TX_NORMAL, NULL, 0, + "simple c-s mode, no bfp enabled"}, + { "tx-pass", ARG_FW_TX_PASS, NULL, 0, + "run bpf prog but no redir applied"}, + { "tx-verdict-ingress", ARG_FW_TX_VERDICT_INGRESS, NULL, 0, + "forward msg to ingress queue of another socket"}, + { "tx-verdict-egress", ARG_FW_TX_VERDICT_EGRESS, NULL, 0, + "forward msg to egress queue of another socket"}, + { "delay-consumer", ARG_CONSUMER_DELAY_TIME, "SEC", 0, + "delay consumer start"}, + { "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0, + "producer duration"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_FW_RX_NORMAL...ARG_FW_TX_VERDICT_EGRESS: + ctx.mode = key; + break; + case ARG_CONSUMER_DELAY_TIME: + ctx.delay_consumer = strtol(arg, NULL, 10); + break; + case ARG_PRODUCER_DURATION: + ctx.prod_run_time = strtol(arg, NULL, 10); + break; + case ARG_CTL_RX_STRP: + ctx.strp_size = strtol(arg, NULL, 10); + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_sockmap_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* Benchmark performance of creating bpf local storage */ +const struct bench bench_sockmap = { + .name = "sockmap", + .argp = &bench_sockmap_argp, + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 6535c8ae3c46..5e512a1d09d1 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -591,4 +591,9 @@ extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym; extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym; +struct bpf_iter_dmabuf; +extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym; +extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym; +extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3201a962b3dc..f74e1ea0ad3b 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -22,6 +22,8 @@ CONFIG_CRYPTO_AES=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DMABUF_HEAPS=y +CONFIG_DMABUF_HEAPS_SYSTEM=y CONFIG_DUMMY=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FPROBE=y @@ -108,6 +110,7 @@ CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_SYN_COOKIES=y CONFIG_TEST_BPF=m +CONFIG_UDMABUF=y CONFIG_USERFAULTFD=y CONFIG_VSOCKETS=y CONFIG_VXLAN=y diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c index 7565fc7690c2..0223fce4db2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c @@ -51,9 +51,11 @@ static void test_arena_spin_lock_size(int size) struct arena_spin_lock *skel; pthread_t thread_id[16]; int prog_fd, i, err; + int nthreads; void *ret; - if (get_nprocs() < 2) { + nthreads = MIN(get_nprocs(), ARRAY_SIZE(thread_id)); + if (nthreads < 2) { test__skip(); return; } @@ -66,25 +68,25 @@ static void test_arena_spin_lock_size(int size) goto end; } skel->bss->cs_count = size; - skel->bss->limit = repeat * 16; + skel->bss->limit = repeat * nthreads; - ASSERT_OK(pthread_barrier_init(&barrier, NULL, 16), "barrier init"); + ASSERT_OK(pthread_barrier_init(&barrier, NULL, nthreads), "barrier init"); prog_fd = bpf_program__fd(skel->progs.prog); - for (i = 0; i < 16; i++) { + for (i = 0; i < nthreads; i++) { err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); if (!ASSERT_OK(err, "pthread_create")) goto end_barrier; } - for (i = 0; i < 16; i++) { + for (i = 0; i < nthreads; i++) { if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) goto end_barrier; if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) goto end_barrier; } - ASSERT_EQ(skel->bss->counter, repeat * 16, "check counter value"); + ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value"); end_barrier: pthread_barrier_destroy(&barrier); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 329c7862b52d..cabc51c2ca6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -122,6 +122,85 @@ cleanup: test_attach_probe_manual__destroy(skel); } +/* attach uprobe/uretprobe long event name testings */ +static void test_attach_uprobe_long_event_name(void) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct bpf_link *uprobe_link, *uretprobe_link; + struct test_attach_probe_manual *skel; + ssize_t uprobe_offset; + char path[PATH_MAX] = {0}; + + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) + return; + + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) + goto cleanup; + + if (!ASSERT_GT(readlink("/proc/self/exe", path, PATH_MAX - 1), 0, "readlink")) + goto cleanup; + + /* manual-attach uprobe/uretprobe */ + uprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY; + uprobe_opts.ref_ctr_offset = 0; + uprobe_opts.retprobe = false; + uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, + 0 /* self pid */, + path, + uprobe_offset, + &uprobe_opts); + if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_long_event_name")) + goto cleanup; + skel->links.handle_uprobe = uprobe_link; + + uprobe_opts.retprobe = true; + uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, + -1 /* any pid */, + path, + uprobe_offset, &uprobe_opts); + if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_long_event_name")) + goto cleanup; + skel->links.handle_uretprobe = uretprobe_link; + +cleanup: + test_attach_probe_manual__destroy(skel); +} + +/* attach kprobe/kretprobe long event name testings */ +static void test_attach_kprobe_long_event_name(void) +{ + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + struct bpf_link *kprobe_link, *kretprobe_link; + struct test_attach_probe_manual *skel; + + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) + return; + + /* manual-attach kprobe/kretprobe */ + kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY; + kprobe_opts.retprobe = false; + kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe, + "bpf_testmod_looooooooooooooooooooooooooooooong_name", + &kprobe_opts); + if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_long_event_name")) + goto cleanup; + skel->links.handle_kprobe = kprobe_link; + + kprobe_opts.retprobe = true; + kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe, + "bpf_testmod_looooooooooooooooooooooooooooooong_name", + &kprobe_opts); + if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_long_event_name")) + goto cleanup; + skel->links.handle_kretprobe = kretprobe_link; + +cleanup: + test_attach_probe_manual__destroy(skel); +} + static void test_attach_probe_auto(struct test_attach_probe *skel) { struct bpf_link *uprobe_err_link; @@ -323,6 +402,11 @@ void test_attach_probe(void) if (test__start_subtest("uprobe-ref_ctr")) test_uprobe_ref_ctr(skel); + if (test__start_subtest("uprobe-long_name")) + test_attach_uprobe_long_event_name(); + if (test__start_subtest("kprobe-long_name")) + test_attach_kprobe_long_event_name(); + cleanup: test_attach_probe__destroy(skel); ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup"); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index dbd13f8e42a7..dd6512fa652b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -63,6 +63,12 @@ static void test_bpf_nf_ct(int mode) .repeat = 1, ); + if (SYS_NOFAIL("iptables-legacy --version")) { + fprintf(stdout, "Missing required iptables-legacy tool\n"); + test__skip(); + return; + } + skel = test_bpf_nf__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c index d9024c7a892a..5bc15bb6b7ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -440,6 +440,105 @@ cleanup: btf__free(btf1); } +/* Ensure module split BTF dedup worked correctly; when dedup fails badly + * core kernel types are in split BTF also, so ensure that references to + * such types point at base - not split - BTF. + * + * bpf_testmod_test_write() has multiple core kernel type parameters; + * + * ssize_t + * bpf_testmod_test_write(struct file *file, struct kobject *kobj, + * struct bin_attribute *bin_attr, + * char *buf, loff_t off, size_t len); + * + * Ensure each of the FUNC_PROTO params is a core kernel type. + * + * Do the same for + * + * __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk); + * + * ...and + * + * __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb); + * + */ +const char *mod_funcs[] = { + "bpf_testmod_test_write", + "bpf_kfunc_call_test3", + "bpf_kfunc_call_test_pass_ctx" +}; + +static void test_split_module(void) +{ + struct btf *vmlinux_btf, *btf1 = NULL; + int i, nr_base_types; + + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux_btf")) + return; + nr_base_types = btf__type_cnt(vmlinux_btf); + if (!ASSERT_GT(nr_base_types, 0, "nr_base_types")) + goto cleanup; + + btf1 = btf__parse_split("/sys/kernel/btf/bpf_testmod", vmlinux_btf); + if (!ASSERT_OK_PTR(btf1, "split_btf")) + return; + + for (i = 0; i < ARRAY_SIZE(mod_funcs); i++) { + const struct btf_param *p; + const struct btf_type *t; + __u16 vlen; + __u32 id; + int j; + + id = btf__find_by_name_kind(btf1, mod_funcs[i], BTF_KIND_FUNC); + if (!ASSERT_GE(id, nr_base_types, "func_id")) + goto cleanup; + t = btf__type_by_id(btf1, id); + if (!ASSERT_OK_PTR(t, "func_id_type")) + goto cleanup; + t = btf__type_by_id(btf1, t->type); + if (!ASSERT_OK_PTR(t, "func_proto_id_type")) + goto cleanup; + if (!ASSERT_EQ(btf_is_func_proto(t), true, "is_func_proto")) + goto cleanup; + vlen = btf_vlen(t); + + for (j = 0, p = btf_params(t); j < vlen; j++, p++) { + /* bpf_testmod uses resilient split BTF, so any + * reference types will be added to split BTF and their + * associated targets will be base BTF types; for example + * for a "struct sock *" the PTR will be in split BTF + * while the "struct sock" will be in base. + * + * In some cases like loff_t we have to resolve + * multiple typedefs hence the while() loop below. + * + * Note that resilient split BTF generation depends + * on pahole version, so we do not assert that + * reference types are in split BTF, as if pahole + * does not support resilient split BTF they will + * also be base BTF types. + */ + id = p->type; + do { + t = btf__type_by_id(btf1, id); + if (!ASSERT_OK_PTR(t, "param_ref_type")) + goto cleanup; + if (!btf_is_mod(t) && !btf_is_ptr(t) && !btf_is_typedef(t)) + break; + id = t->type; + } while (true); + + if (!ASSERT_LT(id, nr_base_types, "verify_base_type")) + goto cleanup; + } + } +cleanup: + btf__free(btf1); + btf__free(vmlinux_btf); +} + void test_btf_dedup_split() { if (test__start_subtest("split_simple")) @@ -450,4 +549,6 @@ void test_btf_dedup_split() test_split_fwd_resolve(); if (test__start_subtest("split_dup_struct_in_cu")) test_split_dup_struct_in_cu(); + if (test__start_subtest("split_module")) + test_split_module(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c index eef1158676ed..3696fb9a05ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -12,10 +12,11 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args) vfprintf(ctx, fmt, args); } -void test_btf_split() { +static void __test_btf_split(bool multi) +{ struct btf_dump *d = NULL; const struct btf_type *t; - struct btf *btf1, *btf2; + struct btf *btf1, *btf2, *btf3 = NULL; int str_off, i, err; btf1 = btf__new_empty(); @@ -63,14 +64,46 @@ void test_btf_split() { ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen"); ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name"); + if (multi) { + btf3 = btf__new_empty_split(btf2); + if (!ASSERT_OK_PTR(btf3, "multi_split_btf")) + goto cleanup; + } else { + btf3 = btf2; + } + + btf__add_union(btf3, "u1", 16); /* [5] union u1 { */ + btf__add_field(btf3, "f1", 4, 0, 0); /* struct s2 f1; */ + btf__add_field(btf3, "uf2", 1, 0, 0); /* int f2; */ + /* } */ + + if (multi) { + t = btf__type_by_id(btf2, 5); + ASSERT_NULL(t, "multisplit_type_in_first_split"); + } + + t = btf__type_by_id(btf3, 5); + if (!ASSERT_OK_PTR(t, "split_union_type")) + goto cleanup; + ASSERT_EQ(btf_is_union(t), true, "split_union_kind"); + ASSERT_EQ(btf_vlen(t), 2, "split_union_vlen"); + ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "u1", "split_union_name"); + ASSERT_EQ(btf__type_cnt(btf3), 6, "split_type_cnt"); + + t = btf__type_by_id(btf3, 1); + if (!ASSERT_OK_PTR(t, "split_base_type")) + goto cleanup; + ASSERT_EQ(btf_is_int(t), true, "split_base_int"); + ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "int", "split_base_type_name"); + /* BTF-to-C dump of split BTF */ dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) return; - d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL); + d = btf_dump__new(btf3, btf_dump_printf, dump_buf_file, NULL); if (!ASSERT_OK_PTR(d, "btf_dump__new")) goto cleanup; - for (i = 1; i < btf__type_cnt(btf2); i++) { + for (i = 1; i < btf__type_cnt(btf3); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } @@ -79,12 +112,15 @@ void test_btf_split() { ASSERT_STREQ(dump_buf, "struct s1 {\n" " int f1;\n" -"};\n" -"\n" +"};\n\n" "struct s2 {\n" " struct s1 f1;\n" " int f2;\n" " int *f3;\n" +"};\n\n" +"union u1 {\n" +" struct s2 f1;\n" +" int uf2;\n" "};\n\n", "c_dump"); cleanup: @@ -94,4 +130,14 @@ cleanup: btf_dump__free(d); btf__free(btf1); btf__free(btf2); + if (btf2 != btf3) + btf__free(btf3); +} + +void test_btf_split(void) +{ + if (test__start_subtest("single_split")) + __test_btf_split(false); + if (test__start_subtest("multi_split")) + __test_btf_split(true); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c new file mode 100644 index 000000000000..3923e64c4c1d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2025 Isovalent */ + +#include <test_progs.h> +#include <bpf/btf.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> + +static void test_btf_mmap_sysfs(const char *path, struct btf *base) +{ + struct stat st; + __u64 btf_size, end; + void *raw_data = NULL; + int fd = -1; + long page_size; + struct btf *btf = NULL; + + page_size = sysconf(_SC_PAGESIZE); + if (!ASSERT_GE(page_size, 0, "get_page_size")) + goto cleanup; + + if (!ASSERT_OK(stat(path, &st), "stat_btf")) + goto cleanup; + + btf_size = st.st_size; + end = (btf_size + page_size - 1) / page_size * page_size; + + fd = open(path, O_RDONLY); + if (!ASSERT_GE(fd, 0, "open_btf")) + goto cleanup; + + raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable")) + goto cleanup; + + raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared")) + goto cleanup; + + raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size")) + goto cleanup; + + raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0); + if (!ASSERT_OK_PTR(raw_data, "mmap_btf")) + goto cleanup; + + if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1, + "mprotect_writable")) + goto cleanup; + + if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1, + "mprotect_executable")) + goto cleanup; + + /* Check padding is zeroed */ + for (int i = btf_size; i < end; i++) { + if (((__u8 *)raw_data)[i] != 0) { + PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i); + goto cleanup; + } + } + + btf = btf__new_split(raw_data, btf_size, base); + if (!ASSERT_OK_PTR(btf, "parse_btf")) + goto cleanup; + +cleanup: + btf__free(btf); + if (raw_data && raw_data != MAP_FAILED) + munmap(raw_data, btf_size); + if (fd >= 0) + close(fd); +} + +void test_btf_sysfs(void) +{ + test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c new file mode 100644 index 000000000000..6c2b0c3dbcd8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "dmabuf_iter.skel.h" + +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <linux/dma-buf.h> +#include <linux/dma-heap.h> +#include <linux/udmabuf.h> + +static int udmabuf = -1; +static const char udmabuf_test_buffer_name[DMA_BUF_NAME_LEN] = "udmabuf_test_buffer_for_iter"; +static size_t udmabuf_test_buffer_size; +static int sysheap_dmabuf = -1; +static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter"; +static size_t sysheap_test_buffer_size; + +static int create_udmabuf(void) +{ + struct udmabuf_create create; + int dev_udmabuf, memfd, local_udmabuf; + + udmabuf_test_buffer_size = 10 * getpagesize(); + + if (!ASSERT_LE(sizeof(udmabuf_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG")) + return -1; + + memfd = memfd_create("memfd_test", MFD_ALLOW_SEALING); + if (!ASSERT_OK_FD(memfd, "memfd_create")) + return -1; + + if (!ASSERT_OK(ftruncate(memfd, udmabuf_test_buffer_size), "ftruncate")) + goto close_memfd; + + if (!ASSERT_OK(fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK), "seal")) + goto close_memfd; + + dev_udmabuf = open("/dev/udmabuf", O_RDONLY); + if (!ASSERT_OK_FD(dev_udmabuf, "open udmabuf")) + goto close_memfd; + + memset(&create, 0, sizeof(create)); + create.memfd = memfd; + create.flags = UDMABUF_FLAGS_CLOEXEC; + create.offset = 0; + create.size = udmabuf_test_buffer_size; + + local_udmabuf = ioctl(dev_udmabuf, UDMABUF_CREATE, &create); + close(dev_udmabuf); + if (!ASSERT_OK_FD(local_udmabuf, "udmabuf_create")) + goto close_memfd; + + if (!ASSERT_OK(ioctl(local_udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name")) + goto close_udmabuf; + + return local_udmabuf; + +close_udmabuf: + close(local_udmabuf); +close_memfd: + close(memfd); + return -1; +} + +static int create_sys_heap_dmabuf(void) +{ + sysheap_test_buffer_size = 20 * getpagesize(); + + struct dma_heap_allocation_data data = { + .len = sysheap_test_buffer_size, + .fd = 0, + .fd_flags = O_RDWR | O_CLOEXEC, + .heap_flags = 0, + }; + int heap_fd, ret; + + if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG")) + return -1; + + heap_fd = open("/dev/dma_heap/system", O_RDONLY); + if (!ASSERT_OK_FD(heap_fd, "open dma heap")) + return -1; + + ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data); + close(heap_fd); + if (!ASSERT_OK(ret, "syheap alloc")) + return -1; + + if (!ASSERT_OK(ioctl(data.fd, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name")) + goto close_sysheap_dmabuf; + + return data.fd; + +close_sysheap_dmabuf: + close(data.fd); + return -1; +} + +static int create_test_buffers(void) +{ + udmabuf = create_udmabuf(); + sysheap_dmabuf = create_sys_heap_dmabuf(); + + if (udmabuf < 0 || sysheap_dmabuf < 0) + return -1; + + return 0; +} + +static void destroy_test_buffers(void) +{ + close(udmabuf); + udmabuf = -1; + + close(sysheap_dmabuf); + sysheap_dmabuf = -1; +} + +enum Fields { INODE, SIZE, NAME, EXPORTER, FIELD_COUNT }; +struct DmabufInfo { + unsigned long inode; + unsigned long size; + char name[DMA_BUF_NAME_LEN]; + char exporter[32]; +}; + +static bool check_dmabuf_info(const struct DmabufInfo *bufinfo, + unsigned long size, + const char *name, const char *exporter) +{ + return size == bufinfo->size && + !strcmp(name, bufinfo->name) && + !strcmp(exporter, bufinfo->exporter); +} + +static void subtest_dmabuf_iter_check_no_infinite_reads(struct dmabuf_iter *skel) +{ + int iter_fd; + char buf[256]; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + if (!ASSERT_OK_FD(iter_fd, "iter_create")) + return; + + while (read(iter_fd, buf, sizeof(buf)) > 0) + ; /* Read out all contents */ + + /* Next reads should return 0 */ + ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read"); + + close(iter_fd); +} + +static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel) +{ + bool found_test_sysheap_dmabuf = false; + bool found_test_udmabuf = false; + struct DmabufInfo bufinfo; + size_t linesize = 0; + char *line = NULL; + FILE *iter_file; + int iter_fd, f = INODE; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + if (!ASSERT_OK_FD(iter_fd, "iter_create")) + return; + + iter_file = fdopen(iter_fd, "r"); + if (!ASSERT_OK_PTR(iter_file, "fdopen")) + goto close_iter_fd; + + while (getline(&line, &linesize, iter_file) != -1) { + if (f % FIELD_COUNT == INODE) { + ASSERT_EQ(sscanf(line, "%ld", &bufinfo.inode), 1, + "read inode"); + } else if (f % FIELD_COUNT == SIZE) { + ASSERT_EQ(sscanf(line, "%ld", &bufinfo.size), 1, + "read size"); + } else if (f % FIELD_COUNT == NAME) { + ASSERT_EQ(sscanf(line, "%s", bufinfo.name), 1, + "read name"); + } else if (f % FIELD_COUNT == EXPORTER) { + ASSERT_EQ(sscanf(line, "%31s", bufinfo.exporter), 1, + "read exporter"); + + if (check_dmabuf_info(&bufinfo, + sysheap_test_buffer_size, + sysheap_test_buffer_name, + "system")) + found_test_sysheap_dmabuf = true; + else if (check_dmabuf_info(&bufinfo, + udmabuf_test_buffer_size, + udmabuf_test_buffer_name, + "udmabuf")) + found_test_udmabuf = true; + } + ++f; + } + + ASSERT_EQ(f % FIELD_COUNT, INODE, "number of fields"); + + ASSERT_TRUE(found_test_sysheap_dmabuf, "found_test_sysheap_dmabuf"); + ASSERT_TRUE(found_test_udmabuf, "found_test_udmabuf"); + + free(line); + fclose(iter_file); +close_iter_fd: + close(iter_fd); +} + +static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + char key[DMA_BUF_NAME_LEN]; + int err, fd; + bool found; + + /* No need to attach it, just run it directly */ + fd = bpf_program__fd(skel->progs.iter_dmabuf_for_each); + + err = bpf_prog_test_run_opts(fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, key), "get next key")) + return; + + do { + ASSERT_OK(bpf_map_lookup_elem(map_fd, key, &found), "lookup"); + ASSERT_TRUE(found, "found test buffer"); + } while (bpf_map_get_next_key(map_fd, key, key)); +} + +void test_dmabuf_iter(void) +{ + struct dmabuf_iter *skel = NULL; + int map_fd; + const bool f = false; + + skel = dmabuf_iter__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load")) + return; + + map_fd = bpf_map__fd(skel->maps.testbuf_hash); + if (!ASSERT_OK_FD(map_fd, "map_fd")) + goto destroy_skel; + + if (!ASSERT_OK(bpf_map_update_elem(map_fd, udmabuf_test_buffer_name, &f, BPF_ANY), + "insert udmabuf")) + goto destroy_skel; + if (!ASSERT_OK(bpf_map_update_elem(map_fd, sysheap_test_buffer_name, &f, BPF_ANY), + "insert sysheap buffer")) + goto destroy_skel; + + if (!ASSERT_OK(create_test_buffers(), "create_test_buffers")) + goto destroy; + + if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach")) + goto destroy; + + if (test__start_subtest("no_infinite_reads")) + subtest_dmabuf_iter_check_no_infinite_reads(skel); + if (test__start_subtest("default_iter")) + subtest_dmabuf_iter_check_default_iter(skel); + if (test__start_subtest("open_coded")) + subtest_dmabuf_iter_check_open_coded(skel, map_fd); + +destroy: + destroy_test_buffers(); +destroy_skel: + dmabuf_iter__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index e29cc16124c2..62e7ec775f24 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -33,10 +33,19 @@ static struct { {"test_dynptr_skb_no_buff", SETUP_SKB_PROG}, {"test_dynptr_skb_strcmp", SETUP_SKB_PROG}, {"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP}, + {"test_probe_read_user_dynptr", SETUP_XDP_PROG}, + {"test_probe_read_kernel_dynptr", SETUP_XDP_PROG}, + {"test_probe_read_user_str_dynptr", SETUP_XDP_PROG}, + {"test_probe_read_kernel_str_dynptr", SETUP_XDP_PROG}, + {"test_copy_from_user_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_copy_from_user_str_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_copy_from_user_task_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP}, }; static void verify_success(const char *prog_name, enum test_setup_type setup_type) { + char user_data[384] = {[0 ... 382] = 'a', '\0'}; struct dynptr_success *skel; struct bpf_program *prog; struct bpf_link *link; @@ -58,6 +67,10 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ if (!ASSERT_OK(err, "dynptr_success__load")) goto cleanup; + skel->bss->user_ptr = user_data; + skel->data->test_len[0] = sizeof(user_data); + memcpy(skel->bss->expected_str, user_data, sizeof(user_data)); + switch (setup_type) { case SETUP_SYSCALL_SLEEP: link = bpf_program__attach(prog); diff --git a/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c new file mode 100644 index 000000000000..ca46fdd6e1ae --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <stdbool.h> +#include <test_progs.h> +#include "fd_htab_lookup.skel.h" + +struct htab_op_ctx { + int fd; + int loop; + unsigned int entries; + bool stop; +}; + +#define ERR_TO_RETVAL(where, err) ((void *)(long)(((where) << 12) | (-err))) + +static void *htab_lookup_fn(void *arg) +{ + struct htab_op_ctx *ctx = arg; + int i = 0; + + while (i++ < ctx->loop && !ctx->stop) { + unsigned int j; + + for (j = 0; j < ctx->entries; j++) { + unsigned int key = j, zero = 0, value; + int inner_fd, err; + + err = bpf_map_lookup_elem(ctx->fd, &key, &value); + if (err) { + ctx->stop = true; + return ERR_TO_RETVAL(1, err); + } + + inner_fd = bpf_map_get_fd_by_id(value); + if (inner_fd < 0) { + /* The old map has been freed */ + if (inner_fd == -ENOENT) + continue; + ctx->stop = true; + return ERR_TO_RETVAL(2, inner_fd); + } + + err = bpf_map_lookup_elem(inner_fd, &zero, &value); + if (err) { + close(inner_fd); + ctx->stop = true; + return ERR_TO_RETVAL(3, err); + } + close(inner_fd); + + if (value != key) { + ctx->stop = true; + return ERR_TO_RETVAL(4, -EINVAL); + } + } + } + + return NULL; +} + +static void *htab_update_fn(void *arg) +{ + struct htab_op_ctx *ctx = arg; + int i = 0; + + while (i++ < ctx->loop && !ctx->stop) { + unsigned int j; + + for (j = 0; j < ctx->entries; j++) { + unsigned int key = j, zero = 0; + int inner_fd, err; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (inner_fd < 0) { + ctx->stop = true; + return ERR_TO_RETVAL(1, inner_fd); + } + + err = bpf_map_update_elem(inner_fd, &zero, &key, 0); + if (err) { + close(inner_fd); + ctx->stop = true; + return ERR_TO_RETVAL(2, err); + } + + err = bpf_map_update_elem(ctx->fd, &key, &inner_fd, BPF_EXIST); + if (err) { + close(inner_fd); + ctx->stop = true; + return ERR_TO_RETVAL(3, err); + } + close(inner_fd); + } + } + + return NULL; +} + +static int setup_htab(int fd, unsigned int entries) +{ + unsigned int i; + + for (i = 0; i < entries; i++) { + unsigned int key = i, zero = 0; + int inner_fd, err; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "new array")) + return -1; + + err = bpf_map_update_elem(inner_fd, &zero, &key, 0); + if (!ASSERT_OK(err, "init array")) { + close(inner_fd); + return -1; + } + + err = bpf_map_update_elem(fd, &key, &inner_fd, 0); + if (!ASSERT_OK(err, "init outer")) { + close(inner_fd); + return -1; + } + close(inner_fd); + } + + return 0; +} + +static int get_int_from_env(const char *name, int dft) +{ + const char *value; + + value = getenv(name); + if (!value) + return dft; + + return atoi(value); +} + +void test_fd_htab_lookup(void) +{ + unsigned int i, wr_nr = 8, rd_nr = 16; + pthread_t tids[wr_nr + rd_nr]; + struct fd_htab_lookup *skel; + struct htab_op_ctx ctx; + int err; + + skel = fd_htab_lookup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fd_htab_lookup__open_and_load")) + return; + + ctx.fd = bpf_map__fd(skel->maps.outer_map); + ctx.loop = get_int_from_env("FD_HTAB_LOOP_NR", 5); + ctx.stop = false; + ctx.entries = 8; + + err = setup_htab(ctx.fd, ctx.entries); + if (err) + goto destroy; + + memset(tids, 0, sizeof(tids)); + for (i = 0; i < wr_nr; i++) { + err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) { + ctx.stop = true; + goto reap; + } + } + for (i = 0; i < rd_nr; i++) { + err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) { + ctx.stop = true; + goto reap; + } + } + +reap: + for (i = 0; i < wr_nr + rd_nr; i++) { + void *ret = NULL; + char desc[32]; + + if (!tids[i]) + continue; + + snprintf(desc, sizeof(desc), "thread %u", i + 1); + err = pthread_join(tids[i], &ret); + ASSERT_OK(err, desc); + ASSERT_EQ(ret, NULL, desc); + } +destroy: + fd_htab_lookup__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index e59af2aa6601..e40114620751 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -37,6 +37,7 @@ static noinline void uprobe_func(void) static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr, ssize_t offset, ssize_t entry_offset) { + ssize_t ref_ctr_offset = entry_offset /* ref_ctr_offset for uprobes */; struct bpf_link_info info; __u32 len = sizeof(info); char buf[PATH_MAX]; @@ -97,6 +98,7 @@ again: case BPF_PERF_EVENT_UPROBE: case BPF_PERF_EVENT_URETPROBE: ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset"); + ASSERT_EQ(info.perf_event.uprobe.ref_ctr_offset, ref_ctr_offset, "uprobe_ref_ctr_offset"); ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1, "name_len"); @@ -241,20 +243,32 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, .retprobe = type == BPF_PERF_EVENT_URETPROBE, .bpf_cookie = PERF_EVENT_COOKIE, ); + const char *sema[1] = { + "uprobe_link_info_sema_1", + }; + __u64 *ref_ctr_offset; struct bpf_link *link; int link_fd, err; + err = elf_resolve_syms_offsets("/proc/self/exe", 1, sema, + (unsigned long **) &ref_ctr_offset, STT_OBJECT); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object")) + return; + + opts.ref_ctr_offset = *ref_ctr_offset; link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run, 0, /* self pid */ UPROBE_FILE, uprobe_offset, &opts); if (!ASSERT_OK_PTR(link, "attach_uprobe")) - return; + goto out; link_fd = bpf_link__fd(link); - err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0); + err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, *ref_ctr_offset); ASSERT_OK(err, "verify_perf_link_info"); bpf_link__destroy(link); +out: + free(ref_ctr_offset); } static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies) diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c index 8e13a3416a21..1de14b111931 100644 --- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c @@ -104,7 +104,7 @@ void test_kmem_cache_iter(void) goto destroy; memset(buf, 0, sizeof(buf)); - while (read(iter_fd, buf, sizeof(buf) > 0)) { + while (read(iter_fd, buf, sizeof(buf)) > 0) { /* Read out all contents */ printf("%s", buf); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 77d07e0a4a55..5266c7022863 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -7,6 +7,7 @@ #include "linked_list.skel.h" #include "linked_list_fail.skel.h" +#include "linked_list_peek.skel.h" static char log_buf[1024 * 1024]; @@ -805,3 +806,8 @@ void test_linked_list(void) test_linked_list_success(LIST_IN_LIST, true); test_linked_list_success(TEST_ALL, false); } + +void test_linked_list_peek(void) +{ + RUN_TESTS(linked_list_peek); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c index 9818f06c97c5..d8f3d7a45fe9 100644 --- a/tools/testing/selftests/bpf/prog_tests/rbtree.c +++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c @@ -8,6 +8,7 @@ #include "rbtree_fail.skel.h" #include "rbtree_btf_fail__wrong_node_type.skel.h" #include "rbtree_btf_fail__add_wrong_type.skel.h" +#include "rbtree_search.skel.h" static void test_rbtree_add_nodes(void) { @@ -187,3 +188,8 @@ void test_rbtree_fail(void) { RUN_TESTS(rbtree_fail); } + +void test_rbtree_search(void) +{ + RUN_TESTS(rbtree_search); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index 0b9bd1d6f7cc..10a0ab954b8a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -37,8 +37,10 @@ configure_stack(void) tc = popen("tc -V", "r"); if (CHECK_FAIL(!tc)) return false; - if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) + if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) { + pclose(tc); return false; + } if (strstr(tc_version, ", libbpf ")) prog = "test_sk_assign_libbpf.bpf.o"; else diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h index 1bdfb79ef009..e02cabcc814e 100644 --- a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h @@ -3,6 +3,7 @@ #ifndef __SOCKET_HELPERS__ #define __SOCKET_HELPERS__ +#include <sys/un.h> #include <linux/vm_sockets.h> /* include/linux/net.h */ @@ -169,6 +170,15 @@ static inline void init_addr_loopback6(struct sockaddr_storage *ss, *len = sizeof(*addr6); } +static inline void init_addr_loopback_unix(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_un *addr = memset(ss, 0, sizeof(*ss)); + + addr->sun_family = AF_UNIX; + *len = sizeof(sa_family_t); +} + static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len) { @@ -190,6 +200,9 @@ static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, case AF_INET6: init_addr_loopback6(ss, len); return; + case AF_UNIX: + init_addr_loopback_unix(ss, len); + return; case AF_VSOCK: init_addr_loopback_vsock(ss, len); return; @@ -315,21 +328,27 @@ static inline int create_pair(int family, int sotype, int *p0, int *p1) { __close_fd int s, c = -1, p = -1; struct sockaddr_storage addr; - socklen_t len = sizeof(addr); + socklen_t len; int err; s = socket_loopback(family, sotype); if (s < 0) return s; - err = xgetsockname(s, sockaddr(&addr), &len); - if (err) - return err; - c = xsocket(family, sotype, 0); if (c < 0) return c; + init_addr_loopback(family, &addr, &len); + err = xbind(c, sockaddr(&addr), len); + if (err) + return err; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + return err; + err = connect(c, sockaddr(&addr), len); if (err) { if (errno != EINPROGRESS) { @@ -391,4 +410,59 @@ static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, return err; } +static inline const char *socket_kind_to_str(int sock_fd) +{ + socklen_t opt_len; + int domain, type; + + opt_len = sizeof(domain); + if (getsockopt(sock_fd, SOL_SOCKET, SO_DOMAIN, &domain, &opt_len)) + FAIL_ERRNO("getsockopt(SO_DOMAIN)"); + + opt_len = sizeof(type); + if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &type, &opt_len)) + FAIL_ERRNO("getsockopt(SO_TYPE)"); + + switch (domain) { + case AF_INET: + switch (type) { + case SOCK_STREAM: + return "tcp4"; + case SOCK_DGRAM: + return "udp4"; + } + break; + case AF_INET6: + switch (type) { + case SOCK_STREAM: + return "tcp6"; + case SOCK_DGRAM: + return "udp6"; + } + break; + case AF_UNIX: + switch (type) { + case SOCK_STREAM: + return "u_str"; + case SOCK_DGRAM: + return "u_dgr"; + case SOCK_SEQPACKET: + return "u_seq"; + } + break; + case AF_VSOCK: + switch (type) { + case SOCK_STREAM: + return "v_str"; + case SOCK_DGRAM: + return "v_dgr"; + case SOCK_SEQPACKET: + return "v_seq"; + } + break; + } + + return "???"; +} + #endif // __SOCKET_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index 3e5571dd578d..d815efac52fd 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -5,12 +5,15 @@ #define MAX_TEST_NAME 80 +#define u32(v) ((u32){(v)}) +#define u64(v) ((u64){(v)}) + #define __always_unused __attribute__((__unused__)) #define xbpf_map_delete_elem(fd, key) \ ({ \ int __ret = bpf_map_delete_elem((fd), (key)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("map_delete"); \ __ret; \ }) @@ -18,7 +21,7 @@ #define xbpf_map_lookup_elem(fd, key, val) \ ({ \ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("map_lookup"); \ __ret; \ }) @@ -26,7 +29,7 @@ #define xbpf_map_update_elem(fd, key, val, flags) \ ({ \ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("map_update"); \ __ret; \ }) @@ -35,7 +38,7 @@ ({ \ int __ret = \ bpf_prog_attach((prog), (target), (type), (flags)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("prog_attach(" #type ")"); \ __ret; \ }) @@ -43,7 +46,7 @@ #define xbpf_prog_detach2(prog, target, type) \ ({ \ int __ret = bpf_prog_detach2((prog), (target), (type)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("prog_detach2(" #type ")"); \ __ret; \ }) @@ -66,21 +69,15 @@ __ret; \ }) -static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) +static inline int add_to_sockmap(int mapfd, int fd1, int fd2) { - u64 value; - u32 key; int err; - key = 0; - value = fd1; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + err = xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1), BPF_NOEXIST); if (err) return err; - key = 1; - value = fd2; - return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + return xbpf_map_update_elem(mapfd, &u32(1), &u64(fd2), BPF_NOEXIST); } #endif // __SOCKMAP_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 0a99fd404f6d..b6c471da5c28 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -3,76 +3,62 @@ /* * Tests for sockmap/sockhash holding kTLS sockets. */ - +#include <error.h> #include <netinet/tcp.h> +#include <linux/tls.h> #include "test_progs.h" +#include "sockmap_helpers.h" +#include "test_skmsg_load_helpers.skel.h" +#include "test_sockmap_ktls.skel.h" #define MAX_TEST_NAME 80 #define TCP_ULP 31 -static int tcp_server(int family) +static int init_ktls_pairs(int c, int p) { - int err, s; - - s = socket(family, SOCK_STREAM, 0); - if (!ASSERT_GE(s, 0, "socket")) - return -1; - - err = listen(s, SOMAXCONN); - if (!ASSERT_OK(err, "listen")) - return -1; - - return s; -} + int err; + struct tls12_crypto_info_aes_gcm_128 crypto_rx; + struct tls12_crypto_info_aes_gcm_128 crypto_tx; -static int disconnect(int fd) -{ - struct sockaddr unspec = { AF_UNSPEC }; + err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto out; - return connect(fd, &unspec, sizeof(unspec)); + err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto out; + + memset(&crypto_rx, 0, sizeof(crypto_rx)); + memset(&crypto_tx, 0, sizeof(crypto_tx)); + crypto_rx.info.version = TLS_1_2_VERSION; + crypto_tx.info.version = TLS_1_2_VERSION; + crypto_rx.info.cipher_type = TLS_CIPHER_AES_GCM_128; + crypto_tx.info.cipher_type = TLS_CIPHER_AES_GCM_128; + + err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_tx, sizeof(crypto_tx)); + if (!ASSERT_OK(err, "setsockopt(TLS_TX)")) + goto out; + + err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_rx, sizeof(crypto_rx)); + if (!ASSERT_OK(err, "setsockopt(TLS_RX)")) + goto out; + return 0; +out: + return -1; } -/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */ -static void test_sockmap_ktls_disconnect_after_delete(int family, int map) +static int create_ktls_pairs(int family, int sotype, int *c, int *p) { - struct sockaddr_storage addr = {0}; - socklen_t len = sizeof(addr); - int err, cli, srv, zero = 0; - - srv = tcp_server(family); - if (srv == -1) - return; - - err = getsockname(srv, (struct sockaddr *)&addr, &len); - if (!ASSERT_OK(err, "getsockopt")) - goto close_srv; + int err; - cli = socket(family, SOCK_STREAM, 0); - if (!ASSERT_GE(cli, 0, "socket")) - goto close_srv; - - err = connect(cli, (struct sockaddr *)&addr, len); - if (!ASSERT_OK(err, "connect")) - goto close_cli; - - err = bpf_map_update_elem(map, &zero, &cli, 0); - if (!ASSERT_OK(err, "bpf_map_update_elem")) - goto close_cli; - - err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); - if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) - goto close_cli; - - err = bpf_map_delete_elem(map, &zero); - if (!ASSERT_OK(err, "bpf_map_delete_elem")) - goto close_cli; - - err = disconnect(cli); + err = create_pair(family, sotype, c, p); + if (!ASSERT_OK(err, "create_pair()")) + return -1; -close_cli: - close(cli); -close_srv: - close(srv); + err = init_ktls_pairs(*c, *p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + return -1; + return 0; } static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map) @@ -145,6 +131,189 @@ static const char *fmt_test_name(const char *subtest_name, int family, return test_name; } +static void test_sockmap_ktls_offload(int family, int sotype) +{ + int err; + int c = 0, p = 0, sent, recvd; + char msg[12] = "hello world\0"; + char rcv[13]; + + err = create_ktls_pairs(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_ktls_pairs()")) + goto out; + + sent = send(c, msg, sizeof(msg), 0); + if (!ASSERT_OK(err, "send(msg)")) + goto out; + + recvd = recv(p, rcv, sizeof(rcv), 0); + if (!ASSERT_OK(err, "recv(msg)") || + !ASSERT_EQ(recvd, sent, "length mismatch")) + goto out; + + ASSERT_OK(memcmp(msg, rcv, sizeof(msg)), "data mismatch"); + +out: + if (c) + close(c); + if (p) + close(p); +} + +static void test_sockmap_ktls_tx_cork(int family, int sotype, bool push) +{ + int err, off; + int i, j; + int start_push = 0, push_len = 0; + int c = 0, p = 0, one = 1, sent, recvd; + int prog_fd, map_fd; + char msg[12] = "hello world\0"; + char rcv[20] = {0}; + struct test_sockmap_ktls *skel; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open ktls skel")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy); + map_fd = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) + goto out; + + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) + goto out; + + err = init_ktls_pairs(c, p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + goto out; + + skel->bss->cork_byte = sizeof(msg); + if (push) { + start_push = 1; + push_len = 2; + } + skel->bss->push_start = start_push; + skel->bss->push_end = push_len; + + off = sizeof(msg) / 2; + sent = send(c, msg, off, 0); + if (!ASSERT_EQ(sent, off, "send(msg)")) + goto out; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(-1, recvd, "expected no data")) + goto out; + + /* send remaining msg */ + sent = send(c, msg + off, sizeof(msg) - off, 0); + if (!ASSERT_EQ(sent, sizeof(msg) - off, "send remaining data")) + goto out; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_OK(err, "recv(msg)") || + !ASSERT_EQ(recvd, sizeof(msg) + push_len, "check length mismatch")) + goto out; + + for (i = 0, j = 0; i < recvd;) { + /* skip checking the data that has been pushed in */ + if (i >= start_push && i <= start_push + push_len - 1) { + i++; + continue; + } + if (!ASSERT_EQ(rcv[i], msg[j], "data mismatch")) + goto out; + i++; + j++; + } +out: + if (c) + close(c); + if (p) + close(p); + test_sockmap_ktls__destroy(skel); +} + +static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push) +{ + int c = -1, p = -1, one = 1, two = 2; + struct test_sockmap_ktls *skel; + unsigned char *data = NULL; + struct msghdr msg = {0}; + struct iovec iov[2]; + int prog_fd, map_fd; + int txrx_buf = 1024; + int iov_length = 8192; + int err; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open ktls skel")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + + err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int)); + err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int)); + if (!ASSERT_OK(err, "set buf limit")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir); + map_fd = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) + goto out; + + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) + goto out; + + err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) + goto out; + + skel->bss->apply_bytes = 1024; + + err = init_ktls_pairs(c, p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + goto out; + + data = calloc(iov_length, sizeof(char)); + if (!data) + goto out; + + iov[0].iov_base = data; + iov[0].iov_len = iov_length; + iov[1].iov_base = data; + iov[1].iov_len = iov_length; + msg.msg_iov = iov; + msg.msg_iovlen = 2; + + for (;;) { + err = sendmsg(c, &msg, MSG_DONTWAIT); + if (err <= 0) + break; + } + +out: + if (data) + free(data); + if (c != -1) + close(c); + if (p != -1) + close(p); + + test_sockmap_ktls__destroy(skel); +} + static void run_tests(int family, enum bpf_map_type map_type) { int map; @@ -153,18 +322,30 @@ static void run_tests(int family, enum bpf_map_type map_type) if (!ASSERT_GE(map, 0, "bpf_map_create")) return; - if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type))) - test_sockmap_ktls_disconnect_after_delete(family, map); if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type))) test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map); close(map); } +static void run_ktls_test(int family, int sotype) +{ + if (test__start_subtest("tls simple offload")) + test_sockmap_ktls_offload(family, sotype); + if (test__start_subtest("tls tx cork")) + test_sockmap_ktls_tx_cork(family, sotype, false); + if (test__start_subtest("tls tx cork with push")) + test_sockmap_ktls_tx_cork(family, sotype, true); + if (test__start_subtest("tls tx egress with no buf")) + test_sockmap_ktls_tx_no_buf(family, sotype, true); +} + void test_sockmap_ktls(void) { run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP); run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH); run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP); run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH); + run_ktls_test(AF_INET, SOCK_STREAM); + run_ktls_test(AF_INET6, SOCK_STREAM); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 4ee1148d22be..1d98eee7a2c3 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -1366,237 +1366,6 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } } -static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, - int sock_mapfd, int nop_mapfd, - int verd_mapfd, enum redir_mode mode, - int send_flags) -{ - const char *log_prefix = redir_mode_str(mode); - unsigned int pass; - int err, n; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); - - err = add_to_sockmap(sock_mapfd, peer0, peer1); - if (err) - return; - - if (nop_mapfd >= 0) { - err = add_to_sockmap(nop_mapfd, cli0, cli1); - if (err) - return; - } - - /* Last byte is OOB data when send_flags has MSG_OOB bit set */ - n = xsend(cli1, "ab", 2, send_flags); - if (n >= 0 && n < 2) - FAIL("%s: incomplete send", log_prefix); - if (n < 2) - return; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - return; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); - - if (send_flags & MSG_OOB) { - /* Check that we can't read OOB while in sockmap */ - errno = 0; - n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT); - if (n != -1 || errno != EOPNOTSUPP) - FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d", - log_prefix, n, errno); - - /* Remove peer1 from sockmap */ - xbpf_map_delete_elem(sock_mapfd, &(int){ 1 }); - - /* Check that OOB was dropped on redirect */ - errno = 0; - n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT); - if (n != -1 || errno != EINVAL) - FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d", - log_prefix, n, errno); - } -} - -static void unix_redir_to_connected(int sotype, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) -{ - int c0, c1, p0, p1; - int sfd[2]; - - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - return; - c0 = sfd[0], p0 = sfd[1]; - - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - goto close0; - c1 = sfd[0], p1 = sfd[1]; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, - mode, NO_FLAGS); - - xclose(c1); - xclose(p1); -close0: - xclose(c0); - xclose(p0); -} - -static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int sotype) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS); - skel->bss->test_ingress = true; - unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int sotype) -{ - const char *family_name, *map_name; - char s[MAX_TEST_NAME]; - - family_name = family_str(AF_UNIX); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - unix_skb_redir_to_connected(skel, map, sotype); -} - -/* Returns two connected loopback vsock sockets */ -static int vsock_socketpair_connectible(int sotype, int *v0, int *v1) -{ - return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1); -} - -static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd, - enum redir_mode mode, int sotype) -{ - const char *log_prefix = redir_mode_str(mode); - char a = 'a', b = 'b'; - int u0, u1, v0, v1; - int sfd[2]; - unsigned int pass; - int err, n; - u32 key; - - zero_verdict_count(verd_mapfd); - - if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd)) - return; - - u0 = sfd[0]; - u1 = sfd[1]; - - err = vsock_socketpair_connectible(sotype, &v0, &v1); - if (err) { - FAIL("vsock_socketpair_connectible() failed"); - goto close_uds; - } - - err = add_to_sockmap(sock_mapfd, u0, v0); - if (err) { - FAIL("add_to_sockmap failed"); - goto close_vsock; - } - - n = write(v1, &a, sizeof(a)); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto out; - - n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0); - if (n < 0) - FAIL("%s: recv() err, errno=%d", log_prefix, errno); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); - if (b != a) - FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b); - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto out; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); -out: - key = 0; - bpf_map_delete_elem(sock_mapfd, &key); - key = 1; - bpf_map_delete_elem(sock_mapfd, &key); - -close_vsock: - close(v0); - close(v1); - -close_uds: - close(u0); - close(u1); -} - -static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, - int sotype) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype); - skel->bss->test_ingress = true; - vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map) -{ - const char *family_name, *map_name; - char s[MAX_TEST_NAME]; - - family_name = family_str(AF_VSOCK); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - - vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM); - vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET); -} - static void test_reuseport(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { @@ -1637,224 +1406,6 @@ static void test_reuseport(struct test_sockmap_listen *skel, } } -static int inet_socketpair(int family, int type, int *s, int *c) -{ - return create_pair(family, type | SOCK_NONBLOCK, s, c); -} - -static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, - enum redir_mode mode) -{ - int c0, c1, p0, p1; - int err; - - err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); - if (err) - return; - err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1); - if (err) - goto close_cli0; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, - mode, NO_FLAGS); - - xclose(c1); - xclose(p1); -close_cli0: - xclose(c0); - xclose(p0); -} - -static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS); - skel->bss->test_ingress = true; - udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int family) -{ - const char *family_name, *map_name; - char s[MAX_TEST_NAME]; - - family_name = family_str(family); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - udp_skb_redir_to_connected(skel, map, family); -} - -static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) -{ - int c0, c1, p0, p1; - int sfd[2]; - int err; - - if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) - return; - c0 = sfd[0], p0 = sfd[1]; - - err = inet_socketpair(family, type, &p1, &c1); - if (err) - goto close; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, - mode, NO_FLAGS); - - xclose(c1); - xclose(p1); -close: - xclose(c0); - xclose(p0); -} - -static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, - REDIR_EGRESS); - inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, - REDIR_EGRESS); - skel->bss->test_ingress = true; - inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, - REDIR_INGRESS); - inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, - REDIR_INGRESS); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, - int nop_mapfd, int verd_mapfd, - enum redir_mode mode, int send_flags) -{ - int c0, c1, p0, p1; - int sfd[2]; - int err; - - err = inet_socketpair(family, type, &p0, &c0); - if (err) - return; - - if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) - goto close_cli0; - c1 = sfd[0], p1 = sfd[1]; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd, - verd_mapfd, mode, send_flags); - - xclose(c1); - xclose(p1); -close_cli0: - xclose(c0); - xclose(p0); -} - -static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int nop_map = bpf_map__fd(skel->maps.nop_map); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, -1, verdict_map, - REDIR_EGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, -1, verdict_map, - REDIR_EGRESS, NO_FLAGS); - - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, nop_map, verdict_map, - REDIR_EGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_EGRESS, NO_FLAGS); - - /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */ - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_EGRESS, MSG_OOB); - - skel->bss->test_ingress = true; - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, -1, verdict_map, - REDIR_INGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, -1, verdict_map, - REDIR_INGRESS, NO_FLAGS); - - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, nop_map, verdict_map, - REDIR_INGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_INGRESS, NO_FLAGS); - - /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */ - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_INGRESS, MSG_OOB); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int family) -{ - const char *family_name, *map_name; - struct netns_obj *netns; - char s[MAX_TEST_NAME]; - - family_name = family_str(family); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - - netns = netns_new("sockmap_listen", true); - if (!ASSERT_OK_PTR(netns, "netns_new")) - return; - - inet_unix_skb_redir_to_connected(skel, map, family); - unix_inet_skb_redir_to_connected(skel, map, family); - - netns_free(netns); -} - static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, int family) { @@ -1863,8 +1414,6 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, test_redir(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_DGRAM); - test_udp_redir(skel, map, family); - test_udp_unix_redir(skel, map, family); } void serial_test_sockmap_listen(void) @@ -1880,16 +1429,10 @@ void serial_test_sockmap_listen(void) skel->bss->test_sockmap = true; run_tests(skel, skel->maps.sock_map, AF_INET); run_tests(skel, skel->maps.sock_map, AF_INET6); - test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM); - test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM); - test_vsock_redir(skel, skel->maps.sock_map); skel->bss->test_sockmap = false; run_tests(skel, skel->maps.sock_hash, AF_INET); run_tests(skel, skel->maps.sock_hash, AF_INET6); - test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM); - test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM); - test_vsock_redir(skel, skel->maps.sock_hash); test_sockmap_listen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c new file mode 100644 index 000000000000..9c461d93113d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for sockmap/sockhash redirection. + * + * BPF_MAP_TYPE_SOCKMAP + * BPF_MAP_TYPE_SOCKHASH + * x + * sk_msg-to-egress + * sk_msg-to-ingress + * sk_skb-to-egress + * sk_skb-to-ingress + * x + * AF_INET, SOCK_STREAM + * AF_INET6, SOCK_STREAM + * AF_INET, SOCK_DGRAM + * AF_INET6, SOCK_DGRAM + * AF_UNIX, SOCK_STREAM + * AF_UNIX, SOCK_DGRAM + * AF_VSOCK, SOCK_STREAM + * AF_VSOCK, SOCK_SEQPACKET + */ + +#include <errno.h> +#include <error.h> +#include <sched.h> +#include <stdio.h> +#include <unistd.h> + +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <linux/string.h> +#include <linux/vm_sockets.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "linux/const.h" +#include "test_progs.h" +#include "sockmap_helpers.h" +#include "test_sockmap_redir.skel.h" + +/* The meaning of SUPPORTED is "will redirect packet as expected". + */ +#define SUPPORTED _BITUL(0) + +/* Note on sk_skb-to-ingress ->af_vsock: + * + * Peer socket may receive the packet some time after the return from sendmsg(). + * In a typical usage scenario, recvmsg() will block until the redirected packet + * appears in the destination queue, or timeout if the packet was dropped. By + * that point, the verdict map has already been updated to reflect what has + * happened. + * + * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg() + * takes place. Which means we may race the execution of the verdict logic and + * read map_verd before it has been updated, i.e. we might observe + * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1. + * + * This confuses the selftest logic: if there was no packet dropped, where's the + * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0 + * (which implies the verdict program has not been ran) just re-read the verdict + * map again. + */ +#define UNSUPPORTED_RACY_VERD _BITUL(1) + +enum prog_type { + SK_MSG_EGRESS, + SK_MSG_INGRESS, + SK_SKB_EGRESS, + SK_SKB_INGRESS, +}; + +enum { + SEND_INNER = 0, + SEND_OUTER, +}; + +enum { + RECV_INNER = 0, + RECV_OUTER, +}; + +struct maps { + int in; + int out; + int verd; +}; + +struct combo_spec { + enum prog_type prog_type; + const char *in, *out; +}; + +struct redir_spec { + const char *name; + int idx_send; + int idx_recv; + enum prog_type prog_type; +}; + +struct socket_spec { + int family; + int sotype; + int send_flags; + int in[2]; + int out[2]; +}; + +static int socket_spec_pairs(struct socket_spec *s) +{ + return create_socket_pairs(s->family, s->sotype, + &s->in[0], &s->out[0], + &s->in[1], &s->out[1]); +} + +static void socket_spec_close(struct socket_spec *s) +{ + xclose(s->in[0]); + xclose(s->in[1]); + xclose(s->out[0]); + xclose(s->out[1]); +} + +static void get_redir_params(struct redir_spec *redir, + struct test_sockmap_redir *skel, int *prog_fd, + enum bpf_attach_type *attach_type, + int *redirect_flags) +{ + enum prog_type type = redir->prog_type; + struct bpf_program *prog; + bool sk_msg; + + sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS; + prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict; + + *prog_fd = bpf_program__fd(prog); + *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT; + + if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS) + *redirect_flags = BPF_F_INGRESS; + else + *redirect_flags = 0; +} + +static void try_recv(const char *prefix, int fd, int flags, bool expect_success) +{ + ssize_t n; + char buf; + + errno = 0; + n = recv(fd, &buf, 1, flags); + if (n < 0 && expect_success) + FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n); + if (!n && !expect_success) + FAIL("%s: expected failure: retval=%zd", prefix, n); +} + +static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out, + int sd_recv, int map_verd, int status) +{ + unsigned int drop, pass; + char recv_buf; + ssize_t n; + +get_verdict: + if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) || + xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass)) + return; + + if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) { + sched_yield(); + goto get_verdict; + } + + if (pass != 0) { + FAIL("unsupported: wanted verdict pass 0, have %u", pass); + return; + } + + /* If nothing was dropped, packet should have reached the peer */ + if (drop == 0) { + errno = 0; + n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC); + if (n != 1) + FAIL_ERRNO("unsupported: packet missing, retval=%zd", n); + } + + /* Ensure queues are empty */ + try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false); + if (sd_in != sd_send) + try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false); + + try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false); + if (sd_recv != sd_out) + try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false); +} + +static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer, + int sd_in, int sd_out, int sd_recv, + struct maps *maps, int status) +{ + unsigned int drop, pass; + char *send_buf = "ab"; + char recv_buf = '\0'; + ssize_t n, len = 1; + + /* Zero out the verdict map */ + if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) || + xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY)) + return; + + if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST)) + return; + + if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST)) + goto del_in; + + /* Last byte is OOB data when send_flags has MSG_OOB bit set */ + if (send_flags & MSG_OOB) + len++; + n = send(sd_send, send_buf, len, send_flags); + if (n >= 0 && n < len) + FAIL("incomplete send"); + if (n < 0) { + /* sk_msg redirect combo not supported? */ + if (status & SUPPORTED || errno != EACCES) + FAIL_ERRNO("send"); + goto out; + } + + if (!(status & SUPPORTED)) { + handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv, + maps->verd, status); + goto out; + } + + errno = 0; + n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC); + if (n != 1) { + FAIL_ERRNO("recv_timeout()"); + goto out; + } + + /* Check verdict _after_ recv(); af_vsock may need time to catch up */ + if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) || + xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass)) + goto out; + + if (drop != 0 || pass != 1) + FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u", + drop, pass); + + if (recv_buf != send_buf[0]) + FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]); + + if (send_flags & MSG_OOB) { + /* Fail reading OOB while in sockmap */ + try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out, + MSG_OOB | MSG_DONTWAIT, false); + + /* Remove sd_out from sockmap */ + xbpf_map_delete_elem(maps->out, &u32(0)); + + /* Check that OOB was dropped on redirect */ + try_recv("recv(sd_out, MSG_OOB)", sd_out, + MSG_OOB | MSG_DONTWAIT, false); + + goto del_in; + } +out: + xbpf_map_delete_elem(maps->out, &u32(0)); +del_in: + xbpf_map_delete_elem(maps->in, &u32(0)); +} + +static int is_redir_supported(enum prog_type type, const char *in, + const char *out) +{ + /* Matching based on strings returned by socket_kind_to_str(): + * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq + * Plus a wildcard: any + * Not in use: u_seq, v_dgr + */ + struct combo_spec *c, combos[] = { + /* Send to local: TCP -> any, but vsock */ + { SK_MSG_INGRESS, "tcp", "tcp" }, + { SK_MSG_INGRESS, "tcp", "udp" }, + { SK_MSG_INGRESS, "tcp", "u_str" }, + { SK_MSG_INGRESS, "tcp", "u_dgr" }, + + /* Send to egress: TCP -> TCP */ + { SK_MSG_EGRESS, "tcp", "tcp" }, + + /* Ingress to egress: any -> any */ + { SK_SKB_EGRESS, "any", "any" }, + + /* Ingress to local: any -> any, but vsock */ + { SK_SKB_INGRESS, "any", "tcp" }, + { SK_SKB_INGRESS, "any", "udp" }, + { SK_SKB_INGRESS, "any", "u_str" }, + { SK_SKB_INGRESS, "any", "u_dgr" }, + }; + + for (c = combos; c < combos + ARRAY_SIZE(combos); c++) { + if (c->prog_type == type && + (!strcmp(c->in, "any") || strstarts(in, c->in)) && + (!strcmp(c->out, "any") || strstarts(out, c->out))) + return SUPPORTED; + } + + return 0; +} + +static int get_support_status(enum prog_type type, const char *in, + const char *out) +{ + int status = is_redir_supported(type, in, out); + + if (type == SK_SKB_INGRESS && strstarts(out, "v_")) + status |= UNSUPPORTED_RACY_VERD; + + return status; +} + +static void test_socket(enum bpf_map_type type, struct redir_spec *redir, + struct maps *maps, struct socket_spec *s_in, + struct socket_spec *s_out) +{ + int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status; + const char *in_str, *out_str; + char s[MAX_TEST_NAME]; + + fd_in = s_in->in[0]; + fd_out = s_out->out[0]; + fd_send = s_in->in[redir->idx_send]; + fd_peer = s_in->in[redir->idx_send ^ 1]; + fd_recv = s_out->out[redir->idx_recv]; + flags = s_in->send_flags; + + in_str = socket_kind_to_str(fd_in); + out_str = socket_kind_to_str(fd_out); + status = get_support_status(redir->prog_type, in_str, out_str); + + snprintf(s, sizeof(s), + "%-4s %-17s %-5s %s %-5s%6s", + /* hash sk_skb-to-ingress u_str → v_str (OOB) */ + type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash", + redir->name, + in_str, + status & SUPPORTED ? "→" : " ", + out_str, + (flags & MSG_OOB) ? "(OOB)" : ""); + + if (!test__start_subtest(s)) + return; + + test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv, + maps, status); +} + +static void test_redir(enum bpf_map_type type, struct redir_spec *redir, + struct maps *maps) +{ + struct socket_spec *s, sockets[] = { + { AF_INET, SOCK_STREAM }, + // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */ + { AF_INET6, SOCK_STREAM }, + { AF_INET, SOCK_DGRAM }, + { AF_INET6, SOCK_DGRAM }, + { AF_UNIX, SOCK_STREAM }, + { AF_UNIX, SOCK_STREAM, MSG_OOB }, + { AF_UNIX, SOCK_DGRAM }, + // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */ + { AF_VSOCK, SOCK_STREAM }, + // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */ + { AF_VSOCK, SOCK_SEQPACKET }, + }; + + for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) + if (socket_spec_pairs(s)) + goto out; + + /* Intra-proto */ + for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) + test_socket(type, redir, maps, s, s); + + /* Cross-proto */ + for (int i = 0; i < ARRAY_SIZE(sockets); i++) { + for (int j = 0; j < ARRAY_SIZE(sockets); j++) { + struct socket_spec *out = &sockets[j]; + struct socket_spec *in = &sockets[i]; + + /* Skip intra-proto and between variants */ + if (out->send_flags || + (in->family == out->family && + in->sotype == out->sotype)) + continue; + + test_socket(type, redir, maps, in, out); + } + } +out: + while (--s >= sockets) + socket_spec_close(s); +} + +static void test_map(enum bpf_map_type type) +{ + struct redir_spec *r, redirs[] = { + { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS }, + { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS }, + { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS }, + { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS }, + }; + + for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) { + enum bpf_attach_type attach_type; + struct test_sockmap_redir *skel; + struct maps maps; + int prog_fd; + + skel = test_sockmap_redir__open_and_load(); + if (!skel) { + FAIL("open_and_load"); + return; + } + + switch (type) { + case BPF_MAP_TYPE_SOCKMAP: + maps.in = bpf_map__fd(skel->maps.nop_map); + maps.out = bpf_map__fd(skel->maps.sock_map); + break; + case BPF_MAP_TYPE_SOCKHASH: + maps.in = bpf_map__fd(skel->maps.nop_hash); + maps.out = bpf_map__fd(skel->maps.sock_hash); + break; + default: + FAIL("Unsupported bpf_map_type"); + return; + } + + skel->bss->redirect_type = type; + maps.verd = bpf_map__fd(skel->maps.verdict_map); + get_redir_params(r, skel, &prog_fd, &attach_type, + &skel->bss->redirect_flags); + + if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0)) + return; + + test_redir(type, r, &maps); + + if (xbpf_prog_detach2(prog_fd, maps.in, attach_type)) + return; + + test_sockmap_redir__destroy(skel); + } +} + +void serial_test_sockmap_redir(void) +{ + test_map(BPF_MAP_TYPE_SOCKMAP); + test_map(BPF_MAP_TYPE_SOCKHASH); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index c85798966aec..76d72a59365e 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -56,6 +56,8 @@ #define MAC_DST_FWD "00:11:22:33:44:55" #define MAC_DST "00:22:33:44:55:66" +#define MAC_SRC_FWD "00:33:44:55:66:77" +#define MAC_SRC "00:44:55:66:77:88" #define IFADDR_STR_LEN 18 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q" @@ -207,11 +209,10 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) int err; if (result->dev_mode == MODE_VETH) { - SYS(fail, "ip link add src type veth peer name src_fwd"); - SYS(fail, "ip link add dst type veth peer name dst_fwd"); - - SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD); - SYS(fail, "ip link set dst address " MAC_DST); + SYS(fail, "ip link add src address " MAC_SRC " type veth " + "peer name src_fwd address " MAC_SRC_FWD); + SYS(fail, "ip link add dst address " MAC_DST " type veth " + "peer name dst_fwd address " MAC_DST_FWD); } else if (result->dev_mode == MODE_NETKIT) { err = create_netkit(NETKIT_L3, "src", "src_fwd"); if (!ASSERT_OK(err, "create_ifindex_src")) diff --git a/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c new file mode 100644 index 000000000000..7d1b478c99a0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms Inc. */ +#include <test_progs.h> +#include "test_btf_ext.skel.h" +#include "btf_helpers.h" + +static void subtest_line_func_info(void) +{ + struct test_btf_ext *skel; + struct bpf_prog_info info; + struct bpf_line_info line_info[128], *libbpf_line_info; + struct bpf_func_info func_info[128], *libbpf_func_info; + __u32 info_len = sizeof(info), libbbpf_line_info_cnt, libbbpf_func_info_cnt; + int err, fd; + + skel = test_btf_ext__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + fd = bpf_program__fd(skel->progs.global_func); + + memset(&info, 0, sizeof(info)); + info.line_info = ptr_to_u64(&line_info); + info.nr_line_info = sizeof(line_info); + info.line_info_rec_size = sizeof(*line_info); + err = bpf_prog_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(err, "prog_line_info")) + goto out; + + libbpf_line_info = bpf_program__line_info(skel->progs.global_func); + libbbpf_line_info_cnt = bpf_program__line_info_cnt(skel->progs.global_func); + + memset(&info, 0, sizeof(info)); + info.func_info = ptr_to_u64(&func_info); + info.nr_func_info = sizeof(func_info); + info.func_info_rec_size = sizeof(*func_info); + err = bpf_prog_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(err, "prog_func_info")) + goto out; + + libbpf_func_info = bpf_program__func_info(skel->progs.global_func); + libbbpf_func_info_cnt = bpf_program__func_info_cnt(skel->progs.global_func); + + if (!ASSERT_OK_PTR(libbpf_line_info, "bpf_program__line_info")) + goto out; + if (!ASSERT_EQ(libbbpf_line_info_cnt, info.nr_line_info, "line_info_cnt")) + goto out; + if (!ASSERT_OK_PTR(libbpf_func_info, "bpf_program__func_info")) + goto out; + if (!ASSERT_EQ(libbbpf_func_info_cnt, info.nr_func_info, "func_info_cnt")) + goto out; + ASSERT_MEMEQ(libbpf_line_info, line_info, libbbpf_line_info_cnt * sizeof(*line_info), + "line_info"); + ASSERT_MEMEQ(libbpf_func_info, func_info, libbbpf_func_info_cnt * sizeof(*func_info), + "func_info"); +out: + test_btf_ext__destroy(skel); +} + +void test_btf_ext(void) +{ + if (test__start_subtest("line_func_info")) + subtest_line_func_info(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c index a95b42bf744a..47b56c258f3f 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -63,6 +63,9 @@ static void test_set_global_vars_succeeds(void) " -G \"var_eb = EB2\" "\ " -G \"var_ec = EC2\" "\ " -G \"var_b = 1\" "\ + " -G \"struct1.struct2.u.var_u8 = 170\" "\ + " -G \"union1.struct3.var_u8_l = 0xaa\" "\ + " -G \"union1.struct3.var_u8_h = 0xaa\" "\ "-vl2 > %s", fix->veristat, fix->tmpfile); read(fix->fd, fix->output, fix->sz); @@ -78,6 +81,8 @@ static void test_set_global_vars_succeeds(void) __CHECK_STR("_w=12 ", "var_eb = EB2"); __CHECK_STR("_w=13 ", "var_ec = EC2"); __CHECK_STR("_w=1 ", "var_b = 1"); + __CHECK_STR("_w=170 ", "struct1.struct2.u.var_u8 = 170"); + __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa"); out: teardown_fixture(fix); diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index e66a57970d28..c9da06741104 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -14,6 +14,7 @@ #include "verifier_bounds_deduction_non_const.skel.h" #include "verifier_bounds_mix_sign_unsign.skel.h" #include "verifier_bpf_get_stack.skel.h" +#include "verifier_bpf_trap.skel.h" #include "verifier_bswap.skel.h" #include "verifier_btf_ctx_access.skel.h" #include "verifier_btf_unreliable_prog.skel.h" @@ -148,6 +149,7 @@ void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); } void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); } void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); } +void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); } void test_verifier_bswap(void) { RUN(verifier_bswap); } void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); } void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 3d47878ef6bf..19f92affc2da 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -351,9 +351,10 @@ void test_xdp_metadata(void) struct xdp_metadata2 *bpf_obj2 = NULL; struct xdp_metadata *bpf_obj = NULL; struct bpf_program *new_prog, *prog; + struct bpf_devmap_val devmap_e = {}; + struct bpf_map *prog_arr, *devmap; struct nstoken *tok = NULL; __u32 queue_id = QUEUE_ID; - struct bpf_map *prog_arr; struct xsk tx_xsk = {}; struct xsk rx_xsk = {}; __u32 val, key = 0; @@ -409,6 +410,13 @@ void test_xdp_metadata(void) bpf_program__set_ifindex(prog, rx_ifindex); bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); + /* Make sure we can load a dev-bound program that performs + * XDP_REDIRECT into a devmap. + */ + new_prog = bpf_object__find_program_by_name(bpf_obj->obj, "redirect"); + bpf_program__set_ifindex(new_prog, rx_ifindex); + bpf_program__set_flags(new_prog, BPF_F_XDP_DEV_BOUND_ONLY); + if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton")) goto out; @@ -423,6 +431,18 @@ void test_xdp_metadata(void) "update prog_arr")) goto out; + /* Make sure we can't add dev-bound programs to devmaps. */ + devmap = bpf_object__find_map_by_name(bpf_obj->obj, "dev_map"); + if (!ASSERT_OK_PTR(devmap, "no dev_map found")) + goto out; + + devmap_e.bpf_prog.fd = val; + if (!ASSERT_ERR(bpf_map__update_elem(devmap, &key, sizeof(key), + &devmap_e, sizeof(devmap_e), + BPF_ANY), + "update dev_map")) + goto out; + /* Attach BPF program to RX interface. */ ret = bpf_xdp_attach(rx_ifindex, diff --git a/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c new file mode 100644 index 000000000000..079bf3794b3a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +long process_byte = 0; +int verdict_dir = 0; +int dropped = 0; +int pkt_size = 0; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_rx SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_tx SEC(".maps"); + +SEC("sk_skb/stream_parser") +int prog_skb_parser(struct __sk_buff *skb) +{ + return pkt_size; +} + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict(struct __sk_buff *skb) +{ + int one = 1; + int ret = bpf_sk_redirect_map(skb, &sock_map_rx, one, verdict_dir); + + if (ret == SK_DROP) + dropped++; + __sync_fetch_and_add(&process_byte, skb->len); + return ret; +} + +SEC("sk_skb/stream_verdict") +int prog_skb_pass(struct __sk_buff *skb) +{ + __sync_fetch_and_add(&process_byte, skb->len); + return SK_PASS; +} + +SEC("sk_msg") +int prog_skmsg_verdict(struct sk_msg_md *msg) +{ + int one = 1; + + __sync_fetch_and_add(&process_byte, msg->size); + return bpf_msg_redirect_map(msg, &sock_map_tx, one, verdict_dir); +} + +SEC("sk_msg") +int prog_skmsg_pass(struct sk_msg_md *msg) +{ + __sync_fetch_and_add(&process_byte, msg->size); + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h index fb8dc0768999..d67466c1ff77 100644 --- a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h +++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h @@ -32,6 +32,7 @@ extern unsigned long CONFIG_NR_CPUS __kconfig; struct __qspinlock { union { atomic_t val; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct { u8 locked; u8 pending; @@ -40,6 +41,17 @@ struct __qspinlock { u16 locked_pending; u16 tail; }; +#else + struct { + u16 tail; + u16 locked_pending; + }; + struct { + u8 reserved[2]; + u8 pending; + u8 locked; + }; +#endif }; }; @@ -95,9 +107,6 @@ struct arena_qnode { #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) #define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES]; static inline u32 encode_tail(int cpu, int idx) diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 863df7c0fdd0..6e208e24ba3b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -225,8 +225,9 @@ #define CAN_USE_BPF_ST #endif -#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) +#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) #define CAN_USE_LOAD_ACQ_STORE_REL #endif diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c new file mode 100644 index 000000000000..13cdb11fdeb2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google LLC */ +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> + +/* From uapi/linux/dma-buf.h */ +#define DMA_BUF_NAME_LEN 32 + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, DMA_BUF_NAME_LEN); + __type(value, bool); + __uint(max_entries, 5); +} testbuf_hash SEC(".maps"); + +/* + * Fields output by this iterator are delimited by newlines. Convert any + * newlines in user-provided printed strings to spaces. + */ +static void sanitize_string(char *src, size_t size) +{ + for (char *c = src; (size_t)(c - src) < size && *c; ++c) + if (*c == '\n') + *c = ' '; +} + +SEC("iter/dmabuf") +int dmabuf_collector(struct bpf_iter__dmabuf *ctx) +{ + const struct dma_buf *dmabuf = ctx->dmabuf; + struct seq_file *seq = ctx->meta->seq; + unsigned long inode = 0; + size_t size; + const char *pname, *exporter; + char name[DMA_BUF_NAME_LEN] = {'\0'}; + + if (!dmabuf) + return 0; + + if (BPF_CORE_READ_INTO(&inode, dmabuf, file, f_inode, i_ino) || + bpf_core_read(&size, sizeof(size), &dmabuf->size) || + bpf_core_read(&pname, sizeof(pname), &dmabuf->name) || + bpf_core_read(&exporter, sizeof(exporter), &dmabuf->exp_name)) + return 1; + + /* Buffers are not required to be named */ + if (pname) { + if (bpf_probe_read_kernel(name, sizeof(name), pname)) + return 1; + + /* Name strings can be provided by userspace */ + sanitize_string(name, sizeof(name)); + } + + BPF_SEQ_PRINTF(seq, "%lu\n%llu\n%s\n%s\n", inode, size, name, exporter); + return 0; +} + +SEC("syscall") +int iter_dmabuf_for_each(const void *ctx) +{ + struct dma_buf *d; + + bpf_for_each(dmabuf, d) { + char name[DMA_BUF_NAME_LEN]; + const char *pname; + bool *found; + long len; + int i; + + if (bpf_core_read(&pname, sizeof(pname), &d->name)) + return 1; + + /* Buffers are not required to be named */ + if (!pname) + continue; + + len = bpf_probe_read_kernel_str(name, sizeof(name), pname); + if (len < 0) + return 1; + + /* + * The entire name buffer is used as a map key. + * Zeroize any uninitialized trailing bytes after the NUL. + */ + bpf_for(i, len, DMA_BUF_NAME_LEN) + name[i] = 0; + + found = bpf_map_lookup_elem(&testbuf_hash, name); + if (found) { + bool t = true; + + bpf_map_update_elem(&testbuf_hash, name, &t, BPF_EXIST); + } + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index e1fba28e4a86..a0391f9da2d4 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -680,3 +680,233 @@ out: bpf_ringbuf_discard_dynptr(&ptr_buf, 0); return XDP_DROP; } + +void *user_ptr; +/* Contains the copy of the data pointed by user_ptr. + * Size 384 to make it not fit into a single kernel chunk when copying + * but less than the maximum bpf stack size (512). + */ +char expected_str[384]; +__u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257}; + +typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off, + u32 size, const void *unsafe_ptr); + +/* Returns the offset just before the end of the maximum sized xdp fragment. + * Any write larger than 32 bytes will be split between 2 fragments. + */ +__u32 xdp_near_frag_end_offset(void) +{ + const __u32 headroom = 256; + const __u32 max_frag_size = __PAGE_SIZE - headroom - sizeof(struct skb_shared_info); + + /* 32 bytes before the approximate end of the fragment */ + return max_frag_size - 32; +} + +/* Use __always_inline on test_dynptr_probe[_str][_xdp]() and callbacks + * of type bpf_read_dynptr_fn_t to prevent compiler from generating + * indirect calls that make program fail to load with "unknown opcode" error. + */ +static __always_inline void test_dynptr_probe(void *ptr, bpf_read_dynptr_fn_t bpf_read_dynptr_fn) +{ + char buf[sizeof(expected_str)]; + struct bpf_dynptr ptr_buf; + int i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + err = bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf); + + bpf_for(i, 0, ARRAY_SIZE(test_len)) { + __u32 len = test_len[i]; + + err = err ?: bpf_read_dynptr_fn(&ptr_buf, 0, test_len[i], ptr); + if (len > sizeof(buf)) + break; + err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0); + + if (err || bpf_memcmp(expected_str, buf, len)) + err = 1; + + /* Reset buffer and dynptr */ + __builtin_memset(buf, 0, sizeof(buf)); + err = err ?: bpf_dynptr_write(&ptr_buf, 0, buf, len, 0); + } + bpf_ringbuf_discard_dynptr(&ptr_buf, 0); +} + +static __always_inline void test_dynptr_probe_str(void *ptr, + bpf_read_dynptr_fn_t bpf_read_dynptr_fn) +{ + char buf[sizeof(expected_str)]; + struct bpf_dynptr ptr_buf; + __u32 cnt, i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf); + + bpf_for(i, 0, ARRAY_SIZE(test_len)) { + __u32 len = test_len[i]; + + cnt = bpf_read_dynptr_fn(&ptr_buf, 0, len, ptr); + if (cnt != len) + err = 1; + + if (len > sizeof(buf)) + continue; + err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0); + if (!len) + continue; + if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0') + err = 1; + } + bpf_ringbuf_discard_dynptr(&ptr_buf, 0); +} + +static __always_inline void test_dynptr_probe_xdp(struct xdp_md *xdp, void *ptr, + bpf_read_dynptr_fn_t bpf_read_dynptr_fn) +{ + struct bpf_dynptr ptr_xdp; + char buf[sizeof(expected_str)]; + __u32 off, i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + off = xdp_near_frag_end_offset(); + err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + + bpf_for(i, 0, ARRAY_SIZE(test_len)) { + __u32 len = test_len[i]; + + err = err ?: bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr); + if (len > sizeof(buf)) + continue; + err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0); + if (err || bpf_memcmp(expected_str, buf, len)) + err = 1; + /* Reset buffer and dynptr */ + __builtin_memset(buf, 0, sizeof(buf)); + err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0); + } +} + +static __always_inline void test_dynptr_probe_str_xdp(struct xdp_md *xdp, void *ptr, + bpf_read_dynptr_fn_t bpf_read_dynptr_fn) +{ + struct bpf_dynptr ptr_xdp; + char buf[sizeof(expected_str)]; + __u32 cnt, off, i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + off = xdp_near_frag_end_offset(); + err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + if (err) + return; + + bpf_for(i, 0, ARRAY_SIZE(test_len)) { + __u32 len = test_len[i]; + + cnt = bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr); + if (cnt != len) + err = 1; + + if (len > sizeof(buf)) + continue; + err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0); + + if (!len) + continue; + if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0') + err = 1; + + __builtin_memset(buf, 0, sizeof(buf)); + err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0); + } +} + +SEC("xdp") +int test_probe_read_user_dynptr(struct xdp_md *xdp) +{ + test_dynptr_probe(user_ptr, bpf_probe_read_user_dynptr); + if (!err) + test_dynptr_probe_xdp(xdp, user_ptr, bpf_probe_read_user_dynptr); + return XDP_PASS; +} + +SEC("xdp") +int test_probe_read_kernel_dynptr(struct xdp_md *xdp) +{ + test_dynptr_probe(expected_str, bpf_probe_read_kernel_dynptr); + if (!err) + test_dynptr_probe_xdp(xdp, expected_str, bpf_probe_read_kernel_dynptr); + return XDP_PASS; +} + +SEC("xdp") +int test_probe_read_user_str_dynptr(struct xdp_md *xdp) +{ + test_dynptr_probe_str(user_ptr, bpf_probe_read_user_str_dynptr); + if (!err) + test_dynptr_probe_str_xdp(xdp, user_ptr, bpf_probe_read_user_str_dynptr); + return XDP_PASS; +} + +SEC("xdp") +int test_probe_read_kernel_str_dynptr(struct xdp_md *xdp) +{ + test_dynptr_probe_str(expected_str, bpf_probe_read_kernel_str_dynptr); + if (!err) + test_dynptr_probe_str_xdp(xdp, expected_str, bpf_probe_read_kernel_str_dynptr); + return XDP_PASS; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int test_copy_from_user_dynptr(void *ctx) +{ + test_dynptr_probe(user_ptr, bpf_copy_from_user_dynptr); + return 0; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int test_copy_from_user_str_dynptr(void *ctx) +{ + test_dynptr_probe_str(user_ptr, bpf_copy_from_user_str_dynptr); + return 0; +} + +static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off, + u32 size, const void *unsafe_ptr) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task); +} + +static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off, + u32 size, const void *unsafe_ptr) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + return bpf_copy_from_user_task_str_dynptr(dptr, off, size, unsafe_ptr, task); +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int test_copy_from_user_task_dynptr(void *ctx) +{ + test_dynptr_probe(user_ptr, bpf_copy_data_from_user_task); + return 0; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int test_copy_from_user_task_str_dynptr(void *ctx) +{ + test_dynptr_probe_str(user_ptr, bpf_copy_data_from_user_task_str); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fd_htab_lookup.c b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c new file mode 100644 index 000000000000..a4a9e1db626f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025. Huawei Technologies Co., Ltd */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 64); + __type(key, int); + __type(value, int); + __array(values, struct inner_map_type); +} outer_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 427b72954b87..76adf4a8f2da 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -7,8 +7,6 @@ #include "bpf_misc.h" #include "bpf_compiler.h" -#define unlikely(x) __builtin_expect(!!(x), 0) - static volatile int zero = 0; int my_pid; diff --git a/tools/testing/selftests/bpf/progs/linked_list_peek.c b/tools/testing/selftests/bpf/progs/linked_list_peek.c new file mode 100644 index 000000000000..264e81bfb287 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list_peek.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + struct bpf_list_node l; + int key; +}; + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_list_head ghead __contains(node_data, l); + +#define list_entry(ptr, type, member) container_of(ptr, type, member) +#define NR_NODES 16 + +int zero = 0; + +SEC("syscall") +__retval(0) +long list_peek(void *ctx) +{ + struct bpf_list_node *l_n; + struct node_data *n; + int i, err = 0; + + bpf_spin_lock(&glock); + l_n = bpf_list_front(&ghead); + bpf_spin_unlock(&glock); + if (l_n) + return __LINE__; + + bpf_spin_lock(&glock); + l_n = bpf_list_back(&ghead); + bpf_spin_unlock(&glock); + if (l_n) + return __LINE__; + + for (i = zero; i < NR_NODES && can_loop; i++) { + n = bpf_obj_new(typeof(*n)); + if (!n) + return __LINE__; + n->key = i; + bpf_spin_lock(&glock); + bpf_list_push_back(&ghead, &n->l); + bpf_spin_unlock(&glock); + } + + bpf_spin_lock(&glock); + + l_n = bpf_list_front(&ghead); + if (!l_n) { + err = __LINE__; + goto done; + } + + n = list_entry(l_n, struct node_data, l); + if (n->key != 0) { + err = __LINE__; + goto done; + } + + l_n = bpf_list_back(&ghead); + if (!l_n) { + err = __LINE__; + goto done; + } + + n = list_entry(l_n, struct node_data, l); + if (n->key != NR_NODES - 1) { + err = __LINE__; + goto done; + } + +done: + bpf_spin_unlock(&glock); + return err; +} + +#define TEST_FB(op, dolock) \ +SEC("syscall") \ +__failure __msg(MSG) \ +long test_##op##_spinlock_##dolock(void *ctx) \ +{ \ + struct bpf_list_node *l_n; \ + __u64 jiffies = 0; \ + \ + if (dolock) \ + bpf_spin_lock(&glock); \ + l_n = bpf_list_##op(&ghead); \ + if (l_n) \ + jiffies = bpf_jiffies64(); \ + if (dolock) \ + bpf_spin_unlock(&glock); \ + \ + return !!jiffies; \ +} + +#define MSG "call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref" +TEST_FB(front, true) +TEST_FB(back, true) +#undef MSG + +#define MSG "bpf_spin_lock at off=0 must be held for bpf_list_head" +TEST_FB(front, false) +TEST_FB(back, false) +#undef MSG + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c index 1f1dd547e4ee..cfc1f48e0d28 100644 --- a/tools/testing/selftests/bpf/progs/prepare.c +++ b/tools/testing/selftests/bpf/progs/prepare.c @@ -2,7 +2,6 @@ /* Copyright (c) 2025 Meta */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> -//#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c index 5927054b6dd9..efa416f53968 100644 --- a/tools/testing/selftests/bpf/progs/raw_tp_null.c +++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c @@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL"; int tid; int i; -SEC("tp_btf/bpf_testmod_test_raw_tp_null") +SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp") int BPF_PROG(test_raw_tp_null, struct sk_buff *skb) { struct task_struct *task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c index 38d669957bf1..0d58114a4955 100644 --- a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c +++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c @@ -8,7 +8,7 @@ char _license[] SEC("license") = "GPL"; /* Ensure module parameter has PTR_MAYBE_NULL */ -SEC("tp_btf/bpf_testmod_test_raw_tp_null") +SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp") __failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) { asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index dbd5eee8e25e..4acb6af2dfe3 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -69,11 +69,11 @@ long rbtree_api_nolock_first(void *ctx) } SEC("?tc") -__failure __msg("rbtree_remove node input must be non-owning ref") +__retval(0) long rbtree_api_remove_unadded_node(void *ctx) { struct node_data *n, *m; - struct bpf_rb_node *res; + struct bpf_rb_node *res_n, *res_m; n = bpf_obj_new(typeof(*n)); if (!n) @@ -88,19 +88,20 @@ long rbtree_api_remove_unadded_node(void *ctx) bpf_spin_lock(&glock); bpf_rbtree_add(&groot, &n->node, less); - /* This remove should pass verifier */ - res = bpf_rbtree_remove(&groot, &n->node); - n = container_of(res, struct node_data, node); + res_n = bpf_rbtree_remove(&groot, &n->node); - /* This remove shouldn't, m isn't in an rbtree */ - res = bpf_rbtree_remove(&groot, &m->node); - m = container_of(res, struct node_data, node); + res_m = bpf_rbtree_remove(&groot, &m->node); bpf_spin_unlock(&glock); - if (n) - bpf_obj_drop(n); - if (m) - bpf_obj_drop(m); + bpf_obj_drop(m); + if (res_n) + bpf_obj_drop(container_of(res_n, struct node_data, node)); + if (res_m) { + bpf_obj_drop(container_of(res_m, struct node_data, node)); + /* m was not added to the rbtree */ + return 2; + } + return 0; } @@ -178,7 +179,7 @@ err_out: } SEC("?tc") -__failure __msg("rbtree_remove node input must be non-owning ref") +__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer") long rbtree_api_add_release_unlock_escape(void *ctx) { struct node_data *n; @@ -202,7 +203,7 @@ long rbtree_api_add_release_unlock_escape(void *ctx) } SEC("?tc") -__failure __msg("rbtree_remove node input must be non-owning ref") +__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer") long rbtree_api_first_release_unlock_escape(void *ctx) { struct bpf_rb_node *res; diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c new file mode 100644 index 000000000000..098ef970fac1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rbtree_search.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + struct bpf_refcount ref; + struct bpf_rb_node r0; + struct bpf_rb_node r1; + int key0; + int key1; +}; + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) +private(A) struct bpf_spin_lock glock0; +private(A) struct bpf_rb_root groot0 __contains(node_data, r0); + +private(B) struct bpf_spin_lock glock1; +private(B) struct bpf_rb_root groot1 __contains(node_data, r1); + +#define rb_entry(ptr, type, member) container_of(ptr, type, member) +#define NR_NODES 16 + +int zero = 0; + +static bool less0(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = rb_entry(a, struct node_data, r0); + node_b = rb_entry(b, struct node_data, r0); + + return node_a->key0 < node_b->key0; +} + +static bool less1(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = rb_entry(a, struct node_data, r1); + node_b = rb_entry(b, struct node_data, r1); + + return node_a->key1 < node_b->key1; +} + +SEC("syscall") +__retval(0) +long rbtree_search(void *ctx) +{ + struct bpf_rb_node *rb_n, *rb_m, *gc_ns[NR_NODES]; + long lookup_key = NR_NODES / 2; + struct node_data *n, *m; + int i, nr_gc = 0; + + for (i = zero; i < NR_NODES && can_loop; i++) { + n = bpf_obj_new(typeof(*n)); + if (!n) + return __LINE__; + + m = bpf_refcount_acquire(n); + + n->key0 = i; + m->key1 = i; + + bpf_spin_lock(&glock0); + bpf_rbtree_add(&groot0, &n->r0, less0); + bpf_spin_unlock(&glock0); + + bpf_spin_lock(&glock1); + bpf_rbtree_add(&groot1, &m->r1, less1); + bpf_spin_unlock(&glock1); + } + + n = NULL; + bpf_spin_lock(&glock0); + rb_n = bpf_rbtree_root(&groot0); + while (can_loop) { + if (!rb_n) { + bpf_spin_unlock(&glock0); + return __LINE__; + } + + n = rb_entry(rb_n, struct node_data, r0); + if (lookup_key == n->key0) + break; + if (nr_gc < NR_NODES) + gc_ns[nr_gc++] = rb_n; + if (lookup_key < n->key0) + rb_n = bpf_rbtree_left(&groot0, rb_n); + else + rb_n = bpf_rbtree_right(&groot0, rb_n); + } + + if (!n || lookup_key != n->key0) { + bpf_spin_unlock(&glock0); + return __LINE__; + } + + for (i = 0; i < nr_gc; i++) { + rb_n = gc_ns[i]; + gc_ns[i] = bpf_rbtree_remove(&groot0, rb_n); + } + + m = bpf_refcount_acquire(n); + bpf_spin_unlock(&glock0); + + for (i = 0; i < nr_gc; i++) { + rb_n = gc_ns[i]; + if (rb_n) { + n = rb_entry(rb_n, struct node_data, r0); + bpf_obj_drop(n); + } + } + + if (!m) + return __LINE__; + + bpf_spin_lock(&glock1); + rb_m = bpf_rbtree_remove(&groot1, &m->r1); + bpf_spin_unlock(&glock1); + bpf_obj_drop(m); + if (!rb_m) + return __LINE__; + bpf_obj_drop(rb_entry(rb_m, struct node_data, r1)); + + return 0; +} + +#define TEST_ROOT(dolock) \ +SEC("syscall") \ +__failure __msg(MSG) \ +long test_root_spinlock_##dolock(void *ctx) \ +{ \ + struct bpf_rb_node *rb_n; \ + __u64 jiffies = 0; \ + \ + if (dolock) \ + bpf_spin_lock(&glock0); \ + rb_n = bpf_rbtree_root(&groot0); \ + if (rb_n) \ + jiffies = bpf_jiffies64(); \ + if (dolock) \ + bpf_spin_unlock(&glock0); \ + \ + return !!jiffies; \ +} + +#define TEST_LR(op, dolock) \ +SEC("syscall") \ +__failure __msg(MSG) \ +long test_##op##_spinlock_##dolock(void *ctx) \ +{ \ + struct bpf_rb_node *rb_n; \ + struct node_data *n; \ + __u64 jiffies = 0; \ + \ + bpf_spin_lock(&glock0); \ + rb_n = bpf_rbtree_root(&groot0); \ + if (!rb_n) { \ + bpf_spin_unlock(&glock0); \ + return 1; \ + } \ + n = rb_entry(rb_n, struct node_data, r0); \ + n = bpf_refcount_acquire(n); \ + bpf_spin_unlock(&glock0); \ + if (!n) \ + return 1; \ + \ + if (dolock) \ + bpf_spin_lock(&glock0); \ + rb_n = bpf_rbtree_##op(&groot0, &n->r0); \ + if (rb_n) \ + jiffies = bpf_jiffies64(); \ + if (dolock) \ + bpf_spin_unlock(&glock0); \ + \ + return !!jiffies; \ +} + +/* + * Use a spearate MSG macro instead of passing to TEST_XXX(..., MSG) + * to ensure the message itself is not in the bpf prog lineinfo + * which the verifier includes in its log. + * Otherwise, the test_loader will incorrectly match the prog lineinfo + * instead of the log generated by the verifier. + */ +#define MSG "call bpf_rbtree_root{{.+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref" +TEST_ROOT(true) +#undef MSG +#define MSG "call bpf_rbtree_{{(left|right).+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref" +TEST_LR(left, true) +TEST_LR(right, true) +#undef MSG + +#define MSG "bpf_spin_lock at off=0 must be held for bpf_rb_root" +TEST_ROOT(false) +TEST_LR(left, false) +TEST_LR(right, false) +#undef MSG + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c index 9adb5ba4cd4d..90f5656c3991 100644 --- a/tools/testing/selftests/bpf/progs/set_global_vars.c +++ b/tools/testing/selftests/bpf/progs/set_global_vars.c @@ -24,6 +24,44 @@ const volatile enum Enumu64 var_eb = EB1; const volatile enum Enums64 var_ec = EC1; const volatile bool var_b = false; +struct Struct { + int:16; + __u16 filler; + struct { + const __u16 filler2; + }; + struct Struct2 { + __u16 filler; + volatile struct { + const int:1; + union { + const volatile __u8 var_u8; + const volatile __s16 filler3; + const int:1; + } u; + }; + } struct2; +}; + +const volatile __u32 stru = 0; /* same prefix as below */ +const volatile struct Struct struct1 = {.struct2 = {.u = {.var_u8 = 1}}}; + +union Union { + __u16 var_u16; + struct Struct3 { + struct { + __u8 var_u8_l; + }; + struct { + struct { + __u8 var_u8_h; + }; + }; + } struct3; +}; + +const volatile union Union union1 = {.var_u16 = -1}; + char arr[4] = {0}; SEC("socket") @@ -43,5 +81,8 @@ int test_set_globals(void *ctx) a = var_eb; a = var_ec; a = var_b; + a = struct1.struct2.u.var_u8; + a = union1.var_u16; + return a; } diff --git a/tools/testing/selftests/bpf/progs/test_btf_ext.c b/tools/testing/selftests/bpf/progs/test_btf_ext.c new file mode 100644 index 000000000000..cdf20331db04 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_btf_ext.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Meta Platforms Inc. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +__noinline static void f0(void) +{ + __u64 a = 1; + + __sink(a); +} + +SEC("xdp") +__u64 global_func(struct xdp_md *xdp) +{ + f0(); + return XDP_DROP; +} diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index 7f3c233943b3..03d7f89787a1 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -19,7 +19,7 @@ int BPF_PROG(handle_raw_tp, __u32 raw_tp_bare_write_sz = 0; -SEC("raw_tp/bpf_testmod_test_write_bare") +SEC("raw_tp/bpf_testmod_test_write_bare_tp") int BPF_PROG(handle_raw_tp_bare, struct task_struct *task, struct bpf_testmod_test_write_ctx *write_ctx) { @@ -31,7 +31,7 @@ int raw_tp_writable_bare_in_val = 0; int raw_tp_writable_bare_early_ret = 0; int raw_tp_writable_bare_out_val = 0; -SEC("raw_tp.w/bpf_testmod_test_writable_bare") +SEC("raw_tp.w/bpf_testmod_test_writable_bare_tp") int BPF_PROG(handle_raw_tp_writable_bare, struct bpf_testmod_test_writable_ctx *writable) { diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c new file mode 100644 index 000000000000..8bdb9987c0c7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +int cork_byte; +int push_start; +int push_end; +int apply_bytes; + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map SEC(".maps"); + +SEC("sk_msg") +int prog_sk_policy(struct sk_msg_md *msg) +{ + if (cork_byte > 0) + bpf_msg_cork_bytes(msg, cork_byte); + if (push_start > 0 && push_end > 0) + bpf_msg_push_data(msg, push_start, push_end, 0); + + return SK_PASS; +} + +SEC("sk_msg") +int prog_sk_policy_redir(struct sk_msg_md *msg) +{ + int two = 2; + + bpf_msg_apply_bytes(msg, apply_bytes); + return bpf_msg_redirect_map(msg, &sock_map, two, 0); +} diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_redir.c b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c new file mode 100644 index 000000000000..34d9f4f2f0a2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC(".maps") struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} nop_map, sock_map; + +SEC(".maps") struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} nop_hash, sock_hash; + +SEC(".maps") struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, unsigned int); +} verdict_map; + +/* Set by user space */ +int redirect_type; +int redirect_flags; + +#define redirect_map(__data) \ + _Generic((__data), \ + struct __sk_buff * : bpf_sk_redirect_map, \ + struct sk_msg_md * : bpf_msg_redirect_map \ + )((__data), &sock_map, (__u32){0}, redirect_flags) + +#define redirect_hash(__data) \ + _Generic((__data), \ + struct __sk_buff * : bpf_sk_redirect_hash, \ + struct sk_msg_md * : bpf_msg_redirect_hash \ + )((__data), &sock_hash, &(__u32){0}, redirect_flags) + +#define DEFINE_PROG(__type, __param) \ +SEC("sk_" XSTR(__type)) \ +int prog_ ## __type ## _verdict(__param data) \ +{ \ + unsigned int *count; \ + int verdict; \ + \ + if (redirect_type == BPF_MAP_TYPE_SOCKMAP) \ + verdict = redirect_map(data); \ + else if (redirect_type == BPF_MAP_TYPE_SOCKHASH) \ + verdict = redirect_hash(data); \ + else \ + verdict = redirect_type - __MAX_BPF_MAP_TYPE; \ + \ + count = bpf_map_lookup_elem(&verdict_map, &verdict); \ + if (count) \ + (*count)++; \ + \ + return verdict; \ +} + +DEFINE_PROG(skb, struct __sk_buff *); +DEFINE_PROG(msg, struct sk_msg_md *); + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c index eb5cca1fce16..7d5293de1952 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -294,7 +294,9 @@ static int tcp_validate_sysctl(struct tcp_syncookie *ctx) (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6)) goto err; - if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7) + if (!ctx->attrs.wscale_ok || + !ctx->attrs.snd_wscale || + ctx->attrs.snd_wscale >= BPF_SYNCOOKIE_WSCALE_MASK) goto err; if (!ctx->attrs.tstamp_ok) diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c index 39ff06f2c834..cf0547a613ff 100644 --- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c @@ -6,14 +6,14 @@ #include "../test_kmods/bpf_testmod.h" #include "bpf_misc.h" -SEC("tp_btf/bpf_testmod_test_nullable_bare") +SEC("tp_btf/bpf_testmod_test_nullable_bare_tp") __failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx) { return nullable_ctx->len; } -SEC("tp_btf/bpf_testmod_test_nullable_bare") +SEC("tp_btf/bpf_testmod_test_nullable_bare_tp") int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx) { if (nullable_ctx) diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c new file mode 100644 index 000000000000..35e2cdc00a01 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#if __clang_major__ >= 21 && 0 +SEC("socket") +__description("__builtin_trap with simple c code") +__failure __msg("unexpected __bpf_trap() due to uninitialized variable?") +void bpf_builtin_trap_with_simple_c(void) +{ + __builtin_trap(); +} +#endif + +SEC("socket") +__description("__bpf_trap with simple c code") +__failure __msg("unexpected __bpf_trap() due to uninitialized variable?") +void bpf_trap_with_simple_c(void) +{ + __bpf_trap(); +} + +SEC("socket") +__description("__bpf_trap as the second-from-last insn") +__failure __msg("unexpected __bpf_trap() due to uninitialized variable?") +__naked void bpf_trap_at_func_end(void) +{ + asm volatile ( + "r0 = 0;" + "call %[__bpf_trap];" + "exit;" + : + : __imm(__bpf_trap) + : __clobber_all); +} + +SEC("socket") +__description("dead code __bpf_trap in the middle of code") +__success +__naked void dead_bpf_trap_in_middle(void) +{ + asm volatile ( + "r0 = 0;" + "if r0 == 0 goto +1;" + "call %[__bpf_trap];" + "r0 = 2;" + "exit;" + : + : __imm(__bpf_trap) + : __clobber_all); +} + +SEC("socket") +__description("reachable __bpf_trap in the middle of code") +__failure __msg("unexpected __bpf_trap() due to uninitialized variable?") +__naked void live_bpf_trap_in_middle(void) +{ + asm volatile ( + "r0 = 0;" + "if r0 == 1 goto +1;" + "call %[__bpf_trap];" + "r0 = 2;" + "exit;" + : + : __imm(__bpf_trap) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c index 28b939572cda..03942cec07e5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c @@ -65,4 +65,16 @@ __naked void ctx_access_u32_pointer_reject_8(void) " ::: __clobber_all); } +SEC("fentry/bpf_fentry_test10") +__description("btf_ctx_access const void pointer accept") +__success __retval(0) +__naked void ctx_access_const_void_pointer_accept(void) +{ + asm volatile (" \ + r2 = *(u64 *)(r1 + 0); /* load 1st argument value (const void pointer) */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c index 77698d5a19e4..74f4f19c10b8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c +++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c @@ -10,65 +10,81 @@ SEC("socket") __description("load-acquire, 8-bit") -__success __success_unpriv __retval(0x12) +__success __success_unpriv __retval(0) __naked void load_acquire_8(void) { asm volatile ( - "w1 = 0x12;" + "r0 = 0;" + "w1 = 0xfe;" "*(u8 *)(r10 - 1) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u8 *)(r10 - 1)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1)) + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -1)) : __clobber_all); } SEC("socket") __description("load-acquire, 16-bit") -__success __success_unpriv __retval(0x1234) +__success __success_unpriv __retval(0) __naked void load_acquire_16(void) { asm volatile ( - "w1 = 0x1234;" + "r0 = 0;" + "w1 = 0xfedc;" "*(u16 *)(r10 - 2) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u16 *)(r10 - 2)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2)) + BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -2)) : __clobber_all); } SEC("socket") __description("load-acquire, 32-bit") -__success __success_unpriv __retval(0x12345678) +__success __success_unpriv __retval(0) __naked void load_acquire_32(void) { asm volatile ( - "w1 = 0x12345678;" + "r0 = 0;" + "w1 = 0xfedcba09;" "*(u32 *)(r10 - 4) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u32 *)(r10 - 4)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4)) + BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -4)) : __clobber_all); } SEC("socket") __description("load-acquire, 64-bit") -__success __success_unpriv __retval(0x1234567890abcdef) +__success __success_unpriv __retval(0) __naked void load_acquire_64(void) { asm volatile ( - "r1 = 0x1234567890abcdef ll;" + "r0 = 0;" + "r1 = 0xfedcba0987654321 ll;" "*(u64 *)(r10 - 8) = r1;" - ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8)); + ".8byte %[load_acquire_insn];" // r2 = load_acquire((u64 *)(r10 - 8)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8)) + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8)) : __clobber_all); } diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 6662d4b39969..9fe5d255ee37 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -91,8 +91,7 @@ __naked int bpf_end_bswap(void) ::: __clobber_all); } -#if defined(ENABLE_ATOMICS_TESTS) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) +#ifdef CAN_USE_LOAD_ACQ_STORE_REL SEC("?raw_tp") __success __log_level(2) @@ -138,7 +137,7 @@ __naked int bpf_store_release(void) : __clobber_all); } -#endif /* load-acquire, store-release */ +#endif /* CAN_USE_LOAD_ACQ_STORE_REL */ #endif /* v4 instruction */ SEC("?raw_tp") @@ -179,4 +178,57 @@ __naked int state_loop_first_last_equal(void) ); } +__used __naked static void __bpf_cond_op_r10(void) +{ + asm volatile ( + "r2 = 2314885393468386424 ll;" + "goto +0;" + "if r2 <= r10 goto +3;" + "if r1 >= -1835016 goto +0;" + "if r2 <= 8 goto +0;" + "if r3 <= 0 goto +0;" + "exit;" + ::: __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("8: (bd) if r2 <= r10 goto pc+3") +__msg("9: (35) if r1 >= 0xffe3fff8 goto pc+0") +__msg("10: (b5) if r2 <= 0x8 goto pc+0") +__msg("mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0") +__msg("mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3") +__msg("mark_precise: frame1: regs=r2 stack= before 7: (05) goto pc+0") +__naked void bpf_cond_op_r10(void) +{ + asm volatile ( + "r3 = 0 ll;" + "call __bpf_cond_op_r10;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("3: (bf) r3 = r10") +__msg("4: (bd) if r3 <= r2 goto pc+1") +__msg("5: (b5) if r2 <= 0x8 goto pc+2") +__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 4: (bd) if r3 <= r2 goto pc+1") +__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10") +__naked void bpf_cond_op_not_r10(void) +{ + asm volatile ( + "r0 = 0;" + "r2 = 2314885393468386424 ll;" + "r3 = r10;" + "if r3 <= r2 goto +1;" + "if r2 <= 8 goto +2;" + "r0 = 2 ll;" + "exit;" + ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c index c0442d5bb049..72f1eb006074 100644 --- a/tools/testing/selftests/bpf/progs/verifier_store_release.c +++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c @@ -6,18 +6,21 @@ #include "../../../include/linux/filter.h" #include "bpf_misc.h" -#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) +#ifdef CAN_USE_LOAD_ACQ_STORE_REL SEC("socket") __description("store-release, 8-bit") -__success __success_unpriv __retval(0x12) +__success __success_unpriv __retval(0) __naked void store_release_8(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12;" ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1); - "w0 = *(u8 *)(r10 - 1);" + "w2 = *(u8 *)(r10 - 1);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -27,13 +30,17 @@ __naked void store_release_8(void) SEC("socket") __description("store-release, 16-bit") -__success __success_unpriv __retval(0x1234) +__success __success_unpriv __retval(0) __naked void store_release_16(void) { asm volatile ( + "r0 = 0;" "w1 = 0x1234;" ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1); - "w0 = *(u16 *)(r10 - 2);" + "w2 = *(u16 *)(r10 - 2);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -43,13 +50,17 @@ __naked void store_release_16(void) SEC("socket") __description("store-release, 32-bit") -__success __success_unpriv __retval(0x12345678) +__success __success_unpriv __retval(0) __naked void store_release_32(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12345678;" ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1); - "w0 = *(u32 *)(r10 - 4);" + "w2 = *(u32 *)(r10 - 4);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -59,13 +70,17 @@ __naked void store_release_32(void) SEC("socket") __description("store-release, 64-bit") -__success __success_unpriv __retval(0x1234567890abcdef) +__success __success_unpriv __retval(0) __naked void store_release_64(void) { asm volatile ( + "r0 = 0;" "r1 = 0x1234567890abcdef ll;" ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1); - "r0 = *(u64 *)(r10 - 8);" + "r2 = *(u64 *)(r10 - 8);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -271,7 +286,7 @@ __naked void store_release_with_invalid_reg(void) : __clobber_all); } -#else +#else /* CAN_USE_LOAD_ACQ_STORE_REL */ SEC("socket") __description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test") @@ -281,6 +296,6 @@ int dummy_test(void) return 0; } -#endif +#endif /* CAN_USE_LOAD_ACQ_STORE_REL */ char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c index 31ca229bb3c0..09bb8a038d52 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -19,6 +19,13 @@ struct { __type(value, __u32); } prog_arr SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 1); +} dev_map SEC(".maps"); + extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, @@ -95,4 +102,10 @@ int rx(struct xdp_md *ctx) return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); } +SEC("xdp") +int redirect(struct xdp_md *ctx) +{ + return bpf_redirect_map(&dev_map, ctx->rx_queue_index, XDP_PASS); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index 3220f1d28697..e6c248e3ae54 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -134,6 +134,10 @@ bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) { return bpf_testmod_test_struct_arg_result; } +__weak noinline void bpf_testmod_looooooooooooooooooooooooooooooong_name(void) +{ +} + __bpf_kfunc void bpf_testmod_test_mod_kfunc(int i) { @@ -413,7 +417,7 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); - (void)trace_bpf_testmod_test_raw_tp_null(NULL); + (void)trace_bpf_testmod_test_raw_tp_null_tp(NULL); bpf_testmod_test_struct_ops3(); @@ -431,14 +435,14 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, if (bpf_testmod_loop_test(101) > 100) trace_bpf_testmod_test_read(current, &ctx); - trace_bpf_testmod_test_nullable_bare(NULL); + trace_bpf_testmod_test_nullable_bare_tp(NULL); /* Magic number to enable writable tp */ if (len == 64) { struct bpf_testmod_test_writable_ctx writable = { .val = 1024, }; - trace_bpf_testmod_test_writable_bare(&writable); + trace_bpf_testmod_test_writable_bare_tp(&writable); if (writable.early_ret) return snprintf(buf, len, "%d\n", writable.val); } @@ -470,7 +474,7 @@ bpf_testmod_test_write(struct file *file, struct kobject *kobj, .len = len, }; - trace_bpf_testmod_test_write_bare(current, &ctx); + trace_bpf_testmod_test_write_bare_tp(current, &ctx); return -EIO; /* always fail */ } @@ -1340,7 +1344,7 @@ static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a)); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2); *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0); - *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id), + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id); *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8); *insn++ = prog->insnsi[0]; @@ -1379,7 +1383,7 @@ static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struc *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a)); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2); *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0); - *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id), + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id); *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6); *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2); *insn++ = BPF_EXIT_INSN(); diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 49f2fc61061f..9551d8d5f8f9 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -1042,6 +1042,14 @@ void run_subtest(struct test_loader *tester, emit_verifier_log(tester->log_buf, false /*force*/); validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log); + /* Restore capabilities because the kernel will silently ignore requests + * for program info (such as xlated program text) if we are not + * bpf-capable. Also, for some reason test_verifier executes programs + * with all capabilities restored. Do the same here. + */ + if (restore_capabilities(&caps)) + goto tobj_cleanup; + if (subspec->expect_xlated.cnt) { err = get_xlated_program_text(bpf_program__fd(tprog), tester->log_buf, tester->log_buf_sz); @@ -1067,12 +1075,6 @@ void run_subtest(struct test_loader *tester, } if (should_do_test_run(spec, subspec)) { - /* For some reason test_verifier executes programs - * with all capabilities restored. Do the same here. - */ - if (restore_capabilities(&caps)) - goto tobj_cleanup; - /* Do bpf_map__attach_struct_ops() for each struct_ops map. * This should trigger bpf_struct_ops->reg callback on kernel side. */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 447b68509d76..27db34ecf3f5 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -734,7 +734,7 @@ static __u32 btf_raw_types[] = { BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */ }; -static char bpf_vlog[UINT_MAX >> 8]; +static char bpf_vlog[UINT_MAX >> 5]; static int load_btf_spec(__u32 *types, int types_len, const char *strings, int strings_len) @@ -1559,10 +1559,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->errstr_unpriv : test->errstr; opts.expected_attach_type = test->expected_attach_type; - if (verbose) - opts.log_level = verif_log_level | 4; /* force stats */ - else if (expected_ret == VERBOSE_ACCEPT) + if (expected_ret == VERBOSE_ACCEPT) opts.log_level = 2; + else if (verbose) + opts.log_level = verif_log_level | 4; /* force stats */ else opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; opts.prog_flags = pflags; diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index a18972ffdeb6..b2bb20b00952 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1486,7 +1486,84 @@ static bool is_preset_supported(const struct btf_type *t) return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); } -static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t, +const int btf_find_member(const struct btf *btf, + const struct btf_type *parent_type, + __u32 parent_offset, + const char *member_name, + int *member_tid, + __u32 *member_offset) +{ + int i; + + if (!btf_is_composite(parent_type)) + return -EINVAL; + + for (i = 0; i < btf_vlen(parent_type); ++i) { + const struct btf_member *member; + const struct btf_type *member_type; + int tid; + + member = btf_members(parent_type) + i; + tid = btf__resolve_type(btf, member->type); + if (tid < 0) + return -EINVAL; + + member_type = btf__type_by_id(btf, tid); + if (member->name_off) { + const char *name = btf__name_by_offset(btf, member->name_off); + + if (strcmp(member_name, name) == 0) { + if (btf_member_bitfield_size(parent_type, i) != 0) { + fprintf(stderr, "Bitfield presets are not supported %s\n", + name); + return -EINVAL; + } + *member_offset = parent_offset + member->offset; + *member_tid = tid; + return 0; + } + } else if (btf_is_composite(member_type)) { + int err; + + err = btf_find_member(btf, member_type, parent_offset + member->offset, + member_name, member_tid, member_offset); + if (!err) + return 0; + } + } + + return -EINVAL; +} + +static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t, + struct btf_var_secinfo *sinfo, const char *var) +{ + char expr[256], *saveptr; + const struct btf_type *base_type, *member_type; + int err, member_tid; + char *name; + __u32 member_offset = 0; + + base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); + snprintf(expr, sizeof(expr), "%s", var); + strtok_r(expr, ".", &saveptr); + + while ((name = strtok_r(NULL, ".", &saveptr))) { + err = btf_find_member(btf, base_type, 0, name, &member_tid, &member_offset); + if (err) { + fprintf(stderr, "Could not find member %s for variable %s\n", name, var); + return err; + } + member_type = btf__type_by_id(btf, member_tid); + sinfo->offset += member_offset / 8; + sinfo->size = member_type->size; + sinfo->type = member_tid; + base_type = member_type; + } + return 0; +} + +static int set_global_var(struct bpf_object *obj, struct btf *btf, struct bpf_map *map, struct btf_var_secinfo *sinfo, struct var_preset *preset) { @@ -1495,9 +1572,9 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct long long value = preset->ivalue; size_t size; - base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); + base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type)); if (!base_type) { - fprintf(stderr, "Failed to resolve type %d\n", t->type); + fprintf(stderr, "Failed to resolve type %d\n", sinfo->type); return -EINVAL; } if (!is_preset_supported(base_type)) { @@ -1530,7 +1607,7 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct if (value >= max_val || value < -max_val) { fprintf(stderr, "Variable %s value %lld is out of range [%lld; %lld]\n", - btf__name_by_offset(btf, t->name_off), value, + btf__name_by_offset(btf, base_type->name_off), value, is_signed ? -max_val : 0, max_val - 1); return -EINVAL; } @@ -1583,14 +1660,20 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i for (j = 0; j < n; ++j, ++sinfo) { const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); const char *var_name; + int var_len; if (!btf_is_var(var_type)) continue; var_name = btf__name_by_offset(btf, var_type->name_off); + var_len = strlen(var_name); for (k = 0; k < npresets; ++k) { - if (strcmp(var_name, presets[k].name) != 0) + struct btf_var_secinfo tmp_sinfo; + + if (strncmp(var_name, presets[k].name, var_len) != 0 || + (presets[k].name[var_len] != '\0' && + presets[k].name[var_len] != '.')) continue; if (presets[k].applied) { @@ -1598,13 +1681,17 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i var_name); return -EINVAL; } + tmp_sinfo = *sinfo; + err = adjust_var_secinfo(btf, var_type, + &tmp_sinfo, presets[k].name); + if (err) + return err; - err = set_global_var(obj, btf, var_type, map, sinfo, presets + k); + err = set_global_var(obj, btf, map, &tmp_sinfo, presets + k); if (err) return err; presets[k].applied = true; - break; } } } diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 20af35a91d6f..d9fffe06d3ea 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -3,7 +3,7 @@ top_srcdir = ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64)) +ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch)) # Top-level selftests allows ARCH=x86_64 :-( ifeq ($(ARCH),x86_64) ARCH := x86 diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index f62b0a5aba35..3e786080473d 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -47,6 +47,10 @@ LIBKVM_riscv += lib/riscv/handlers.S LIBKVM_riscv += lib/riscv/processor.c LIBKVM_riscv += lib/riscv/ucall.c +LIBKVM_loongarch += lib/loongarch/processor.c +LIBKVM_loongarch += lib/loongarch/ucall.c +LIBKVM_loongarch += lib/loongarch/exception.S + # Non-compiled test targets TEST_PROGS_x86 += x86/nx_huge_pages_test.sh @@ -147,6 +151,7 @@ TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions +TEST_GEN_PROGS_arm64 += arm64/host_sve TEST_GEN_PROGS_arm64 += arm64/hypercalls TEST_GEN_PROGS_arm64 += arm64/mmio_abort TEST_GEN_PROGS_arm64 += arm64/page_fault_test @@ -190,6 +195,19 @@ TEST_GEN_PROGS_riscv += coalesced_io_test TEST_GEN_PROGS_riscv += get-reg-list TEST_GEN_PROGS_riscv += steal_time +TEST_GEN_PROGS_loongarch += coalesced_io_test +TEST_GEN_PROGS_loongarch += demand_paging_test +TEST_GEN_PROGS_loongarch += dirty_log_perf_test +TEST_GEN_PROGS_loongarch += dirty_log_test +TEST_GEN_PROGS_loongarch += guest_print_test +TEST_GEN_PROGS_loongarch += hardware_disable_test +TEST_GEN_PROGS_loongarch += kvm_binary_stats_test +TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus +TEST_GEN_PROGS_loongarch += kvm_page_table_test +TEST_GEN_PROGS_loongarch += memslot_modification_stress_test +TEST_GEN_PROGS_loongarch += memslot_perf_test +TEST_GEN_PROGS_loongarch += set_memory_region_test + SPLIT_TESTS += arch_timer SPLIT_TESTS += get-reg-list diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c new file mode 100644 index 000000000000..3826772fd470 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/host_sve.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls. + * + * Copyright 2025 Arm, Ltd + */ + +#include <errno.h> +#include <signal.h> +#include <sys/auxv.h> +#include <asm/kvm.h> +#include <kvm_util.h> + +#include "ucall_common.h" + +static void guest_code(void) +{ + for (int i = 0; i < 10; i++) { + GUEST_UCALL_NONE(); + } + + GUEST_DONE(); +} + +void handle_sigill(int sig, siginfo_t *info, void *ctx) +{ + ucontext_t *uctx = ctx; + + printf(" < host signal %d >\n", sig); + + /* + * Skip the UDF + */ + uctx->uc_mcontext.pc += 4; +} + +void register_sigill_handler(void) +{ + struct sigaction sa = { + .sa_sigaction = handle_sigill, + .sa_flags = SA_SIGINFO, + }; + sigaction(SIGILL, &sa, NULL); +} + +static void do_sve_roundtrip(void) +{ + unsigned long before, after; + + /* + * Set all bits in a predicate register, force a save/restore via a + * SIGILL (which handle_sigill() will recover from), then report + * whether the value has changed. + */ + asm volatile( + " .arch_extension sve\n" + " ptrue p0.B\n" + " cntp %[before], p0, p0.B\n" + " udf #0\n" + " cntp %[after], p0, p0.B\n" + : [before] "=r" (before), + [after] "=r" (after) + : + : "p0" + ); + + if (before != after) { + TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n", + before, after); + } else { + printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n", + before, after); + } +} + +static void test_run(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + register_sigill_handler(); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + do_sve_roundtrip(); + + while (!guest_done) { + + printf("Running VCPU...\n"); + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_NONE: + do_sve_roundtrip(); + do_sve_roundtrip(); + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); +} + +int main(void) +{ + /* + * This is testing the host environment, we don't care about + * guest SVE support. + */ + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + printf("SVE not supported\n"); + return KSFT_SKIP; + } + + test_run(); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 57708de2075d..8f422bfdfcb9 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -15,6 +15,8 @@ #include "test_util.h" #include <linux/bitfield.h> +bool have_cap_arm_mte; + enum ftr_type { FTR_EXACT, /* Use a predefined safe value */ FTR_LOWER_SAFE, /* Smaller value is safe */ @@ -543,6 +545,70 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n"); } +#define MTE_IDREG_TEST 1 +static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + uint64_t val; + uint64_t mte; + uint64_t mte_frac; + int idx, err; + + if (!have_cap_arm_mte) { + ksft_test_result_skip("MTE capability not supported, nothing to test\n"); + return; + } + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to test\n"); + return; + } + + /* + * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2) + * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported + * and MTE_frac == 0 indicates it is supported. + * + * As MTE_frac was previously unconditionally read as 0, check + * that the set to 0 succeeds but does not change MTE_frac + * from unsupported (0xF) to supported (0). + * + */ + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + + mte = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), val); + mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val); + if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || + mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { + ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n"); + return; + } + + /* Try to set MTE_frac=0. */ + val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) { + ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n"); + return; + } + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val); + if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI) + ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n"); + else + ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n"); +} + static void test_guest_reg_read(struct kvm_vcpu *vcpu) { bool done = false; @@ -673,6 +739,14 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) ksft_test_result_pass("%s\n", __func__); } +void kvm_arch_vm_post_create(struct kvm_vm *vm) +{ + if (vm_check_cap(vm, KVM_CAP_ARM_MTE)) { + vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); + have_cap_arm_mte = true; + } +} + int main(void) { struct kvm_vcpu *vcpu; @@ -701,7 +775,7 @@ int main(void) ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 + - MPAM_IDREG_TEST; + MPAM_IDREG_TEST + MTE_IDREG_TEST; ksft_set_plan(test_cnt); @@ -709,6 +783,7 @@ int main(void) test_vcpu_ftr_id_regs(vcpu); test_vcpu_non_ftr_id_regs(vcpu); test_user_set_mpam_reg(vcpu); + test_user_set_mte_reg(vcpu); test_guest_reg_read(vcpu); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 373912464fb4..93013564428b 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -177,6 +177,7 @@ enum vm_guest_mode { VM_MODE_P36V48_4K, VM_MODE_P36V48_16K, VM_MODE_P36V48_64K, + VM_MODE_P47V47_16K, VM_MODE_P36V47_16K, NUM_VM_MODES, }; @@ -232,6 +233,11 @@ extern enum vm_guest_mode vm_mode_default; #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 8) +#elif defined(__loongarch__) +#define VM_MODE_DEFAULT VM_MODE_P47V47_16K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + #endif #define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT) diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h new file mode 100644 index 000000000000..e43a57d99b56 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +struct kvm_vm_arch {}; + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h new file mode 100644 index 000000000000..6427a3275e6a --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#ifndef __ASSEMBLER__ +#include "ucall_common.h" + +#else +/* general registers */ +#define zero $r0 +#define ra $r1 +#define tp $r2 +#define sp $r3 +#define a0 $r4 +#define a1 $r5 +#define a2 $r6 +#define a3 $r7 +#define a4 $r8 +#define a5 $r9 +#define a6 $r10 +#define a7 $r11 +#define t0 $r12 +#define t1 $r13 +#define t2 $r14 +#define t3 $r15 +#define t4 $r16 +#define t5 $r17 +#define t6 $r18 +#define t7 $r19 +#define t8 $r20 +#define u0 $r21 +#define fp $r22 +#define s0 $r23 +#define s1 $r24 +#define s2 $r25 +#define s3 $r26 +#define s4 $r27 +#define s5 $r28 +#define s6 $r29 +#define s7 $r30 +#define s8 $r31 +#endif + +/* + * LoongArch page table entry definition + * Original header file arch/loongarch/include/asm/loongarch.h + */ +#define _PAGE_VALID_SHIFT 0 +#define _PAGE_DIRTY_SHIFT 1 +#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */ +#define PLV_KERN 0 +#define PLV_USER 3 +#define PLV_MASK 0x3 +#define _CACHE_SHIFT 4 /* 4~5, two bits */ +#define _PAGE_PRESENT_SHIFT 7 +#define _PAGE_WRITE_SHIFT 8 + +#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT) +#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT) +#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT) +#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT) +#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT) +#define __READABLE (_PAGE_VALID) +#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE) +/* Coherent Cached */ +#define _CACHE_CC BIT_ULL(_CACHE_SHIFT) +#define PS_4K 0x0000000c +#define PS_16K 0x0000000e +#define PS_64K 0x00000010 +#define PS_DEFAULT_SIZE PS_16K + +/* LoongArch Basic CSR registers */ +#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */ +#define CSR_CRMD_PG_SHIFT 4 +#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT) +#define CSR_CRMD_IE_SHIFT 2 +#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT) +#define CSR_CRMD_PLV_SHIFT 0 +#define CSR_CRMD_PLV_WIDTH 2 +#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT) +#define PLV_MASK 0x3 +#define LOONGARCH_CSR_PRMD 0x1 +#define LOONGARCH_CSR_EUEN 0x2 +#define LOONGARCH_CSR_ECFG 0x4 +#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ +#define LOONGARCH_CSR_ERA 0x6 /* ERA */ +#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */ +#define LOONGARCH_CSR_EENTRY 0xc +#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */ +#define CSR_TLBIDX_PS_SHIFT 24 +#define CSR_TLBIDX_PS_WIDTH 6 +#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT) +#define CSR_TLBIDX_SIZEM 0x3f000000 +#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT +#define LOONGARCH_CSR_ASID 0x18 /* ASID */ +#define LOONGARCH_CSR_PGDL 0x19 +#define LOONGARCH_CSR_PGDH 0x1a +/* Page table base */ +#define LOONGARCH_CSR_PGD 0x1b +#define LOONGARCH_CSR_PWCTL0 0x1c +#define LOONGARCH_CSR_PWCTL1 0x1d +#define LOONGARCH_CSR_STLBPGSIZE 0x1e +#define LOONGARCH_CSR_CPUID 0x20 +#define LOONGARCH_CSR_KS0 0x30 +#define LOONGARCH_CSR_KS1 0x31 +#define LOONGARCH_CSR_TMID 0x40 +#define LOONGARCH_CSR_TCFG 0x41 +/* TLB refill exception entry */ +#define LOONGARCH_CSR_TLBRENTRY 0x88 +#define LOONGARCH_CSR_TLBRSAVE 0x8b +#define LOONGARCH_CSR_TLBREHI 0x8e +#define CSR_TLBREHI_PS_SHIFT 0 +#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT) + +#define EXREGS_GPRS (32) + +#ifndef __ASSEMBLER__ +void handle_tlb_refill(void); +void handle_exception(void); + +struct ex_regs { + unsigned long regs[EXREGS_GPRS]; + unsigned long pc; + unsigned long estat; + unsigned long badv; +}; + +#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc) +#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat) +#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv) +#define EXREGS_SIZE sizeof(struct ex_regs) + +#else +#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8) +#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8) +#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8) +#define EXREGS_SIZE ((EXREGS_GPRS + 3) * 8) +#endif + +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h new file mode 100644 index 000000000000..4ec801f37f00 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_MMIO + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +extern vm_vaddr_t *ucall_exit_mmio_addr; + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + WRITE_ONCE(*ucall_exit_mmio_addr, uc); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index 5f389166338c..162f303d9daa 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -11,6 +11,19 @@ #include <asm/csr.h> #include "kvm_util.h" +#define INSN_OPCODE_MASK 0x007c +#define INSN_OPCODE_SHIFT 2 +#define INSN_OPCODE_SYSTEM 28 + +#define INSN_MASK_FUNCT3 0x7000 +#define INSN_SHIFT_FUNCT3 12 + +#define INSN_CSR_MASK 0xfff00000 +#define INSN_CSR_SHIFT 20 + +#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3) +#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT) + static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, uint64_t idx, uint64_t size) { @@ -60,7 +73,8 @@ static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext) return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext)); } -struct ex_regs { +struct pt_regs { + unsigned long epc; unsigned long ra; unsigned long sp; unsigned long gp; @@ -92,16 +106,19 @@ struct ex_regs { unsigned long t4; unsigned long t5; unsigned long t6; - unsigned long epc; + /* Supervisor/Machine CSRs */ unsigned long status; + unsigned long badaddr; unsigned long cause; + /* a0 value before the syscall */ + unsigned long orig_a0; }; #define NR_VECTORS 2 #define NR_EXCEPTIONS 32 #define EC_MASK (NR_EXCEPTIONS - 1) -typedef void(*exception_handler_fn)(struct ex_regs *); +typedef void(*exception_handler_fn)(struct pt_regs *); void vm_init_vector_tables(struct kvm_vm *vm); void vcpu_init_vector_tables(struct kvm_vcpu *vcpu); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 815bc45dd8dc..5649cf2f40e8 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -222,6 +222,7 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", + [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages", [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, @@ -248,6 +249,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, + [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 }, [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, @@ -319,6 +321,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape) case VM_MODE_P36V48_16K: vm->pgtable_levels = 4; break; + case VM_MODE_P47V47_16K: case VM_MODE_P36V47_16K: vm->pgtable_levels = 3; break; diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S new file mode 100644 index 000000000000..88bfa505c6f5 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "processor.h" + +/* address of refill exception should be 4K aligned */ +.balign 4096 +.global handle_tlb_refill +handle_tlb_refill: + csrwr t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_PGD + lddir t0, t0, 3 + lddir t0, t0, 1 + ldpte t0, 0 + ldpte t0, 1 + tlbfill + csrrd t0, LOONGARCH_CSR_TLBRSAVE + ertn + + /* + * save and restore all gprs except base register, + * and default value of base register is sp ($r3). + */ +.macro save_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + st.d $r\n, \base, 8 * \n + .endr +.endm + +.macro restore_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + ld.d $r\n, \base, 8 * \n + .endr +.endm + +/* address of general exception should be 4K aligned */ +.balign 4096 +.global handle_exception +handle_exception: + csrwr sp, LOONGARCH_CSR_KS0 + csrrd sp, LOONGARCH_CSR_KS1 + addi.d sp, sp, -EXREGS_SIZE + + save_gprs sp + /* save sp register to stack */ + csrrd t0, LOONGARCH_CSR_KS0 + st.d t0, sp, 3 * 8 + + csrrd t0, LOONGARCH_CSR_ERA + st.d t0, sp, PC_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_ESTAT + st.d t0, sp, ESTAT_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_BADV + st.d t0, sp, BADV_OFFSET_EXREGS + + or a0, sp, zero + bl route_exception + restore_gprs sp + csrrd sp, LOONGARCH_CSR_KS0 + ertn diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c new file mode 100644 index 000000000000..0ac1abcb71cb --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <assert.h> +#include <linux/compiler.h> + +#include "kvm_util.h" +#include "processor.h" +#include "ucall_common.h" + +#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000 +#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000 + +static vm_paddr_t invalid_pgtable[4]; + +static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) +{ + unsigned int shift; + uint64_t mask; + + shift = level * (vm->page_shift - 3) + vm->page_shift; + mask = (1UL << (vm->page_shift - 3)) - 1; + return (gva >> shift) & mask; +} + +static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +{ + return entry & ~((0x1UL << vm->page_shift) - 1); +} + +static uint64_t ptrs_per_pte(struct kvm_vm *vm) +{ + return 1 << (vm->page_shift - 3); +} + +static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t child) +{ + uint64_t *ptep; + int i, ptrs_per_pte; + + ptep = addr_gpa2hva(vm, table); + ptrs_per_pte = 1 << (vm->page_shift - 3); + for (i = 0; i < ptrs_per_pte; i++) + WRITE_ONCE(*(ptep + i), child); +} + +void virt_arch_pgd_alloc(struct kvm_vm *vm) +{ + int i; + vm_paddr_t child, table; + + if (vm->pgd_created) + return; + + child = table = 0; + for (i = 0; i < vm->pgtable_levels; i++) { + invalid_pgtable[i] = child; + table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN, + vm->memslots[MEM_REGION_PT]); + TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i); + virt_set_pgtable(vm, table, child); + child = table; + } + vm->pgd = table; + vm->pgd_created = true; +} + +static int virt_pte_none(uint64_t *ptep, int level) +{ + return *ptep == invalid_pgtable[level]; +} + +static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc) +{ + int level; + uint64_t *ptep; + vm_paddr_t child; + + if (!vm->pgd_created) + goto unmapped_gva; + + child = vm->pgd; + level = vm->pgtable_levels - 1; + while (level > 0) { + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + if (virt_pte_none(ptep, level)) { + if (alloc) { + child = vm_alloc_page_table(vm); + virt_set_pgtable(vm, child, invalid_pgtable[level - 1]); + WRITE_ONCE(*ptep, child); + } else + goto unmapped_gva; + + } else + child = pte_addr(vm, *ptep); + level--; + } + + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + return ptep; + +unmapped_gva: + TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); + exit(EXIT_FAILURE); +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep; + + ptep = virt_populate_pte(vm, gva, 0); + TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); +} + +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + uint32_t prot_bits; + uint64_t *ptep; + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + "vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + ptep = virt_populate_pte(vm, vaddr, 1); + prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER; + WRITE_ONCE(*ptep, paddr | prot_bits); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +{ + uint64_t pte, *ptep; + static const char * const type[] = { "pte", "pmd", "pud", "pgd"}; + + if (level < 0) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (virt_pte_none(ptep, level)) + continue; + fprintf(stream, "%*s%s: %lx: %lx at %p\n", + indent, "", type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--); + } +} + +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + int level; + + if (!vm->pgd_created) + return; + + level = vm->pgtable_levels - 1; + pte_dump(stream, vm, indent, vm->pgd, level); +} + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +{ +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED) + return; + + TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)", + uc.args[0], uc.args[1], uc.args[2]); +} + +void route_exception(struct ex_regs *regs) +{ + unsigned long pc, estat, badv; + + pc = regs->pc; + badv = regs->badv; + estat = regs->estat; + ucall(UCALL_UNHANDLED, 3, pc, estat, badv); + while (1) ; +} + +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) +{ + int i; + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" + "num: %u\n", num); + + vcpu_regs_get(vcpu, ®s); + + va_start(ap, num); + for (i = 0; i < num; i++) + regs.gpr[i + 4] = va_arg(ap, uint64_t); + va_end(ap); + + vcpu_regs_set(vcpu, ®s); +} + +static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + uint64_t csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_get_reg(vcpu, csrid, addr); +} + +static void loongarch_set_csr(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + uint64_t csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_set_reg(vcpu, csrid, val); +} + +static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + int width; + unsigned long val; + struct kvm_vm *vm = vcpu->vm; + + switch (vm->mode) { + case VM_MODE_P36V47_16K: + case VM_MODE_P47V47_16K: + break; + + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + /* user mode and page enable mode */ + val = PLV_USER | CSR_CRMD_PG; + loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1); + + val = 0; + width = vm->page_shift - 3; + + switch (vm->pgtable_levels) { + case 4: + /* pud page shift and width */ + val = (vm->page_shift + width * 2) << 20 | (width << 25); + /* fall throuth */ + case 3: + /* pmd page shift and width */ + val |= (vm->page_shift + width) << 10 | (width << 15); + /* pte page shift and width */ + val |= vm->page_shift | width << 5; + break; + default: + TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels); + } + + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val); + + /* PGD page shift and width */ + val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6; + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd); + + /* + * Refill exception runs on real mode + * Entry address should be physical address + */ + val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val); + + /* + * General exception runs on page-enabled mode + * Entry address should be virtual address + */ + val = (unsigned long)handle_exception; + loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val); + val &= ~CSR_TLBIDX_SIZEM; + val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE); + + /* LOONGARCH_CSR_KS1 is used for exception stack */ + val = __vm_vaddr_alloc(vm, vm->page_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(val != 0, "No memory for exception stack"); + val = val + vm->page_size; + loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val); + val &= ~CSR_TLBREHI_PS; + val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id); +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +{ + size_t stack_size; + uint64_t stack_vaddr; + struct kvm_regs regs; + struct kvm_vcpu *vcpu; + + vcpu = __vm_vcpu_add(vm, vcpu_id); + stack_size = vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(stack_vaddr != 0, "No memory for vm stack"); + + loongarch_vcpu_setup(vcpu); + /* Setup guest general purpose registers */ + vcpu_regs_get(vcpu, ®s); + regs.gpr[3] = stack_vaddr + stack_size; + vcpu_regs_set(vcpu, ®s); + + return vcpu; +} + +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + struct kvm_regs regs; + + /* Setup guest PC register */ + vcpu_regs_get(vcpu, ®s); + regs.pc = (uint64_t)guest_code; + vcpu_regs_set(vcpu, ®s); +} diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c new file mode 100644 index 000000000000..fc6cbb50573f --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + */ +#include "kvm_util.h" + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +vm_vaddr_t *ucall_exit_mmio_addr; + +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ + vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + + virt_map(vm, mmio_gva, mmio_gpa, 1); + + vm->ucall_mmio_addr = mmio_gpa; + + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); +} + +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + if (run->exit_reason == KVM_EXIT_MMIO && + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), + "Unexpected ucall exit mmio address access"); + + return (void *)(*((uint64_t *)run->mmio.data)); + } + + return NULL; +} diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S index aa0abd3f35bb..b787b982e922 100644 --- a/tools/testing/selftests/kvm/lib/riscv/handlers.S +++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S @@ -10,85 +10,88 @@ #include <asm/csr.h> .macro save_context - addi sp, sp, (-8*34) - sd x1, 0(sp) - sd x2, 8(sp) - sd x3, 16(sp) - sd x4, 24(sp) - sd x5, 32(sp) - sd x6, 40(sp) - sd x7, 48(sp) - sd x8, 56(sp) - sd x9, 64(sp) - sd x10, 72(sp) - sd x11, 80(sp) - sd x12, 88(sp) - sd x13, 96(sp) - sd x14, 104(sp) - sd x15, 112(sp) - sd x16, 120(sp) - sd x17, 128(sp) - sd x18, 136(sp) - sd x19, 144(sp) - sd x20, 152(sp) - sd x21, 160(sp) - sd x22, 168(sp) - sd x23, 176(sp) - sd x24, 184(sp) - sd x25, 192(sp) - sd x26, 200(sp) - sd x27, 208(sp) - sd x28, 216(sp) - sd x29, 224(sp) - sd x30, 232(sp) - sd x31, 240(sp) + addi sp, sp, (-8*36) + sd x1, 8(sp) + sd x2, 16(sp) + sd x3, 24(sp) + sd x4, 32(sp) + sd x5, 40(sp) + sd x6, 48(sp) + sd x7, 56(sp) + sd x8, 64(sp) + sd x9, 72(sp) + sd x10, 80(sp) + sd x11, 88(sp) + sd x12, 96(sp) + sd x13, 104(sp) + sd x14, 112(sp) + sd x15, 120(sp) + sd x16, 128(sp) + sd x17, 136(sp) + sd x18, 144(sp) + sd x19, 152(sp) + sd x20, 160(sp) + sd x21, 168(sp) + sd x22, 176(sp) + sd x23, 184(sp) + sd x24, 192(sp) + sd x25, 200(sp) + sd x26, 208(sp) + sd x27, 216(sp) + sd x28, 224(sp) + sd x29, 232(sp) + sd x30, 240(sp) + sd x31, 248(sp) csrr s0, CSR_SEPC csrr s1, CSR_SSTATUS - csrr s2, CSR_SCAUSE - sd s0, 248(sp) + csrr s2, CSR_STVAL + csrr s3, CSR_SCAUSE + sd s0, 0(sp) sd s1, 256(sp) sd s2, 264(sp) + sd s3, 272(sp) .endm .macro restore_context + ld s3, 272(sp) ld s2, 264(sp) ld s1, 256(sp) - ld s0, 248(sp) - csrw CSR_SCAUSE, s2 + ld s0, 0(sp) + csrw CSR_SCAUSE, s3 csrw CSR_SSTATUS, s1 csrw CSR_SEPC, s0 - ld x31, 240(sp) - ld x30, 232(sp) - ld x29, 224(sp) - ld x28, 216(sp) - ld x27, 208(sp) - ld x26, 200(sp) - ld x25, 192(sp) - ld x24, 184(sp) - ld x23, 176(sp) - ld x22, 168(sp) - ld x21, 160(sp) - ld x20, 152(sp) - ld x19, 144(sp) - ld x18, 136(sp) - ld x17, 128(sp) - ld x16, 120(sp) - ld x15, 112(sp) - ld x14, 104(sp) - ld x13, 96(sp) - ld x12, 88(sp) - ld x11, 80(sp) - ld x10, 72(sp) - ld x9, 64(sp) - ld x8, 56(sp) - ld x7, 48(sp) - ld x6, 40(sp) - ld x5, 32(sp) - ld x4, 24(sp) - ld x3, 16(sp) - ld x2, 8(sp) - ld x1, 0(sp) - addi sp, sp, (8*34) + ld x31, 248(sp) + ld x30, 240(sp) + ld x29, 232(sp) + ld x28, 224(sp) + ld x27, 216(sp) + ld x26, 208(sp) + ld x25, 200(sp) + ld x24, 192(sp) + ld x23, 184(sp) + ld x22, 176(sp) + ld x21, 168(sp) + ld x20, 160(sp) + ld x19, 152(sp) + ld x18, 144(sp) + ld x17, 136(sp) + ld x16, 128(sp) + ld x15, 120(sp) + ld x14, 112(sp) + ld x13, 104(sp) + ld x12, 96(sp) + ld x11, 88(sp) + ld x10, 80(sp) + ld x9, 72(sp) + ld x8, 64(sp) + ld x7, 56(sp) + ld x6, 48(sp) + ld x5, 40(sp) + ld x4, 32(sp) + ld x3, 24(sp) + ld x2, 16(sp) + ld x1, 8(sp) + addi sp, sp, (8*36) .endm .balign 4 diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index dd663bcf0cc0..2eac7d4b59e9 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -402,7 +402,7 @@ struct handlers { exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS]; }; -void route_exception(struct ex_regs *regs) +void route_exception(struct pt_regs *regs) { struct handlers *handlers = (struct handlers *)exception_handlers; int vector = 0, ec; diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index 9e370800a6a2..f962fefc48fa 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -15,7 +15,7 @@ static int timer_irq = IRQ_S_TIMER; -static void guest_irq_handler(struct ex_regs *regs) +static void guest_irq_handler(struct pt_regs *regs) { uint64_t xcnt, xcnt_diff_us, cmp; unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG; diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index cfed6c727bfc..739d17befb5a 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -27,7 +27,7 @@ static void guest_code(void) GUEST_DONE(); } -static void guest_breakpoint_handler(struct ex_regs *regs) +static void guest_breakpoint_handler(struct pt_regs *regs) { WRITE_ONCE(sw_bp_addr, regs->epc); regs->epc += 4; diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 569f2d67c9b8..a0b7dabb5040 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -17,6 +17,15 @@ enum { VCPU_FEATURE_SBI_EXT, }; +enum { + KVM_RISC_V_REG_OFFSET_VSTART = 0, + KVM_RISC_V_REG_OFFSET_VL, + KVM_RISC_V_REG_OFFSET_VTYPE, + KVM_RISC_V_REG_OFFSET_VCSR, + KVM_RISC_V_REG_OFFSET_VLENB, + KVM_RISC_V_REG_OFFSET_MAX, +}; + static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX]; bool filter_reg(__u64 reg) @@ -143,6 +152,38 @@ bool check_reject_set(int err) return err == EINVAL; } +static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s, + uint64_t feature) +{ + unsigned long vlenb_reg = 0; + int rc; + u64 reg, size; + + /* Enable V extension so that we can get the vlenb register */ + rc = __vcpu_set_reg(vcpu, feature, 1); + if (rc) + return rc; + + vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]); + if (!vlenb_reg) { + TEST_FAIL("Can't compute vector register size from zero vlenb\n"); + return -EPERM; + } + + size = __builtin_ctzl(vlenb_reg); + size <<= KVM_REG_SIZE_SHIFT; + + for (int i = 0; i < 32; i++) { + reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i); + s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg; + } + + /* We should assert if disabling failed here while enabling succeeded before */ + vcpu_set_reg(vcpu, feature, 0); + + return 0; +} + void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) { unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 }; @@ -172,6 +213,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) if (!s->feature) continue; + if (s->feature == KVM_RISCV_ISA_EXT_V) { + feature = RISCV_ISA_EXT_REG(s->feature); + rc = override_vector_reg_size(vcpu, s, feature); + if (rc) + goto skip; + } + switch (s->feature_type) { case VCPU_FEATURE_ISA_EXT: feature = RISCV_ISA_EXT_REG(s->feature); @@ -186,6 +234,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) /* Try to enable the desired extension */ __vcpu_set_reg(vcpu, feature, 1); +skip: /* Double check whether the desired extension was enabled */ __TEST_REQUIRE(__vcpu_has_ext(vcpu, feature), "%s not available, skipping tests", s->name); @@ -410,6 +459,35 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id) return strdup_printf("%lld /* UNKNOWN */", reg_off); } +static const char *vector_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_v_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR); + int reg_index = 0; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR); + + if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0)) + reg_index = reg_off - KVM_REG_RISCV_VECTOR_REG(0); + switch (reg_off) { + case KVM_REG_RISCV_VECTOR_REG(0) ... + KVM_REG_RISCV_VECTOR_REG(31): + return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index); + case KVM_REG_RISCV_VECTOR_CSR_REG(vstart): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vl): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vtype): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)"; + } + + return strdup_printf("%lld /* UNKNOWN */", reg_off); +} + #define KVM_ISA_EXT_ARR(ext) \ [KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext @@ -639,6 +717,9 @@ void print_reg(const char *prefix, __u64 id) case KVM_REG_SIZE_U128: reg_size = "KVM_REG_SIZE_U128"; break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; default: printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n", (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK); @@ -670,6 +751,10 @@ void print_reg(const char *prefix, __u64 id) printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n", reg_size, fp_d_id_to_str(prefix, id)); break; + case KVM_REG_RISCV_VECTOR: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n", + reg_size, vector_id_to_str(prefix, id)); + break; case KVM_REG_RISCV_ISA_EXT: printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n", reg_size, isa_ext_id_to_str(prefix, id)); @@ -874,6 +959,48 @@ static __u64 fp_d_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D, }; +/* Define a default vector registers with length. This will be overwritten at runtime */ +static __u64 vector_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V, +}; + #define SUBLIST_BASE \ {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \ .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),} @@ -898,6 +1025,9 @@ static __u64 fp_d_regs[] = { {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \ .regs_n = ARRAY_SIZE(fp_d_regs),} +#define SUBLIST_V \ + {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),} + #define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \ static __u64 regs_##ext[] = { \ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \ @@ -966,6 +1096,7 @@ KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP); KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D); +KVM_ISA_EXT_SUBLIST_CONFIG(v, V); KVM_ISA_EXT_SIMPLE_CONFIG(h, H); KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM); KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); @@ -1040,6 +1171,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_fp_f, &config_fp_d, &config_h, + &config_v, &config_smnpm, &config_smstateen, &config_sscofpmf, diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index 03406de4989d..924a335d2262 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -73,7 +73,6 @@ unsigned long pmu_csr_read_num(int csr_num) switch (csr_num) { switchcase_csr_read_32(CSR_CYCLE, ret) - switchcase_csr_read_32(CSR_CYCLEH, ret) default : break; } @@ -128,17 +127,36 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags) "Unable to stop counter %ld error %ld\n", counter, ret.error); } -static void guest_illegal_exception_handler(struct ex_regs *regs) +static void guest_illegal_exception_handler(struct pt_regs *regs) { + unsigned long insn; + int opcode, csr_num, funct3; + __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL, "Unexpected exception handler %lx\n", regs->cause); + insn = regs->badaddr; + opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT; + __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM, + "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn); + + csr_num = GET_CSR_NUM(insn); + funct3 = GET_RM(insn); + /* Validate if it is a CSR read/write operation */ + __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4), + "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n", + funct3, csr_num); + + /* Validate if it is a HPMCOUNTER CSR operation */ + __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31), + "Unexpected csr_num 0x%x\n", csr_num); + illegal_handler_invoked = true; /* skip the trapping instruction */ regs->epc += 4; } -static void guest_irq_handler(struct ex_regs *regs) +static void guest_irq_handler(struct pt_regs *regs) { unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG; struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index bc440d5aba57..ce3ac0fd6dfb 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -350,7 +350,7 @@ static void test_invalid_memory_region_flags(void) struct kvm_vm *vm; int r, i; -#if defined __aarch64__ || defined __riscv || defined __x86_64__ +#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__ supported_flags |= KVM_MEM_READONLY; #endif diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index d9d15c8f27c7..8d579bcee709 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -766,8 +766,8 @@ int osnoise_hist_main(int argc, char *argv[]) struct osnoise_params *params; struct osnoise_tool *record = NULL; struct osnoise_tool *tool = NULL; + enum result return_value = ERROR; struct trace_instance *trace; - int return_value = 1; int retval; params = osnoise_hist_parse_args(argc, argv); @@ -889,12 +889,13 @@ int osnoise_hist_main(int argc, char *argv[]) osnoise_print_stats(params, tool); - return_value = 0; + return_value = PASSED; if (osnoise_trace_is_off(tool, record)) { printf("rtla osnoise hit stop tracing\n"); save_trace_to_file(record ? record->trace.inst : NULL, params->trace_output); + return_value = FAILED; } out_hist: diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 3455ee73e2e6..2c12780c8aa9 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -594,8 +594,8 @@ int osnoise_top_main(int argc, char **argv) struct osnoise_params *params; struct osnoise_tool *record = NULL; struct osnoise_tool *tool = NULL; + enum result return_value = ERROR; struct trace_instance *trace; - int return_value = 1; int retval; params = osnoise_top_parse_args(argc, argv); @@ -715,12 +715,13 @@ int osnoise_top_main(int argc, char **argv) osnoise_print_stats(params, tool); - return_value = 0; + return_value = PASSED; if (osnoise_trace_is_off(tool, record)) { printf("osnoise hit stop tracing\n"); save_trace_to_file(record ? record->trace.inst : NULL, params->trace_output); + return_value = FAILED; } out_top: diff --git a/tools/tracing/rtla/src/timerlat_bpf.c b/tools/tracing/rtla/src/timerlat_bpf.c index 5abee884037a..0bc44ce5d69b 100644 --- a/tools/tracing/rtla/src/timerlat_bpf.c +++ b/tools/tracing/rtla/src/timerlat_bpf.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #ifdef HAVE_BPF_SKEL +#define _GNU_SOURCE #include "timerlat.h" #include "timerlat_bpf.h" #include "timerlat.skel.h" diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 9d9efeedc4c2..36d2294c963d 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -1141,11 +1141,11 @@ int timerlat_hist_main(int argc, char *argv[]) struct timerlat_params *params; struct osnoise_tool *record = NULL; struct timerlat_u_params params_u; + enum result return_value = ERROR; struct osnoise_tool *tool = NULL; struct osnoise_tool *aa = NULL; struct trace_instance *trace; int dma_latency_fd = -1; - int return_value = 1; pthread_t timerlat_u; int retval; int nr_cpus, i; @@ -1378,7 +1378,7 @@ int timerlat_hist_main(int argc, char *argv[]) timerlat_print_stats(params, tool); - return_value = 0; + return_value = PASSED; if (osnoise_trace_is_off(tool, record) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); @@ -1388,6 +1388,7 @@ int timerlat_hist_main(int argc, char *argv[]) save_trace_to_file(record ? record->trace.inst : NULL, params->trace_output); + return_value = FAILED; } out_hist: diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 79cb6f28967f..7365e08fe986 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -985,12 +985,12 @@ int timerlat_top_main(int argc, char *argv[]) struct timerlat_params *params; struct osnoise_tool *record = NULL; struct timerlat_u_params params_u; + enum result return_value = ERROR; struct osnoise_tool *top = NULL; struct osnoise_tool *aa = NULL; struct trace_instance *trace; int dma_latency_fd = -1; pthread_t timerlat_u; - int return_value = 1; char *max_lat; int retval; int nr_cpus, i; @@ -1197,7 +1197,7 @@ int timerlat_top_main(int argc, char *argv[]) timerlat_print_stats(params, top); - return_value = 0; + return_value = PASSED; if (osnoise_trace_is_off(top, record) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); @@ -1207,6 +1207,7 @@ int timerlat_top_main(int argc, char *argv[]) save_trace_to_file(record ? record->trace.inst : NULL, params->trace_output); + return_value = FAILED; } else if (params->aa_only) { /* * If the trace did not stop with --aa-only, at least print the diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 4995d35cf3ec..d6ab15dcb490 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -227,6 +227,8 @@ long parse_ns_duration(char *val) # define __NR_sched_setattr 355 # elif __s390x__ # define __NR_sched_setattr 345 +# elif __loongarch__ +# define __NR_sched_setattr 274 # endif #endif diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index 101d4799a009..a2a6f89f342d 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -83,3 +83,9 @@ int auto_house_keeping(cpu_set_t *monitored_cpus); #define ns_to_usf(x) (((double)x/1000)) #define ns_to_per(total, part) ((part * 100) / (double)total) + +enum result { + PASSED = 0, /* same as EXIT_SUCCESS */ + ERROR = 1, /* same as EXIT_FAILURE, an error in arguments */ + FAILED = 2, /* test hit the stop tracing condition */ +}; diff --git a/tools/tracing/rtla/tests/engine.sh b/tools/tracing/rtla/tests/engine.sh index b1697b3e3f52..f2616a8e4179 100644 --- a/tools/tracing/rtla/tests/engine.sh +++ b/tools/tracing/rtla/tests/engine.sh @@ -39,6 +39,7 @@ reset_osnoise() { } check() { + expected_exitcode=${3:-0} # Simple check: run rtla with given arguments and test exit code. # If TEST_COUNT is set, run the test. Otherwise, just count. ctr=$(($ctr + 1)) @@ -49,7 +50,7 @@ check() { # Run rtla; in case of failure, include its output as comment # in the test results. result=$(stdbuf -oL $TIMEOUT "$RTLA" $2 2>&1); exitcode=$? - if [ $exitcode -eq 0 ] + if [ $exitcode -eq $expected_exitcode ] then echo "ok $ctr - $1" else @@ -68,12 +69,14 @@ check_with_osnoise_options() { # Save original arguments arg1=$1 arg2=$2 + arg3=$3 # Apply osnoise options (if not dry run) if [ -n "$TEST_COUNT" ] then [ "$NO_RESET_OSNOISE" == 1 ] || reset_osnoise shift + shift while shift do [ "$1" == "" ] && continue @@ -84,7 +87,7 @@ check_with_osnoise_options() { done fi - NO_RESET_OSNOISE=1 check "$arg1" "$arg2" + NO_RESET_OSNOISE=1 check "$arg1" "$arg2" "$arg3" } set_timeout() { diff --git a/tools/tracing/rtla/tests/hwnoise.t b/tools/tracing/rtla/tests/hwnoise.t index bbed17580537..5f71401a139e 100644 --- a/tools/tracing/rtla/tests/hwnoise.t +++ b/tools/tracing/rtla/tests/hwnoise.t @@ -10,11 +10,11 @@ check "verify help page" \ check "detect noise higher than one microsecond" \ "hwnoise -c 0 -T 1 -d 5s -q" check "set the automatic trace mode" \ - "hwnoise -a 5 -d 30s" + "hwnoise -a 5 -d 30s" 2 check "set scheduling param to the osnoise tracer threads" \ "hwnoise -P F:1 -c 0 -r 900000 -d 1M -q" check "stop the trace if a single sample is higher than 1 us" \ - "hwnoise -s 1 -T 1 -t -d 30s" + "hwnoise -s 1 -T 1 -t -d 30s" 2 check "enable a trace event trigger" \ "hwnoise -t -e osnoise:irq_noise trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 1m" diff --git a/tools/tracing/rtla/tests/osnoise.t b/tools/tracing/rtla/tests/osnoise.t index e5995c03c790..44908fc01abf 100644 --- a/tools/tracing/rtla/tests/osnoise.t +++ b/tools/tracing/rtla/tests/osnoise.t @@ -10,9 +10,9 @@ check "verify help page" \ check "verify the --priority/-P param" \ "osnoise top -P F:1 -c 0 -r 900000 -d 1M -q" check "verify the --stop/-s param" \ - "osnoise top -s 30 -T 1 -t" + "osnoise top -s 30 -T 1 -t" 2 check "verify the --trace param" \ - "osnoise hist -s 30 -T 1 -t" + "osnoise hist -s 30 -T 1 -t" 2 check "verify the --entries/-E param" \ "osnoise hist -P F:1 -c 0 -r 900000 -d 1M -b 10 -E 25" @@ -20,6 +20,6 @@ check "verify the --entries/-E param" \ # and stopping on threshold. # If default period is not set, this will time out. check_with_osnoise_options "apply default period" \ - "osnoise hist -s 1" period_us=600000000 + "osnoise hist -s 1" 2 period_us=600000000 test_end diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t index e939ff71d6be..579c12a85e8f 100644 --- a/tools/tracing/rtla/tests/timerlat.t +++ b/tools/tracing/rtla/tests/timerlat.t @@ -21,21 +21,21 @@ export RTLA_NO_BPF=$option check "verify help page" \ "timerlat --help" check "verify -s/--stack" \ - "timerlat top -s 3 -T 10 -t" + "timerlat top -s 3 -T 10 -t" 2 check "verify -P/--priority" \ "timerlat top -P F:1 -c 0 -d 1M -q" check "test in nanoseconds" \ - "timerlat top -i 2 -c 0 -n -d 30s" + "timerlat top -i 2 -c 0 -n -d 30s" 2 check "set the automatic trace mode" \ - "timerlat top -a 5 --dump-tasks" + "timerlat top -a 5 --dump-tasks" 2 check "print the auto-analysis if hits the stop tracing condition" \ - "timerlat top --aa-only 5" + "timerlat top --aa-only 5" 2 check "disable auto-analysis" \ - "timerlat top -s 3 -T 10 -t --no-aa" + "timerlat top -s 3 -T 10 -t --no-aa" 2 check "verify -c/--cpus" \ "timerlat hist -c 0 -d 30s" check "hist test in nanoseconds" \ - "timerlat hist -i 2 -c 0 -n -d 30s" + "timerlat hist -i 2 -c 0 -n -d 30s" 2 done test_end |