Diffstat (limited to 'arch/arm64/net/bpf_jit_comp.c')
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 391
1 file changed, 311 insertions, 80 deletions
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 8446848edddb..da8b89dd2910 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -7,6 +7,7 @@
 #define pr_fmt(fmt) "bpf_jit: " fmt
 
+#include <linux/arm-smccc.h>
 #include <linux/bitfield.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
@@ -17,6 +18,7 @@
 #include <asm/asm-extable.h>
 #include <asm/byteorder.h>
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
 #include <asm/insn.h>
 #include <asm/text-patching.h>
@@ -272,7 +274,7 @@ static inline void emit_a64_add_i(const bool is64, const int dst, const int src,
 {
 	if (is_addsub_imm(imm)) {
 		emit(A64_ADD_I(is64, dst, src, imm), ctx);
-	} else if (is_addsub_imm(-imm)) {
+	} else if (is_addsub_imm(-(u32)imm)) {
 		emit(A64_SUB_I(is64, dst, src, -imm), ctx);
 	} else {
 		emit_a64_mov_i(is64, tmp, imm, ctx);
@@ -647,6 +649,81 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	return 0;
 }
 
+static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+	const s32 imm = insn->imm;
+	const s16 off = insn->off;
+	const u8 code = insn->code;
+	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
+	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+	const u8 dst = bpf2a64[insn->dst_reg];
+	const u8 src = bpf2a64[insn->src_reg];
+	const u8 tmp = bpf2a64[TMP_REG_1];
+	u8 reg;
+
+	switch (imm) {
+	case BPF_LOAD_ACQ:
+		reg = src;
+		break;
+	case BPF_STORE_REL:
+		reg = dst;
+		break;
+	default:
+		pr_err_once("unknown atomic load/store op code %02x\n", imm);
+		return -EINVAL;
+	}
+
+	if (off) {
+		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
+		reg = tmp;
+	}
+	if (arena) {
+		emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
+		reg = tmp;
+	}
+
+	switch (imm) {
+	case BPF_LOAD_ACQ:
+		switch (BPF_SIZE(code)) {
+		case BPF_B:
+			emit(A64_LDARB(dst, reg), ctx);
+			break;
+		case BPF_H:
+			emit(A64_LDARH(dst, reg), ctx);
+			break;
+		case BPF_W:
+			emit(A64_LDAR32(dst, reg), ctx);
+			break;
+		case BPF_DW:
+			emit(A64_LDAR64(dst, reg), ctx);
+			break;
+		}
+		break;
+	case BPF_STORE_REL:
+		switch (BPF_SIZE(code)) {
+		case BPF_B:
+			emit(A64_STLRB(src, reg), ctx);
+			break;
+		case BPF_H:
+			emit(A64_STLRH(src, reg), ctx);
+			break;
+		case BPF_W:
+			emit(A64_STLR32(src, reg), ctx);
+			break;
+		case BPF_DW:
+			emit(A64_STLR64(src, reg), ctx);
+			break;
+		}
+		break;
+	default:
+		pr_err_once("unexpected atomic load/store op code %02x\n",
+			    imm);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_ARM64_LSE_ATOMICS
 static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
 {
@@ -864,7 +941,51 @@ static void build_plt(struct jit_ctx *ctx)
 	plt->target = (u64)&dummy_tramp;
 }
 
-static void build_epilogue(struct jit_ctx *ctx)
+/* Clobbers BPF registers 1-4, aka x0-x3 */
+static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
+{
+	const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */
+	u8 k = get_spectre_bhb_loop_value();
+
+	if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
+	    cpu_mitigations_off() || __nospectre_bhb ||
+	    arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
+		return;
+
+	if (capable(CAP_SYS_ADMIN))
+		return;
+
+	if (supports_clearbhb(SCOPE_SYSTEM)) {
+		emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx);
+		return;
+	}
+
+	if (k) {
+		emit_a64_mov_i64(r1, k, ctx);
+		emit(A64_B(1), ctx);
+		emit(A64_SUBS_I(true, r1, r1, 1), ctx);
+		emit(A64_B_(A64_COND_NE, -2), ctx);
+		emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx);
+		emit(aarch64_insn_get_isb_value(), ctx);
+	}
+
+	if (is_spectre_bhb_fw_mitigated()) {
+		emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR,
+			       ARM_SMCCC_ARCH_WORKAROUND_3), ctx);
+		switch (arm_smccc_1_1_get_conduit()) {
+		case SMCCC_CONDUIT_HVC:
+			emit(aarch64_insn_get_hvc_value(), ctx);
+			break;
+		case SMCCC_CONDUIT_SMC:
+			emit(aarch64_insn_get_smc_value(), ctx);
+			break;
+		default:
+			pr_err_once("Firmware mitigation enabled with unknown conduit\n");
+		}
+	}
+}
+
+static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
 {
 	const u8 r0 = bpf2a64[BPF_REG_0];
 	const u8 ptr = bpf2a64[TCCNT_PTR];
@@ -877,10 +998,13 @@ static void build_epilogue(struct jit_ctx *ctx)
 
 	emit(A64_POP(A64_ZR, ptr, A64_SP), ctx);
 
+	if (was_classic)
+		build_bhb_mitigation(ctx);
+
 	/* Restore FP/LR registers */
 	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
 
-	/* Set return value */
+	/* Move the return value from bpf:r0 (aka x7) to x0 */
 	emit(A64_MOV(1, A64_R(0), r0), ctx);
 
 	/* Authenticate lr */
@@ -1159,7 +1283,7 @@ emit_bswap_uxt:
 	case BPF_ALU64 | BPF_SUB | BPF_K:
 		if (is_addsub_imm(imm)) {
 			emit(A64_SUB_I(is64, dst, dst, imm), ctx);
-		} else if (is_addsub_imm(-imm)) {
+		} else if (is_addsub_imm(-(u32)imm)) {
 			emit(A64_ADD_I(is64, dst, dst, -imm), ctx);
 		} else {
 			emit_a64_mov_i(is64, tmp, imm, ctx);
@@ -1330,7 +1454,7 @@ emit_cond_jmp:
 	case BPF_JMP32 | BPF_JSLE | BPF_K:
 		if (is_addsub_imm(imm)) {
 			emit(A64_CMP_I(is64, dst, imm), ctx);
-		} else if (is_addsub_imm(-imm)) {
+		} else if (is_addsub_imm(-(u32)imm)) {
 			emit(A64_CMN_I(is64, dst, -imm), ctx);
 		} else {
 			emit_a64_mov_i(is64, tmp, imm, ctx);
@@ -1641,11 +1765,17 @@ emit_cond_jmp:
 			return ret;
 		break;
 
+	case BPF_STX | BPF_ATOMIC | BPF_B:
+	case BPF_STX | BPF_ATOMIC | BPF_H:
 	case BPF_STX | BPF_ATOMIC | BPF_W:
 	case BPF_STX | BPF_ATOMIC | BPF_DW:
+	case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
+	case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
-		if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+		if (bpf_atomic_is_load_store(insn))
+			ret = emit_atomic_ld_st(insn, ctx);
+		else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
 			ret = emit_lse_atomic(insn, ctx);
 		else
 			ret = emit_ll_sc_atomic(insn, ctx);
@@ -1817,7 +1947,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	}
 
 	ctx.epilogue_offset = ctx.idx;
-	build_epilogue(&ctx);
+	build_epilogue(&ctx, was_classic);
 	build_plt(&ctx);
 
 	extable_align = __alignof__(struct exception_table_entry);
@@ -1880,7 +2010,7 @@ skip_init_ctx:
 		goto out_free_hdr;
 	}
 
-	build_epilogue(&ctx);
+	build_epilogue(&ctx, was_classic);
 	build_plt(&ctx);
 
 	/* Extra pass to validate JITed code. */
@@ -1983,7 +2113,7 @@ bool bpf_jit_supports_subprog_tailcalls(void)
 }
 
 static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
-			    int args_off, int retval_off, int run_ctx_off,
+			    int bargs_off, int retval_off, int run_ctx_off,
 			    bool save_ret)
 {
 	__le32 *branch;
@@ -2025,7 +2155,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
 	branch = ctx->image + ctx->idx;
 	emit(A64_NOP, ctx);
 
-	emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
+	emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx);
 	if (!p->jited)
 		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
 
@@ -2050,7 +2180,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
 }
 
 static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
-			       int args_off, int retval_off, int run_ctx_off,
+			       int bargs_off, int retval_off, int run_ctx_off,
 			       __le32 **branches)
 {
 	int i;
@@ -2060,7 +2190,7 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
 	 */
 	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
 	for (i = 0; i < tl->nr_links; i++) {
-		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
+		invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
 				run_ctx_off, true);
 		/* if (*(u64 *)(sp + retval_off) != 0)
 		 *	goto do_fexit;
@@ -2074,23 +2204,125 @@
 	}
 }
 
-static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
+struct arg_aux {
+	/* how many args are passed through registers, the rest of the args are
+	 * passed through stack
+	 */
+	int args_in_regs;
+	/* how many registers are used to pass arguments */
+	int regs_for_args;
+	/* how much stack is used for additional args passed to bpf program
+	 * that did not fit in original function registers
+	 */
+	int bstack_for_args;
+	/* home much stack is used for additional args passed to the
+	 * original function when called from trampoline (this one needs
+	 * arguments to be properly aligned)
+	 */
+	int ostack_for_args;
+};
+
+static int calc_arg_aux(const struct btf_func_model *m,
+			struct arg_aux *a)
 {
-	int i;
+	int stack_slots, nregs, slots, i;
+
+	/* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */
+	for (i = 0, nregs = 0; i < m->nr_args; i++) {
+		slots = (m->arg_size[i] + 7) / 8;
+		if (nregs + slots <= 8) /* passed through register ? */
+			nregs += slots;
+		else
+			break;
+	}
 
-	for (i = 0; i < nregs; i++) {
-		emit(A64_STR64I(i, A64_SP, args_off), ctx);
-		args_off += 8;
+	a->args_in_regs = i;
+	a->regs_for_args = nregs;
+	a->ostack_for_args = 0;
+	a->bstack_for_args = 0;
+
+	/* the rest arguments are passed through stack */
+	for (; i < m->nr_args; i++) {
+		/* We can not know for sure about exact alignment needs for
+		 * struct passed on stack, so deny those
+		 */
+		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
+			return -ENOTSUPP;
+		stack_slots = (m->arg_size[i] + 7) / 8;
+		a->bstack_for_args += stack_slots * 8;
+		a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
+	}
+
+	return 0;
+}
+
+static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes)
+{
+	if (effective_bytes) {
+		int garbage_bits = 64 - 8 * effective_bytes;
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		/* garbage bits are at the right end */
+		emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
+		emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
+#else
+		/* garbage bits are at the left end */
+		emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
+		emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
+#endif
 	}
 }
 
-static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
+static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off,
+		      const struct btf_func_model *m,
+		      const struct arg_aux *a,
+		      bool for_call_origin)
 {
 	int i;
+	int reg;
+	int doff;
+	int soff;
+	int slots;
+	u8 tmp = bpf2a64[TMP_REG_1];
+
+	/* store arguments to the stack for the bpf program, or restore
+	 * arguments from stack for the original function
+	 */
+	for (reg = 0; reg < a->regs_for_args; reg++) {
+		emit(for_call_origin ?
+		     A64_LDR64I(reg, A64_SP, bargs_off) :
+		     A64_STR64I(reg, A64_SP, bargs_off),
+		     ctx);
+		bargs_off += 8;
+	}
 
-	for (i = 0; i < nregs; i++) {
-		emit(A64_LDR64I(i, A64_SP, args_off), ctx);
-		args_off += 8;
+	soff = 32; /* on stack arguments start from FP + 32 */
+	doff = (for_call_origin ? oargs_off : bargs_off);
+
+	/* save on stack arguments */
+	for (i = a->args_in_regs; i < m->nr_args; i++) {
+		slots = (m->arg_size[i] + 7) / 8;
+		/* verifier ensures arg_size <= 16, so slots equals 1 or 2 */
+		while (slots-- > 0) {
+			emit(A64_LDR64I(tmp, A64_FP, soff), ctx);
+			/* if there is unused space in the last slot, clear
+			 * the garbage contained in the space.
+			 */
+			if (slots == 0 && !for_call_origin)
+				clear_garbage(ctx, tmp, m->arg_size[i] % 8);
+			emit(A64_STR64I(tmp, A64_SP, doff), ctx);
+			soff += 8;
+			doff += 8;
+		}
+	}
+}
+
+static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
+{
+	int reg;
+
+	for (reg = 0; reg < nregs; reg++) {
+		emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx);
+		bargs_off += 8;
 	}
 }
 
@@ -2113,17 +2345,21 @@ static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
  */
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 			      struct bpf_tramp_links *tlinks, void *func_addr,
-			      int nregs, u32 flags)
+			      const struct btf_func_model *m,
+			      const struct arg_aux *a,
+			      u32 flags)
 {
 	int i;
 	int stack_size;
 	int retaddr_off;
 	int regs_off;
 	int retval_off;
-	int args_off;
-	int nregs_off;
+	int bargs_off;
+	int nfuncargs_off;
 	int ip_off;
 	int run_ctx_off;
+	int oargs_off;
+	int nfuncargs;
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2132,31 +2368,38 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 	bool is_struct_ops = is_struct_ops_tramp(fentry);
 
 	/* trampoline stack layout:
-	 *                  [ parent ip ]
-	 *                  [ FP ]
-	 * SP + retaddr_off [ self ip ]
-	 *                  [ FP ]
+	 *                     [ parent ip ]
+	 *                     [ FP ]
+	 * SP + retaddr_off    [ self ip ]
+	 *                     [ FP ]
 	 *
-	 *                  [ padding ] align SP to multiples of 16
+	 *                     [ padding ] align SP to multiples of 16
 	 *
-	 *                  [ x20 ] callee saved reg x20
-	 * SP + regs_off    [ x19 ] callee saved reg x19
+	 *                     [ x20 ] callee saved reg x20
+	 * SP + regs_off       [ x19 ] callee saved reg x19
 	 *
-	 * SP + retval_off  [ return value ] BPF_TRAMP_F_CALL_ORIG or
-	 *                                   BPF_TRAMP_F_RET_FENTRY_RET
+	 * SP + retval_off     [ return value ] BPF_TRAMP_F_CALL_ORIG or
+	 *                                      BPF_TRAMP_F_RET_FENTRY_RET
+	 *                     [ arg reg N ]
+	 *                     [ ... ]
+	 * SP + bargs_off      [ arg reg 1 ] for bpf
 	 *
-	 *                  [ arg reg N ]
-	 *                  [ ... ]
-	 * SP + args_off    [ arg reg 1 ]
+	 * SP + nfuncargs_off  [ arg regs count ]
 	 *
-	 * SP + nregs_off   [ arg regs count ]
+	 * SP + ip_off         [ traced function ] BPF_TRAMP_F_IP_ARG flag
 	 *
-	 * SP + ip_off      [ traced function ] BPF_TRAMP_F_IP_ARG flag
+	 * SP + run_ctx_off    [ bpf_tramp_run_ctx ]
 	 *
-	 * SP + run_ctx_off [ bpf_tramp_run_ctx ]
+	 *                     [ stack arg N ]
+	 *                     [ ... ]
+	 * SP + oargs_off      [ stack arg 1 ] for original func
 	 */
 
 	stack_size = 0;
+	oargs_off = stack_size;
+	if (flags & BPF_TRAMP_F_CALL_ORIG)
+		stack_size += a->ostack_for_args;
+
 	run_ctx_off = stack_size;
 	/* room for bpf_tramp_run_ctx */
 	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
@@ -2166,13 +2409,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 	if (flags & BPF_TRAMP_F_IP_ARG)
 		stack_size += 8;
 
-	nregs_off = stack_size;
+	nfuncargs_off = stack_size;
 	/* room for args count */
 	stack_size += 8;
 
-	args_off = stack_size;
+	bargs_off = stack_size;
 	/* room for args */
-	stack_size += nregs * 8;
+	nfuncargs = a->regs_for_args + a->bstack_for_args / 8;
+	stack_size += 8 * nfuncargs;
 
 	/* room for return value */
 	retval_off = stack_size;
@@ -2219,11 +2463,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 	}
 
 	/* save arg regs count*/
-	emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
-	emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
+	emit(A64_MOVZ(1, A64_R(10), nfuncargs, 0), ctx);
+	emit(A64_STR64I(A64_R(10), A64_SP, nfuncargs_off), ctx);
 
-	/* save arg regs */
-	save_args(ctx, args_off, nregs);
+	/* save args for bpf */
+	save_args(ctx, bargs_off, oargs_off, m, a, false);
 
 	/* save callee saved registers */
 	emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2239,7 +2483,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 	}
 
 	for (i = 0; i < fentry->nr_links; i++)
-		invoke_bpf_prog(ctx, fentry->links[i], args_off,
+		invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
 				retval_off, run_ctx_off,
 				flags & BPF_TRAMP_F_RET_FENTRY_RET);
 
@@ -2249,12 +2493,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 		if (!branches)
 			return -ENOMEM;
 
-		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
+		invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off,
 				   run_ctx_off, branches);
 	}
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
-		restore_args(ctx, args_off, nregs);
+		/* save args for original func */
+		save_args(ctx, bargs_off, oargs_off, m, a, true);
 		/* call original func */
 		emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
 		emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
@@ -2273,7 +2518,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 	}
 
 	for (i = 0; i < fexit->nr_links; i++)
-		invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
+		invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
 				run_ctx_off, false);
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
@@ -2287,7 +2532,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 	}
 
 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
-		restore_args(ctx, args_off, nregs);
+		restore_args(ctx, bargs_off, a->regs_for_args);
 
 	/* restore callee saved register x19 and x20 */
 	emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2324,21 +2569,6 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 	return ctx->idx;
 }
 
-static int btf_func_model_nregs(const struct btf_func_model *m)
-{
-	int nregs = m->nr_args;
-	int i;
-
-	/* extra registers needed for struct argument */
-	for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
-		/* The arg_size is at most 16 bytes, enforced by the verifier. */
-		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
-			nregs += (m->arg_size[i] + 7) / 8 - 1;
-	}
-
-	return nregs;
-}
-
 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 			     struct bpf_tramp_links *tlinks, void *func_addr)
 {
@@ -2347,14 +2577,14 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 		.idx = 0,
 	};
 	struct bpf_tramp_image im;
-	int nregs, ret;
+	struct arg_aux aaux;
+	int ret;
 
-	nregs = btf_func_model_nregs(m);
-	/* the first 8 registers are used for arguments */
-	if (nregs > 8)
-		return -ENOTSUPP;
+	ret = calc_arg_aux(m, &aaux);
+	if (ret < 0)
+		return ret;
 
-	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
+	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags);
 	if (ret < 0)
 		return ret;
 
@@ -2381,9 +2611,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
 				u32 flags, struct bpf_tramp_links *tlinks,
 				void *func_addr)
 {
-	int ret, nregs;
-	void *image, *tmp;
 	u32 size = ro_image_end - ro_image;
+	struct arg_aux aaux;
+	void *image, *tmp;
+	int ret;
 
 	/* image doesn't need to be in module memory range, so we can
 	 * use kvmalloc.
@@ -2399,13 +2630,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
 		.write = true,
 	};
 
-	nregs = btf_func_model_nregs(m);
-	/* the first 8 registers are used for arguments */
-	if (nregs > 8)
-		return -ENOTSUPP;
 	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
-	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
+	ret = calc_arg_aux(m, &aaux);
+	if (ret)
+		goto out;
+	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags);
 
 	if (ret > 0 && validate_code(&ctx) < 0) {
 		ret = -EINVAL;
@@ -2669,7 +2899,8 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
 	switch (insn->code) {
 	case BPF_STX | BPF_ATOMIC | BPF_W:
 	case BPF_STX | BPF_ATOMIC | BPF_DW:
-		if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+		if (!bpf_atomic_is_load_store(insn) &&
+		    !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
 			return false;
 	}
 	return true;
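A note on the new emit_atomic_ld_st() path: BPF_LOAD_ACQ lowers to LDARB/LDARH/LDAR and BPF_STORE_REL to STLRB/STLRH/STLR, so what the JIT implements is ordinary release/acquire message passing. A minimal user-space sketch of that contract, using standard GCC/Clang atomic built-ins (illustrative only, not kernel code; producer/consumer/payload/ready are hypothetical names):

#include <stdint.h>

static uint64_t payload;
static int ready;

void producer(void)
{
	payload = 42;	/* plain store, ordered before the release */
	/* BPF_STORE_REL on arm64 becomes stlr */
	__atomic_store_n(&ready, 1, __ATOMIC_RELEASE);
}

uint64_t consumer(void)
{
	/* BPF_LOAD_ACQ on arm64 becomes ldar */
	while (!__atomic_load_n(&ready, __ATOMIC_ACQUIRE))
		;
	return payload;	/* observes 42 once ready is seen */
}

For BPF_PROBE_ATOMIC (arena) accesses, the diff shows the same sequence is used, with the arena base register first added into the address.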
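On build_bhb_mitigation(): when the CPU lacks CLRBHB and no firmware call applies, the emitted sequence is a branchy loop that exercises the branch history k times and then synchronizes. A rough user-space rendition in GNU C inline assembly (arm64 only; bhb_clear_loop_sketch and the fixed k = 8 are illustrative stand-ins, the real count comes from get_spectre_bhb_loop_value() and the instructions are produced one emit() at a time):

#include <stdint.h>

static inline void bhb_clear_loop_sketch(void)
{
	uint64_t k = 8;	/* stand-in for the per-CPU loop count */

	asm volatile(
	"1:	b	2f\n"		/* taken branch stirs the BHB; A64_B(1) */
	"2:	subs	%0, %0, #1\n"	/* A64_SUBS_I(true, r1, r1, 1) */
	"	b.ne	1b\n"		/* A64_B_(A64_COND_NE, -2) */
	"	dsb	ish\n"
	"	isb\n"
	: "+r" (k) : : "cc", "memory");
}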
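The split computed by calc_arg_aux() is plain slot arithmetic: each argument occupies (arg_size + 7) / 8 eight-byte slots, the first eight slots travel in x0-x7, and the first argument that does not fit pushes itself and all later arguments onto the stack. A stand-alone sketch of that loop for a hypothetical function taking ten u64 arguments:

#include <stdio.h>

int main(void)
{
	int arg_size[] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
	int nr_args = sizeof(arg_size) / sizeof(arg_size[0]);
	int nregs = 0, slots, i;

	for (i = 0; i < nr_args; i++) {
		slots = (arg_size[i] + 7) / 8;
		if (nregs + slots > 8)
			break;	/* this and all later args spill to stack */
		nregs += slots;
	}
	printf("args_in_regs=%d regs_for_args=%d bstack_for_args=%d\n",
	       i, nregs, (nr_args - i) * 8);
	/* prints: args_in_regs=8 regs_for_args=8 bstack_for_args=16 */
	return 0;
}

Stack-passed struct arguments are rejected with -ENOTSUPP because their alignment cannot be inferred from the BTF function model alone.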
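Finally, clear_garbage() uses the familiar shift-pair masking idiom: when the last stack slot of an argument carries only arg_size % 8 meaningful bytes, shifting left and then right by 64 - 8 * effective_bytes zeroes the stale high bits (the shift directions swap on big-endian). A small demonstration with hypothetical values (clear_high_garbage mirrors the kernel helper in ordinary little-endian C):

#include <stdint.h>
#include <stdio.h>

static uint64_t clear_high_garbage(uint64_t slot, int effective_bytes)
{
	int garbage_bits = 64 - 8 * effective_bytes;

	if (!effective_bytes)	/* slot fully used, nothing to clear */
		return slot;
	return (slot << garbage_bits) >> garbage_bits;
}

int main(void)
{
	/* e.g. a 12-byte struct: its second slot has 4 meaningful bytes */
	uint64_t slot = 0xdeadbeef11223344ULL;

	printf("%#llx\n", (unsigned long long)clear_high_garbage(slot, 4));
	/* prints 0x11223344 */
	return 0;
}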