149 files changed, 1623 insertions, 607 deletions
diff --git a/.clang-format b/.clang-format index 2ceca764122f..c7060124a47a 100644 --- a/.clang-format +++ b/.clang-format @@ -748,6 +748,7 @@ ForEachMacros: - 'ynl_attr_for_each_nested' - 'ynl_attr_for_each_payload' - 'zorro_for_each_dev' + - 'zpci_bus_for_each' IncludeBlocks: Preserve IncludeCategories: @@ -1987,6 +1987,7 @@ D: netfilter: TCP window tracking code D: netfilter: raw table D: netfilter: iprange match D: netfilter: new logging interfaces +D: netfilter: ipset D: netfilter: various other hacks S: Tata S: Hungary diff --git a/MAINTAINERS b/MAINTAINERS index 8e90454bb817..dc731d37c8fe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18028,7 +18028,6 @@ F: drivers/net/ethernet/neterion/ NETFILTER M: Pablo Neira Ayuso <pablo@netfilter.org> -M: Jozsef Kadlecsik <kadlec@netfilter.org> M: Florian Westphal <fw@strlen.de> R: Phil Sutter <phil@nwl.cc> L: netfilter-devel@vger.kernel.org diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 0941f6f58a14..69ecbd69ca8c 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -48,6 +48,13 @@ DEFINE_LOCK_GUARD_1(ksimd, kernel_neon_begin(_T->lock), kernel_neon_end(_T->lock)) -#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){}) +#define __scoped_ksimd(_label) \ + for (struct user_fpsimd_state __uninitialized __st; \ + true; ({ goto _label; })) \ + if (0) { \ +_label: break; \ + } else scoped_guard(ksimd, &__st) + +#define scoped_ksimd() __scoped_ksimd(__UNIQUE_ID(label)) #endif diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 74dd29816f36..b6eb7a465ad2 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1004,7 +1004,7 @@ static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx) arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) return; - if (capable(CAP_SYS_ADMIN)) + if (ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) return; if (supports_clearbhb(SCOPE_SYSTEM)) { diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h index 2cd28af50dd4..3d64a2251699 100644 --- a/arch/s390/include/uapi/asm/ipl.h +++ b/arch/s390/include/uapi/asm/ipl.h @@ -15,6 +15,7 @@ struct ipl_pl_hdr { #define IPL_PL_FLAG_IPLPS 0x80 #define IPL_PL_FLAG_SIPL 0x40 #define IPL_PL_FLAG_IPLSR 0x20 +#define IPL_PL_FLAG_SBP 0x10 /* IPL Parameter Block header */ struct ipl_pb_hdr { diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 961a3d60a4dd..dcdc7e274848 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -262,6 +262,24 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) +#define DEFINE_IPL_ATTR_BOOTPROG_RW(_prefix, _name, _fmt_out, _fmt_in, _hdr, _value) \ + IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + unsigned long long value; \ + if (sscanf(buf, _fmt_in, &value) != 1) \ + return -EINVAL; \ + (_value) = value; \ + (_hdr).flags &= ~IPL_PL_FLAG_SBP; \ + return len; \ +} \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, 0644, \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store) + #define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\ IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value) \ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ 
@@ -818,12 +836,13 @@ DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n", reipl_block_fcp->fcp.wwpn); DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n", reipl_block_fcp->fcp.lun); -DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", - reipl_block_fcp->fcp.bootprog); DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n", reipl_block_fcp->fcp.br_lba); DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", reipl_block_fcp->fcp.devno); +DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", + reipl_block_fcp->hdr, + reipl_block_fcp->fcp.bootprog); static void reipl_get_ascii_loadparm(char *loadparm, struct ipl_parameter_block *ibp) @@ -942,10 +961,11 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n", reipl_block_nvme->nvme.fid); DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n", reipl_block_nvme->nvme.nsid); -DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n", - reipl_block_nvme->nvme.bootprog); DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n", reipl_block_nvme->nvme.br_lba); +DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n", + reipl_block_nvme->hdr, + reipl_block_nvme->nvme.bootprog); static struct attribute *reipl_nvme_attrs[] = { &sys_reipl_nvme_fid_attr.attr, @@ -1038,8 +1058,9 @@ static const struct bin_attribute *const reipl_eckd_bin_attrs[] = { }; DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd); -DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n", - reipl_block_eckd->eckd.bootprog); +DEFINE_IPL_ATTR_BOOTPROG_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n", + reipl_block_eckd->hdr, + reipl_block_eckd->eckd.bootprog); static struct attribute *reipl_eckd_attrs[] = { &sys_reipl_eckd_device_attr.attr, @@ -1567,12 +1588,13 @@ DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n", dump_block_fcp->fcp.wwpn); DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n", dump_block_fcp->fcp.lun); -DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", - dump_block_fcp->fcp.bootprog); DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n", dump_block_fcp->fcp.br_lba); DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", dump_block_fcp->fcp.devno); +DEFINE_IPL_ATTR_BOOTPROG_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", + dump_block_fcp->hdr, + dump_block_fcp->fcp.bootprog); DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr, dump_block_fcp->fcp, @@ -1604,10 +1626,11 @@ DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n", dump_block_nvme->nvme.fid); DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n", dump_block_nvme->nvme.nsid); -DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n", - dump_block_nvme->nvme.bootprog); DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n", dump_block_nvme->nvme.br_lba); +DEFINE_IPL_ATTR_BOOTPROG_RW(dump_nvme, bootprog, "%lld\n", "%llx\n", + dump_block_nvme->hdr, + dump_block_nvme->nvme.bootprog); DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr, dump_block_nvme->nvme, @@ -1635,8 +1658,9 @@ static const struct attribute_group dump_nvme_attr_group = { /* ECKD dump device attributes */ DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd); -DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n", - dump_block_eckd->eckd.bootprog); +DEFINE_IPL_ATTR_BOOTPROG_RW(dump_eckd, bootprog, "%lld\n", "%llx\n", + dump_block_eckd->hdr, + dump_block_eckd->eckd.bootprog); IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd); IPL_ATTR_BR_CHR_STORE_FN(dump, 
dump_block_eckd->eckd); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 3aae7f70e6ab..18520d333058 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -104,7 +104,6 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo struct stack_frame_vdso_wrapper __user *sf_vdso; struct stack_frame_user __user *sf; unsigned long ip, sp; - bool first = true; if (!current->mm) return; @@ -133,24 +132,11 @@ void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *coo if (__get_user(ip, &sf->gprs[8])) break; } - /* Sanity check: ABI requires SP to be 8 byte aligned. */ - if (sp & 0x7) + /* Validate SP and RA (ABI requires SP to be 8 byte aligned). */ + if (sp & 0x7 || ip_invalid(ip)) break; - if (ip_invalid(ip)) { - /* - * If the instruction address is invalid, and this - * is the first stack frame, assume r14 has not - * been written to the stack yet. Otherwise exit. - */ - if (!first) - break; - ip = regs->gprs[14]; - if (ip_invalid(ip)) - break; - } if (!store_ip(consume_entry, cookie, entry, perf, ip)) break; - first = false; } pagefault_enable(); } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 5a6ace9d875a..57f3980b98a9 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -961,6 +961,7 @@ void zpci_device_reserved(struct zpci_dev *zdev) } void zpci_release_device(struct kref *kref) + __releases(&zpci_list_lock) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); @@ -1148,6 +1149,7 @@ static void zpci_add_devices(struct list_head *scan_list) int zpci_scan_devices(void) { + struct zpci_bus *zbus; LIST_HEAD(scan_list); int rc; @@ -1156,7 +1158,10 @@ int zpci_scan_devices(void) return rc; zpci_add_devices(&scan_list); - zpci_bus_scan_busses(); + zpci_bus_for_each(zbus) { + zpci_bus_scan_bus(zbus); + cond_resched(); + } return 0; } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 66c4bd888b29..42a13e451f64 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -153,23 +153,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus) return ret; } -/* zpci_bus_scan_busses - Scan all registered busses - * - * Scan all available zbusses - * - */ -void zpci_bus_scan_busses(void) -{ - struct zpci_bus *zbus = NULL; - - mutex_lock(&zbus_list_lock); - list_for_each_entry(zbus, &zbus_list, bus_next) { - zpci_bus_scan_bus(zbus); - cond_resched(); - } - mutex_unlock(&zbus_list_lock); -} - static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev) { return !s390_pci_no_rid && zdev->rid_available && @@ -222,10 +205,29 @@ out_free_domain: return -ENOMEM; } -static void zpci_bus_release(struct kref *kref) +/** + * zpci_bus_release - Un-initialize resources associated with the zbus and + * free memory + * @kref: refcount * that is part of struct zpci_bus + * + * MUST be called with `zbus_list_lock` held, but the lock is released during + * run of the function. + */ +static inline void zpci_bus_release(struct kref *kref) + __releases(&zbus_list_lock) { struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref); + lockdep_assert_held(&zbus_list_lock); + + list_del(&zbus->bus_next); + mutex_unlock(&zbus_list_lock); + + /* + * At this point no-one should see this object, or be able to get a new + * reference to it. 
+ */ + if (zbus->bus) { pci_lock_rescan_remove(); pci_stop_root_bus(zbus->bus); @@ -237,16 +239,19 @@ static void zpci_bus_release(struct kref *kref) pci_unlock_rescan_remove(); } - mutex_lock(&zbus_list_lock); - list_del(&zbus->bus_next); - mutex_unlock(&zbus_list_lock); zpci_remove_parent_msi_domain(zbus); kfree(zbus); } -static void zpci_bus_put(struct zpci_bus *zbus) +static inline void __zpci_bus_get(struct zpci_bus *zbus) +{ + lockdep_assert_held(&zbus_list_lock); + kref_get(&zbus->kref); +} + +static inline void zpci_bus_put(struct zpci_bus *zbus) { - kref_put(&zbus->kref, zpci_bus_release); + kref_put_mutex(&zbus->kref, zpci_bus_release, &zbus_list_lock); } static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid) @@ -258,7 +263,7 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid) if (!zbus->multifunction) continue; if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) { - kref_get(&zbus->kref); + __zpci_bus_get(zbus); goto out_unlock; } } @@ -268,6 +273,44 @@ out_unlock: return zbus; } +/** + * zpci_bus_get_next - get the next zbus object from given position in the list + * @pos: current position/cursor in the global zbus list + * + * Acquires and releases references as the cursor iterates (might also free/ + * release the cursor). Is tolerant of concurrent operations on the list. + * + * To begin the iteration, set *@pos to %NULL before calling the function. + * + * *@pos is set to %NULL in cases where either the list is empty, or *@pos is + * the last element in the list. + * + * Context: Process context. May sleep. + */ +void zpci_bus_get_next(struct zpci_bus **pos) +{ + struct zpci_bus *curp = *pos, *next = NULL; + + mutex_lock(&zbus_list_lock); + if (curp) + next = list_next_entry(curp, bus_next); + else + next = list_first_entry(&zbus_list, typeof(*curp), bus_next); + + if (list_entry_is_head(next, &zbus_list, bus_next)) + next = NULL; + + if (next) + __zpci_bus_get(next); + + *pos = next; + mutex_unlock(&zbus_list_lock); + + /* zpci_bus_put() might drop refcount to 0 and locks zbus_list_lock */ + if (curp) + zpci_bus_put(curp); +} + static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) { struct zpci_bus *zbus; @@ -279,9 +322,6 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) zbus->topo = topo; zbus->topo_is_tid = topo_is_tid; INIT_LIST_HEAD(&zbus->bus_next); - mutex_lock(&zbus_list_lock); - list_add_tail(&zbus->bus_next, &zbus_list); - mutex_unlock(&zbus_list_lock); kref_init(&zbus->kref); INIT_LIST_HEAD(&zbus->resources); @@ -291,6 +331,10 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) zbus->bus_resource.flags = IORESOURCE_BUS; pci_add_resource(&zbus->resources, &zbus->bus_resource); + mutex_lock(&zbus_list_lock); + list_add_tail(&zbus->bus_next, &zbus_list); + mutex_unlock(&zbus_list_lock); + return zbus; } diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index ae3d7a9159bd..e440742e3145 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -15,7 +15,20 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops); void zpci_bus_device_unregister(struct zpci_dev *zdev); int zpci_bus_scan_bus(struct zpci_bus *zbus); -void zpci_bus_scan_busses(void); +void zpci_bus_get_next(struct zpci_bus **pos); + +/** + * zpci_bus_for_each - iterate over all the registered zbus objects + * @pos: a struct zpci_bus * as cursor + * + * Acquires and releases references as the cursor iterates over the registered + * objects. 
Is tolerant against concurrent removals of objects. + * + * Context: Process context. May sleep. + */ +#define zpci_bus_for_each(pos) \ + for ((pos) = NULL, zpci_bus_get_next(&(pos)); (pos) != NULL; \ + zpci_bus_get_next(&(pos))) int zpci_bus_scan_device(struct zpci_dev *zdev); void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 977ee75e047c..f610fde2d5c4 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -2,6 +2,7 @@ #include <linux/objtool.h> #include <linux/module.h> #include <linux/sort.h> +#include <linux/bpf.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> #include <asm/unwind.h> @@ -172,6 +173,25 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip) } #endif +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .type = ORC_TYPE_CALL, + .sp_reg = ORC_REG_BP, + .sp_offset = 16, + .bp_reg = ORC_REG_PREV_SP, + .bp_offset = -16, +}; + +static struct orc_entry *orc_bpf_find(unsigned long ip) +{ +#ifdef CONFIG_BPF_JIT + if (bpf_has_frame_pointer(ip)) + return &orc_fp_entry; +#endif + + return NULL; +} + /* * If we crash with IP==0, the last successfully executed instruction * was probably an indirect function call with a NULL function pointer, @@ -186,15 +206,6 @@ static struct orc_entry null_orc_entry = { .type = ORC_TYPE_CALL }; -/* Fake frame pointer entry -- used as a fallback for generated code */ -static struct orc_entry orc_fp_entry = { - .type = ORC_TYPE_CALL, - .sp_reg = ORC_REG_BP, - .sp_offset = 16, - .bp_reg = ORC_REG_PREV_SP, - .bp_offset = -16, -}; - static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; @@ -238,6 +249,11 @@ static struct orc_entry *orc_find(unsigned long ip) if (orc) return orc; + /* BPF lookup: */ + orc = orc_bpf_find(ip); + if (orc) + return orc; + return orc_ftrace_find(ip); } @@ -495,9 +511,8 @@ bool unwind_next_frame(struct unwind_state *state) if (!orc) { /* * As a fallback, try to assume this code uses a frame pointer. - * This is useful for generated code, like BPF, which ORC - * doesn't know about. This is just a guess, so the rest of - * the unwind is no longer considered reliable. + * This is just a guess, so the rest of the unwind is no longer + * considered reliable. */ orc = &orc_fp_entry; state->error = true; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b69dc7194e2c..b0bac2a66eff 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1678,6 +1678,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image emit_prologue(&prog, image, stack_depth, bpf_prog_was_classic(bpf_prog), tail_call_reachable, bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb); + + bpf_prog->aux->ksym.fp_start = prog - temp; + /* Exception callback will clobber callee regs for its own use, and * restore the original callee regs from main prog's stack frame. 
*/ @@ -2736,6 +2739,8 @@ emit_jmp: pop_r12(&prog); } EMIT1(0xC9); /* leave */ + bpf_prog->aux->ksym.fp_end = prog - temp; + emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; @@ -3325,6 +3330,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + if (im) + im->ksym.fp_start = prog - (u8 *)rw_image; + if (!is_imm8(stack_size)) { /* sub rsp, stack_size */ EMIT3_off32(0x48, 0x81, 0xEC, stack_size); @@ -3462,7 +3470,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off); + EMIT1(0xC9); /* leave */ + if (im) + im->ksym.fp_end = prog - (u8 *)rw_image; + if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ diff --git a/drivers/acpi/acpi_pcc.c b/drivers/acpi/acpi_pcc.c index 97064e943768..e3f302b9dee5 100644 --- a/drivers/acpi/acpi_pcc.c +++ b/drivers/acpi/acpi_pcc.c @@ -52,7 +52,7 @@ acpi_pcc_address_space_setup(acpi_handle region_handle, u32 function, struct pcc_data *data; struct acpi_pcc_info *ctx = handler_context; struct pcc_mbox_chan *pcc_chan; - static acpi_status ret; + acpi_status ret; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 3bdeeee3414e..e66e20d1f31b 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1366,7 +1366,8 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) /* Are any of the regs PCC ?*/ if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) || CPC_IN_PCC(lowest_non_linear_reg) || CPC_IN_PCC(nominal_reg) || - CPC_IN_PCC(low_freq_reg) || CPC_IN_PCC(nom_freq_reg)) { + CPC_IN_PCC(low_freq_reg) || CPC_IN_PCC(nom_freq_reg) || + CPC_IN_PCC(guaranteed_reg)) { if (pcc_ss_id < 0) { pr_debug("Invalid pcc_ss_id\n"); return -ENODEV; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 84676cc24221..0ee8ea971aa4 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1868,16 +1868,18 @@ void pm_runtime_init(struct device *dev) */ void pm_runtime_reinit(struct device *dev) { - if (!pm_runtime_enabled(dev)) { - if (dev->power.runtime_status == RPM_ACTIVE) - pm_runtime_set_suspended(dev); - if (dev->power.irq_safe) { - spin_lock_irq(&dev->power.lock); - dev->power.irq_safe = 0; - spin_unlock_irq(&dev->power.lock); - if (dev->parent) - pm_runtime_put(dev->parent); - } + if (pm_runtime_enabled(dev)) + return; + + if (dev->power.runtime_status == RPM_ACTIVE) + pm_runtime_set_suspended(dev); + + if (dev->power.irq_safe) { + spin_lock_irq(&dev->power.lock); + dev->power.irq_safe = 0; + spin_unlock_irq(&dev->power.lock); + if (dev->parent) + pm_runtime_put(dev->parent); } /* * Clear power.needs_force_resume in case it has been set by diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 54cf4d856179..436992331111 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -495,7 +495,11 @@ int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, return -EOVERFLOW; start_byte = start - ALIGN_DOWN(start, PAGE_SIZE); - dmabuf = dma_buf_get(fd); + if (IS_ENABLED(CONFIG_DMA_SHARED_BUFFER)) + dmabuf = dma_buf_get(fd); + else + dmabuf = ERR_PTR(-ENXIO); + if (!IS_ERR(dmabuf)) { pages = 
iopt_alloc_dmabuf_pages(ictx, dmabuf, start_byte, start, length, diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index c4322fd26f93..550ff36dec3a 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -1184,14 +1184,20 @@ static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, unsigned long start, size_t length) { + unsigned long last; struct iommufd_ioas *ioas; int rc; + if (!length) + return -EINVAL; + if (check_add_overflow(start, length - 1, &last)) + return -EOVERFLOW; + ioas = iommufd_get_ioas(ucmd->ictx, mockpt_id); if (IS_ERR(ioas)) return PTR_ERR(ioas); down_write(&ioas->iopt.iova_rwsem); - rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL); + rc = iopt_reserve_iova(&ioas->iopt, start, last, NULL); up_write(&ioas->iopt.iova_rwsem); iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; @@ -1215,8 +1221,10 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd, page_size = 1 << __ffs(mock->domain.pgsize_bitmap); if (iova % page_size || length % page_size || (uintptr_t)uptr % page_size || - check_add_overflow((uintptr_t)uptr, (uintptr_t)length, &end)) - return -EINVAL; + check_add_overflow((uintptr_t)uptr, (uintptr_t)length, &end)) { + rc = -EINVAL; + goto out_put; + } for (; length; length -= page_size) { struct page *pages[1]; diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index e15e320db476..cfaea6178a71 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig CAN_DEV - tristate "CAN Device Drivers" + bool "CAN Device Drivers" default y depends on CAN help @@ -17,10 +17,7 @@ menuconfig CAN_DEV virtual ones. If you own such devices or plan to use the virtual CAN interfaces to develop applications, say Y here. - To compile as a module, choose M here: the module will be called - can-dev. 
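The iommufd selftest hunk above rejects a zero length and computes the inclusive end of the range with check_add_overflow() before reserving the IOVA, instead of trusting "start + length - 1" not to wrap. A minimal userspace sketch of that pattern, with a hypothetical helper name and GCC/Clang's __builtin_add_overflow() standing in for the kernel macro:

#include <stdio.h>

/* Hypothetical helper: compute the inclusive end of [start, start + length - 1]. */
static int range_last(unsigned long start, unsigned long length,
		      unsigned long *last)
{
	if (!length)
		return -1;		/* an empty range is invalid */
	if (__builtin_add_overflow(start, length - 1, last))
		return -2;		/* start + length - 1 wrapped around */
	return 0;
}

int main(void)
{
	unsigned long last;

	printf("%d\n", range_last(0x1000, 0x1000, &last));	/* 0, last = 0x1fff */
	printf("%d\n", range_last(~0UL, 2, &last));		/* -2, overflow */
	printf("%d\n", range_last(0x1000, 0, &last));		/* -1, empty */
	return 0;
}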
- -if CAN_DEV +if CAN_DEV && CAN config CAN_VCAN tristate "Virtual Local CAN Interface (vcan)" diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index d7bc10a6b8ea..37e2f1a2faec 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_CAN_VCAN) += vcan.o obj-$(CONFIG_CAN_VXCAN) += vxcan.o obj-$(CONFIG_CAN_SLCAN) += slcan/ -obj-y += dev/ +obj-$(CONFIG_CAN_DEV) += dev/ obj-y += esd/ obj-y += rcar/ obj-y += rockchip/ diff --git a/drivers/net/can/dev/Makefile b/drivers/net/can/dev/Makefile index 633687d6b6c0..64226acf0f3d 100644 --- a/drivers/net/can/dev/Makefile +++ b/drivers/net/can/dev/Makefile @@ -1,9 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CAN_DEV) += can-dev.o - -can-dev-y += skb.o +obj-$(CONFIG_CAN) += can-dev.o +can-dev-$(CONFIG_CAN_DEV) += skb.o can-dev-$(CONFIG_CAN_CALC_BITTIMING) += calc_bittiming.o can-dev-$(CONFIG_CAN_NETLINK) += bittiming.o can-dev-$(CONFIG_CAN_NETLINK) += dev.o diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index e29e85b67fd4..a0233e550a5a 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -1074,7 +1074,7 @@ out_usb_free_urb: usb_free_urb(urb); out_usb_kill_anchored_urbs: if (!parent->active_channels) { - usb_kill_anchored_urbs(&dev->tx_submitted); + usb_kill_anchored_urbs(&parent->rx_submitted); if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP) gs_usb_timestamp_stop(parent); diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 57dd063c0740..b094001a7c80 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -444,9 +444,6 @@ static void gswip_remove(struct platform_device *pdev) if (!priv) return; - /* disable the switch */ - gswip_disable_switch(priv); - dsa_unregister_switch(priv->ds); for (i = 0; i < priv->num_gphy_fw; i++) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index 9c38e51a75e8..2e0f2afbadbb 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -294,8 +294,6 @@ struct gswip_priv { u16 version; }; -void gswip_disable_switch(struct gswip_priv *priv); - int gswip_probe_common(struct gswip_priv *priv, u32 version); #endif /* __LANTIQ_GSWIP_H */ diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c index 9da39edf8f57..e790f2ef7588 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip_common.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c @@ -752,6 +752,13 @@ static int gswip_setup(struct dsa_switch *ds) return 0; } +static void gswip_teardown(struct dsa_switch *ds) +{ + struct gswip_priv *priv = ds->priv; + + regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); +} + static enum dsa_tag_protocol gswip_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp) @@ -1629,6 +1636,7 @@ static const struct phylink_mac_ops gswip_phylink_mac_ops = { static const struct dsa_switch_ops gswip_switch_ops = { .get_tag_protocol = gswip_get_tag_protocol, .setup = gswip_setup, + .teardown = gswip_teardown, .port_setup = gswip_port_setup, .port_enable = gswip_port_enable, .port_disable = gswip_port_disable, @@ -1656,12 +1664,6 @@ static const struct dsa_switch_ops gswip_switch_ops = { .port_hsr_leave = dsa_port_simple_hsr_leave, }; -void gswip_disable_switch(struct gswip_priv *priv) -{ - regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); -} 
-EXPORT_SYMBOL_GPL(gswip_disable_switch); - static int gswip_validate_cpu_port(struct dsa_switch *ds) { struct gswip_priv *priv = ds->priv; @@ -1718,15 +1720,14 @@ int gswip_probe_common(struct gswip_priv *priv, u32 version) err = gswip_validate_cpu_port(priv->ds); if (err) - goto disable_switch; + goto unregister_switch; dev_info(priv->dev, "probed GSWIP version %lx mod %lx\n", GSWIP_VERSION_REV(version), GSWIP_VERSION_MOD(version)); return 0; -disable_switch: - gswip_disable_switch(priv); +unregister_switch: dsa_unregister_switch(priv->ds); return err; diff --git a/drivers/net/dsa/lantiq/mxl-gsw1xx.c b/drivers/net/dsa/lantiq/mxl-gsw1xx.c index 0816c61a47f1..f8ff8a604bf5 100644 --- a/drivers/net/dsa/lantiq/mxl-gsw1xx.c +++ b/drivers/net/dsa/lantiq/mxl-gsw1xx.c @@ -11,10 +11,12 @@ #include <linux/bits.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <linux/module.h> #include <linux/of_device.h> #include <linux/of_mdio.h> #include <linux/regmap.h> +#include <linux/workqueue.h> #include <net/dsa.h> #include "lantiq_gswip.h" @@ -29,6 +31,7 @@ struct gsw1xx_priv { struct regmap *clk; struct regmap *shell; struct phylink_pcs pcs; + struct delayed_work clear_raneg; phy_interface_t tbi_interface; struct gswip_priv gswip; }; @@ -145,7 +148,9 @@ static void gsw1xx_pcs_disable(struct phylink_pcs *pcs) { struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs); - /* Assert SGMII shell reset */ + cancel_delayed_work_sync(&priv->clear_raneg); + + /* Assert SGMII shell reset (will also clear RANEG bit) */ regmap_set_bits(priv->shell, GSW1XX_SHELL_RST_REQ, GSW1XX_RST_REQ_SGMII_SHELL); @@ -255,10 +260,16 @@ static int gsw1xx_pcs_reset(struct gsw1xx_priv *priv) FIELD_PREP(GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT, GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT_DEF); - /* TODO: Take care of inverted RX pair once generic property is + /* RX lane seems to be inverted internally, so bit + * GSW1XX_SGMII_PHY_RX0_CFG2_INVERT needs to be set for normal + * (ie. non-inverted) operation. + * + * TODO: Take care of inverted RX pair once generic property is * available */ + val |= GSW1XX_SGMII_PHY_RX0_CFG2_INVERT; + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_RX0_CFG2, val); if (ret < 0) return ret; @@ -422,12 +433,29 @@ static int gsw1xx_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, return 0; } +static void gsw1xx_pcs_clear_raneg(struct work_struct *work) +{ + struct gsw1xx_priv *priv = + container_of(work, struct gsw1xx_priv, clear_raneg.work); + + regmap_clear_bits(priv->sgmii, GSW1XX_SGMII_TBI_ANEGCTL, + GSW1XX_SGMII_TBI_ANEGCTL_RANEG); +} + static void gsw1xx_pcs_an_restart(struct phylink_pcs *pcs) { struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs); + cancel_delayed_work_sync(&priv->clear_raneg); + regmap_set_bits(priv->sgmii, GSW1XX_SGMII_TBI_ANEGCTL, GSW1XX_SGMII_TBI_ANEGCTL_RANEG); + + /* despite being documented as self-clearing, the RANEG bit + * sometimes remains set, preventing auto-negotiation from happening. + * MaxLinear advises to manually clear the bit after 10ms. 
+ */ + schedule_delayed_work(&priv->clear_raneg, msecs_to_jiffies(10)); } static void gsw1xx_pcs_link_up(struct phylink_pcs *pcs, @@ -630,6 +658,8 @@ static int gsw1xx_probe(struct mdio_device *mdiodev) if (ret) return ret; + INIT_DELAYED_WORK(&priv->clear_raneg, gsw1xx_pcs_clear_raneg); + ret = gswip_probe_common(&priv->gswip, version); if (ret) return ret; @@ -642,25 +672,31 @@ static int gsw1xx_probe(struct mdio_device *mdiodev) static void gsw1xx_remove(struct mdio_device *mdiodev) { struct gswip_priv *priv = dev_get_drvdata(&mdiodev->dev); + struct gsw1xx_priv *gsw1xx_priv; if (!priv) return; - gswip_disable_switch(priv); - dsa_unregister_switch(priv->ds); + + gsw1xx_priv = container_of(priv, struct gsw1xx_priv, gswip); + cancel_delayed_work_sync(&gsw1xx_priv->clear_raneg); } static void gsw1xx_shutdown(struct mdio_device *mdiodev) { struct gswip_priv *priv = dev_get_drvdata(&mdiodev->dev); + struct gsw1xx_priv *gsw1xx_priv; if (!priv) return; + dsa_switch_shutdown(priv->ds); + dev_set_drvdata(&mdiodev->dev, NULL); - gswip_disable_switch(priv); + gsw1xx_priv = container_of(priv, struct gsw1xx_priv, gswip); + cancel_delayed_work_sync(&gsw1xx_priv->clear_raneg); } static const struct gswip_hw_info gsw12x_data = { diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 888f28f11406..90df02e0039c 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1790,6 +1790,9 @@ static int b44_nway_reset(struct net_device *dev) u32 bmcr; int r; + if (bp->flags & B44_FLAG_EXTERNAL_PHY) + return phy_ethtool_nway_reset(dev); + spin_lock_irq(&bp->lock); b44_readphy(bp, MII_BMCR, &bmcr); b44_readphy(bp, MII_BMCR, &bmcr); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 3e77a96e5a3e..c94a391b1ba5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -268,13 +268,11 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, case XDP_TX: rx_buf = &rxr->rx_buf_ring[cons]; mapping = rx_buf->mapping - bp->rx_dma_offset; - *event &= BNXT_TX_CMP_EVENT; if (unlikely(xdp_buff_has_frags(xdp))) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); tx_needed += sinfo->nr_frags; - *event = BNXT_AGG_EVENT; } if (tx_avail < tx_needed) { @@ -287,6 +285,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, dma_sync_single_for_device(&pdev->dev, mapping + offset, *len, bp->rx_dir); + *event &= ~BNXT_RX_EVENT; *event |= BNXT_TX_EVENT; __bnxt_xmit_xdp(bp, txr, mapping + offset, *len, NEXT_RX(rxr->rx_prod), xdp); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index d5e5800b84ef..53b26cece16a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1787,7 +1787,8 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames, int xdp_tx_bd_cnt, i, k; int xdp_tx_frm_cnt = 0; - if (unlikely(test_bit(ENETC_TX_DOWN, &priv->flags))) + if (unlikely(test_bit(ENETC_TX_DOWN, &priv->flags) || + !netif_carrier_ok(ndev))) return -ENETDOWN; enetc_lock_mdio(); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index c685a5c0cc51..a753265961af 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3933,7 +3933,12 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private 
*fep, txq->bd.cur = bdp; /* Trigger transmission start */ - writel(0, txq->bd.reg_desc_active); + if (!(fep->quirks & FEC_QUIRK_ERR007885) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active)) + writel(0, txq->bd.reg_desc_active); return 0; } diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index a5a2b18d309b..a7a088a77f37 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -647,12 +647,9 @@ static int gve_setup_device_resources(struct gve_priv *priv) err = gve_alloc_counter_array(priv); if (err) goto abort_with_rss_config_cache; - err = gve_init_clock(priv); - if (err) - goto abort_with_counter; err = gve_alloc_notify_blocks(priv); if (err) - goto abort_with_clock; + goto abort_with_counter; err = gve_alloc_stats_report(priv); if (err) goto abort_with_ntfy_blocks; @@ -683,10 +680,16 @@ static int gve_setup_device_resources(struct gve_priv *priv) } } + err = gve_init_clock(priv); + if (err) { + dev_err(&priv->pdev->dev, "Failed to init clock"); + goto abort_with_ptype_lut; + } + err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); if (err) { dev_err(&priv->pdev->dev, "Failed to init RSS config"); - goto abort_with_ptype_lut; + goto abort_with_clock; } err = gve_adminq_report_stats(priv, priv->stats_report_len, @@ -698,6 +701,8 @@ static int gve_setup_device_resources(struct gve_priv *priv) gve_set_device_resources_ok(priv); return 0; +abort_with_clock: + gve_teardown_clock(priv); abort_with_ptype_lut: kvfree(priv->ptype_lut_dqo); priv->ptype_lut_dqo = NULL; @@ -705,8 +710,6 @@ abort_with_stats_report: gve_free_stats_report(priv); abort_with_ntfy_blocks: gve_free_notify_blocks(priv); -abort_with_clock: - gve_teardown_clock(priv); abort_with_counter: gve_free_counter_array(priv); abort_with_rss_config_cache: diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index cf8abbe01840..c589baea7c77 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10555,6 +10555,9 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, bool writen_to_tbl = false; int ret = 0; + if (vlan_id >= VLAN_N_VID) + return -EINVAL; + /* When device is resetting or reset failed, firmware is unable to * handle mailbox. Just record the vlan id, and remove it after * reset finished. 
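The hclge_set_vlan_filter() hunk above bounds-checks the VLAN ID arriving over the VF mailbox before it is recorded, and the hclge_mbx.c hunk that follows applies the same idea to TQP indices. A minimal userspace sketch of that validate-before-index pattern, with a hypothetical bitmap and VLAN_N_VID taken as the kernel's value of 4096:

#include <stdio.h>

#define VLAN_N_VID	4096		/* valid VLAN IDs are 0..4095 */

static unsigned char vlan_member[VLAN_N_VID / 8];

/* Hypothetical helper: mark a VLAN as in use, rejecting untrusted IDs. */
static int set_vlan_member(unsigned int vlan_id)
{
	if (vlan_id >= VLAN_N_VID)	/* validate before indexing */
		return -1;
	vlan_member[vlan_id / 8] |= 1u << (vlan_id % 8);
	return 0;
}

int main(void)
{
	printf("%d\n", set_vlan_member(100));	/* 0: accepted */
	printf("%d\n", set_vlan_member(5000));	/* -1: out of range */
	return 0;
}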
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index c7ff12a6c076..b7d4e06a55d4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -193,10 +193,10 @@ static int hclge_get_ring_chain_from_mbx( return -EINVAL; for (i = 0; i < ring_num; i++) { - if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) { + if (req->msg.param[i].tqp_index >= vport->nic.kinfo.num_tqps) { dev_err(&hdev->pdev->dev, "tqp index(%u) is out of range(0-%u)\n", req->msg.param[i].tqp_index, - vport->nic.kinfo.rss_size - 1U); + vport->nic.kinfo.num_tqps - 1U); return -EINVAL; } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 8fcf220a120d..70327a73dee3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -368,12 +368,12 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev) new_tqps = kinfo->rss_size * num_tc; kinfo->num_tqps = min(new_tqps, hdev->num_tqps); - kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps, + kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps, sizeof(struct hnae3_queue *), GFP_KERNEL); if (!kinfo->tqp) return -ENOMEM; - for (i = 0; i < kinfo->num_tqps; i++) { + for (i = 0; i < hdev->num_tqps; i++) { hdev->htqp[i].q.handle = &hdev->nic; hdev->htqp[i].q.tqp_index = i; kinfo->tqp[i] = &hdev->htqp[i].q; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 887adf4807d1..ea77fbd98396 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -197,6 +197,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, struct pci_dev *pdev = dev->pdev; int ret = 0; + if (mlx5_fw_reset_in_progress(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Can't reload during firmware reset"); + return -EBUSY; + } + if (mlx5_dev_is_lightweight(dev)) { if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 7bcf822a89f9..6b4ec457ce22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -33,6 +33,7 @@ #include "lib/eq.h" #include "fw_tracer.h" #include "fw_tracer_tracepoint.h" +#include <linux/ctype.h> static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer) { @@ -358,6 +359,47 @@ static const char *VAL_PARM = "%llx"; static const char *REPLACE_64_VAL_PARM = "%x%x"; static const char *PARAM_CHAR = "%"; +static bool mlx5_is_valid_spec(const char *str) +{ + /* Parse format specifiers to find the actual type. + * Structure: %[flags][width][.precision][length]type + * Skip flags, width, precision & length. + */ + while (isdigit(*str) || *str == '#' || *str == '.' || *str == 'l') + str++; + + /* Check if it's a valid integer/hex specifier or %%: + * Valid formats: %x, %d, %i, %u, etc. 
+ */ + if (*str != 'x' && *str != 'X' && *str != 'd' && *str != 'i' && + *str != 'u' && *str != 'c' && *str != '%') + return false; + + return true; +} + +static bool mlx5_tracer_validate_params(const char *str) +{ + const char *substr = str; + + if (!str) + return false; + + substr = strstr(substr, PARAM_CHAR); + while (substr) { + if (!mlx5_is_valid_spec(substr + 1)) + return false; + + if (*(substr + 1) == '%') + substr = strstr(substr + 2, PARAM_CHAR); + else + substr = strstr(substr + 1, PARAM_CHAR); + + } + + return true; +} + static int mlx5_tracer_message_hash(u32 message_id) { return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1); @@ -419,6 +461,10 @@ static int mlx5_tracer_get_num_of_params(char *str) char *substr, *pstr = str; int num_of_params = 0; + /* Validate that all parameters are valid before processing */ + if (!mlx5_tracer_validate_params(str)) + return -EINVAL; + /* replace %llx with %x%x */ substr = strstr(pstr, VAL_PARM); while (substr) { @@ -427,11 +473,15 @@ static int mlx5_tracer_get_num_of_params(char *str) substr = strstr(pstr, VAL_PARM); } - /* count all the % characters */ + /* count all the % characters, but skip %% (escaped percent) */ substr = strstr(str, PARAM_CHAR); while (substr) { - num_of_params += 1; - str = substr + 1; + if (*(substr + 1) != '%') { + num_of_params += 1; + str = substr + 1; + } else { + str = substr + 2; + } substr = strstr(str, PARAM_CHAR); } @@ -570,14 +620,17 @@ void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, { char tmp[512]; - snprintf(tmp, sizeof(tmp), str_frmt->string, - str_frmt->params[0], - str_frmt->params[1], - str_frmt->params[2], - str_frmt->params[3], - str_frmt->params[4], - str_frmt->params[5], - str_frmt->params[6]); + if (str_frmt->invalid_string) + snprintf(tmp, sizeof(tmp), "BAD_FORMAT: %s", str_frmt->string); + else + snprintf(tmp, sizeof(tmp), str_frmt->string, + str_frmt->params[0], + str_frmt->params[1], + str_frmt->params[2], + str_frmt->params[3], + str_frmt->params[4], + str_frmt->params[5], + str_frmt->params[6]); trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost, str_frmt->event_id, tmp); @@ -609,6 +662,13 @@ static int mlx5_tracer_handle_raw_string(struct mlx5_fw_tracer *tracer, return 0; } +static void mlx5_tracer_handle_bad_format_string(struct mlx5_fw_tracer *tracer, + struct tracer_string_format *cur_string) +{ + cur_string->invalid_string = true; + list_add_tail(&cur_string->list, &tracer->ready_strings_list); +} + static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, struct tracer_event *tracer_event) { @@ -619,12 +679,18 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, if (!cur_string) return mlx5_tracer_handle_raw_string(tracer, tracer_event); - cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string); - cur_string->last_param_num = 0; cur_string->event_id = tracer_event->event_id; cur_string->tmsn = tracer_event->string_event.tmsn; cur_string->timestamp = tracer_event->string_event.timestamp; cur_string->lost = tracer_event->lost_event; + cur_string->last_param_num = 0; + cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string); + if (cur_string->num_of_params < 0) { + pr_debug("%s Invalid format string parameters\n", + __func__); + mlx5_tracer_handle_bad_format_string(tracer, cur_string); + return 0; + } if (cur_string->num_of_params == 0) /* trace with no params */ list_add_tail(&cur_string->list, &tracer->ready_strings_list); } else { @@ -634,6 +700,11 @@ static int 
mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, __func__, tracer_event->string_event.tmsn); return mlx5_tracer_handle_raw_string(tracer, tracer_event); } + if (cur_string->num_of_params < 0) { + pr_debug("%s string parameter of invalid string, dumping\n", + __func__); + return 0; + } cur_string->last_param_num += 1; if (cur_string->last_param_num > TRACER_MAX_PARAMS) { pr_debug("%s Number of params exceeds the max (%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index 5c548bb74f07..30d0bcba8847 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -125,6 +125,7 @@ struct tracer_string_format { struct list_head list; u32 timestamp; bool lost; + bool invalid_string; }; enum mlx5_fw_tracer_ownership_state { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 811178d8976c..262dc032e276 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -69,7 +69,7 @@ struct page_pool; #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) #define MLX5E_METADATA_ETHER_LEN 8 -#define MLX5E_ETH_HARD_MTU (ETH_HLEN + PSP_ENCAP_HLEN + PSP_TRL_SIZE + VLAN_HLEN + ETH_FCS_LEN) +#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 35d9530037a6..a8fb4bec369c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -342,9 +342,8 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, rt_dst_entry = &rt->dst; break; case AF_INET6: - rt_dst_entry = ipv6_stub->ipv6_dst_lookup_flow( - dev_net(netdev), NULL, &fl6, NULL); - if (IS_ERR(rt_dst_entry)) + if (!IS_ENABLED(CONFIG_IPV6) || + ip6_dst_lookup(dev_net(netdev), NULL, &rt_dst_entry, &fl6)) goto neigh; break; default: @@ -359,6 +358,9 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, neigh_ha_snapshot(addr, n, netdev); ether_addr_copy(dst, addr); + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT && + is_zero_ether_addr(addr)) + neigh_event_send(n, NULL); dst_release(rt_dst_entry); neigh_release(n); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6168f0814414..07fc4d2c8fad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6825,7 +6825,6 @@ static void _mlx5e_remove(struct auxiliary_device *adev) * is already unregistered before changing to NIC profile. 
*/ if (priv->netdev->reg_state == NETREG_REGISTERED) { - mlx5e_psp_unregister(priv); unregister_netdev(priv->netdev); _mlx5e_suspend(adev, false); } else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 14884b9ea7f3..a01ee656a1e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -939,7 +939,11 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) sq->dma_fifo_cc = dma_fifo_cc; sq->cc = sqcc; - netdev_tx_completed_queue(sq->txq, npkts, nbytes); + /* Do not update BQL for TXQs that got replaced by new active ones, as + * netdev_tx_reset_queue() is called for them in mlx5e_activate_txqsq(). + */ + if (sq == sq->priv->txq2sq[sq->txq_ix]) + netdev_tx_completed_queue(sq->txq, npkts, nbytes); } #ifdef CONFIG_MLX5_CORE_IPOIB diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8de6c7f6c294..ea94a727633f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -52,6 +52,7 @@ #include "devlink.h" #include "lag/lag.h" #include "en/tc/post_meter.h" +#include "fw_reset.h" /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. @@ -3991,6 +3992,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (IS_ERR(esw)) return PTR_ERR(esw); + if (mlx5_fw_reset_in_progress(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, "Can't change eswitch mode during firmware reset"); + return -EBUSY; + } + if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 2bceb42c98cc..ae10665c53f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -15,6 +15,7 @@ enum { MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, + MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, }; struct mlx5_fw_reset { @@ -128,6 +129,16 @@ int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_ty return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL); } +bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (!fw_reset) + return false; + + return test_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); +} + static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev, u8 *reset_method) { @@ -243,6 +254,8 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); devl_unlock(devlink); } + + clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); } static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) @@ -462,27 +475,48 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, reset_request_work); struct mlx5_core_dev *dev = fw_reset->dev; + bool nack_request = false; + struct devlink *devlink; int err; err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method); - if (err) + if (err) { + nack_request = true; mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err); + } else if (!mlx5_is_reset_now_capable(dev, fw_reset->reset_method) || + 
test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, + &fw_reset->reset_flags)) { + nack_request = true; + } - if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) || - !mlx5_is_reset_now_capable(dev, fw_reset->reset_method)) { + devlink = priv_to_devlink(dev); + /* For external resets, try to acquire devl_lock. Skip if devlink reset is + * pending (lock already held) + */ + if (nack_request || + (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, + &fw_reset->reset_flags) && + !devl_trylock(devlink))) { err = mlx5_fw_reset_set_reset_sync_nack(dev); mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s", err ? "Failed" : "Sent"); return; } + if (mlx5_sync_reset_set_reset_requested(dev)) - return; + goto unlock; + + set_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); err = mlx5_fw_reset_set_reset_sync_ack(dev); if (err) mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); else mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n"); + +unlock: + if (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) + devl_unlock(devlink); } static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id) @@ -722,6 +756,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work) if (mlx5_sync_reset_clear_reset_requested(dev, true)) return; + + clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); } @@ -758,6 +794,7 @@ static void mlx5_sync_reset_timeout_work(struct work_struct *work) if (mlx5_sync_reset_clear_reset_requested(dev, true)) return; + clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n"); } @@ -844,7 +881,8 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) cancel_work_sync(&fw_reset->reset_reload_work); cancel_work_sync(&fw_reset->reset_now_work); cancel_work_sync(&fw_reset->reset_abort_work); - cancel_delayed_work(&fw_reset->reset_timeout_work); + if (test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + mlx5_sync_reset_clear_reset_requested(dev, true); } static const struct devlink_param mlx5_fw_reset_devlink_params[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index d5b28525c960..2d96b2adc1cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -10,6 +10,7 @@ int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_ty int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, struct netlink_ext_ack *extack); int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); +bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 1ac933cd8f02..a459a30f36ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1413,6 +1413,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev) { mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp); + dev->priv.hca_devcom_comp = NULL; } static int 
mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index aad52d3a90e6..2d86af8f0d9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -67,12 +67,19 @@ err_metadata: static int enable_mpesw(struct mlx5_lag *ldev) { - int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev0; int err; + int idx; int i; - if (idx < 0 || ldev->mode != MLX5_LAG_MODE_NONE) + if (ldev->mode == MLX5_LAG_MODE_MPESW) + return 0; + + if (ldev->mode != MLX5_LAG_MODE_NONE) + return -EINVAL; + + idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + if (idx < 0) return -EINVAL; dev0 = ldev->pf[idx].dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1ab569ce3fcf..4209da722f9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -2231,6 +2231,7 @@ static void shutdown(struct pci_dev *pdev) mlx5_core_info(dev, "Shutdown was called\n"); set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + mlx5_drain_fw_reset(dev); mlx5_drain_health_wq(dev); err = mlx5_try_fast_unload(dev); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 5afe6b155ef0..81935f87bfcd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -440,7 +440,9 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table, rhashtable_remove_fast(&mr_table->route_ht, &mr_orig_route->ht_node, mlxsw_sp_mr_route_ht_params); + mutex_lock(&mr_table->route_list_lock); list_del(&mr_orig_route->node); + mutex_unlock(&mr_table->route_list_lock); mlxsw_sp_mr_route_destroy(mr_table, mr_orig_route); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a2033837182e..2d0e89bd2fb9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2265,6 +2265,7 @@ mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n, if (!neigh_entry) return NULL; + neigh_hold(n); neigh_entry->key.n = n; neigh_entry->rif = rif; INIT_LIST_HEAD(&neigh_entry->nexthop_list); @@ -2274,6 +2275,7 @@ mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n, static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry) { + neigh_release(neigh_entry->key.n); kfree(neigh_entry); } @@ -2858,6 +2860,11 @@ static int mlxsw_sp_router_schedule_work(struct net *net, if (!net_work) return NOTIFY_BAD; + /* Take a reference to ensure the neighbour won't be destructed until + * we drop the reference in the work item. + */ + neigh_clone(n); + INIT_WORK(&net_work->work, cb); net_work->mlxsw_sp = router->mlxsw_sp; net_work->n = n; @@ -2881,11 +2888,6 @@ static int mlxsw_sp_router_schedule_neigh_work(struct mlxsw_sp_router *router, struct net *net; net = neigh_parms_net(n->parms); - - /* Take a reference to ensure the neighbour won't be destructed until we - * drop the reference in delayed work. 
- */ - neigh_clone(n); return mlxsw_sp_router_schedule_work(net, router, n, mlxsw_sp_router_neigh_event_work); } @@ -4320,6 +4322,8 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, if (err) goto err_neigh_entry_insert; + neigh_release(old_n); + read_lock_bh(&n->lock); nud_state = n->nud_state; dead = n->dead; @@ -4328,14 +4332,10 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) { - neigh_release(old_n); - neigh_clone(n); __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } - neigh_release(n); - return 0; err_neigh_entry_insert: @@ -4428,6 +4428,11 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, } } + /* Release the reference taken by neigh_lookup() / neigh_create() since + * neigh_entry already holds one. + */ + neigh_release(n); + /* If that is the first nexthop connected to that neigh, add to * nexthop_neighs_list */ @@ -4454,11 +4459,9 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; - struct neighbour *n; if (!neigh_entry) return; - n = neigh_entry->key.n; __mlxsw_sp_nexthop_neigh_update(nh, true); list_del(&nh->neigh_list_node); @@ -4472,8 +4475,6 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); - - neigh_release(n); } static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 405e91eb3141..755083852eef 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2655,9 +2655,6 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp) static void rtl_prepare_power_down(struct rtl8169_private *tp) { - if (tp->dash_enabled) - return; - if (tp->mac_version == RTL_GIGA_MAC_VER_32 || tp->mac_version == RTL_GIGA_MAC_VER_33) rtl_ephy_write(tp, 0x19, 0xff64); @@ -4812,7 +4809,7 @@ static void rtl8169_down(struct rtl8169_private *tp) rtl_disable_exit_l1(tp); rtl_prepare_power_down(tp); - if (tp->dash_type != RTL_DASH_NONE) + if (tp->dash_type != RTL_DASH_NONE && !tp->saved_wolopts) rtl8168_driver_stop(tp); } diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index a54d71155263..fe5b2926d8ab 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -209,6 +209,7 @@ config TI_ICSSG_PRUETH_SR1 depends on PRU_REMOTEPROC depends on NET_SWITCHDEV depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER + depends on PTP_1588_CLOCK_OPTIONAL help Support dual Gigabit Ethernet ports over the ICSSG PRU Subsystem. This subsystem is available on the AM65 SR1.0 platform. 
@@ -234,7 +235,7 @@ config TI_PRUETH depends on PRU_REMOTEPROC depends on NET_SWITCHDEV select TI_ICSS_IEP - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL help Some TI SoCs has Programmable Realtime Unit (PRU) cores which can support Single or Dual Ethernet ports with the help of firmware code diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index dea411e132db..2efa3ba148aa 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -737,6 +737,9 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, struct ethhdr *eth = eth_hdr(skb); rx_handler_result_t ret = RX_HANDLER_PASS; + if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) + return RX_HANDLER_PASS; + if (is_multicast_ether_addr(eth->h_dest)) { if (ipvlan_external_frame(skb, port)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c index f3d83b04c953..201dee1a1698 100644 --- a/drivers/net/phy/marvell-88q2xxx.c +++ b/drivers/net/phy/marvell-88q2xxx.c @@ -698,7 +698,7 @@ static int mv88q2xxx_hwmon_write(struct device *dev, switch (attr) { case hwmon_temp_max: - clamp_val(val, -75000, 180000); + val = clamp(val, -75000, 180000); val = (val / 1000) + 75; val = FIELD_PREP(MDIO_MMD_PCS_MV_TEMP_SENSOR3_INT_THRESH_MASK, val); diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 67ecf3d4af2b..6ff0385201a5 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -691,10 +691,6 @@ static int rtl8211f_config_aldps(struct phy_device *phydev) static int rtl8211f_config_phy_eee(struct phy_device *phydev) { - /* RTL8211FVD has no PHYCR2 register */ - if (phydev->drv->phy_id == RTL_8211FVD_PHYID) - return 0; - /* Disable PHY-mode EEE so LPI is passed to the MAC */ return phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR2, RTL8211F_PHYCR2_PHY_EEE_ENABLE, 0); diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 0401fa6b24d2..6166e9196364 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -497,6 +497,8 @@ static const struct sfp_quirk sfp_quirks[] = { SFP_QUIRK("ALCATELLUCENT", "3FE46541AA", sfp_quirk_2500basex, sfp_fixup_nokia), + SFP_QUIRK_F("BIDB", "X-ONU-SFPP", sfp_fixup_potron), + // FLYPRO SFP-10GT-CS-30M uses Rollball protocol to talk to the PHY. 
SFP_QUIRK_F("FLYPRO", "SFP-10GT-CS-30M", sfp_fixup_rollball), diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index ffd7367ce119..018a80674f06 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -406,7 +406,7 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) if (rc || (transferred != sizeof(cmd))) { nfc_err(&phy->udev->dev, "Reader power on cmd error %d\n", rc); - return rc; + return rc ?: -EINVAL; } rc = usb_submit_urb(phy->in_urb, GFP_KERNEL); diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index b9d87e56cbbc..3ff6da3bf4e6 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -2032,7 +2032,7 @@ end: return ret; } -int rapl_package_add_pmu(struct rapl_package *rp) +int rapl_package_add_pmu_locked(struct rapl_package *rp) { struct rapl_package_pmu_data *data = &rp->pmu_data; int idx; @@ -2040,8 +2040,6 @@ int rapl_package_add_pmu(struct rapl_package *rp) if (rp->has_pmu) return -EEXIST; - guard(cpus_read_lock)(); - for (idx = 0; idx < rp->nr_domains; idx++) { struct rapl_domain *rd = &rp->domains[idx]; int domain = rd->id; @@ -2091,17 +2089,23 @@ int rapl_package_add_pmu(struct rapl_package *rp) return rapl_pmu_update(rp); } +EXPORT_SYMBOL_GPL(rapl_package_add_pmu_locked); + +int rapl_package_add_pmu(struct rapl_package *rp) +{ + guard(cpus_read_lock)(); + + return rapl_package_add_pmu_locked(rp); +} EXPORT_SYMBOL_GPL(rapl_package_add_pmu); -void rapl_package_remove_pmu(struct rapl_package *rp) +void rapl_package_remove_pmu_locked(struct rapl_package *rp) { struct rapl_package *pos; if (!rp->has_pmu) return; - guard(cpus_read_lock)(); - list_for_each_entry(pos, &rapl_packages, plist) { /* PMU is still needed */ if (pos->has_pmu && pos != rp) @@ -2111,6 +2115,14 @@ void rapl_package_remove_pmu(struct rapl_package *rp) perf_pmu_unregister(&rapl_pmu.pmu); memset(&rapl_pmu, 0, sizeof(struct rapl_pmu)); } +EXPORT_SYMBOL_GPL(rapl_package_remove_pmu_locked); + +void rapl_package_remove_pmu(struct rapl_package *rp) +{ + guard(cpus_read_lock)(); + + rapl_package_remove_pmu_locked(rp); +} EXPORT_SYMBOL_GPL(rapl_package_remove_pmu); #endif diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 0ce1096b6314..9a7e150b3536 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -82,7 +82,7 @@ static int rapl_cpu_online(unsigned int cpu) if (IS_ERR(rp)) return PTR_ERR(rp); if (rapl_msr_pmu) - rapl_package_add_pmu(rp); + rapl_package_add_pmu_locked(rp); } cpumask_set_cpu(cpu, &rp->cpumask); return 0; @@ -101,7 +101,7 @@ static int rapl_cpu_down_prep(unsigned int cpu) lead_cpu = cpumask_first(&rp->cpumask); if (lead_cpu >= nr_cpu_ids) { if (rapl_msr_pmu) - rapl_package_remove_pmu(rp); + rapl_package_remove_pmu_locked(rp); rapl_remove_package_cpuslocked(rp); } else if (rp->lead_cpu == cpu) { rp->lead_cpu = lead_cpu; diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 4112a0097338..1ff369880beb 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -68,7 +68,7 @@ static ssize_t show_constraint_##_attr(struct device *dev, \ int id; \ struct powercap_zone_constraint *pconst;\ \ - if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id)) \ + if (sscanf(dev_attr->attr.name, "constraint_%d_", &id) != 1) \ return -EINVAL; \ if (id >= power_zone->const_id_cnt) \ return -EINVAL; \ @@ -93,7 +93,7 @@ static ssize_t store_constraint_##_attr(struct device *dev,\ 
int id; \ struct powercap_zone_constraint *pconst;\ \ - if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id)) \ + if (sscanf(dev_attr->attr.name, "constraint_%d_", &id) != 1) \ return -EINVAL; \ if (id >= power_zone->const_id_cnt) \ return -EINVAL; \ @@ -162,7 +162,7 @@ static ssize_t show_constraint_name(struct device *dev, ssize_t len = -ENODATA; struct powercap_zone_constraint *pconst; - if (!sscanf(dev_attr->attr.name, "constraint_%d_", &id)) + if (sscanf(dev_attr->attr.name, "constraint_%d_", &id) != 1) return -EINVAL; if (id >= power_zone->const_id_cnt) return -EINVAL; @@ -625,17 +625,23 @@ struct powercap_control_type *powercap_register_control_type( INIT_LIST_HEAD(&control_type->node); control_type->dev.class = &powercap_class; dev_set_name(&control_type->dev, "%s", name); - result = device_register(&control_type->dev); - if (result) { - put_device(&control_type->dev); - return ERR_PTR(result); - } idr_init(&control_type->idr); mutex_lock(&powercap_cntrl_list_lock); list_add_tail(&control_type->node, &powercap_cntrl_list); mutex_unlock(&powercap_cntrl_list_lock); + result = device_register(&control_type->dev); + if (result) { + mutex_lock(&powercap_cntrl_list_lock); + list_del(&control_type->node); + mutex_unlock(&powercap_cntrl_list_lock); + + idr_destroy(&control_type->idr); + put_device(&control_type->dev); + return ERR_PTR(result); + } + return control_type; } EXPORT_SYMBOL_GPL(powercap_register_control_type); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index 0d4dcc66e097..c693d934103a 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -503,7 +503,8 @@ static const struct pci_device_id proc_thermal_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, WCL_THERMAL, PROC_THERMAL_FEATURE_MSI_SUPPORT | PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_WT_HINT | - PROC_THERMAL_FEATURE_POWER_FLOOR | PROC_THERMAL_FEATURE_PTC) }, + PROC_THERMAL_FEATURE_POWER_FLOOR | PROC_THERMAL_FEATURE_PTC | + PROC_THERMAL_FEATURE_SOC_POWER_SLIDER) }, { PCI_DEVICE_DATA(INTEL, NVL_H_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MSI_SUPPORT | PROC_THERMAL_FEATURE_WT_HINT | diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 17ca5c082643..89758c9934ec 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -500,7 +500,7 @@ void thermal_zone_set_trip_hyst(struct thermal_zone_device *tz, WRITE_ONCE(trip->hysteresis, hyst); thermal_notify_tz_trip_change(tz, trip); /* - * If the zone temperature is above or at the trip tmperature, the trip + * If the zone temperature is above or at the trip temperature, the trip * is in the trips_reached list and its threshold is equal to its low * temperature. It needs to stay in that list, but its threshold needs * to be updated and the list ordering may need to be restored. @@ -1043,7 +1043,7 @@ static void thermal_cooling_device_init_complete(struct thermal_cooling_device * * @np: a pointer to a device tree node. * @type: the thermal cooling device type. * @devdata: device private data. - * @ops: standard thermal cooling devices callbacks. + * @ops: standard thermal cooling devices callbacks. * * This interface function adds a new thermal cooling device (fan/processor/...) 
* to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7a501e73d880..1abc7ed2990e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2019,13 +2019,14 @@ out: else btrfs_delalloc_release_space(inode, data_reserved, page_start, reserved_space, true); - extent_changeset_free(data_reserved); out_noreserve: if (only_release_metadata) btrfs_check_nocow_unlock(inode); sb_end_pagefault(inode->vfs_inode.i_sb); + extent_changeset_free(data_reserved); + if (ret < 0) return vmf_error(ret); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c4bee47829ed..317db7d10a21 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -256,6 +256,7 @@ static void print_data_reloc_error(const struct btrfs_inode *inode, u64 file_off if (ret < 0) { btrfs_err_rl(fs_info, "failed to lookup extent item for logical %llu: %d", logical, ret); + btrfs_release_path(&path); return; } eb = path.nodes[0]; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9e2b53e90dcb..d9d8d9968a58 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1243,14 +1243,7 @@ out: btrfs_end_transaction(trans); else if (trans) ret = btrfs_end_transaction(trans); - - /* - * At this point we either failed at allocating prealloc, or we - * succeeded and passed the ownership to it to add_qgroup_rb(). In any - * case, this needs to be NULL or there is something wrong. - */ - ASSERT(prealloc == NULL); - + kfree(prealloc); return ret; } @@ -1682,12 +1675,7 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); - /* - * At this point we either failed at allocating prealloc, or we - * succeeded and passed the ownership to it to add_qgroup_rb(). In any - * case, this needs to be NULL or there is something wrong. - */ - ASSERT(prealloc == NULL); + kfree(prealloc); return ret; } @@ -3279,7 +3267,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, struct btrfs_root *quota_root; struct btrfs_qgroup *srcgroup; struct btrfs_qgroup *dstgroup; - struct btrfs_qgroup *prealloc = NULL; + struct btrfs_qgroup *prealloc; struct btrfs_qgroup_list **qlist_prealloc = NULL; bool free_inherit = false; bool need_rescan = false; @@ -3520,14 +3508,7 @@ out: } if (free_inherit) kfree(inherit); - - /* - * At this point we either failed at allocating prealloc, or we - * succeeded and passed the ownership to it to add_qgroup_rb(). In any - * case, this needs to be NULL or there is something wrong. 
- */ - ASSERT(prealloc == NULL); - + kfree(prealloc); return ret; } diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 05cfda8af422..e9124605974b 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -187,7 +187,6 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr, ret = btrfs_search_slot(&trans, root, &key, path, -1, 1); if (ret) { test_err("couldn't find backref %d", ret); - btrfs_free_path(path); return ret; } btrfs_del_item(&trans, root, path); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fff37c8d96a4..31edc93a383e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5865,14 +5865,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, struct btrfs_inode *curr_inode = start_inode; int ret = 0; - /* - * If we are logging a new name, as part of a link or rename operation, - * don't bother logging new dentries, as we just want to log the names - * of an inode and that any new parents exist. - */ - if (ctx->logging_new_name) - return 0; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -6051,6 +6043,33 @@ static int conflicting_inode_is_dir(struct btrfs_root *root, u64 ino, return ret; } +static bool can_log_conflicting_inode(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode) +{ + if (!S_ISDIR(inode->vfs_inode.i_mode)) + return true; + + if (inode->last_unlink_trans < trans->transid) + return true; + + /* + * If this is a directory and its unlink_trans is not from a past + * transaction then we must fallback to a transaction commit in order + * to avoid getting a directory with 2 hard links after log replay. + * + * This happens if a directory A is renamed, moved from one parent + * directory to another one, a new file is created in the old parent + * directory with the old name of our directory A, the new file is + * fsynced, then we moved the new file to some other parent directory + * and fsync again the new file. This results in a log tree where we + * logged that directory A existed, with the INODE_REF item for the + * new location but without having logged its old parent inode, so + * that on log replay we add a new link for the new location but the + * old link remains, resulting in a link count of 2. 
+ */ + return false; +} + static int add_conflicting_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -6154,6 +6173,11 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, return 0; } + if (!can_log_conflicting_inode(trans, inode)) { + btrfs_add_delayed_iput(inode); + return BTRFS_LOG_FORCE_COMMIT; + } + btrfs_add_delayed_iput(inode); ino_elem = kmalloc(sizeof(*ino_elem), GFP_NOFS); @@ -6218,6 +6242,12 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, break; } + if (!can_log_conflicting_inode(trans, inode)) { + btrfs_add_delayed_iput(inode); + ret = BTRFS_LOG_FORCE_COMMIT; + break; + } + /* * Always log the directory, we cannot make this * conditional on need_log_inode() because the directory diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ae1742a35e76..13c514684cfb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7128,6 +7128,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, fs_devices->seeding = true; fs_devices->opened = 1; + list_add(&fs_devices->seed_list, &fs_info->fs_devices->seed_list); return fs_devices; } diff --git a/fs/file_attr.c b/fs/file_attr.c index 4c4916632f11..13cdb31a3e94 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -2,6 +2,7 @@ #include <linux/fs.h> #include <linux/security.h> #include <linux/fscrypt.h> +#include <linux/fsnotify.h> #include <linux/fileattr.h> #include <linux/export.h> #include <linux/syscalls.h> @@ -298,6 +299,7 @@ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, err = inode->i_op->fileattr_set(idmap, dentry, fa); if (err) goto out; + fsnotify_xattr(dentry); } out: diff --git a/fs/libfs.c b/fs/libfs.c index 9264523be85c..591eb649ebba 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -346,22 +346,22 @@ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry) * User space expects the directory offset value of the replaced * (new) directory entry to be unchanged after a rename. * - * Returns zero on success, a negative errno value on failure. + * Caller must have grabbed a slot for new_dentry in the maple_tree + * associated with new_dir, even if dentry is negative. 
*/ -int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +void simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); long new_offset = dentry2offset(new_dentry); - simple_offset_remove(old_ctx, old_dentry); + if (WARN_ON(!new_offset)) + return; - if (new_offset) { - offset_set(new_dentry, 0); - return simple_offset_replace(new_ctx, old_dentry, new_offset); - } - return simple_offset_add(new_ctx, old_dentry); + simple_offset_remove(old_ctx, old_dentry); + offset_set(new_dentry, 0); + WARN_ON(simple_offset_replace(new_ctx, old_dentry, new_offset)); } /** @@ -388,31 +388,23 @@ int simple_offset_rename_exchange(struct inode *old_dir, long new_index = dentry2offset(new_dentry); int ret; - simple_offset_remove(old_ctx, old_dentry); - simple_offset_remove(new_ctx, new_dentry); + if (WARN_ON(!old_index || !new_index)) + return -EINVAL; - ret = simple_offset_replace(new_ctx, old_dentry, new_index); - if (ret) - goto out_restore; + ret = mtree_store(&new_ctx->mt, new_index, old_dentry, GFP_KERNEL); + if (WARN_ON(ret)) + return ret; - ret = simple_offset_replace(old_ctx, new_dentry, old_index); - if (ret) { - simple_offset_remove(new_ctx, old_dentry); - goto out_restore; + ret = mtree_store(&old_ctx->mt, old_index, new_dentry, GFP_KERNEL); + if (WARN_ON(ret)) { + mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL); + return ret; } - ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); - if (ret) { - simple_offset_remove(new_ctx, old_dentry); - simple_offset_remove(old_ctx, new_dentry); - goto out_restore; - } + offset_set(old_dentry, new_index); + offset_set(new_dentry, old_index); + simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); return 0; - -out_restore: - (void)simple_offset_replace(old_ctx, old_dentry, old_index); - (void)simple_offset_replace(new_ctx, new_dentry, new_index); - return ret; } /** diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index d27ff5e5f165..71bd44e5ab6d 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -270,8 +270,15 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, /* * Include parent/name in notification either if some notification * groups require parent info or the parent is interested in this event. + * The parent interest in ACCESS/MODIFY events does not apply to special + * files, where read/write are not on the filesystem of the parent and + * events can provide an undesirable side-channel for information + * exfiltration. 
*/ - parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; + parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS && + !(data_type == FSNOTIFY_EVENT_PATH && + d_is_special(dentry) && + (mask & (FS_ACCESS | FS_MODIFY))); if (parent_needed || parent_interested) { /* When notifying parent, child should be passed as data */ WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index e9534258d1ef..75d372ceb655 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 57 -#define CIFS_VERSION "2.57" +#define SMB3_PRODUCT_BUILD 58 +#define CIFS_VERSION "2.58" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index eeb4011cb217..fdd84369e7b8 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -12,7 +12,7 @@ #include <net/sock.h> #include <linux/unaligned.h> #include "../common/smbfsctl.h" -#include "../common/smb2pdu.h" +#include "../common/smb1pdu.h" #define CIFS_PROT 0 #define POSIX_PROT (CIFS_PROT+1) diff --git a/fs/smb/common/smb1pdu.h b/fs/smb/common/smb1pdu.h new file mode 100644 index 000000000000..df6d4e11ae92 --- /dev/null +++ b/fs/smb/common/smb1pdu.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * + * Copyright (C) International Business Machines Corp., 2002,2009 + * 2018 Samsung Electronics Co., Ltd. + * Author(s): Steve French <sfrench@us.ibm.com> + * Namjae Jeon <linkinjeon@kernel.org> + * + */ + +#ifndef _COMMON_SMB1_PDU_H +#define _COMMON_SMB1_PDU_H + +#define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff) + +/* + * See MS-CIFS 2.2.3.1 + * MS-SMB 2.2.3.1 + */ +struct smb_hdr { + __u8 Protocol[4]; + __u8 Command; + union { + struct { + __u8 ErrorClass; + __u8 Reserved; + __le16 Error; + } __packed DosError; + __le32 CifsError; + } __packed Status; + __u8 Flags; + __le16 Flags2; /* note: le */ + __le16 PidHigh; + union { + struct { + __le32 SequenceNumber; /* le */ + __u32 Reserved; /* zero */ + } __packed Sequence; + __u8 SecuritySignature[8]; /* le */ + } __packed Signature; + __u8 pad[2]; + __u16 Tid; + __le16 Pid; + __u16 Uid; + __le16 Mid; + __u8 WordCount; +} __packed; + +/* See MS-CIFS 2.2.4.52.1 */ +typedef struct smb_negotiate_req { + struct smb_hdr hdr; /* wct = 0 */ + __le16 ByteCount; + unsigned char DialectsArray[]; +} __packed SMB_NEGOTIATE_REQ; + +#endif /* _COMMON_SMB1_PDU_H */ diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 3c8d8a4e7439..f5ebbe31384a 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1293,6 +1293,7 @@ struct create_durable_handle_reconnect_v2 { struct create_context_hdr ccontext; __u8 Name[8]; struct durable_reconnect_context_v2 dcontext; + __u8 Pad[4]; } __packed; /* See MS-SMB2 2.2.14.2.12 */ @@ -1985,39 +1986,6 @@ struct smb2_lease_ack { __le64 LeaseDuration; } __packed; -/* - * See MS-CIFS 2.2.3.1 - * MS-SMB 2.2.3.1 - */ -struct smb_hdr { - __u8 Protocol[4]; - __u8 Command; - union { - struct { - __u8 ErrorClass; - __u8 Reserved; - __le16 Error; - } __packed DosError; - __le32 CifsError; - } __packed Status; - __u8 Flags; - __le16 Flags2; /* note: le */ - __le16 PidHigh; - union { - struct { - __le32 SequenceNumber; /* le */ - __u32 Reserved; /* zero */ - } __packed Sequence; - __u8 SecuritySignature[8]; /* le */ - } __packed Signature; - __u8 
pad[2]; - __u16 Tid; - __le16 Pid; - __u16 Uid; - __le16 Mid; - __u8 WordCount; -} __packed; - #define OP_BREAK_STRUCT_SIZE_20 24 #define OP_BREAK_STRUCT_SIZE_21 36 @@ -2122,11 +2090,4 @@ struct smb_hdr { #define SET_MINIMUM_RIGHTS (FILE_READ_EA | FILE_READ_ATTRIBUTES \ | READ_CONTROL | SYNCHRONIZE) -/* See MS-CIFS 2.2.4.52.1 */ -typedef struct smb_negotiate_req { - struct smb_hdr hdr; /* wct = 0 */ - __le16 ByteCount; - unsigned char DialectsArray[]; -} __packed SMB_NEGOTIATE_REQ; - #endif /* _COMMON_SMB2PDU_H */ diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 384b19177e1c..ee4c2726771a 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -133,6 +133,14 @@ struct smbdirect_socket { struct smbdirect_socket_parameters parameters; /* + * The state for connect/negotiation + */ + struct { + spinlock_t lock; + struct work_struct work; + } connect; + + /* * The state for keepalive and timeout handling */ struct { @@ -353,6 +361,10 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) INIT_WORK(&sc->disconnect_work, __smbdirect_socket_disabled_work); disable_work_sync(&sc->disconnect_work); + spin_lock_init(&sc->connect.lock); + INIT_WORK(&sc->connect.work, __smbdirect_socket_disabled_work); + disable_work_sync(&sc->connect.work); + INIT_WORK(&sc->idle.immediate_work, __smbdirect_socket_disabled_work); disable_work_sync(&sc->idle.immediate_work); INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work); diff --git a/fs/smb/common/smbglob.h b/fs/smb/common/smbglob.h index 9562845a5617..4e33d91cdc9d 100644 --- a/fs/smb/common/smbglob.h +++ b/fs/smb/common/smbglob.h @@ -11,8 +11,6 @@ #ifndef _COMMON_SMB_GLOB_H #define _COMMON_SMB_GLOB_H -#define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff) - struct smb_version_values { char *version_string; __u16 protocol_id; diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 1c181ef99929..7d880ff34402 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -325,8 +325,10 @@ struct ksmbd_session *ksmbd_session_lookup_all(struct ksmbd_conn *conn, sess = ksmbd_session_lookup(conn, id); if (!sess && conn->binding) sess = ksmbd_session_lookup_slowpath(id); - if (sess && sess->state != SMB2_SESSION_VALID) + if (sess && sess->state != SMB2_SESSION_VALID) { + ksmbd_user_session_put(sess); sess = NULL; + } return sess; } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 27f87a13f20a..8aa483800014 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2363,7 +2363,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, int rc = 0; unsigned int next = 0; - if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength + + if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength + 1 + le16_to_cpu(eabuf->EaValueLength)) return -EINVAL; @@ -2440,7 +2440,7 @@ next: break; } - if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength + + if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength + 1 + le16_to_cpu(eabuf->EaValueLength)) { rc = -EINVAL; break; diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index 067b45048c73..95bf1465387b 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -10,6 +10,7 @@ #include "glob.h" #include "../common/smbglob.h" +#include "../common/smb1pdu.h" #include "../common/smb2pdu.h" #include 
"../common/fscc.h" #include "smb2pdu.h" diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 5aa7a66334d9..05598d994a68 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1307,9 +1307,6 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, granted |= le32_to_cpu(ace->access_req); ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size)); } - - if (!pdacl->num_aces) - granted = GENERIC_ALL_FLAGS; } if (!uid) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 4e7ab8d9314f..f585359684d4 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -242,6 +242,7 @@ static void smb_direct_disconnect_rdma_work(struct work_struct *work) * disable[_delayed]_work_sync() */ disable_work(&sc->disconnect_work); + disable_work(&sc->connect.work); disable_work(&sc->recv_io.posted.refill_work); disable_delayed_work(&sc->idle.timer_work); disable_work(&sc->idle.immediate_work); @@ -297,6 +298,7 @@ smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc) * not queued again but here we don't block and avoid * disable[_delayed]_work_sync() */ + disable_work(&sc->connect.work); disable_work(&sc->recv_io.posted.refill_work); disable_work(&sc->idle.immediate_work); disable_delayed_work(&sc->idle.timer_work); @@ -467,6 +469,7 @@ static void free_transport(struct smb_direct_transport *t) */ smb_direct_disconnect_wake_up_all(sc); + disable_work_sync(&sc->connect.work); disable_work_sync(&sc->recv_io.posted.refill_work); disable_delayed_work_sync(&sc->idle.timer_work); disable_work_sync(&sc->idle.immediate_work); @@ -635,28 +638,8 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (sc->recv_io.expected) { case SMBDIRECT_EXPECT_NEGOTIATE_REQ: - if (wc->byte_len < sizeof(struct smbdirect_negotiate_req)) { - put_recvmsg(sc, recvmsg); - smb_direct_disconnect_rdma_connection(sc); - return; - } - sc->recv_io.reassembly.full_packet_received = true; - /* - * Some drivers (at least mlx5_ib) might post a - * recv completion before RDMA_CM_EVENT_ESTABLISHED, - * we need to adjust our expectation in that case. - */ - if (!sc->first_error && sc->status == SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING) - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; - if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) { - put_recvmsg(sc, recvmsg); - smb_direct_disconnect_rdma_connection(sc); - return; - } - sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; - enqueue_reassembly(sc, recvmsg, 0); - wake_up(&sc->status_wait); - return; + /* see smb_direct_negotiate_recv_done */ + break; case SMBDIRECT_EXPECT_DATA_TRANSFER: { struct smbdirect_data_transfer *data_transfer = (struct smbdirect_data_transfer *)recvmsg->packet; @@ -742,6 +725,126 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) smb_direct_disconnect_rdma_connection(sc); } +static void smb_direct_negotiate_recv_work(struct work_struct *work); + +static void smb_direct_negotiate_recv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbdirect_recv_io *recv_io = + container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); + struct smbdirect_socket *sc = recv_io->socket; + unsigned long flags; + + /* + * reset the common recv_done for later reuse. + */ + recv_io->cqe.done = recv_done; + + if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { + put_recvmsg(sc, recv_io); + if (wc->status != IB_WC_WR_FLUSH_ERR) { + pr_err("Negotiate Recv error. 
status='%s (%d)' opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, + wc->opcode); + smb_direct_disconnect_rdma_connection(sc); + } + return; + } + + ksmbd_debug(RDMA, "Negotiate Recv completed. status='%s (%d)', opcode=%d\n", + ib_wc_status_msg(wc->status), wc->status, + wc->opcode); + + ib_dma_sync_single_for_cpu(sc->ib.dev, + recv_io->sge.addr, + recv_io->sge.length, + DMA_FROM_DEVICE); + + /* + * This is an internal error! + */ + if (WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_NEGOTIATE_REQ)) { + put_recvmsg(sc, recv_io); + smb_direct_disconnect_rdma_connection(sc); + return; + } + + /* + * Don't reset timer to the keepalive interval in + * this will be done in smb_direct_negotiate_recv_work. + */ + + /* + * Only remember the recv_io if it has enough bytes, + * this gives smb_direct_negotiate_recv_work enough + * information in order to disconnect if it was not + * valid. + */ + sc->recv_io.reassembly.full_packet_received = true; + if (wc->byte_len >= sizeof(struct smbdirect_negotiate_req)) + enqueue_reassembly(sc, recv_io, 0); + else + put_recvmsg(sc, recv_io); + + /* + * Some drivers (at least mlx5_ib and irdma in roce mode) + * might post a recv completion before RDMA_CM_EVENT_ESTABLISHED, + * we need to adjust our expectation in that case. + * + * So we defer further processing of the negotiation + * to smb_direct_negotiate_recv_work(). + * + * If we are already in SMBDIRECT_SOCKET_NEGOTIATE_NEEDED + * we queue the work directly otherwise + * smb_direct_cm_handler() will do it, when + * RDMA_CM_EVENT_ESTABLISHED arrived. + */ + spin_lock_irqsave(&sc->connect.lock, flags); + if (!sc->first_error) { + INIT_WORK(&sc->connect.work, smb_direct_negotiate_recv_work); + if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_NEEDED) + queue_work(sc->workqueue, &sc->connect.work); + } + spin_unlock_irqrestore(&sc->connect.lock, flags); +} + +static void smb_direct_negotiate_recv_work(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, connect.work); + const struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_recv_io *recv_io; + + if (sc->first_error) + return; + + ksmbd_debug(RDMA, "Negotiate Recv Work running\n"); + + /* + * Reset timer to the keepalive interval in + * order to trigger our next keepalive message. + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_interval_msec)); + + /* + * If smb_direct_negotiate_recv_done() detected an + * invalid request we want to disconnect. + */ + recv_io = get_first_reassembly(sc); + if (!recv_io) { + smb_direct_disconnect_rdma_connection(sc); + return; + } + + if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_NEEDED)) { + smb_direct_disconnect_rdma_connection(sc); + return; + } + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; + wake_up(&sc->status_wait); +} + static int smb_direct_post_recv(struct smbdirect_socket *sc, struct smbdirect_recv_io *recvmsg) { @@ -758,7 +861,6 @@ static int smb_direct_post_recv(struct smbdirect_socket *sc, return ret; recvmsg->sge.length = sp->max_recv_size; recvmsg->sge.lkey = sc->ib.pd->local_dma_lkey; - recvmsg->cqe.done = recv_done; wr.wr_cqe = &recvmsg->cqe; wr.next = NULL; @@ -1732,6 +1834,7 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { struct smbdirect_socket *sc = cm_id->context; + unsigned long flags; ksmbd_debug(RDMA, "RDMA CM event. 
cm_id=%p event=%s (%d)\n", cm_id, rdma_event_msg(event->event), event->event); @@ -1739,18 +1842,27 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, switch (event->event) { case RDMA_CM_EVENT_ESTABLISHED: { /* - * Some drivers (at least mlx5_ib) might post a - * recv completion before RDMA_CM_EVENT_ESTABLISHED, + * Some drivers (at least mlx5_ib and irdma in roce mode) + * might post a recv completion before RDMA_CM_EVENT_ESTABLISHED, * we need to adjust our expectation in that case. * - * As we already started the negotiation, we just - * ignore RDMA_CM_EVENT_ESTABLISHED here. + * If smb_direct_negotiate_recv_done was called first + * it initialized sc->connect.work only for us to + * start, so that we turned into + * SMBDIRECT_SOCKET_NEGOTIATE_NEEDED, before + * smb_direct_negotiate_recv_work() runs. + * + * If smb_direct_negotiate_recv_done didn't happen + * yet. sc->connect.work is still be disabled and + * queue_work() is a no-op. */ - if (!sc->first_error && sc->status > SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING) - break; if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING)) break; sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; + spin_lock_irqsave(&sc->connect.lock, flags); + if (!sc->first_error) + queue_work(sc->workqueue, &sc->connect.work); + spin_unlock_irqrestore(&sc->connect.lock, flags); wake_up(&sc->status_wait); break; } @@ -1921,6 +2033,7 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) recvmsg = get_free_recvmsg(sc); if (!recvmsg) return -ENOMEM; + recvmsg->cqe.done = smb_direct_negotiate_recv_done; ret = smb_direct_post_recv(sc, recvmsg); if (ret) { @@ -2339,6 +2452,7 @@ respond: static int smb_direct_connect(struct smbdirect_socket *sc) { + struct smbdirect_recv_io *recv_io; int ret; ret = smb_direct_init_params(sc); @@ -2353,6 +2467,9 @@ static int smb_direct_connect(struct smbdirect_socket *sc) return ret; } + list_for_each_entry(recv_io, &sc->recv_io.free.list, list) + recv_io->cqe.done = recv_done; + ret = smb_direct_create_qpair(sc); if (ret) { pr_err("Can't accept RDMA client: %d\n", ret); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 98b0eb966d91..f891344bd76b 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -702,7 +702,7 @@ retry: rd.old_parent = NULL; rd.new_parent = new_path.dentry; rd.flags = flags; - rd.delegated_inode = NULL, + rd.delegated_inode = NULL; err = start_renaming_dentry(&rd, lookup_flags, old_child, &new_last); if (err) goto out_drop_write; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6498be4c44f8..e5be698256d1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1283,6 +1283,8 @@ struct bpf_ksym { struct list_head lnode; struct latch_tree_node tnode; bool prog; + u32 fp_start; + u32 fp_end; }; enum bpf_tramp_prog_type { @@ -1511,6 +1513,7 @@ void bpf_image_ksym_add(struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); +bool bpf_has_frame_pointer(unsigned long ip); int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); diff --git a/include/linux/fs.h b/include/linux/fs.h index 04ceeca12a0d..f5c9cf28c4dc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3247,7 +3247,7 @@ struct offset_ctx { void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct 
offset_ctx *octx, struct dentry *dentry); -int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, +void simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index e9ade2ff4af6..f479ef5b3341 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -214,10 +214,14 @@ void rapl_remove_package(struct rapl_package *rp); #ifdef CONFIG_PERF_EVENTS int rapl_package_add_pmu(struct rapl_package *rp); +int rapl_package_add_pmu_locked(struct rapl_package *rp); void rapl_package_remove_pmu(struct rapl_package *rp); +void rapl_package_remove_pmu_locked(struct rapl_package *rp); #else static inline int rapl_package_add_pmu(struct rapl_package *rp) { return 0; } +static inline int rapl_package_add_pmu_locked(struct rapl_package *rp) { return 0; } static inline void rapl_package_remove_pmu(struct rapl_package *rp) { } +static inline void rapl_package_remove_pmu_locked(struct rapl_package *rp) { } #endif #endif /* __INTEL_RAPL_H__ */ diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0eccd9c3a883..365925c9d262 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -123,27 +123,15 @@ void inet_frags_fini(struct inet_frags *); int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); -static inline void fqdir_pre_exit(struct fqdir *fqdir) -{ - /* Prevent creation of new frags. - * Pairs with READ_ONCE() in inet_frag_find(). - */ - WRITE_ONCE(fqdir->high_thresh, 0); - - /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() - * and ip6frag_expire_frag_queue(). - */ - WRITE_ONCE(fqdir->dead, true); -} +void fqdir_pre_exit(struct fqdir *fqdir); void fqdir_exit(struct fqdir *fqdir); void inet_frag_kill(struct inet_frag_queue *q, int *refs); void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); -/* Free all skbs in the queue; return the sum of their truesizes. */ -unsigned int inet_frag_rbtree_purge(struct rb_root *root, - enum skb_drop_reason reason); +void inet_frag_queue_flush(struct inet_frag_queue *q, + enum skb_drop_reason reason); static inline void inet_frag_putn(struct inet_frag_queue *q, int refs) { diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 38ef66826939..41d9fc6965f9 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -69,9 +69,6 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) int refs = 1; rcu_read_lock(); - /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ - if (READ_ONCE(fq->q.fqdir->dead)) - goto out_rcu_unlock; spin_lock(&fq->q.lock); if (fq->q.flags & INET_FRAG_COMPLETE) @@ -80,6 +77,12 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) fq->q.flags |= INET_FRAG_DROP; inet_frag_kill(&fq->q, &refs); + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). 
*/ + if (READ_ONCE(fq->q.fqdir->dead)) { + inet_frag_queue_flush(&fq->q, 0); + goto out; + } + dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) goto out; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index fab7dc73f738..0e266c2d0e7f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1091,6 +1091,29 @@ struct nft_rule_blob { __attribute__((aligned(__alignof__(struct nft_rule_dp)))); }; +enum nft_chain_types { + NFT_CHAIN_T_DEFAULT = 0, + NFT_CHAIN_T_ROUTE, + NFT_CHAIN_T_NAT, + NFT_CHAIN_T_MAX +}; + +/** + * struct nft_chain_validate_state - validation state + * + * If a chain is encountered again during table validation it is + * possible to avoid revalidation provided the calling context is + * compatible. This structure stores relevant calling context of + * previous validations. + * + * @hook_mask: the hook numbers and locations the chain is linked to + * @depth: the deepest call chain level the chain is linked to + */ +struct nft_chain_validate_state { + u8 hook_mask[NFT_CHAIN_T_MAX]; + u8 depth; +}; + /** * struct nft_chain - nf_tables chain * @@ -1109,6 +1132,7 @@ struct nft_rule_blob { * @udlen: user data length * @udata: user data in the chain * @blob_next: rule blob pointer to the next in the chain + * @vstate: validation state */ struct nft_chain { struct nft_rule_blob __rcu *blob_gen_0; @@ -1128,9 +1152,10 @@ struct nft_chain { /* Only used during control plane commit phase: */ struct nft_rule_blob *blob_next; + struct nft_chain_validate_state vstate; }; -int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); +int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain); int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv); @@ -1138,13 +1163,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); -enum nft_chain_types { - NFT_CHAIN_T_DEFAULT = 0, - NFT_CHAIN_T_ROUTE, - NFT_CHAIN_T_NAT, - NFT_CHAIN_T_MAX -}; - /** * struct nft_chain_type - nf_tables chain type info * diff --git a/include/uapi/linux/energy_model.h b/include/uapi/linux/energy_model.h index 4ec4c0eabbbb..0bcad967854f 100644 --- a/include/uapi/linux/energy_model.h +++ b/include/uapi/linux/energy_model.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/em.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_ENERGY_MODEL_H #define _UAPI_LINUX_ENERGY_MODEL_H diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 04eea6d1d0a9..72a5d030154e 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -40,6 +40,7 @@ #define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3) #define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4) #define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5) +#define MPTCP_PM_ADDR_FLAGS_MASK GENMASK(5, 0) struct mptcp_info { __u8 mptcpi_subflows; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c8ae6ab31651..1b9b18e5b03c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -760,6 +760,22 @@ struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) NULL; } +bool bpf_has_frame_pointer(unsigned long ip) +{ + struct bpf_ksym *ksym; + unsigned long offset; + + guard(rcu)(); + + ksym = bpf_ksym_find(ip); + 
if (!ksym || !ksym->fp_start || !ksym->fp_end) + return false; + + offset = ip - ksym->start; + + return offset >= ksym->fp_start && offset < ksym->fp_end; +} + const struct exception_table_entry *search_bpf_extables(unsigned long addr) { const struct exception_table_entry *e = NULL; diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c index 4dd7ef7c145c..cd500248abd9 100644 --- a/kernel/bpf/dmabuf_iter.c +++ b/kernel/bpf/dmabuf_iter.c @@ -6,10 +6,33 @@ #include <linux/kernel.h> #include <linux/seq_file.h> +struct dmabuf_iter_priv { + /* + * If this pointer is non-NULL, the buffer's refcount is elevated to + * prevent destruction between stop/start. If reading is not resumed and + * start is never called again, then dmabuf_iter_seq_fini drops the + * reference when the iterator is released. + */ + struct dma_buf *dmabuf; +}; + static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos) { - if (*pos) - return NULL; + struct dmabuf_iter_priv *p = seq->private; + + if (*pos) { + struct dma_buf *dmabuf = p->dmabuf; + + if (!dmabuf) + return NULL; + + /* + * Always resume from where we stopped, regardless of the value + * of pos. + */ + p->dmabuf = NULL; + return dmabuf; + } return dma_buf_iter_begin(); } @@ -54,8 +77,11 @@ static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v) { struct dma_buf *dmabuf = v; - if (dmabuf) - dma_buf_put(dmabuf); + if (dmabuf) { + struct dmabuf_iter_priv *p = seq->private; + + p->dmabuf = dmabuf; + } } static const struct seq_operations dmabuf_iter_seq_ops = { @@ -71,11 +97,27 @@ static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux, seq_puts(seq, "dmabuf iter\n"); } +static int dmabuf_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux) +{ + struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv; + + p->dmabuf = NULL; + return 0; +} + +static void dmabuf_iter_seq_fini(void *priv) +{ + struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv; + + if (p->dmabuf) + dma_buf_put(p->dmabuf); +} + static const struct bpf_iter_seq_info dmabuf_iter_seq_info = { .seq_ops = &dmabuf_iter_seq_ops, - .init_seq_private = NULL, - .fini_seq_private = NULL, - .seq_priv_size = 0, + .init_seq_private = dmabuf_iter_seq_init, + .fini_seq_private = dmabuf_iter_seq_fini, + .seq_priv_size = sizeof(struct dmabuf_iter_priv), }; static struct bpf_iter_reg bpf_dmabuf_reg_info = { diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index a198e40c799b..150e5871e66f 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -71,7 +71,6 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu) { struct llist_head *lhead; struct css_rstat_cpu *rstatc; - struct css_rstat_cpu __percpu *rstatc_pcpu; struct llist_node *self; /* @@ -104,18 +103,22 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu) /* * This function can be renentered by irqs and nmis for the same cgroup * and may try to insert the same per-cpu lnode into the llist. Note - * that llist_add() does not protect against such scenarios. + * that llist_add() does not protect against such scenarios. In addition + * this same per-cpu lnode can be modified through init_llist_node() + * from css_rstat_flush() running on a different CPU. 
* * To protect against such stacked contexts of irqs/nmis, we use the * fact that lnode points to itself when not on a list and then use - * this_cpu_cmpxchg() to atomically set to NULL to select the winner + * try_cmpxchg() to atomically set to NULL to select the winner * which will call llist_add(). The losers can assume the insertion is * successful and the winner will eventually add the per-cpu lnode to * the llist. + * + * Please note that we can not use this_cpu_cmpxchg() here as on some + * archs it is not safe against modifications from multiple CPUs. */ self = &rstatc->lnode; - rstatc_pcpu = css->rstat_cpu; - if (this_cpu_cmpxchg(rstatc_pcpu->lnode.next, self, NULL) != self) + if (!try_cmpxchg(&rstatc->lnode.next, &self, NULL)) return; lhead = ss_lhead_cpu(css->ss, cpu); diff --git a/kernel/power/em_netlink_autogen.c b/kernel/power/em_netlink_autogen.c index a7a09ab1d1c2..ceb3b2bb6ebe 100644 --- a/kernel/power/em_netlink_autogen.c +++ b/kernel/power/em_netlink_autogen.c @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/em.yaml */ /* YNL-GEN kernel source */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #include <net/netlink.h> #include <net/genetlink.h> diff --git a/kernel/power/em_netlink_autogen.h b/kernel/power/em_netlink_autogen.h index 78ce609641f1..140ab548103c 100644 --- a/kernel/power/em_netlink_autogen.h +++ b/kernel/power/em_netlink_autogen.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/em.yaml */ /* YNL-GEN kernel header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _LINUX_EM_GEN_H #define _LINUX_EM_GEN_H diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 05f5a49e9649..94164f2dec6d 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -41,6 +41,13 @@ static bool scx_init_task_enabled; static bool scx_switching_all; DEFINE_STATIC_KEY_FALSE(__scx_switched_all); +/* + * Tracks whether scx_enable() called scx_bypass(true). Used to balance bypass + * depth on enable failure. Will be removed when bypass depth is moved into the + * sched instance. + */ +static bool scx_bypassed_for_enable; + static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0); static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0); @@ -975,6 +982,30 @@ static void refill_task_slice_dfl(struct scx_sched *sch, struct task_struct *p) __scx_add_event(sch, SCX_EV_REFILL_SLICE_DFL, 1); } +static void local_dsq_post_enq(struct scx_dispatch_q *dsq, struct task_struct *p, + u64 enq_flags) +{ + struct rq *rq = container_of(dsq, struct rq, scx.local_dsq); + bool preempt = false; + + /* + * If @rq is in balance, the CPU is already vacant and looking for the + * next task to run. No need to preempt or trigger resched after moving + * @p into its local DSQ. 
+ */ + if (rq->scx.flags & SCX_RQ_IN_BALANCE) + return; + + if ((enq_flags & SCX_ENQ_PREEMPT) && p != rq->curr && + rq->curr->sched_class == &ext_sched_class) { + rq->curr->scx.slice = 0; + preempt = true; + } + + if (preempt || sched_class_above(&ext_sched_class, rq->curr->sched_class)) + resched_curr(rq); +} + static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq, struct task_struct *p, u64 enq_flags) { @@ -1086,22 +1117,10 @@ static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq, if (enq_flags & SCX_ENQ_CLEAR_OPSS) atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE); - if (is_local) { - struct rq *rq = container_of(dsq, struct rq, scx.local_dsq); - bool preempt = false; - - if ((enq_flags & SCX_ENQ_PREEMPT) && p != rq->curr && - rq->curr->sched_class == &ext_sched_class) { - rq->curr->scx.slice = 0; - preempt = true; - } - - if (preempt || sched_class_above(&ext_sched_class, - rq->curr->sched_class)) - resched_curr(rq); - } else { + if (is_local) + local_dsq_post_enq(dsq, p, enq_flags); + else raw_spin_unlock(&dsq->lock); - } } static void task_unlink_from_dsq(struct task_struct *p, @@ -1625,6 +1644,8 @@ static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags, dsq_mod_nr(dst_dsq, 1); p->scx.dsq = dst_dsq; + + local_dsq_post_enq(dst_dsq, p, enq_flags); } /** @@ -2402,7 +2423,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p, * ops.enqueue() that @p is the only one available for this cpu, * which should trigger an explicit follow-up scheduling event. */ - if (sched_class_above(&ext_sched_class, next->sched_class)) { + if (next && sched_class_above(&ext_sched_class, next->sched_class)) { WARN_ON_ONCE(!(sch->ops.flags & SCX_OPS_ENQ_LAST)); do_enqueue_task(rq, p, SCX_ENQ_LAST, -1); } else { @@ -2425,7 +2446,7 @@ static struct task_struct * do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) { struct task_struct *prev = rq->curr; - bool keep_prev, kick_idle = false; + bool keep_prev; struct task_struct *p; /* see kick_cpus_irq_workfn() */ @@ -2467,12 +2488,8 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx) refill_task_slice_dfl(rcu_dereference_sched(scx_root), p); } else { p = first_local_task(rq); - if (!p) { - if (kick_idle) - scx_kick_cpu(rcu_dereference_sched(scx_root), - cpu_of(rq), SCX_KICK_IDLE); + if (!p) return NULL; - } if (unlikely(!p->scx.slice)) { struct scx_sched *sch = rcu_dereference_sched(scx_root); @@ -3575,7 +3592,7 @@ static void scx_sched_free_rcu_work(struct work_struct *work) int node; irq_work_sync(&sch->error_irq_work); - kthread_stop(sch->helper->task); + kthread_destroy_worker(sch->helper); free_percpu(sch->pcpu); @@ -4318,6 +4335,11 @@ static void scx_disable_workfn(struct kthread_work *work) scx_dsp_max_batch = 0; free_kick_syncs(); + if (scx_bypassed_for_enable) { + scx_bypassed_for_enable = false; + scx_bypass(false); + } + mutex_unlock(&scx_enable_mutex); WARN_ON_ONCE(scx_set_enable_state(SCX_DISABLED) != SCX_DISABLING); @@ -4786,7 +4808,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops) return sch; err_stop_helper: - kthread_stop(sch->helper->task); + kthread_destroy_worker(sch->helper); err_free_pcpu: free_percpu(sch->pcpu); err_free_gdsqs: @@ -4970,6 +4992,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) * Init in bypass mode to guarantee forward progress. 
*/ scx_bypass(true); + scx_bypassed_for_enable = true; for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++) if (((void (**)(void))ops)[i]) @@ -5067,6 +5090,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) scx_task_iter_stop(&sti); percpu_up_write(&scx_fork_rwsem); + scx_bypassed_for_enable = false; scx_bypass(false); if (!scx_tryset_enable_state(SCX_ENABLED, SCX_ENABLING)) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d57727abaade..fe28d86f7c35 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -965,7 +965,7 @@ static const struct bpf_func_proto bpf_d_path_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &bpf_d_path_btf_ids[0], - .arg2_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .allowed = bpf_d_path_allowed, }; diff --git a/lib/crypto/riscv/.gitignore b/lib/crypto/riscv/.gitignore new file mode 100644 index 000000000000..0d47d4f21c6d --- /dev/null +++ b/lib/crypto/riscv/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +poly1305-core.S diff --git a/mm/shmem.c b/mm/shmem.c index b329b5302c48..ec6c01378e9d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4019,22 +4019,10 @@ static int shmem_whiteout(struct mnt_idmap *idmap, whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name); if (!whiteout) return -ENOMEM; - error = shmem_mknod(idmap, old_dir, whiteout, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); dput(whiteout); - if (error) - return error; - - /* - * Cheat and hash the whiteout while the old dentry is still in - * place, instead of playing games with FS_RENAME_DOES_D_MOVE. - * - * d_lookup() will consistently find one of them at this point, - * not sure which one, but that isn't even important. - */ - d_rehash(whiteout); - return 0; + return error; } /* @@ -4050,6 +4038,7 @@ static int shmem_rename2(struct mnt_idmap *idmap, { struct inode *inode = d_inode(old_dentry); int they_are_dirs = S_ISDIR(inode->i_mode); + bool had_offset = false; int error; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) @@ -4062,16 +4051,23 @@ static int shmem_rename2(struct mnt_idmap *idmap, if (!simple_empty(new_dentry)) return -ENOTEMPTY; + error = simple_offset_add(shmem_get_offset_ctx(new_dir), new_dentry); + if (error == -EBUSY) + had_offset = true; + else if (unlikely(error)) + return error; + if (flags & RENAME_WHITEOUT) { error = shmem_whiteout(idmap, old_dir, old_dentry); - if (error) + if (error) { + if (!had_offset) + simple_offset_remove(shmem_get_offset_ctx(new_dir), + new_dentry); return error; + } } - error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry); - if (error) - return error; - + simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry); if (d_really_is_positive(new_dentry)) { (void) shmem_unlink(new_dir, new_dentry); if (they_are_dirs) { diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 6651a8dc62e0..d4d63586053a 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -92,8 +92,15 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt) len = le16_to_cpu(tmp); /* Subtract for FCS on length if FCS is not used. 
*/ - if (!this->dofcs) + if (!this->dofcs) { + if (len < 2) { + ++cffrml_rcv_error; + pr_err("Invalid frame length (%d)\n", len); + cfpkt_destroy(pkt); + return -EPROTO; + } len -= 2; + } if (cfpkt_setlen(pkt, len) < 0) { ++cffrml_rcv_error; diff --git a/net/can/Kconfig b/net/can/Kconfig index e4ccf731a24c..af64a6f76458 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -5,7 +5,6 @@ menuconfig CAN tristate "CAN bus subsystem support" - select CAN_DEV help Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial communications protocol. Development of the CAN bus started in diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 6272326dd614..ff9c4fd7b433 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -482,6 +482,12 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr_unsized *uaddr, in goto out_release_sock; } + if (ndev->reg_state != NETREG_REGISTERED) { + dev_put(ndev); + ret = -ENODEV; + goto out_release_sock; + } + can_ml = can_get_ml_priv(ndev); if (!can_ml) { dev_put(ndev); diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fbf5c8001c9d..613a911dda10 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1567,6 +1567,8 @@ int j1939_session_activate(struct j1939_session *session) if (active) { j1939_session_put(active); ret = -EAGAIN; + } else if (priv->ndev->reg_state != NETREG_REGISTERED) { + ret = -ENODEV; } else { WARN_ON_ONCE(session->state != J1939_SESSION_NEW); list_add_tail(&session->active_session_list_entry, diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index fa83ddade4f8..9431e305b233 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2383,7 +2383,10 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) return -ENOMEM; WARN_ON_ONCE(!ret); - gstrings.len = ret; + if (gstrings.len && gstrings.len != ret) + gstrings.len = 0; + else + gstrings.len = ret; if (gstrings.len) { data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); @@ -2509,10 +2512,13 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) if (copy_from_user(&stats, useraddr, sizeof(stats))) return -EFAULT; - stats.n_stats = n_stats; + if (stats.n_stats && stats.n_stats != n_stats) + stats.n_stats = 0; + else + stats.n_stats = n_stats; - if (n_stats) { - data = vzalloc(array_size(n_stats, sizeof(u64))); + if (stats.n_stats) { + data = vzalloc(array_size(stats.n_stats, sizeof(u64))); if (!data) return -ENOMEM; ops->get_ethtool_stats(dev, &stats, data); @@ -2524,7 +2530,9 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) if (copy_to_user(useraddr, &stats, sizeof(stats))) goto out; useraddr += sizeof(stats); - if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64)))) + if (stats.n_stats && + copy_to_user(useraddr, data, + array_size(stats.n_stats, sizeof(u64)))) goto out; ret = 0; @@ -2560,6 +2568,10 @@ static int ethtool_get_phy_stats_phydev(struct phy_device *phydev, return -EOPNOTSUPP; n_stats = phy_ops->get_sset_count(phydev); + if (stats->n_stats && stats->n_stats != n_stats) { + stats->n_stats = 0; + return 0; + } ret = ethtool_vzalloc_stats_array(n_stats, data); if (ret) @@ -2580,6 +2592,10 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev, return -EOPNOTSUPP; n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); + if (stats->n_stats && stats->n_stats != n_stats) { + stats->n_stats = 0; + return 0; + } ret = ethtool_vzalloc_stats_array(n_stats, data); if (ret) @@ -2616,7 
+2632,9 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) } useraddr += sizeof(stats); - if (copy_to_user(useraddr, data, array_size(stats.n_stats, sizeof(u64)))) + if (stats.n_stats && + copy_to_user(useraddr, data, + array_size(stats.n_stats, sizeof(u64)))) ret = -EFAULT; out: diff --git a/net/handshake/request.c b/net/handshake/request.c index 274d2c89b6b2..6b7e3e0bf399 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -276,6 +276,8 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, out_unlock: spin_unlock(&hn->hn_lock); out_err: + /* Restore original destructor so socket teardown still runs on failure */ + req->hr_sk->sk_destruct = req->hr_odestruct; trace_handshake_submit_err(net, req, req->hr_sk, ret); handshake_req_destroy(req); return ret; @@ -324,7 +326,11 @@ bool handshake_req_cancel(struct sock *sk) hn = handshake_pernet(net); if (hn && remove_pending(hn, req)) { - /* Request hadn't been accepted */ + /* Request hadn't been accepted - mark cancelled */ + if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + trace_handshake_cancel_busy(net, req, sk); + return false; + } goto out_true; } if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 339f0d220212..aefc9b6936ba 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -205,6 +205,8 @@ struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame, __pskb_copy(frame->skb_prp, skb_headroom(frame->skb_prp), GFP_ATOMIC); + if (!frame->skb_std) + return NULL; } else { /* Unexpected */ WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n", diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 025895eb6ec5..001ee5c4d962 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -218,6 +218,41 @@ static int __init inet_frag_wq_init(void) pure_initcall(inet_frag_wq_init); +void fqdir_pre_exit(struct fqdir *fqdir) +{ + struct inet_frag_queue *fq; + struct rhashtable_iter hti; + + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). 
+ */ + WRITE_ONCE(fqdir->dead, true); + + rhashtable_walk_enter(&fqdir->rhashtable, &hti); + rhashtable_walk_start(&hti); + + while ((fq = rhashtable_walk_next(&hti))) { + if (IS_ERR(fq)) { + if (PTR_ERR(fq) != -EAGAIN) + break; + continue; + } + spin_lock_bh(&fq->lock); + if (!(fq->flags & INET_FRAG_COMPLETE)) + inet_frag_queue_flush(fq, 0); + spin_unlock_bh(&fq->lock); + } + + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); +} +EXPORT_SYMBOL(fqdir_pre_exit); + void fqdir_exit(struct fqdir *fqdir) { INIT_WORK(&fqdir->destroy_work, fqdir_work_fn); @@ -263,8 +298,8 @@ static void inet_frag_destroy_rcu(struct rcu_head *head) kmem_cache_free(f->frags_cachep, q); } -unsigned int inet_frag_rbtree_purge(struct rb_root *root, - enum skb_drop_reason reason) +static unsigned int +inet_frag_rbtree_purge(struct rb_root *root, enum skb_drop_reason reason) { struct rb_node *p = rb_first(root); unsigned int sum = 0; @@ -284,7 +319,17 @@ unsigned int inet_frag_rbtree_purge(struct rb_root *root, } return sum; } -EXPORT_SYMBOL(inet_frag_rbtree_purge); + +void inet_frag_queue_flush(struct inet_frag_queue *q, + enum skb_drop_reason reason) +{ + unsigned int sum; + + reason = reason ?: SKB_DROP_REASON_FRAG_REASM_TIMEOUT; + sum = inet_frag_rbtree_purge(&q->rb_fragments, reason); + sub_frag_mem_limit(q->fqdir, sum); +} +EXPORT_SYMBOL(inet_frag_queue_flush); void inet_frag_destroy(struct inet_frag_queue *q) { @@ -327,7 +372,9 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); - /* One reference for the timer, one for the hash table. */ + /* One reference for the timer, one for the hash table. + * We never take any extra references, only decrement this field. + */ refcount_set(&q->refcnt, 2); return q; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index f7012479713b..56b0f738d2f2 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -134,11 +134,6 @@ static void ip_expire(struct timer_list *t) net = qp->q.fqdir->net; rcu_read_lock(); - - /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ - if (READ_ONCE(qp->q.fqdir->dead)) - goto out_rcu_unlock; - spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) @@ -146,6 +141,13 @@ static void ip_expire(struct timer_list *t) qp->q.flags |= INET_FRAG_DROP; inet_frag_kill(&qp->q, &refs); + + /* Paired with WRITE_ONCE() in fqdir_pre_exit(). 
*/ + if (READ_ONCE(qp->q.fqdir->dead)) { + inet_frag_queue_flush(&qp->q, 0); + goto out; + } + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); @@ -240,16 +242,10 @@ static int ip_frag_too_far(struct ipq *qp) static int ip_frag_reinit(struct ipq *qp) { - unsigned int sum_truesize = 0; - - if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) { - refcount_inc(&qp->q.refcnt); + if (!mod_timer_pending(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) return -ETIMEDOUT; - } - sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments, - SKB_DROP_REASON_FRAG_TOO_FAR); - sub_frag_mem_limit(qp->q.fqdir, sum_truesize); + inet_frag_queue_flush(&qp->q, SKB_DROP_REASON_FRAG_TOO_FAR); qp->q.flags = 0; qp->q.len = 0; diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index 20328920f6ed..be71fc9b4638 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -4,7 +4,7 @@ config MPTCP depends on INET select SKB_EXTENSIONS select CRYPTO_LIB_SHA256 - select CRYPTO + select CRYPTO_LIB_UTILS help Multipath TCP (MPTCP) connections send and receive data over multiple subflows in order to utilize multiple network paths. Each subflow diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d5b383870f79..7aa42de9c47b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, } if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) - entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); + entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) & + MPTCP_PM_ADDR_FLAGS_MASK; if (tb[MPTCP_PM_ADDR_ATTR_PORT]) entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e212c1374bd0..9b1fafd87cb9 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1623,7 +1623,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) struct mptcp_sendmsg_info info = { .flags = flags, }; - bool do_check_data_fin = false; + bool copied = false; int push_count = 1; while (mptcp_send_head(sk) && (push_count > 0)) { @@ -1665,7 +1665,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) push_count--; continue; } - do_check_data_fin = true; + copied = true; } } } @@ -1674,11 +1674,14 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) if (ssk) mptcp_push_release(ssk, &info); - /* ensure the rtx timer is running */ - if (!mptcp_rtx_timer_pending(sk)) - mptcp_reset_rtx_timer(sk); - if (do_check_data_fin) + /* Avoid scheduling the rtx timer if no data has been pushed; the timer + * will be updated on positive acks by __mptcp_cleanup_una(). + */ + if (copied) { + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); mptcp_check_send_data_fin(sk); + } } static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first) @@ -2766,10 +2769,13 @@ static void __mptcp_retrans(struct sock *sk) /* * make the whole retrans decision, xmit, disallow - * fallback atomic + * fallback atomic, note that we can't retrans even + * when an infinite fallback is in progress, i.e. new + * subflows are disallowed. 
*/ spin_lock_bh(&msk->fallback_lock); - if (__mptcp_check_fallback(msk)) { + if (__mptcp_check_fallback(msk) || + !msk->allow_subflows) { spin_unlock_bh(&msk->fallback_lock); release_sock(ssk); goto clear_scheduled; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3162ce3c2640..64c697212578 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -408,6 +408,9 @@ err_put: return -1; err_unreach: + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + dst_link_failure(skb); return -1; } diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index f1be4dd5cf85..3654f1e8976c 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -172,14 +172,14 @@ static int __nf_conncount_add(struct net *net, struct nf_conn *found_ct; unsigned int collect = 0; bool refcounted = false; + int err = 0; if (!get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted)) return -ENOENT; if (ct && nf_ct_is_confirmed(ct)) { - if (refcounted) - nf_ct_put(ct); - return -EEXIST; + err = -EEXIST; + goto out_put; } if ((u32)jiffies == list->last_gc) @@ -231,12 +231,16 @@ static int __nf_conncount_add(struct net *net, } add_new_node: - if (WARN_ON_ONCE(list->count > INT_MAX)) - return -EOVERFLOW; + if (WARN_ON_ONCE(list->count > INT_MAX)) { + err = -EOVERFLOW; + goto out_put; + } conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); - if (conn == NULL) - return -ENOMEM; + if (conn == NULL) { + err = -ENOMEM; + goto out_put; + } conn->tuple = tuple; conn->zone = *zone; @@ -249,7 +253,7 @@ add_new_node: out_put: if (refcounted) nf_ct_put(ct); - return 0; + return err; } int nf_conncount_add_skb(struct net *net, @@ -456,11 +460,10 @@ restart: rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_insert_color(&rbconn->node, root); - - if (refcounted) - nf_ct_put(ct); } out_unlock: + if (refcounted) + nf_ct_put(ct); spin_unlock_bh(&nf_conncount_locks[hash]); return count; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0b95f226f211..d1f8eb725d42 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2487,6 +2487,7 @@ void nf_conntrack_cleanup_net(struct net *net) void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) { struct nf_ct_iter_data iter_data = {}; + unsigned long start = jiffies; struct net *net; int busy; @@ -2507,6 +2508,8 @@ i_see_dead_people: busy = 1; } if (busy) { + DEBUG_NET_WARN_ONCE(time_after(jiffies, start + 60 * HZ), + "conntrack cleanup blocked for 60s"); schedule(); goto i_see_dead_people; } diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c index f0984cf69a09..eb24fe2715dc 100644 --- a/net/netfilter/nf_flow_table_path.c +++ b/net/netfilter/nf_flow_table_path.c @@ -250,6 +250,9 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt, if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0) nft_dev_path_info(&stack, &info, ha, &ft->data); + if (info.outdev) + route->tuple[dir].out.ifindex = info.outdev->ifindex; + if (!info.indev || !nft_flowtable_find_dev(info.indev, ft)) return; @@ -269,7 +272,6 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt, route->tuple[!dir].in.num_encaps = info.num_encaps; route->tuple[!dir].in.ingress_vlans = info.ingress_vlans; - route->tuple[dir].out.ifindex = info.outdev->ifindex; if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) { memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN); 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 78a61dac4ade..e6b24586d2fe 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -294,25 +294,13 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple, ct = nf_ct_tuplehash_to_ctrack(thash); - /* NB: IP_CT_DIR_ORIGINAL should be impossible because - * nf_nat_used_tuple() handles origin collisions. - * - * Handle remote chance other CPU confirmed its ct right after. - */ - if (thash->tuple.dst.dir != IP_CT_DIR_REPLY) - goto out; - /* clashing connection subject to NAT? Retry with new tuple. */ if (READ_ONCE(ct->status) & uses_nat) goto out; if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple) && - nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &ignored_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) { + &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple)) taken = false; - goto out; - } out: nf_ct_put(ct); return taken; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f3de2f9bbebf..618af6e90773 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -123,6 +123,29 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s table->validate_state = new_validate_state; } + +static bool nft_chain_vstate_valid(const struct nft_ctx *ctx, + const struct nft_chain *chain) +{ + const struct nft_base_chain *base_chain; + enum nft_chain_types type; + u8 hooknum; + + if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) + return false; + + base_chain = nft_base_chain(ctx->chain); + hooknum = base_chain->ops.hooknum; + type = base_chain->type->type; + + /* chain is already validated for this call depth */ + if (chain->vstate.depth >= ctx->level && + chain->vstate.hook_mask[type] & BIT(hooknum)) + return true; + + return false; +} + static void nf_tables_trans_destroy_work(struct work_struct *w); static void nft_trans_gc_work(struct work_struct *work); @@ -4079,6 +4102,29 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r nf_tables_rule_destroy(ctx, rule); } +static void nft_chain_vstate_update(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + const struct nft_base_chain *base_chain; + enum nft_chain_types type; + u8 hooknum; + + /* ctx->chain must hold the calling base chain. */ + if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) { + memset(&chain->vstate, 0, sizeof(chain->vstate)); + return; + } + + base_chain = nft_base_chain(ctx->chain); + hooknum = base_chain->ops.hooknum; + type = base_chain->type->type; + + BUILD_BUG_ON(BIT(NF_INET_NUMHOOKS) > U8_MAX); + + chain->vstate.hook_mask[type] |= BIT(hooknum); + if (chain->vstate.depth < ctx->level) + chain->vstate.depth = ctx->level; +} + /** nft_chain_validate - loop detection and hook validation * * @ctx: context containing call depth and base chain @@ -4088,15 +4134,25 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r * and set lookups until either the jump limit is hit or all reachable * chains have been validated. */ -int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) +int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain) { struct nft_expr *expr, *last; struct nft_rule *rule; int err; + BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255); if (ctx->level == NFT_JUMP_STACK_SIZE) return -EMLINK; + if (ctx->level > 0) { + /* jumps to base chains are not allowed. 
*/ + if (nft_is_base_chain(chain)) + return -ELOOP; + + if (nft_chain_vstate_valid(ctx, chain)) + return 0; + } + list_for_each_entry(rule, &chain->rules, list) { if (fatal_signal_pending(current)) return -EINTR; @@ -4115,8 +4171,11 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (err < 0) return err; } + + cond_resched(); } + nft_chain_vstate_update(ctx, chain); return 0; } EXPORT_SYMBOL_GPL(nft_chain_validate); @@ -4128,7 +4187,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) .net = net, .family = table->family, }; - int err; + int err = 0; list_for_each_entry(chain, &table->chains, list) { if (!nft_is_base_chain(chain)) @@ -4137,12 +4196,14 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) ctx.chain = chain; err = nft_chain_validate(&ctx, chain); if (err < 0) - return err; - - cond_resched(); + goto err; } - return 0; +err: + list_for_each_entry(chain, &table->chains, list) + memset(&chain->vstate, 0, sizeof(chain->vstate)); + + return err; } int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, @@ -11676,21 +11737,10 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, enum nft_data_types type, unsigned int len) { - int err; - switch (reg) { case NFT_REG_VERDICT: if (type != NFT_DATA_VERDICT) return -EINVAL; - - if (data != NULL && - (data->verdict.code == NFT_GOTO || - data->verdict.code == NFT_JUMP)) { - err = nft_chain_validate(ctx, data->verdict.chain); - if (err < 0) - return err; - } - break; default: if (type != NFT_DATA_VALUE) diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 5e531394a724..2b3cbceb0b52 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -43,8 +43,10 @@ void nr_output(struct sock *sk, struct sk_buff *skb) frontlen = skb_headroom(skb); while (skb->len > 0) { - if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL) + if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL) { + kfree_skb(skb); return; + } skb_reserve(skbn, frontlen); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 1cb4f97335d8..2d536901309e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2802,13 +2802,20 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } -static bool validate_push_nsh(const struct nlattr *attr, bool log) +static bool validate_push_nsh(const struct nlattr *a, bool log) { + struct nlattr *nsh_key = nla_data(a); struct sw_flow_match match; struct sw_flow_key key; + /* There must be one and only one NSH header. */ + if (!nla_ok(nsh_key, nla_len(a)) || + nla_total_size(nla_len(nsh_key)) != nla_len(a) || + nla_type(nsh_key) != OVS_KEY_ATTR_NSH) + return false; + ovs_match_init(&match, &key, true, NULL); - return !nsh_key_put_from_nlattr(attr, &match, false, true, log); + return !nsh_key_put_from_nlattr(nsh_key, &match, false, true, log); } /* Return false if there are any non-masked bits set. 
@@ -3389,7 +3396,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return -EINVAL; } mac_proto = MAC_PROTO_NONE; - if (!validate_push_nsh(nla_data(a), log)) + if (!validate_push_nsh(a, log)) return -EINVAL; break; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index f27b583def78..91c96cc625bd 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -281,6 +281,15 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + if (dev == skb->dev && want_ingress == at_ingress) { + pr_notice_once("tc mirred: Loop (%s:%s --> %s:%s)\n", + netdev_name(skb->dev), + at_ingress ? "ingress" : "egress", + netdev_name(dev), + want_ingress ? "ingress" : "egress"); + goto err_cant_do; + } + /* All mirred/redirected skbs should clear previous ct info */ nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 82635dd2cfa5..306e046276d4 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -652,7 +652,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); for (i = nbands; i < oldbands; i++) { - if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) + if (cl_is_active(&q->classes[i])) list_del_init(&q->classes[i].alist); qdisc_purge_queue(q->classes[i].qdisc); } @@ -664,6 +664,10 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->classes[i].deficit = quanta[i]; } } + for (i = q->nstrict; i < nstrict; i++) { + if (cl_is_active(&q->classes[i])) + list_del_init(&q->classes[i].alist); + } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 069b7e45d8bd..531cb0690007 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -492,6 +492,8 @@ static void sctp_v6_copy_ip_options(struct sock *sk, struct sock *newsk) struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct ipv6_txoptions *opt; + inet_sk(newsk)->inet_opt = NULL; + newnp = inet6_sk(newsk); rcu_read_lock(); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d808096f5ab1..2493a5b1fa3c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4863,8 +4863,6 @@ static struct sock *sctp_clone_sock(struct sock *sk, newsp->pf->to_sk_daddr(&asoc->peer.primary_addr, newsk); newinet->inet_dport = htons(asoc->peer.port); - - newsp->pf->copy_ip_options(sk, newsk); atomic_set(&newinet->inet_id, get_random_u16()); inet_set_bit(MC_LOOP, newsk); @@ -4874,17 +4872,20 @@ static struct sock *sctp_clone_sock(struct sock *sk, #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *newnp = inet6_sk(newsk); + struct ipv6_pinfo *newnp; newinet->pinet6 = &((struct sctp6_sock *)newsk)->inet6; newinet->ipv6_fl_list = NULL; + newnp = inet6_sk(newsk); memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo)); newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; } #endif + newsp->pf->copy_ip_options(sk, newsk); + newsp->do_auto_asconf = 0; skb_queue_head_init(&newsp->pd_lobby); diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 325addf83cc6..277ef504bc26 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -22,10 +22,10 @@ config SMC_DIAG config SMC_HS_CTRL_BPF bool "Generic eBPF hook for SMC handshake flow" - depends on SMC && BPF_SYSCALL + depends on SMC && BPF_JIT && BPF_SYSCALL default y help SMC_HS_CTRL_BPF enables support to register generic eBPF hook for SMC handshake flow, which 
offer much greater flexibility in modifying the behavior of the SMC protocol stack compared to a complete kernel-based approach. Select - this option if you want filtring the handshake process via eBPF programs.
\ No newline at end of file + this option if you want to filter the handshake process via eBPF programs. diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 78323d43e63e..25f65817faab 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -199,7 +199,7 @@ static void unix_free_vertices(struct scm_fp_list *fpl) } } -static DEFINE_SPINLOCK(unix_gc_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(unix_gc_lock); void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver) { diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 586d1b2595d1..5442073a2e42 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -224,6 +224,8 @@ endif $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP) $(QUIET_CLANG)$(CLANG) \ + -Wno-microsoft-anon-tag \ + -fms-extensions \ -I$(or $(OUTPUT),.) \ -I$(srctree)/tools/include/uapi/ \ -I$(LIBBPF_BOOTSTRAP_INCLUDE) \ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3dc8a8078815..f4dfd23148a5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8484,7 +8484,7 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type, struct bpf_object *obj = ctx; const struct btf_type *t; struct extern_desc *ext; - char *res; + const char *res; res = strstr(sym_name, ".llvm."); if (sym_type == 'd' && res) @@ -11818,7 +11818,8 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, * * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG") */ - char sym_trim[256], *psym_trim = sym_trim, *sym_sfx; + char sym_trim[256], *psym_trim = sym_trim; + const char *sym_sfx; if (!(sym_sfx = strstr(sym_name, ".llvm."))) return 0; @@ -12401,7 +12402,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz) if (!search_paths[i]) continue; for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { - char *next_path; + const char *next_path; int seg_len; if (s[0] == ':') diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 865fd2e8519e..08205f9fc525 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -13,6 +13,7 @@ UAPI_PATH:=../../../../include/uapi/ # need the explicit -D matching what's in /usr, to avoid multiple definitions.
get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) +get_hdr_inc2=-D$(1) -D$(2) -include $(UAPI_PATH)/linux/$(3) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) @@ -48,3 +49,4 @@ CFLAGS_tc:= $(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \ $(call get_hdr_inc,_TC_SKBEDIT_H,tc_act/tc_skbedit.h) \ $(call get_hdr_inc,_TC_TUNNEL_KEY_H,tc_act/tc_tunnel_key.h) CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h) +CFLAGS_wireguard:=$(call get_hdr_inc2,_LINUX_WIREGUARD_H,_WG_UAPI_WIREGUARD_H,wireguard.h) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index b7030a6e2e76..4aa60e83ff19 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -437,6 +437,8 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(abspath $(OUTPUT)/../usr/include) \ -std=gnu11 \ -fno-strict-aliasing \ + -Wno-microsoft-anon-tag \ + -fms-extensions \ -Wno-compare-distinct-pointer-types \ -Wno-initializer-overrides \ # diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index ccc768592e66..1a2a2f1abf03 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -38,6 +38,14 @@ static int set_pathname(int fd, pid_t pid) return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN); } +static inline long syscall_close(int fd) +{ + return syscall(__NR_close_range, + (unsigned int)fd, + (unsigned int)fd, + 0u); +} + static int trigger_fstat_events(pid_t pid) { int sockfd = -1, procfd = -1, devfd = -1; @@ -104,18 +112,34 @@ out_close: /* sys_close no longer triggers filp_close, but we can * call sys_close_range instead which still does */ -#define close(fd) syscall(__NR_close_range, fd, fd, 0) + syscall_close(pipefd[0]); + syscall_close(pipefd[1]); + syscall_close(sockfd); + syscall_close(procfd); + syscall_close(devfd); + syscall_close(localfd); + syscall_close(indicatorfd); + return ret; +} - close(pipefd[0]); - close(pipefd[1]); - close(sockfd); - close(procfd); - close(devfd); - close(localfd); - close(indicatorfd); +static void attach_and_load(struct test_d_path **skel) +{ + int err; -#undef close - return ret; + *skel = test_d_path__open_and_load(); + if (CHECK(!*skel, "setup", "d_path skeleton failed\n")) + goto cleanup; + + err = test_d_path__attach(*skel); + if (CHECK(err, "setup", "attach failed: %d\n", err)) + goto cleanup; + + (*skel)->bss->my_pid = getpid(); + return; + +cleanup: + test_d_path__destroy(*skel); + *skel = NULL; } static void test_d_path_basic(void) @@ -124,16 +148,11 @@ static void test_d_path_basic(void) struct test_d_path *skel; int err; - skel = test_d_path__open_and_load(); - if (CHECK(!skel, "setup", "d_path skeleton failed\n")) - goto cleanup; - - err = test_d_path__attach(skel); - if (CHECK(err, "setup", "attach failed: %d\n", err)) + attach_and_load(&skel); + if (!skel) goto cleanup; bss = skel->bss; - bss->my_pid = getpid(); err = trigger_fstat_events(bss->my_pid); if (err < 0) @@ -195,6 +214,39 @@ static void test_d_path_check_types(void) test_d_path_check_types__destroy(skel); } +/* Check if the verifier correctly generates code for + * accessing the memory modified by d_path helper. 
+ */ +static void test_d_path_mem_access(void) +{ + int localfd = -1; + char path_template[] = "/dev/shm/d_path_loadgen.XXXXXX"; + struct test_d_path__bss *bss; + struct test_d_path *skel; + + attach_and_load(&skel); + if (!skel) + goto cleanup; + + bss = skel->bss; + + localfd = mkstemp(path_template); + if (CHECK(localfd < 0, "trigger", "mkstemp failed\n")) + goto cleanup; + + if (CHECK(fallocate(localfd, 0, 0, 1024) < 0, "trigger", "fallocate failed\n")) + goto cleanup; + remove(path_template); + + if (CHECK(!bss->path_match_fallocate, "check", + "failed to read fallocate path")) + goto cleanup; + +cleanup: + syscall_close(localfd); + test_d_path__destroy(skel); +} + void test_d_path(void) { if (test__start_subtest("basic")) @@ -205,4 +257,7 @@ void test_d_path(void) if (test__start_subtest("check_alloc_mem")) test_d_path_check_types(); + + if (test__start_subtest("check_mem_access")) + test_d_path_mem_access(); } diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c index 6c2b0c3dbcd8..e442be9dde7e 100644 --- a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c @@ -73,12 +73,10 @@ close_memfd: return -1; } -static int create_sys_heap_dmabuf(void) +static int create_sys_heap_dmabuf(size_t bytes) { - sysheap_test_buffer_size = 20 * getpagesize(); - struct dma_heap_allocation_data data = { - .len = sysheap_test_buffer_size, + .len = bytes, .fd = 0, .fd_flags = O_RDWR | O_CLOEXEC, .heap_flags = 0, @@ -110,7 +108,9 @@ close_sysheap_dmabuf: static int create_test_buffers(void) { udmabuf = create_udmabuf(); - sysheap_dmabuf = create_sys_heap_dmabuf(); + + sysheap_test_buffer_size = 20 * getpagesize(); + sysheap_dmabuf = create_sys_heap_dmabuf(sysheap_test_buffer_size); if (udmabuf < 0 || sysheap_dmabuf < 0) return -1; @@ -219,6 +219,26 @@ close_iter_fd: close(iter_fd); } +static void subtest_dmabuf_iter_check_lots_of_buffers(struct dmabuf_iter *skel) +{ + int iter_fd; + char buf[1024]; + size_t total_bytes_read = 0; + ssize_t bytes_read; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + if (!ASSERT_OK_FD(iter_fd, "iter_create")) + return; + + while ((bytes_read = read(iter_fd, buf, sizeof(buf))) > 0) + total_bytes_read += bytes_read; + + ASSERT_GT(total_bytes_read, getpagesize(), "total_bytes_read"); + + close(iter_fd); +} + + static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd) { LIBBPF_OPTS(bpf_test_run_opts, topts); @@ -275,6 +295,23 @@ void test_dmabuf_iter(void) subtest_dmabuf_iter_check_no_infinite_reads(skel); if (test__start_subtest("default_iter")) subtest_dmabuf_iter_check_default_iter(skel); + if (test__start_subtest("lots_of_buffers")) { + size_t NUM_BUFS = 100; + int buffers[NUM_BUFS]; + int i; + + for (i = 0; i < NUM_BUFS; ++i) { + buffers[i] = create_sys_heap_dmabuf(getpagesize()); + if (!ASSERT_OK_FD(buffers[i], "dmabuf_fd")) + goto cleanup_bufs; + } + + subtest_dmabuf_iter_check_lots_of_buffers(skel); + +cleanup_bufs: + for (--i; i >= 0; --i) + close(buffers[i]); + } if (test__start_subtest("open_coded")) subtest_dmabuf_iter_check_open_coded(skel, map_fd); diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c index 84e1f883f97b..561b2f861808 100644 --- a/tools/testing/selftests/bpf/progs/test_d_path.c +++ b/tools/testing/selftests/bpf/progs/test_d_path.c @@ -17,6 +17,7 @@ int rets_close[MAX_FILES] = {}; int called_stat = 0; int 
called_close = 0; +int path_match_fallocate = 0; SEC("fentry/security_inode_getattr") int BPF_PROG(prog_stat, struct path *path, struct kstat *stat, @@ -62,4 +63,26 @@ int BPF_PROG(prog_close, struct file *file, void *id) return 0; } +SEC("fentry/vfs_fallocate") +int BPF_PROG(prog_fallocate, struct file *file, int mode, loff_t offset, loff_t len) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + int ret = 0; + char path_fallocate[MAX_PATH_LEN] = {}; + + if (pid != my_pid) + return 0; + + ret = bpf_d_path(&file->f_path, + path_fallocate, MAX_PATH_LEN); + if (ret < 0) + return 0; + + if (!path_fallocate[0]) + return 0; + + path_match_fallocate = 1; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 10e051b6f592..dadad277f4eb 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -755,9 +755,6 @@ TEST_F(iommufd_ioas, get_hw_info) struct iommu_test_hw_info info; uint64_t trailing_bytes; } buffer_larger; - struct iommu_test_hw_info_buffer_smaller { - __u32 flags; - } buffer_smaller; if (self->device_id) { uint8_t max_pasid = 0; @@ -789,8 +786,9 @@ TEST_F(iommufd_ioas, get_hw_info) * the fields within the size range still gets updated. */ test_cmd_get_hw_info(self->device_id, - IOMMU_HW_INFO_TYPE_DEFAULT, - &buffer_smaller, sizeof(buffer_smaller)); + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact, + offsetofend(struct iommu_test_hw_info, + flags)); test_cmd_get_hw_info_pasid(self->device_id, &max_pasid); ASSERT_EQ(0, max_pasid); if (variant->pasid_capable) { diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 3cd677b72072..4c0375e28bbe 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,9 @@ -CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end +top_srcdir := ../../../../.. 
+include $(top_srcdir)/scripts/Makefile.compiler + +cc-option = $(call __cc-option, $(CC),,$(1),$(2)) + +CFLAGS += $(KHDR_INCLUDES) -Wall $(call cc-option,-Wflex-array-member-not-at-end) TEST_GEN_PROGS := \ diag_uid \ diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index ce64518aaa11..75a6c3d3c1da 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -29,6 +29,7 @@ CONFIG_NET_ACT_VLAN=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_CLS_U32=m CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_META=m CONFIG_NETFILTER=y diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh index 6a570d256e07..2cf4c6d9245b 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -138,13 +138,18 @@ install_capture() defer tc qdisc del dev "$dev" clsact tc filter add dev "$dev" ingress proto ip pref 104 \ - flower skip_hw ip_proto udp dst_port "$VXPORT" \ - action pass + u32 match ip protocol 0x11 0xff \ + match u16 "$VXPORT" 0xffff at 0x16 \ + match u16 0x0800 0xffff at 0x30 \ + action pass defer tc filter del dev "$dev" ingress proto ip pref 104 tc filter add dev "$dev" ingress proto ipv6 pref 106 \ - flower skip_hw ip_proto udp dst_port "$VXPORT" \ - action pass + u32 match ip6 protocol 0x11 0xff \ + match u16 "$VXPORT" 0xffff at 0x2a \ + match u16 0x86dd 0xffff at 0x44 \ + match u8 0x11 0xff at 0x4c \ + action pass defer tc filter del dev "$dev" ingress proto ipv6 pref 106 } @@ -248,13 +253,6 @@ vx_create() } export -f vx_create -vx_wait() -{ - # Wait for all the ARP, IGMP etc. noise to settle down so that the - # tunnel is clear for measurements. - sleep 10 -} - vx10_create() { vx_create vx10 10 id 1000 "$@" @@ -267,18 +265,6 @@ vx20_create() } export -f vx20_create -vx10_create_wait() -{ - vx10_create "$@" - vx_wait -} - -vx20_create_wait() -{ - vx20_create "$@" - vx_wait -} - ns_init_common() { local ns=$1; shift @@ -554,7 +540,7 @@ ipv4_nomcroute() # Install a misleading (S,G) rule to attempt to trick the system into # pushing the packets elsewhere. adf_install_broken_sg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2" + vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2" do_test 4 10 0 "IPv4 nomcroute" } @@ -562,7 +548,7 @@ ipv6_nomcroute() { # Like for IPv4, install a misleading (S,G). 
adf_install_broken_sg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2" do_test 6 10 0 "IPv6 nomcroute" } @@ -581,35 +567,35 @@ ipv6_nomcroute_rx() ipv4_mcroute() { adf_install_sg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute do_test 4 10 10 "IPv4 mcroute" } ipv6_mcroute() { adf_install_sg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute do_test 6 10 10 "IPv6 mcroute" } ipv4_mcroute_rx() { adf_install_sg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute ipv4_do_test_rx 0 "IPv4 mcroute ping" } ipv6_mcroute_rx() { adf_install_sg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute ipv6_do_test_rx 0 "IPv6 mcroute ping" } ipv4_mcroute_changelink() { adf_install_sg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" ip link set dev vx10 type vxlan mcroute sleep 1 do_test 4 10 10 "IPv4 mcroute changelink" @@ -618,7 +604,7 @@ ipv4_mcroute_changelink() ipv6_mcroute_changelink() { adf_install_sg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute ip link set dev vx20 type vxlan mcroute sleep 1 do_test 6 10 10 "IPv6 mcroute changelink" @@ -627,47 +613,47 @@ ipv6_mcroute_changelink() ipv4_mcroute_starg() { adf_install_starg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute do_test 4 10 10 "IPv4 mcroute (*,G)" } ipv6_mcroute_starg() { adf_install_starg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute do_test 6 10 10 "IPv6 mcroute (*,G)" } ipv4_mcroute_starg_rx() { adf_install_starg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping" } ipv6_mcroute_starg_rx() { adf_install_starg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping" } ipv4_mcroute_noroute() { - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute do_test 4 0 0 "IPv4 mcroute, no route" } ipv6_mcroute_noroute() { - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute do_test 6 0 0 "IPv6 mcroute, no route" } ipv4_mcroute_fdb() { adf_install_sg - vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute + vx10_create local 192.0.2.100 dev "$IPMR" mcroute bridge fdb add dev vx10 \ 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR" do_test 4 10 10 "IPv4 mcroute FDB" @@ -676,7 +662,7 @@ ipv4_mcroute_fdb() ipv6_mcroute_fdb() { adf_install_sg - vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 dev "$IPMR" mcroute bridge -6 fdb add dev vx20 \ 00:00:00:00:00:00 self static dst 
"$GROUP6" via "$IPMR" do_test 6 10 10 "IPv6 mcroute FDB" @@ -686,7 +672,7 @@ ipv6_mcroute_fdb() ipv4_mcroute_fdb_oif0() { adf_install_sg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" do_test 4 10 10 "IPv4 mcroute oif=0" @@ -703,7 +689,7 @@ ipv6_mcroute_fdb_oif0() defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR" adf_install_sg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute bridge -6 fdb del dev vx20 00:00:00:00:00:00 bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6" do_test 6 10 10 "IPv6 mcroute oif=0" @@ -716,7 +702,7 @@ ipv4_mcroute_fdb_oif0_sep() adf_install_sg_sep adf_ip_addr_add lo 192.0.2.120/28 - vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0" @@ -727,7 +713,7 @@ ipv4_mcroute_fdb_oif0_sep_rx() adf_install_sg_sep_rx lo adf_ip_addr_add lo 192.0.2.120/28 - vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping" @@ -738,7 +724,7 @@ ipv4_mcroute_fdb_sep_rx() adf_install_sg_sep_rx lo adf_ip_addr_add lo 192.0.2.120/28 - vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add \ dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo @@ -750,7 +736,7 @@ ipv6_mcroute_fdb_sep_rx() adf_install_sg_sep_rx "X$IPMR" adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64 - vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute bridge -6 fdb del dev vx20 00:00:00:00:00:00 bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ self static dst "$GROUP6" via "X$IPMR" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index f448bafb3f20..0ec131b339bc 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -280,7 +280,8 @@ tc_rule_stats_get() local selector=${1:-.packets}; shift tc -j -s filter show dev $dev $dir pref $pref \ - | jq ".[1].options.actions[].stats$selector" + | jq ".[] | select(.options.actions) | + .options.actions[].stats$selector" } tc_rule_handle_stats_get() diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h index 17dc34a612c6..03912902a6d3 100644 --- a/tools/testing/selftests/net/lib/ksft.h +++ b/tools/testing/selftests/net/lib/ksft.h @@ -24,7 +24,8 @@ static inline void ksft_ready(void) fd = STDOUT_FILENO; } - write(fd, msg, sizeof(msg)); + if (write(fd, msg, sizeof(msg)) < 0) + perror("write()"); if (fd != STDOUT_FILENO) close(fd); } @@ -48,7 +49,8 @@ static inline void ksft_wait(void) fd = STDIN_FILENO; } - read(fd, &byte, sizeof(byte)); + if (read(fd, &byte, sizeof(byte)) < 0) + perror("read()"); if (fd != STDIN_FILENO) close(fd); } diff --git 
a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index ec6a87588191..123d9d7a0278 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -192,6 +192,10 @@ check "show_endpoints" \ flush_endpoint check "show_endpoints" "" "flush addrs" +add_endpoint 10.0.1.1 flags unknown +check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags" +flush_endpoint + set_limits 9 1 2>/dev/null check "get_limits" "${default_limits}" "rcv addrs above hard limit" diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 65b374232ff5..99eecccbf0c8 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -24,6 +24,8 @@ #define IPPROTO_MPTCP 262 #endif +#define MPTCP_PM_ADDR_FLAG_UNKNOWN _BITUL(7) + static void syntax(char *argv[]) { fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]); @@ -836,6 +838,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; else if (!strcmp(tok, "fullmesh")) flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; + else if (!strcmp(tok, "unknown")) + flags |= MPTCP_PM_ADDR_FLAG_UNKNOWN; else error(1, errno, "unknown flag %s", argv[arg]); @@ -1048,6 +1052,13 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_UNKNOWN) { + printf("unknown"); + flags &= ~MPTCP_PM_ADDR_FLAG_UNKNOWN; + if (flags) + printf(","); + } + /* bump unknown flags, if any */ if (flags) printf("0x%x", flags); diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh index 7fc6c5dbd551..84b8eb12143a 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -116,7 +116,7 @@ run_one_clash_test() # not a failure: clash resolution logic did not trigger. # With right timing, xmit completed sequentially and # no parallel insertion occurs. 
- return $ksft_skip + return $ksft_xfail } run_clash_test() @@ -133,12 +133,12 @@ run_clash_test() if [ $rv -eq 0 ];then echo "PASS: clash resolution test for $daddr:$dport on attempt $i" return 0 - elif [ $rv -eq $ksft_skip ]; then + elif [ $rv -eq $ksft_xfail ]; then softerr=1 fi done - [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger" + [ $softerr -eq 1 ] && echo "XFAIL: clash resolution for $daddr:$dport did not trigger" } ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter" @@ -167,8 +167,7 @@ load_simple_ruleset "$nsclient2" run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001 if [ $clash_resolution_active -eq 0 ];then - [ "$ret" -eq 0 ] && ret=$ksft_skip - echo "SKIP: Clash resolution did not trigger" + [ "$ret" -eq 0 ] && ret=$ksft_xfail fi exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c index 507930cee8cb..462d628cc3bd 100644 --- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c @@ -33,9 +33,14 @@ static void die(const char *e) exit(111); } -static void die_port(uint16_t got, uint16_t want) +static void die_port(const struct sockaddr_in *sin, uint16_t want) { - fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got)); + uint16_t got = ntohs(sin->sin_port); + char str[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str)); + + fprintf(stderr, "Port number changed, wanted %d got %d from %s\n", want, got, str); exit(1); } @@ -100,7 +105,7 @@ int main(int argc, char *argv[]) die("child recvfrom"); if (peer.sin_port != htons(PORT)) - die_port(peer.sin_port, PORT); + die_port(&peer, PORT); } else { if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN) continue; @@ -109,7 +114,7 @@ int main(int argc, char *argv[]) die("parent recvfrom"); if (peer.sin_port != htons((PORT + 1))) - die_port(peer.sin_port, PORT + 1); + die_port(&peer, PORT + 1); } } diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh index a24c896347a8..dc7e9d6da062 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh @@ -45,6 +45,8 @@ if ip netns exec "$ns0" ./conntrack_reverse_clash; then echo "PASS: No SNAT performed for null bindings" else echo "ERROR: SNAT performed without any matching snat rule" + ip netns exec "$ns0" conntrack -L + ip netns exec "$ns0" conntrack -S exit 1 fi diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt index 3442cd29bc93..cdb3910af95b 100644 --- a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt @@ -26,7 +26,7 @@ +0.01 > R 643160523:643160523(0) win 0 -+0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` ++0.1 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` // Must go through. 
+0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c index eb3cac5e583c..8d82140f0f76 100644 --- a/tools/testing/selftests/net/tfo.c +++ b/tools/testing/selftests/net/tfo.c @@ -81,7 +81,8 @@ static void run_server(void) if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0) error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)"); - read(connfd, buf, 64); + if (read(connfd, buf, 64) < 0) + perror("read()"); fprintf(outfile, "%d\n", opt); fclose(outfile); diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index a3ef4b57eb5f..a4d16a460fbe 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2786,10 +2786,10 @@ TEST_F(tls_err, epoll_partial_rec) TEST_F(tls_err, poll_partial_rec_async) { struct pollfd pfd = { }; + char token = '\0'; ssize_t rec_len; char rec[256]; char buf[128]; - char token; int p[2]; int ret; diff --git a/tools/testing/selftests/sched_ext/runner.c b/tools/testing/selftests/sched_ext/runner.c index aa2d7d32dda9..5748d2c69903 100644 --- a/tools/testing/selftests/sched_ext/runner.c +++ b/tools/testing/selftests/sched_ext/runner.c @@ -46,6 +46,14 @@ static void print_test_preamble(const struct scx_test *test, bool quiet) if (!quiet) printf("DESCRIPTION: %s\n", test->description); printf("OUTPUT:\n"); + + /* + * The tests may fork with the preamble buffered + * in the children's stdout. Flush before the test + * to avoid printing the message multiple times. + */ + fflush(stdout); + fflush(stderr); } static const char *status_to_result(enum scx_test_status status) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index b73bd255ea36..da156feabcbf 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -1052,5 +1052,51 @@ "$TC qdisc del dev $DEV1 ingress_block 21 clsact", "$TC actions flush action mirred" ] + }, + { + "id": "7eba", + "name": "Redirect multiport: dummy egress -> dummy egress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root drr", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 1, + "overlimits": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 47de27fd4f90..6a39640aa2a8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -1033,5 +1033,83 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "6e4f", + "name": "Try to delete ets drr class' qdisc while still keeping it in the active list", 
+ "category": [ + "qdisc", + "ets", + "tbf" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: ets bands 2 strict 1", + "$TC qdisc add dev $DUMMY parent 1:2 handle 20: tbf rate 8bit burst 100b latency 1s", + "$TC filter add dev $DUMMY parent 1: basic classid 1:2", + "ping -c2 -W0.01 -s 56 -I $DUMMY 10.10.11.11 || true", + "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 2", + "$TC qdisc change dev $DUMMY root handle 1: ets bands 1 strict 1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -s 56 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root", + "matchJSON": [ + { + "kind": "ets", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1:" + ] + }, + { + "id": "0b8f", + "name": "Try to add ets class to the active list twice", + "category": [ + "qdisc", + "ets", + "tbf" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: ets bands 2 strict 1", + "$TC qdisc add dev $DUMMY parent 1:2 handle 20: tbf rate 8bit burst 100b latency 1s", + "$TC filter add dev $DUMMY parent 1: basic classid 1:2", + "ping -c2 -W0.01 -s 56 -I $DUMMY 10.10.11.11 || true", + "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 2", + "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -s 56 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root", + "matchJSON": [ + { + "kind": "ets", + "handle": "1:", + "bytes": 98, + "packets": 1 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1:" + ] } ] |
