diff options
434 files changed, 18693 insertions, 15676 deletions
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml new file mode 100644 index 000000000000..9fbeb626ab23 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: MediaTek PCIE Mirror Controller for MT7622 + +maintainers: + - Lorenzo Bianconi <lorenzo@kernel.org> + - Felix Fietkau <nbd@nbd.name> + +description: + The mediatek PCIE mirror provides a configuration interface for PCIE + controller on MT7622 soc. + +properties: + compatible: + items: + - enum: + - mediatek,mt7622-pcie-mirror + - const: syscon + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + soc { + #address-cells = <2>; + #size-cells = <2>; + pcie_mirror: pcie-mirror@10000400 { + compatible = "mediatek,mt7622-pcie-mirror", "syscon"; + reg = <0 0x10000400 0 0x10>; + }; + }; diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml new file mode 100644 index 000000000000..787d6673f952 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt7622-wed.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: MediaTek Wireless Ethernet Dispatch Controller for MT7622 + +maintainers: + - Lorenzo Bianconi <lorenzo@kernel.org> + - Felix Fietkau <nbd@nbd.name> + +description: + The mediatek wireless ethernet dispatch controller can be configured to + intercept and handle access to the WLAN DMA queues and PCIe interrupts + and implement hardware flow offloading from ethernet to WLAN. + +properties: + compatible: + items: + - enum: + - mediatek,mt7622-wed + - const: syscon + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/interrupt-controller/irq.h> + soc { + #address-cells = <2>; + #size-cells = <2>; + wed0: wed@1020a000 { + compatible = "mediatek,mt7622-wed","syscon"; + reg = <0 0x1020a000 0 0x1000>; + interrupts = <GIC_SPI 214 IRQ_TYPE_LEVEL_LOW>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 72d03e07cf7c..f18d70189375 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -41,6 +41,16 @@ Required properties: - mediatek,pctl: phandle to the syscon node that handles the ports slew rate and driver current: only for MT2701 and MT7623 SoC +Optional properties: +- dma-coherent: present if dma operations are coherent +- mediatek,cci-control: phandle to the cache coherent interconnect node +- mediatek,hifsys: phandle to the mediatek hifsys controller used to provide + various clocks and reset to the system. +- mediatek,wed: a list of phandles to wireless ethernet dispatch nodes for + MT7622 SoC. +- mediatek,pcie-mirror: phandle to the mediatek pcie-mirror controller for + MT7622 SoC. + * Ethernet MAC node Required properties: diff --git a/Documentation/devicetree/bindings/net/mscc,miim.yaml b/Documentation/devicetree/bindings/net/mscc,miim.yaml new file mode 100644 index 000000000000..2c451cfa4e0b --- /dev/null +++ b/Documentation/devicetree/bindings/net/mscc,miim.yaml @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/mscc,miim.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microsemi MII Management Controller (MIIM) + +maintainers: + - Alexandre Belloni <alexandre.belloni@bootlin.com> + +allOf: + - $ref: "mdio.yaml#" + +properties: + compatible: + enum: + - mscc,ocelot-miim + - microchip,lan966x-miim + + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + + reg: + items: + - description: base address + - description: associated reset register for internal PHYs + minItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-frequency: true + +required: + - compatible + - reg + - "#address-cells" + - "#size-cells" + +unevaluatedProperties: false + +examples: + - | + mdio@107009c { + compatible = "mscc,ocelot-miim"; + reg = <0x107009c 0x36>, <0x10700f0 0x8>; + interrupts = <14>; + #address-cells = <1>; + #size-cells = <0>; + + phy0: ethernet-phy@0 { + reg = <0>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/mscc-miim.txt b/Documentation/devicetree/bindings/net/mscc-miim.txt deleted file mode 100644 index 70e0cb1ee485..000000000000 --- a/Documentation/devicetree/bindings/net/mscc-miim.txt +++ /dev/null @@ -1,26 +0,0 @@ -Microsemi MII Management Controller (MIIM) / MDIO -================================================= - -Properties: -- compatible: must be "mscc,ocelot-miim" or "microchip,lan966x-miim" -- reg: The base address of the MDIO bus controller register bank. Optionally, a - second register bank can be defined if there is an associated reset register - for internal PHYs -- #address-cells: Must be <1>. -- #size-cells: Must be <0>. MDIO addresses have no size component. -- interrupts: interrupt specifier (refer to the interrupt binding) - -Typically an MDIO bus might have several children. - -Example: - mdio@107009c { - #address-cells = <1>; - #size-cells = <0>; - compatible = "mscc,ocelot-miim"; - reg = <0x107009c 0x36>, <0x10700f0 0x8>; - interrupts = <14>; - - phy0: ethernet-phy@0 { - reg = <0>; - }; - }; diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 6b5dc203da2b..21a97703421d 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -39,6 +39,7 @@ Contents: intel/iavf intel/ice marvell/octeontx2 + marvell/octeon_ep mellanox/mlx5 microsoft/netvsc neterion/s2io diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst new file mode 100644 index 000000000000..bc562c49011b --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst @@ -0,0 +1,35 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +==================================================================== +Linux kernel networking driver for Marvell's Octeon PCI Endpoint NIC +==================================================================== + +Network driver for Marvell's Octeon PCI EndPoint NIC. +Copyright (c) 2020 Marvell International Ltd. + +Contents +======== + +- `Overview`_ +- `Supported Devices`_ +- `Interface Control`_ + +Overview +======== +This driver implements networking functionality of Marvell's Octeon PCI +EndPoint NIC. + +Supported Devices +================= +Currently, this driver support following devices: + * Network controller: Cavium, Inc. Device b200 + +Interface Control +================= +Network Interface control like changing mtu, link speed, link down/up are +done by writing command to mailbox command queue, a mailbox interface +implemented through a reserved region in BAR4. +This driver writes the commands into the mailbox and the firmware on the +Octeon device processes them. The firmware also sends unsolicited notifications +to driver for events suchs as link change, through notification queue +implemented as part of mailbox interface. diff --git a/MAINTAINERS b/MAINTAINERS index 7341667e7313..84158e08e6e9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11830,6 +11830,13 @@ S: Supported F: Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt F: drivers/mmc/host/sdhci-xenon* +MARVELL OCTEON ENDPOINT DRIVER +M: Veerasenareddy Burru <vburru@marvell.com> +M: Abhijit Ayarekar <aayarekar@marvell.com> +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/ethernet/marvell/octeon_ep + MATROX FRAMEBUFFER DRIVER L: linux-fbdev@vger.kernel.org S: Orphan diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 6f8cb3ad1e84..f232f8baf4e8 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -357,7 +357,7 @@ }; cci_control2: slave-if@5000 { - compatible = "arm,cci-400-ctrl-if"; + compatible = "arm,cci-400-ctrl-if", "syscon"; interface-type = "ace"; reg = <0x5000 0x1000>; }; @@ -901,6 +901,11 @@ }; }; + hifsys: syscon@1af00000 { + compatible = "mediatek,mt7622-hifsys", "syscon"; + reg = <0 0x1af00000 0 0x70>; + }; + ethsys: syscon@1b000000 { compatible = "mediatek,mt7622-ethsys", "syscon"; @@ -919,6 +924,26 @@ #dma-cells = <1>; }; + pcie_mirror: pcie-mirror@10000400 { + compatible = "mediatek,mt7622-pcie-mirror", + "syscon"; + reg = <0 0x10000400 0 0x10>; + }; + + wed0: wed@1020a000 { + compatible = "mediatek,mt7622-wed", + "syscon"; + reg = <0 0x1020a000 0 0x1000>; + interrupts = <GIC_SPI 214 IRQ_TYPE_LEVEL_LOW>; + }; + + wed1: wed@1020b000 { + compatible = "mediatek,mt7622-wed", + "syscon"; + reg = <0 0x1020b000 0 0x1000>; + interrupts = <GIC_SPI 215 IRQ_TYPE_LEVEL_LOW>; + }; + eth: ethernet@1b100000 { compatible = "mediatek,mt7622-eth", "mediatek,mt2701-eth", @@ -945,6 +970,11 @@ power-domains = <&scpsys MT7622_POWER_DOMAIN_ETHSYS>; mediatek,ethsys = <ðsys>; mediatek,sgmiisys = <&sgmiisys>; + cci-control-port = <&cci_control2>; + mediatek,wed = <&wed0>, <&wed1>; + mediatek,pcie-mirror = <&pcie_mirror>; + mediatek,hifsys = <&hifsys>; + dma-coherent; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 1e5760d567ae..6aa2dc836db1 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -201,6 +201,8 @@ enum aarch64_insn_size_type { enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_LOAD_REG_OFFSET, AARCH64_INSN_LDST_STORE_REG_OFFSET, + AARCH64_INSN_LDST_LOAD_IMM_OFFSET, + AARCH64_INSN_LDST_STORE_IMM_OFFSET, AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX, AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX, AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, @@ -335,6 +337,7 @@ __AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00) __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) __AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) +__AARCH64_INSN_FUNCS(str_imm, 0x3FC00000, 0x39000000) __AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000) __AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000) __AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000) @@ -342,6 +345,7 @@ __AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000) __AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000) __AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) +__AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) @@ -501,6 +505,11 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, enum aarch64_insn_register offset, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + unsigned int imm, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type); u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base, diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 5e90887deec4..695d7368fadc 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -299,29 +299,24 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type, return insn; } +static const u32 aarch64_insn_ldst_size[] = { + [AARCH64_INSN_SIZE_8] = 0, + [AARCH64_INSN_SIZE_16] = 1, + [AARCH64_INSN_SIZE_32] = 2, + [AARCH64_INSN_SIZE_64] = 3, +}; + static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type, u32 insn) { u32 size; - switch (type) { - case AARCH64_INSN_SIZE_8: - size = 0; - break; - case AARCH64_INSN_SIZE_16: - size = 1; - break; - case AARCH64_INSN_SIZE_32: - size = 2; - break; - case AARCH64_INSN_SIZE_64: - size = 3; - break; - default: + if (type < AARCH64_INSN_SIZE_8 || type > AARCH64_INSN_SIZE_64) { pr_err("%s: unknown size encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } + size = aarch64_insn_ldst_size[type]; insn &= ~GENMASK(31, 30); insn |= size << 30; @@ -504,6 +499,50 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, offset); } +u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + unsigned int imm, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type) +{ + u32 insn; + u32 shift; + + if (size < AARCH64_INSN_SIZE_8 || size > AARCH64_INSN_SIZE_64) { + pr_err("%s: unknown size encoding %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + shift = aarch64_insn_ldst_size[size]; + if (imm & ~(BIT(12 + shift) - BIT(shift))) { + pr_err("%s: invalid imm: %d\n", __func__, imm); + return AARCH64_BREAK_FAULT; + } + + imm >>= shift; + + switch (type) { + case AARCH64_INSN_LDST_LOAD_IMM_OFFSET: + insn = aarch64_insn_get_ldr_imm_value(); + break; + case AARCH64_INSN_LDST_STORE_IMM_OFFSET: + insn = aarch64_insn_get_str_imm_value(); + break; + default: + pr_err("%s: unknown load/store encoding %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + base); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm); +} + u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base, diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index dd59b5ad8fe4..194c95ccc1cf 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -66,6 +66,20 @@ #define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE) #define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD) +/* Load/store register (immediate offset) */ +#define A64_LS_IMM(Rt, Rn, imm, size, type) \ + aarch64_insn_gen_load_store_imm(Rt, Rn, imm, \ + AARCH64_INSN_SIZE_##size, \ + AARCH64_INSN_LDST_##type##_IMM_OFFSET) +#define A64_STRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, STORE) +#define A64_LDRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, LOAD) +#define A64_STRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, STORE) +#define A64_LDRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, LOAD) +#define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE) +#define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD) +#define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE) +#define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD) + /* Load/store register pair */ #define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \ aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \ @@ -249,6 +263,9 @@ /* HINTs */ #define A64_HINT(x) aarch64_insn_gen_hint(x) +#define A64_PACIASP A64_HINT(AARCH64_INSN_HINT_PACIASP) +#define A64_AUTIASP A64_HINT(AARCH64_INSN_HINT_AUTIASP) + /* BTI */ #define A64_BTI_C A64_HINT(AARCH64_INSN_HINT_BTIC) #define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index fcc675aa1670..8ab4035dea27 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -26,6 +26,7 @@ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TCALL_CNT (MAX_BPF_JIT_REG + 2) #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) +#define FP_BOTTOM (MAX_BPF_JIT_REG + 4) #define check_imm(bits, imm) do { \ if ((((imm) > 0) && ((imm) >> (bits))) || \ @@ -63,6 +64,7 @@ static const int bpf2a64[] = { [TCALL_CNT] = A64_R(26), /* temporary register for blinding constants */ [BPF_REG_AX] = A64_R(9), + [FP_BOTTOM] = A64_R(27), }; struct jit_ctx { @@ -73,6 +75,7 @@ struct jit_ctx { int exentry_idx; __le32 *image; u32 stack_size; + int fpb_offset; }; static inline void emit(const u32 insn, struct jit_ctx *ctx) @@ -191,11 +194,53 @@ static bool is_addsub_imm(u32 imm) return !(imm & ~0xfff) || !(imm & ~0xfff000); } +/* + * There are 3 types of AArch64 LDR/STR (immediate) instruction: + * Post-index, Pre-index, Unsigned offset. + * + * For BPF ldr/str, the "unsigned offset" type is sufficient. + * + * "Unsigned offset" type LDR(immediate) format: + * + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |x x|1 1 1 0 0 1 0 1| imm12 | Rn | Rt | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * scale + * + * "Unsigned offset" type STR(immediate) format: + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |x x|1 1 1 0 0 1 0 0| imm12 | Rn | Rt | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * scale + * + * The offset is calculated from imm12 and scale in the following way: + * + * offset = (u64)imm12 << scale + */ +static bool is_lsi_offset(int offset, int scale) +{ + if (offset < 0) + return false; + + if (offset > (0xFFF << scale)) + return false; + + if (offset & ((1 << scale) - 1)) + return false; + + return true; +} + /* Tail call offset to jump into */ -#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) -#define PROLOGUE_OFFSET 8 +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || \ + IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) +#define PROLOGUE_OFFSET 9 #else -#define PROLOGUE_OFFSET 7 +#define PROLOGUE_OFFSET 8 #endif static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) @@ -207,6 +252,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; const u8 tcc = bpf2a64[TCALL_CNT]; + const u8 fpb = bpf2a64[FP_BOTTOM]; const int idx0 = ctx->idx; int cur_offset; @@ -233,8 +279,11 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ + /* Sign lr */ + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) + emit(A64_PACIASP, ctx); /* BTI landing pad */ - if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + else if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) emit(A64_BTI_C, ctx); /* Save FP and LR registers to stay align with ARM64 AAPCS */ @@ -245,6 +294,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit(A64_PUSH(r6, r7, A64_SP), ctx); emit(A64_PUSH(r8, r9, A64_SP), ctx); emit(A64_PUSH(fp, tcc, A64_SP), ctx); + emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); @@ -265,6 +315,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit(A64_BTI_J, ctx); } + emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); + /* Stack must be multiples of 16B */ ctx->stack_size = round_up(prog->aux->stack_depth, 16); @@ -512,10 +564,13 @@ static void build_epilogue(struct jit_ctx *ctx) const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 fpb = bpf2a64[FP_BOTTOM]; /* We're done with BPF stack */ emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + /* Restore x27 and x28 */ + emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); /* Restore fs (x25) and x26 */ emit(A64_POP(fp, A64_R(26), A64_SP), ctx); @@ -529,6 +584,10 @@ static void build_epilogue(struct jit_ctx *ctx) /* Set return value */ emit(A64_MOV(1, A64_R(0), r0), ctx); + /* Authenticate lr */ + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) + emit(A64_AUTIASP, ctx); + emit(A64_RET(A64_LR), ctx); } @@ -609,6 +668,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, const u8 src = bpf2a64[insn->src_reg]; const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; + const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 fpb = bpf2a64[FP_BOTTOM]; const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; @@ -617,6 +678,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, u8 jmp_cond; s32 jmp_offset; u32 a64_insn; + u8 src_adj; + u8 dst_adj; + int off_adj; int ret; switch (code) { @@ -971,19 +1035,45 @@ emit_cond_jmp: case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_B: - emit_a64_mov_i(1, tmp, off, ctx); + if (ctx->fpb_offset > 0 && src == fp) { + src_adj = fpb; + off_adj = off + ctx->fpb_offset; + } else { + src_adj = src; + off_adj = off; + } switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_LDR32(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_LDR32I(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDR32(dst, src, tmp), ctx); + } break; case BPF_H: - emit(A64_LDRH(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_LDRHI(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDRH(dst, src, tmp), ctx); + } break; case BPF_B: - emit(A64_LDRB(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_LDRBI(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDRB(dst, src, tmp), ctx); + } break; case BPF_DW: - emit(A64_LDR64(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_LDR64I(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDR64(dst, src, tmp), ctx); + } break; } @@ -1010,21 +1100,47 @@ emit_cond_jmp: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: + if (ctx->fpb_offset > 0 && dst == fp) { + dst_adj = fpb; + off_adj = off + ctx->fpb_offset; + } else { + dst_adj = dst; + off_adj = off; + } /* Load imm to a register then store it */ - emit_a64_mov_i(1, tmp2, off, ctx); emit_a64_mov_i(1, tmp, imm, ctx); switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_STR32(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_STR32I(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STR32(tmp, dst, tmp2), ctx); + } break; case BPF_H: - emit(A64_STRH(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_STRHI(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STRH(tmp, dst, tmp2), ctx); + } break; case BPF_B: - emit(A64_STRB(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_STRBI(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STRB(tmp, dst, tmp2), ctx); + } break; case BPF_DW: - emit(A64_STR64(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_STR64I(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STR64(tmp, dst, tmp2), ctx); + } break; } break; @@ -1034,19 +1150,45 @@ emit_cond_jmp: case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_DW: - emit_a64_mov_i(1, tmp, off, ctx); + if (ctx->fpb_offset > 0 && dst == fp) { + dst_adj = fpb; + off_adj = off + ctx->fpb_offset; + } else { + dst_adj = dst; + off_adj = off; + } switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_STR32(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_STR32I(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STR32(src, dst, tmp), ctx); + } break; case BPF_H: - emit(A64_STRH(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_STRHI(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STRH(src, dst, tmp), ctx); + } break; case BPF_B: - emit(A64_STRB(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_STRBI(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STRB(src, dst, tmp), ctx); + } break; case BPF_DW: - emit(A64_STR64(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_STR64I(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STR64(src, dst, tmp), ctx); + } break; } break; @@ -1069,6 +1211,79 @@ emit_cond_jmp: return 0; } +/* + * Return 0 if FP may change at runtime, otherwise find the minimum negative + * offset to FP, converts it to positive number, and align down to 8 bytes. + */ +static int find_fpb_offset(struct bpf_prog *prog) +{ + int i; + int offset = 0; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + const u8 class = BPF_CLASS(insn->code); + const u8 mode = BPF_MODE(insn->code); + const u8 src = insn->src_reg; + const u8 dst = insn->dst_reg; + const s32 imm = insn->imm; + const s16 off = insn->off; + + switch (class) { + case BPF_STX: + case BPF_ST: + /* fp holds atomic operation result */ + if (class == BPF_STX && mode == BPF_ATOMIC && + ((imm == BPF_XCHG || + imm == (BPF_FETCH | BPF_ADD) || + imm == (BPF_FETCH | BPF_AND) || + imm == (BPF_FETCH | BPF_XOR) || + imm == (BPF_FETCH | BPF_OR)) && + src == BPF_REG_FP)) + return 0; + + if (mode == BPF_MEM && dst == BPF_REG_FP && + off < offset) + offset = insn->off; + break; + + case BPF_JMP32: + case BPF_JMP: + break; + + case BPF_LDX: + case BPF_LD: + /* fp holds load result */ + if (dst == BPF_REG_FP) + return 0; + + if (class == BPF_LDX && mode == BPF_MEM && + src == BPF_REG_FP && off < offset) + offset = off; + break; + + case BPF_ALU: + case BPF_ALU64: + default: + /* fp holds ALU result */ + if (dst == BPF_REG_FP) + return 0; + } + } + + if (offset < 0) { + /* + * safely be converted to a positive 'int', since insn->off + * is 's16' + */ + offset = -offset; + /* align down to 8 bytes */ + offset = ALIGN_DOWN(offset, 8); + } + + return offset; +} + static int build_body(struct jit_ctx *ctx, bool extra_pass) { const struct bpf_prog *prog = ctx->prog; @@ -1190,6 +1405,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } + ctx.fpb_offset = find_fpb_offset(prog); + /* * 1. Initial fake pass to compute ctx->idx and ctx->offset. * diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index 5cb91509bb7c..b0489437621a 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -182,7 +182,6 @@ CONFIG_ATM_FIRESTREAM=m CONFIG_ATM_ZATM=m CONFIG_ATM_NICSTAR=m CONFIG_ATM_IDT77252=m -CONFIG_ATM_AMBASSADOR=m CONFIG_ATM_HORIZON=m CONFIG_ATM_IA=m CONFIG_ATM_FORE200E=m @@ -214,7 +213,6 @@ CONFIG_ATH_DEBUG=y CONFIG_ATH5K=y CONFIG_ATH5K_DEBUG=y CONFIG_WAN=y -CONFIG_LANMEDIA=m CONFIG_HDLC=m CONFIG_HDLC_RAW=m CONFIG_HDLC_RAW_ETH=m diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index 205d3b34528c..c98099f0b354 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -259,7 +259,6 @@ CONFIG_ATM_FIRESTREAM=m CONFIG_ATM_ZATM=m CONFIG_ATM_NICSTAR=m CONFIG_ATM_IDT77252=m -CONFIG_ATM_AMBASSADOR=m CONFIG_ATM_HORIZON=m CONFIG_ATM_IA=m CONFIG_ATM_FORE200E=m @@ -363,7 +362,6 @@ CONFIG_USB_AN2720=y CONFIG_USB_EPSON2888=y CONFIG_USB_SIERRA_NET=m CONFIG_WAN=y -CONFIG_LANMEDIA=m CONFIG_HDLC=m CONFIG_HDLC_RAW=m CONFIG_HDLC_RAW_ETH=m diff --git a/drivers/atm/Kconfig b/drivers/atm/Kconfig index b9370bbca828..7be08e24955c 100644 --- a/drivers/atm/Kconfig +++ b/drivers/atm/Kconfig @@ -244,31 +244,6 @@ config ATM_IDT77252_USE_SUNI depends on ATM_IDT77252 default y -config ATM_AMBASSADOR - tristate "Madge Ambassador (Collage PCI 155 Server)" - depends on PCI && VIRT_TO_BUS - select BITREVERSE - help - This is a driver for ATMizer based ATM card produced by Madge - Networks Ltd. Say Y (or M to compile as a module named ambassador) - here if you have one of these cards. - -config ATM_AMBASSADOR_DEBUG - bool "Enable debugging messages" - depends on ATM_AMBASSADOR - help - Somewhat useful debugging messages are available. The choice of - messages is controlled by a bitmap. This may be specified as a - module argument (kernel command line argument as well?), changed - dynamically using an ioctl (not yet) or changed by sending the - string "Dxxxx" to VCI 1023 (where x is a hex digit). See the file - <file:drivers/atm/ambassador.h> for the meanings of the bits in the - mask. - - When active, these messages can have a significant impact on the - speed of the driver, and the size of your syslog files! When - inactive, they will have only a modest impact on performance. - config ATM_HORIZON tristate "Madge Horizon [Ultra] (Collage PCI 25 and Collage PCI 155 Client)" depends on PCI && VIRT_TO_BUS diff --git a/drivers/atm/Makefile b/drivers/atm/Makefile index aa191616a72e..99ecbc280643 100644 --- a/drivers/atm/Makefile +++ b/drivers/atm/Makefile @@ -7,7 +7,6 @@ fore_200e-y := fore200e.o obj-$(CONFIG_ATM_ZATM) += zatm.o uPD98402.o obj-$(CONFIG_ATM_NICSTAR) += nicstar.o -obj-$(CONFIG_ATM_AMBASSADOR) += ambassador.o obj-$(CONFIG_ATM_HORIZON) += horizon.o obj-$(CONFIG_ATM_IA) += iphase.o suni.o obj-$(CONFIG_ATM_FORE200E) += fore_200e.o diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c deleted file mode 100644 index c039b8a4fefe..000000000000 --- a/drivers/atm/ambassador.c +++ /dev/null @@ -1,2400 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - Madge Ambassador ATM Adapter driver. - Copyright (C) 1995-1999 Madge Networks Ltd. - -*/ - -/* * dedicated to the memory of Graham Gordon 1971-1998 * */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/atmdev.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/poison.h> -#include <linux/bitrev.h> -#include <linux/mutex.h> -#include <linux/firmware.h> -#include <linux/ihex.h> -#include <linux/slab.h> - -#include <linux/atomic.h> -#include <asm/io.h> -#include <asm/byteorder.h> - -#include "ambassador.h" - -#define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>" -#define description_string "Madge ATM Ambassador driver" -#define version_string "1.2.4" - -static inline void __init show_version (void) { - printk ("%s version %s\n", description_string, version_string); -} - -/* - - Theory of Operation - - I Hardware, detection, initialisation and shutdown. - - 1. Supported Hardware - - This driver is for the PCI ATMizer-based Ambassador card (except - very early versions). It is not suitable for the similar EISA "TR7" - card. Commercially, both cards are known as Collage Server ATM - adapters. - - The loader supports image transfer to the card, image start and few - other miscellaneous commands. - - Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023. - - The cards are big-endian. - - 2. Detection - - Standard PCI stuff, the early cards are detected and rejected. - - 3. Initialisation - - The cards are reset and the self-test results are checked. The - microcode image is then transferred and started. This waits for a - pointer to a descriptor containing details of the host-based queues - and buffers and various parameters etc. Once they are processed - normal operations may begin. The BIA is read using a microcode - command. - - 4. Shutdown - - This may be accomplished either by a card reset or via the microcode - shutdown command. Further investigation required. - - 5. Persistent state - - The card reset does not affect PCI configuration (good) or the - contents of several other "shared run-time registers" (bad) which - include doorbell and interrupt control as well as EEPROM and PCI - control. The driver must be careful when modifying these registers - not to touch bits it does not use and to undo any changes at exit. - - II Driver software - - 0. Generalities - - The adapter is quite intelligent (fast) and has a simple interface - (few features). VPI is always zero, 1024 VCIs are supported. There - is limited cell rate support. UBR channels can be capped and ABR - (explicit rate, but not EFCI) is supported. There is no CBR or VBR - support. - - 1. Driver <-> Adapter Communication - - Apart from the basic loader commands, the driver communicates - through three entities: the command queue (CQ), the transmit queue - pair (TXQ) and the receive queue pairs (RXQ). These three entities - are set up by the host and passed to the microcode just after it has - been started. - - All queues are host-based circular queues. They are contiguous and - (due to hardware limitations) have some restrictions as to their - locations in (bus) memory. They are of the "full means the same as - empty so don't do that" variety since the adapter uses pointers - internally. - - The queue pairs work as follows: one queue is for supply to the - adapter, items in it are pending and are owned by the adapter; the - other is the queue for return from the adapter, items in it have - been dealt with by the adapter. The host adds items to the supply - (TX descriptors and free RX buffer descriptors) and removes items - from the return (TX and RX completions). The adapter deals with out - of order completions. - - Interrupts (card to host) and the doorbell (host to card) are used - for signalling. - - 1. CQ - - This is to communicate "open VC", "close VC", "get stats" etc. to - the adapter. At most one command is retired every millisecond by the - card. There is no out of order completion or notification. The - driver needs to check the return code of the command, waiting as - appropriate. - - 2. TXQ - - TX supply items are of variable length (scatter gather support) and - so the queue items are (more or less) pointers to the real thing. - Each TX supply item contains a unique, host-supplied handle (the skb - bus address seems most sensible as this works for Alphas as well, - there is no need to do any endian conversions on the handles). - - TX return items consist of just the handles above. - - 3. RXQ (up to 4 of these with different lengths and buffer sizes) - - RX supply items consist of a unique, host-supplied handle (the skb - bus address again) and a pointer to the buffer data area. - - RX return items consist of the handle above, the VC, length and a - status word. This just screams "oh so easy" doesn't it? - - Note on RX pool sizes: - - Each pool should have enough buffers to handle a back-to-back stream - of minimum sized frames on a single VC. For example: - - frame spacing = 3us (about right) - - delay = IRQ lat + RX handling + RX buffer replenish = 20 (us) (a guess) - - min number of buffers for one VC = 1 + delay/spacing (buffers) - - delay/spacing = latency = (20+2)/3 = 7 (buffers) (rounding up) - - The 20us delay assumes that there is no need to sleep; if we need to - sleep to get buffers we are going to drop frames anyway. - - In fact, each pool should have enough buffers to support the - simultaneous reassembly of a separate frame on each VC and cope with - the case in which frames complete in round robin cell fashion on - each VC. - - Only one frame can complete at each cell arrival, so if "n" VCs are - open, the worst case is to have them all complete frames together - followed by all starting new frames together. - - desired number of buffers = n + delay/spacing - - These are the extreme requirements, however, they are "n+k" for some - "k" so we have only the constant to choose. This is the argument - rx_lats which current defaults to 7. - - Actually, "n ? n+k : 0" is better and this is what is implemented, - subject to the limit given by the pool size. - - 4. Driver locking - - Simple spinlocks are used around the TX and RX queue mechanisms. - Anyone with a faster, working method is welcome to implement it. - - The adapter command queue is protected with a spinlock. We always - wait for commands to complete. - - A more complex form of locking is used around parts of the VC open - and close functions. There are three reasons for a lock: 1. we need - to do atomic rate reservation and release (not used yet), 2. Opening - sometimes involves two adapter commands which must not be separated - by another command on the same VC, 3. the changes to RX pool size - must be atomic. The lock needs to work over context switches, so we - use a semaphore. - - III Hardware Features and Microcode Bugs - - 1. Byte Ordering - - *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*! - - 2. Memory access - - All structures that are not accessed using DMA must be 4-byte - aligned (not a problem) and must not cross 4MB boundaries. - - There is a DMA memory hole at E0000000-E00000FF (groan). - - TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB - but for a hardware bug). - - RX buffers (DMA write) must not cross 16MB boundaries and must - include spare trailing bytes up to the next 4-byte boundary; they - will be written with rubbish. - - The PLX likes to prefetch; if reading up to 4 u32 past the end of - each TX fragment is not a problem, then TX can be made to go a - little faster by passing a flag at init that disables a prefetch - workaround. We do not pass this flag. (new microcode only) - - Now we: - . Note that alloc_skb rounds up size to a 16byte boundary. - . Ensure all areas do not traverse 4MB boundaries. - . Ensure all areas do not start at a E00000xx bus address. - (I cannot be certain, but this may always hold with Linux) - . Make all failures cause a loud message. - . Discard non-conforming SKBs (causes TX failure or RX fill delay). - . Discard non-conforming TX fragment descriptors (the TX fails). - In the future we could: - . Allow RX areas that traverse 4MB (but not 16MB) boundaries. - . Segment TX areas into some/more fragments, when necessary. - . Relax checks for non-DMA items (ignore hole). - . Give scatter-gather (iovec) requirements using ???. (?) - - 3. VC close is broken (only for new microcode) - - The VC close adapter microcode command fails to do anything if any - frames have been received on the VC but none have been transmitted. - Frames continue to be reassembled and passed (with IRQ) to the - driver. - - IV To Do List - - . Fix bugs! - - . Timer code may be broken. - - . Deal with buggy VC close (somehow) in microcode 12. - - . Handle interrupted and/or non-blocking writes - is this a job for - the protocol layer? - - . Add code to break up TX fragments when they span 4MB boundaries. - - . Add SUNI phy layer (need to know where SUNI lives on card). - - . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b) - leave extra headroom space for Ambassador TX descriptors. - - . Understand these elements of struct atm_vcc: recvq (proto?), - sleep, callback, listenq, backlog_quota, reply and user_back. - - . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable). - - . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow. - - . Decide whether RX buffer recycling is or can be made completely safe; - turn it back on. It looks like Werner is going to axe this. - - . Implement QoS changes on open VCs (involves extracting parts of VC open - and close into separate functions and using them to make changes). - - . Hack on command queue so that someone can issue multiple commands and wait - on the last one (OR only "no-op" or "wait" commands are waited for). - - . Eliminate need for while-schedule around do_command. - -*/ - -static void do_housekeeping (struct timer_list *t); -/********** globals **********/ - -static unsigned short debug = 0; -static unsigned int cmds = 8; -static unsigned int txs = 32; -static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 }; -static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 }; -static unsigned int rx_lats = 7; -static unsigned char pci_lat = 0; - -static const unsigned long onegigmask = -1 << 30; - -/********** access to adapter **********/ - -static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) { - PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data); -#ifdef AMB_MMIO - dev->membase[addr / sizeof(u32)] = data; -#else - outl (data, dev->iobase + addr); -#endif -} - -static inline u32 rd_plain (const amb_dev * dev, size_t addr) { -#ifdef AMB_MMIO - u32 data = dev->membase[addr / sizeof(u32)]; -#else - u32 data = inl (dev->iobase + addr); -#endif - PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data); - return data; -} - -static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) { - __be32 be = cpu_to_be32 (data); - PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be); -#ifdef AMB_MMIO - dev->membase[addr / sizeof(u32)] = be; -#else - outl (be, dev->iobase + addr); -#endif -} - -static inline u32 rd_mem (const amb_dev * dev, size_t addr) { -#ifdef AMB_MMIO - __be32 be = dev->membase[addr / sizeof(u32)]; -#else - __be32 be = inl (dev->iobase + addr); -#endif - u32 data = be32_to_cpu (be); - PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be); - return data; -} - -/********** dump routines **********/ - -static inline void dump_registers (const amb_dev * dev) { -#ifdef DEBUG_AMBASSADOR - if (debug & DBG_REGS) { - size_t i; - PRINTD (DBG_REGS, "reading PLX control: "); - for (i = 0x00; i < 0x30; i += sizeof(u32)) - rd_mem (dev, i); - PRINTD (DBG_REGS, "reading mailboxes: "); - for (i = 0x40; i < 0x60; i += sizeof(u32)) - rd_mem (dev, i); - PRINTD (DBG_REGS, "reading doorb irqev irqen reset:"); - for (i = 0x60; i < 0x70; i += sizeof(u32)) - rd_mem (dev, i); - } -#else - (void) dev; -#endif - return; -} - -static inline void dump_loader_block (volatile loader_block * lb) { -#ifdef DEBUG_AMBASSADOR - unsigned int i; - PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:", - lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command)); - for (i = 0; i < MAX_COMMAND_DATA; ++i) - PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i])); - PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid)); -#else - (void) lb; -#endif - return; -} - -static inline void dump_command (command * cmd) { -#ifdef DEBUG_AMBASSADOR - unsigned int i; - PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:", - cmd, /*be32_to_cpu*/ (cmd->request)); - for (i = 0; i < 3; ++i) - PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i])); - PRINTDE (DBG_CMD, ""); -#else - (void) cmd; -#endif - return; -} - -static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) { -#ifdef DEBUG_AMBASSADOR - unsigned int i; - unsigned char * data = skb->data; - PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc); - for (i=0; i<skb->len && i < 256;i++) - PRINTDM (DBG_DATA, "%02x ", data[i]); - PRINTDE (DBG_DATA,""); -#else - (void) prefix; - (void) vc; - (void) skb; -#endif - return; -} - -/********** check memory areas for use by Ambassador **********/ - -/* see limitations under Hardware Features */ - -static int check_area (void * start, size_t length) { - // assumes length > 0 - const u32 fourmegmask = -1 << 22; - const u32 twofivesixmask = -1 << 8; - const u32 starthole = 0xE0000000; - u32 startaddress = virt_to_bus (start); - u32 lastaddress = startaddress+length-1; - if ((startaddress ^ lastaddress) & fourmegmask || - (startaddress & twofivesixmask) == starthole) { - PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!", - startaddress, lastaddress); - return -1; - } else { - return 0; - } -} - -/********** free an skb (as per ATM device driver documentation) **********/ - -static void amb_kfree_skb (struct sk_buff * skb) { - if (ATM_SKB(skb)->vcc->pop) { - ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb); - } else { - dev_kfree_skb_any (skb); - } -} - -/********** TX completion **********/ - -static void tx_complete (amb_dev * dev, tx_out * tx) { - tx_simple * tx_descr = bus_to_virt (tx->handle); - struct sk_buff * skb = tx_descr->skb; - - PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx); - - // VC layer stats - atomic_inc(&ATM_SKB(skb)->vcc->stats->tx); - - // free the descriptor - kfree (tx_descr); - - // free the skb - amb_kfree_skb (skb); - - dev->stats.tx_ok++; - return; -} - -/********** RX completion **********/ - -static void rx_complete (amb_dev * dev, rx_out * rx) { - struct sk_buff * skb = bus_to_virt (rx->handle); - u16 vc = be16_to_cpu (rx->vc); - // unused: u16 lec_id = be16_to_cpu (rx->lec_id); - u16 status = be16_to_cpu (rx->status); - u16 rx_len = be16_to_cpu (rx->length); - - PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len); - - // XXX move this in and add to VC stats ??? - if (!status) { - struct atm_vcc * atm_vcc = dev->rxer[vc]; - dev->stats.rx.ok++; - - if (atm_vcc) { - - if (rx_len <= atm_vcc->qos.rxtp.max_sdu) { - - if (atm_charge (atm_vcc, skb->truesize)) { - - // prepare socket buffer - ATM_SKB(skb)->vcc = atm_vcc; - skb_put (skb, rx_len); - - dump_skb ("<<<", vc, skb); - - // VC layer stats - atomic_inc(&atm_vcc->stats->rx); - __net_timestamp(skb); - // end of our responsibility - atm_vcc->push (atm_vcc, skb); - return; - - } else { - // someone fix this (message), please! - PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize); - // drop stats incremented in atm_charge - } - - } else { - PRINTK (KERN_INFO, "dropped over-size frame"); - // should we count this? - atomic_inc(&atm_vcc->stats->rx_drop); - } - - } else { - PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc); - // this is an adapter bug, only in new version of microcode - } - - } else { - dev->stats.rx.error++; - if (status & CRC_ERR) - dev->stats.rx.badcrc++; - if (status & LEN_ERR) - dev->stats.rx.toolong++; - if (status & ABORT_ERR) - dev->stats.rx.aborted++; - if (status & UNUSED_ERR) - dev->stats.rx.unused++; - } - - dev_kfree_skb_any (skb); - return; -} - -/* - - Note on queue handling. - - Here "give" and "take" refer to queue entries and a queue (pair) - rather than frames to or from the host or adapter. Empty frame - buffers are given to the RX queue pair and returned unused or - containing RX frames. TX frames (well, pointers to TX fragment - lists) are given to the TX queue pair, completions are returned. - -*/ - -/********** command queue **********/ - -// I really don't like this, but it's the best I can do at the moment - -// also, the callers are responsible for byte order as the microcode -// sometimes does 16-bit accesses (yuk yuk yuk) - -static int command_do (amb_dev * dev, command * cmd) { - amb_cq * cq = &dev->cq; - volatile amb_cq_ptrs * ptrs = &cq->ptrs; - command * my_slot; - - PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev); - - if (test_bit (dead, &dev->flags)) - return 0; - - spin_lock (&cq->lock); - - // if not full... - if (cq->pending < cq->maximum) { - // remember my slot for later - my_slot = ptrs->in; - PRINTD (DBG_CMD, "command in slot %p", my_slot); - - dump_command (cmd); - - // copy command in - *ptrs->in = *cmd; - cq->pending++; - ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit); - - // mail the command - wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in)); - - if (cq->pending > cq->high) - cq->high = cq->pending; - spin_unlock (&cq->lock); - - // these comments were in a while-loop before, msleep removes the loop - // go to sleep - // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout); - msleep(cq->pending); - - // wait for my slot to be reached (all waiters are here or above, until...) - while (ptrs->out != my_slot) { - PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule(); - } - - // wait on my slot (... one gets to its slot, and... ) - while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) { - PRINTD (DBG_CMD, "wait: command slot completion"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule(); - } - - PRINTD (DBG_CMD, "command complete"); - // update queue (... moves the queue along to the next slot) - spin_lock (&cq->lock); - cq->pending--; - // copy command out - *cmd = *ptrs->out; - ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit); - spin_unlock (&cq->lock); - - return 0; - } else { - cq->filled++; - spin_unlock (&cq->lock); - return -EAGAIN; - } - -} - -/********** TX queue pair **********/ - -static int tx_give (amb_dev * dev, tx_in * tx) { - amb_txq * txq = &dev->txq; - unsigned long flags; - - PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev); - - if (test_bit (dead, &dev->flags)) - return 0; - - spin_lock_irqsave (&txq->lock, flags); - - if (txq->pending < txq->maximum) { - PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr); - - *txq->in.ptr = *tx; - txq->pending++; - txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit); - // hand over the TX and ring the bell - wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr)); - wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME); - - if (txq->pending > txq->high) - txq->high = txq->pending; - spin_unlock_irqrestore (&txq->lock, flags); - return 0; - } else { - txq->filled++; - spin_unlock_irqrestore (&txq->lock, flags); - return -EAGAIN; - } -} - -static int tx_take (amb_dev * dev) { - amb_txq * txq = &dev->txq; - unsigned long flags; - - PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev); - - spin_lock_irqsave (&txq->lock, flags); - - if (txq->pending && txq->out.ptr->handle) { - // deal with TX completion - tx_complete (dev, txq->out.ptr); - // mark unused again - txq->out.ptr->handle = 0; - // remove item - txq->pending--; - txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit); - - spin_unlock_irqrestore (&txq->lock, flags); - return 0; - } else { - - spin_unlock_irqrestore (&txq->lock, flags); - return -1; - } -} - -/********** RX queue pairs **********/ - -static int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) { - amb_rxq * rxq = &dev->rxq[pool]; - unsigned long flags; - - PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool); - - spin_lock_irqsave (&rxq->lock, flags); - - if (rxq->pending < rxq->maximum) { - PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr); - - *rxq->in.ptr = *rx; - rxq->pending++; - rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit); - // hand over the RX buffer - wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr)); - - spin_unlock_irqrestore (&rxq->lock, flags); - return 0; - } else { - spin_unlock_irqrestore (&rxq->lock, flags); - return -1; - } -} - -static int rx_take (amb_dev * dev, unsigned char pool) { - amb_rxq * rxq = &dev->rxq[pool]; - unsigned long flags; - - PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool); - - spin_lock_irqsave (&rxq->lock, flags); - - if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) { - // deal with RX completion - rx_complete (dev, rxq->out.ptr); - // mark unused again - rxq->out.ptr->status = 0; - rxq->out.ptr->length = 0; - // remove item - rxq->pending--; - rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit); - - if (rxq->pending < rxq->low) - rxq->low = rxq->pending; - spin_unlock_irqrestore (&rxq->lock, flags); - return 0; - } else { - if (!rxq->pending && rxq->buffers_wanted) - rxq->emptied++; - spin_unlock_irqrestore (&rxq->lock, flags); - return -1; - } -} - -/********** RX Pool handling **********/ - -/* pre: buffers_wanted = 0, post: pending = 0 */ -static void drain_rx_pool (amb_dev * dev, unsigned char pool) { - amb_rxq * rxq = &dev->rxq[pool]; - - PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool); - - if (test_bit (dead, &dev->flags)) - return; - - /* we are not quite like the fill pool routines as we cannot just - remove one buffer, we have to remove all of them, but we might as - well pretend... */ - if (rxq->pending > rxq->buffers_wanted) { - command cmd; - cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q); - cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT); - while (command_do (dev, &cmd)) - schedule(); - /* the pool may also be emptied via the interrupt handler */ - while (rxq->pending > rxq->buffers_wanted) - if (rx_take (dev, pool)) - schedule(); - } - - return; -} - -static void drain_rx_pools (amb_dev * dev) { - unsigned char pool; - - PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev); - - for (pool = 0; pool < NUM_RX_POOLS; ++pool) - drain_rx_pool (dev, pool); -} - -static void fill_rx_pool (amb_dev * dev, unsigned char pool, - gfp_t priority) -{ - rx_in rx; - amb_rxq * rxq; - - PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority); - - if (test_bit (dead, &dev->flags)) - return; - - rxq = &dev->rxq[pool]; - while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) { - - struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority); - if (!skb) { - PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool); - return; - } - if (check_area (skb->data, skb->truesize)) { - dev_kfree_skb_any (skb); - return; - } - // cast needed as there is no %? for pointer differences - PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li", - skb, skb->head, (long) skb_end_offset(skb)); - rx.handle = virt_to_bus (skb); - rx.host_address = cpu_to_be32 (virt_to_bus (skb->data)); - if (rx_give (dev, &rx, pool)) - dev_kfree_skb_any (skb); - - } - - return; -} - -// top up all RX pools -static void fill_rx_pools (amb_dev * dev) { - unsigned char pool; - - PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev); - - for (pool = 0; pool < NUM_RX_POOLS; ++pool) - fill_rx_pool (dev, pool, GFP_ATOMIC); - - return; -} - -/********** enable host interrupts **********/ - -static void interrupts_on (amb_dev * dev) { - wr_plain (dev, offsetof(amb_mem, interrupt_control), - rd_plain (dev, offsetof(amb_mem, interrupt_control)) - | AMB_INTERRUPT_BITS); -} - -/********** disable host interrupts **********/ - -static void interrupts_off (amb_dev * dev) { - wr_plain (dev, offsetof(amb_mem, interrupt_control), - rd_plain (dev, offsetof(amb_mem, interrupt_control)) - &~ AMB_INTERRUPT_BITS); -} - -/********** interrupt handling **********/ - -static irqreturn_t interrupt_handler(int irq, void *dev_id) { - amb_dev * dev = dev_id; - - PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id); - - { - u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt)); - - // for us or someone else sharing the same interrupt - if (!interrupt) { - PRINTD (DBG_IRQ, "irq not for me: %d", irq); - return IRQ_NONE; - } - - // definitely for us - PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt); - wr_plain (dev, offsetof(amb_mem, interrupt), -1); - } - - { - unsigned int irq_work = 0; - unsigned char pool; - for (pool = 0; pool < NUM_RX_POOLS; ++pool) - while (!rx_take (dev, pool)) - ++irq_work; - while (!tx_take (dev)) - ++irq_work; - - if (irq_work) { - fill_rx_pools (dev); - - PRINTD (DBG_IRQ, "work done: %u", irq_work); - } else { - PRINTD (DBG_IRQ|DBG_WARN, "no work done"); - } - } - - PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id); - return IRQ_HANDLED; -} - -/********** make rate (not quite as much fun as Horizon) **********/ - -static int make_rate (unsigned int rate, rounding r, - u16 * bits, unsigned int * actual) { - unsigned char exp = -1; // hush gcc - unsigned int man = -1; // hush gcc - - PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate); - - // rates in cells per second, ITU format (nasty 16-bit floating-point) - // given 5-bit e and 9-bit m: - // rate = EITHER (1+m/2^9)*2^e OR 0 - // bits = EITHER 1<<14 | e<<9 | m OR 0 - // (bit 15 is "reserved", bit 14 "non-zero") - // smallest rate is 0 (special representation) - // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1) - // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0) - // simple algorithm: - // find position of top bit, this gives e - // remove top bit and shift (rounding if feeling clever) by 9-e - - // ucode bug: please don't set bit 14! so 0 rate not representable - - if (rate > 0xffc00000U) { - // larger than largest representable rate - - if (r == round_up) { - return -EINVAL; - } else { - exp = 31; - man = 511; - } - - } else if (rate) { - // representable rate - - exp = 31; - man = rate; - - // invariant: rate = man*2^(exp-31) - while (!(man & (1<<31))) { - exp = exp - 1; - man = man<<1; - } - - // man has top bit set - // rate = (2^31+(man-2^31))*2^(exp-31) - // rate = (1+(man-2^31)/2^31)*2^exp - man = man<<1; - man &= 0xffffffffU; // a nop on 32-bit systems - // rate = (1+man/2^32)*2^exp - - // exp is in the range 0 to 31, man is in the range 0 to 2^32-1 - // time to lose significance... we want m in the range 0 to 2^9-1 - // rounding presents a minor problem... we first decide which way - // we are rounding (based on given rounding direction and possibly - // the bits of the mantissa that are to be discarded). - - switch (r) { - case round_down: { - // just truncate - man = man>>(32-9); - break; - } - case round_up: { - // check all bits that we are discarding - if (man & (~0U>>9)) { - man = (man>>(32-9)) + 1; - if (man == (1<<9)) { - // no need to check for round up outside of range - man = 0; - exp += 1; - } - } else { - man = (man>>(32-9)); - } - break; - } - case round_nearest: { - // check msb that we are discarding - if (man & (1<<(32-9-1))) { - man = (man>>(32-9)) + 1; - if (man == (1<<9)) { - // no need to check for round up outside of range - man = 0; - exp += 1; - } - } else { - man = (man>>(32-9)); - } - break; - } - } - - } else { - // zero rate - not representable - - if (r == round_down) { - return -EINVAL; - } else { - exp = 0; - man = 0; - } - - } - - PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp); - - if (bits) - *bits = /* (1<<14) | */ (exp<<9) | man; - - if (actual) - *actual = (exp >= 9) - ? (1 << exp) + (man << (exp-9)) - : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp)); - - return 0; -} - -/********** Linux ATM Operations **********/ - -// some are not yet implemented while others do not make sense for -// this device - -/********** Open a VC **********/ - -static int amb_open (struct atm_vcc * atm_vcc) -{ - int error; - - struct atm_qos * qos; - struct atm_trafprm * txtp; - struct atm_trafprm * rxtp; - u16 tx_rate_bits = -1; // hush gcc - u16 tx_vc_bits = -1; // hush gcc - u16 tx_frame_bits = -1; // hush gcc - - amb_dev * dev = AMB_DEV(atm_vcc->dev); - amb_vcc * vcc; - unsigned char pool = -1; // hush gcc - short vpi = atm_vcc->vpi; - int vci = atm_vcc->vci; - - PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci); - -#ifdef ATM_VPI_UNSPEC - // UNSPEC is deprecated, remove this code eventually - if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) { - PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)"); - return -EINVAL; - } -#endif - - if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) && - 0 <= vci && vci < (1<<NUM_VCI_BITS))) { - PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci); - return -EINVAL; - } - - qos = &atm_vcc->qos; - - if (qos->aal != ATM_AAL5) { - PRINTD (DBG_QOS, "AAL not supported"); - return -EINVAL; - } - - // traffic parameters - - PRINTD (DBG_QOS, "TX:"); - txtp = &qos->txtp; - if (txtp->traffic_class != ATM_NONE) { - switch (txtp->traffic_class) { - case ATM_UBR: { - // we take "the PCR" as a rate-cap - int pcr = atm_pcr_goal (txtp); - if (!pcr) { - // no rate cap - tx_rate_bits = 0; - tx_vc_bits = TX_UBR; - tx_frame_bits = TX_FRAME_NOTCAP; - } else { - rounding r; - if (pcr < 0) { - r = round_down; - pcr = -pcr; - } else { - r = round_up; - } - error = make_rate (pcr, r, &tx_rate_bits, NULL); - if (error) - return error; - tx_vc_bits = TX_UBR_CAPPED; - tx_frame_bits = TX_FRAME_CAPPED; - } - break; - } -#if 0 - case ATM_ABR: { - pcr = atm_pcr_goal (txtp); - PRINTD (DBG_QOS, "pcr goal = %d", pcr); - break; - } -#endif - default: { - // PRINTD (DBG_QOS, "request for non-UBR/ABR denied"); - PRINTD (DBG_QOS, "request for non-UBR denied"); - return -EINVAL; - } - } - PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx", - tx_rate_bits, tx_vc_bits); - } - - PRINTD (DBG_QOS, "RX:"); - rxtp = &qos->rxtp; - if (rxtp->traffic_class == ATM_NONE) { - // do nothing - } else { - // choose an RX pool (arranged in increasing size) - for (pool = 0; pool < NUM_RX_POOLS; ++pool) - if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) { - PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)", - pool, rxtp->max_sdu, dev->rxq[pool].buffer_size); - break; - } - if (pool == NUM_RX_POOLS) { - PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL, - "no pool suitable for VC (RX max_sdu %d is too large)", - rxtp->max_sdu); - return -EINVAL; - } - - switch (rxtp->traffic_class) { - case ATM_UBR: { - break; - } -#if 0 - case ATM_ABR: { - pcr = atm_pcr_goal (rxtp); - PRINTD (DBG_QOS, "pcr goal = %d", pcr); - break; - } -#endif - default: { - // PRINTD (DBG_QOS, "request for non-UBR/ABR denied"); - PRINTD (DBG_QOS, "request for non-UBR denied"); - return -EINVAL; - } - } - } - - // get space for our vcc stuff - vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL); - if (!vcc) { - PRINTK (KERN_ERR, "out of memory!"); - return -ENOMEM; - } - atm_vcc->dev_data = (void *) vcc; - - // no failures beyond this point - - // we are not really "immediately before allocating the connection - // identifier in hardware", but it will just have to do! - set_bit(ATM_VF_ADDR,&atm_vcc->flags); - - if (txtp->traffic_class != ATM_NONE) { - command cmd; - - vcc->tx_frame_bits = tx_frame_bits; - - mutex_lock(&dev->vcc_sf); - if (dev->rxer[vci]) { - // RXer on the channel already, just modify rate... - cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE); - cmd.args.modify_rate.vc = cpu_to_be32 (vci); // vpi 0 - cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT); - while (command_do (dev, &cmd)) - schedule(); - // ... and TX flags, preserving the RX pool - cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS); - cmd.args.modify_flags.vc = cpu_to_be32 (vci); // vpi 0 - cmd.args.modify_flags.flags = cpu_to_be32 - ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT) - | (tx_vc_bits << SRB_FLAGS_SHIFT) ); - while (command_do (dev, &cmd)) - schedule(); - } else { - // no RXer on the channel, just open (with pool zero) - cmd.request = cpu_to_be32 (SRB_OPEN_VC); - cmd.args.open.vc = cpu_to_be32 (vci); // vpi 0 - cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT); - cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT); - while (command_do (dev, &cmd)) - schedule(); - } - dev->txer[vci].tx_present = 1; - mutex_unlock(&dev->vcc_sf); - } - - if (rxtp->traffic_class != ATM_NONE) { - command cmd; - - vcc->rx_info.pool = pool; - - mutex_lock(&dev->vcc_sf); - /* grow RX buffer pool */ - if (!dev->rxq[pool].buffers_wanted) - dev->rxq[pool].buffers_wanted = rx_lats; - dev->rxq[pool].buffers_wanted += 1; - fill_rx_pool (dev, pool, GFP_KERNEL); - - if (dev->txer[vci].tx_present) { - // TXer on the channel already - // switch (from pool zero) to this pool, preserving the TX bits - cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS); - cmd.args.modify_flags.vc = cpu_to_be32 (vci); // vpi 0 - cmd.args.modify_flags.flags = cpu_to_be32 - ( (pool << SRB_POOL_SHIFT) - | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) ); - } else { - // no TXer on the channel, open the VC (with no rate info) - cmd.request = cpu_to_be32 (SRB_OPEN_VC); - cmd.args.open.vc = cpu_to_be32 (vci); // vpi 0 - cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT); - cmd.args.open.rate = cpu_to_be32 (0); - } - while (command_do (dev, &cmd)) - schedule(); - // this link allows RX frames through - dev->rxer[vci] = atm_vcc; - mutex_unlock(&dev->vcc_sf); - } - - // indicate readiness - set_bit(ATM_VF_READY,&atm_vcc->flags); - - return 0; -} - -/********** Close a VC **********/ - -static void amb_close (struct atm_vcc * atm_vcc) { - amb_dev * dev = AMB_DEV (atm_vcc->dev); - amb_vcc * vcc = AMB_VCC (atm_vcc); - u16 vci = atm_vcc->vci; - - PRINTD (DBG_VCC|DBG_FLOW, "amb_close"); - - // indicate unreadiness - clear_bit(ATM_VF_READY,&atm_vcc->flags); - - // disable TXing - if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) { - command cmd; - - mutex_lock(&dev->vcc_sf); - if (dev->rxer[vci]) { - // RXer still on the channel, just modify rate... XXX not really needed - cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE); - cmd.args.modify_rate.vc = cpu_to_be32 (vci); // vpi 0 - cmd.args.modify_rate.rate = cpu_to_be32 (0); - // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool - } else { - // no RXer on the channel, close channel - cmd.request = cpu_to_be32 (SRB_CLOSE_VC); - cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0 - } - dev->txer[vci].tx_present = 0; - while (command_do (dev, &cmd)) - schedule(); - mutex_unlock(&dev->vcc_sf); - } - - // disable RXing - if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) { - command cmd; - - // this is (the?) one reason why we need the amb_vcc struct - unsigned char pool = vcc->rx_info.pool; - - mutex_lock(&dev->vcc_sf); - if (dev->txer[vci].tx_present) { - // TXer still on the channel, just go to pool zero XXX not really needed - cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS); - cmd.args.modify_flags.vc = cpu_to_be32 (vci); // vpi 0 - cmd.args.modify_flags.flags = cpu_to_be32 - (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT); - } else { - // no TXer on the channel, close the VC - cmd.request = cpu_to_be32 (SRB_CLOSE_VC); - cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0 - } - // forget the rxer - no more skbs will be pushed - if (atm_vcc != dev->rxer[vci]) - PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p", - "arghhh! we're going to die!", - vcc, dev->rxer[vci]); - dev->rxer[vci] = NULL; - while (command_do (dev, &cmd)) - schedule(); - - /* shrink RX buffer pool */ - dev->rxq[pool].buffers_wanted -= 1; - if (dev->rxq[pool].buffers_wanted == rx_lats) { - dev->rxq[pool].buffers_wanted = 0; - drain_rx_pool (dev, pool); - } - mutex_unlock(&dev->vcc_sf); - } - - // free our structure - kfree (vcc); - - // say the VPI/VCI is free again - clear_bit(ATM_VF_ADDR,&atm_vcc->flags); - - return; -} - -/********** Send **********/ - -static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) { - amb_dev * dev = AMB_DEV(atm_vcc->dev); - amb_vcc * vcc = AMB_VCC(atm_vcc); - u16 vc = atm_vcc->vci; - unsigned int tx_len = skb->len; - unsigned char * tx_data = skb->data; - tx_simple * tx_descr; - tx_in tx; - - if (test_bit (dead, &dev->flags)) - return -EIO; - - PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u", - vc, tx_data, tx_len); - - dump_skb (">>>", vc, skb); - - if (!dev->txer[vc].tx_present) { - PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc); - return -EBADFD; - } - - // this is a driver private field so we have to set it ourselves, - // despite the fact that we are _required_ to use it to check for a - // pop function - ATM_SKB(skb)->vcc = atm_vcc; - - if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) { - PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping..."); - return -EIO; - } - - if (check_area (skb->data, skb->len)) { - atomic_inc(&atm_vcc->stats->tx_err); - return -ENOMEM; // ? - } - - // allocate memory for fragments - tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL); - if (!tx_descr) { - PRINTK (KERN_ERR, "could not allocate TX descriptor"); - return -ENOMEM; - } - if (check_area (tx_descr, sizeof(tx_simple))) { - kfree (tx_descr); - return -ENOMEM; - } - PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr); - - tx_descr->skb = skb; - - tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len); - tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data)); - - tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr); - tx_descr->tx_frag_end.vc = 0; - tx_descr->tx_frag_end.next_descriptor_length = 0; - tx_descr->tx_frag_end.next_descriptor = 0; -#ifdef AMB_NEW_MICROCODE - tx_descr->tx_frag_end.cpcs_uu = 0; - tx_descr->tx_frag_end.cpi = 0; - tx_descr->tx_frag_end.pad = 0; -#endif - - tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc); - tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end)); - tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag)); - - while (tx_give (dev, &tx)) - schedule(); - return 0; -} - -/********** Change QoS on a VC **********/ - -// int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags); - -/********** Free RX Socket Buffer **********/ - -#if 0 -static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) { - amb_dev * dev = AMB_DEV (atm_vcc->dev); - amb_vcc * vcc = AMB_VCC (atm_vcc); - unsigned char pool = vcc->rx_info.pool; - rx_in rx; - - // This may be unsafe for various reasons that I cannot really guess - // at. However, I note that the ATM layer calls kfree_skb rather - // than dev_kfree_skb at this point so we are least covered as far - // as buffer locking goes. There may be bugs if pcap clones RX skbs. - - PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)", - skb, atm_vcc, vcc); - - rx.handle = virt_to_bus (skb); - rx.host_address = cpu_to_be32 (virt_to_bus (skb->data)); - - skb->data = skb->head; - skb_reset_tail_pointer(skb); - skb->len = 0; - - if (!rx_give (dev, &rx, pool)) { - // success - PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool); - return; - } - - // just do what the ATM layer would have done - dev_kfree_skb_any (skb); - - return; -} -#endif - -/********** Proc File Output **********/ - -static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) { - amb_dev * dev = AMB_DEV (atm_dev); - int left = *pos; - unsigned char pool; - - PRINTD (DBG_FLOW, "amb_proc_read"); - - /* more diagnostics here? */ - - if (!left--) { - amb_stats * s = &dev->stats; - return sprintf (page, - "frames: TX OK %lu, RX OK %lu, RX bad %lu " - "(CRC %lu, long %lu, aborted %lu, unused %lu).\n", - s->tx_ok, s->rx.ok, s->rx.error, - s->rx.badcrc, s->rx.toolong, - s->rx.aborted, s->rx.unused); - } - - if (!left--) { - amb_cq * c = &dev->cq; - return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ", - c->pending, c->high, c->maximum); - } - - if (!left--) { - amb_txq * t = &dev->txq; - return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n", - t->pending, t->maximum, t->high, t->filled); - } - - if (!left--) { - unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:"); - for (pool = 0; pool < NUM_RX_POOLS; ++pool) { - amb_rxq * r = &dev->rxq[pool]; - count += sprintf (page+count, " %u/%u/%u %u %u", - r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied); - } - count += sprintf (page+count, ".\n"); - return count; - } - - if (!left--) { - unsigned int count = sprintf (page, "RX buffer sizes:"); - for (pool = 0; pool < NUM_RX_POOLS; ++pool) { - amb_rxq * r = &dev->rxq[pool]; - count += sprintf (page+count, " %u", r->buffer_size); - } - count += sprintf (page+count, ".\n"); - return count; - } - -#if 0 - if (!left--) { - // suni block etc? - } -#endif - - return 0; -} - -/********** Operation Structure **********/ - -static const struct atmdev_ops amb_ops = { - .open = amb_open, - .close = amb_close, - .send = amb_send, - .proc_read = amb_proc_read, - .owner = THIS_MODULE, -}; - -/********** housekeeping **********/ -static void do_housekeeping (struct timer_list *t) { - amb_dev * dev = from_timer(dev, t, housekeeping); - - // could collect device-specific (not driver/atm-linux) stats here - - // last resort refill once every ten seconds - fill_rx_pools (dev); - mod_timer(&dev->housekeeping, jiffies + 10*HZ); - - return; -} - -/********** creation of communication queues **********/ - -static int create_queues(amb_dev *dev, unsigned int cmds, unsigned int txs, - unsigned int *rxs, unsigned int *rx_buffer_sizes) -{ - unsigned char pool; - size_t total = 0; - void * memory; - void * limit; - - PRINTD (DBG_FLOW, "create_queues %p", dev); - - total += cmds * sizeof(command); - - total += txs * (sizeof(tx_in) + sizeof(tx_out)); - - for (pool = 0; pool < NUM_RX_POOLS; ++pool) - total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out)); - - memory = kmalloc (total, GFP_KERNEL); - if (!memory) { - PRINTK (KERN_ERR, "could not allocate queues"); - return -ENOMEM; - } - if (check_area (memory, total)) { - PRINTK (KERN_ERR, "queues allocated in nasty area"); - kfree (memory); - return -ENOMEM; - } - - limit = memory + total; - PRINTD (DBG_INIT, "queues from %p to %p", memory, limit); - - PRINTD (DBG_CMD, "command queue at %p", memory); - - { - command * cmd = memory; - amb_cq * cq = &dev->cq; - - cq->pending = 0; - cq->high = 0; - cq->maximum = cmds - 1; - - cq->ptrs.start = cmd; - cq->ptrs.in = cmd; - cq->ptrs.out = cmd; - cq->ptrs.limit = cmd + cmds; - - memory = cq->ptrs.limit; - } - - PRINTD (DBG_TX, "TX queue pair at %p", memory); - - { - tx_in * in = memory; - tx_out * out; - amb_txq * txq = &dev->txq; - - txq->pending = 0; - txq->high = 0; - txq->filled = 0; - txq->maximum = txs - 1; - - txq->in.start = in; - txq->in.ptr = in; - txq->in.limit = in + txs; - - memory = txq->in.limit; - out = memory; - - txq->out.start = out; - txq->out.ptr = out; - txq->out.limit = out + txs; - - memory = txq->out.limit; - } - - PRINTD (DBG_RX, "RX queue pairs at %p", memory); - - for (pool = 0; pool < NUM_RX_POOLS; ++pool) { - rx_in * in = memory; - rx_out * out; - amb_rxq * rxq = &dev->rxq[pool]; - - rxq->buffer_size = rx_buffer_sizes[pool]; - rxq->buffers_wanted = 0; - - rxq->pending = 0; - rxq->low = rxs[pool] - 1; - rxq->emptied = 0; - rxq->maximum = rxs[pool] - 1; - - rxq->in.start = in; - rxq->in.ptr = in; - rxq->in.limit = in + rxs[pool]; - - memory = rxq->in.limit; - out = memory; - - rxq->out.start = out; - rxq->out.ptr = out; - rxq->out.limit = out + rxs[pool]; - - memory = rxq->out.limit; - } - - if (memory == limit) { - return 0; - } else { - PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit); - kfree (limit - total); - return -ENOMEM; - } - -} - -/********** destruction of communication queues **********/ - -static void destroy_queues (amb_dev * dev) { - // all queues assumed empty - void * memory = dev->cq.ptrs.start; - // includes txq.in, txq.out, rxq[].in and rxq[].out - - PRINTD (DBG_FLOW, "destroy_queues %p", dev); - - PRINTD (DBG_INIT, "freeing queues at %p", memory); - kfree (memory); - - return; -} - -/********** basic loader commands and error handling **********/ -// centisecond timeouts - guessing away here -static unsigned int command_timeouts [] = { - [host_memory_test] = 15, - [read_adapter_memory] = 2, - [write_adapter_memory] = 2, - [adapter_start] = 50, - [get_version_number] = 10, - [interrupt_host] = 1, - [flash_erase_sector] = 1, - [adap_download_block] = 1, - [adap_erase_flash] = 1, - [adap_run_in_iram] = 1, - [adap_end_download] = 1 -}; - - -static unsigned int command_successes [] = { - [host_memory_test] = COMMAND_PASSED_TEST, - [read_adapter_memory] = COMMAND_READ_DATA_OK, - [write_adapter_memory] = COMMAND_WRITE_DATA_OK, - [adapter_start] = COMMAND_COMPLETE, - [get_version_number] = COMMAND_COMPLETE, - [interrupt_host] = COMMAND_COMPLETE, - [flash_erase_sector] = COMMAND_COMPLETE, - [adap_download_block] = COMMAND_COMPLETE, - [adap_erase_flash] = COMMAND_COMPLETE, - [adap_run_in_iram] = COMMAND_COMPLETE, - [adap_end_download] = COMMAND_COMPLETE -}; - -static int decode_loader_result (loader_command cmd, u32 result) -{ - int res; - const char *msg; - - if (result == command_successes[cmd]) - return 0; - - switch (result) { - case BAD_COMMAND: - res = -EINVAL; - msg = "bad command"; - break; - case COMMAND_IN_PROGRESS: - res = -ETIMEDOUT; - msg = "command in progress"; - break; - case COMMAND_PASSED_TEST: - res = 0; - msg = "command passed test"; - break; - case COMMAND_FAILED_TEST: - res = -EIO; - msg = "command failed test"; - break; - case COMMAND_READ_DATA_OK: - res = 0; - msg = "command read data ok"; - break; - case COMMAND_READ_BAD_ADDRESS: - res = -EINVAL; - msg = "command read bad address"; - break; - case COMMAND_WRITE_DATA_OK: - res = 0; - msg = "command write data ok"; - break; - case COMMAND_WRITE_BAD_ADDRESS: - res = -EINVAL; - msg = "command write bad address"; - break; - case COMMAND_WRITE_FLASH_FAILURE: - res = -EIO; - msg = "command write flash failure"; - break; - case COMMAND_COMPLETE: - res = 0; - msg = "command complete"; - break; - case COMMAND_FLASH_ERASE_FAILURE: - res = -EIO; - msg = "command flash erase failure"; - break; - case COMMAND_WRITE_BAD_DATA: - res = -EINVAL; - msg = "command write bad data"; - break; - default: - res = -EINVAL; - msg = "unknown error"; - PRINTD (DBG_LOAD|DBG_ERR, - "decode_loader_result got %d=%x !", - result, result); - break; - } - - PRINTK (KERN_ERR, "%s", msg); - return res; -} - -static int do_loader_command(volatile loader_block *lb, const amb_dev *dev, - loader_command cmd) -{ - - unsigned long timeout; - - PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command"); - - /* do a command - - Set the return value to zero, set the command type and set the - valid entry to the right magic value. The payload is already - correctly byte-ordered so we leave it alone. Hit the doorbell - with the bus address of this structure. - - */ - - lb->result = 0; - lb->command = cpu_to_be32 (cmd); - lb->valid = cpu_to_be32 (DMA_VALID); - // dump_registers (dev); - // dump_loader_block (lb); - wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask); - - timeout = command_timeouts[cmd] * 10; - - while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS)) - if (timeout) { - timeout = msleep_interruptible(timeout); - } else { - PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd); - dump_registers (dev); - dump_loader_block (lb); - return -ETIMEDOUT; - } - - if (cmd == adapter_start) { - // wait for start command to acknowledge... - timeout = 100; - while (rd_plain (dev, offsetof(amb_mem, doorbell))) - if (timeout) { - timeout = msleep_interruptible(timeout); - } else { - PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x", - be32_to_cpu (lb->result)); - dump_registers (dev); - return -ETIMEDOUT; - } - return 0; - } else { - return decode_loader_result (cmd, be32_to_cpu (lb->result)); - } - -} - -/* loader: determine loader version */ - -static int get_loader_version(loader_block *lb, const amb_dev *dev, - u32 *version) -{ - int res; - - PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version"); - - res = do_loader_command (lb, dev, get_version_number); - if (res) - return res; - if (version) - *version = be32_to_cpu (lb->payload.version); - return 0; -} - -/* loader: write memory data blocks */ - -static int loader_write(loader_block *lb, const amb_dev *dev, - const struct ihex_binrec *rec) -{ - transfer_block * tb = &lb->payload.transfer; - - PRINTD (DBG_FLOW|DBG_LOAD, "loader_write"); - - tb->address = rec->addr; - tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4); - memcpy(tb->data, rec->data, be16_to_cpu(rec->len)); - return do_loader_command (lb, dev, write_adapter_memory); -} - -/* loader: verify memory data blocks */ - -static int loader_verify(loader_block *lb, const amb_dev *dev, - const struct ihex_binrec *rec) -{ - transfer_block * tb = &lb->payload.transfer; - int res; - - PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify"); - - tb->address = rec->addr; - tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4); - res = do_loader_command (lb, dev, read_adapter_memory); - if (!res && memcmp(tb->data, rec->data, be16_to_cpu(rec->len))) - res = -EINVAL; - return res; -} - -/* loader: start microcode */ - -static int loader_start(loader_block *lb, const amb_dev *dev, u32 address) -{ - PRINTD (DBG_FLOW|DBG_LOAD, "loader_start"); - - lb->payload.start = cpu_to_be32 (address); - return do_loader_command (lb, dev, adapter_start); -} - -/********** reset card **********/ - -static inline void sf (const char * msg) -{ - PRINTK (KERN_ERR, "self-test failed: %s", msg); -} - -static int amb_reset (amb_dev * dev, int diags) { - u32 word; - - PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset"); - - word = rd_plain (dev, offsetof(amb_mem, reset_control)); - // put card into reset state - wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS); - // wait a short while - udelay (10); -#if 1 - // put card into known good state - wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS); - // clear all interrupts just in case - wr_plain (dev, offsetof(amb_mem, interrupt), -1); -#endif - // clear self-test done flag - wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0); - // take card out of reset state - wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS); - - if (diags) { - unsigned long timeout; - // 4.2 second wait - msleep(4200); - // half second time-out - timeout = 500; - while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready))) - if (timeout) { - timeout = msleep_interruptible(timeout); - } else { - PRINTD (DBG_LOAD|DBG_ERR, "reset timed out"); - return -ETIMEDOUT; - } - - // get results of self-test - // XXX double check byte-order - word = rd_mem (dev, offsetof(amb_mem, mb.loader.result)); - if (word & SELF_TEST_FAILURE) { - if (word & GPINT_TST_FAILURE) - sf ("interrupt"); - if (word & SUNI_DATA_PATTERN_FAILURE) - sf ("SUNI data pattern"); - if (word & SUNI_DATA_BITS_FAILURE) - sf ("SUNI data bits"); - if (word & SUNI_UTOPIA_FAILURE) - sf ("SUNI UTOPIA interface"); - if (word & SUNI_FIFO_FAILURE) - sf ("SUNI cell buffer FIFO"); - if (word & SRAM_FAILURE) - sf ("bad SRAM"); - // better return value? - return -EIO; - } - - } - return 0; -} - -/********** transfer and start the microcode **********/ - -static int ucode_init(loader_block *lb, amb_dev *dev) -{ - const struct firmware *fw; - unsigned long start_address; - const struct ihex_binrec *rec; - const char *errmsg = NULL; - int res; - - res = request_ihex_firmware(&fw, "atmsar11.fw", &dev->pci_dev->dev); - if (res) { - PRINTK (KERN_ERR, "Cannot load microcode data"); - return res; - } - - /* First record contains just the start address */ - rec = (const struct ihex_binrec *)fw->data; - if (be16_to_cpu(rec->len) != sizeof(__be32) || be32_to_cpu(rec->addr)) { - errmsg = "no start record"; - goto fail; - } - start_address = be32_to_cpup((__be32 *)rec->data); - - rec = ihex_next_binrec(rec); - - PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init"); - - while (rec) { - PRINTD (DBG_LOAD, "starting region (%x, %u)", be32_to_cpu(rec->addr), - be16_to_cpu(rec->len)); - if (be16_to_cpu(rec->len) > 4 * MAX_TRANSFER_DATA) { - errmsg = "record too long"; - goto fail; - } - if (be16_to_cpu(rec->len) & 3) { - errmsg = "odd number of bytes"; - goto fail; - } - res = loader_write(lb, dev, rec); - if (res) - break; - - res = loader_verify(lb, dev, rec); - if (res) - break; - rec = ihex_next_binrec(rec); - } - release_firmware(fw); - if (!res) - res = loader_start(lb, dev, start_address); - - return res; -fail: - release_firmware(fw); - PRINTK(KERN_ERR, "Bad microcode data (%s)", errmsg); - return -EINVAL; -} - -/********** give adapter parameters **********/ - -static inline __be32 bus_addr(void * addr) { - return cpu_to_be32 (virt_to_bus (addr)); -} - -static int amb_talk(amb_dev *dev) -{ - adap_talk_block a; - unsigned char pool; - unsigned long timeout; - - PRINTD (DBG_FLOW, "amb_talk %p", dev); - - a.command_start = bus_addr (dev->cq.ptrs.start); - a.command_end = bus_addr (dev->cq.ptrs.limit); - a.tx_start = bus_addr (dev->txq.in.start); - a.tx_end = bus_addr (dev->txq.in.limit); - a.txcom_start = bus_addr (dev->txq.out.start); - a.txcom_end = bus_addr (dev->txq.out.limit); - - for (pool = 0; pool < NUM_RX_POOLS; ++pool) { - // the other "a" items are set up by the adapter - a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start); - a.rec_struct[pool].buffer_end = bus_addr (dev->rxq[pool].in.limit); - a.rec_struct[pool].rx_start = bus_addr (dev->rxq[pool].out.start); - a.rec_struct[pool].rx_end = bus_addr (dev->rxq[pool].out.limit); - a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size); - } - -#ifdef AMB_NEW_MICROCODE - // disable fast PLX prefetching - a.init_flags = 0; -#endif - - // pass the structure - wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a)); - - // 2.2 second wait (must not touch doorbell during 2 second DMA test) - msleep(2200); - // give the adapter another half second? - timeout = 500; - while (rd_plain (dev, offsetof(amb_mem, doorbell))) - if (timeout) { - timeout = msleep_interruptible(timeout); - } else { - PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out"); - return -ETIMEDOUT; - } - - return 0; -} - -// get microcode version -static void amb_ucode_version(amb_dev *dev) -{ - u32 major; - u32 minor; - command cmd; - cmd.request = cpu_to_be32 (SRB_GET_VERSION); - while (command_do (dev, &cmd)) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule(); - } - major = be32_to_cpu (cmd.args.version.major); - minor = be32_to_cpu (cmd.args.version.minor); - PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor); -} - -// get end station address -static void amb_esi(amb_dev *dev, u8 *esi) -{ - u32 lower4; - u16 upper2; - command cmd; - - cmd.request = cpu_to_be32 (SRB_GET_BIA); - while (command_do (dev, &cmd)) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule(); - } - lower4 = be32_to_cpu (cmd.args.bia.lower4); - upper2 = be32_to_cpu (cmd.args.bia.upper2); - PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2); - - if (esi) { - unsigned int i; - - PRINTDB (DBG_INIT, "ESI:"); - for (i = 0; i < ESI_LEN; ++i) { - if (i < 4) - esi[i] = bitrev8(lower4>>(8*i)); - else - esi[i] = bitrev8(upper2>>(8*(i-4))); - PRINTDM (DBG_INIT, " %02x", esi[i]); - } - - PRINTDE (DBG_INIT, ""); - } - - return; -} - -static void fixup_plx_window (amb_dev *dev, loader_block *lb) -{ - // fix up the PLX-mapped window base address to match the block - unsigned long blb; - u32 mapreg; - blb = virt_to_bus(lb); - // the kernel stack had better not ever cross a 1Gb boundary! - mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10])); - mapreg &= ~onegigmask; - mapreg |= blb & onegigmask; - wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg); - return; -} - -static int amb_init(amb_dev *dev) -{ - loader_block lb; - - u32 version; - - if (amb_reset (dev, 1)) { - PRINTK (KERN_ERR, "card reset failed!"); - } else { - fixup_plx_window (dev, &lb); - - if (get_loader_version (&lb, dev, &version)) { - PRINTK (KERN_INFO, "failed to get loader version"); - } else { - PRINTK (KERN_INFO, "loader version is %08x", version); - - if (ucode_init (&lb, dev)) { - PRINTK (KERN_ERR, "microcode failure"); - } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) { - PRINTK (KERN_ERR, "failed to get memory for queues"); - } else { - - if (amb_talk (dev)) { - PRINTK (KERN_ERR, "adapter did not accept queues"); - } else { - - amb_ucode_version (dev); - return 0; - - } /* amb_talk */ - - destroy_queues (dev); - } /* create_queues, ucode_init */ - - amb_reset (dev, 0); - } /* get_loader_version */ - - } /* amb_reset */ - - return -EINVAL; -} - -static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev) -{ - unsigned char pool; - - // set up known dev items straight away - dev->pci_dev = pci_dev; - pci_set_drvdata(pci_dev, dev); - - dev->iobase = pci_resource_start (pci_dev, 1); - dev->irq = pci_dev->irq; - dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0)); - - // flags (currently only dead) - dev->flags = 0; - - // Allocate cell rates (fibre) - // ATM_OC3_PCR = 1555200000/8/270*260/53 - 29/53 - // to be really pedantic, this should be ATM_OC3c_PCR - dev->tx_avail = ATM_OC3_PCR; - dev->rx_avail = ATM_OC3_PCR; - - // semaphore for txer/rxer modifications - we cannot use a - // spinlock as the critical region needs to switch processes - mutex_init(&dev->vcc_sf); - // queue manipulation spinlocks; we want atomic reads and - // writes to the queue descriptors (handles IRQ and SMP) - // consider replacing "int pending" -> "atomic_t available" - // => problem related to who gets to move queue pointers - spin_lock_init (&dev->cq.lock); - spin_lock_init (&dev->txq.lock); - for (pool = 0; pool < NUM_RX_POOLS; ++pool) - spin_lock_init (&dev->rxq[pool].lock); -} - -static void setup_pci_dev(struct pci_dev *pci_dev) -{ - unsigned char lat; - - // enable bus master accesses - pci_set_master(pci_dev); - - // frobnicate latency (upwards, usually) - pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat); - - if (!pci_lat) - pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat; - - if (lat != pci_lat) { - PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu", - lat, pci_lat); - pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat); - } -} - -static int amb_probe(struct pci_dev *pci_dev, - const struct pci_device_id *pci_ent) -{ - amb_dev * dev; - int err; - unsigned int irq; - - err = pci_enable_device(pci_dev); - if (err < 0) { - PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card"); - goto out; - } - - // read resources from PCI configuration space - irq = pci_dev->irq; - - if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) { - PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card"); - err = -EINVAL; - goto out_disable; - } - - PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at" - " IO %llx, IRQ %u, MEM %p", - (unsigned long long)pci_resource_start(pci_dev, 1), - irq, bus_to_virt(pci_resource_start(pci_dev, 0))); - - // check IO region - err = pci_request_region(pci_dev, 1, DEV_LABEL); - if (err < 0) { - PRINTK (KERN_ERR, "IO range already in use!"); - goto out_disable; - } - - dev = kzalloc(sizeof(amb_dev), GFP_KERNEL); - if (!dev) { - PRINTK (KERN_ERR, "out of memory!"); - err = -ENOMEM; - goto out_release; - } - - setup_dev(dev, pci_dev); - - err = amb_init(dev); - if (err < 0) { - PRINTK (KERN_ERR, "adapter initialisation failure"); - goto out_free; - } - - setup_pci_dev(pci_dev); - - // grab (but share) IRQ and install handler - err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev); - if (err < 0) { - PRINTK (KERN_ERR, "request IRQ failed!"); - goto out_reset; - } - - dev->atm_dev = atm_dev_register (DEV_LABEL, &pci_dev->dev, &amb_ops, -1, - NULL); - if (!dev->atm_dev) { - PRINTD (DBG_ERR, "failed to register Madge ATM adapter"); - err = -EINVAL; - goto out_free_irq; - } - - PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p", - dev->atm_dev->number, dev, dev->atm_dev); - dev->atm_dev->dev_data = (void *) dev; - - // register our address - amb_esi (dev, dev->atm_dev->esi); - - // 0 bits for vpi, 10 bits for vci - dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS; - dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS; - - timer_setup(&dev->housekeeping, do_housekeeping, 0); - mod_timer(&dev->housekeeping, jiffies); - - // enable host interrupts - interrupts_on (dev); - -out: - return err; - -out_free_irq: - free_irq(irq, dev); -out_reset: - amb_reset(dev, 0); -out_free: - kfree(dev); -out_release: - pci_release_region(pci_dev, 1); -out_disable: - pci_disable_device(pci_dev); - goto out; -} - - -static void amb_remove_one(struct pci_dev *pci_dev) -{ - struct amb_dev *dev; - - dev = pci_get_drvdata(pci_dev); - - PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev); - del_timer_sync(&dev->housekeeping); - // the drain should not be necessary - drain_rx_pools(dev); - interrupts_off(dev); - amb_reset(dev, 0); - free_irq(dev->irq, dev); - pci_disable_device(pci_dev); - destroy_queues(dev); - atm_dev_deregister(dev->atm_dev); - kfree(dev); - pci_release_region(pci_dev, 1); -} - -static void __init amb_check_args (void) { - unsigned char pool; - unsigned int max_rx_size; - -#ifdef DEBUG_AMBASSADOR - PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK); -#else - if (debug) - PRINTK (KERN_NOTICE, "no debugging support"); -#endif - - if (cmds < MIN_QUEUE_SIZE) - PRINTK (KERN_NOTICE, "cmds has been raised to %u", - cmds = MIN_QUEUE_SIZE); - - if (txs < MIN_QUEUE_SIZE) - PRINTK (KERN_NOTICE, "txs has been raised to %u", - txs = MIN_QUEUE_SIZE); - - for (pool = 0; pool < NUM_RX_POOLS; ++pool) - if (rxs[pool] < MIN_QUEUE_SIZE) - PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u", - pool, rxs[pool] = MIN_QUEUE_SIZE); - - // buffers sizes should be greater than zero and strictly increasing - max_rx_size = 0; - for (pool = 0; pool < NUM_RX_POOLS; ++pool) - if (rxs_bs[pool] <= max_rx_size) - PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)", - pool, rxs_bs[pool]); - else - max_rx_size = rxs_bs[pool]; - - if (rx_lats < MIN_RX_BUFFERS) - PRINTK (KERN_NOTICE, "rx_lats has been raised to %u", - rx_lats = MIN_RX_BUFFERS); - - return; -} - -/********** module stuff **********/ - -MODULE_AUTHOR(maintainer_string); -MODULE_DESCRIPTION(description_string); -MODULE_LICENSE("GPL"); -MODULE_FIRMWARE("atmsar11.fw"); -module_param(debug, ushort, 0644); -module_param(cmds, uint, 0); -module_param(txs, uint, 0); -module_param_array(rxs, uint, NULL, 0); -module_param_array(rxs_bs, uint, NULL, 0); -module_param(rx_lats, uint, 0); -module_param(pci_lat, byte, 0); -MODULE_PARM_DESC(debug, "debug bitmap, see .h file"); -MODULE_PARM_DESC(cmds, "number of command queue entries"); -MODULE_PARM_DESC(txs, "number of TX queue entries"); -MODULE_PARM_DESC(rxs, "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]"); -MODULE_PARM_DESC(rxs_bs, "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]"); -MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies"); -MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles"); - -/********** module entry **********/ - -static const struct pci_device_id amb_pci_tbl[] = { - { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 }, - { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 }, - { 0, } -}; - -MODULE_DEVICE_TABLE(pci, amb_pci_tbl); - -static struct pci_driver amb_driver = { - .name = "amb", - .probe = amb_probe, - .remove = amb_remove_one, - .id_table = amb_pci_tbl, -}; - -static int __init amb_module_init (void) -{ - PRINTD (DBG_FLOW|DBG_INIT, "init_module"); - - BUILD_BUG_ON(sizeof(amb_mem) != 4*16 + 4*12); - - show_version(); - - amb_check_args(); - - // get the juice - return pci_register_driver(&amb_driver); -} - -/********** module exit **********/ - -static void __exit amb_module_exit (void) -{ - PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module"); - - pci_unregister_driver(&amb_driver); -} - -module_init(amb_module_init); -module_exit(amb_module_exit); diff --git a/drivers/atm/ambassador.h b/drivers/atm/ambassador.h deleted file mode 100644 index 086ceb8568dc..000000000000 --- a/drivers/atm/ambassador.h +++ /dev/null @@ -1,648 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - Madge Ambassador ATM Adapter driver. - Copyright (C) 1995-1999 Madge Networks Ltd. - -*/ - -#ifndef AMBASSADOR_H -#define AMBASSADOR_H - - -#ifdef CONFIG_ATM_AMBASSADOR_DEBUG -#define DEBUG_AMBASSADOR -#endif - -#define DEV_LABEL "amb" - -#ifndef PCI_VENDOR_ID_MADGE -#define PCI_VENDOR_ID_MADGE 0x10B6 -#endif -#ifndef PCI_VENDOR_ID_MADGE_AMBASSADOR -#define PCI_DEVICE_ID_MADGE_AMBASSADOR 0x1001 -#endif -#ifndef PCI_VENDOR_ID_MADGE_AMBASSADOR_BAD -#define PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD 0x1002 -#endif - -// diagnostic output - -#define PRINTK(severity,format,args...) \ - printk(severity DEV_LABEL ": " format "\n" , ## args) - -#ifdef DEBUG_AMBASSADOR - -#define DBG_ERR 0x0001 -#define DBG_WARN 0x0002 -#define DBG_INFO 0x0004 -#define DBG_INIT 0x0008 -#define DBG_LOAD 0x0010 -#define DBG_VCC 0x0020 -#define DBG_QOS 0x0040 -#define DBG_CMD 0x0080 -#define DBG_TX 0x0100 -#define DBG_RX 0x0200 -#define DBG_SKB 0x0400 -#define DBG_POOL 0x0800 -#define DBG_IRQ 0x1000 -#define DBG_FLOW 0x2000 -#define DBG_REGS 0x4000 -#define DBG_DATA 0x8000 -#define DBG_MASK 0xffff - -/* the ## prevents the annoying double expansion of the macro arguments */ -/* KERN_INFO is used since KERN_DEBUG often does not make it to the console */ -#define PRINTDB(bits,format,args...) \ - ( (debug & (bits)) ? printk (KERN_INFO DEV_LABEL ": " format , ## args) : 1 ) -#define PRINTDM(bits,format,args...) \ - ( (debug & (bits)) ? printk (format , ## args) : 1 ) -#define PRINTDE(bits,format,args...) \ - ( (debug & (bits)) ? printk (format "\n" , ## args) : 1 ) -#define PRINTD(bits,format,args...) \ - ( (debug & (bits)) ? printk (KERN_INFO DEV_LABEL ": " format "\n" , ## args) : 1 ) - -#else - -#define PRINTD(bits,format,args...) -#define PRINTDB(bits,format,args...) -#define PRINTDM(bits,format,args...) -#define PRINTDE(bits,format,args...) - -#endif - -#define PRINTDD(bits,format,args...) -#define PRINTDDB(sec,fmt,args...) -#define PRINTDDM(sec,fmt,args...) -#define PRINTDDE(sec,fmt,args...) - -// tunable values (?) - -/* MUST be powers of two -- why ? */ -#define COM_Q_ENTRIES 8 -#define TX_Q_ENTRIES 32 -#define RX_Q_ENTRIES 64 - -// fixed values - -// guessing -#define AMB_EXTENT 0x80 - -// Minimum allowed size for an Ambassador queue -#define MIN_QUEUE_SIZE 2 - -// Ambassador microcode allows 1 to 4 pools, we use 4 (simpler) -#define NUM_RX_POOLS 4 - -// minimum RX buffers required to cope with replenishing delay -#define MIN_RX_BUFFERS 1 - -// minimum PCI latency we will tolerate (32 IS TOO SMALL) -#define MIN_PCI_LATENCY 64 // 255 - -// VCs supported by card (VPI always 0) -#define NUM_VPI_BITS 0 -#define NUM_VCI_BITS 10 -#define NUM_VCS 1024 - -/* The status field bits defined so far. */ -#define RX_ERR 0x8000 // always present if there is an error (hmm) -#define CRC_ERR 0x4000 // AAL5 CRC error -#define LEN_ERR 0x2000 // overlength frame -#define ABORT_ERR 0x1000 // zero length field in received frame -#define UNUSED_ERR 0x0800 // buffer returned unused - -// Adaptor commands - -#define SRB_OPEN_VC 0 -/* par_0: dwordswap(VC_number) */ -/* par_1: dwordswap(flags<<16) or wordswap(flags)*/ -/* flags: */ - -/* LANE: 0x0004 */ -/* NOT_UBR: 0x0008 */ -/* ABR: 0x0010 */ - -/* RxPool0: 0x0000 */ -/* RxPool1: 0x0020 */ -/* RxPool2: 0x0040 */ -/* RxPool3: 0x0060 */ - -/* par_2: dwordswap(fp_rate<<16) or wordswap(fp_rate) */ - -#define SRB_CLOSE_VC 1 -/* par_0: dwordswap(VC_number) */ - -#define SRB_GET_BIA 2 -/* returns */ -/* par_0: dwordswap(half BIA) */ -/* par_1: dwordswap(half BIA) */ - -#define SRB_GET_SUNI_STATS 3 -/* par_0: dwordswap(physical_host_address) */ - -#define SRB_SET_BITS_8 4 -#define SRB_SET_BITS_16 5 -#define SRB_SET_BITS_32 6 -#define SRB_CLEAR_BITS_8 7 -#define SRB_CLEAR_BITS_16 8 -#define SRB_CLEAR_BITS_32 9 -/* par_0: dwordswap(ATMizer address) */ -/* par_1: dwordswap(mask) */ - -#define SRB_SET_8 10 -#define SRB_SET_16 11 -#define SRB_SET_32 12 -/* par_0: dwordswap(ATMizer address) */ -/* par_1: dwordswap(data) */ - -#define SRB_GET_32 13 -/* par_0: dwordswap(ATMizer address) */ -/* returns */ -/* par_1: dwordswap(ATMizer data) */ - -#define SRB_GET_VERSION 14 -/* returns */ -/* par_0: dwordswap(Major Version) */ -/* par_1: dwordswap(Minor Version) */ - -#define SRB_FLUSH_BUFFER_Q 15 -/* Only flags to define which buffer pool; all others must be zero */ -/* par_0: dwordswap(flags<<16) or wordswap(flags)*/ - -#define SRB_GET_DMA_SPEEDS 16 -/* returns */ -/* par_0: dwordswap(Read speed (bytes/sec)) */ -/* par_1: dwordswap(Write speed (bytes/sec)) */ - -#define SRB_MODIFY_VC_RATE 17 -/* par_0: dwordswap(VC_number) */ -/* par_1: dwordswap(fp_rate<<16) or wordswap(fp_rate) */ - -#define SRB_MODIFY_VC_FLAGS 18 -/* par_0: dwordswap(VC_number) */ -/* par_1: dwordswap(flags<<16) or wordswap(flags)*/ - -/* flags: */ - -/* LANE: 0x0004 */ -/* NOT_UBR: 0x0008 */ -/* ABR: 0x0010 */ - -/* RxPool0: 0x0000 */ -/* RxPool1: 0x0020 */ -/* RxPool2: 0x0040 */ -/* RxPool3: 0x0060 */ - -#define SRB_RATE_SHIFT 16 -#define SRB_POOL_SHIFT (SRB_FLAGS_SHIFT+5) -#define SRB_FLAGS_SHIFT 16 - -#define SRB_STOP_TASKING 19 -#define SRB_START_TASKING 20 -#define SRB_SHUT_DOWN 21 -#define MAX_SRB 21 - -#define SRB_COMPLETE 0xffffffff - -#define TX_FRAME 0x80000000 - -// number of types of SRB MUST be a power of two -- why? -#define NUM_OF_SRB 32 - -// number of bits of period info for rate -#define MAX_RATE_BITS 6 - -#define TX_UBR 0x0000 -#define TX_UBR_CAPPED 0x0008 -#define TX_ABR 0x0018 -#define TX_FRAME_NOTCAP 0x0000 -#define TX_FRAME_CAPPED 0x8000 - -#define FP_155_RATE 0x24b1 -#define FP_25_RATE 0x1f9d - -/* #define VERSION_NUMBER 0x01000000 // initial release */ -/* #define VERSION_NUMBER 0x01010000 // fixed startup probs PLX MB0 not cleared */ -/* #define VERSION_NUMBER 0x01020000 // changed SUNI reset timings; allowed r/w onchip */ - -/* #define VERSION_NUMBER 0x01030000 // clear local doorbell int reg on reset */ -/* #define VERSION_NUMBER 0x01040000 // PLX bug work around version PLUS */ -/* remove race conditions on basic interface */ -/* indicate to the host that diagnostics */ -/* have finished; if failed, how and what */ -/* failed */ -/* fix host memory test to fix PLX bug */ -/* allow flash upgrade and BIA upgrade directly */ -/* */ -#define VERSION_NUMBER 0x01050025 /* Jason's first hacked version. */ -/* Change in download algorithm */ - -#define DMA_VALID 0xb728e149 /* completely random */ - -#define FLASH_BASE 0xa0c00000 -#define FLASH_SIZE 0x00020000 /* 128K */ -#define BIA_BASE (FLASH_BASE+0x0001c000) /* Flash Sector 7 */ -#define BIA_ADDRESS ((void *)0xa0c1c000) -#define PLX_BASE 0xe0000000 - -typedef enum { - host_memory_test = 1, - read_adapter_memory, - write_adapter_memory, - adapter_start, - get_version_number, - interrupt_host, - flash_erase_sector, - adap_download_block = 0x20, - adap_erase_flash, - adap_run_in_iram, - adap_end_download -} loader_command; - -#define BAD_COMMAND (-1) -#define COMMAND_IN_PROGRESS 1 -#define COMMAND_PASSED_TEST 2 -#define COMMAND_FAILED_TEST 3 -#define COMMAND_READ_DATA_OK 4 -#define COMMAND_READ_BAD_ADDRESS 5 -#define COMMAND_WRITE_DATA_OK 6 -#define COMMAND_WRITE_BAD_ADDRESS 7 -#define COMMAND_WRITE_FLASH_FAILURE 8 -#define COMMAND_COMPLETE 9 -#define COMMAND_FLASH_ERASE_FAILURE 10 -#define COMMAND_WRITE_BAD_DATA 11 - -/* bit fields for mailbox[0] return values */ - -#define GPINT_TST_FAILURE 0x00000001 -#define SUNI_DATA_PATTERN_FAILURE 0x00000002 -#define SUNI_DATA_BITS_FAILURE 0x00000004 -#define SUNI_UTOPIA_FAILURE 0x00000008 -#define SUNI_FIFO_FAILURE 0x00000010 -#define SRAM_FAILURE 0x00000020 -#define SELF_TEST_FAILURE 0x0000003f - -/* mailbox[1] = 0 in progress, -1 on completion */ -/* mailbox[2] = current test 00 00 test(8 bit) phase(8 bit) */ -/* mailbox[3] = last failure, 00 00 test(8 bit) phase(8 bit) */ -/* mailbox[4],mailbox[5],mailbox[6] random failure values */ - -/* PLX/etc. memory map including command structure */ - -/* These registers may also be memory mapped in PCI memory */ - -#define UNUSED_LOADER_MAILBOXES 6 - -typedef struct { - u32 stuff[16]; - union { - struct { - u32 result; - u32 ready; - u32 stuff[UNUSED_LOADER_MAILBOXES]; - } loader; - struct { - u32 cmd_address; - u32 tx_address; - u32 rx_address[NUM_RX_POOLS]; - u32 gen_counter; - u32 spare; - } adapter; - } mb; - u32 doorbell; - u32 interrupt; - u32 interrupt_control; - u32 reset_control; -} amb_mem; - -/* RESET bit, IRQ (card to host) and doorbell (host to card) enable bits */ -#define AMB_RESET_BITS 0x40000000 -#define AMB_INTERRUPT_BITS 0x00000300 -#define AMB_DOORBELL_BITS 0x00030000 - -/* loader commands */ - -#define MAX_COMMAND_DATA 13 -#define MAX_TRANSFER_DATA 11 - -typedef struct { - __be32 address; - __be32 count; - __be32 data[MAX_TRANSFER_DATA]; -} transfer_block; - -typedef struct { - __be32 result; - __be32 command; - union { - transfer_block transfer; - __be32 version; - __be32 start; - __be32 data[MAX_COMMAND_DATA]; - } payload; - __be32 valid; -} loader_block; - -/* command queue */ - -/* Again all data are BIG ENDIAN */ - -typedef struct { - union { - struct { - __be32 vc; - __be32 flags; - __be32 rate; - } open; - struct { - __be32 vc; - __be32 rate; - } modify_rate; - struct { - __be32 vc; - __be32 flags; - } modify_flags; - struct { - __be32 vc; - } close; - struct { - __be32 lower4; - __be32 upper2; - } bia; - struct { - __be32 address; - } suni; - struct { - __be32 major; - __be32 minor; - } version; - struct { - __be32 read; - __be32 write; - } speed; - struct { - __be32 flags; - } flush; - struct { - __be32 address; - __be32 data; - } memory; - __be32 par[3]; - } args; - __be32 request; -} command; - -/* transmit queues and associated structures */ - -/* The hosts transmit structure. All BIG ENDIAN; host address - restricted to first 1GByte, but address passed to the card must - have the top MS bit or'ed in. -- check this */ - -/* TX is described by 1+ tx_frags followed by a tx_frag_end */ - -typedef struct { - __be32 bytes; - __be32 address; -} tx_frag; - -/* apart from handle the fields here are for the adapter to play with - and should be set to zero */ - -typedef struct { - u32 handle; - u16 vc; - u16 next_descriptor_length; - u32 next_descriptor; -#ifdef AMB_NEW_MICROCODE - u8 cpcs_uu; - u8 cpi; - u16 pad; -#endif -} tx_frag_end; - -typedef struct { - tx_frag tx_frag; - tx_frag_end tx_frag_end; - struct sk_buff * skb; -} tx_simple; - -#if 0 -typedef union { - tx_frag fragment; - tx_frag_end end_of_list; -} tx_descr; -#endif - -/* this "points" to the sequence of fragments and trailer */ - -typedef struct { - __be16 vc; - __be16 tx_descr_length; - __be32 tx_descr_addr; -} tx_in; - -/* handle is the handle from tx_in */ - -typedef struct { - u32 handle; -} tx_out; - -/* receive frame structure */ - -/* All BIG ENDIAN; handle is as passed from host; length is zero for - aborted frames, and frames with errors. Header is actually VC - number, lec-id is NOT yet supported. */ - -typedef struct { - u32 handle; - __be16 vc; - __be16 lec_id; // unused - __be16 status; - __be16 length; -} rx_out; - -/* buffer supply structure */ - -typedef struct { - u32 handle; - __be32 host_address; -} rx_in; - -/* This first structure is the area in host memory where the adapter - writes its pointer values. These pointer values are BIG ENDIAN and - reside in the same 4MB 'page' as this structure. The host gives the - adapter the address of this block by sending a doorbell interrupt - to the adapter after downloading the code and setting it going. The - addresses have the top 10 bits set to 1010000010b -- really? - - The host must initialise these before handing the block to the - adapter. */ - -typedef struct { - __be32 command_start; /* SRB commands completions */ - __be32 command_end; /* SRB commands completions */ - __be32 tx_start; - __be32 tx_end; - __be32 txcom_start; /* tx completions */ - __be32 txcom_end; /* tx completions */ - struct { - __be32 buffer_start; - __be32 buffer_end; - u32 buffer_q_get; - u32 buffer_q_end; - u32 buffer_aptr; - __be32 rx_start; /* rx completions */ - __be32 rx_end; - u32 rx_ptr; - __be32 buffer_size; /* size of host buffer */ - } rec_struct[NUM_RX_POOLS]; -#ifdef AMB_NEW_MICROCODE - u16 init_flags; - u16 talk_block_spare; -#endif -} adap_talk_block; - -/* This structure must be kept in line with the vcr image in sarmain.h - - This is the structure in the host filled in by the adapter by - GET_SUNI_STATS */ - -typedef struct { - u8 racp_chcs; - u8 racp_uhcs; - u16 spare; - u32 racp_rcell; - u32 tacp_tcell; - u32 flags; - u32 dropped_cells; - u32 dropped_frames; -} suni_stats; - -typedef enum { - dead -} amb_flags; - -#define NEXTQ(current,start,limit) \ - ( (current)+1 < (limit) ? (current)+1 : (start) ) - -typedef struct { - command * start; - command * in; - command * out; - command * limit; -} amb_cq_ptrs; - -typedef struct { - spinlock_t lock; - unsigned int pending; - unsigned int high; - unsigned int filled; - unsigned int maximum; // size - 1 (q implementation) - amb_cq_ptrs ptrs; -} amb_cq; - -typedef struct { - spinlock_t lock; - unsigned int pending; - unsigned int high; - unsigned int filled; - unsigned int maximum; // size - 1 (q implementation) - struct { - tx_in * start; - tx_in * ptr; - tx_in * limit; - } in; - struct { - tx_out * start; - tx_out * ptr; - tx_out * limit; - } out; -} amb_txq; - -typedef struct { - spinlock_t lock; - unsigned int pending; - unsigned int low; - unsigned int emptied; - unsigned int maximum; // size - 1 (q implementation) - struct { - rx_in * start; - rx_in * ptr; - rx_in * limit; - } in; - struct { - rx_out * start; - rx_out * ptr; - rx_out * limit; - } out; - unsigned int buffers_wanted; - unsigned int buffer_size; -} amb_rxq; - -typedef struct { - unsigned long tx_ok; - struct { - unsigned long ok; - unsigned long error; - unsigned long badcrc; - unsigned long toolong; - unsigned long aborted; - unsigned long unused; - } rx; -} amb_stats; - -// a single struct pointed to by atm_vcc->dev_data - -typedef struct { - u8 tx_vc_bits:7; - u8 tx_present:1; -} amb_tx_info; - -typedef struct { - unsigned char pool; -} amb_rx_info; - -typedef struct { - amb_rx_info rx_info; - u16 tx_frame_bits; - unsigned int tx_rate; - unsigned int rx_rate; -} amb_vcc; - -struct amb_dev { - u8 irq; - unsigned long flags; - u32 iobase; - u32 * membase; - - amb_cq cq; - amb_txq txq; - amb_rxq rxq[NUM_RX_POOLS]; - - struct mutex vcc_sf; - amb_tx_info txer[NUM_VCS]; - struct atm_vcc * rxer[NUM_VCS]; - unsigned int tx_avail; - unsigned int rx_avail; - - amb_stats stats; - - struct atm_dev * atm_dev; - struct pci_dev * pci_dev; - struct timer_list housekeeping; -}; - -typedef struct amb_dev amb_dev; - -#define AMB_DEV(atm_dev) ((amb_dev *) (atm_dev)->dev_data) -#define AMB_VCC(atm_vcc) ((amb_vcc *) (atm_vcc)->dev_data) - -/* rate rounding */ - -typedef enum { - round_up, - round_down, - round_nearest -} rounding; - -#endif diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a311df07b1bd..4deb60a3b43f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2613,7 +2613,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_counters); SET_DEVICE_OP(dev_ops, create_cq); SET_DEVICE_OP(dev_ops, create_flow); - SET_DEVICE_OP(dev_ops, create_flow_action_esp); SET_DEVICE_OP(dev_ops, create_qp); SET_DEVICE_OP(dev_ops, create_rwq_ind_table); SET_DEVICE_OP(dev_ops, create_srq); @@ -2676,7 +2675,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, modify_ah); SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); - SET_DEVICE_OP(dev_ops, modify_flow_action_esp); SET_DEVICE_OP(dev_ops, modify_hw_stat); SET_DEVICE_OP(dev_ops, modify_port); SET_DEVICE_OP(dev_ops, modify_qp); diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index d42ed7ff223e..0ddcf6da66c4 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -46,385 +46,6 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, return action->device->ops.destroy_flow_action(action); } -static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs, - u32 flags, bool is_modify) -{ - u64 verbs_flags = flags; - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN)) - verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED; - - if (is_modify && uverbs_attr_is_valid(attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) - verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS; - - return verbs_flags; -}; - -static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat) -{ - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm = - &keymat->keymat.aes_gcm; - - if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) - return -EOPNOTSUPP; - - if (aes_gcm->key_len != 32 && - aes_gcm->key_len != 24 && - aes_gcm->key_len != 16) - return -EINVAL; - - if (aes_gcm->icv_len != 16 && - aes_gcm->icv_len != 8 && - aes_gcm->icv_len != 12) - return -EINVAL; - - return 0; -} - -static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = { - [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm, -}; - -static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay, - bool is_modify) -{ - /* This is used in order to modify an esp flow action with an enabled - * replay protection to a disabled one. This is only supported via - * modify, as in create verb we can simply drop the REPLAY attribute and - * achieve the same thing. - */ - return is_modify ? 0 : -EINVAL; -} - -static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay, - bool is_modify) -{ - /* Some replay protections could always be enabled without validating - * anything. - */ - return 0; -} - -static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay, - bool is_modify) = { - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none, - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok, -}; - -static int parse_esp_ip(enum ib_flow_spec_type proto, - const void __user *val_ptr, - size_t len, union ib_flow_spec *out) -{ - int ret; - const struct ib_uverbs_flow_ipv4_filter ipv4 = { - .src_ip = cpu_to_be32(0xffffffffUL), - .dst_ip = cpu_to_be32(0xffffffffUL), - .proto = 0xff, - .tos = 0xff, - .ttl = 0xff, - .flags = 0xff, - }; - const struct ib_uverbs_flow_ipv6_filter ipv6 = { - .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - .flow_label = cpu_to_be32(0xffffffffUL), - .next_hdr = 0xff, - .traffic_class = 0xff, - .hop_limit = 0xff, - }; - union { - struct ib_uverbs_flow_ipv4_filter ipv4; - struct ib_uverbs_flow_ipv6_filter ipv6; - } user_val = {}; - const void *user_pmask; - size_t val_len; - - /* If the flow IPv4/IPv6 flow specifications are extended, the mask - * should be changed as well. - */ - BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) + - sizeof(ipv4.flags) != sizeof(ipv4)); - BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) + - sizeof(ipv6.reserved) != sizeof(ipv6)); - - switch (proto) { - case IB_FLOW_SPEC_IPV4: - if (len > sizeof(user_val.ipv4) && - !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4), - len - sizeof(user_val.ipv4))) - return -EOPNOTSUPP; - - val_len = min_t(size_t, len, sizeof(user_val.ipv4)); - ret = copy_from_user(&user_val.ipv4, val_ptr, - val_len); - if (ret) - return -EFAULT; - - user_pmask = &ipv4; - break; - case IB_FLOW_SPEC_IPV6: - if (len > sizeof(user_val.ipv6) && - !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6), - len - sizeof(user_val.ipv6))) - return -EOPNOTSUPP; - - val_len = min_t(size_t, len, sizeof(user_val.ipv6)); - ret = copy_from_user(&user_val.ipv6, val_ptr, - val_len); - if (ret) - return -EFAULT; - - user_pmask = &ipv6; - break; - default: - return -EOPNOTSUPP; - } - - return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask, - &user_val, - val_len, out); -} - -static int flow_action_esp_get_encap(struct ib_flow_spec_list *out, - struct uverbs_attr_bundle *attrs) -{ - struct ib_uverbs_flow_action_esp_encap uverbs_encap; - int ret; - - ret = uverbs_copy_from(&uverbs_encap, attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP); - if (ret) - return ret; - - /* We currently support only one encap */ - if (uverbs_encap.next_ptr) - return -EOPNOTSUPP; - - if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 && - uverbs_encap.type != IB_FLOW_SPEC_IPV6) - return -EOPNOTSUPP; - - return parse_esp_ip(uverbs_encap.type, - u64_to_user_ptr(uverbs_encap.val_ptr), - uverbs_encap.len, - &out->spec); -} - -struct ib_flow_action_esp_attr { - struct ib_flow_action_attrs_esp hdr; - struct ib_flow_action_attrs_esp_keymats keymat; - struct ib_flow_action_attrs_esp_replays replay; - /* We currently support only one spec */ - struct ib_flow_spec_list encap; -}; - -#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW -static int parse_flow_action_esp(struct ib_device *ib_dev, - struct uverbs_attr_bundle *attrs, - struct ib_flow_action_esp_attr *esp_attr, - bool is_modify) -{ - struct ib_uverbs_flow_action_esp uverbs_esp = {}; - int ret; - - /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ - ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ESN); - if (IS_UVERBS_COPY_ERR(ret)) - return ret; - - /* This can be called from FLOW_ACTION_ESP_MODIFY where - * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional - */ - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) { - ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS); - if (ret) - return ret; - - if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1)) - return -EOPNOTSUPP; - - esp_attr->hdr.spi = uverbs_esp.spi; - esp_attr->hdr.seq = uverbs_esp.seq; - esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad; - esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts; - } - esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags, - is_modify); - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) { - esp_attr->keymat.protocol = - uverbs_attr_get_enum_id(attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT); - ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat, - attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT); - if (ret) - return ret; - - ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat); - if (ret) - return ret; - - esp_attr->hdr.keymat = &esp_attr->keymat; - } - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) { - esp_attr->replay.protocol = - uverbs_attr_get_enum_id(attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY); - - ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay, - attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY); - if (ret) - return ret; - - ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay, - is_modify); - if (ret) - return ret; - - esp_attr->hdr.replay = &esp_attr->replay; - } - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) { - ret = flow_action_esp_get_encap(&esp_attr->encap, attrs); - if (ret) - return ret; - - esp_attr->hdr.encap = &esp_attr->encap; - } - - return 0; -} - -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); - struct ib_device *ib_dev = attrs->context->device; - int ret; - struct ib_flow_action *action; - struct ib_flow_action_esp_attr esp_attr = {}; - - if (!ib_dev->ops.create_flow_action_esp) - return -EOPNOTSUPP; - - ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false); - if (ret) - return ret; - - /* No need to check as this attribute is marked as MANDATORY */ - action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr, - attrs); - if (IS_ERR(action)) - return PTR_ERR(action); - - uverbs_flow_action_fill_action(action, uobj, ib_dev, - IB_FLOW_ACTION_ESP); - - return 0; -} - -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); - struct ib_flow_action *action = uobj->object; - int ret; - struct ib_flow_action_esp_attr esp_attr = {}; - - if (!action->device->ops.modify_flow_action_esp) - return -EOPNOTSUPP; - - ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true); - if (ret) - return ret; - - if (action->type != IB_FLOW_ACTION_ESP) - return -EINVAL; - - return action->device->ops.modify_flow_action_esp(action, - &esp_attr.hdr, - attrs); -} - -static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { - [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT( - struct ib_uverbs_flow_action_esp_keymat_aes_gcm, - aes_key), - }, -}; - -static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, - size), - }, -}; - -DECLARE_UVERBS_NAMED_METHOD( - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, - hard_limit_pkts), - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, - UVERBS_ATTR_TYPE(__u32), - UA_OPTIONAL), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat, - UA_MANDATORY), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN( - UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), - UA_OPTIONAL)); - -DECLARE_UVERBS_NAMED_METHOD( - UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, - UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_WRITE, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, - hard_limit_pkts), - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, - UVERBS_ATTR_TYPE(__u32), - UA_OPTIONAL), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat, - UA_OPTIONAL), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN( - UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), - UA_OPTIONAL)); - DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_FLOW_ACTION_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, @@ -435,9 +56,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_FLOW_ACTION, UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY)); const struct uapi_definition uverbs_def_obj_flow_action[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 661ed2b44508..9c2886bc72cb 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -15,7 +15,6 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/fs_helpers.h> -#include <linux/mlx5/accel.h> #include <linux/mlx5/eswitch.h> #include <net/inet_ecn.h> #include "mlx5_ib.h" @@ -148,16 +147,6 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, { switch (maction->ib_action.type) { - case IB_FLOW_ACTION_ESP: - if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) - return -EINVAL; - /* Currently only AES_GCM keymat is supported by the driver */ - action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; - action->action |= is_egress ? - MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : - MLX5_FLOW_CONTEXT_ACTION_DECRYPT; - return 0; case IB_FLOW_ACTION_UNSPECIFIED: if (maction->flow_action_raw.sub_type == MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { @@ -368,14 +357,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, ib_spec->type & IB_FLOW_SPEC_INNER); break; case IB_FLOW_SPEC_ESP: - if (ib_spec->esp.mask.seq) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, - ntohl(ib_spec->esp.mask.spi)); - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, - ntohl(ib_spec->esp.val.spi)); - break; + return -EOPNOTSUPP; case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) @@ -587,47 +569,6 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) return false; } -enum valid_spec { - VALID_SPEC_INVALID, - VALID_SPEC_VALID, - VALID_SPEC_NA, -}; - -static enum valid_spec -is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - const u32 *match_c = spec->match_criteria; - bool is_crypto = - (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); - bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); - bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; - - /* - * Currently only crypto is supported in egress, when regular egress - * rules would be supported, always return VALID_SPEC_NA. - */ - if (!is_crypto) - return VALID_SPEC_NA; - - return is_crypto && is_ipsec && - (!egress || (!is_drop && - !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? - VALID_SPEC_VALID : VALID_SPEC_INVALID; -} - -static bool is_valid_spec(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - /* We curretly only support ipsec egress flow */ - return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; -} - static bool is_valid_ethertype(struct mlx5_core_dev *mdev, const struct ib_flow_attr *flow_attr, bool check_inner) @@ -1154,8 +1095,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - if (is_egress && - !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { + if (is_egress) { err = -EINVAL; goto free; } @@ -1740,149 +1680,6 @@ unlock: return ERR_PTR(err); } -static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) -{ - u32 flags = 0; - - if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) - flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; - - return flags; -} - -#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \ - MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA -static struct ib_flow_action * -mlx5_ib_create_flow_action_esp(struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_dev *mdev = to_mdev(device); - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; - struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; - struct mlx5_ib_flow_action *action; - u64 action_flags; - u64 flags; - int err = 0; - - err = uverbs_get_flags64( - &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); - if (err) - return ERR_PTR(err); - - flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); - - /* We current only support a subset of the standard features. Only a - * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn - * (with overlap). Full offload mode isn't supported. - */ - if (!attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) - return ERR_PTR(-EOPNOTSUPP); - - if (attr->keymat->protocol != - IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) - return ERR_PTR(-EOPNOTSUPP); - - aes_gcm = &attr->keymat->keymat.aes_gcm; - - if (aes_gcm->icv_len != 16 || - aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) - return ERR_PTR(-EOPNOTSUPP); - - action = kmalloc(sizeof(*action), GFP_KERNEL); - if (!action) - return ERR_PTR(-ENOMEM); - - action->esp_aes_gcm.ib_flags = attr->flags; - memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, - sizeof(accel_attrs.keymat.aes_gcm.aes_key)); - accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; - memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, - sizeof(accel_attrs.keymat.aes_gcm.salt)); - memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, - sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); - accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; - accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; - accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) - accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; - - action->esp_aes_gcm.ctx = - mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); - if (IS_ERR(action->esp_aes_gcm.ctx)) { - err = PTR_ERR(action->esp_aes_gcm.ctx); - goto err_parse; - } - - action->esp_aes_gcm.ib_flags = attr->flags; - - return &action->ib_action; - -err_parse: - kfree(action); - return ERR_PTR(err); -} - -static int -mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - struct mlx5_accel_esp_xfrm_attrs accel_attrs; - int err = 0; - - if (attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) - return -EOPNOTSUPP; - - /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can - * be modified. - */ - if (!(maction->esp_aes_gcm.ib_flags & - IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && - attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) - return -EINVAL; - - memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, - sizeof(accel_attrs)); - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - else - accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, - &accel_attrs); - if (err) - return err; - - maction->esp_aes_gcm.ib_flags &= - ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - maction->esp_aes_gcm.ib_flags |= - attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - - return 0; -} - static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) { switch (maction->flow_action_raw.sub_type) { @@ -1906,13 +1703,6 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) struct mlx5_ib_flow_action *maction = to_mflow_act(action); switch (action->type) { - case IB_FLOW_ACTION_ESP: - /* - * We only support aes_gcm by now, so we implicitly know this is - * the underline crypto. - */ - mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); - break; case IB_FLOW_ACTION_UNSPECIFIED: destroy_flow_action_raw(maction); break; @@ -2709,11 +2499,6 @@ static const struct ib_device_ops flow_ops = { .destroy_flow_action = mlx5_ib_destroy_flow_action, }; -static const struct ib_device_ops flow_ipsec_ops = { - .create_flow_action_esp = mlx5_ib_create_flow_action_esp, - .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, -}; - int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) { dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); @@ -2724,9 +2509,5 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) mutex_init(&dev->flow_db->lock); ib_set_device_ops(&dev->ib_dev, &flow_ops); - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops); - return 0; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 32a0ea820573..61aa196d6484 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -41,7 +41,6 @@ #include "wr.h" #include "restrack.h" #include "counters.h" -#include <linux/mlx5/accel.h> #include <rdma/uverbs_std_types.h> #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_verbs.h> @@ -906,10 +905,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_RX_HASH_SRC_PORT_UDP | MLX5_RX_HASH_DST_PORT_UDP | MLX5_RX_HASH_INNER; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - resp.rss_caps.rx_hash_fields_mask |= - MLX5_RX_HASH_IPSEC_SPI; resp.response_length += sizeof(resp.rss_caps); } } else { @@ -1791,23 +1786,6 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1; - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) { - if (mlx5_get_flow_namespace(dev->mdev, - MLX5_FLOW_NAMESPACE_EGRESS)) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; - if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; - /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ - } - resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 : bfregi->total_num_bfregs - bfregi->num_dyn_bfregs; resp->num_ports = dev->num_ports; @@ -3605,13 +3583,6 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR, &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY)); ADD_UVERBS_ATTRIBUTES_SIMPLE( - mlx5_ib_flow_action, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - enum mlx5_ib_uapi_flow_action_flags)); - -ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_query_context, UVERBS_OBJECT_DEVICE, UVERBS_METHOD_QUERY_CONTEXT, @@ -3628,8 +3599,6 @@ static const struct uapi_definition mlx5_ib_defs[] = { UAPI_DEF_CHAIN(mlx5_ib_std_types_defs), UAPI_DEF_CHAIN(mlx5_ib_dm_defs), - UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, - &mlx5_ib_flow_action), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index a6606736d8c5..2776ca5fc33f 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -121,7 +121,7 @@ mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (sk->sk_state == MISDN_CLOSED) return 0; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 19f0035d4410..c4ea73d996e8 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -24,6 +24,11 @@ #include "mt7530.h" +static struct mt753x_pcs *pcs_to_mt753x_pcs(struct phylink_pcs *pcs) +{ + return container_of(pcs, struct mt753x_pcs, pcs); +} + /* String, offset, and register size in bytes if different from 4 bytes */ static const struct mt7530_mib_desc mt7530_mib[] = { MIB_DESC(1, 0x00, "TxDrop"), @@ -2389,35 +2394,30 @@ mt7531_setup(struct dsa_switch *ds) return 0; } -static bool -mt7530_phy_mode_supported(struct dsa_switch *ds, int port, - const struct phylink_link_state *state) +static void mt7530_mac_port_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { - struct mt7530_priv *priv = ds->priv; - switch (port) { case 0 ... 4: /* Internal phy */ - if (state->interface != PHY_INTERFACE_MODE_GMII) - return false; + __set_bit(PHY_INTERFACE_MODE_GMII, + config->supported_interfaces); break; + case 5: /* 2nd cpu port with phy of port 0 or 4 / external phy */ - if (!phy_interface_mode_is_rgmii(state->interface) && - state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_GMII) - return false; + phy_interface_set_rgmii(config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_MII, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_GMII, + config->supported_interfaces); break; + case 6: /* 1st cpu port */ - if (state->interface != PHY_INTERFACE_MODE_RGMII && - state->interface != PHY_INTERFACE_MODE_TRGMII) - return false; + __set_bit(PHY_INTERFACE_MODE_RGMII, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_TRGMII, + config->supported_interfaces); break; - default: - dev_err(priv->dev, "%s: unsupported port: %i\n", __func__, - port); - return false; } - - return true; } static bool mt7531_is_rgmii_port(struct mt7530_priv *priv, u32 port) @@ -2425,42 +2425,35 @@ static bool mt7531_is_rgmii_port(struct mt7530_priv *priv, u32 port) return (port == 5) && (priv->p5_intf_sel != P5_INTF_SEL_GMAC5_SGMII); } -static bool -mt7531_phy_mode_supported(struct dsa_switch *ds, int port, - const struct phylink_link_state *state) +static void mt7531_mac_port_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { struct mt7530_priv *priv = ds->priv; switch (port) { case 0 ... 4: /* Internal phy */ - if (state->interface != PHY_INTERFACE_MODE_GMII) - return false; + __set_bit(PHY_INTERFACE_MODE_GMII, + config->supported_interfaces); break; + case 5: /* 2nd cpu port supports either rgmii or sgmii/8023z */ - if (mt7531_is_rgmii_port(priv, port)) - return phy_interface_mode_is_rgmii(state->interface); + if (mt7531_is_rgmii_port(priv, port)) { + phy_interface_set_rgmii(config->supported_interfaces); + break; + } fallthrough; + case 6: /* 1st cpu port supports sgmii/8023z only */ - if (state->interface != PHY_INTERFACE_MODE_SGMII && - !phy_interface_mode_is_8023z(state->interface)) - return false; + __set_bit(PHY_INTERFACE_MODE_SGMII, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + config->supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_2500BASEX, + config->supported_interfaces); + + config->mac_capabilities |= MAC_2500FD; break; - default: - dev_err(priv->dev, "%s: unsupported port: %i\n", __func__, - port); - return false; } - - return true; -} - -static bool -mt753x_phy_mode_supported(struct dsa_switch *ds, int port, - const struct phylink_link_state *state) -{ - struct mt7530_priv *priv = ds->priv; - - return priv->info->phy_mode_supported(ds, port, state); } static int @@ -2533,30 +2526,11 @@ static int mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port, return 0; } -static void mt7531_sgmii_validate(struct mt7530_priv *priv, int port, - unsigned long *supported) -{ - /* Port5 supports ethier RGMII or SGMII. - * Port6 supports SGMII only. - */ - switch (port) { - case 5: - if (mt7531_is_rgmii_port(priv, port)) - break; - fallthrough; - case 6: - phylink_set(supported, 1000baseX_Full); - phylink_set(supported, 2500baseX_Full); - phylink_set(supported, 2500baseT_Full); - } -} - -static void -mt7531_sgmii_link_up_force(struct dsa_switch *ds, int port, - unsigned int mode, phy_interface_t interface, - int speed, int duplex) +static void mt7531_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, int speed, int duplex) { - struct mt7530_priv *priv = ds->priv; + struct mt7530_priv *priv = pcs_to_mt753x_pcs(pcs)->priv; + int port = pcs_to_mt753x_pcs(pcs)->port; unsigned int val; /* For adjusting speed and duplex of SGMII force mode. */ @@ -2582,6 +2556,9 @@ mt7531_sgmii_link_up_force(struct dsa_switch *ds, int port, /* MT7531 SGMII 1G force mode can only work in full duplex mode, * no matter MT7531_SGMII_FORCE_HALF_DUPLEX is set or not. + * + * The speed check is unnecessary as the MAC capabilities apply + * this restriction. --rmk */ if ((speed == SPEED_10 || speed == SPEED_100) && duplex != DUPLEX_FULL) @@ -2657,9 +2634,10 @@ static int mt7531_sgmii_setup_mode_an(struct mt7530_priv *priv, int port, return 0; } -static void mt7531_sgmii_restart_an(struct dsa_switch *ds, int port) +static void mt7531_pcs_an_restart(struct phylink_pcs *pcs) { - struct mt7530_priv *priv = ds->priv; + struct mt7530_priv *priv = pcs_to_mt753x_pcs(pcs)->priv; + int port = pcs_to_mt753x_pcs(pcs)->port; u32 val; /* Only restart AN when AN is enabled */ @@ -2716,6 +2694,24 @@ mt753x_mac_config(struct dsa_switch *ds, int port, unsigned int mode, return priv->info->mac_port_config(ds, port, mode, state->interface); } +static struct phylink_pcs * +mt753x_phylink_mac_select_pcs(struct dsa_switch *ds, int port, + phy_interface_t interface) +{ + struct mt7530_priv *priv = ds->priv; + + switch (interface) { + case PHY_INTERFACE_MODE_TRGMII: + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + return &priv->pcs[port].pcs; + + default: + return NULL; + } +} + static void mt753x_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) @@ -2723,9 +2719,6 @@ mt753x_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, struct mt7530_priv *priv = ds->priv; u32 mcr_cur, mcr_new; - if (!mt753x_phy_mode_supported(ds, port, state)) - goto unsupported; - switch (port) { case 0 ... 4: /* Internal phy */ if (state->interface != PHY_INTERFACE_MODE_GMII) @@ -2780,17 +2773,6 @@ unsupported: mt7530_write(priv, MT7530_PMCR_P(port), mcr_new); } -static void -mt753x_phylink_mac_an_restart(struct dsa_switch *ds, int port) -{ - struct mt7530_priv *priv = ds->priv; - - if (!priv->info->mac_pcs_an_restart) - return; - - priv->info->mac_pcs_an_restart(ds, port); -} - static void mt753x_phylink_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface) @@ -2800,16 +2782,13 @@ static void mt753x_phylink_mac_link_down(struct dsa_switch *ds, int port, mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK); } -static void mt753x_mac_pcs_link_up(struct dsa_switch *ds, int port, - unsigned int mode, phy_interface_t interface, - int speed, int duplex) +static void mt753x_phylink_pcs_link_up(struct phylink_pcs *pcs, + unsigned int mode, + phy_interface_t interface, + int speed, int duplex) { - struct mt7530_priv *priv = ds->priv; - - if (!priv->info->mac_pcs_link_up) - return; - - priv->info->mac_pcs_link_up(ds, port, mode, interface, speed, duplex); + if (pcs->ops->pcs_link_up) + pcs->ops->pcs_link_up(pcs, mode, interface, speed, duplex); } static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, @@ -2822,8 +2801,6 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, struct mt7530_priv *priv = ds->priv; u32 mcr; - mt753x_mac_pcs_link_up(ds, port, mode, interface, speed, duplex); - mcr = PMCR_RX_EN | PMCR_TX_EN | PMCR_FORCE_LNK; /* MT753x MAC works in 1G full duplex mode for all up-clocked @@ -2903,81 +2880,51 @@ mt7531_cpu_port_config(struct dsa_switch *ds, int port) return ret; mt7530_write(priv, MT7530_PMCR_P(port), PMCR_CPU_PORT_SETTING(priv->id)); + mt753x_phylink_pcs_link_up(&priv->pcs[port].pcs, MLO_AN_FIXED, + interface, speed, DUPLEX_FULL); mt753x_phylink_mac_link_up(ds, port, MLO_AN_FIXED, interface, NULL, speed, DUPLEX_FULL, true, true); return 0; } -static void -mt7530_mac_port_validate(struct dsa_switch *ds, int port, - unsigned long *supported) +static void mt753x_phylink_get_caps(struct dsa_switch *ds, int port, + struct phylink_config *config) { - if (port == 5) - phylink_set(supported, 1000baseX_Full); -} - -static void mt7531_mac_port_validate(struct dsa_switch *ds, int port, - unsigned long *supported) -{ - struct mt7530_priv *priv = ds->priv; - - mt7531_sgmii_validate(priv, port, supported); -} - -static void -mt753x_phylink_validate(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state) -{ - __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; struct mt7530_priv *priv = ds->priv; - if (state->interface != PHY_INTERFACE_MODE_NA && - !mt753x_phy_mode_supported(ds, port, state)) { - linkmode_zero(supported); - return; - } - - phylink_set_port_modes(mask); - - if (state->interface != PHY_INTERFACE_MODE_TRGMII && - !phy_interface_mode_is_8023z(state->interface)) { - phylink_set(mask, 10baseT_Half); - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Half); - phylink_set(mask, 100baseT_Full); - phylink_set(mask, Autoneg); - } - - /* This switch only supports 1G full-duplex. */ - if (state->interface != PHY_INTERFACE_MODE_MII) - phylink_set(mask, 1000baseT_Full); + /* This switch only supports full-duplex at 1Gbps */ + config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | + MAC_10 | MAC_100 | MAC_1000FD; - priv->info->mac_port_validate(ds, port, mask); + /* This driver does not make use of the speed, duplex, pause or the + * advertisement in its mac_config, so it is safe to mark this driver + * as non-legacy. + */ + config->legacy_pre_march2020 = false; - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); + priv->info->mac_port_get_caps(ds, port, config); +} - linkmode_and(supported, supported, mask); - linkmode_and(state->advertising, state->advertising, mask); +static int mt753x_pcs_validate(struct phylink_pcs *pcs, + unsigned long *supported, + const struct phylink_link_state *state) +{ + /* Autonegotiation is not supported in TRGMII nor 802.3z modes */ + if (state->interface == PHY_INTERFACE_MODE_TRGMII || + phy_interface_mode_is_8023z(state->interface)) + phylink_clear(supported, Autoneg); - /* We can only operate at 2500BaseX or 1000BaseX. If requested - * to advertise both, only report advertising at 2500BaseX. - */ - phylink_helper_basex_speed(state); + return 0; } -static int -mt7530_phylink_mac_link_state(struct dsa_switch *ds, int port, - struct phylink_link_state *state) +static void mt7530_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) { - struct mt7530_priv *priv = ds->priv; + struct mt7530_priv *priv = pcs_to_mt753x_pcs(pcs)->priv; + int port = pcs_to_mt753x_pcs(pcs)->port; u32 pmsr; - if (port < 0 || port >= MT7530_NUM_PORTS) - return -EINVAL; - pmsr = mt7530_read(priv, MT7530_PMSR_P(port)); state->link = (pmsr & PMSR_LINK); @@ -3004,8 +2951,6 @@ mt7530_phylink_mac_link_state(struct dsa_switch *ds, int port, state->pause |= MLO_PAUSE_RX; if (pmsr & PMSR_TX_FC) state->pause |= MLO_PAUSE_TX; - - return 1; } static int @@ -3047,32 +2992,51 @@ mt7531_sgmii_pcs_get_state_an(struct mt7530_priv *priv, int port, return 0; } -static int -mt7531_phylink_mac_link_state(struct dsa_switch *ds, int port, - struct phylink_link_state *state) +static void mt7531_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) { - struct mt7530_priv *priv = ds->priv; + struct mt7530_priv *priv = pcs_to_mt753x_pcs(pcs)->priv; + int port = pcs_to_mt753x_pcs(pcs)->port; if (state->interface == PHY_INTERFACE_MODE_SGMII) - return mt7531_sgmii_pcs_get_state_an(priv, port, state); - - return -EOPNOTSUPP; + mt7531_sgmii_pcs_get_state_an(priv, port, state); + else + state->link = false; } -static int -mt753x_phylink_mac_link_state(struct dsa_switch *ds, int port, - struct phylink_link_state *state) +static int mt753x_pcs_config(struct phylink_pcs *pcs, unsigned int mode, + phy_interface_t interface, + const unsigned long *advertising, + bool permit_pause_to_mac) { - struct mt7530_priv *priv = ds->priv; + return 0; +} - return priv->info->mac_port_get_state(ds, port, state); +static void mt7530_pcs_an_restart(struct phylink_pcs *pcs) +{ } +static const struct phylink_pcs_ops mt7530_pcs_ops = { + .pcs_validate = mt753x_pcs_validate, + .pcs_get_state = mt7530_pcs_get_state, + .pcs_config = mt753x_pcs_config, + .pcs_an_restart = mt7530_pcs_an_restart, +}; + +static const struct phylink_pcs_ops mt7531_pcs_ops = { + .pcs_validate = mt753x_pcs_validate, + .pcs_get_state = mt7531_pcs_get_state, + .pcs_config = mt753x_pcs_config, + .pcs_an_restart = mt7531_pcs_an_restart, + .pcs_link_up = mt7531_pcs_link_up, +}; + static int mt753x_setup(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; int ret = priv->info->sw_setup(ds); + int i; if (ret) return ret; @@ -3085,6 +3049,13 @@ mt753x_setup(struct dsa_switch *ds) if (ret && priv->irq) mt7530_free_irq_common(priv); + /* Initialise the PCS devices */ + for (i = 0; i < priv->ds->num_ports; i++) { + priv->pcs[i].pcs.ops = priv->info->pcs_ops; + priv->pcs[i].priv = priv; + priv->pcs[i].port = i; + } + return ret; } @@ -3144,10 +3115,9 @@ static const struct dsa_switch_ops mt7530_switch_ops = { .port_vlan_del = mt7530_port_vlan_del, .port_mirror_add = mt753x_port_mirror_add, .port_mirror_del = mt753x_port_mirror_del, - .phylink_validate = mt753x_phylink_validate, - .phylink_mac_link_state = mt753x_phylink_mac_link_state, + .phylink_get_caps = mt753x_phylink_get_caps, + .phylink_mac_select_pcs = mt753x_phylink_mac_select_pcs, .phylink_mac_config = mt753x_phylink_mac_config, - .phylink_mac_an_restart = mt753x_phylink_mac_an_restart, .phylink_mac_link_down = mt753x_phylink_mac_link_down, .phylink_mac_link_up = mt753x_phylink_mac_link_up, .get_mac_eee = mt753x_get_mac_eee, @@ -3157,39 +3127,34 @@ static const struct dsa_switch_ops mt7530_switch_ops = { static const struct mt753x_info mt753x_table[] = { [ID_MT7621] = { .id = ID_MT7621, + .pcs_ops = &mt7530_pcs_ops, .sw_setup = mt7530_setup, .phy_read = mt7530_phy_read, .phy_write = mt7530_phy_write, .pad_setup = mt7530_pad_clk_setup, - .phy_mode_supported = mt7530_phy_mode_supported, - .mac_port_validate = mt7530_mac_port_validate, - .mac_port_get_state = mt7530_phylink_mac_link_state, + .mac_port_get_caps = mt7530_mac_port_get_caps, .mac_port_config = mt7530_mac_config, }, [ID_MT7530] = { .id = ID_MT7530, + .pcs_ops = &mt7530_pcs_ops, .sw_setup = mt7530_setup, .phy_read = mt7530_phy_read, .phy_write = mt7530_phy_write, .pad_setup = mt7530_pad_clk_setup, - .phy_mode_supported = mt7530_phy_mode_supported, - .mac_port_validate = mt7530_mac_port_validate, - .mac_port_get_state = mt7530_phylink_mac_link_state, + .mac_port_get_caps = mt7530_mac_port_get_caps, .mac_port_config = mt7530_mac_config, }, [ID_MT7531] = { .id = ID_MT7531, + .pcs_ops = &mt7531_pcs_ops, .sw_setup = mt7531_setup, .phy_read = mt7531_ind_phy_read, .phy_write = mt7531_ind_phy_write, .pad_setup = mt7531_pad_setup, .cpu_port_config = mt7531_cpu_port_config, - .phy_mode_supported = mt7531_phy_mode_supported, - .mac_port_validate = mt7531_mac_port_validate, - .mac_port_get_state = mt7531_phylink_mac_link_state, + .mac_port_get_caps = mt7531_mac_port_get_caps, .mac_port_config = mt7531_mac_config, - .mac_pcs_an_restart = mt7531_sgmii_restart_an, - .mac_pcs_link_up = mt7531_sgmii_link_up_force, }, }; @@ -3246,9 +3211,8 @@ mt7530_probe(struct mdio_device *mdiodev) */ if (!priv->info->sw_setup || !priv->info->pad_setup || !priv->info->phy_read || !priv->info->phy_write || - !priv->info->phy_mode_supported || - !priv->info->mac_port_validate || - !priv->info->mac_port_get_state || !priv->info->mac_port_config) + !priv->info->mac_port_get_caps || + !priv->info->mac_port_config) return -EINVAL; priv->id = priv->info->id; diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 91508e2feef9..71e36b69b96d 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -741,6 +741,12 @@ static const char *p5_intf_modes(unsigned int p5_interface) struct mt7530_priv; +struct mt753x_pcs { + struct phylink_pcs pcs; + struct mt7530_priv *priv; + int port; +}; + /* struct mt753x_info - This is the main data structure for holding the specific * part for each supported device * @sw_setup: Holding the handler to a device initialization @@ -752,36 +758,27 @@ struct mt7530_priv; * port * @mac_port_validate: Holding the way to set addition validate type for a * certan MAC port - * @mac_port_get_state: Holding the way getting the MAC/PCS state for a certain - * MAC port * @mac_port_config: Holding the way setting up the PHY attribute to a * certain MAC port - * @mac_pcs_an_restart Holding the way restarting PCS autonegotiation for a - * certain MAC port - * @mac_pcs_link_up: Holding the way setting up the PHY attribute to the pcs - * of the certain MAC port */ struct mt753x_info { enum mt753x_id id; + const struct phylink_pcs_ops *pcs_ops; + int (*sw_setup)(struct dsa_switch *ds); int (*phy_read)(struct mt7530_priv *priv, int port, int regnum); int (*phy_write)(struct mt7530_priv *priv, int port, int regnum, u16 val); int (*pad_setup)(struct dsa_switch *ds, phy_interface_t interface); int (*cpu_port_config)(struct dsa_switch *ds, int port); - bool (*phy_mode_supported)(struct dsa_switch *ds, int port, - const struct phylink_link_state *state); + void (*mac_port_get_caps)(struct dsa_switch *ds, int port, + struct phylink_config *config); void (*mac_port_validate)(struct dsa_switch *ds, int port, + phy_interface_t interface, unsigned long *supported); - int (*mac_port_get_state)(struct dsa_switch *ds, int port, - struct phylink_link_state *state); int (*mac_port_config)(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface); - void (*mac_pcs_an_restart)(struct dsa_switch *ds, int port); - void (*mac_pcs_link_up)(struct dsa_switch *ds, int port, - unsigned int mode, phy_interface_t interface, - int speed, int duplex); }; /* struct mt7530_priv - This is the main data structure for holding the state @@ -823,6 +820,7 @@ struct mt7530_priv { u8 mirror_tx; struct mt7530_port ports[MT7530_NUM_PORTS]; + struct mt753x_pcs pcs[MT7530_NUM_PORTS]; /* protect among processes for registers access*/ struct mutex reg_mutex; int irq; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index 5caa75b41b73..4e9215bce4ad 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -2212,7 +2212,7 @@ * MAC DA 2. The reset default is set to mask out all parameters. */ #define NIG_REG_P0_LLH_PTP_PARAM_MASK 0x187a0 -/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set +/* [RW 14] Mask register for the rules used in detecting PTP packets. Set * each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} . * 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP * 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1; @@ -2381,7 +2381,7 @@ * MAC DA 2. The reset default is set to mask out all parameters. */ #define NIG_REG_P1_LLH_PTP_PARAM_MASK 0x187c8 -/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set +/* [RW 14] Mask register for the rules used in detecting PTP packets. Set * each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} . * 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP * 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1; @@ -2493,7 +2493,7 @@ * MAC DA 2. The reset default is set to mask out all parameters. */ #define NIG_REG_P0_TLLH_PTP_PARAM_MASK 0x187f0 -/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set +/* [RW 14] Mask register for the rules used in detecting PTP packets. Set * each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} . * 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP * 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1; @@ -2529,7 +2529,7 @@ * MAC DA 2. The reset default is set to mask out all parameters. */ #define NIG_REG_P1_TLLH_PTP_PARAM_MASK 0x187f8 -/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set +/* [RW 14] Mask register for the rules used in detecting PTP packets. Set * each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} . * 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP * 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1; @@ -6218,7 +6218,7 @@ #define AEU_INPUTS_ATTN_BITS_GPIO0_FUNCTION_0 (0x1<<2) #define AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR (0x1<<12) #define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY (0x1<<28) -#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY (0x1<<31) +#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY (0x1U<<31) #define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY (0x1<<29) #define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY (0x1<<30) #define AEU_INPUTS_ATTN_BITS_MISC_HW_INTERRUPT (0x1<<15) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 874fad0a5cf8..0489c1c2e7dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -56,6 +56,7 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <net/page_pool.h> +#include <linux/align.h> #include "bnxt_hsi.h" #include "bnxt.h" @@ -738,7 +739,6 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, page_pool_recycle_direct(rxr->page_pool, page); return NULL; } - *mapping += bp->rx_dma_offset; return page; } @@ -780,6 +780,7 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, if (!page) return -ENOMEM; + mapping += bp->rx_dma_offset; rx_buf->data = page; rx_buf->data_ptr = page_address(page) + bp->rx_offset; } else { @@ -840,33 +841,41 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, u16 sw_prod = rxr->rx_sw_agg_prod; unsigned int offset = 0; - if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) { - page = rxr->rx_page; - if (!page) { + if (BNXT_RX_PAGE_MODE(bp)) { + page = __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp); + + if (!page) + return -ENOMEM; + + } else { + if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) { + page = rxr->rx_page; + if (!page) { + page = alloc_page(gfp); + if (!page) + return -ENOMEM; + rxr->rx_page = page; + rxr->rx_page_offset = 0; + } + offset = rxr->rx_page_offset; + rxr->rx_page_offset += BNXT_RX_PAGE_SIZE; + if (rxr->rx_page_offset == PAGE_SIZE) + rxr->rx_page = NULL; + else + get_page(page); + } else { page = alloc_page(gfp); if (!page) return -ENOMEM; - rxr->rx_page = page; - rxr->rx_page_offset = 0; } - offset = rxr->rx_page_offset; - rxr->rx_page_offset += BNXT_RX_PAGE_SIZE; - if (rxr->rx_page_offset == PAGE_SIZE) - rxr->rx_page = NULL; - else - get_page(page); - } else { - page = alloc_page(gfp); - if (!page) - return -ENOMEM; - } - mapping = dma_map_page_attrs(&pdev->dev, page, offset, - BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE, - DMA_ATTR_WEAK_ORDERING); - if (dma_mapping_error(&pdev->dev, mapping)) { - __free_page(page); - return -EIO; + mapping = dma_map_page_attrs(&pdev->dev, page, offset, + BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE, + DMA_ATTR_WEAK_ORDERING); + if (dma_mapping_error(&pdev->dev, mapping)) { + __free_page(page); + return -EIO; + } } if (unlikely(test_bit(sw_prod, rxr->rx_agg_bmap))) @@ -962,6 +971,39 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx, rxr->rx_sw_agg_prod = sw_prod; } +static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr, + u16 cons, void *data, u8 *data_ptr, + dma_addr_t dma_addr, + unsigned int offset_and_len) +{ + unsigned int len = offset_and_len & 0xffff; + struct page *page = data; + u16 prod = rxr->rx_prod; + struct sk_buff *skb; + int err; + + err = bnxt_alloc_rx_data(bp, rxr, prod, GFP_ATOMIC); + if (unlikely(err)) { + bnxt_reuse_rx_data(rxr, cons, data); + return NULL; + } + dma_addr -= bp->rx_dma_offset; + dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, + DMA_ATTR_WEAK_ORDERING); + skb = build_skb(page_address(page), BNXT_PAGE_MODE_BUF_SIZE + + bp->rx_dma_offset); + if (!skb) { + __free_page(page); + return NULL; + } + skb_mark_for_recycle(skb); + skb_reserve(skb, bp->rx_dma_offset); + __skb_put(skb, len); + + return skb; +} + static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, void *data, u8 *data_ptr, @@ -984,7 +1026,6 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, dma_addr -= bp->rx_dma_offset; dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); - page_pool_release_page(rxr->page_pool, page); if (unlikely(!payload)) payload = eth_get_headlen(bp->dev, data_ptr, len); @@ -995,6 +1036,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, return NULL; } + skb_mark_for_recycle(skb); off = (void *)data_ptr - page_address(page); skb_add_rx_frag(skb, 0, page, off, len, PAGE_SIZE); memcpy(skb->data - NET_IP_ALIGN, data_ptr - NET_IP_ALIGN, @@ -1038,22 +1080,24 @@ static struct sk_buff *bnxt_rx_skb(struct bnxt *bp, return skb; } -static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, - struct bnxt_cp_ring_info *cpr, - struct sk_buff *skb, u16 idx, - u32 agg_bufs, bool tpa) +static u32 __bnxt_rx_agg_pages(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr, + struct skb_shared_info *shinfo, + u16 idx, u32 agg_bufs, bool tpa, + struct xdp_buff *xdp) { struct bnxt_napi *bnapi = cpr->bnapi; struct pci_dev *pdev = bp->pdev; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; u16 prod = rxr->rx_agg_prod; + u32 i, total_frag_len = 0; bool p5_tpa = false; - u32 i; if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa) p5_tpa = true; for (i = 0; i < agg_bufs; i++) { + skb_frag_t *frag = &shinfo->frags[i]; u16 cons, frag_len; struct rx_agg_cmp *agg; struct bnxt_sw_rx_agg_bd *cons_rx_buf; @@ -1069,8 +1113,10 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT; cons_rx_buf = &rxr->rx_agg_ring[cons]; - skb_fill_page_desc(skb, i, cons_rx_buf->page, - cons_rx_buf->offset, frag_len); + skb_frag_off_set(frag, cons_rx_buf->offset); + skb_frag_size_set(frag, frag_len); + __skb_frag_set_page(frag, cons_rx_buf->page); + shinfo->nr_frags = i + 1; __clear_bit(cons, rxr->rx_agg_bmap); /* It is possible for bnxt_alloc_rx_page() to allocate @@ -1081,16 +1127,14 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, page = cons_rx_buf->page; cons_rx_buf->page = NULL; + if (xdp && page_is_pfmemalloc(page)) + xdp_buff_set_frag_pfmemalloc(xdp); + if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_ATOMIC) != 0) { - struct skb_shared_info *shinfo; unsigned int nr_frags; - shinfo = skb_shinfo(skb); nr_frags = --shinfo->nr_frags; __skb_frag_set_page(&shinfo->frags[nr_frags], NULL); - - dev_kfree_skb(skb); - cons_rx_buf->page = page; /* Update prod since possibly some pages have been @@ -1098,23 +1142,62 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, */ rxr->rx_agg_prod = prod; bnxt_reuse_rx_agg_bufs(cpr, idx, i, agg_bufs - i, tpa); - return NULL; + return 0; } dma_unmap_page_attrs(&pdev->dev, mapping, BNXT_RX_PAGE_SIZE, - DMA_FROM_DEVICE, + bp->rx_dir, DMA_ATTR_WEAK_ORDERING); - skb->data_len += frag_len; - skb->len += frag_len; - skb->truesize += PAGE_SIZE; - + total_frag_len += frag_len; prod = NEXT_RX_AGG(prod); } rxr->rx_agg_prod = prod; + return total_frag_len; +} + +static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr, + struct sk_buff *skb, u16 idx, + u32 agg_bufs, bool tpa) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + u32 total_frag_len = 0; + + total_frag_len = __bnxt_rx_agg_pages(bp, cpr, shinfo, idx, + agg_bufs, tpa, NULL); + if (!total_frag_len) { + dev_kfree_skb(skb); + return NULL; + } + + skb->data_len += total_frag_len; + skb->len += total_frag_len; + skb->truesize += PAGE_SIZE * agg_bufs; return skb; } +static u32 bnxt_rx_agg_pages_xdp(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr, + struct xdp_buff *xdp, u16 idx, + u32 agg_bufs, bool tpa) +{ + struct skb_shared_info *shinfo = xdp_get_shared_info_from_buff(xdp); + u32 total_frag_len = 0; + + if (!xdp_buff_has_frags(xdp)) + shinfo->nr_frags = 0; + + total_frag_len = __bnxt_rx_agg_pages(bp, cpr, shinfo, + idx, agg_bufs, tpa, xdp); + if (total_frag_len) { + xdp_buff_set_frags_flag(xdp); + shinfo->nr_frags = agg_bufs; + shinfo->xdp_frags_size = total_frag_len; + } + return total_frag_len; +} + static int bnxt_agg_bufs_valid(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, u8 agg_bufs, u32 *raw_cons) { @@ -1644,7 +1727,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, } if (agg_bufs) { - skb = bnxt_rx_pages(bp, cpr, skb, idx, agg_bufs, true); + skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true); if (!skb) { /* Page reuse already handled by bnxt_rx_pages(). */ cpr->sw_stats.rx.rx_oom_discards += 1; @@ -1729,8 +1812,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, struct bnxt_sw_rx_bd *rx_buf; unsigned int len; u8 *data_ptr, agg_bufs, cmp_type; + bool xdp_active = false; dma_addr_t dma_addr; struct sk_buff *skb; + struct xdp_buff xdp; u32 flags, misc; void *data; int rc = 0; @@ -1839,18 +1924,39 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, len = flags >> RX_CMP_LEN_SHIFT; dma_addr = rx_buf->mapping; - if (bnxt_rx_xdp(bp, rxr, cons, data, &data_ptr, &len, event)) { - rc = 1; - goto next_rx; + if (bnxt_xdp_attached(bp, rxr)) { + bnxt_xdp_buff_init(bp, rxr, cons, &data_ptr, &len, &xdp); + if (agg_bufs) { + u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp, + cp_cons, agg_bufs, + false); + if (!frag_len) { + cpr->sw_stats.rx.rx_oom_discards += 1; + rc = -ENOMEM; + goto next_rx; + } + } + xdp_active = true; + } + + if (xdp_active) { + if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &len, event)) { + rc = 1; + goto next_rx; + } } if (len <= bp->rx_copy_thresh) { skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr); bnxt_reuse_rx_data(rxr, cons, data); if (!skb) { - if (agg_bufs) - bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, - agg_bufs, false); + if (agg_bufs) { + if (!xdp_active) + bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, + agg_bufs, false); + else + bnxt_xdp_buff_frags_free(rxr, &xdp); + } cpr->sw_stats.rx.rx_oom_discards += 1; rc = -ENOMEM; goto next_rx; @@ -1872,11 +1978,22 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } if (agg_bufs) { - skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs, false); - if (!skb) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; + if (!xdp_active) { + skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, cp_cons, agg_bufs, false); + if (!skb) { + cpr->sw_stats.rx.rx_oom_discards += 1; + rc = -ENOMEM; + goto next_rx; + } + } else { + skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1); + if (!skb) { + /* we should be able to free the old skb here */ + bnxt_xdp_buff_frags_free(rxr, &xdp); + cpr->sw_stats.rx.rx_oom_discards += 1; + rc = -ENOMEM; + goto next_rx; + } } } @@ -2492,10 +2609,13 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi) if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; - if (bnapi->events & BNXT_AGG_EVENT) - bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); } + if (bnapi->events & BNXT_AGG_EVENT) { + struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; + + bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + } bnapi->events = 0; } @@ -2872,14 +2992,23 @@ skip_rx_buf_free: if (!page) continue; - dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping, - BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE, - DMA_ATTR_WEAK_ORDERING); + if (BNXT_RX_PAGE_MODE(bp)) { + dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping, + BNXT_RX_PAGE_SIZE, bp->rx_dir, + DMA_ATTR_WEAK_ORDERING); + rx_agg_buf->page = NULL; + __clear_bit(i, rxr->rx_agg_bmap); - rx_agg_buf->page = NULL; - __clear_bit(i, rxr->rx_agg_bmap); + page_pool_recycle_direct(rxr->page_pool, page); + } else { + dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping, + BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE, + DMA_ATTR_WEAK_ORDERING); + rx_agg_buf->page = NULL; + __clear_bit(i, rxr->rx_agg_bmap); - __free_page(page); + __free_page(page); + } } skip_rx_agg_free: @@ -3793,7 +3922,7 @@ void bnxt_set_ring_params(struct bnxt *bp) /* 8 for CRC and VLAN */ rx_size = SKB_DATA_ALIGN(bp->dev->mtu + ETH_HLEN + NET_IP_ALIGN + 8); - rx_space = rx_size + NET_SKB_PAD + + rx_space = rx_size + ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); bp->rx_copy_thresh = BNXT_RX_COPY_THRESH; @@ -3834,9 +3963,15 @@ void bnxt_set_ring_params(struct bnxt *bp) } bp->rx_agg_ring_size = agg_ring_size; bp->rx_agg_ring_mask = (bp->rx_agg_nr_pages * RX_DESC_CNT) - 1; - rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN); - rx_space = rx_size + NET_SKB_PAD + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + if (BNXT_RX_PAGE_MODE(bp)) { + rx_space = BNXT_PAGE_MODE_BUF_SIZE; + rx_size = BNXT_MAX_PAGE_MODE_MTU; + } else { + rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN); + rx_space = rx_size + NET_SKB_PAD + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } } bp->rx_buf_use_size = rx_size; @@ -3877,14 +4012,21 @@ void bnxt_set_ring_params(struct bnxt *bp) int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) { if (page_mode) { - if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) - return -EOPNOTSUPP; - bp->dev->max_mtu = - min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU); bp->flags &= ~BNXT_FLAG_AGG_RINGS; - bp->flags |= BNXT_FLAG_NO_AGG_RINGS | BNXT_FLAG_RX_PAGE_MODE; + bp->flags |= BNXT_FLAG_RX_PAGE_MODE; + + if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) { + bp->flags |= BNXT_FLAG_JUMBO; + bp->rx_skb_func = bnxt_rx_multi_page_skb; + bp->dev->max_mtu = + min_t(u16, bp->max_mtu, BNXT_MAX_MTU); + } else { + bp->flags |= BNXT_FLAG_NO_AGG_RINGS; + bp->rx_skb_func = bnxt_rx_page_skb; + bp->dev->max_mtu = + min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU); + } bp->rx_dir = DMA_BIDIRECTIONAL; - bp->rx_skb_func = bnxt_rx_page_skb; /* Disable LRO or GRO_HW */ netdev_update_features(bp->dev); } else { @@ -5226,12 +5368,15 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id) if (rc) return rc; - req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT | - VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 | - VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6); - req->enables = - cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID | - VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID); + req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT); + req->enables = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID); + + if (BNXT_RX_PAGE_MODE(bp) && !BNXT_RX_JUMBO_MODE(bp)) { + req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 | + VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6); + req->enables |= + cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID); + } /* thresholds not implemented in firmware yet */ req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh); req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh); @@ -11031,6 +11176,9 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev, if (bp->flags & BNXT_FLAG_NO_AGG_RINGS) features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW); + if (!(bp->flags & BNXT_FLAG_TPA)) + features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW); + if (!(features & NETIF_F_GRO)) features &= ~NETIF_F_GRO_HW; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 98453a78cbd0..a498ee297946 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -591,10 +591,12 @@ struct nqe_cn { #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT) #define BNXT_MAX_MTU 9500 -#define BNXT_MAX_PAGE_MODE_MTU \ +#define BNXT_PAGE_MODE_BUF_SIZE \ ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \ - XDP_PACKET_HEADROOM - \ - SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info))) + XDP_PACKET_HEADROOM) +#define BNXT_MAX_PAGE_MODE_MTU \ + BNXT_PAGE_MODE_BUF_SIZE - \ + SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)) #define BNXT_MIN_PKT_SIZE 52 @@ -699,13 +701,12 @@ struct bnxt_sw_tx_bd { }; DEFINE_DMA_UNMAP_ADDR(mapping); DEFINE_DMA_UNMAP_LEN(len); + struct page *page; u8 is_gso; u8 is_push; u8 action; - union { - unsigned short nr_frags; - u16 rx_prod; - }; + unsigned short nr_frags; + u16 rx_prod; }; struct bnxt_sw_rx_bd { @@ -1817,6 +1818,7 @@ struct bnxt { #define BNXT_SUPPORTS_TPA(bp) (!BNXT_CHIP_TYPE_NITRO_A0(bp) && \ (!((bp)->flags & BNXT_FLAG_CHIP_P5) || \ (bp)->max_tpa_v2) && !is_kdump_kernel()) +#define BNXT_RX_JUMBO_MODE(bp) ((bp)->flags & BNXT_FLAG_JUMBO) #define BNXT_CHIP_SR2(bp) \ ((bp)->chip_num == CHIP_NUM_58818) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 22e965e18fbc..b3a48d6675fe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -3491,7 +3491,7 @@ static int bnxt_run_loopback(struct bnxt *bp) dev_kfree_skb(skb); return -EIO; } - bnxt_xmit_bd(bp, txr, map, pkt_size); + bnxt_xmit_bd(bp, txr, map, pkt_size, NULL); /* Sync BD data before updating doorbell */ wmb(); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 03b1d6c04504..f02fe906dedb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -24,36 +24,91 @@ DEFINE_STATIC_KEY_FALSE(bnxt_xdp_locking_key); struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - dma_addr_t mapping, u32 len) + dma_addr_t mapping, u32 len, + struct xdp_buff *xdp) { - struct bnxt_sw_tx_bd *tx_buf; + struct skb_shared_info *sinfo; + struct bnxt_sw_tx_bd *tx_buf, *first_buf; struct tx_bd *txbd; + int num_frags = 0; u32 flags; u16 prod; + int i; + + if (xdp && xdp_buff_has_frags(xdp)) { + sinfo = xdp_get_shared_info_from_buff(xdp); + num_frags = sinfo->nr_frags; + } + /* fill up the first buffer */ prod = txr->tx_prod; tx_buf = &txr->tx_buf_ring[prod]; + first_buf = tx_buf; + tx_buf->nr_frags = num_frags; + if (xdp) + tx_buf->page = virt_to_head_page(xdp->data); txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; - flags = (len << TX_BD_LEN_SHIFT) | (1 << TX_BD_FLAGS_BD_CNT_SHIFT) | - TX_BD_FLAGS_PACKET_END | bnxt_lhint_arr[len >> 9]; + flags = ((len) << TX_BD_LEN_SHIFT) | ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT); txbd->tx_bd_len_flags_type = cpu_to_le32(flags); txbd->tx_bd_opaque = prod; txbd->tx_bd_haddr = cpu_to_le64(mapping); + /* now let us fill up the frags into the next buffers */ + for (i = 0; i < num_frags ; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + struct bnxt_sw_tx_bd *frag_tx_buf; + struct pci_dev *pdev = bp->pdev; + dma_addr_t frag_mapping; + int frag_len; + + prod = NEXT_TX(prod); + txr->tx_prod = prod; + + /* first fill up the first buffer */ + frag_tx_buf = &txr->tx_buf_ring[prod]; + frag_tx_buf->page = skb_frag_page(frag); + + txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + + frag_len = skb_frag_size(frag); + frag_mapping = skb_frag_dma_map(&pdev->dev, frag, 0, + frag_len, DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(&pdev->dev, frag_mapping))) + return NULL; + + dma_unmap_addr_set(frag_tx_buf, mapping, frag_mapping); + + flags = frag_len << TX_BD_LEN_SHIFT; + txbd->tx_bd_len_flags_type = cpu_to_le32(flags); + txbd->tx_bd_opaque = prod; + txbd->tx_bd_haddr = cpu_to_le64(frag_mapping); + + len = frag_len; + } + + flags &= ~TX_BD_LEN; + txbd->tx_bd_len_flags_type = cpu_to_le32(((len) << TX_BD_LEN_SHIFT) | flags | + TX_BD_FLAGS_PACKET_END); + /* Sync TX BD */ + wmb(); prod = NEXT_TX(prod); txr->tx_prod = prod; - return tx_buf; + + return first_buf; } static void __bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - dma_addr_t mapping, u32 len, u16 rx_prod) + dma_addr_t mapping, u32 len, u16 rx_prod, + struct xdp_buff *xdp) { struct bnxt_sw_tx_bd *tx_buf; - tx_buf = bnxt_xmit_bd(bp, txr, mapping, len); + tx_buf = bnxt_xmit_bd(bp, txr, mapping, len, xdp); tx_buf->rx_prod = rx_prod; tx_buf->action = XDP_TX; + } static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, @@ -63,7 +118,7 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, { struct bnxt_sw_tx_bd *tx_buf; - tx_buf = bnxt_xmit_bd(bp, txr, mapping, len); + tx_buf = bnxt_xmit_bd(bp, txr, mapping, len, NULL); tx_buf->action = XDP_REDIRECT; tx_buf->xdpf = xdpf; dma_unmap_addr_set(tx_buf, mapping, mapping); @@ -78,7 +133,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) struct bnxt_sw_tx_bd *tx_buf; u16 tx_cons = txr->tx_cons; u16 last_tx_cons = tx_cons; - int i; + int i, j, frags; for (i = 0; i < nr_pkts; i++) { tx_buf = &txr->tx_buf_ring[tx_cons]; @@ -96,6 +151,13 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) } else if (tx_buf->action == XDP_TX) { rx_doorbell_needed = true; last_tx_cons = tx_cons; + + frags = tx_buf->nr_frags; + for (j = 0; j < frags; j++) { + tx_cons = NEXT_TX(tx_cons); + tx_buf = &txr->tx_buf_ring[tx_cons]; + page_pool_recycle_direct(rxr->page_pool, tx_buf->page); + } } tx_cons = NEXT_TX(tx_cons); } @@ -103,7 +165,52 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) if (rx_doorbell_needed) { tx_buf = &txr->tx_buf_ring[last_tx_cons]; bnxt_db_write(bp, &rxr->rx_db, tx_buf->rx_prod); + + } +} + +bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) +{ + struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog); + + return !!xdp_prog; +} + +void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + u16 cons, u8 **data_ptr, unsigned int *len, + struct xdp_buff *xdp) +{ + struct bnxt_sw_rx_bd *rx_buf; + struct pci_dev *pdev; + dma_addr_t mapping; + u32 offset; + + pdev = bp->pdev; + rx_buf = &rxr->rx_buf_ring[cons]; + offset = bp->rx_offset; + + mapping = rx_buf->mapping - bp->rx_dma_offset; + dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); + + xdp_init_buff(xdp, BNXT_PAGE_MODE_BUF_SIZE + offset, &rxr->xdp_rxq); + xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false); +} + +void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, + struct xdp_buff *xdp) +{ + struct skb_shared_info *shinfo; + int i; + + if (!xdp || !xdp_buff_has_frags(xdp)) + return; + shinfo = xdp_get_shared_info_from_buff(xdp); + for (i = 0; i < shinfo->nr_frags; i++) { + struct page *page = skb_frag_page(&shinfo->frags[i]); + + page_pool_recycle_direct(rxr->page_pool, page); } + shinfo->nr_frags = 0; } /* returns the following: @@ -111,14 +218,14 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) * false - packet should be passed to the stack. */ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, - struct page *page, u8 **data_ptr, unsigned int *len, u8 *event) + struct xdp_buff xdp, struct page *page, unsigned int *len, u8 *event) { struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog); struct bnxt_tx_ring_info *txr; struct bnxt_sw_rx_bd *rx_buf; struct pci_dev *pdev; - struct xdp_buff xdp; dma_addr_t mapping; + u32 tx_needed = 1; void *orig_data; u32 tx_avail; u32 offset; @@ -128,16 +235,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, return false; pdev = bp->pdev; - rx_buf = &rxr->rx_buf_ring[cons]; offset = bp->rx_offset; - mapping = rx_buf->mapping - bp->rx_dma_offset; - dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); - txr = rxr->bnapi->tx_ring; /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */ - xdp_init_buff(&xdp, PAGE_SIZE, &rxr->xdp_rxq); - xdp_prepare_buff(&xdp, *data_ptr - offset, offset, *len, false); orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); @@ -150,26 +251,38 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, *event &= ~BNXT_RX_EVENT; *len = xdp.data_end - xdp.data; - if (orig_data != xdp.data) { + if (orig_data != xdp.data) offset = xdp.data - xdp.data_hard_start; - *data_ptr = xdp.data_hard_start + offset; - } + switch (act) { case XDP_PASS: return false; case XDP_TX: - if (tx_avail < 1) { + rx_buf = &rxr->rx_buf_ring[cons]; + mapping = rx_buf->mapping - bp->rx_dma_offset; + *event = 0; + + if (unlikely(xdp_buff_has_frags(&xdp))) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(&xdp); + + tx_needed += sinfo->nr_frags; + *event = BNXT_AGG_EVENT; + } + + if (tx_avail < tx_needed) { trace_xdp_exception(bp->dev, xdp_prog, act); + bnxt_xdp_buff_frags_free(rxr, &xdp); bnxt_reuse_rx_data(rxr, cons, page); return true; } - *event = BNXT_TX_EVENT; dma_sync_single_for_device(&pdev->dev, mapping + offset, *len, bp->rx_dir); + + *event |= BNXT_TX_EVENT; __bnxt_xmit_xdp(bp, txr, mapping + offset, *len, - NEXT_RX(rxr->rx_prod)); + NEXT_RX(rxr->rx_prod), &xdp); bnxt_reuse_rx_data(rxr, cons, page); return true; case XDP_REDIRECT: @@ -177,6 +290,8 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, * redirect is coming from a frame received by the * bnxt_en driver. */ + rx_buf = &rxr->rx_buf_ring[cons]; + mapping = rx_buf->mapping - bp->rx_dma_offset; dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); @@ -184,6 +299,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, /* if we are unable to allocate a new buffer, abort and reuse */ if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) { trace_xdp_exception(bp->dev, xdp_prog, act); + bnxt_xdp_buff_frags_free(rxr, &xdp); bnxt_reuse_rx_data(rxr, cons, page); return true; } @@ -203,6 +319,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, trace_xdp_exception(bp->dev, xdp_prog, act); fallthrough; case XDP_DROP: + bnxt_xdp_buff_frags_free(rxr, &xdp); bnxt_reuse_rx_data(rxr, cons, page); break; } @@ -270,8 +387,9 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) int tx_xdp = 0, rc, tc; struct bpf_prog *old; - if (prog && bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) { - netdev_warn(dev, "MTU %d larger than largest XDP supported MTU %d.\n", + if (prog && !prog->aux->xdp_has_frags && + bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) { + netdev_warn(dev, "MTU %d larger than %d without XDP frag support.\n", bp->dev->mtu, BNXT_MAX_PAGE_MODE_MTU); return -EOPNOTSUPP; } @@ -337,3 +455,26 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) } return rc; } + +struct sk_buff * +bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, + struct page_pool *pool, struct xdp_buff *xdp, + struct rx_cmp_ext *rxcmp1) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + + if (!skb) + return NULL; + skb_checksum_none_assert(skb); + if (RX_CMP_L4_CS_OK(rxcmp1)) { + if (bp->dev->features & NETIF_F_RXCSUM) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = RX_CMP_ENCAP(rxcmp1); + } + } + xdp_update_skb_shared_info(skb, num_frags, + sinfo->xdp_frags_size, + PAGE_SIZE * sinfo->nr_frags, + xdp_buff_is_frag_pfmemalloc(xdp)); + return skb; +} diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 067bb5e821f5..505911ae095d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -14,13 +14,25 @@ DECLARE_STATIC_KEY_FALSE(bnxt_xdp_locking_key); struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - dma_addr_t mapping, u32 len); + dma_addr_t mapping, u32 len, + struct xdp_buff *xdp); void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, - struct page *page, u8 **data_ptr, unsigned int *len, + struct xdp_buff xdp, struct page *page, unsigned int *len, u8 *event); int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp); int bnxt_xdp_xmit(struct net_device *dev, int num_frames, struct xdp_frame **frames, u32 flags); +bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr); + +void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + u16 cons, u8 **data_ptr, unsigned int *len, + struct xdp_buff *xdp); +void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, + struct xdp_buff *xdp); +struct sk_buff *bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, + u8 num_frags, struct page_pool *pool, + struct xdp_buff *xdp, + struct rx_cmp_ext *rxcmp1); #endif diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index e475be29845c..a5140d4d3baf 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -337,11 +337,9 @@ static int macb_mdio_read(struct mii_bus *bus, int mii_id, int regnum) struct macb *bp = bus->priv; int status; - status = pm_runtime_get_sync(&bp->pdev->dev); - if (status < 0) { - pm_runtime_put_noidle(&bp->pdev->dev); + status = pm_runtime_resume_and_get(&bp->pdev->dev); + if (status < 0) goto mdio_pm_exit; - } status = macb_mdio_wait_for_idle(bp); if (status < 0) @@ -391,11 +389,9 @@ static int macb_mdio_write(struct mii_bus *bus, int mii_id, int regnum, struct macb *bp = bus->priv; int status; - status = pm_runtime_get_sync(&bp->pdev->dev); - if (status < 0) { - pm_runtime_put_noidle(&bp->pdev->dev); + status = pm_runtime_resume_and_get(&bp->pdev->dev); + if (status < 0) goto mdio_pm_exit; - } status = macb_mdio_wait_for_idle(bp); if (status < 0) @@ -2753,9 +2749,9 @@ static int macb_open(struct net_device *dev) netdev_dbg(bp->dev, "open\n"); - err = pm_runtime_get_sync(&bp->pdev->dev); + err = pm_runtime_resume_and_get(&bp->pdev->dev); if (err < 0) - goto pm_exit; + return err; /* RX buffers initialization */ macb_init_rx_buffer_size(bp, bufsz); @@ -4142,11 +4138,9 @@ static int at91ether_open(struct net_device *dev) u32 ctl; int ret; - ret = pm_runtime_get_sync(&lp->pdev->dev); - if (ret < 0) { - pm_runtime_put_noidle(&lp->pdev->dev); + ret = pm_runtime_resume_and_get(&lp->pdev->dev); + if (ret < 0) return ret; - } /* Clear internal statistics */ ctl = macb_readl(lp, NCR); diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 457cb7121000..1281d1565ef8 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1224,7 +1224,7 @@ static int xgmac_rx(struct xgmac_priv *priv, int limit) * @budget : maximum number of packets that the current CPU can receive from * all interfaces. * Description : - * This function implements the the reception process. + * This function implements the reception process. * Also it runs the TX completion thread */ static int xgmac_poll(struct napi_struct *napi, int budget) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index 9e2378013642..41714203ace8 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -567,7 +567,7 @@ void chtls_shutdown(struct sock *sk, int how); void chtls_destroy_sock(struct sock *sk); int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int chtls_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len); + size_t len, int flags, int *addr_len); int chtls_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int send_tx_flowc_wr(struct sock *sk, int compl, diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index c320cc8ca68d..539992dad8ba 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1426,7 +1426,7 @@ static void chtls_cleanup_rbuf(struct sock *sk, int copied) } static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) + int flags, int *addr_len) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct chtls_hws *hws = &csk->tlshws; @@ -1441,7 +1441,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, buffers_freed = 0; - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) @@ -1616,7 +1616,7 @@ skip_copy: * Peek at data in a socket's receive buffer. */ static int peekmsg(struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags) + size_t len, int flags) { struct tcp_sock *tp = tcp_sk(sk); u32 peek_seq, offset; @@ -1626,7 +1626,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, long timeo; lock_sock(sk); - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); peek_seq = tp->copied_seq; do { @@ -1737,7 +1737,7 @@ found_ok_skb: } int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) + int flags, int *addr_len) { struct tcp_sock *tp = tcp_sk(sk); struct chtls_sock *csk; @@ -1750,25 +1750,23 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, buffers_freed = 0; if (unlikely(flags & MSG_OOB)) - return tcp_prot.recvmsg(sk, msg, len, nonblock, flags, - addr_len); + return tcp_prot.recvmsg(sk, msg, len, flags, addr_len); if (unlikely(flags & MSG_PEEK)) - return peekmsg(sk, msg, len, nonblock, flags); + return peekmsg(sk, msg, len, flags); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && sk->sk_state == TCP_ESTABLISHED) - sk_busy_loop(sk, nonblock); + sk_busy_loop(sk, flags & MSG_DONTWAIT); lock_sock(sk); csk = rcu_dereference_sk_user_data(sk); if (is_tls_rx(csk)) - return chtls_pt_recvmsg(sk, msg, len, nonblock, - flags, addr_len); + return chtls_pt_recvmsg(sk, msg, len, flags, addr_len); - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 55c6bce5da61..18558a019353 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -852,6 +852,7 @@ struct i40e_vsi { u64 tx_busy; u64 tx_linearize; u64 tx_force_wb; + u64 tx_stopped; u64 rx_buf_failed; u64 rx_page_failed; u64 rx_page_reuse; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 6aefffd83615..2819e261a126 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -47,6 +47,7 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: case I40E_DEV_ID_SFP_I_X722: + case I40E_DEV_ID_SFP_X722_A: hw->mac.type = I40E_MAC_X722; break; default: diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index be7c6f34d45c..c9dcd6d92c83 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -309,10 +309,11 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) tx_ring->stats.bytes, tx_ring->tx_stats.restart_queue); dev_info(&pf->pdev->dev, - " tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n", + " tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld, tx_stopped = %lld\n", i, tx_ring->tx_stats.tx_busy, - tx_ring->tx_stats.tx_done_old); + tx_ring->tx_stats.tx_done_old, + tx_ring->tx_stats.tx_stopped); dev_info(&pf->pdev->dev, " tx_rings[%i]: size = %i\n", i, tx_ring->size); diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h index 1bcb0ec0f0c0..2610338002fe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h @@ -33,6 +33,7 @@ #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 #define I40E_DEV_ID_SFP_I_X722 0x37D3 +#define I40E_DEV_ID_SFP_X722_A 0x0DDA #endif /* _I40E_DEVIDS_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index e48499624d22..610f00cbaff9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -293,12 +293,14 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("tx_linearize", tx_linearize), I40E_VSI_STAT("tx_force_wb", tx_force_wb), I40E_VSI_STAT("tx_busy", tx_busy), + I40E_VSI_STAT("tx_stopped", tx_stopped), I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed), I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), I40E_VSI_STAT("rx_cache_reuse", rx_page_reuse), I40E_VSI_STAT("rx_cache_alloc", rx_page_alloc), I40E_VSI_STAT("rx_cache_waive", rx_page_waive), I40E_VSI_STAT("rx_cache_busy", rx_page_busy), + I40E_VSI_STAT("tx_restart", tx_restart), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6778df2177a1..358c2edc118d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -77,6 +77,7 @@ static const struct pci_device_id i40e_pci_tbl[] = { {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722_A), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_X710_N3000), 0}, @@ -785,6 +786,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) unsigned int start; u64 tx_linearize; u64 tx_force_wb; + u64 tx_stopped; u64 rx_p, rx_b; u64 tx_p, tx_b; u16 q; @@ -804,6 +806,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rx_b = rx_p = 0; tx_b = tx_p = 0; tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; + tx_stopped = 0; rx_page = 0; rx_buf = 0; rx_reuse = 0; @@ -828,6 +831,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; tx_force_wb += p->tx_stats.tx_force_wb; + tx_stopped += p->tx_stats.tx_stopped; /* locate Rx ring */ p = READ_ONCE(vsi->rx_rings[q]); @@ -872,6 +876,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_busy = tx_busy; vsi->tx_linearize = tx_linearize; vsi->tx_force_wb = tx_force_wb; + vsi->tx_stopped = tx_stopped; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; vsi->rx_page_reuse = rx_reuse; @@ -13436,8 +13441,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) np->vsi = vsi; hw_enc_features = NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | + NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_SOFT_FEATURES | NETIF_F_TSO | @@ -13468,6 +13472,23 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) /* record features VLANs can make use of */ netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID; +#define I40E_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ + NETIF_F_GSO_GRE_CSUM | \ + NETIF_F_GSO_IPXIP4 | \ + NETIF_F_GSO_IPXIP6 | \ + NETIF_F_GSO_UDP_TUNNEL | \ + NETIF_F_GSO_UDP_TUNNEL_CSUM) + + netdev->gso_partial_features = I40E_GSO_PARTIAL_FEATURES; + netdev->features |= NETIF_F_GSO_PARTIAL | + I40E_GSO_PARTIAL_FEATURES; + + netdev->mpls_features |= NETIF_F_SG; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->mpls_features |= NETIF_F_TSO; + netdev->mpls_features |= NETIF_F_TSO6; + netdev->mpls_features |= I40E_GSO_PARTIAL_FEATURES; + /* enable macvlan offloads */ netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 0eae5858f2fe..7bc1174edf6b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3,6 +3,7 @@ #include <linux/prefetch.h> #include <linux/bpf_trace.h> +#include <net/mpls.h> #include <net/xdp.h> #include "i40e.h" #include "i40e_trace.h" @@ -3015,6 +3016,7 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, { struct sk_buff *skb = first->skb; u64 cd_cmd, cd_tso_len, cd_mss; + __be16 protocol; union { struct iphdr *v4; struct ipv6hdr *v6; @@ -3026,7 +3028,7 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, unsigned char *hdr; } l4; u32 paylen, l4_offset; - u16 gso_segs, gso_size; + u16 gso_size; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -3039,15 +3041,23 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, if (err < 0) return err; - ip.hdr = skb_network_header(skb); - l4.hdr = skb_transport_header(skb); + protocol = vlan_get_protocol(skb); + + if (eth_p_mpls(protocol)) + ip.hdr = skb_inner_network_header(skb); + else + ip.hdr = skb_network_header(skb); + l4.hdr = skb_checksum_start(skb); /* initialize outer IP header fields */ if (ip.v4->version == 4) { ip.v4->tot_len = 0; ip.v4->check = 0; + + first->tx_flags |= I40E_TX_FLAGS_TSO; } else { ip.v6->payload_len = 0; + first->tx_flags |= I40E_TX_FLAGS_TSO; } if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | @@ -3100,10 +3110,9 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, /* pull values out of skb_shinfo */ gso_size = skb_shinfo(skb)->gso_size; - gso_segs = skb_shinfo(skb)->gso_segs; /* update GSO size and bytecount with header size */ - first->gso_segs = gso_segs; + first->gso_segs = skb_shinfo(skb)->gso_segs; first->bytecount += (first->gso_segs - 1) * *hdr_len; /* find the field values */ @@ -3187,13 +3196,27 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, unsigned char *exthdr; u32 offset, cmd = 0; __be16 frag_off; + __be16 protocol; u8 l4_proto = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) return 0; - ip.hdr = skb_network_header(skb); - l4.hdr = skb_transport_header(skb); + protocol = vlan_get_protocol(skb); + + if (eth_p_mpls(protocol)) + ip.hdr = skb_inner_network_header(skb); + else + ip.hdr = skb_network_header(skb); + l4.hdr = skb_checksum_start(skb); + + /* set the tx_flags to indicate the IP protocol type. this is + * required so that checksum header computation below is accurate. + */ + if (ip.v4->version == 4) + *tx_flags |= I40E_TX_FLAGS_IPV4; + else + *tx_flags |= I40E_TX_FLAGS_IPV6; /* compute outer L2 header size */ offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; @@ -3373,6 +3396,8 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) /* Memory barrier before checking head and tail */ smp_mb(); + ++tx_ring->tx_stats.tx_stopped; + /* Check again in a case another CPU has just made room available. */ if (likely(I40E_DESC_UNUSED(tx_ring) < size)) return -EBUSY; @@ -3749,7 +3774,6 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, struct i40e_tx_buffer *first; u32 td_offset = 0; u32 tx_flags = 0; - __be16 protocol; u32 td_cmd = 0; u8 hdr_len = 0; int tso, count; @@ -3791,15 +3815,6 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) goto out_drop; - /* obtain protocol of skb */ - protocol = vlan_get_protocol(skb); - - /* setup IPv4/IPv6 offloads */ - if (protocol == htons(ETH_P_IP)) - tx_flags |= I40E_TX_FLAGS_IPV4; - else if (protocol == htons(ETH_P_IPV6)) - tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(first, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index c471c2da313c..41f86e9535a0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -290,6 +290,7 @@ struct i40e_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; + u64 tx_stopped; int prev_pkt_ctr; }; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5b1198859da7..fde839ef0613 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3336,7 +3336,9 @@ static void ice_set_netdev_features(struct net_device *netdev) vlano_features | tso_features; /* add support for HW_CSUM on packets with MPLS header */ - netdev->mpls_features = NETIF_F_HW_CSUM; + netdev->mpls_features = NETIF_F_HW_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6; /* enable features */ netdev->features |= netdev->hw_features; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 25b8f6f726eb..496250f9f8fc 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -30,12 +30,46 @@ static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0, 0x2, 0, 0, 0, 0, 0, 0x81, 0, 0, 0}; +enum { + ICE_PKT_VLAN = BIT(0), + ICE_PKT_OUTER_IPV6 = BIT(1), + ICE_PKT_TUN_GTPC = BIT(2), + ICE_PKT_TUN_GTPU = BIT(3), + ICE_PKT_TUN_NVGRE = BIT(4), + ICE_PKT_TUN_UDP = BIT(5), + ICE_PKT_INNER_IPV6 = BIT(6), + ICE_PKT_INNER_TCP = BIT(7), + ICE_PKT_INNER_UDP = BIT(8), + ICE_PKT_GTP_NOPAY = BIT(9), +}; + struct ice_dummy_pkt_offsets { enum ice_protocol_type type; u16 offset; /* ICE_PROTOCOL_LAST indicates end of list */ }; -static const struct ice_dummy_pkt_offsets dummy_gre_tcp_packet_offsets[] = { +struct ice_dummy_pkt_profile { + const struct ice_dummy_pkt_offsets *offsets; + const u8 *pkt; + u32 match; + u16 pkt_len; +}; + +#define ICE_DECLARE_PKT_OFFSETS(type) \ + static const struct ice_dummy_pkt_offsets \ + ice_dummy_##type##_packet_offsets[] + +#define ICE_DECLARE_PKT_TEMPLATE(type) \ + static const u8 ice_dummy_##type##_packet[] + +#define ICE_PKT_PROFILE(type, m) { \ + .match = (m), \ + .pkt = ice_dummy_##type##_packet, \ + .pkt_len = sizeof(ice_dummy_##type##_packet), \ + .offsets = ice_dummy_##type##_packet_offsets, \ +} + +ICE_DECLARE_PKT_OFFSETS(gre_tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV4_OFOS, 14 }, @@ -47,7 +81,7 @@ static const struct ice_dummy_pkt_offsets dummy_gre_tcp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_gre_tcp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(gre_tcp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -82,7 +116,7 @@ static const u8 dummy_gre_tcp_packet[] = { 0x00, 0x00, 0x00, 0x00 }; -static const struct ice_dummy_pkt_offsets dummy_gre_udp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(gre_udp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV4_OFOS, 14 }, @@ -94,7 +128,7 @@ static const struct ice_dummy_pkt_offsets dummy_gre_udp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_gre_udp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(gre_udp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -126,7 +160,7 @@ static const u8 dummy_gre_udp_packet[] = { 0x00, 0x08, 0x00, 0x00, }; -static const struct ice_dummy_pkt_offsets dummy_udp_tun_tcp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(udp_tun_tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV4_OFOS, 14 }, @@ -141,7 +175,7 @@ static const struct ice_dummy_pkt_offsets dummy_udp_tun_tcp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_udp_tun_tcp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(udp_tun_tcp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -179,7 +213,7 @@ static const u8 dummy_udp_tun_tcp_packet[] = { 0x00, 0x00, 0x00, 0x00 }; -static const struct ice_dummy_pkt_offsets dummy_udp_tun_udp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(udp_tun_udp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV4_OFOS, 14 }, @@ -194,7 +228,7 @@ static const struct ice_dummy_pkt_offsets dummy_udp_tun_udp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_udp_tun_udp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(udp_tun_udp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -229,8 +263,7 @@ static const u8 dummy_udp_tun_udp_packet[] = { 0x00, 0x08, 0x00, 0x00, }; -static const struct ice_dummy_pkt_offsets -dummy_gre_ipv6_tcp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(gre_ipv6_tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV4_OFOS, 14 }, @@ -242,7 +275,7 @@ dummy_gre_ipv6_tcp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_gre_ipv6_tcp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(gre_ipv6_tcp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -282,8 +315,7 @@ static const u8 dummy_gre_ipv6_tcp_packet[] = { 0x00, 0x00, 0x00, 0x00 }; -static const struct ice_dummy_pkt_offsets -dummy_gre_ipv6_udp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(gre_ipv6_udp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV4_OFOS, 14 }, @@ -295,7 +327,7 @@ dummy_gre_ipv6_udp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_gre_ipv6_udp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(gre_ipv6_udp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -332,8 +364,7 @@ static const u8 dummy_gre_ipv6_udp_packet[] = { 0x00, 0x08, 0x00, 0x00, }; -static const struct ice_dummy_pkt_offsets -dummy_udp_tun_ipv6_tcp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(udp_tun_ipv6_tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV4_OFOS, 14 }, @@ -348,7 +379,7 @@ dummy_udp_tun_ipv6_tcp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_udp_tun_ipv6_tcp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(udp_tun_ipv6_tcp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -391,8 +422,7 @@ static const u8 dummy_udp_tun_ipv6_tcp_packet[] = { 0x00, 0x00, 0x00, 0x00 }; -static const struct ice_dummy_pkt_offsets -dummy_udp_tun_ipv6_udp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(udp_tun_ipv6_udp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV4_OFOS, 14 }, @@ -407,7 +437,7 @@ dummy_udp_tun_ipv6_udp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_udp_tun_ipv6_udp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(udp_tun_ipv6_udp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -448,7 +478,7 @@ static const u8 dummy_udp_tun_ipv6_udp_packet[] = { }; /* offset info for MAC + IPv4 + UDP dummy packet */ -static const struct ice_dummy_pkt_offsets dummy_udp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(udp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV4_OFOS, 14 }, @@ -457,7 +487,7 @@ static const struct ice_dummy_pkt_offsets dummy_udp_packet_offsets[] = { }; /* Dummy packet for MAC + IPv4 + UDP */ -static const u8 dummy_udp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(udp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -477,7 +507,7 @@ static const u8 dummy_udp_packet[] = { }; /* offset info for MAC + VLAN + IPv4 + UDP dummy packet */ -static const struct ice_dummy_pkt_offsets dummy_vlan_udp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(vlan_udp) = { { ICE_MAC_OFOS, 0 }, { ICE_VLAN_OFOS, 12 }, { ICE_ETYPE_OL, 16 }, @@ -487,7 +517,7 @@ static const struct ice_dummy_pkt_offsets dummy_vlan_udp_packet_offsets[] = { }; /* C-tag (801.1Q), IPv4:UDP dummy packet */ -static const u8 dummy_vlan_udp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(vlan_udp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -509,7 +539,7 @@ static const u8 dummy_vlan_udp_packet[] = { }; /* offset info for MAC + IPv4 + TCP dummy packet */ -static const struct ice_dummy_pkt_offsets dummy_tcp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV4_OFOS, 14 }, @@ -518,7 +548,7 @@ static const struct ice_dummy_pkt_offsets dummy_tcp_packet_offsets[] = { }; /* Dummy packet for MAC + IPv4 + TCP */ -static const u8 dummy_tcp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(tcp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -541,7 +571,7 @@ static const u8 dummy_tcp_packet[] = { }; /* offset info for MAC + VLAN (C-tag, 802.1Q) + IPv4 + TCP dummy packet */ -static const struct ice_dummy_pkt_offsets dummy_vlan_tcp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(vlan_tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_VLAN_OFOS, 12 }, { ICE_ETYPE_OL, 16 }, @@ -551,7 +581,7 @@ static const struct ice_dummy_pkt_offsets dummy_vlan_tcp_packet_offsets[] = { }; /* C-tag (801.1Q), IPv4:TCP dummy packet */ -static const u8 dummy_vlan_tcp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(vlan_tcp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -575,7 +605,7 @@ static const u8 dummy_vlan_tcp_packet[] = { 0x00, 0x00, /* 2 bytes for 4 byte alignment */ }; -static const struct ice_dummy_pkt_offsets dummy_tcp_ipv6_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(tcp_ipv6) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV6_OFOS, 14 }, @@ -583,7 +613,7 @@ static const struct ice_dummy_pkt_offsets dummy_tcp_ipv6_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_tcp_ipv6_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(tcp_ipv6) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -611,8 +641,7 @@ static const u8 dummy_tcp_ipv6_packet[] = { }; /* C-tag (802.1Q): IPv6 + TCP */ -static const struct ice_dummy_pkt_offsets -dummy_vlan_tcp_ipv6_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(vlan_tcp_ipv6) = { { ICE_MAC_OFOS, 0 }, { ICE_VLAN_OFOS, 12 }, { ICE_ETYPE_OL, 16 }, @@ -622,7 +651,7 @@ dummy_vlan_tcp_ipv6_packet_offsets[] = { }; /* C-tag (802.1Q), IPv6 + TCP dummy packet */ -static const u8 dummy_vlan_tcp_ipv6_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(vlan_tcp_ipv6) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -652,7 +681,7 @@ static const u8 dummy_vlan_tcp_ipv6_packet[] = { }; /* IPv6 + UDP */ -static const struct ice_dummy_pkt_offsets dummy_udp_ipv6_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(udp_ipv6) = { { ICE_MAC_OFOS, 0 }, { ICE_ETYPE_OL, 12 }, { ICE_IPV6_OFOS, 14 }, @@ -661,7 +690,7 @@ static const struct ice_dummy_pkt_offsets dummy_udp_ipv6_packet_offsets[] = { }; /* IPv6 + UDP dummy packet */ -static const u8 dummy_udp_ipv6_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(udp_ipv6) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -689,8 +718,7 @@ static const u8 dummy_udp_ipv6_packet[] = { }; /* C-tag (802.1Q): IPv6 + UDP */ -static const struct ice_dummy_pkt_offsets -dummy_vlan_udp_ipv6_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(vlan_udp_ipv6) = { { ICE_MAC_OFOS, 0 }, { ICE_VLAN_OFOS, 12 }, { ICE_ETYPE_OL, 16 }, @@ -700,7 +728,7 @@ dummy_vlan_udp_ipv6_packet_offsets[] = { }; /* C-tag (802.1Q), IPv6 + UDP dummy packet */ -static const u8 dummy_vlan_udp_ipv6_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(vlan_udp_ipv6) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -727,8 +755,7 @@ static const u8 dummy_vlan_udp_ipv6_packet[] = { }; /* Outer IPv4 + Outer UDP + GTP + Inner IPv4 + Inner TCP */ -static const -struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv4_tcp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4_tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_IPV4_OFOS, 14 }, { ICE_UDP_OF, 34 }, @@ -738,7 +765,7 @@ struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv4_tcp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_ipv4_gtpu_ipv4_tcp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4_tcp) = { 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -776,8 +803,7 @@ static const u8 dummy_ipv4_gtpu_ipv4_tcp_packet[] = { }; /* Outer IPv4 + Outer UDP + GTP + Inner IPv4 + Inner UDP */ -static const -struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv4_udp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4_udp) = { { ICE_MAC_OFOS, 0 }, { ICE_IPV4_OFOS, 14 }, { ICE_UDP_OF, 34 }, @@ -787,7 +813,7 @@ struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv4_udp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_ipv4_gtpu_ipv4_udp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4_udp) = { 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -822,8 +848,7 @@ static const u8 dummy_ipv4_gtpu_ipv4_udp_packet[] = { }; /* Outer IPv6 + Outer UDP + GTP + Inner IPv4 + Inner TCP */ -static const -struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv6_tcp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv6_tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_IPV4_OFOS, 14 }, { ICE_UDP_OF, 34 }, @@ -833,7 +858,7 @@ struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv6_tcp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_ipv4_gtpu_ipv6_tcp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv6_tcp) = { 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -875,8 +900,7 @@ static const u8 dummy_ipv4_gtpu_ipv6_tcp_packet[] = { 0x00, 0x00, /* 2 bytes for 4 byte alignment */ }; -static const -struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv6_udp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv6_udp) = { { ICE_MAC_OFOS, 0 }, { ICE_IPV4_OFOS, 14 }, { ICE_UDP_OF, 34 }, @@ -886,7 +910,7 @@ struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv6_udp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_ipv4_gtpu_ipv6_udp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv6_udp) = { 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -925,8 +949,7 @@ static const u8 dummy_ipv4_gtpu_ipv6_udp_packet[] = { 0x00, 0x00, /* 2 bytes for 4 byte alignment */ }; -static const -struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv4_tcp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv4_tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_IPV6_OFOS, 14 }, { ICE_UDP_OF, 54 }, @@ -936,7 +959,7 @@ struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv4_tcp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_ipv6_gtpu_ipv4_tcp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv4_tcp) = { 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -978,8 +1001,7 @@ static const u8 dummy_ipv6_gtpu_ipv4_tcp_packet[] = { 0x00, 0x00, /* 2 bytes for 4 byte alignment */ }; -static const -struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv4_udp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv4_udp) = { { ICE_MAC_OFOS, 0 }, { ICE_IPV6_OFOS, 14 }, { ICE_UDP_OF, 54 }, @@ -989,7 +1011,7 @@ struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv4_udp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_ipv6_gtpu_ipv4_udp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv4_udp) = { 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1028,8 +1050,7 @@ static const u8 dummy_ipv6_gtpu_ipv4_udp_packet[] = { 0x00, 0x00, /* 2 bytes for 4 byte alignment */ }; -static const -struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv6_tcp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv6_tcp) = { { ICE_MAC_OFOS, 0 }, { ICE_IPV6_OFOS, 14 }, { ICE_UDP_OF, 54 }, @@ -1039,7 +1060,7 @@ struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv6_tcp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_ipv6_gtpu_ipv6_tcp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv6_tcp) = { 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1086,8 +1107,7 @@ static const u8 dummy_ipv6_gtpu_ipv6_tcp_packet[] = { 0x00, 0x00, /* 2 bytes for 4 byte alignment */ }; -static const -struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv6_udp_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv6_udp) = { { ICE_MAC_OFOS, 0 }, { ICE_IPV6_OFOS, 14 }, { ICE_UDP_OF, 54 }, @@ -1097,7 +1117,7 @@ struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv6_udp_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_ipv6_gtpu_ipv6_udp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv6_udp) = { 0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1141,7 +1161,15 @@ static const u8 dummy_ipv6_gtpu_ipv6_udp_packet[] = { 0x00, 0x00, /* 2 bytes for 4 byte alignment */ }; -static const u8 dummy_ipv4_gtpu_ipv4_packet[] = { +ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4) = { + { ICE_MAC_OFOS, 0 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_UDP_OF, 34 }, + { ICE_GTP_NO_PAY, 42 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1171,17 +1199,7 @@ static const u8 dummy_ipv4_gtpu_ipv4_packet[] = { 0x00, 0x00, }; -static const -struct ice_dummy_pkt_offsets dummy_ipv4_gtp_no_pay_packet_offsets[] = { - { ICE_MAC_OFOS, 0 }, - { ICE_IPV4_OFOS, 14 }, - { ICE_UDP_OF, 34 }, - { ICE_GTP_NO_PAY, 42 }, - { ICE_PROTOCOL_LAST, 0 }, -}; - -static const -struct ice_dummy_pkt_offsets dummy_ipv6_gtp_no_pay_packet_offsets[] = { +ICE_DECLARE_PKT_OFFSETS(ipv6_gtp) = { { ICE_MAC_OFOS, 0 }, { ICE_IPV6_OFOS, 14 }, { ICE_UDP_OF, 54 }, @@ -1189,7 +1207,7 @@ struct ice_dummy_pkt_offsets dummy_ipv6_gtp_no_pay_packet_offsets[] = { { ICE_PROTOCOL_LAST, 0 }, }; -static const u8 dummy_ipv6_gtp_packet[] = { +ICE_DECLARE_PKT_TEMPLATE(ipv6_gtp) = { 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1215,6 +1233,55 @@ static const u8 dummy_ipv6_gtp_packet[] = { 0x00, 0x00, }; +static const struct ice_dummy_pkt_profile ice_dummy_pkt_profiles[] = { + ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPU | ICE_PKT_OUTER_IPV6 | + ICE_PKT_GTP_NOPAY), + ICE_PKT_PROFILE(ipv6_gtpu_ipv6_udp, ICE_PKT_TUN_GTPU | + ICE_PKT_OUTER_IPV6 | + ICE_PKT_INNER_IPV6 | + ICE_PKT_INNER_UDP), + ICE_PKT_PROFILE(ipv6_gtpu_ipv6_tcp, ICE_PKT_TUN_GTPU | + ICE_PKT_OUTER_IPV6 | + ICE_PKT_INNER_IPV6), + ICE_PKT_PROFILE(ipv6_gtpu_ipv4_udp, ICE_PKT_TUN_GTPU | + ICE_PKT_OUTER_IPV6 | + ICE_PKT_INNER_UDP), + ICE_PKT_PROFILE(ipv6_gtpu_ipv4_tcp, ICE_PKT_TUN_GTPU | + ICE_PKT_OUTER_IPV6), + ICE_PKT_PROFILE(ipv4_gtpu_ipv4, ICE_PKT_TUN_GTPU | ICE_PKT_GTP_NOPAY), + ICE_PKT_PROFILE(ipv4_gtpu_ipv6_udp, ICE_PKT_TUN_GTPU | + ICE_PKT_INNER_IPV6 | + ICE_PKT_INNER_UDP), + ICE_PKT_PROFILE(ipv4_gtpu_ipv6_tcp, ICE_PKT_TUN_GTPU | + ICE_PKT_INNER_IPV6), + ICE_PKT_PROFILE(ipv4_gtpu_ipv4_udp, ICE_PKT_TUN_GTPU | + ICE_PKT_INNER_UDP), + ICE_PKT_PROFILE(ipv4_gtpu_ipv4_tcp, ICE_PKT_TUN_GTPU), + ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPC | ICE_PKT_OUTER_IPV6), + ICE_PKT_PROFILE(ipv4_gtpu_ipv4, ICE_PKT_TUN_GTPC), + ICE_PKT_PROFILE(gre_ipv6_tcp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_IPV6 | + ICE_PKT_INNER_TCP), + ICE_PKT_PROFILE(gre_tcp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_TCP), + ICE_PKT_PROFILE(gre_ipv6_udp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_IPV6), + ICE_PKT_PROFILE(gre_udp, ICE_PKT_TUN_NVGRE), + ICE_PKT_PROFILE(udp_tun_ipv6_tcp, ICE_PKT_TUN_UDP | + ICE_PKT_INNER_IPV6 | + ICE_PKT_INNER_TCP), + ICE_PKT_PROFILE(udp_tun_tcp, ICE_PKT_TUN_UDP | ICE_PKT_INNER_TCP), + ICE_PKT_PROFILE(udp_tun_ipv6_udp, ICE_PKT_TUN_UDP | + ICE_PKT_INNER_IPV6), + ICE_PKT_PROFILE(udp_tun_udp, ICE_PKT_TUN_UDP), + ICE_PKT_PROFILE(vlan_udp_ipv6, ICE_PKT_OUTER_IPV6 | ICE_PKT_INNER_UDP | + ICE_PKT_VLAN), + ICE_PKT_PROFILE(udp_ipv6, ICE_PKT_OUTER_IPV6 | ICE_PKT_INNER_UDP), + ICE_PKT_PROFILE(vlan_udp, ICE_PKT_INNER_UDP | ICE_PKT_VLAN), + ICE_PKT_PROFILE(udp, ICE_PKT_INNER_UDP), + ICE_PKT_PROFILE(vlan_tcp_ipv6, ICE_PKT_OUTER_IPV6 | ICE_PKT_VLAN), + ICE_PKT_PROFILE(tcp_ipv6, ICE_PKT_OUTER_IPV6), + ICE_PKT_PROFILE(vlan_tcp, ICE_PKT_VLAN), + ICE_PKT_PROFILE(tcp, 0), +}; + #define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE \ (offsetof(struct ice_aqc_sw_rules_elem, pdata.lkup_tx_rx.hdr) + \ (DUMMY_ETH_HDR_LEN * \ @@ -5501,212 +5568,66 @@ err_free_lkup_exts: * structure per protocol header * @lkups_cnt: number of protocols * @tun_type: tunnel type - * @pkt: dummy packet to fill according to filter match criteria - * @pkt_len: packet length of dummy packet - * @offsets: pointer to receive the pointer to the offsets for the packet + * + * Returns the &ice_dummy_pkt_profile corresponding to these lookup params. */ -static void +static const struct ice_dummy_pkt_profile * ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, - enum ice_sw_tunnel_type tun_type, - const u8 **pkt, u16 *pkt_len, - const struct ice_dummy_pkt_offsets **offsets) + enum ice_sw_tunnel_type tun_type) { - bool inner_tcp = false, inner_udp = false, outer_ipv6 = false; - bool vlan = false, inner_ipv6 = false, gtp_no_pay = false; + const struct ice_dummy_pkt_profile *ret = ice_dummy_pkt_profiles; + u32 match = 0; u16 i; + switch (tun_type) { + case ICE_SW_TUN_GTPC: + match |= ICE_PKT_TUN_GTPC; + break; + case ICE_SW_TUN_GTPU: + match |= ICE_PKT_TUN_GTPU; + break; + case ICE_SW_TUN_NVGRE: + match |= ICE_PKT_TUN_NVGRE; + break; + case ICE_SW_TUN_GENEVE: + case ICE_SW_TUN_VXLAN: + match |= ICE_PKT_TUN_UDP; + break; + default: + break; + } + for (i = 0; i < lkups_cnt; i++) { if (lkups[i].type == ICE_UDP_ILOS) - inner_udp = true; + match |= ICE_PKT_INNER_UDP; else if (lkups[i].type == ICE_TCP_IL) - inner_tcp = true; + match |= ICE_PKT_INNER_TCP; else if (lkups[i].type == ICE_IPV6_OFOS) - outer_ipv6 = true; + match |= ICE_PKT_OUTER_IPV6; else if (lkups[i].type == ICE_VLAN_OFOS) - vlan = true; + match |= ICE_PKT_VLAN; else if (lkups[i].type == ICE_ETYPE_OL && lkups[i].h_u.ethertype.ethtype_id == cpu_to_be16(ICE_IPV6_ETHER_ID) && lkups[i].m_u.ethertype.ethtype_id == cpu_to_be16(0xFFFF)) - outer_ipv6 = true; + match |= ICE_PKT_OUTER_IPV6; else if (lkups[i].type == ICE_ETYPE_IL && lkups[i].h_u.ethertype.ethtype_id == cpu_to_be16(ICE_IPV6_ETHER_ID) && lkups[i].m_u.ethertype.ethtype_id == cpu_to_be16(0xFFFF)) - inner_ipv6 = true; + match |= ICE_PKT_INNER_IPV6; else if (lkups[i].type == ICE_IPV6_IL) - inner_ipv6 = true; + match |= ICE_PKT_INNER_IPV6; else if (lkups[i].type == ICE_GTP_NO_PAY) - gtp_no_pay = true; + match |= ICE_PKT_GTP_NOPAY; } - if (tun_type == ICE_SW_TUN_GTPU) { - if (outer_ipv6) { - if (gtp_no_pay) { - *pkt = dummy_ipv6_gtp_packet; - *pkt_len = sizeof(dummy_ipv6_gtp_packet); - *offsets = dummy_ipv6_gtp_no_pay_packet_offsets; - } else if (inner_ipv6) { - if (inner_udp) { - *pkt = dummy_ipv6_gtpu_ipv6_udp_packet; - *pkt_len = sizeof(dummy_ipv6_gtpu_ipv6_udp_packet); - *offsets = dummy_ipv6_gtpu_ipv6_udp_packet_offsets; - } else { - *pkt = dummy_ipv6_gtpu_ipv6_tcp_packet; - *pkt_len = sizeof(dummy_ipv6_gtpu_ipv6_tcp_packet); - *offsets = dummy_ipv6_gtpu_ipv6_tcp_packet_offsets; - } - } else { - if (inner_udp) { - *pkt = dummy_ipv6_gtpu_ipv4_udp_packet; - *pkt_len = sizeof(dummy_ipv6_gtpu_ipv4_udp_packet); - *offsets = dummy_ipv6_gtpu_ipv4_udp_packet_offsets; - } else { - *pkt = dummy_ipv6_gtpu_ipv4_tcp_packet; - *pkt_len = sizeof(dummy_ipv6_gtpu_ipv4_tcp_packet); - *offsets = dummy_ipv6_gtpu_ipv4_tcp_packet_offsets; - } - } - } else { - if (gtp_no_pay) { - *pkt = dummy_ipv4_gtpu_ipv4_packet; - *pkt_len = sizeof(dummy_ipv4_gtpu_ipv4_packet); - *offsets = dummy_ipv4_gtp_no_pay_packet_offsets; - } else if (inner_ipv6) { - if (inner_udp) { - *pkt = dummy_ipv4_gtpu_ipv6_udp_packet; - *pkt_len = sizeof(dummy_ipv4_gtpu_ipv6_udp_packet); - *offsets = dummy_ipv4_gtpu_ipv6_udp_packet_offsets; - } else { - *pkt = dummy_ipv4_gtpu_ipv6_tcp_packet; - *pkt_len = sizeof(dummy_ipv4_gtpu_ipv6_tcp_packet); - *offsets = dummy_ipv4_gtpu_ipv6_tcp_packet_offsets; - } - } else { - if (inner_udp) { - *pkt = dummy_ipv4_gtpu_ipv4_udp_packet; - *pkt_len = sizeof(dummy_ipv4_gtpu_ipv4_udp_packet); - *offsets = dummy_ipv4_gtpu_ipv4_udp_packet_offsets; - } else { - *pkt = dummy_ipv4_gtpu_ipv4_tcp_packet; - *pkt_len = sizeof(dummy_ipv4_gtpu_ipv4_tcp_packet); - *offsets = dummy_ipv4_gtpu_ipv4_tcp_packet_offsets; - } - } - } - return; - } - - if (tun_type == ICE_SW_TUN_GTPC) { - if (outer_ipv6) { - *pkt = dummy_ipv6_gtp_packet; - *pkt_len = sizeof(dummy_ipv6_gtp_packet); - *offsets = dummy_ipv6_gtp_no_pay_packet_offsets; - } else { - *pkt = dummy_ipv4_gtpu_ipv4_packet; - *pkt_len = sizeof(dummy_ipv4_gtpu_ipv4_packet); - *offsets = dummy_ipv4_gtp_no_pay_packet_offsets; - } - return; - } - - if (tun_type == ICE_SW_TUN_NVGRE) { - if (inner_tcp && inner_ipv6) { - *pkt = dummy_gre_ipv6_tcp_packet; - *pkt_len = sizeof(dummy_gre_ipv6_tcp_packet); - *offsets = dummy_gre_ipv6_tcp_packet_offsets; - return; - } - if (inner_tcp) { - *pkt = dummy_gre_tcp_packet; - *pkt_len = sizeof(dummy_gre_tcp_packet); - *offsets = dummy_gre_tcp_packet_offsets; - return; - } - if (inner_ipv6) { - *pkt = dummy_gre_ipv6_udp_packet; - *pkt_len = sizeof(dummy_gre_ipv6_udp_packet); - *offsets = dummy_gre_ipv6_udp_packet_offsets; - return; - } - *pkt = dummy_gre_udp_packet; - *pkt_len = sizeof(dummy_gre_udp_packet); - *offsets = dummy_gre_udp_packet_offsets; - return; - } - - if (tun_type == ICE_SW_TUN_VXLAN || - tun_type == ICE_SW_TUN_GENEVE) { - if (inner_tcp && inner_ipv6) { - *pkt = dummy_udp_tun_ipv6_tcp_packet; - *pkt_len = sizeof(dummy_udp_tun_ipv6_tcp_packet); - *offsets = dummy_udp_tun_ipv6_tcp_packet_offsets; - return; - } - if (inner_tcp) { - *pkt = dummy_udp_tun_tcp_packet; - *pkt_len = sizeof(dummy_udp_tun_tcp_packet); - *offsets = dummy_udp_tun_tcp_packet_offsets; - return; - } - if (inner_ipv6) { - *pkt = dummy_udp_tun_ipv6_udp_packet; - *pkt_len = sizeof(dummy_udp_tun_ipv6_udp_packet); - *offsets = dummy_udp_tun_ipv6_udp_packet_offsets; - return; - } - *pkt = dummy_udp_tun_udp_packet; - *pkt_len = sizeof(dummy_udp_tun_udp_packet); - *offsets = dummy_udp_tun_udp_packet_offsets; - return; - } + while (ret->match && (match & ret->match) != ret->match) + ret++; - if (inner_udp && !outer_ipv6) { - if (vlan) { - *pkt = dummy_vlan_udp_packet; - *pkt_len = sizeof(dummy_vlan_udp_packet); - *offsets = dummy_vlan_udp_packet_offsets; - return; - } - *pkt = dummy_udp_packet; - *pkt_len = sizeof(dummy_udp_packet); - *offsets = dummy_udp_packet_offsets; - return; - } else if (inner_udp && outer_ipv6) { - if (vlan) { - *pkt = dummy_vlan_udp_ipv6_packet; - *pkt_len = sizeof(dummy_vlan_udp_ipv6_packet); - *offsets = dummy_vlan_udp_ipv6_packet_offsets; - return; - } - *pkt = dummy_udp_ipv6_packet; - *pkt_len = sizeof(dummy_udp_ipv6_packet); - *offsets = dummy_udp_ipv6_packet_offsets; - return; - } else if ((inner_tcp && outer_ipv6) || outer_ipv6) { - if (vlan) { - *pkt = dummy_vlan_tcp_ipv6_packet; - *pkt_len = sizeof(dummy_vlan_tcp_ipv6_packet); - *offsets = dummy_vlan_tcp_ipv6_packet_offsets; - return; - } - *pkt = dummy_tcp_ipv6_packet; - *pkt_len = sizeof(dummy_tcp_ipv6_packet); - *offsets = dummy_tcp_ipv6_packet_offsets; - return; - } - - if (vlan) { - *pkt = dummy_vlan_tcp_packet; - *pkt_len = sizeof(dummy_vlan_tcp_packet); - *offsets = dummy_vlan_tcp_packet_offsets; - } else { - *pkt = dummy_tcp_packet; - *pkt_len = sizeof(dummy_tcp_packet); - *offsets = dummy_tcp_packet_offsets; - } + return ret; } /** @@ -5716,15 +5637,12 @@ ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, * structure per protocol header * @lkups_cnt: number of protocols * @s_rule: stores rule information from the match criteria - * @dummy_pkt: dummy packet to fill according to filter match criteria - * @pkt_len: packet length of dummy packet - * @offsets: offset info for the dummy packet + * @profile: dummy packet profile (the template, its size and header offsets) */ static int ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, struct ice_aqc_sw_rules_elem *s_rule, - const u8 *dummy_pkt, u16 pkt_len, - const struct ice_dummy_pkt_offsets *offsets) + const struct ice_dummy_pkt_profile *profile) { u8 *pkt; u16 i; @@ -5734,9 +5652,10 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, */ pkt = s_rule->pdata.lkup_tx_rx.hdr; - memcpy(pkt, dummy_pkt, pkt_len); + memcpy(pkt, profile->pkt, profile->pkt_len); for (i = 0; i < lkups_cnt; i++) { + const struct ice_dummy_pkt_offsets *offsets = profile->offsets; enum ice_protocol_type type; u16 offset = 0, len = 0, j; bool found = false; @@ -5810,16 +5729,18 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, * indicated by the mask to make sure we don't improperly write * over any significant packet data. */ - for (j = 0; j < len / sizeof(u16); j++) - if (((u16 *)&lkups[i].m_u)[j]) - ((u16 *)(pkt + offset))[j] = - (((u16 *)(pkt + offset))[j] & - ~((u16 *)&lkups[i].m_u)[j]) | - (((u16 *)&lkups[i].h_u)[j] & - ((u16 *)&lkups[i].m_u)[j]); + for (j = 0; j < len / sizeof(u16); j++) { + u16 *ptr = (u16 *)(pkt + offset); + u16 mask = lkups[i].m_raw[j]; + + if (!mask) + continue; + + ptr[j] = (ptr[j] & ~mask) | (lkups[i].h_raw[j] & mask); + } } - s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(pkt_len); + s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(profile->pkt_len); return 0; } @@ -6042,12 +5963,11 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, struct ice_rule_query_data *added_entry) { struct ice_adv_fltr_mgmt_list_entry *m_entry, *adv_fltr = NULL; - u16 rid = 0, i, pkt_len, rule_buf_sz, vsi_handle; - const struct ice_dummy_pkt_offsets *pkt_offsets; struct ice_aqc_sw_rules_elem *s_rule = NULL; + const struct ice_dummy_pkt_profile *profile; + u16 rid = 0, i, rule_buf_sz, vsi_handle; struct list_head *rule_head; struct ice_switch_info *sw; - const u8 *pkt = NULL; u16 word_cnt; u32 act = 0; int status; @@ -6065,24 +5985,18 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, /* get # of words we need to match */ word_cnt = 0; for (i = 0; i < lkups_cnt; i++) { - u16 j, *ptr; + u16 j; - ptr = (u16 *)&lkups[i].m_u; - for (j = 0; j < sizeof(lkups->m_u) / sizeof(u16); j++) - if (ptr[j] != 0) + for (j = 0; j < ARRAY_SIZE(lkups->m_raw); j++) + if (lkups[i].m_raw[j]) word_cnt++; } if (!word_cnt || word_cnt > ICE_MAX_CHAIN_WORDS) return -EINVAL; - /* make sure that we can locate a dummy packet */ - ice_find_dummy_packet(lkups, lkups_cnt, rinfo->tun_type, &pkt, &pkt_len, - &pkt_offsets); - if (!pkt) { - status = -EINVAL; - goto err_ice_add_adv_rule; - } + /* locate a dummy packet */ + profile = ice_find_dummy_packet(lkups, lkups_cnt, rinfo->tun_type); if (!(rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI || rinfo->sw_act.fltr_act == ICE_FWD_TO_Q || @@ -6123,7 +6037,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, } return status; } - rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE + pkt_len; + rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE + profile->pkt_len; s_rule = kzalloc(rule_buf_sz, GFP_KERNEL); if (!s_rule) return -ENOMEM; @@ -6183,8 +6097,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(rid); s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act); - status = ice_fill_adv_dummy_packet(lkups, lkups_cnt, s_rule, pkt, - pkt_len, pkt_offsets); + status = ice_fill_adv_dummy_packet(lkups, lkups_cnt, s_rule, profile); if (status) goto err_ice_add_adv_rule; @@ -6192,7 +6105,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, rinfo->tun_type != ICE_SW_TUN_AND_NON_TUN) { status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, s_rule->pdata.lkup_tx_rx.hdr, - pkt_offsets); + profile->offsets); if (status) goto err_ice_add_adv_rule; } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index ed3d1d03befa..ecac75e71395 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -138,8 +138,16 @@ struct ice_update_recipe_lkup_idx_params { struct ice_adv_lkup_elem { enum ice_protocol_type type; - union ice_prot_hdr h_u; /* Header values */ - union ice_prot_hdr m_u; /* Mask of header values to match */ + union { + union ice_prot_hdr h_u; /* Header values */ + /* Used to iterate over the headers */ + u16 h_raw[sizeof(union ice_prot_hdr) / sizeof(u16)]; + }; + union { + union ice_prot_hdr m_u; /* Mask of header values to match */ + /* Used to iterate over header mask */ + u16 m_raw[sizeof(union ice_prot_hdr) / sizeof(u16)]; + }; }; struct ice_sw_act_ctrl { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index f9bf008471c9..3f8b7274ed2f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -8,6 +8,7 @@ #include <linux/prefetch.h> #include <linux/bpf_trace.h> #include <net/dsfield.h> +#include <net/mpls.h> #include <net/xdp.h> #include "ice_txrx_lib.h" #include "ice_lib.h" @@ -1748,18 +1749,24 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) if (skb->ip_summed != CHECKSUM_PARTIAL) return 0; - ip.hdr = skb_network_header(skb); - l4.hdr = skb_transport_header(skb); + protocol = vlan_get_protocol(skb); + + if (eth_p_mpls(protocol)) + ip.hdr = skb_inner_network_header(skb); + else + ip.hdr = skb_network_header(skb); + l4.hdr = skb_checksum_start(skb); /* compute outer L2 header size */ l2_len = ip.hdr - skb->data; offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S; - protocol = vlan_get_protocol(skb); - - if (protocol == htons(ETH_P_IP)) + /* set the tx_flags to indicate the IP protocol type. this is + * required so that checksum header computation below is accurate. + */ + if (ip.v4->version == 4) first->tx_flags |= ICE_TX_FLAGS_IPV4; - else if (protocol == htons(ETH_P_IPV6)) + else if (ip.v6->version == 6) first->tx_flags |= ICE_TX_FLAGS_IPV6; if (skb->encapsulation) { @@ -1957,6 +1964,7 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) unsigned char *hdr; } l4; u64 cd_mss, cd_tso_len; + __be16 protocol; u32 paylen; u8 l4_start; int err; @@ -1972,8 +1980,13 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) return err; /* cppcheck-suppress unreadVariable */ - ip.hdr = skb_network_header(skb); - l4.hdr = skb_transport_header(skb); + protocol = vlan_get_protocol(skb); + + if (eth_p_mpls(protocol)) + ip.hdr = skb_inner_network_header(skb); + else + ip.hdr = skb_network_header(skb); + l4.hdr = skb_checksum_start(skb); /* initialize outer IP header fields */ if (ip.v4->version == 4) { diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index fe0989c0fc25..4cb55724001b 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -177,6 +177,7 @@ config SKY2_DEBUG source "drivers/net/ethernet/marvell/octeontx2/Kconfig" +source "drivers/net/ethernet/marvell/octeon_ep/Kconfig" source "drivers/net/ethernet/marvell/prestera/Kconfig" endif # NET_VENDOR_MARVELL diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile index 9f88fe822555..ceba4aa4f026 100644 --- a/drivers/net/ethernet/marvell/Makefile +++ b/drivers/net/ethernet/marvell/Makefile @@ -11,5 +11,6 @@ obj-$(CONFIG_MVPP2) += mvpp2/ obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_SKGE) += skge.o obj-$(CONFIG_SKY2) += sky2.o +obj-y += octeon_ep/ obj-y += octeontx2/ obj-y += prestera/ diff --git a/drivers/net/ethernet/marvell/octeon_ep/Kconfig b/drivers/net/ethernet/marvell/octeon_ep/Kconfig new file mode 100644 index 000000000000..0d7db815340e --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/Kconfig @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Marvell's Octeon PCI Endpoint NIC Driver Configuration +# + +config OCTEON_EP + tristate "Marvell Octeon PCI Endpoint NIC Driver" + depends on 64BIT + depends on PCI + depends on PTP_1588_CLOCK_OPTIONAL + help + This driver supports networking functionality of Marvell's + Octeon PCI Endpoint NIC. + + To know the list of devices supported by this driver, refer + documentation in + <file:Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst>. + + To compile this drivers as a module, choose M here. Name of the + module is octeon_ep. diff --git a/drivers/net/ethernet/marvell/octeon_ep/Makefile b/drivers/net/ethernet/marvell/octeon_ep/Makefile new file mode 100644 index 000000000000..2026c8118158 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Network driver for Marvell's Octeon PCI Endpoint NIC +# + +obj-$(CONFIG_OCTEON_EP) += octeon_ep.o + +octeon_ep-y := octep_main.o octep_cn9k_pf.o octep_tx.o octep_rx.o \ + octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c new file mode 100644 index 000000000000..1e47143c596d --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c @@ -0,0 +1,737 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +#include "octep_config.h" +#include "octep_main.h" +#include "octep_regs_cn9k_pf.h" + +/* Names of Hardware non-queue generic interrupts */ +static char *cn93_non_ioq_msix_names[] = { + "epf_ire_rint", + "epf_ore_rint", + "epf_vfire_rint0", + "epf_vfire_rint1", + "epf_vfore_rint0", + "epf_vfore_rint1", + "epf_mbox_rint0", + "epf_mbox_rint1", + "epf_oei_rint", + "epf_dma_rint", + "epf_dma_vf_rint0", + "epf_dma_vf_rint1", + "epf_pp_vf_rint0", + "epf_pp_vf_rint1", + "epf_misc_rint", + "epf_rsvd", +}; + +/* Dump useful hardware CSRs for debug purpose */ +static void cn93_dump_regs(struct octep_device *oct, int qno) +{ + struct device *dev = &oct->pdev->dev; + + dev_info(dev, "IQ-%d register dump\n", qno); + dev_info(dev, "R[%d]_IN_INSTR_DBELL[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_IN_INSTR_DBELL(qno), + octep_read_csr64(oct, CN93_SDP_R_IN_INSTR_DBELL(qno))); + dev_info(dev, "R[%d]_IN_CONTROL[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_IN_CONTROL(qno), + octep_read_csr64(oct, CN93_SDP_R_IN_CONTROL(qno))); + dev_info(dev, "R[%d]_IN_ENABLE[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_IN_ENABLE(qno), + octep_read_csr64(oct, CN93_SDP_R_IN_ENABLE(qno))); + dev_info(dev, "R[%d]_IN_INSTR_BADDR[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_IN_INSTR_BADDR(qno), + octep_read_csr64(oct, CN93_SDP_R_IN_INSTR_BADDR(qno))); + dev_info(dev, "R[%d]_IN_INSTR_RSIZE[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_IN_INSTR_RSIZE(qno), + octep_read_csr64(oct, CN93_SDP_R_IN_INSTR_RSIZE(qno))); + dev_info(dev, "R[%d]_IN_CNTS[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_IN_CNTS(qno), + octep_read_csr64(oct, CN93_SDP_R_IN_CNTS(qno))); + dev_info(dev, "R[%d]_IN_INT_LEVELS[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_IN_INT_LEVELS(qno), + octep_read_csr64(oct, CN93_SDP_R_IN_INT_LEVELS(qno))); + dev_info(dev, "R[%d]_IN_PKT_CNT[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_IN_PKT_CNT(qno), + octep_read_csr64(oct, CN93_SDP_R_IN_PKT_CNT(qno))); + dev_info(dev, "R[%d]_IN_BYTE_CNT[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_IN_BYTE_CNT(qno), + octep_read_csr64(oct, CN93_SDP_R_IN_BYTE_CNT(qno))); + + dev_info(dev, "OQ-%d register dump\n", qno); + dev_info(dev, "R[%d]_OUT_SLIST_DBELL[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_OUT_SLIST_DBELL(qno), + octep_read_csr64(oct, CN93_SDP_R_OUT_SLIST_DBELL(qno))); + dev_info(dev, "R[%d]_OUT_CONTROL[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_OUT_CONTROL(qno), + octep_read_csr64(oct, CN93_SDP_R_OUT_CONTROL(qno))); + dev_info(dev, "R[%d]_OUT_ENABLE[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_OUT_ENABLE(qno), + octep_read_csr64(oct, CN93_SDP_R_OUT_ENABLE(qno))); + dev_info(dev, "R[%d]_OUT_SLIST_BADDR[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_OUT_SLIST_BADDR(qno), + octep_read_csr64(oct, CN93_SDP_R_OUT_SLIST_BADDR(qno))); + dev_info(dev, "R[%d]_OUT_SLIST_RSIZE[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_OUT_SLIST_RSIZE(qno), + octep_read_csr64(oct, CN93_SDP_R_OUT_SLIST_RSIZE(qno))); + dev_info(dev, "R[%d]_OUT_CNTS[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_OUT_CNTS(qno), + octep_read_csr64(oct, CN93_SDP_R_OUT_CNTS(qno))); + dev_info(dev, "R[%d]_OUT_INT_LEVELS[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_OUT_INT_LEVELS(qno), + octep_read_csr64(oct, CN93_SDP_R_OUT_INT_LEVELS(qno))); + dev_info(dev, "R[%d]_OUT_PKT_CNT[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_OUT_PKT_CNT(qno), + octep_read_csr64(oct, CN93_SDP_R_OUT_PKT_CNT(qno))); + dev_info(dev, "R[%d]_OUT_BYTE_CNT[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_OUT_BYTE_CNT(qno), + octep_read_csr64(oct, CN93_SDP_R_OUT_BYTE_CNT(qno))); + dev_info(dev, "R[%d]_ERR_TYPE[0x%llx]: 0x%016llx\n", + qno, CN93_SDP_R_ERR_TYPE(qno), + octep_read_csr64(oct, CN93_SDP_R_ERR_TYPE(qno))); +} + +/* Reset Hardware Tx queue */ +static int cn93_reset_iq(struct octep_device *oct, int q_no) +{ + struct octep_config *conf = oct->conf; + u64 val = 0ULL; + + dev_dbg(&oct->pdev->dev, "Reset PF IQ-%d\n", q_no); + + /* Get absolute queue number */ + q_no += conf->pf_ring_cfg.srn; + + /* Disable the Tx/Instruction Ring */ + octep_write_csr64(oct, CN93_SDP_R_IN_ENABLE(q_no), val); + + /* clear the Instruction Ring packet/byte counts and doorbell CSRs */ + octep_write_csr64(oct, CN93_SDP_R_IN_CNTS(q_no), val); + octep_write_csr64(oct, CN93_SDP_R_IN_INT_LEVELS(q_no), val); + octep_write_csr64(oct, CN93_SDP_R_IN_PKT_CNT(q_no), val); + octep_write_csr64(oct, CN93_SDP_R_IN_BYTE_CNT(q_no), val); + octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_BADDR(q_no), val); + octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_RSIZE(q_no), val); + + val = 0xFFFFFFFF; + octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_DBELL(q_no), val); + + return 0; +} + +/* Reset Hardware Rx queue */ +static void cn93_reset_oq(struct octep_device *oct, int q_no) +{ + u64 val = 0ULL; + + q_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + /* Disable Output (Rx) Ring */ + octep_write_csr64(oct, CN93_SDP_R_OUT_ENABLE(q_no), val); + + /* Clear count CSRs */ + val = octep_read_csr(oct, CN93_SDP_R_OUT_CNTS(q_no)); + octep_write_csr(oct, CN93_SDP_R_OUT_CNTS(q_no), val); + + octep_write_csr64(oct, CN93_SDP_R_OUT_PKT_CNT(q_no), 0xFFFFFFFFFULL); + octep_write_csr64(oct, CN93_SDP_R_OUT_SLIST_DBELL(q_no), 0xFFFFFFFF); +} + +/* Reset all hardware Tx/Rx queues */ +static void octep_reset_io_queues_cn93_pf(struct octep_device *oct) +{ + struct pci_dev *pdev = oct->pdev; + int q; + + dev_dbg(&pdev->dev, "Reset OCTEP_CN93 PF IO Queues\n"); + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + cn93_reset_iq(oct, q); + cn93_reset_oq(oct, q); + } +} + +/* Initialize windowed addresses to access some hardware registers */ +static void octep_setup_pci_window_regs_cn93_pf(struct octep_device *oct) +{ + u8 __iomem *bar0_pciaddr = oct->mmio[0].hw_addr; + + oct->pci_win_regs.pci_win_wr_addr = (u8 __iomem *)(bar0_pciaddr + CN93_SDP_WIN_WR_ADDR64); + oct->pci_win_regs.pci_win_rd_addr = (u8 __iomem *)(bar0_pciaddr + CN93_SDP_WIN_RD_ADDR64); + oct->pci_win_regs.pci_win_wr_data = (u8 __iomem *)(bar0_pciaddr + CN93_SDP_WIN_WR_DATA64); + oct->pci_win_regs.pci_win_rd_data = (u8 __iomem *)(bar0_pciaddr + CN93_SDP_WIN_RD_DATA64); +} + +/* Configure Hardware mapping: inform hardware which rings belong to PF. */ +static void octep_configure_ring_mapping_cn93_pf(struct octep_device *oct) +{ + struct octep_config *conf = oct->conf; + struct pci_dev *pdev = oct->pdev; + u64 pf_srn = CFG_GET_PORTS_PF_SRN(oct->conf); + int q; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(conf); q++) { + u64 regval = 0; + + if (oct->pcie_port) + regval = 8 << CN93_SDP_FUNC_SEL_EPF_BIT_POS; + + octep_write_csr64(oct, CN93_SDP_EPVF_RING(pf_srn + q), regval); + + regval = octep_read_csr64(oct, CN93_SDP_EPVF_RING(pf_srn + q)); + dev_dbg(&pdev->dev, "Write SDP_EPVF_RING[0x%llx] = 0x%llx\n", + CN93_SDP_EPVF_RING(pf_srn + q), regval); + } +} + +/* Initialize configuration limits and initial active config 93xx PF. */ +static void octep_init_config_cn93_pf(struct octep_device *oct) +{ + struct octep_config *conf = oct->conf; + struct pci_dev *pdev = oct->pdev; + u64 val; + + /* Read ring configuration: + * PF ring count, number of VFs and rings per VF supported + */ + val = octep_read_csr64(oct, CN93_SDP_EPF_RINFO); + conf->sriov_cfg.max_rings_per_vf = CN93_SDP_EPF_RINFO_RPVF(val); + conf->sriov_cfg.active_rings_per_vf = conf->sriov_cfg.max_rings_per_vf; + conf->sriov_cfg.max_vfs = CN93_SDP_EPF_RINFO_NVFS(val); + conf->sriov_cfg.active_vfs = conf->sriov_cfg.max_vfs; + conf->sriov_cfg.vf_srn = CN93_SDP_EPF_RINFO_SRN(val); + + val = octep_read_csr64(oct, CN93_SDP_MAC_PF_RING_CTL(oct->pcie_port)); + conf->pf_ring_cfg.srn = CN93_SDP_MAC_PF_RING_CTL_SRN(val); + conf->pf_ring_cfg.max_io_rings = CN93_SDP_MAC_PF_RING_CTL_RPPF(val); + conf->pf_ring_cfg.active_io_rings = conf->pf_ring_cfg.max_io_rings; + dev_info(&pdev->dev, "pf_srn=%u rpvf=%u nvfs=%u rppf=%u\n", + conf->pf_ring_cfg.srn, conf->sriov_cfg.active_rings_per_vf, + conf->sriov_cfg.active_vfs, conf->pf_ring_cfg.active_io_rings); + + conf->iq.num_descs = OCTEP_IQ_MAX_DESCRIPTORS; + conf->iq.instr_type = OCTEP_64BYTE_INSTR; + conf->iq.pkind = 0; + conf->iq.db_min = OCTEP_DB_MIN; + conf->iq.intr_threshold = OCTEP_IQ_INTR_THRESHOLD; + + conf->oq.num_descs = OCTEP_OQ_MAX_DESCRIPTORS; + conf->oq.buf_size = OCTEP_OQ_BUF_SIZE; + conf->oq.refill_threshold = OCTEP_OQ_REFILL_THRESHOLD; + conf->oq.oq_intr_pkt = OCTEP_OQ_INTR_PKT_THRESHOLD; + conf->oq.oq_intr_time = OCTEP_OQ_INTR_TIME_THRESHOLD; + + conf->msix_cfg.non_ioq_msix = CN93_NUM_NON_IOQ_INTR; + conf->msix_cfg.ioq_msix = conf->pf_ring_cfg.active_io_rings; + conf->msix_cfg.non_ioq_msix_names = cn93_non_ioq_msix_names; + + conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr + (0x400000ull * 7); +} + +/* Setup registers for a hardware Tx Queue */ +static void octep_setup_iq_regs_cn93_pf(struct octep_device *oct, int iq_no) +{ + struct octep_iq *iq = oct->iq[iq_no]; + u32 reset_instr_cnt; + u64 reg_val; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CN93_SDP_R_IN_CONTROL(iq_no)); + + /* wait for IDLE to set to 1 */ + if (!(reg_val & CN93_R_IN_CTL_IDLE)) { + do { + reg_val = octep_read_csr64(oct, CN93_SDP_R_IN_CONTROL(iq_no)); + } while (!(reg_val & CN93_R_IN_CTL_IDLE)); + } + + reg_val |= CN93_R_IN_CTL_RDSIZE; + reg_val |= CN93_R_IN_CTL_IS_64B; + reg_val |= CN93_R_IN_CTL_ESR; + octep_write_csr64(oct, CN93_SDP_R_IN_CONTROL(iq_no), reg_val); + + /* Write the start of the input queue's ring and its size */ + octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_BADDR(iq_no), + iq->desc_ring_dma); + octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_RSIZE(iq_no), + iq->max_count); + + /* Remember the doorbell & instruction count register addr + * for this queue + */ + iq->doorbell_reg = oct->mmio[0].hw_addr + + CN93_SDP_R_IN_INSTR_DBELL(iq_no); + iq->inst_cnt_reg = oct->mmio[0].hw_addr + + CN93_SDP_R_IN_CNTS(iq_no); + iq->intr_lvl_reg = oct->mmio[0].hw_addr + + CN93_SDP_R_IN_INT_LEVELS(iq_no); + + /* Store the current instruction counter (used in flush_iq calculation) */ + reset_instr_cnt = readl(iq->inst_cnt_reg); + writel(reset_instr_cnt, iq->inst_cnt_reg); + + /* INTR_THRESHOLD is set to max(FFFFFFFF) to disable the INTR */ + reg_val = CFG_GET_IQ_INTR_THRESHOLD(oct->conf) & 0xffffffff; + octep_write_csr64(oct, CN93_SDP_R_IN_INT_LEVELS(iq_no), reg_val); +} + +/* Setup registers for a hardware Rx Queue */ +static void octep_setup_oq_regs_cn93_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val; + u64 oq_ctl = 0ULL; + u32 time_threshold = 0; + struct octep_oq *oq = oct->oq[oq_no]; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CN93_SDP_R_OUT_CONTROL(oq_no)); + + /* wait for IDLE to set to 1 */ + if (!(reg_val & CN93_R_OUT_CTL_IDLE)) { + do { + reg_val = octep_read_csr64(oct, CN93_SDP_R_OUT_CONTROL(oq_no)); + } while (!(reg_val & CN93_R_OUT_CTL_IDLE)); + } + + reg_val &= ~(CN93_R_OUT_CTL_IMODE); + reg_val &= ~(CN93_R_OUT_CTL_ROR_P); + reg_val &= ~(CN93_R_OUT_CTL_NSR_P); + reg_val &= ~(CN93_R_OUT_CTL_ROR_I); + reg_val &= ~(CN93_R_OUT_CTL_NSR_I); + reg_val &= ~(CN93_R_OUT_CTL_ES_I); + reg_val &= ~(CN93_R_OUT_CTL_ROR_D); + reg_val &= ~(CN93_R_OUT_CTL_NSR_D); + reg_val &= ~(CN93_R_OUT_CTL_ES_D); + reg_val |= (CN93_R_OUT_CTL_ES_P); + + octep_write_csr64(oct, CN93_SDP_R_OUT_CONTROL(oq_no), reg_val); + octep_write_csr64(oct, CN93_SDP_R_OUT_SLIST_BADDR(oq_no), + oq->desc_ring_dma); + octep_write_csr64(oct, CN93_SDP_R_OUT_SLIST_RSIZE(oq_no), + oq->max_count); + + oq_ctl = octep_read_csr64(oct, CN93_SDP_R_OUT_CONTROL(oq_no)); + oq_ctl &= ~0x7fffffULL; //clear the ISIZE and BSIZE (22-0) + oq_ctl |= (oq->buffer_size & 0xffff); //populate the BSIZE (15-0) + octep_write_csr64(oct, CN93_SDP_R_OUT_CONTROL(oq_no), oq_ctl); + + /* Get the mapped address of the pkt_sent and pkts_credit regs */ + oq->pkts_sent_reg = oct->mmio[0].hw_addr + CN93_SDP_R_OUT_CNTS(oq_no); + oq->pkts_credit_reg = oct->mmio[0].hw_addr + + CN93_SDP_R_OUT_SLIST_DBELL(oq_no); + + time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf); + reg_val = ((u64)time_threshold << 32) | + CFG_GET_OQ_INTR_PKT(oct->conf); + octep_write_csr64(oct, CN93_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); +} + +/* Setup registers for a PF mailbox */ +static void octep_setup_mbox_regs_cn93_pf(struct octep_device *oct, int q_no) +{ + struct octep_mbox *mbox = oct->mbox[q_no]; + + mbox->q_no = q_no; + + /* PF mbox interrupt reg */ + mbox->mbox_int_reg = oct->mmio[0].hw_addr + CN93_SDP_EPF_MBOX_RINT(0); + + /* PF to VF DATA reg. PF writes into this reg */ + mbox->mbox_write_reg = oct->mmio[0].hw_addr + CN93_SDP_R_MBOX_PF_VF_DATA(q_no); + + /* VF to PF DATA reg. PF reads from this reg */ + mbox->mbox_read_reg = oct->mmio[0].hw_addr + CN93_SDP_R_MBOX_VF_PF_DATA(q_no); +} + +/* Mailbox Interrupt handler */ +static void cn93_handle_pf_mbox_intr(struct octep_device *oct) +{ + u64 mbox_int_val = 0ULL, val = 0ULL, qno = 0ULL; + + mbox_int_val = readq(oct->mbox[0]->mbox_int_reg); + for (qno = 0; qno < OCTEP_MAX_VF; qno++) { + val = readq(oct->mbox[qno]->mbox_read_reg); + dev_dbg(&oct->pdev->dev, + "PF MBOX READ: val:%llx from VF:%llx\n", val, qno); + } + + writeq(mbox_int_val, oct->mbox[0]->mbox_int_reg); +} + +/* Interrupts handler for all non-queue generic interrupts. */ +static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + int i = 0; + + /* Check for IRERR INTR */ + reg_val = octep_read_csr64(oct, CN93_SDP_EPF_IRERR_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "received IRERR_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT, reg_val); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + reg_val = octep_read_csr64(oct, + CN93_SDP_R_ERR_TYPE(i)); + if (reg_val) { + dev_info(&pdev->dev, + "Received err type on IQ-%d: 0x%llx\n", + i, reg_val); + octep_write_csr64(oct, CN93_SDP_R_ERR_TYPE(i), + reg_val); + } + } + goto irq_handled; + } + + /* Check for ORERR INTR */ + reg_val = octep_read_csr64(oct, CN93_SDP_EPF_ORERR_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "Received ORERR_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT, reg_val); + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + reg_val = octep_read_csr64(oct, CN93_SDP_R_ERR_TYPE(i)); + if (reg_val) { + dev_info(&pdev->dev, + "Received err type on OQ-%d: 0x%llx\n", + i, reg_val); + octep_write_csr64(oct, CN93_SDP_R_ERR_TYPE(i), + reg_val); + } + } + + goto irq_handled; + } + + /* Check for VFIRE INTR */ + reg_val = octep_read_csr64(oct, CN93_SDP_EPF_VFIRE_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received VFIRE_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT(0), reg_val); + goto irq_handled; + } + + /* Check for VFORE INTR */ + reg_val = octep_read_csr64(oct, CN93_SDP_EPF_VFORE_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received VFORE_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT(0), reg_val); + goto irq_handled; + } + + /* Check for MBOX INTR */ + reg_val = octep_read_csr64(oct, CN93_SDP_EPF_MBOX_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received MBOX_RINT intr: 0x%llx\n", reg_val); + cn93_handle_pf_mbox_intr(oct); + goto irq_handled; + } + + /* Check for OEI INTR */ + reg_val = octep_read_csr64(oct, CN93_SDP_EPF_OEI_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "Received OEI_EINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg_val); + queue_work(octep_wq, &oct->ctrl_mbox_task); + goto irq_handled; + } + + /* Check for DMA INTR */ + reg_val = octep_read_csr64(oct, CN93_SDP_EPF_DMA_RINT); + if (reg_val) { + octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT, reg_val); + goto irq_handled; + } + + /* Check for DMA VF INTR */ + reg_val = octep_read_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received DMA_VF_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT(0), reg_val); + goto irq_handled; + } + + /* Check for PPVF INTR */ + reg_val = octep_read_csr64(oct, CN93_SDP_EPF_PP_VF_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received PP_VF_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT(0), reg_val); + goto irq_handled; + } + + /* Check for MISC INTR */ + reg_val = octep_read_csr64(oct, CN93_SDP_EPF_MISC_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "Received MISC_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT, reg_val); + goto irq_handled; + } + + dev_info(&pdev->dev, "Reserved inerrupts raised; Ignore\n"); +irq_handled: + return IRQ_HANDLED; +} + +/* Tx/Rx queue interrupt handler */ +static irqreturn_t octep_ioq_intr_handler_cn93_pf(void *data) +{ + struct octep_ioq_vector *vector = (struct octep_ioq_vector *)data; + struct octep_oq *oq = vector->oq; + + napi_schedule_irqoff(oq->napi); + return IRQ_HANDLED; +} + +/* soft reset of 93xx */ +static int octep_soft_reset_cn93_pf(struct octep_device *oct) +{ + dev_info(&oct->pdev->dev, "CN93XX: Doing soft reset\n"); + + octep_write_csr64(oct, CN93_SDP_WIN_WR_MASK_REG, 0xFF); + + /* Set core domain reset bit */ + OCTEP_PCI_WIN_WRITE(oct, CN93_RST_CORE_DOMAIN_W1S, 1); + /* Wait for 100ms as Octeon resets. */ + mdelay(100); + /* clear core domain reset bit */ + OCTEP_PCI_WIN_WRITE(oct, CN93_RST_CORE_DOMAIN_W1C, 1); + + return 0; +} + +/* Re-initialize Octeon hardware registers */ +static void octep_reinit_regs_cn93_pf(struct octep_device *oct) +{ + u32 i; + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + oct->hw_ops.setup_iq_regs(oct, i); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + oct->hw_ops.setup_oq_regs(oct, i); + + oct->hw_ops.enable_interrupts(oct); + oct->hw_ops.enable_io_queues(oct); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg); +} + +/* Enable all interrupts */ +static void octep_enable_interrupts_cn93_pf(struct octep_device *oct) +{ + u64 intr_mask = 0ULL; + int srn, num_rings, i; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (i = 0; i < num_rings; i++) + intr_mask |= (0x1ULL << (srn + i)); + + octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT_ENA_W1S, -1ULL); + octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1S, intr_mask); +} + +/* Disable all interrupts */ +static void octep_disable_interrupts_cn93_pf(struct octep_device *oct) +{ + u64 intr_mask = 0ULL; + int srn, num_rings, i; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (i = 0; i < num_rings; i++) + intr_mask |= (0x1ULL << (srn + i)); + + octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT_ENA_W1C, -1ULL); + octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1C, intr_mask); +} + +/* Get new Octeon Read Index: index of descriptor that Octeon reads next. */ +static u32 octep_update_iq_read_index_cn93_pf(struct octep_iq *iq) +{ + u32 pkt_in_done = readl(iq->inst_cnt_reg); + u32 last_done, new_idx; + + last_done = pkt_in_done - iq->pkt_in_done; + iq->pkt_in_done = pkt_in_done; + + new_idx = (iq->octep_read_index + last_done) % iq->max_count; + + return new_idx; +} + +/* Enable a hardware Tx Queue */ +static void octep_enable_iq_cn93_pf(struct octep_device *oct, int iq_no) +{ + u64 loop = HZ; + u64 reg_val; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_DBELL(iq_no), 0xFFFFFFFF); + + while (octep_read_csr64(oct, CN93_SDP_R_IN_INSTR_DBELL(iq_no)) && + loop--) { + schedule_timeout_interruptible(1); + } + + reg_val = octep_read_csr64(oct, CN93_SDP_R_IN_INT_LEVELS(iq_no)); + reg_val |= (0x1ULL << 62); + octep_write_csr64(oct, CN93_SDP_R_IN_INT_LEVELS(iq_no), reg_val); + + reg_val = octep_read_csr64(oct, CN93_SDP_R_IN_ENABLE(iq_no)); + reg_val |= 0x1ULL; + octep_write_csr64(oct, CN93_SDP_R_IN_ENABLE(iq_no), reg_val); +} + +/* Enable a hardware Rx Queue */ +static void octep_enable_oq_cn93_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val = 0ULL; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + reg_val = octep_read_csr64(oct, CN93_SDP_R_OUT_INT_LEVELS(oq_no)); + reg_val |= (0x1ULL << 62); + octep_write_csr64(oct, CN93_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); + + octep_write_csr64(oct, CN93_SDP_R_OUT_SLIST_DBELL(oq_no), 0xFFFFFFFF); + + reg_val = octep_read_csr64(oct, CN93_SDP_R_OUT_ENABLE(oq_no)); + reg_val |= 0x1ULL; + octep_write_csr64(oct, CN93_SDP_R_OUT_ENABLE(oq_no), reg_val); +} + +/* Enable all hardware Tx/Rx Queues assined to PF */ +static void octep_enable_io_queues_cn93_pf(struct octep_device *oct) +{ + u8 q; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + octep_enable_iq_cn93_pf(oct, q); + octep_enable_oq_cn93_pf(oct, q); + } +} + +/* Disable a hardware Tx Queue assined to PF */ +static void octep_disable_iq_cn93_pf(struct octep_device *oct, int iq_no) +{ + u64 reg_val = 0ULL; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + reg_val = octep_read_csr64(oct, CN93_SDP_R_IN_ENABLE(iq_no)); + reg_val &= ~0x1ULL; + octep_write_csr64(oct, CN93_SDP_R_IN_ENABLE(iq_no), reg_val); +} + +/* Disable a hardware Rx Queue assined to PF */ +static void octep_disable_oq_cn93_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val = 0ULL; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CN93_SDP_R_OUT_ENABLE(oq_no)); + reg_val &= ~0x1ULL; + octep_write_csr64(oct, CN93_SDP_R_OUT_ENABLE(oq_no), reg_val); +} + +/* Disable all hardware Tx/Rx Queues assined to PF */ +static void octep_disable_io_queues_cn93_pf(struct octep_device *oct) +{ + int q = 0; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + octep_disable_iq_cn93_pf(oct, q); + octep_disable_oq_cn93_pf(oct, q); + } +} + +/* Dump hardware registers (including Tx/Rx queues) for debugging. */ +static void octep_dump_registers_cn93_pf(struct octep_device *oct) +{ + u8 srn, num_rings, q; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (q = srn; q < srn + num_rings; q++) + cn93_dump_regs(oct, q); +} + +/** + * octep_device_setup_cn93_pf() - Setup Octeon device. + * + * @oct: Octeon device private data structure. + * + * - initialize hardware operations. + * - get target side pcie port number for the device. + * - setup window access to hardware registers. + * - set initial configuration and max limits. + * - setup hardware mapping of rings to the PF device. + */ +void octep_device_setup_cn93_pf(struct octep_device *oct) +{ + oct->hw_ops.setup_iq_regs = octep_setup_iq_regs_cn93_pf; + oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cn93_pf; + oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cn93_pf; + + oct->hw_ops.non_ioq_intr_handler = octep_non_ioq_intr_handler_cn93_pf; + oct->hw_ops.ioq_intr_handler = octep_ioq_intr_handler_cn93_pf; + oct->hw_ops.soft_reset = octep_soft_reset_cn93_pf; + oct->hw_ops.reinit_regs = octep_reinit_regs_cn93_pf; + + oct->hw_ops.enable_interrupts = octep_enable_interrupts_cn93_pf; + oct->hw_ops.disable_interrupts = octep_disable_interrupts_cn93_pf; + + oct->hw_ops.update_iq_read_idx = octep_update_iq_read_index_cn93_pf; + + oct->hw_ops.enable_iq = octep_enable_iq_cn93_pf; + oct->hw_ops.enable_oq = octep_enable_oq_cn93_pf; + oct->hw_ops.enable_io_queues = octep_enable_io_queues_cn93_pf; + + oct->hw_ops.disable_iq = octep_disable_iq_cn93_pf; + oct->hw_ops.disable_oq = octep_disable_oq_cn93_pf; + oct->hw_ops.disable_io_queues = octep_disable_io_queues_cn93_pf; + oct->hw_ops.reset_io_queues = octep_reset_io_queues_cn93_pf; + + oct->hw_ops.dump_registers = octep_dump_registers_cn93_pf; + + octep_setup_pci_window_regs_cn93_pf(oct); + + oct->pcie_port = octep_read_csr64(oct, CN93_SDP_MAC_NUMBER) & 0xff; + dev_info(&oct->pdev->dev, + "Octeon device using PCIE Port %d\n", oct->pcie_port); + + octep_init_config_cn93_pf(oct); + octep_configure_ring_mapping_cn93_pf(oct); +} diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h new file mode 100644 index 000000000000..f208f3f9a447 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_CONFIG_H_ +#define _OCTEP_CONFIG_H_ + +/* Tx instruction types by length */ +#define OCTEP_32BYTE_INSTR 32 +#define OCTEP_64BYTE_INSTR 64 + +/* Tx Queue: maximum descriptors per ring */ +#define OCTEP_IQ_MAX_DESCRIPTORS 1024 +/* Minimum input (Tx) requests to be enqueued to ring doorbell */ +#define OCTEP_DB_MIN 1 +/* Packet threshold for Tx queue interrupt */ +#define OCTEP_IQ_INTR_THRESHOLD 0x0 + +/* Rx Queue: maximum descriptors per ring */ +#define OCTEP_OQ_MAX_DESCRIPTORS 1024 + +/* Rx buffer size: Use page size buffers. + * Build skb from allocated page buffer once the packet is received. + * When a gathered packet is received, make head page as skb head and + * page buffers in consecutive Rx descriptors as fragments. + */ +#define OCTEP_OQ_BUF_SIZE (SKB_WITH_OVERHEAD(PAGE_SIZE)) +#define OCTEP_OQ_PKTS_PER_INTR 128 +#define OCTEP_OQ_REFILL_THRESHOLD (OCTEP_OQ_MAX_DESCRIPTORS / 4) + +#define OCTEP_OQ_INTR_PKT_THRESHOLD 1 +#define OCTEP_OQ_INTR_TIME_THRESHOLD 10 + +#define OCTEP_MSIX_NAME_SIZE (IFNAMSIZ + 32) + +/* Tx Queue wake threshold + * wakeup a stopped Tx queue if minimum 2 descriptors are available. + * Even a skb with fragments consume only one Tx queue descriptor entry. + */ +#define OCTEP_WAKE_QUEUE_THRESHOLD 2 + +/* Minimum MTU supported by Octeon network interface */ +#define OCTEP_MIN_MTU ETH_MIN_MTU +/* Maximum MTU supported by Octeon interface*/ +#define OCTEP_MAX_MTU (10000 - (ETH_HLEN + ETH_FCS_LEN)) +/* Default MTU */ +#define OCTEP_DEFAULT_MTU 1500 + +/* Macros to get octeon config params */ +#define CFG_GET_IQ_CFG(cfg) ((cfg)->iq) +#define CFG_GET_IQ_NUM_DESC(cfg) ((cfg)->iq.num_descs) +#define CFG_GET_IQ_INSTR_TYPE(cfg) ((cfg)->iq.instr_type) +#define CFG_GET_IQ_PKIND(cfg) ((cfg)->iq.pkind) +#define CFG_GET_IQ_INSTR_SIZE(cfg) (64) +#define CFG_GET_IQ_DB_MIN(cfg) ((cfg)->iq.db_min) +#define CFG_GET_IQ_INTR_THRESHOLD(cfg) ((cfg)->iq.intr_threshold) + +#define CFG_GET_OQ_NUM_DESC(cfg) ((cfg)->oq.num_descs) +#define CFG_GET_OQ_BUF_SIZE(cfg) ((cfg)->oq.buf_size) +#define CFG_GET_OQ_REFILL_THRESHOLD(cfg) ((cfg)->oq.refill_threshold) +#define CFG_GET_OQ_INTR_PKT(cfg) ((cfg)->oq.oq_intr_pkt) +#define CFG_GET_OQ_INTR_TIME(cfg) ((cfg)->oq.oq_intr_time) + +#define CFG_GET_PORTS_MAX_IO_RINGS(cfg) ((cfg)->pf_ring_cfg.max_io_rings) +#define CFG_GET_PORTS_ACTIVE_IO_RINGS(cfg) ((cfg)->pf_ring_cfg.active_io_rings) +#define CFG_GET_PORTS_PF_SRN(cfg) ((cfg)->pf_ring_cfg.srn) + +#define CFG_GET_DPI_PKIND(cfg) ((cfg)->core_cfg.dpi_pkind) +#define CFG_GET_CORE_TICS_PER_US(cfg) ((cfg)->core_cfg.core_tics_per_us) +#define CFG_GET_COPROC_TICS_PER_US(cfg) ((cfg)->core_cfg.coproc_tics_per_us) + +#define CFG_GET_MAX_VFS(cfg) ((cfg)->sriov_cfg.max_vfs) +#define CFG_GET_ACTIVE_VFS(cfg) ((cfg)->sriov_cfg.active_vfs) +#define CFG_GET_MAX_RPVF(cfg) ((cfg)->sriov_cfg.max_rings_per_vf) +#define CFG_GET_ACTIVE_RPVF(cfg) ((cfg)->sriov_cfg.active_rings_per_vf) +#define CFG_GET_VF_SRN(cfg) ((cfg)->sriov_cfg.vf_srn) + +#define CFG_GET_IOQ_MSIX(cfg) ((cfg)->msix_cfg.ioq_msix) +#define CFG_GET_NON_IOQ_MSIX(cfg) ((cfg)->msix_cfg.non_ioq_msix) +#define CFG_GET_NON_IOQ_MSIX_NAMES(cfg) ((cfg)->msix_cfg.non_ioq_msix_names) + +#define CFG_GET_CTRL_MBOX_MEM_ADDR(cfg) ((cfg)->ctrl_mbox_cfg.barmem_addr) + +/* Hardware Tx Queue configuration. */ +struct octep_iq_config { + /* Size of the Input queue (number of commands) */ + u16 num_descs; + + /* Command size - 32 or 64 bytes */ + u16 instr_type; + + /* pkind for packets sent to Octeon */ + u16 pkind; + + /* Minimum number of commands pending to be posted to Octeon before driver + * hits the Input queue doorbell. + */ + u16 db_min; + + /* Trigger the IQ interrupt when processed cmd count reaches + * this level. + */ + u32 intr_threshold; +}; + +/* Hardware Rx Queue configuration. */ +struct octep_oq_config { + /* Size of Output queue (number of descriptors) */ + u16 num_descs; + + /* Size of buffer in this Output queue. */ + u16 buf_size; + + /* The number of buffers that were consumed during packet processing + * by the driver on this Output queue before the driver attempts to + * replenish the descriptor ring with new buffers. + */ + u16 refill_threshold; + + /* Interrupt Coalescing (Packet Count). Octeon will interrupt the host + * only if it sent as many packets as specified by this field. + * The driver usually does not use packet count interrupt coalescing. + */ + u32 oq_intr_pkt; + + /* Interrupt Coalescing (Time Interval). Octeon will interrupt the host + * if at least one packet was sent in the time interval specified by + * this field. The driver uses time interval interrupt coalescing by + * default. The time is specified in microseconds. + */ + u32 oq_intr_time; +}; + +/* Tx/Rx configuration */ +struct octep_pf_ring_config { + /* Max number of IOQs */ + u16 max_io_rings; + + /* Number of active IOQs */ + u16 active_io_rings; + + /* Starting IOQ number: this changes based on which PEM is used */ + u16 srn; +}; + +/* Octeon Hardware SRIOV config */ +struct octep_sriov_config { + /* Max number of VF devices supported */ + u16 max_vfs; + + /* Number of VF devices enabled */ + u16 active_vfs; + + /* Max number of rings assigned to VF */ + u8 max_rings_per_vf; + + /* Number of rings enabled per VF */ + u8 active_rings_per_vf; + + /* starting ring number of VF's: ring-0 of VF-0 of the PF */ + u16 vf_srn; +}; + +/* Octeon MSI-x config. */ +struct octep_msix_config { + /* Number of IOQ interrupts */ + u16 ioq_msix; + + /* Number of Non IOQ interrupts */ + u16 non_ioq_msix; + + /* Names of Non IOQ interrupts */ + char **non_ioq_msix_names; +}; + +struct octep_ctrl_mbox_config { + /* Barmem address for control mbox */ + void __iomem *barmem_addr; +}; + +/* Data Structure to hold configuration limits and active config */ +struct octep_config { + /* Input Queue attributes. */ + struct octep_iq_config iq; + + /* Output Queue attributes. */ + struct octep_oq_config oq; + + /* NIC Port Configuration */ + struct octep_pf_ring_config pf_ring_cfg; + + /* SRIOV configuration of the PF */ + struct octep_sriov_config sriov_cfg; + + /* MSI-X interrupt config */ + struct octep_msix_config msix_cfg; + + /* ctrl mbox config */ + struct octep_ctrl_mbox_config ctrl_mbox_cfg; +}; +#endif /* _OCTEP_CONFIG_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c new file mode 100644 index 000000000000..8c196dadfad0 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/mutex.h> +#include <linux/jiffies.h> +#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/io.h> +#include <linux/pci.h> +#include <linux/etherdevice.h> + +#include "octep_ctrl_mbox.h" +#include "octep_config.h" +#include "octep_main.h" + +/* Timeout in msecs for message response */ +#define OCTEP_CTRL_MBOX_MSG_TIMEOUT_MS 100 +/* Time in msecs to wait for message response */ +#define OCTEP_CTRL_MBOX_MSG_WAIT_MS 10 + +#define OCTEP_CTRL_MBOX_INFO_MAGIC_NUM_OFFSET(m) (m) +#define OCTEP_CTRL_MBOX_INFO_BARMEM_SZ_OFFSET(m) ((m) + 8) +#define OCTEP_CTRL_MBOX_INFO_HOST_VERSION_OFFSET(m) ((m) + 16) +#define OCTEP_CTRL_MBOX_INFO_HOST_STATUS_OFFSET(m) ((m) + 24) +#define OCTEP_CTRL_MBOX_INFO_FW_VERSION_OFFSET(m) ((m) + 136) +#define OCTEP_CTRL_MBOX_INFO_FW_STATUS_OFFSET(m) ((m) + 144) + +#define OCTEP_CTRL_MBOX_H2FQ_INFO_OFFSET(m) ((m) + OCTEP_CTRL_MBOX_INFO_SZ) +#define OCTEP_CTRL_MBOX_H2FQ_PROD_OFFSET(m) (OCTEP_CTRL_MBOX_H2FQ_INFO_OFFSET(m)) +#define OCTEP_CTRL_MBOX_H2FQ_CONS_OFFSET(m) ((OCTEP_CTRL_MBOX_H2FQ_INFO_OFFSET(m)) + 4) +#define OCTEP_CTRL_MBOX_H2FQ_ELEM_SZ_OFFSET(m) ((OCTEP_CTRL_MBOX_H2FQ_INFO_OFFSET(m)) + 8) +#define OCTEP_CTRL_MBOX_H2FQ_ELEM_CNT_OFFSET(m) ((OCTEP_CTRL_MBOX_H2FQ_INFO_OFFSET(m)) + 12) + +#define OCTEP_CTRL_MBOX_F2HQ_INFO_OFFSET(m) ((m) + \ + OCTEP_CTRL_MBOX_INFO_SZ + \ + OCTEP_CTRL_MBOX_H2FQ_INFO_SZ) +#define OCTEP_CTRL_MBOX_F2HQ_PROD_OFFSET(m) (OCTEP_CTRL_MBOX_F2HQ_INFO_OFFSET(m)) +#define OCTEP_CTRL_MBOX_F2HQ_CONS_OFFSET(m) ((OCTEP_CTRL_MBOX_F2HQ_INFO_OFFSET(m)) + 4) +#define OCTEP_CTRL_MBOX_F2HQ_ELEM_SZ_OFFSET(m) ((OCTEP_CTRL_MBOX_F2HQ_INFO_OFFSET(m)) + 8) +#define OCTEP_CTRL_MBOX_F2HQ_ELEM_CNT_OFFSET(m) ((OCTEP_CTRL_MBOX_F2HQ_INFO_OFFSET(m)) + 12) + +#define OCTEP_CTRL_MBOX_Q_OFFSET(m, i) ((m) + \ + (sizeof(struct octep_ctrl_mbox_msg) * (i))) + +static u32 octep_ctrl_mbox_circq_inc(u32 index, u32 mask) +{ + return (index + 1) & mask; +} + +static u32 octep_ctrl_mbox_circq_space(u32 pi, u32 ci, u32 mask) +{ + return mask - ((pi - ci) & mask); +} + +static u32 octep_ctrl_mbox_circq_depth(u32 pi, u32 ci, u32 mask) +{ + return ((pi - ci) & mask); +} + +int octep_ctrl_mbox_init(struct octep_ctrl_mbox *mbox) +{ + u64 version, magic_num, status; + + if (!mbox) + return -EINVAL; + + if (!mbox->barmem) { + pr_info("octep_ctrl_mbox : Invalid barmem %p\n", mbox->barmem); + return -EINVAL; + } + + magic_num = readq(OCTEP_CTRL_MBOX_INFO_MAGIC_NUM_OFFSET(mbox->barmem)); + if (magic_num != OCTEP_CTRL_MBOX_MAGIC_NUMBER) { + pr_info("octep_ctrl_mbox : Invalid magic number %llx\n", magic_num); + return -EINVAL; + } + + version = readq(OCTEP_CTRL_MBOX_INFO_FW_VERSION_OFFSET(mbox->barmem)); + if (version != OCTEP_DRV_VERSION) { + pr_info("octep_ctrl_mbox : Firmware version mismatch %llx != %x\n", + version, OCTEP_DRV_VERSION); + return -EINVAL; + } + + status = readq(OCTEP_CTRL_MBOX_INFO_FW_STATUS_OFFSET(mbox->barmem)); + if (status != OCTEP_CTRL_MBOX_STATUS_READY) { + pr_info("octep_ctrl_mbox : Firmware is not ready.\n"); + return -EINVAL; + } + + mbox->barmem_sz = readl(OCTEP_CTRL_MBOX_INFO_BARMEM_SZ_OFFSET(mbox->barmem)); + + writeq(mbox->version, OCTEP_CTRL_MBOX_INFO_HOST_VERSION_OFFSET(mbox->barmem)); + writeq(OCTEP_CTRL_MBOX_STATUS_INIT, OCTEP_CTRL_MBOX_INFO_HOST_STATUS_OFFSET(mbox->barmem)); + + mbox->h2fq.elem_cnt = readl(OCTEP_CTRL_MBOX_H2FQ_ELEM_CNT_OFFSET(mbox->barmem)); + mbox->h2fq.elem_sz = readl(OCTEP_CTRL_MBOX_H2FQ_ELEM_SZ_OFFSET(mbox->barmem)); + mbox->h2fq.mask = (mbox->h2fq.elem_cnt - 1); + mutex_init(&mbox->h2fq_lock); + + mbox->f2hq.elem_cnt = readl(OCTEP_CTRL_MBOX_F2HQ_ELEM_CNT_OFFSET(mbox->barmem)); + mbox->f2hq.elem_sz = readl(OCTEP_CTRL_MBOX_F2HQ_ELEM_SZ_OFFSET(mbox->barmem)); + mbox->f2hq.mask = (mbox->f2hq.elem_cnt - 1); + mutex_init(&mbox->f2hq_lock); + + mbox->h2fq.hw_prod = OCTEP_CTRL_MBOX_H2FQ_PROD_OFFSET(mbox->barmem); + mbox->h2fq.hw_cons = OCTEP_CTRL_MBOX_H2FQ_CONS_OFFSET(mbox->barmem); + mbox->h2fq.hw_q = mbox->barmem + + OCTEP_CTRL_MBOX_INFO_SZ + + OCTEP_CTRL_MBOX_H2FQ_INFO_SZ + + OCTEP_CTRL_MBOX_F2HQ_INFO_SZ; + + mbox->f2hq.hw_prod = OCTEP_CTRL_MBOX_F2HQ_PROD_OFFSET(mbox->barmem); + mbox->f2hq.hw_cons = OCTEP_CTRL_MBOX_F2HQ_CONS_OFFSET(mbox->barmem); + mbox->f2hq.hw_q = mbox->h2fq.hw_q + + ((mbox->h2fq.elem_sz + sizeof(union octep_ctrl_mbox_msg_hdr)) * + mbox->h2fq.elem_cnt); + + /* ensure ready state is seen after everything is initialized */ + wmb(); + writeq(OCTEP_CTRL_MBOX_STATUS_READY, OCTEP_CTRL_MBOX_INFO_HOST_STATUS_OFFSET(mbox->barmem)); + + pr_info("Octep ctrl mbox : Init successful.\n"); + + return 0; +} + +int octep_ctrl_mbox_send(struct octep_ctrl_mbox *mbox, struct octep_ctrl_mbox_msg *msg) +{ + unsigned long timeout = msecs_to_jiffies(OCTEP_CTRL_MBOX_MSG_TIMEOUT_MS); + unsigned long period = msecs_to_jiffies(OCTEP_CTRL_MBOX_MSG_WAIT_MS); + struct octep_ctrl_mbox_q *q; + unsigned long expire; + u64 *mbuf, *word0; + u8 __iomem *qidx; + u16 pi, ci; + int i; + + if (!mbox || !msg) + return -EINVAL; + + q = &mbox->h2fq; + pi = readl(q->hw_prod); + ci = readl(q->hw_cons); + + if (!octep_ctrl_mbox_circq_space(pi, ci, q->mask)) + return -ENOMEM; + + qidx = OCTEP_CTRL_MBOX_Q_OFFSET(q->hw_q, pi); + mbuf = (u64 *)msg->msg; + word0 = &msg->hdr.word0; + + mutex_lock(&mbox->h2fq_lock); + for (i = 1; i <= msg->hdr.sizew; i++) + writeq(*mbuf++, (qidx + (i * 8))); + + writeq(*word0, qidx); + + pi = octep_ctrl_mbox_circq_inc(pi, q->mask); + writel(pi, q->hw_prod); + mutex_unlock(&mbox->h2fq_lock); + + /* don't check for notification response */ + if (msg->hdr.flags & OCTEP_CTRL_MBOX_MSG_HDR_FLAG_NOTIFY) + return 0; + + expire = jiffies + timeout; + while (true) { + *word0 = readq(qidx); + if (msg->hdr.flags == OCTEP_CTRL_MBOX_MSG_HDR_FLAG_RESP) + break; + schedule_timeout_interruptible(period); + if (signal_pending(current) || time_after(jiffies, expire)) { + pr_info("octep_ctrl_mbox: Timed out\n"); + return -EBUSY; + } + } + mbuf = (u64 *)msg->msg; + for (i = 1; i <= msg->hdr.sizew; i++) + *mbuf++ = readq(qidx + (i * 8)); + + return 0; +} + +int octep_ctrl_mbox_recv(struct octep_ctrl_mbox *mbox, struct octep_ctrl_mbox_msg *msg) +{ + struct octep_ctrl_mbox_q *q; + u32 count, pi, ci; + u8 __iomem *qidx; + u64 *mbuf; + int i; + + if (!mbox || !msg) + return -EINVAL; + + q = &mbox->f2hq; + pi = readl(q->hw_prod); + ci = readl(q->hw_cons); + count = octep_ctrl_mbox_circq_depth(pi, ci, q->mask); + if (!count) + return -EAGAIN; + + qidx = OCTEP_CTRL_MBOX_Q_OFFSET(q->hw_q, ci); + mbuf = (u64 *)msg->msg; + + mutex_lock(&mbox->f2hq_lock); + + msg->hdr.word0 = readq(qidx); + for (i = 1; i <= msg->hdr.sizew; i++) + *mbuf++ = readq(qidx + (i * 8)); + + ci = octep_ctrl_mbox_circq_inc(ci, q->mask); + writel(ci, q->hw_cons); + + mutex_unlock(&mbox->f2hq_lock); + + if (msg->hdr.flags != OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ || !mbox->process_req) + return 0; + + mbox->process_req(mbox->user_ctx, msg); + mbuf = (u64 *)msg->msg; + for (i = 1; i <= msg->hdr.sizew; i++) + writeq(*mbuf++, (qidx + (i * 8))); + + writeq(msg->hdr.word0, qidx); + + return 0; +} + +int octep_ctrl_mbox_uninit(struct octep_ctrl_mbox *mbox) +{ + if (!mbox) + return -EINVAL; + + writeq(OCTEP_CTRL_MBOX_STATUS_UNINIT, + OCTEP_CTRL_MBOX_INFO_HOST_STATUS_OFFSET(mbox->barmem)); + /* ensure uninit state is written before uninitialization */ + wmb(); + + mutex_destroy(&mbox->h2fq_lock); + mutex_destroy(&mbox->f2hq_lock); + + writeq(OCTEP_CTRL_MBOX_STATUS_INVALID, + OCTEP_CTRL_MBOX_INFO_HOST_STATUS_OFFSET(mbox->barmem)); + writeq(0, OCTEP_CTRL_MBOX_INFO_HOST_VERSION_OFFSET(mbox->barmem)); + + pr_info("Octep ctrl mbox : Uninit successful.\n"); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h new file mode 100644 index 000000000000..2dc5753cfec6 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + #ifndef __OCTEP_CTRL_MBOX_H__ +#define __OCTEP_CTRL_MBOX_H__ + +/* barmem structure + * |===========================================| + * |Info (16 + 120 + 120 = 256 bytes) | + * |-------------------------------------------| + * |magic number (8 bytes) | + * |bar memory size (4 bytes) | + * |reserved (4 bytes) | + * |-------------------------------------------| + * |host version (8 bytes) | + * |host status (8 bytes) | + * |host reserved (104 bytes) | + * |-------------------------------------------| + * |fw version (8 bytes) | + * |fw status (8 bytes) | + * |fw reserved (104 bytes) | + * |===========================================| + * |Host to Fw Queue info (16 bytes) | + * |-------------------------------------------| + * |producer index (4 bytes) | + * |consumer index (4 bytes) | + * |element size (4 bytes) | + * |element count (4 bytes) | + * |===========================================| + * |Fw to Host Queue info (16 bytes) | + * |-------------------------------------------| + * |producer index (4 bytes) | + * |consumer index (4 bytes) | + * |element size (4 bytes) | + * |element count (4 bytes) | + * |===========================================| + * |Host to Fw Queue | + * |-------------------------------------------| + * |((elem_sz + hdr(8 bytes)) * elem_cnt) bytes| + * |===========================================| + * |===========================================| + * |Fw to Host Queue | + * |-------------------------------------------| + * |((elem_sz + hdr(8 bytes)) * elem_cnt) bytes| + * |===========================================| + */ + +#define OCTEP_CTRL_MBOX_MAGIC_NUMBER 0xdeaddeadbeefbeefull + +/* Size of mbox info in bytes */ +#define OCTEP_CTRL_MBOX_INFO_SZ 256 +/* Size of mbox host to target queue info in bytes */ +#define OCTEP_CTRL_MBOX_H2FQ_INFO_SZ 16 +/* Size of mbox target to host queue info in bytes */ +#define OCTEP_CTRL_MBOX_F2HQ_INFO_SZ 16 +/* Size of mbox queue in bytes */ +#define OCTEP_CTRL_MBOX_Q_SZ(sz, cnt) (((sz) + 8) * (cnt)) +/* Size of mbox in bytes */ +#define OCTEP_CTRL_MBOX_SZ(hsz, hcnt, fsz, fcnt) (OCTEP_CTRL_MBOX_INFO_SZ + \ + OCTEP_CTRL_MBOX_H2FQ_INFO_SZ + \ + OCTEP_CTRL_MBOX_F2HQ_INFO_SZ + \ + OCTEP_CTRL_MBOX_Q_SZ(hsz, hcnt) + \ + OCTEP_CTRL_MBOX_Q_SZ(fsz, fcnt)) + +/* Valid request message */ +#define OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ BIT(0) +/* Valid response message */ +#define OCTEP_CTRL_MBOX_MSG_HDR_FLAG_RESP BIT(1) +/* Valid notification, no response required */ +#define OCTEP_CTRL_MBOX_MSG_HDR_FLAG_NOTIFY BIT(2) + +enum octep_ctrl_mbox_status { + OCTEP_CTRL_MBOX_STATUS_INVALID = 0, + OCTEP_CTRL_MBOX_STATUS_INIT, + OCTEP_CTRL_MBOX_STATUS_READY, + OCTEP_CTRL_MBOX_STATUS_UNINIT +}; + +/* mbox message */ +union octep_ctrl_mbox_msg_hdr { + u64 word0; + struct { + /* OCTEP_CTRL_MBOX_MSG_HDR_FLAG_* */ + u32 flags; + /* size of message in words excluding header */ + u32 sizew; + }; +}; + +/* mbox message */ +struct octep_ctrl_mbox_msg { + /* mbox transaction header */ + union octep_ctrl_mbox_msg_hdr hdr; + /* pointer to message buffer */ + void *msg; +}; + +/* Mbox queue */ +struct octep_ctrl_mbox_q { + /* q element size, should be aligned to unsigned long */ + u16 elem_sz; + /* q element count, should be power of 2 */ + u16 elem_cnt; + /* q mask */ + u16 mask; + /* producer address in bar mem */ + u8 __iomem *hw_prod; + /* consumer address in bar mem */ + u8 __iomem *hw_cons; + /* q base address in bar mem */ + u8 __iomem *hw_q; +}; + +struct octep_ctrl_mbox { + /* host driver version */ + u64 version; + /* size of bar memory */ + u32 barmem_sz; + /* pointer to BAR memory */ + u8 __iomem *barmem; + /* user context for callback, can be null */ + void *user_ctx; + /* callback handler for processing request, called from octep_ctrl_mbox_recv */ + int (*process_req)(void *user_ctx, struct octep_ctrl_mbox_msg *msg); + /* host-to-fw queue */ + struct octep_ctrl_mbox_q h2fq; + /* fw-to-host queue */ + struct octep_ctrl_mbox_q f2hq; + /* lock for h2fq */ + struct mutex h2fq_lock; + /* lock for f2hq */ + struct mutex f2hq_lock; +}; + +/* Initialize control mbox. + * + * @param mbox: non-null pointer to struct octep_ctrl_mbox. + * + * return value: 0 on success, -errno on failure. + */ +int octep_ctrl_mbox_init(struct octep_ctrl_mbox *mbox); + +/* Send mbox message. + * + * @param mbox: non-null pointer to struct octep_ctrl_mbox. + * + * return value: 0 on success, -errno on failure. + */ +int octep_ctrl_mbox_send(struct octep_ctrl_mbox *mbox, struct octep_ctrl_mbox_msg *msg); + +/* Retrieve mbox message. + * + * @param mbox: non-null pointer to struct octep_ctrl_mbox. + * + * return value: 0 on success, -errno on failure. + */ +int octep_ctrl_mbox_recv(struct octep_ctrl_mbox *mbox, struct octep_ctrl_mbox_msg *msg); + +/* Uninitialize control mbox. + * + * @param ep: non-null pointer to struct octep_ctrl_mbox. + * + * return value: 0 on success, -errno on failure. + */ +int octep_ctrl_mbox_uninit(struct octep_ctrl_mbox *mbox); + +#endif /* __OCTEP_CTRL_MBOX_H__ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c new file mode 100644 index 000000000000..7c00c896ab98 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ +#include <linux/string.h> +#include <linux/types.h> +#include <linux/etherdevice.h> +#include <linux/pci.h> + +#include "octep_config.h" +#include "octep_main.h" +#include "octep_ctrl_net.h" + +int octep_get_link_status(struct octep_device *oct) +{ + struct octep_ctrl_net_h2f_req req = {}; + struct octep_ctrl_net_h2f_resp *resp; + struct octep_ctrl_mbox_msg msg = {}; + int err; + + req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_LINK_STATUS; + req.link.cmd = OCTEP_CTRL_NET_CMD_GET; + + msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ; + msg.hdr.sizew = OCTEP_CTRL_NET_H2F_STATE_REQ_SZW; + msg.msg = &req; + err = octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg); + if (err) + return err; + + resp = (struct octep_ctrl_net_h2f_resp *)&req; + return resp->link.state; +} + +void octep_set_link_status(struct octep_device *oct, bool up) +{ + struct octep_ctrl_net_h2f_req req = {}; + struct octep_ctrl_mbox_msg msg = {}; + + req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_LINK_STATUS; + req.link.cmd = OCTEP_CTRL_NET_CMD_SET; + req.link.state = (up) ? OCTEP_CTRL_NET_STATE_UP : OCTEP_CTRL_NET_STATE_DOWN; + + msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ; + msg.hdr.sizew = OCTEP_CTRL_NET_H2F_STATE_REQ_SZW; + msg.msg = &req; + octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg); +} + +void octep_set_rx_state(struct octep_device *oct, bool up) +{ + struct octep_ctrl_net_h2f_req req = {}; + struct octep_ctrl_mbox_msg msg = {}; + + req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_RX_STATE; + req.link.cmd = OCTEP_CTRL_NET_CMD_SET; + req.link.state = (up) ? OCTEP_CTRL_NET_STATE_UP : OCTEP_CTRL_NET_STATE_DOWN; + + msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ; + msg.hdr.sizew = OCTEP_CTRL_NET_H2F_STATE_REQ_SZW; + msg.msg = &req; + octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg); +} + +int octep_get_mac_addr(struct octep_device *oct, u8 *addr) +{ + struct octep_ctrl_net_h2f_req req = {}; + struct octep_ctrl_net_h2f_resp *resp; + struct octep_ctrl_mbox_msg msg = {}; + int err; + + req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_MAC; + req.link.cmd = OCTEP_CTRL_NET_CMD_GET; + + msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ; + msg.hdr.sizew = OCTEP_CTRL_NET_H2F_MAC_REQ_SZW; + msg.msg = &req; + err = octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg); + if (err) + return err; + + resp = (struct octep_ctrl_net_h2f_resp *)&req; + memcpy(addr, resp->mac.addr, ETH_ALEN); + + return err; +} + +int octep_set_mac_addr(struct octep_device *oct, u8 *addr) +{ + struct octep_ctrl_net_h2f_req req = {}; + struct octep_ctrl_mbox_msg msg = {}; + + req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_MAC; + req.mac.cmd = OCTEP_CTRL_NET_CMD_SET; + memcpy(&req.mac.addr, addr, ETH_ALEN); + + msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ; + msg.hdr.sizew = OCTEP_CTRL_NET_H2F_MAC_REQ_SZW; + msg.msg = &req; + + return octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg); +} + +int octep_set_mtu(struct octep_device *oct, int mtu) +{ + struct octep_ctrl_net_h2f_req req = {}; + struct octep_ctrl_mbox_msg msg = {}; + + req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_MTU; + req.mtu.cmd = OCTEP_CTRL_NET_CMD_SET; + req.mtu.val = mtu; + + msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ; + msg.hdr.sizew = OCTEP_CTRL_NET_H2F_MTU_REQ_SZW; + msg.msg = &req; + + return octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg); +} + +int octep_get_if_stats(struct octep_device *oct) +{ + void __iomem *iface_rx_stats; + void __iomem *iface_tx_stats; + struct octep_ctrl_net_h2f_req req = {}; + struct octep_ctrl_mbox_msg msg = {}; + int err; + + req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_GET_IF_STATS; + req.mac.cmd = OCTEP_CTRL_NET_CMD_GET; + req.get_stats.offset = oct->ctrl_mbox_ifstats_offset; + + msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ; + msg.hdr.sizew = OCTEP_CTRL_NET_H2F_GET_STATS_REQ_SZW; + msg.msg = &req; + err = octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg); + if (err) + return err; + + iface_rx_stats = oct->ctrl_mbox.barmem + oct->ctrl_mbox_ifstats_offset; + iface_tx_stats = oct->ctrl_mbox.barmem + oct->ctrl_mbox_ifstats_offset + + sizeof(struct octep_iface_rx_stats); + memcpy_fromio(&oct->iface_rx_stats, iface_rx_stats, sizeof(struct octep_iface_rx_stats)); + memcpy_fromio(&oct->iface_tx_stats, iface_tx_stats, sizeof(struct octep_iface_tx_stats)); + + return err; +} + +int octep_get_link_info(struct octep_device *oct) +{ + struct octep_ctrl_net_h2f_req req = {}; + struct octep_ctrl_net_h2f_resp *resp; + struct octep_ctrl_mbox_msg msg = {}; + int err; + + req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_LINK_INFO; + req.mac.cmd = OCTEP_CTRL_NET_CMD_GET; + + msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ; + msg.hdr.sizew = OCTEP_CTRL_NET_H2F_LINK_INFO_REQ_SZW; + msg.msg = &req; + err = octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg); + if (err) + return err; + + resp = (struct octep_ctrl_net_h2f_resp *)&req; + oct->link_info.supported_modes = resp->link_info.supported_modes; + oct->link_info.advertised_modes = resp->link_info.advertised_modes; + oct->link_info.autoneg = resp->link_info.autoneg; + oct->link_info.pause = resp->link_info.pause; + oct->link_info.speed = resp->link_info.speed; + + return err; +} + +int octep_set_link_info(struct octep_device *oct, struct octep_iface_link_info *link_info) +{ + struct octep_ctrl_net_h2f_req req = {}; + struct octep_ctrl_mbox_msg msg = {}; + + req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_LINK_INFO; + req.link_info.cmd = OCTEP_CTRL_NET_CMD_SET; + req.link_info.info.advertised_modes = link_info->advertised_modes; + req.link_info.info.autoneg = link_info->autoneg; + req.link_info.info.pause = link_info->pause; + req.link_info.info.speed = link_info->speed; + + msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ; + msg.hdr.sizew = OCTEP_CTRL_NET_H2F_LINK_INFO_REQ_SZW; + msg.msg = &req; + + return octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg); +} diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h new file mode 100644 index 000000000000..f23b58381322 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h @@ -0,0 +1,299 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ +#ifndef __OCTEP_CTRL_NET_H__ +#define __OCTEP_CTRL_NET_H__ + +/* Supported commands */ +enum octep_ctrl_net_cmd { + OCTEP_CTRL_NET_CMD_GET = 0, + OCTEP_CTRL_NET_CMD_SET, +}; + +/* Supported states */ +enum octep_ctrl_net_state { + OCTEP_CTRL_NET_STATE_DOWN = 0, + OCTEP_CTRL_NET_STATE_UP, +}; + +/* Supported replies */ +enum octep_ctrl_net_reply { + OCTEP_CTRL_NET_REPLY_OK = 0, + OCTEP_CTRL_NET_REPLY_GENERIC_FAIL, + OCTEP_CTRL_NET_REPLY_INVALID_PARAM, +}; + +/* Supported host to fw commands */ +enum octep_ctrl_net_h2f_cmd { + OCTEP_CTRL_NET_H2F_CMD_INVALID = 0, + OCTEP_CTRL_NET_H2F_CMD_MTU, + OCTEP_CTRL_NET_H2F_CMD_MAC, + OCTEP_CTRL_NET_H2F_CMD_GET_IF_STATS, + OCTEP_CTRL_NET_H2F_CMD_GET_XSTATS, + OCTEP_CTRL_NET_H2F_CMD_GET_Q_STATS, + OCTEP_CTRL_NET_H2F_CMD_LINK_STATUS, + OCTEP_CTRL_NET_H2F_CMD_RX_STATE, + OCTEP_CTRL_NET_H2F_CMD_LINK_INFO, +}; + +/* Supported fw to host commands */ +enum octep_ctrl_net_f2h_cmd { + OCTEP_CTRL_NET_F2H_CMD_INVALID = 0, + OCTEP_CTRL_NET_F2H_CMD_LINK_STATUS, +}; + +struct octep_ctrl_net_req_hdr { + /* sender id */ + u16 sender; + /* receiver id */ + u16 receiver; + /* octep_ctrl_net_h2t_cmd */ + u16 cmd; + /* reserved */ + u16 rsvd0; +}; + +/* get/set mtu request */ +struct octep_ctrl_net_h2f_req_cmd_mtu { + /* enum octep_ctrl_net_cmd */ + u16 cmd; + /* 0-65535 */ + u16 val; +}; + +/* get/set mac request */ +struct octep_ctrl_net_h2f_req_cmd_mac { + /* enum octep_ctrl_net_cmd */ + u16 cmd; + /* xx:xx:xx:xx:xx:xx */ + u8 addr[ETH_ALEN]; +}; + +/* get if_stats, xstats, q_stats request */ +struct octep_ctrl_net_h2f_req_cmd_get_stats { + /* offset into barmem where fw should copy over stats */ + u32 offset; +}; + +/* get/set link state, rx state */ +struct octep_ctrl_net_h2f_req_cmd_state { + /* enum octep_ctrl_net_cmd */ + u16 cmd; + /* enum octep_ctrl_net_state */ + u16 state; +}; + +/* link info */ +struct octep_ctrl_net_link_info { + /* Bitmap of Supported link speeds/modes */ + u64 supported_modes; + /* Bitmap of Advertised link speeds/modes */ + u64 advertised_modes; + /* Autonegotation state; bit 0=disabled; bit 1=enabled */ + u8 autoneg; + /* Pause frames setting. bit 0=disabled; bit 1=enabled */ + u8 pause; + /* Negotiated link speed in Mbps */ + u32 speed; +}; + +/* get/set link info */ +struct octep_ctrl_net_h2f_req_cmd_link_info { + /* enum octep_ctrl_net_cmd */ + u16 cmd; + /* struct octep_ctrl_net_link_info */ + struct octep_ctrl_net_link_info info; +}; + +/* Host to fw request data */ +struct octep_ctrl_net_h2f_req { + struct octep_ctrl_net_req_hdr hdr; + union { + struct octep_ctrl_net_h2f_req_cmd_mtu mtu; + struct octep_ctrl_net_h2f_req_cmd_mac mac; + struct octep_ctrl_net_h2f_req_cmd_get_stats get_stats; + struct octep_ctrl_net_h2f_req_cmd_state link; + struct octep_ctrl_net_h2f_req_cmd_state rx; + struct octep_ctrl_net_h2f_req_cmd_link_info link_info; + }; +} __packed; + +struct octep_ctrl_net_resp_hdr { + /* sender id */ + u16 sender; + /* receiver id */ + u16 receiver; + /* octep_ctrl_net_h2t_cmd */ + u16 cmd; + /* octep_ctrl_net_reply */ + u16 reply; +}; + +/* get mtu response */ +struct octep_ctrl_net_h2f_resp_cmd_mtu { + /* 0-65535 */ + u16 val; +}; + +/* get mac response */ +struct octep_ctrl_net_h2f_resp_cmd_mac { + /* xx:xx:xx:xx:xx:xx */ + u8 addr[ETH_ALEN]; +}; + +/* get link state, rx state response */ +struct octep_ctrl_net_h2f_resp_cmd_state { + /* enum octep_ctrl_net_state */ + u16 state; +}; + +/* Host to fw response data */ +struct octep_ctrl_net_h2f_resp { + struct octep_ctrl_net_resp_hdr hdr; + union { + struct octep_ctrl_net_h2f_resp_cmd_mtu mtu; + struct octep_ctrl_net_h2f_resp_cmd_mac mac; + struct octep_ctrl_net_h2f_resp_cmd_state link; + struct octep_ctrl_net_h2f_resp_cmd_state rx; + struct octep_ctrl_net_link_info link_info; + }; +} __packed; + +/* link state notofication */ +struct octep_ctrl_net_f2h_req_cmd_state { + /* enum octep_ctrl_net_state */ + u16 state; +}; + +/* Fw to host request data */ +struct octep_ctrl_net_f2h_req { + struct octep_ctrl_net_req_hdr hdr; + union { + struct octep_ctrl_net_f2h_req_cmd_state link; + }; +}; + +/* Fw to host response data */ +struct octep_ctrl_net_f2h_resp { + struct octep_ctrl_net_resp_hdr hdr; +}; + +/* Size of host to fw octep_ctrl_mbox queue element */ +union octep_ctrl_net_h2f_data_sz { + struct octep_ctrl_net_h2f_req h2f_req; + struct octep_ctrl_net_h2f_resp h2f_resp; +}; + +/* Size of fw to host octep_ctrl_mbox queue element */ +union octep_ctrl_net_f2h_data_sz { + struct octep_ctrl_net_f2h_req f2h_req; + struct octep_ctrl_net_f2h_resp f2h_resp; +}; + +/* size of host to fw data in words */ +#define OCTEP_CTRL_NET_H2F_DATA_SZW ((sizeof(union octep_ctrl_net_h2f_data_sz)) / \ + (sizeof(unsigned long))) + +/* size of fw to host data in words */ +#define OCTEP_CTRL_NET_F2H_DATA_SZW ((sizeof(union octep_ctrl_net_f2h_data_sz)) / \ + (sizeof(unsigned long))) + +/* size in words of get/set mtu request */ +#define OCTEP_CTRL_NET_H2F_MTU_REQ_SZW 2 +/* size in words of get/set mac request */ +#define OCTEP_CTRL_NET_H2F_MAC_REQ_SZW 2 +/* size in words of get stats request */ +#define OCTEP_CTRL_NET_H2F_GET_STATS_REQ_SZW 2 +/* size in words of get/set state request */ +#define OCTEP_CTRL_NET_H2F_STATE_REQ_SZW 2 +/* size in words of get/set link info request */ +#define OCTEP_CTRL_NET_H2F_LINK_INFO_REQ_SZW 4 + +/* size in words of get mtu response */ +#define OCTEP_CTRL_NET_H2F_GET_MTU_RESP_SZW 2 +/* size in words of set mtu response */ +#define OCTEP_CTRL_NET_H2F_SET_MTU_RESP_SZW 1 +/* size in words of get mac response */ +#define OCTEP_CTRL_NET_H2F_GET_MAC_RESP_SZW 2 +/* size in words of set mac response */ +#define OCTEP_CTRL_NET_H2F_SET_MAC_RESP_SZW 1 +/* size in words of get state request */ +#define OCTEP_CTRL_NET_H2F_GET_STATE_RESP_SZW 2 +/* size in words of set state request */ +#define OCTEP_CTRL_NET_H2F_SET_STATE_RESP_SZW 1 +/* size in words of get link info request */ +#define OCTEP_CTRL_NET_H2F_GET_LINK_INFO_RESP_SZW 4 +/* size in words of set link info request */ +#define OCTEP_CTRL_NET_H2F_SET_LINK_INFO_RESP_SZW 1 + +/** Get link status from firmware. + * + * @param oct: non-null pointer to struct octep_device. + * + * return value: link status 0=down, 1=up. + */ +int octep_get_link_status(struct octep_device *oct); + +/** Set link status in firmware. + * + * @param oct: non-null pointer to struct octep_device. + * @param up: boolean status. + */ +void octep_set_link_status(struct octep_device *oct, bool up); + +/** Set rx state in firmware. + * + * @param oct: non-null pointer to struct octep_device. + * @param up: boolean status. + */ +void octep_set_rx_state(struct octep_device *oct, bool up); + +/** Get mac address from firmware. + * + * @param oct: non-null pointer to struct octep_device. + * @param addr: non-null pointer to mac address. + * + * return value: 0 on success, -errno on failure. + */ +int octep_get_mac_addr(struct octep_device *oct, u8 *addr); + +/** Set mac address in firmware. + * + * @param oct: non-null pointer to struct octep_device. + * @param addr: non-null pointer to mac address. + */ +int octep_set_mac_addr(struct octep_device *oct, u8 *addr); + +/** Set mtu in firmware. + * + * @param oct: non-null pointer to struct octep_device. + * @param mtu: mtu. + */ +int octep_set_mtu(struct octep_device *oct, int mtu); + +/** Get interface statistics from firmware. + * + * @param oct: non-null pointer to struct octep_device. + * + * return value: 0 on success, -errno on failure. + */ +int octep_get_if_stats(struct octep_device *oct); + +/** Get link info from firmware. + * + * @param oct: non-null pointer to struct octep_device. + * + * return value: 0 on success, -errno on failure. + */ +int octep_get_link_info(struct octep_device *oct); + +/** Set link info in firmware. + * + * @param oct: non-null pointer to struct octep_device. + */ +int octep_set_link_info(struct octep_device *oct, struct octep_iface_link_info *link_info); + +#endif /* __OCTEP_CTRL_NET_H__ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c new file mode 100644 index 000000000000..87ef129b269a --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/ethtool.h> + +#include "octep_config.h" +#include "octep_main.h" +#include "octep_ctrl_net.h" + +static const char octep_gstrings_global_stats[][ETH_GSTRING_LEN] = { + "rx_packets", + "tx_packets", + "rx_bytes", + "tx_bytes", + "rx_alloc_errors", + "tx_busy_errors", + "rx_dropped", + "tx_dropped", + "tx_hw_pkts", + "tx_hw_octs", + "tx_hw_bcast", + "tx_hw_mcast", + "tx_hw_underflow", + "tx_hw_control", + "tx_less_than_64", + "tx_equal_64", + "tx_equal_65_to_127", + "tx_equal_128_to_255", + "tx_equal_256_to_511", + "tx_equal_512_to_1023", + "tx_equal_1024_to_1518", + "tx_greater_than_1518", + "rx_hw_pkts", + "rx_hw_bytes", + "rx_hw_bcast", + "rx_hw_mcast", + "rx_pause_pkts", + "rx_pause_bytes", + "rx_dropped_pkts_fifo_full", + "rx_dropped_bytes_fifo_full", + "rx_err_pkts", +}; + +#define OCTEP_GLOBAL_STATS_CNT (sizeof(octep_gstrings_global_stats) / ETH_GSTRING_LEN) + +static const char octep_gstrings_tx_q_stats[][ETH_GSTRING_LEN] = { + "tx_packets_posted[Q-%u]", + "tx_packets_completed[Q-%u]", + "tx_bytes[Q-%u]", + "tx_busy[Q-%u]", +}; + +#define OCTEP_TX_Q_STATS_CNT (sizeof(octep_gstrings_tx_q_stats) / ETH_GSTRING_LEN) + +static const char octep_gstrings_rx_q_stats[][ETH_GSTRING_LEN] = { + "rx_packets[Q-%u]", + "rx_bytes[Q-%u]", + "rx_alloc_errors[Q-%u]", +}; + +#define OCTEP_RX_Q_STATS_CNT (sizeof(octep_gstrings_rx_q_stats) / ETH_GSTRING_LEN) + +static void octep_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct octep_device *oct = netdev_priv(netdev); + + strscpy(info->driver, OCTEP_DRV_NAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(oct->pdev), sizeof(info->bus_info)); +} + +static void octep_get_strings(struct net_device *netdev, + u32 stringset, u8 *data) +{ + struct octep_device *oct = netdev_priv(netdev); + u16 num_queues = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + char *strings = (char *)data; + int i, j; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < OCTEP_GLOBAL_STATS_CNT; i++) { + snprintf(strings, ETH_GSTRING_LEN, + octep_gstrings_global_stats[i]); + strings += ETH_GSTRING_LEN; + } + + for (i = 0; i < num_queues; i++) { + for (j = 0; j < OCTEP_TX_Q_STATS_CNT; j++) { + snprintf(strings, ETH_GSTRING_LEN, + octep_gstrings_tx_q_stats[j], i); + strings += ETH_GSTRING_LEN; + } + } + + for (i = 0; i < num_queues; i++) { + for (j = 0; j < OCTEP_RX_Q_STATS_CNT; j++) { + snprintf(strings, ETH_GSTRING_LEN, + octep_gstrings_rx_q_stats[j], i); + strings += ETH_GSTRING_LEN; + } + } + break; + default: + break; + } +} + +static int octep_get_sset_count(struct net_device *netdev, int sset) +{ + struct octep_device *oct = netdev_priv(netdev); + u16 num_queues = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + switch (sset) { + case ETH_SS_STATS: + return OCTEP_GLOBAL_STATS_CNT + (num_queues * + (OCTEP_TX_Q_STATS_CNT + OCTEP_RX_Q_STATS_CNT)); + break; + default: + return -EOPNOTSUPP; + } +} + +static void +octep_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct octep_device *oct = netdev_priv(netdev); + struct octep_iface_tx_stats *iface_tx_stats; + struct octep_iface_rx_stats *iface_rx_stats; + u64 rx_packets, rx_bytes; + u64 tx_packets, tx_bytes; + u64 rx_alloc_errors, tx_busy_errors; + int q, i; + + rx_packets = 0; + rx_bytes = 0; + tx_packets = 0; + tx_bytes = 0; + rx_alloc_errors = 0; + tx_busy_errors = 0; + tx_packets = 0; + tx_bytes = 0; + rx_packets = 0; + rx_bytes = 0; + + octep_get_if_stats(oct); + iface_tx_stats = &oct->iface_tx_stats; + iface_rx_stats = &oct->iface_rx_stats; + + for (q = 0; q < oct->num_oqs; q++) { + struct octep_iq *iq = oct->iq[q]; + struct octep_oq *oq = oct->oq[q]; + + tx_packets += iq->stats.instr_completed; + tx_bytes += iq->stats.bytes_sent; + tx_busy_errors += iq->stats.tx_busy; + + rx_packets += oq->stats.packets; + rx_bytes += oq->stats.bytes; + rx_alloc_errors += oq->stats.alloc_failures; + } + i = 0; + data[i++] = rx_packets; + data[i++] = tx_packets; + data[i++] = rx_bytes; + data[i++] = tx_bytes; + data[i++] = rx_alloc_errors; + data[i++] = tx_busy_errors; + data[i++] = iface_rx_stats->dropped_pkts_fifo_full + + iface_rx_stats->err_pkts; + data[i++] = iface_tx_stats->xscol + + iface_tx_stats->xsdef; + data[i++] = iface_tx_stats->pkts; + data[i++] = iface_tx_stats->octs; + data[i++] = iface_tx_stats->bcst; + data[i++] = iface_tx_stats->mcst; + data[i++] = iface_tx_stats->undflw; + data[i++] = iface_tx_stats->ctl; + data[i++] = iface_tx_stats->hist_lt64; + data[i++] = iface_tx_stats->hist_eq64; + data[i++] = iface_tx_stats->hist_65to127; + data[i++] = iface_tx_stats->hist_128to255; + data[i++] = iface_tx_stats->hist_256to511; + data[i++] = iface_tx_stats->hist_512to1023; + data[i++] = iface_tx_stats->hist_1024to1518; + data[i++] = iface_tx_stats->hist_gt1518; + data[i++] = iface_rx_stats->pkts; + data[i++] = iface_rx_stats->octets; + data[i++] = iface_rx_stats->mcast_pkts; + data[i++] = iface_rx_stats->bcast_pkts; + data[i++] = iface_rx_stats->pause_pkts; + data[i++] = iface_rx_stats->pause_octets; + data[i++] = iface_rx_stats->dropped_pkts_fifo_full; + data[i++] = iface_rx_stats->dropped_octets_fifo_full; + data[i++] = iface_rx_stats->err_pkts; + + /* Per Tx Queue stats */ + for (q = 0; q < oct->num_iqs; q++) { + struct octep_iq *iq = oct->iq[q]; + + data[i++] = iq->stats.instr_posted; + data[i++] = iq->stats.instr_completed; + data[i++] = iq->stats.bytes_sent; + data[i++] = iq->stats.tx_busy; + } + + /* Per Rx Queue stats */ + for (q = 0; q < oct->num_oqs; q++) { + struct octep_oq *oq = oct->oq[q]; + + data[i++] = oq->stats.packets; + data[i++] = oq->stats.bytes; + data[i++] = oq->stats.alloc_failures; + } +} + +#define OCTEP_SET_ETHTOOL_LINK_MODES_BITMAP(octep_speeds, ksettings, name) \ +{ \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_T)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseT_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_R)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseR_FEC); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_CR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseCR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_KR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseKR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_LR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseLR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_SR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseSR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_25GBASE_CR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseCR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_25GBASE_KR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseKR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_25GBASE_SR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseSR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_40GBASE_CR4)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseCR4_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_40GBASE_KR4)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseKR4_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_40GBASE_LR4)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseLR4_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_40GBASE_SR4)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseSR4_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_CR2)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseCR2_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_KR2)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseKR2_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_SR2)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseSR2_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_CR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseCR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_KR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseKR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_LR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseLR_ER_FR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_SR)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseSR_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_100GBASE_CR4)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseCR4_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_100GBASE_KR4)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseKR4_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_100GBASE_LR4)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseLR4_ER4_Full); \ + if ((octep_speeds) & BIT(OCTEP_LINK_MODE_100GBASE_SR4)) \ + ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseSR4_Full); \ +} + +static int octep_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct octep_device *oct = netdev_priv(netdev); + struct octep_iface_link_info *link_info; + u32 advertised_modes, supported_modes; + + ethtool_link_ksettings_zero_link_mode(cmd, supported); + ethtool_link_ksettings_zero_link_mode(cmd, advertising); + + octep_get_link_info(oct); + + advertised_modes = oct->link_info.advertised_modes; + supported_modes = oct->link_info.supported_modes; + link_info = &oct->link_info; + + OCTEP_SET_ETHTOOL_LINK_MODES_BITMAP(supported_modes, cmd, supported); + OCTEP_SET_ETHTOOL_LINK_MODES_BITMAP(advertised_modes, cmd, advertising); + + if (link_info->autoneg) { + if (link_info->autoneg & OCTEP_LINK_MODE_AUTONEG_SUPPORTED) + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + if (link_info->autoneg & OCTEP_LINK_MODE_AUTONEG_ADVERTISED) { + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); + cmd->base.autoneg = AUTONEG_ENABLE; + } else { + cmd->base.autoneg = AUTONEG_DISABLE; + } + } else { + cmd->base.autoneg = AUTONEG_DISABLE; + } + + if (link_info->pause) { + if (link_info->pause & OCTEP_LINK_MODE_PAUSE_SUPPORTED) + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + if (link_info->pause & OCTEP_LINK_MODE_PAUSE_ADVERTISED) + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); + } + + cmd->base.port = PORT_FIBRE; + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); + + if (netif_carrier_ok(netdev)) { + cmd->base.speed = link_info->speed; + cmd->base.duplex = DUPLEX_FULL; + } else { + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + } + return 0; +} + +static int octep_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + struct octep_device *oct = netdev_priv(netdev); + struct octep_iface_link_info link_info_new; + struct octep_iface_link_info *link_info; + u64 advertised = 0; + u8 autoneg = 0; + int err; + + link_info = &oct->link_info; + memcpy(&link_info_new, link_info, sizeof(struct octep_iface_link_info)); + + /* Only Full duplex is supported; + * Assume full duplex when duplex is unknown. + */ + if (cmd->base.duplex != DUPLEX_FULL && + cmd->base.duplex != DUPLEX_UNKNOWN) + return -EOPNOTSUPP; + + if (cmd->base.autoneg == AUTONEG_ENABLE) { + if (!(link_info->autoneg & OCTEP_LINK_MODE_AUTONEG_SUPPORTED)) + return -EOPNOTSUPP; + autoneg = 1; + } + + if (!bitmap_subset(cmd->link_modes.advertising, + cmd->link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS)) + return -EINVAL; + + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10000baseT_Full)) + advertised |= BIT(OCTEP_LINK_MODE_10GBASE_T); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10000baseR_FEC)) + advertised |= BIT(OCTEP_LINK_MODE_10GBASE_R); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10000baseCR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_10GBASE_CR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10000baseKR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_10GBASE_KR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10000baseLR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_10GBASE_LR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 10000baseSR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_10GBASE_SR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 25000baseCR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_25GBASE_CR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 25000baseKR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_25GBASE_KR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 25000baseSR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_25GBASE_SR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 40000baseCR4_Full)) + advertised |= BIT(OCTEP_LINK_MODE_40GBASE_CR4); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 40000baseKR4_Full)) + advertised |= BIT(OCTEP_LINK_MODE_40GBASE_KR4); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 40000baseLR4_Full)) + advertised |= BIT(OCTEP_LINK_MODE_40GBASE_LR4); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 40000baseSR4_Full)) + advertised |= BIT(OCTEP_LINK_MODE_40GBASE_SR4); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 50000baseCR2_Full)) + advertised |= BIT(OCTEP_LINK_MODE_50GBASE_CR2); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 50000baseKR2_Full)) + advertised |= BIT(OCTEP_LINK_MODE_50GBASE_KR2); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 50000baseSR2_Full)) + advertised |= BIT(OCTEP_LINK_MODE_50GBASE_SR2); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 50000baseCR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_50GBASE_CR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 50000baseKR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_50GBASE_KR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 50000baseLR_ER_FR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_50GBASE_LR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 50000baseSR_Full)) + advertised |= BIT(OCTEP_LINK_MODE_50GBASE_SR); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 100000baseCR4_Full)) + advertised |= BIT(OCTEP_LINK_MODE_100GBASE_CR4); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 100000baseKR4_Full)) + advertised |= BIT(OCTEP_LINK_MODE_100GBASE_KR4); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 100000baseLR4_ER4_Full)) + advertised |= BIT(OCTEP_LINK_MODE_100GBASE_LR4); + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, + 100000baseSR4_Full)) + advertised |= BIT(OCTEP_LINK_MODE_100GBASE_SR4); + + if (advertised == link_info->advertised_modes && + cmd->base.speed == link_info->speed && + cmd->base.autoneg == link_info->autoneg) + return 0; + + link_info_new.advertised_modes = advertised; + link_info_new.speed = cmd->base.speed; + link_info_new.autoneg = autoneg; + + err = octep_set_link_info(oct, &link_info_new); + if (err) + return err; + + memcpy(link_info, &link_info_new, sizeof(struct octep_iface_link_info)); + return 0; +} + +static const struct ethtool_ops octep_ethtool_ops = { + .get_drvinfo = octep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = octep_get_strings, + .get_sset_count = octep_get_sset_count, + .get_ethtool_stats = octep_get_ethtool_stats, + .get_link_ksettings = octep_get_link_ksettings, + .set_link_ksettings = octep_set_link_ksettings, +}; + +void octep_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &octep_ethtool_ops; +} diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c new file mode 100644 index 000000000000..5d39c857ea41 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -0,0 +1,1177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/aer.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/vmalloc.h> + +#include "octep_config.h" +#include "octep_main.h" +#include "octep_ctrl_net.h" + +struct workqueue_struct *octep_wq; + +/* Supported Devices */ +static const struct pci_device_id octep_pci_id_tbl[] = { + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_PF)}, + {0, }, +}; +MODULE_DEVICE_TABLE(pci, octep_pci_id_tbl); + +MODULE_AUTHOR("Veerasenareddy Burru <vburru@marvell.com>"); +MODULE_DESCRIPTION(OCTEP_DRV_STRING); +MODULE_LICENSE("GPL"); +MODULE_VERSION(OCTEP_DRV_VERSION_STR); + +/** + * octep_alloc_ioq_vectors() - Allocate Tx/Rx Queue interrupt info. + * + * @oct: Octeon device private data structure. + * + * Allocate resources to hold per Tx/Rx queue interrupt info. + * This is the information passed to interrupt handler, from which napi poll + * is scheduled and includes quick access to private data of Tx/Rx queue + * corresponding to the interrupt being handled. + * + * Return: 0, on successful allocation of resources for all queue interrupts. + * -1, if failed to allocate any resource. + */ +static int octep_alloc_ioq_vectors(struct octep_device *oct) +{ + int i; + struct octep_ioq_vector *ioq_vector; + + for (i = 0; i < oct->num_oqs; i++) { + oct->ioq_vector[i] = vzalloc(sizeof(*oct->ioq_vector[i])); + if (!oct->ioq_vector[i]) + goto free_ioq_vector; + + ioq_vector = oct->ioq_vector[i]; + ioq_vector->iq = oct->iq[i]; + ioq_vector->oq = oct->oq[i]; + ioq_vector->octep_dev = oct; + } + + dev_info(&oct->pdev->dev, "Allocated %d IOQ vectors\n", oct->num_oqs); + return 0; + +free_ioq_vector: + while (i) { + i--; + vfree(oct->ioq_vector[i]); + oct->ioq_vector[i] = NULL; + } + return -1; +} + +/** + * octep_free_ioq_vectors() - Free Tx/Rx Queue interrupt vector info. + * + * @oct: Octeon device private data structure. + */ +static void octep_free_ioq_vectors(struct octep_device *oct) +{ + int i; + + for (i = 0; i < oct->num_oqs; i++) { + if (oct->ioq_vector[i]) { + vfree(oct->ioq_vector[i]); + oct->ioq_vector[i] = NULL; + } + } + netdev_info(oct->netdev, "Freed IOQ Vectors\n"); +} + +/** + * octep_enable_msix_range() - enable MSI-x interrupts. + * + * @oct: Octeon device private data structure. + * + * Allocate and enable all MSI-x interrupts (queue and non-queue interrupts) + * for the Octeon device. + * + * Return: 0, on successfully enabling all MSI-x interrupts. + * -1, if failed to enable any MSI-x interrupt. + */ +static int octep_enable_msix_range(struct octep_device *oct) +{ + int num_msix, msix_allocated; + int i; + + /* Generic interrupts apart from input/output queues */ + num_msix = oct->num_oqs + CFG_GET_NON_IOQ_MSIX(oct->conf); + oct->msix_entries = kcalloc(num_msix, + sizeof(struct msix_entry), GFP_KERNEL); + if (!oct->msix_entries) + goto msix_alloc_err; + + for (i = 0; i < num_msix; i++) + oct->msix_entries[i].entry = i; + + msix_allocated = pci_enable_msix_range(oct->pdev, oct->msix_entries, + num_msix, num_msix); + if (msix_allocated != num_msix) { + dev_err(&oct->pdev->dev, + "Failed to enable %d msix irqs; got only %d\n", + num_msix, msix_allocated); + goto enable_msix_err; + } + oct->num_irqs = msix_allocated; + dev_info(&oct->pdev->dev, "MSI-X enabled successfully\n"); + + return 0; + +enable_msix_err: + if (msix_allocated > 0) + pci_disable_msix(oct->pdev); + kfree(oct->msix_entries); + oct->msix_entries = NULL; +msix_alloc_err: + return -1; +} + +/** + * octep_disable_msix() - disable MSI-x interrupts. + * + * @oct: Octeon device private data structure. + * + * Disable MSI-x on the Octeon device. + */ +static void octep_disable_msix(struct octep_device *oct) +{ + pci_disable_msix(oct->pdev); + kfree(oct->msix_entries); + oct->msix_entries = NULL; + dev_info(&oct->pdev->dev, "Disabled MSI-X\n"); +} + +/** + * octep_non_ioq_intr_handler() - common handler for all generic interrupts. + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for all non-queue (generic) interrupts. + */ +static irqreturn_t octep_non_ioq_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.non_ioq_intr_handler(oct); +} + +/** + * octep_ioq_intr_handler() - handler for all Tx/Rx queue interrupts. + * + * @irq: Interrupt number. + * @data: interrupt data contains pointers to Tx/Rx queue private data + * and correspong NAPI context. + * + * this is common handler for all non-queue (generic) interrupts. + */ +static irqreturn_t octep_ioq_intr_handler(int irq, void *data) +{ + struct octep_ioq_vector *ioq_vector = data; + struct octep_device *oct = ioq_vector->octep_dev; + + return oct->hw_ops.ioq_intr_handler(ioq_vector); +} + +/** + * octep_request_irqs() - Register interrupt handlers. + * + * @oct: Octeon device private data structure. + * + * Register handlers for all queue and non-queue interrupts. + * + * Return: 0, on successful registration of all interrupt handlers. + * -1, on any error. + */ +static int octep_request_irqs(struct octep_device *oct) +{ + struct net_device *netdev = oct->netdev; + struct octep_ioq_vector *ioq_vector; + struct msix_entry *msix_entry; + char **non_ioq_msix_names; + int num_non_ioq_msix; + int ret, i; + + num_non_ioq_msix = CFG_GET_NON_IOQ_MSIX(oct->conf); + non_ioq_msix_names = CFG_GET_NON_IOQ_MSIX_NAMES(oct->conf); + + oct->non_ioq_irq_names = kcalloc(num_non_ioq_msix, + OCTEP_MSIX_NAME_SIZE, GFP_KERNEL); + if (!oct->non_ioq_irq_names) + goto alloc_err; + + /* First few MSI-X interrupts are non-queue interrupts */ + for (i = 0; i < num_non_ioq_msix; i++) { + char *irq_name; + + irq_name = &oct->non_ioq_irq_names[i * OCTEP_MSIX_NAME_SIZE]; + msix_entry = &oct->msix_entries[i]; + + snprintf(irq_name, OCTEP_MSIX_NAME_SIZE, + "%s-%s", netdev->name, non_ioq_msix_names[i]); + ret = request_irq(msix_entry->vector, + octep_non_ioq_intr_handler, 0, + irq_name, oct); + if (ret) { + netdev_err(netdev, + "request_irq failed for %s; err=%d", + irq_name, ret); + goto non_ioq_irq_err; + } + } + + /* Request IRQs for Tx/Rx queues */ + for (i = 0; i < oct->num_oqs; i++) { + ioq_vector = oct->ioq_vector[i]; + msix_entry = &oct->msix_entries[i + num_non_ioq_msix]; + + snprintf(ioq_vector->name, sizeof(ioq_vector->name), + "%s-q%d", netdev->name, i); + ret = request_irq(msix_entry->vector, + octep_ioq_intr_handler, 0, + ioq_vector->name, ioq_vector); + if (ret) { + netdev_err(netdev, + "request_irq failed for Q-%d; err=%d", + i, ret); + goto ioq_irq_err; + } + + cpumask_set_cpu(i % num_online_cpus(), + &ioq_vector->affinity_mask); + irq_set_affinity_hint(msix_entry->vector, + &ioq_vector->affinity_mask); + } + + return 0; +ioq_irq_err: + while (i > num_non_ioq_msix) { + --i; + irq_set_affinity_hint(oct->msix_entries[i].vector, NULL); + free_irq(oct->msix_entries[i].vector, oct->ioq_vector[i]); + } +non_ioq_irq_err: + while (i) { + --i; + free_irq(oct->msix_entries[i].vector, oct); + } +alloc_err: + return -1; +} + +/** + * octep_free_irqs() - free all registered interrupts. + * + * @oct: Octeon device private data structure. + * + * Free all queue and non-queue interrupts of the Octeon device. + */ +static void octep_free_irqs(struct octep_device *oct) +{ + int i; + + /* First few MSI-X interrupts are non queue interrupts; free them */ + for (i = 0; i < CFG_GET_NON_IOQ_MSIX(oct->conf); i++) + free_irq(oct->msix_entries[i].vector, oct); + kfree(oct->non_ioq_irq_names); + + /* Free IRQs for Input/Output (Tx/Rx) queues */ + for (i = CFG_GET_NON_IOQ_MSIX(oct->conf); i < oct->num_irqs; i++) { + irq_set_affinity_hint(oct->msix_entries[i].vector, NULL); + free_irq(oct->msix_entries[i].vector, + oct->ioq_vector[i - CFG_GET_NON_IOQ_MSIX(oct->conf)]); + } + netdev_info(oct->netdev, "IRQs freed\n"); +} + +/** + * octep_setup_irqs() - setup interrupts for the Octeon device. + * + * @oct: Octeon device private data structure. + * + * Allocate data structures to hold per interrupt information, allocate/enable + * MSI-x interrupt and register interrupt handlers. + * + * Return: 0, on successful allocation and registration of all interrupts. + * -1, on any error. + */ +static int octep_setup_irqs(struct octep_device *oct) +{ + if (octep_alloc_ioq_vectors(oct)) + goto ioq_vector_err; + + if (octep_enable_msix_range(oct)) + goto enable_msix_err; + + if (octep_request_irqs(oct)) + goto request_irq_err; + + return 0; + +request_irq_err: + octep_disable_msix(oct); +enable_msix_err: + octep_free_ioq_vectors(oct); +ioq_vector_err: + return -1; +} + +/** + * octep_clean_irqs() - free all interrupts and its resources. + * + * @oct: Octeon device private data structure. + */ +static void octep_clean_irqs(struct octep_device *oct) +{ + octep_free_irqs(oct); + octep_disable_msix(oct); + octep_free_ioq_vectors(oct); +} + +/** + * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. + * + * @iq: Octeon Tx queue data structure. + * @oq: Octeon Rx queue data structure. + */ +static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq) +{ + u32 pkts_pend = oq->pkts_pending; + + netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); + if (iq->pkts_processed) { + writel(iq->pkts_processed, iq->inst_cnt_reg); + iq->pkt_in_done -= iq->pkts_processed; + iq->pkts_processed = 0; + } + if (oq->last_pkt_count - pkts_pend) { + writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); + oq->last_pkt_count = pkts_pend; + } + + /* Flush the previous wrties before writing to RESEND bit */ + wmb(); + writeq(1UL << OCTEP_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); + writeq(1UL << OCTEP_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); +} + +/** + * octep_napi_poll() - NAPI poll function for Tx/Rx. + * + * @napi: pointer to napi context. + * @budget: max number of packets to be processed in single invocation. + */ +static int octep_napi_poll(struct napi_struct *napi, int budget) +{ + struct octep_ioq_vector *ioq_vector = + container_of(napi, struct octep_ioq_vector, napi); + u32 tx_pending, rx_done; + + tx_pending = octep_iq_process_completions(ioq_vector->iq, budget); + rx_done = octep_oq_process_rx(ioq_vector->oq, budget); + + /* need more polling if tx completion processing is still pending or + * processed at least 'budget' number of rx packets. + */ + if (tx_pending || rx_done >= budget) + return budget; + + napi_complete(napi); + octep_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); + return rx_done; +} + +/** + * octep_napi_add() - Add NAPI poll for all Tx/Rx queues. + * + * @oct: Octeon device private data structure. + */ +static void octep_napi_add(struct octep_device *oct) +{ + int i; + + for (i = 0; i < oct->num_oqs; i++) { + netdev_dbg(oct->netdev, "Adding NAPI on Q-%d\n", i); + netif_napi_add(oct->netdev, &oct->ioq_vector[i]->napi, + octep_napi_poll, 64); + oct->oq[i]->napi = &oct->ioq_vector[i]->napi; + } +} + +/** + * octep_napi_delete() - delete NAPI poll callback for all Tx/Rx queues. + * + * @oct: Octeon device private data structure. + */ +static void octep_napi_delete(struct octep_device *oct) +{ + int i; + + for (i = 0; i < oct->num_oqs; i++) { + netdev_dbg(oct->netdev, "Deleting NAPI on Q-%d\n", i); + netif_napi_del(&oct->ioq_vector[i]->napi); + oct->oq[i]->napi = NULL; + } +} + +/** + * octep_napi_enable() - enable NAPI for all Tx/Rx queues. + * + * @oct: Octeon device private data structure. + */ +static void octep_napi_enable(struct octep_device *oct) +{ + int i; + + for (i = 0; i < oct->num_oqs; i++) { + netdev_dbg(oct->netdev, "Enabling NAPI on Q-%d\n", i); + napi_enable(&oct->ioq_vector[i]->napi); + } +} + +/** + * octep_napi_disable() - disable NAPI for all Tx/Rx queues. + * + * @oct: Octeon device private data structure. + */ +static void octep_napi_disable(struct octep_device *oct) +{ + int i; + + for (i = 0; i < oct->num_oqs; i++) { + netdev_dbg(oct->netdev, "Disabling NAPI on Q-%d\n", i); + napi_disable(&oct->ioq_vector[i]->napi); + } +} + +static void octep_link_up(struct net_device *netdev) +{ + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); +} + +/** + * octep_open() - start the octeon network device. + * + * @netdev: pointer to kernel network device. + * + * setup Tx/Rx queues, interrupts and enable hardware operation of Tx/Rx queues + * and interrupts.. + * + * Return: 0, on successfully setting up device and bring it up. + * -1, on any error. + */ +static int octep_open(struct net_device *netdev) +{ + struct octep_device *oct = netdev_priv(netdev); + int err, ret; + + netdev_info(netdev, "Starting netdev ...\n"); + netif_carrier_off(netdev); + + oct->hw_ops.reset_io_queues(oct); + + if (octep_setup_iqs(oct)) + goto setup_iq_err; + if (octep_setup_oqs(oct)) + goto setup_oq_err; + if (octep_setup_irqs(oct)) + goto setup_irq_err; + + err = netif_set_real_num_tx_queues(netdev, oct->num_oqs); + if (err) + goto set_queues_err; + err = netif_set_real_num_rx_queues(netdev, oct->num_iqs); + if (err) + goto set_queues_err; + + octep_napi_add(oct); + octep_napi_enable(oct); + + oct->link_info.admin_up = 1; + octep_set_rx_state(oct, true); + + ret = octep_get_link_status(oct); + if (!ret) + octep_set_link_status(oct, true); + + /* Enable the input and output queues for this Octeon device */ + oct->hw_ops.enable_io_queues(oct); + + /* Enable Octeon device interrupts */ + oct->hw_ops.enable_interrupts(oct); + + octep_oq_dbell_init(oct); + + ret = octep_get_link_status(oct); + if (ret) + octep_link_up(netdev); + + return 0; + +set_queues_err: + octep_napi_disable(oct); + octep_napi_delete(oct); + octep_clean_irqs(oct); +setup_irq_err: + octep_free_oqs(oct); +setup_oq_err: + octep_free_iqs(oct); +setup_iq_err: + return -1; +} + +/** + * octep_stop() - stop the octeon network device. + * + * @netdev: pointer to kernel network device. + * + * stop the device Tx/Rx operations, bring down the link and + * free up all resources allocated for Tx/Rx queues and interrupts. + */ +static int octep_stop(struct net_device *netdev) +{ + struct octep_device *oct = netdev_priv(netdev); + + netdev_info(netdev, "Stopping the device ...\n"); + + /* Stop Tx from stack */ + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + netif_tx_disable(netdev); + + octep_set_link_status(oct, false); + octep_set_rx_state(oct, false); + + oct->link_info.admin_up = 0; + oct->link_info.oper_up = 0; + + oct->hw_ops.disable_interrupts(oct); + octep_napi_disable(oct); + octep_napi_delete(oct); + + octep_clean_irqs(oct); + octep_clean_iqs(oct); + + oct->hw_ops.disable_io_queues(oct); + oct->hw_ops.reset_io_queues(oct); + octep_free_oqs(oct); + octep_free_iqs(oct); + netdev_info(netdev, "Device stopped !!\n"); + return 0; +} + +/** + * octep_iq_full_check() - check if a Tx queue is full. + * + * @iq: Octeon Tx queue data structure. + * + * Return: 0, if the Tx queue is not full. + * 1, if the Tx queue is full. + */ +static inline int octep_iq_full_check(struct octep_iq *iq) +{ + if (likely((iq->max_count - atomic_read(&iq->instr_pending)) >= + OCTEP_WAKE_QUEUE_THRESHOLD)) + return 0; + + /* Stop the queue if unable to send */ + netif_stop_subqueue(iq->netdev, iq->q_no); + + /* check again and restart the queue, in case NAPI has just freed + * enough Tx ring entries. + */ + if (unlikely((iq->max_count - atomic_read(&iq->instr_pending)) >= + OCTEP_WAKE_QUEUE_THRESHOLD)) { + netif_start_subqueue(iq->netdev, iq->q_no); + iq->stats.restart_cnt++; + return 0; + } + + return 1; +} + +/** + * octep_start_xmit() - Enqueue packet to Octoen hardware Tx Queue. + * + * @skb: packet skbuff pointer. + * @netdev: kernel network device. + * + * Return: NETDEV_TX_BUSY, if Tx Queue is full. + * NETDEV_TX_OK, if successfully enqueued to hardware Tx queue. + */ +static netdev_tx_t octep_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct octep_device *oct = netdev_priv(netdev); + struct octep_tx_sglist_desc *sglist; + struct octep_tx_buffer *tx_buffer; + struct octep_tx_desc_hw *hw_desc; + struct skb_shared_info *shinfo; + struct octep_instr_hdr *ih; + struct octep_iq *iq; + skb_frag_t *frag; + u16 nr_frags, si; + u16 q_no, wi; + + q_no = skb_get_queue_mapping(skb); + if (q_no >= oct->num_iqs) { + netdev_err(netdev, "Invalid Tx skb->queue_mapping=%d\n", q_no); + q_no = q_no % oct->num_iqs; + } + + iq = oct->iq[q_no]; + if (octep_iq_full_check(iq)) { + iq->stats.tx_busy++; + return NETDEV_TX_BUSY; + } + + shinfo = skb_shinfo(skb); + nr_frags = shinfo->nr_frags; + + wi = iq->host_write_index; + hw_desc = &iq->desc_ring[wi]; + hw_desc->ih64 = 0; + + tx_buffer = iq->buff_info + wi; + tx_buffer->skb = skb; + + ih = &hw_desc->ih; + ih->tlen = skb->len; + ih->pkind = oct->pkind; + + if (!nr_frags) { + tx_buffer->gather = 0; + tx_buffer->dma = dma_map_single(iq->dev, skb->data, + skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(iq->dev, tx_buffer->dma)) + goto dma_map_err; + hw_desc->dptr = tx_buffer->dma; + } else { + /* Scatter/Gather */ + dma_addr_t dma; + u16 len; + + sglist = tx_buffer->sglist; + + ih->gsz = nr_frags + 1; + ih->gather = 1; + tx_buffer->gather = 1; + + len = skb_headlen(skb); + dma = dma_map_single(iq->dev, skb->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(iq->dev, dma)) + goto dma_map_err; + + dma_sync_single_for_cpu(iq->dev, tx_buffer->sglist_dma, + OCTEP_SGLIST_SIZE_PER_PKT, + DMA_TO_DEVICE); + memset(sglist, 0, OCTEP_SGLIST_SIZE_PER_PKT); + sglist[0].len[3] = len; + sglist[0].dma_ptr[0] = dma; + + si = 1; /* entry 0 is main skb, mapped above */ + frag = &shinfo->frags[0]; + while (nr_frags--) { + len = skb_frag_size(frag); + dma = skb_frag_dma_map(iq->dev, frag, 0, + len, DMA_TO_DEVICE); + if (dma_mapping_error(iq->dev, dma)) + goto dma_map_sg_err; + + sglist[si >> 2].len[3 - (si & 3)] = len; + sglist[si >> 2].dma_ptr[si & 3] = dma; + + frag++; + si++; + } + dma_sync_single_for_device(iq->dev, tx_buffer->sglist_dma, + OCTEP_SGLIST_SIZE_PER_PKT, + DMA_TO_DEVICE); + + hw_desc->dptr = tx_buffer->sglist_dma; + } + + /* Flush the hw descriptor before writing to doorbell */ + wmb(); + + /* Ring Doorbell to notify the NIC there is a new packet */ + writel(1, iq->doorbell_reg); + atomic_inc(&iq->instr_pending); + wi++; + if (wi == iq->max_count) + wi = 0; + iq->host_write_index = wi; + + netdev_tx_sent_queue(iq->netdev_q, skb->len); + iq->stats.instr_posted++; + skb_tx_timestamp(skb); + return NETDEV_TX_OK; + +dma_map_sg_err: + if (si > 0) { + dma_unmap_single(iq->dev, sglist[0].dma_ptr[0], + sglist[0].len[0], DMA_TO_DEVICE); + sglist[0].len[0] = 0; + } + while (si > 1) { + dma_unmap_page(iq->dev, sglist[si >> 2].dma_ptr[si & 3], + sglist[si >> 2].len[si & 3], DMA_TO_DEVICE); + sglist[si >> 2].len[si & 3] = 0; + si--; + } + tx_buffer->gather = 0; +dma_map_err: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** + * octep_get_stats64() - Get Octeon network device statistics. + * + * @netdev: kernel network device. + * @stats: pointer to stats structure to be filled in. + */ +static void octep_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + struct octep_device *oct = netdev_priv(netdev); + int q; + + octep_get_if_stats(oct); + tx_packets = 0; + tx_bytes = 0; + rx_packets = 0; + rx_bytes = 0; + for (q = 0; q < oct->num_oqs; q++) { + struct octep_iq *iq = oct->iq[q]; + struct octep_oq *oq = oct->oq[q]; + + tx_packets += iq->stats.instr_completed; + tx_bytes += iq->stats.bytes_sent; + rx_packets += oq->stats.packets; + rx_bytes += oq->stats.bytes; + } + stats->tx_packets = tx_packets; + stats->tx_bytes = tx_bytes; + stats->rx_packets = rx_packets; + stats->rx_bytes = rx_bytes; + stats->multicast = oct->iface_rx_stats.mcast_pkts; + stats->rx_errors = oct->iface_rx_stats.err_pkts; + stats->collisions = oct->iface_tx_stats.xscol; + stats->tx_fifo_errors = oct->iface_tx_stats.undflw; +} + +/** + * octep_tx_timeout_task - work queue task to Handle Tx queue timeout. + * + * @work: pointer to Tx queue timeout work_struct + * + * Stop and start the device so that it frees up all queue resources + * and restarts the queues, that potentially clears a Tx queue timeout + * condition. + **/ +static void octep_tx_timeout_task(struct work_struct *work) +{ + struct octep_device *oct = container_of(work, struct octep_device, + tx_timeout_task); + struct net_device *netdev = oct->netdev; + + rtnl_lock(); + if (netif_running(netdev)) { + octep_stop(netdev); + octep_open(netdev); + } + rtnl_unlock(); +} + +/** + * octep_tx_timeout() - Handle Tx Queue timeout. + * + * @netdev: pointer to kernel network device. + * @txqueue: Timed out Tx queue number. + * + * Schedule a work to handle Tx queue timeout. + */ +static void octep_tx_timeout(struct net_device *netdev, unsigned int txqueue) +{ + struct octep_device *oct = netdev_priv(netdev); + + queue_work(octep_wq, &oct->tx_timeout_task); +} + +static int octep_set_mac(struct net_device *netdev, void *p) +{ + struct octep_device *oct = netdev_priv(netdev); + struct sockaddr *addr = (struct sockaddr *)p; + int err; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + err = octep_set_mac_addr(oct, addr->sa_data); + if (err) + return err; + + memcpy(oct->mac_addr, addr->sa_data, ETH_ALEN); + eth_hw_addr_set(netdev, addr->sa_data); + + return 0; +} + +static int octep_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct octep_device *oct = netdev_priv(netdev); + struct octep_iface_link_info *link_info; + int err = 0; + + link_info = &oct->link_info; + if (link_info->mtu == new_mtu) + return 0; + + err = octep_set_mtu(oct, new_mtu); + if (!err) { + oct->link_info.mtu = new_mtu; + netdev->mtu = new_mtu; + } + + return err; +} + +static const struct net_device_ops octep_netdev_ops = { + .ndo_open = octep_open, + .ndo_stop = octep_stop, + .ndo_start_xmit = octep_start_xmit, + .ndo_get_stats64 = octep_get_stats64, + .ndo_tx_timeout = octep_tx_timeout, + .ndo_set_mac_address = octep_set_mac, + .ndo_change_mtu = octep_change_mtu, +}; + +/** + * octep_ctrl_mbox_task - work queue task to handle ctrl mbox messages. + * + * @work: pointer to ctrl mbox work_struct + * + * Poll ctrl mbox message queue and handle control messages from firmware. + **/ +static void octep_ctrl_mbox_task(struct work_struct *work) +{ + struct octep_device *oct = container_of(work, struct octep_device, + ctrl_mbox_task); + struct net_device *netdev = oct->netdev; + struct octep_ctrl_net_f2h_req req = {}; + struct octep_ctrl_mbox_msg msg; + int ret = 0; + + msg.msg = &req; + while (true) { + ret = octep_ctrl_mbox_recv(&oct->ctrl_mbox, &msg); + if (ret) + break; + + switch (req.hdr.cmd) { + case OCTEP_CTRL_NET_F2H_CMD_LINK_STATUS: + if (netif_running(netdev)) { + if (req.link.state) { + dev_info(&oct->pdev->dev, "netif_carrier_on\n"); + netif_carrier_on(netdev); + } else { + dev_info(&oct->pdev->dev, "netif_carrier_off\n"); + netif_carrier_off(netdev); + } + } + break; + default: + pr_info("Unknown mbox req : %u\n", req.hdr.cmd); + break; + } + } +} + +/** + * octep_device_setup() - Setup Octeon Device. + * + * @oct: Octeon device private data structure. + * + * Setup Octeon device hardware operations, configuration, etc ... + */ +int octep_device_setup(struct octep_device *oct) +{ + struct octep_ctrl_mbox *ctrl_mbox; + struct pci_dev *pdev = oct->pdev; + int i, ret; + + /* allocate memory for oct->conf */ + oct->conf = kzalloc(sizeof(*oct->conf), GFP_KERNEL); + if (!oct->conf) + return -ENOMEM; + + /* Map BAR regions */ + for (i = 0; i < OCTEP_MMIO_REGIONS; i++) { + oct->mmio[i].hw_addr = + ioremap(pci_resource_start(oct->pdev, i * 2), + pci_resource_len(oct->pdev, i * 2)); + oct->mmio[i].mapped = 1; + } + + oct->chip_id = pdev->device; + oct->rev_id = pdev->revision; + dev_info(&pdev->dev, "chip_id = 0x%x\n", pdev->device); + + switch (oct->chip_id) { + case OCTEP_PCI_DEVICE_ID_CN93_PF: + dev_info(&pdev->dev, + "Setting up OCTEON CN93XX PF PASS%d.%d\n", + OCTEP_MAJOR_REV(oct), OCTEP_MINOR_REV(oct)); + octep_device_setup_cn93_pf(oct); + break; + default: + dev_err(&pdev->dev, + "%s: unsupported device\n", __func__); + goto unsupported_dev; + } + + oct->pkind = CFG_GET_IQ_PKIND(oct->conf); + + /* Initialize control mbox */ + ctrl_mbox = &oct->ctrl_mbox; + ctrl_mbox->version = OCTEP_DRV_VERSION; + ctrl_mbox->barmem = CFG_GET_CTRL_MBOX_MEM_ADDR(oct->conf); + ret = octep_ctrl_mbox_init(ctrl_mbox); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize control mbox\n"); + return -1; + } + oct->ctrl_mbox_ifstats_offset = OCTEP_CTRL_MBOX_SZ(ctrl_mbox->h2fq.elem_sz, + ctrl_mbox->h2fq.elem_cnt, + ctrl_mbox->f2hq.elem_sz, + ctrl_mbox->f2hq.elem_cnt); + + return 0; + +unsupported_dev: + return -1; +} + +/** + * octep_device_cleanup() - Cleanup Octeon Device. + * + * @oct: Octeon device private data structure. + * + * Cleanup Octeon device allocated resources. + */ +static void octep_device_cleanup(struct octep_device *oct) +{ + int i; + + dev_info(&oct->pdev->dev, "Cleaning up Octeon Device ...\n"); + + for (i = 0; i < OCTEP_MAX_VF; i++) { + if (oct->mbox[i]) + vfree(oct->mbox[i]); + oct->mbox[i] = NULL; + } + + octep_ctrl_mbox_uninit(&oct->ctrl_mbox); + + oct->hw_ops.soft_reset(oct); + for (i = 0; i < OCTEP_MMIO_REGIONS; i++) { + if (oct->mmio[i].mapped) + iounmap(oct->mmio[i].hw_addr); + } + + kfree(oct->conf); + oct->conf = NULL; +} + +/** + * octep_probe() - Octeon PCI device probe handler. + * + * @pdev: PCI device structure. + * @ent: entry in Octeon PCI device ID table. + * + * Initializes and enables the Octeon PCI device for network operations. + * Initializes Octeon private data structure and registers a network device. + */ +static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct octep_device *octep_dev = NULL; + struct net_device *netdev; + int err; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Failed to enable PCI device\n"); + return err; + } + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "Failed to set DMA mask !!\n"); + goto err_dma_mask; + } + + err = pci_request_mem_regions(pdev, OCTEP_DRV_NAME); + if (err) { + dev_err(&pdev->dev, "Failed to map PCI memory regions\n"); + goto err_pci_regions; + } + + pci_enable_pcie_error_reporting(pdev); + pci_set_master(pdev); + + netdev = alloc_etherdev_mq(sizeof(struct octep_device), + OCTEP_MAX_QUEUES); + if (!netdev) { + dev_err(&pdev->dev, "Failed to allocate netdev\n"); + err = -ENOMEM; + goto err_alloc_netdev; + } + SET_NETDEV_DEV(netdev, &pdev->dev); + + octep_dev = netdev_priv(netdev); + octep_dev->netdev = netdev; + octep_dev->pdev = pdev; + octep_dev->dev = &pdev->dev; + pci_set_drvdata(pdev, octep_dev); + + err = octep_device_setup(octep_dev); + if (err) { + dev_err(&pdev->dev, "Device setup failed\n"); + goto err_octep_config; + } + INIT_WORK(&octep_dev->tx_timeout_task, octep_tx_timeout_task); + INIT_WORK(&octep_dev->ctrl_mbox_task, octep_ctrl_mbox_task); + + netdev->netdev_ops = &octep_netdev_ops; + octep_set_ethtool_ops(netdev); + netif_carrier_off(netdev); + + netdev->hw_features = NETIF_F_SG; + netdev->features |= netdev->hw_features; + netdev->min_mtu = OCTEP_MIN_MTU; + netdev->max_mtu = OCTEP_MAX_MTU; + netdev->mtu = OCTEP_DEFAULT_MTU; + + octep_get_mac_addr(octep_dev, octep_dev->mac_addr); + eth_hw_addr_set(netdev, octep_dev->mac_addr); + + if (register_netdev(netdev)) { + dev_err(&pdev->dev, "Failed to register netdev\n"); + goto register_dev_err; + } + dev_info(&pdev->dev, "Device probe successful\n"); + return 0; + +register_dev_err: + octep_device_cleanup(octep_dev); +err_octep_config: + free_netdev(netdev); +err_alloc_netdev: + pci_disable_pcie_error_reporting(pdev); + pci_release_mem_regions(pdev); +err_pci_regions: +err_dma_mask: + pci_disable_device(pdev); + return err; +} + +/** + * octep_remove() - Remove Octeon PCI device from driver control. + * + * @pdev: PCI device structure of the Octeon device. + * + * Cleanup all resources allocated for the Octeon device. + * Unregister from network device and disable the PCI device. + */ +static void octep_remove(struct pci_dev *pdev) +{ + struct octep_device *oct = pci_get_drvdata(pdev); + struct net_device *netdev; + + if (!oct) + return; + + cancel_work_sync(&oct->tx_timeout_task); + cancel_work_sync(&oct->ctrl_mbox_task); + netdev = oct->netdev; + if (netdev->reg_state == NETREG_REGISTERED) + unregister_netdev(netdev); + + octep_device_cleanup(oct); + pci_release_mem_regions(pdev); + free_netdev(netdev); + pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); +} + +static struct pci_driver octep_driver = { + .name = OCTEP_DRV_NAME, + .id_table = octep_pci_id_tbl, + .probe = octep_probe, + .remove = octep_remove, +}; + +/** + * octep_init_module() - Module initialiation. + * + * create common resource for the driver and register PCI driver. + */ +static int __init octep_init_module(void) +{ + int ret; + + pr_info("%s: Loading %s ...\n", OCTEP_DRV_NAME, OCTEP_DRV_STRING); + + /* work queue for all deferred tasks */ + octep_wq = create_singlethread_workqueue(OCTEP_DRV_NAME); + if (!octep_wq) { + pr_err("%s: Failed to create common workqueue\n", + OCTEP_DRV_NAME); + return -ENOMEM; + } + + ret = pci_register_driver(&octep_driver); + if (ret < 0) { + pr_err("%s: Failed to register PCI driver; err=%d\n", + OCTEP_DRV_NAME, ret); + return ret; + } + + pr_info("%s: Loaded successfully !\n", OCTEP_DRV_NAME); + + return ret; +} + +/** + * octep_exit_module() - Module exit routine. + * + * unregister the driver with PCI subsystem and cleanup common resources. + */ +static void __exit octep_exit_module(void) +{ + pr_info("%s: Unloading ...\n", OCTEP_DRV_NAME); + + pci_unregister_driver(&octep_driver); + destroy_workqueue(octep_wq); + + pr_info("%s: Unloading complete\n", OCTEP_DRV_NAME); +} + +module_init(octep_init_module); +module_exit(octep_exit_module); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h new file mode 100644 index 000000000000..520f2c3664f9 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h @@ -0,0 +1,366 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_MAIN_H_ +#define _OCTEP_MAIN_H_ + +#include "octep_tx.h" +#include "octep_rx.h" +#include "octep_ctrl_mbox.h" + +#define OCTEP_DRV_VERSION_MAJOR 1 +#define OCTEP_DRV_VERSION_MINOR 0 +#define OCTEP_DRV_VERSION_VARIANT 0 + +#define OCTEP_DRV_VERSION ((OCTEP_DRV_VERSION_MAJOR << 16) + \ + (OCTEP_DRV_VERSION_MINOR << 8) + \ + OCTEP_DRV_VERSION_VARIANT) + +#define OCTEP_DRV_VERSION_STR "1.0.0" +#define OCTEP_DRV_NAME "octeon_ep" +#define OCTEP_DRV_STRING "Marvell Octeon EndPoint NIC Driver" + +#define OCTEP_PCIID_CN93_PF 0xB200177d +#define OCTEP_PCIID_CN93_VF 0xB203177d + +#define OCTEP_PCI_DEVICE_ID_CN93_PF 0xB200 +#define OCTEP_PCI_DEVICE_ID_CN93_VF 0xB203 + +#define OCTEP_MAX_QUEUES 63 +#define OCTEP_MAX_IQ OCTEP_MAX_QUEUES +#define OCTEP_MAX_OQ OCTEP_MAX_QUEUES +#define OCTEP_MAX_VF 64 + +#define OCTEP_MAX_MSIX_VECTORS OCTEP_MAX_OQ + +/* Flags to disable and enable Interrupts */ +#define OCTEP_INPUT_INTR (1) +#define OCTEP_OUTPUT_INTR (2) +#define OCTEP_MBOX_INTR (4) +#define OCTEP_ALL_INTR 0xff + +#define OCTEP_IQ_INTR_RESEND_BIT 59 +#define OCTEP_OQ_INTR_RESEND_BIT 59 + +#define OCTEP_MMIO_REGIONS 3 +/* PCI address space mapping information. + * Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of + * Octeon gets mapped to different physical address spaces in + * the kernel. + */ +struct octep_mmio { + /* The physical address to which the PCI address space is mapped. */ + u8 __iomem *hw_addr; + + /* Flag indicating the mapping was successful. */ + int mapped; +}; + +struct octep_pci_win_regs { + u8 __iomem *pci_win_wr_addr; + u8 __iomem *pci_win_rd_addr; + u8 __iomem *pci_win_wr_data; + u8 __iomem *pci_win_rd_data; +}; + +struct octep_hw_ops { + void (*setup_iq_regs)(struct octep_device *oct, int q); + void (*setup_oq_regs)(struct octep_device *oct, int q); + void (*setup_mbox_regs)(struct octep_device *oct, int mbox); + + irqreturn_t (*non_ioq_intr_handler)(void *ioq_vector); + irqreturn_t (*ioq_intr_handler)(void *ioq_vector); + int (*soft_reset)(struct octep_device *oct); + void (*reinit_regs)(struct octep_device *oct); + u32 (*update_iq_read_idx)(struct octep_iq *iq); + + void (*enable_interrupts)(struct octep_device *oct); + void (*disable_interrupts)(struct octep_device *oct); + + void (*enable_io_queues)(struct octep_device *oct); + void (*disable_io_queues)(struct octep_device *oct); + void (*enable_iq)(struct octep_device *oct, int q); + void (*disable_iq)(struct octep_device *oct, int q); + void (*enable_oq)(struct octep_device *oct, int q); + void (*disable_oq)(struct octep_device *oct, int q); + void (*reset_io_queues)(struct octep_device *oct); + void (*dump_registers)(struct octep_device *oct); +}; + +/* Octeon mailbox data */ +struct octep_mbox_data { + u32 cmd; + u32 total_len; + u32 recv_len; + u32 rsvd; + u64 *data; +}; + +/* Octeon device mailbox */ +struct octep_mbox { + /* A spinlock to protect access to this q_mbox. */ + spinlock_t lock; + + u32 q_no; + u32 state; + + /* SLI_MAC_PF_MBOX_INT for PF, SLI_PKT_MBOX_INT for VF. */ + u8 __iomem *mbox_int_reg; + + /* SLI_PKT_PF_VF_MBOX_SIG(0) for PF, + * SLI_PKT_PF_VF_MBOX_SIG(1) for VF. + */ + u8 __iomem *mbox_write_reg; + + /* SLI_PKT_PF_VF_MBOX_SIG(1) for PF, + * SLI_PKT_PF_VF_MBOX_SIG(0) for VF. + */ + u8 __iomem *mbox_read_reg; + + struct octep_mbox_data mbox_data; +}; + +/* Tx/Rx queue vector per interrupt. */ +struct octep_ioq_vector { + char name[OCTEP_MSIX_NAME_SIZE]; + struct napi_struct napi; + struct octep_device *octep_dev; + struct octep_iq *iq; + struct octep_oq *oq; + cpumask_t affinity_mask; +}; + +/* Octeon hardware/firmware offload capability flags. */ +#define OCTEP_CAP_TX_CHECKSUM BIT(0) +#define OCTEP_CAP_RX_CHECKSUM BIT(1) +#define OCTEP_CAP_TSO BIT(2) + +/* Link modes */ +enum octep_link_mode_bit_indices { + OCTEP_LINK_MODE_10GBASE_T = 0, + OCTEP_LINK_MODE_10GBASE_R, + OCTEP_LINK_MODE_10GBASE_CR, + OCTEP_LINK_MODE_10GBASE_KR, + OCTEP_LINK_MODE_10GBASE_LR, + OCTEP_LINK_MODE_10GBASE_SR, + OCTEP_LINK_MODE_25GBASE_CR, + OCTEP_LINK_MODE_25GBASE_KR, + OCTEP_LINK_MODE_25GBASE_SR, + OCTEP_LINK_MODE_40GBASE_CR4, + OCTEP_LINK_MODE_40GBASE_KR4, + OCTEP_LINK_MODE_40GBASE_LR4, + OCTEP_LINK_MODE_40GBASE_SR4, + OCTEP_LINK_MODE_50GBASE_CR2, + OCTEP_LINK_MODE_50GBASE_KR2, + OCTEP_LINK_MODE_50GBASE_SR2, + OCTEP_LINK_MODE_50GBASE_CR, + OCTEP_LINK_MODE_50GBASE_KR, + OCTEP_LINK_MODE_50GBASE_LR, + OCTEP_LINK_MODE_50GBASE_SR, + OCTEP_LINK_MODE_100GBASE_CR4, + OCTEP_LINK_MODE_100GBASE_KR4, + OCTEP_LINK_MODE_100GBASE_LR4, + OCTEP_LINK_MODE_100GBASE_SR4, + OCTEP_LINK_MODE_NBITS +}; + +/* Hardware interface link state information. */ +struct octep_iface_link_info { + /* Bitmap of Supported link speeds/modes. */ + u64 supported_modes; + + /* Bitmap of Advertised link speeds/modes. */ + u64 advertised_modes; + + /* Negotiated link speed in Mbps. */ + u32 speed; + + /* MTU */ + u16 mtu; + + /* Autonegotation state. */ +#define OCTEP_LINK_MODE_AUTONEG_SUPPORTED BIT(0) +#define OCTEP_LINK_MODE_AUTONEG_ADVERTISED BIT(1) + u8 autoneg; + + /* Pause frames setting. */ +#define OCTEP_LINK_MODE_PAUSE_SUPPORTED BIT(0) +#define OCTEP_LINK_MODE_PAUSE_ADVERTISED BIT(1) + u8 pause; + + /* Admin state of the link (ifconfig <iface> up/down */ + u8 admin_up; + + /* Operational state of the link: physical link is up down */ + u8 oper_up; +}; + +/* The Octeon device specific private data structure. + * Each Octeon device has this structure to represent all its components. + */ +struct octep_device { + struct octep_config *conf; + + /* Octeon Chip type. */ + u16 chip_id; + u16 rev_id; + + /* Device capabilities enabled */ + u64 caps_enabled; + /* Device capabilities supported */ + u64 caps_supported; + + /* Pointer to basic Linux device */ + struct device *dev; + /* Linux PCI device pointer */ + struct pci_dev *pdev; + /* Netdev corresponding to the Octeon device */ + struct net_device *netdev; + + /* memory mapped io range */ + struct octep_mmio mmio[OCTEP_MMIO_REGIONS]; + + /* MAC address */ + u8 mac_addr[ETH_ALEN]; + + /* Tx queues (IQ: Instruction Queue) */ + u16 num_iqs; + /* pkind value to be used in every Tx hardware descriptor */ + u8 pkind; + /* Pointers to Octeon Tx queues */ + struct octep_iq *iq[OCTEP_MAX_IQ]; + + /* Rx queues (OQ: Output Queue) */ + u16 num_oqs; + /* Pointers to Octeon Rx queues */ + struct octep_oq *oq[OCTEP_MAX_OQ]; + + /* Hardware port number of the PCIe interface */ + u16 pcie_port; + + /* PCI Window registers to access some hardware CSRs */ + struct octep_pci_win_regs pci_win_regs; + /* Hardware operations */ + struct octep_hw_ops hw_ops; + + /* IRQ info */ + u16 num_irqs; + u16 num_non_ioq_irqs; + char *non_ioq_irq_names; + struct msix_entry *msix_entries; + /* IOq information of it's corresponding MSI-X interrupt. */ + struct octep_ioq_vector *ioq_vector[OCTEP_MAX_QUEUES]; + + /* Hardware Interface Tx statistics */ + struct octep_iface_tx_stats iface_tx_stats; + /* Hardware Interface Rx statistics */ + struct octep_iface_rx_stats iface_rx_stats; + + /* Hardware Interface Link info like supported modes, aneg support */ + struct octep_iface_link_info link_info; + + /* Mailbox to talk to VFs */ + struct octep_mbox *mbox[OCTEP_MAX_VF]; + + /* Work entry to handle Tx timeout */ + struct work_struct tx_timeout_task; + + /* control mbox over pf */ + struct octep_ctrl_mbox ctrl_mbox; + + /* offset for iface stats */ + u32 ctrl_mbox_ifstats_offset; + + /* Work entry to handle ctrl mbox interrupt */ + struct work_struct ctrl_mbox_task; + +}; + +static inline u16 OCTEP_MAJOR_REV(struct octep_device *oct) +{ + u16 rev = (oct->rev_id & 0xC) >> 2; + + return (rev == 0) ? 1 : rev; +} + +static inline u16 OCTEP_MINOR_REV(struct octep_device *oct) +{ + return (oct->rev_id & 0x3); +} + +/* Octeon CSR read/write access APIs */ +#define octep_write_csr(octep_dev, reg_off, value) \ + writel(value, (octep_dev)->mmio[0].hw_addr + (reg_off)) + +#define octep_write_csr64(octep_dev, reg_off, val64) \ + writeq(val64, (octep_dev)->mmio[0].hw_addr + (reg_off)) + +#define octep_read_csr(octep_dev, reg_off) \ + readl((octep_dev)->mmio[0].hw_addr + (reg_off)) + +#define octep_read_csr64(octep_dev, reg_off) \ + readq((octep_dev)->mmio[0].hw_addr + (reg_off)) + +/* Read windowed register. + * @param oct - pointer to the Octeon device. + * @param addr - Address of the register to read. + * + * This routine is called to read from the indirectly accessed + * Octeon registers that are visible through a PCI BAR0 mapped window + * register. + * @return - 64 bit value read from the register. + */ +static inline u64 +OCTEP_PCI_WIN_READ(struct octep_device *oct, u64 addr) +{ + u64 val64; + + addr |= 1ull << 53; /* read 8 bytes */ + writeq(addr, oct->pci_win_regs.pci_win_rd_addr); + val64 = readq(oct->pci_win_regs.pci_win_rd_data); + + dev_dbg(&oct->pdev->dev, + "%s: reg: 0x%016llx val: 0x%016llx\n", __func__, addr, val64); + + return val64; +} + +/* Write windowed register. + * @param oct - pointer to the Octeon device. + * @param addr - Address of the register to write + * @param val - Value to write + * + * This routine is called to write to the indirectly accessed + * Octeon registers that are visible through a PCI BAR0 mapped window + * register. + * @return Nothing. + */ +static inline void +OCTEP_PCI_WIN_WRITE(struct octep_device *oct, u64 addr, u64 val) +{ + writeq(addr, oct->pci_win_regs.pci_win_wr_addr); + writeq(val, oct->pci_win_regs.pci_win_wr_data); + + dev_dbg(&oct->pdev->dev, + "%s: reg: 0x%016llx val: 0x%016llx\n", __func__, addr, val); +} + +extern struct workqueue_struct *octep_wq; + +int octep_device_setup(struct octep_device *oct); +int octep_setup_iqs(struct octep_device *oct); +void octep_free_iqs(struct octep_device *oct); +void octep_clean_iqs(struct octep_device *oct); +int octep_setup_oqs(struct octep_device *oct); +void octep_free_oqs(struct octep_device *oct); +void octep_oq_dbell_init(struct octep_device *oct); +void octep_device_setup_cn93_pf(struct octep_device *oct); +int octep_iq_process_completions(struct octep_iq *iq, u16 budget); +int octep_oq_process_rx(struct octep_oq *oq, int budget); +void octep_set_ethtool_ops(struct net_device *netdev); + +#endif /* _OCTEP_MAIN_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h new file mode 100644 index 000000000000..cc51149790ff --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h @@ -0,0 +1,367 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_REGS_CN9K_PF_H_ +#define _OCTEP_REGS_CN9K_PF_H_ + +/* ############################ RST ######################### */ +#define CN93_RST_BOOT 0x000087E006001600ULL +#define CN93_RST_CORE_DOMAIN_W1S 0x000087E006001820ULL +#define CN93_RST_CORE_DOMAIN_W1C 0x000087E006001828ULL + +#define CN93_CONFIG_XPANSION_BAR 0x38 +#define CN93_CONFIG_PCIE_CAP 0x70 +#define CN93_CONFIG_PCIE_DEVCAP 0x74 +#define CN93_CONFIG_PCIE_DEVCTL 0x78 +#define CN93_CONFIG_PCIE_LINKCAP 0x7C +#define CN93_CONFIG_PCIE_LINKCTL 0x80 +#define CN93_CONFIG_PCIE_SLOTCAP 0x84 +#define CN93_CONFIG_PCIE_SLOTCTL 0x88 + +#define CN93_PCIE_SRIOV_FDL 0x188 /* 0x98 */ +#define CN93_PCIE_SRIOV_FDL_BIT_POS 0x10 +#define CN93_PCIE_SRIOV_FDL_MASK 0xFF + +#define CN93_CONFIG_PCIE_FLTMSK 0x720 + +/* ################# Offsets of RING, EPF, MAC ######################### */ +#define CN93_RING_OFFSET (0x1ULL << 17) +#define CN93_EPF_OFFSET (0x1ULL << 25) +#define CN93_MAC_OFFSET (0x1ULL << 4) +#define CN93_BIT_ARRAY_OFFSET (0x1ULL << 4) +#define CN93_EPVF_RING_OFFSET (0x1ULL << 4) + +/* ################# Scratch Registers ######################### */ +#define CN93_SDP_EPF_SCRATCH 0x205E0 + +/* ################# Window Registers ######################### */ +#define CN93_SDP_WIN_WR_ADDR64 0x20000 +#define CN93_SDP_WIN_RD_ADDR64 0x20010 +#define CN93_SDP_WIN_WR_DATA64 0x20020 +#define CN93_SDP_WIN_WR_MASK_REG 0x20030 +#define CN93_SDP_WIN_RD_DATA64 0x20040 + +#define CN93_SDP_MAC_NUMBER 0x2C100 + +/* ################# Global Previliged registers ######################### */ +#define CN93_SDP_EPF_RINFO 0x205F0 + +#define CN93_SDP_EPF_RINFO_SRN(val) ((val) & 0xFF) +#define CN93_SDP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF) +#define CN93_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) && 0xFF) + +/* SDP Function select */ +#define CN93_SDP_FUNC_SEL_EPF_BIT_POS 8 +#define CN93_SDP_FUNC_SEL_FUNC_BIT_POS 0 + +/* ##### RING IN (Into device from PCI: Tx Ring) REGISTERS #### */ +#define CN93_SDP_R_IN_CONTROL_START 0x10000 +#define CN93_SDP_R_IN_ENABLE_START 0x10010 +#define CN93_SDP_R_IN_INSTR_BADDR_START 0x10020 +#define CN93_SDP_R_IN_INSTR_RSIZE_START 0x10030 +#define CN93_SDP_R_IN_INSTR_DBELL_START 0x10040 +#define CN93_SDP_R_IN_CNTS_START 0x10050 +#define CN93_SDP_R_IN_INT_LEVELS_START 0x10060 +#define CN93_SDP_R_IN_PKT_CNT_START 0x10080 +#define CN93_SDP_R_IN_BYTE_CNT_START 0x10090 + +#define CN93_SDP_R_IN_CONTROL(ring) \ + (CN93_SDP_R_IN_CONTROL_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_ENABLE(ring) \ + (CN93_SDP_R_IN_ENABLE_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_INSTR_BADDR(ring) \ + (CN93_SDP_R_IN_INSTR_BADDR_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_INSTR_RSIZE(ring) \ + (CN93_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_INSTR_DBELL(ring) \ + (CN93_SDP_R_IN_INSTR_DBELL_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_CNTS(ring) \ + (CN93_SDP_R_IN_CNTS_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_INT_LEVELS(ring) \ + (CN93_SDP_R_IN_INT_LEVELS_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_PKT_CNT(ring) \ + (CN93_SDP_R_IN_PKT_CNT_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_BYTE_CNT(ring) \ + (CN93_SDP_R_IN_BYTE_CNT_START + ((ring) * CN93_RING_OFFSET)) + +/* Rings per Virtual Function */ +#define CN93_R_IN_CTL_RPVF_MASK (0xF) +#define CN93_R_IN_CTL_RPVF_POS (48) + +/* Number of instructions to be read in one MAC read request. + * setting to Max value(4) + */ +#define CN93_R_IN_CTL_IDLE (0x1ULL << 28) +#define CN93_R_IN_CTL_RDSIZE (0x3ULL << 25) +#define CN93_R_IN_CTL_IS_64B (0x1ULL << 24) +#define CN93_R_IN_CTL_D_NSR (0x1ULL << 8) +#define CN93_R_IN_CTL_D_ESR (0x1ULL << 6) +#define CN93_R_IN_CTL_D_ROR (0x1ULL << 5) +#define CN93_R_IN_CTL_NSR (0x1ULL << 3) +#define CN93_R_IN_CTL_ESR (0x1ULL << 1) +#define CN93_R_IN_CTL_ROR (0x1ULL << 0) + +#define CN93_R_IN_CTL_MASK (CN93_R_IN_CTL_RDSIZE | CN93_R_IN_CTL_IS_64B) + +/* ##### RING OUT (out from device to PCI host: Rx Ring) REGISTERS #### */ +#define CN93_SDP_R_OUT_CNTS_START 0x10100 +#define CN93_SDP_R_OUT_INT_LEVELS_START 0x10110 +#define CN93_SDP_R_OUT_SLIST_BADDR_START 0x10120 +#define CN93_SDP_R_OUT_SLIST_RSIZE_START 0x10130 +#define CN93_SDP_R_OUT_SLIST_DBELL_START 0x10140 +#define CN93_SDP_R_OUT_CONTROL_START 0x10150 +#define CN93_SDP_R_OUT_ENABLE_START 0x10160 +#define CN93_SDP_R_OUT_PKT_CNT_START 0x10180 +#define CN93_SDP_R_OUT_BYTE_CNT_START 0x10190 + +#define CN93_SDP_R_OUT_CONTROL(ring) \ + (CN93_SDP_R_OUT_CONTROL_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_ENABLE(ring) \ + (CN93_SDP_R_OUT_ENABLE_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_SLIST_BADDR(ring) \ + (CN93_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_SLIST_RSIZE(ring) \ + (CN93_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_SLIST_DBELL(ring) \ + (CN93_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_CNTS(ring) \ + (CN93_SDP_R_OUT_CNTS_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_INT_LEVELS(ring) \ + (CN93_SDP_R_OUT_INT_LEVELS_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_PKT_CNT(ring) \ + (CN93_SDP_R_OUT_PKT_CNT_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_BYTE_CNT(ring) \ + (CN93_SDP_R_OUT_BYTE_CNT_START + ((ring) * CN93_RING_OFFSET)) + +/*------------------ R_OUT Masks ----------------*/ +#define CN93_R_OUT_INT_LEVELS_BMODE BIT_ULL(63) +#define CN93_R_OUT_INT_LEVELS_TIMET (32) + +#define CN93_R_OUT_CTL_IDLE BIT_ULL(40) +#define CN93_R_OUT_CTL_ES_I BIT_ULL(34) +#define CN93_R_OUT_CTL_NSR_I BIT_ULL(33) +#define CN93_R_OUT_CTL_ROR_I BIT_ULL(32) +#define CN93_R_OUT_CTL_ES_D BIT_ULL(30) +#define CN93_R_OUT_CTL_NSR_D BIT_ULL(29) +#define CN93_R_OUT_CTL_ROR_D BIT_ULL(28) +#define CN93_R_OUT_CTL_ES_P BIT_ULL(26) +#define CN93_R_OUT_CTL_NSR_P BIT_ULL(25) +#define CN93_R_OUT_CTL_ROR_P BIT_ULL(24) +#define CN93_R_OUT_CTL_IMODE BIT_ULL(23) + +/* ############### Interrupt Moderation Registers ############### */ +#define CN93_SDP_R_IN_INT_MDRT_CTL0_START 0x10280 +#define CN93_SDP_R_IN_INT_MDRT_CTL1_START 0x102A0 +#define CN93_SDP_R_IN_INT_MDRT_DBG_START 0x102C0 + +#define CN93_SDP_R_OUT_INT_MDRT_CTL0_START 0x10380 +#define CN93_SDP_R_OUT_INT_MDRT_CTL1_START 0x103A0 +#define CN93_SDP_R_OUT_INT_MDRT_DBG_START 0x103C0 + +#define CN93_SDP_R_IN_INT_MDRT_CTL0(ring) \ + (CN93_SDP_R_IN_INT_MDRT_CTL0_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_INT_MDRT_CTL1(ring) \ + (CN93_SDP_R_IN_INT_MDRT_CTL1_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_INT_MDRT_DBG(ring) \ + (CN93_SDP_R_IN_INT_MDRT_DBG_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_INT_MDRT_CTL0(ring) \ + (CN93_SDP_R_OUT_INT_MDRT_CTL0_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_INT_MDRT_CTL1(ring) \ + (CN93_SDP_R_OUT_INT_MDRT_CTL1_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_INT_MDRT_DBG(ring) \ + (CN93_SDP_R_OUT_INT_MDRT_DBG_START + ((ring) * CN93_RING_OFFSET)) + +/* ##################### Mail Box Registers ########################## */ +/* INT register for VF. when a MBOX write from PF happed to a VF, + * corresponding bit will be set in this register as well as in + * PF_VF_INT register. + * + * This is a RO register, the int can be cleared by writing 1 to PF_VF_INT + */ +/* Basically first 3 are from PF to VF. The last one is data from VF to PF */ +#define CN93_SDP_R_MBOX_PF_VF_DATA_START 0x10210 +#define CN93_SDP_R_MBOX_PF_VF_INT_START 0x10220 +#define CN93_SDP_R_MBOX_VF_PF_DATA_START 0x10230 + +#define CN93_SDP_R_MBOX_PF_VF_DATA(ring) \ + (CN93_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_MBOX_PF_VF_INT(ring) \ + (CN93_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_MBOX_VF_PF_DATA(ring) \ + (CN93_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CN93_RING_OFFSET)) + +/* ##################### Interrupt Registers ########################## */ +#define CN93_SDP_R_ERR_TYPE_START 0x10400 + +#define CN93_SDP_R_ERR_TYPE(ring) \ + (CN93_SDP_R_ERR_TYPE_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_MBOX_ISM_START 0x10500 +#define CN93_SDP_R_OUT_CNTS_ISM_START 0x10510 +#define CN93_SDP_R_IN_CNTS_ISM_START 0x10520 + +#define CN93_SDP_R_MBOX_ISM(ring) \ + (CN93_SDP_R_MBOX_ISM_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_OUT_CNTS_ISM(ring) \ + (CN93_SDP_R_OUT_CNTS_ISM_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_R_IN_CNTS_ISM(ring) \ + (CN93_SDP_R_IN_CNTS_ISM_START + ((ring) * CN93_RING_OFFSET)) + +#define CN93_SDP_EPF_MBOX_RINT_START 0x20100 +#define CN93_SDP_EPF_MBOX_RINT_W1S_START 0x20120 +#define CN93_SDP_EPF_MBOX_RINT_ENA_W1C_START 0x20140 +#define CN93_SDP_EPF_MBOX_RINT_ENA_W1S_START 0x20160 + +#define CN93_SDP_EPF_VFIRE_RINT_START 0x20180 +#define CN93_SDP_EPF_VFIRE_RINT_W1S_START 0x201A0 +#define CN93_SDP_EPF_VFIRE_RINT_ENA_W1C_START 0x201C0 +#define CN93_SDP_EPF_VFIRE_RINT_ENA_W1S_START 0x201E0 + +#define CN93_SDP_EPF_IRERR_RINT 0x20200 +#define CN93_SDP_EPF_IRERR_RINT_W1S 0x20210 +#define CN93_SDP_EPF_IRERR_RINT_ENA_W1C 0x20220 +#define CN93_SDP_EPF_IRERR_RINT_ENA_W1S 0x20230 + +#define CN93_SDP_EPF_VFORE_RINT_START 0x20240 +#define CN93_SDP_EPF_VFORE_RINT_W1S_START 0x20260 +#define CN93_SDP_EPF_VFORE_RINT_ENA_W1C_START 0x20280 +#define CN93_SDP_EPF_VFORE_RINT_ENA_W1S_START 0x202A0 + +#define CN93_SDP_EPF_ORERR_RINT 0x20320 +#define CN93_SDP_EPF_ORERR_RINT_W1S 0x20330 +#define CN93_SDP_EPF_ORERR_RINT_ENA_W1C 0x20340 +#define CN93_SDP_EPF_ORERR_RINT_ENA_W1S 0x20350 + +#define CN93_SDP_EPF_OEI_RINT 0x20360 +#define CN93_SDP_EPF_OEI_RINT_W1S 0x20370 +#define CN93_SDP_EPF_OEI_RINT_ENA_W1C 0x20380 +#define CN93_SDP_EPF_OEI_RINT_ENA_W1S 0x20390 + +#define CN93_SDP_EPF_DMA_RINT 0x20400 +#define CN93_SDP_EPF_DMA_RINT_W1S 0x20410 +#define CN93_SDP_EPF_DMA_RINT_ENA_W1C 0x20420 +#define CN93_SDP_EPF_DMA_RINT_ENA_W1S 0x20430 + +#define CN93_SDP_EPF_DMA_INT_LEVEL_START 0x20440 +#define CN93_SDP_EPF_DMA_CNT_START 0x20460 +#define CN93_SDP_EPF_DMA_TIM_START 0x20480 + +#define CN93_SDP_EPF_MISC_RINT 0x204A0 +#define CN93_SDP_EPF_MISC_RINT_W1S 0x204B0 +#define CN93_SDP_EPF_MISC_RINT_ENA_W1C 0x204C0 +#define CN93_SDP_EPF_MISC_RINT_ENA_W1S 0x204D0 + +#define CN93_SDP_EPF_DMA_VF_RINT_START 0x204E0 +#define CN93_SDP_EPF_DMA_VF_RINT_W1S_START 0x20500 +#define CN93_SDP_EPF_DMA_VF_RINT_ENA_W1C_START 0x20520 +#define CN93_SDP_EPF_DMA_VF_RINT_ENA_W1S_START 0x20540 + +#define CN93_SDP_EPF_PP_VF_RINT_START 0x20560 +#define CN93_SDP_EPF_PP_VF_RINT_W1S_START 0x20580 +#define CN93_SDP_EPF_PP_VF_RINT_ENA_W1C_START 0x205A0 +#define CN93_SDP_EPF_PP_VF_RINT_ENA_W1S_START 0x205C0 + +#define CN93_SDP_EPF_MBOX_RINT(index) \ + (CN93_SDP_EPF_MBOX_RINT_START + ((index) * CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_MBOX_RINT_W1S(index) \ + (CN93_SDP_EPF_MBOX_RINT_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_MBOX_RINT_ENA_W1C(index) \ + (CN93_SDP_EPF_MBOX_RINT_ENA_W1C_START + ((index) * CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_MBOX_RINT_ENA_W1S(index) \ + (CN93_SDP_EPF_MBOX_RINT_ENA_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET)) + +#define CN93_SDP_EPF_VFIRE_RINT(index) \ + (CN93_SDP_EPF_VFIRE_RINT_START + ((index) * CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_VFIRE_RINT_W1S(index) \ + (CN93_SDP_EPF_VFIRE_RINT_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_VFIRE_RINT_ENA_W1C(index) \ + (CN93_SDP_EPF_VFIRE_RINT_ENA_W1C_START + ((index) * CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_VFIRE_RINT_ENA_W1S(index) \ + (CN93_SDP_EPF_VFIRE_RINT_ENA_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET)) + +#define CN93_SDP_EPF_VFORE_RINT(index) \ + (CN93_SDP_EPF_VFORE_RINT_START + ((index) * CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_VFORE_RINT_W1S(index) \ + (CN93_SDP_EPF_VFORE_RINT_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_VFORE_RINT_ENA_W1C(index) \ + (CN93_SDP_EPF_VFORE_RINT_ENA_W1C_START + ((index) * CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_VFORE_RINT_ENA_W1S(index) \ + (CN93_SDP_EPF_VFORE_RINT_ENA_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET)) + +#define CN93_SDP_EPF_DMA_VF_RINT(index) \ + (CN93_SDP_EPF_DMA_VF_RINT_START + ((index) + CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_DMA_VF_RINT_W1S(index) \ + (CN93_SDP_EPF_DMA_VF_RINT_W1S_START + ((index) + CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_DMA_VF_RINT_ENA_W1C(index) \ + (CN93_SDP_EPF_DMA_VF_RINT_ENA_W1C_START + ((index) + CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_DMA_VF_RINT_ENA_W1S(index) \ + (CN93_SDP_EPF_DMA_VF_RINT_ENA_W1S_START + ((index) + CN93_BIT_ARRAY_OFFSET)) + +#define CN93_SDP_EPF_PP_VF_RINT(index) \ + (CN93_SDP_EPF_PP_VF_RINT_START + ((index) + CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_PP_VF_RINT_W1S(index) \ + (CN93_SDP_EPF_PP_VF_RINT_W1S_START + ((index) + CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_PP_VF_RINT_ENA_W1C(index) \ + (CN93_SDP_EPF_PP_VF_RINT_ENA_W1C_START + ((index) + CN93_BIT_ARRAY_OFFSET)) +#define CN93_SDP_EPF_PP_VF_RINT_ENA_W1S(index) \ + (CN93_SDP_EPF_PP_VF_RINT_ENA_W1S_START + ((index) + CN93_BIT_ARRAY_OFFSET)) + +/*------------------ Interrupt Masks ----------------*/ +#define CN93_INTR_R_SEND_ISM BIT_ULL(63) +#define CN93_INTR_R_OUT_INT BIT_ULL(62) +#define CN93_INTR_R_IN_INT BIT_ULL(61) +#define CN93_INTR_R_MBOX_INT BIT_ULL(60) +#define CN93_INTR_R_RESEND BIT_ULL(59) +#define CN93_INTR_R_CLR_TIM BIT_ULL(58) + +/* ####################### Ring Mapping Registers ################################## */ +#define CN93_SDP_EPVF_RING_START 0x26000 +#define CN93_SDP_IN_RING_TB_MAP_START 0x28000 +#define CN93_SDP_IN_RATE_LIMIT_START 0x2A000 +#define CN93_SDP_MAC_PF_RING_CTL_START 0x2C000 + +#define CN93_SDP_EPVF_RING(ring) \ + (CN93_SDP_EPVF_RING_START + ((ring) * CN93_EPVF_RING_OFFSET)) +#define CN93_SDP_IN_RING_TB_MAP(ring) \ + (CN93_SDP_N_RING_TB_MAP_START + ((ring) * CN93_EPVF_RING_OFFSET)) +#define CN93_SDP_IN_RATE_LIMIT(ring) \ + (CN93_SDP_IN_RATE_LIMIT_START + ((ring) * CN93_EPVF_RING_OFFSET)) +#define CN93_SDP_MAC_PF_RING_CTL(mac) \ + (CN93_SDP_MAC_PF_RING_CTL_START + ((mac) * CN93_MAC_OFFSET)) + +#define CN93_SDP_MAC_PF_RING_CTL_NPFS(val) ((val) & 0xF) +#define CN93_SDP_MAC_PF_RING_CTL_SRN(val) (((val) >> 8) & 0xFF) +#define CN93_SDP_MAC_PF_RING_CTL_RPPF(val) (((val) >> 16) & 0x3F) + +/* Number of non-queue interrupts in CN93xx */ +#define CN93_NUM_NON_IOQ_INTR 16 +#endif /* _OCTEP_REGS_CN9K_PF_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c new file mode 100644 index 000000000000..945947ec7723 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c @@ -0,0 +1,508 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include <linux/pci.h> +#include <linux/etherdevice.h> +#include <linux/vmalloc.h> + +#include "octep_config.h" +#include "octep_main.h" + +static void octep_oq_reset_indices(struct octep_oq *oq) +{ + oq->host_read_idx = 0; + oq->host_refill_idx = 0; + oq->refill_count = 0; + oq->last_pkt_count = 0; + oq->pkts_pending = 0; +} + +/** + * octep_oq_fill_ring_buffers() - fill initial receive buffers for Rx ring. + * + * @oq: Octeon Rx queue data structure. + * + * Return: 0, if successfully filled receive buffers for all descriptors. + * -1, if failed to allocate a buffer or failed to map for DMA. + */ +static int octep_oq_fill_ring_buffers(struct octep_oq *oq) +{ + struct octep_oq_desc_hw *desc_ring = oq->desc_ring; + struct page *page; + u32 i; + + for (i = 0; i < oq->max_count; i++) { + page = dev_alloc_page(); + if (unlikely(!page)) { + dev_err(oq->dev, "Rx buffer alloc failed\n"); + goto rx_buf_alloc_err; + } + desc_ring[i].buffer_ptr = dma_map_page(oq->dev, page, 0, + PAGE_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(oq->dev, desc_ring[i].buffer_ptr)) { + dev_err(oq->dev, + "OQ-%d buffer alloc: DMA mapping error!\n", + oq->q_no); + put_page(page); + goto dma_map_err; + } + oq->buff_info[i].page = page; + } + + return 0; + +dma_map_err: +rx_buf_alloc_err: + while (i) { + i--; + dma_unmap_page(oq->dev, desc_ring[i].buffer_ptr, PAGE_SIZE, DMA_FROM_DEVICE); + put_page(oq->buff_info[i].page); + oq->buff_info[i].page = NULL; + } + + return -1; +} + +/** + * octep_oq_refill() - refill buffers for used Rx ring descriptors. + * + * @oct: Octeon device private data structure. + * @oq: Octeon Rx queue data structure. + * + * Return: number of descriptors successfully refilled with receive buffers. + */ +static int octep_oq_refill(struct octep_device *oct, struct octep_oq *oq) +{ + struct octep_oq_desc_hw *desc_ring = oq->desc_ring; + struct page *page; + u32 refill_idx, i; + + refill_idx = oq->host_refill_idx; + for (i = 0; i < oq->refill_count; i++) { + page = dev_alloc_page(); + if (unlikely(!page)) { + dev_err(oq->dev, "refill: rx buffer alloc failed\n"); + oq->stats.alloc_failures++; + break; + } + + desc_ring[refill_idx].buffer_ptr = dma_map_page(oq->dev, page, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(oq->dev, desc_ring[refill_idx].buffer_ptr)) { + dev_err(oq->dev, + "OQ-%d buffer refill: DMA mapping error!\n", + oq->q_no); + put_page(page); + oq->stats.alloc_failures++; + break; + } + oq->buff_info[refill_idx].page = page; + refill_idx++; + if (refill_idx == oq->max_count) + refill_idx = 0; + } + oq->host_refill_idx = refill_idx; + oq->refill_count -= i; + + return i; +} + +/** + * octep_setup_oq() - Setup a Rx queue. + * + * @oct: Octeon device private data structure. + * @q_no: Rx queue number to be setup. + * + * Allocate resources for a Rx queue. + */ +static int octep_setup_oq(struct octep_device *oct, int q_no) +{ + struct octep_oq *oq; + u32 desc_ring_size; + + oq = vzalloc(sizeof(*oq)); + if (!oq) + goto create_oq_fail; + oct->oq[q_no] = oq; + + oq->octep_dev = oct; + oq->netdev = oct->netdev; + oq->dev = &oct->pdev->dev; + oq->q_no = q_no; + oq->max_count = CFG_GET_OQ_NUM_DESC(oct->conf); + oq->ring_size_mask = oq->max_count - 1; + oq->buffer_size = CFG_GET_OQ_BUF_SIZE(oct->conf); + oq->max_single_buffer_size = oq->buffer_size - OCTEP_OQ_RESP_HW_SIZE; + + /* When the hardware/firmware supports additional capabilities, + * additional header is filled-in by Octeon after length field in + * Rx packets. this header contains additional packet information. + */ + if (oct->caps_enabled) + oq->max_single_buffer_size -= OCTEP_OQ_RESP_HW_EXT_SIZE; + + oq->refill_threshold = CFG_GET_OQ_REFILL_THRESHOLD(oct->conf); + + desc_ring_size = oq->max_count * OCTEP_OQ_DESC_SIZE; + oq->desc_ring = dma_alloc_coherent(oq->dev, desc_ring_size, + &oq->desc_ring_dma, GFP_KERNEL); + + if (unlikely(!oq->desc_ring)) { + dev_err(oq->dev, + "Failed to allocate DMA memory for OQ-%d !!\n", q_no); + goto desc_dma_alloc_err; + } + + oq->buff_info = (struct octep_rx_buffer *) + vzalloc(oq->max_count * OCTEP_OQ_RECVBUF_SIZE); + if (unlikely(!oq->buff_info)) { + dev_err(&oct->pdev->dev, + "Failed to allocate buffer info for OQ-%d\n", q_no); + goto buf_list_err; + } + + if (octep_oq_fill_ring_buffers(oq)) + goto oq_fill_buff_err; + + octep_oq_reset_indices(oq); + oct->hw_ops.setup_oq_regs(oct, q_no); + oct->num_oqs++; + + return 0; + +oq_fill_buff_err: + vfree(oq->buff_info); + oq->buff_info = NULL; +buf_list_err: + dma_free_coherent(oq->dev, desc_ring_size, + oq->desc_ring, oq->desc_ring_dma); + oq->desc_ring = NULL; +desc_dma_alloc_err: + vfree(oq); + oct->oq[q_no] = NULL; +create_oq_fail: + return -1; +} + +/** + * octep_oq_free_ring_buffers() - Free ring buffers. + * + * @oq: Octeon Rx queue data structure. + * + * Free receive buffers in unused Rx queue descriptors. + */ +static void octep_oq_free_ring_buffers(struct octep_oq *oq) +{ + struct octep_oq_desc_hw *desc_ring = oq->desc_ring; + int i; + + if (!oq->desc_ring || !oq->buff_info) + return; + + for (i = 0; i < oq->max_count; i++) { + if (oq->buff_info[i].page) { + dma_unmap_page(oq->dev, desc_ring[i].buffer_ptr, + PAGE_SIZE, DMA_FROM_DEVICE); + put_page(oq->buff_info[i].page); + oq->buff_info[i].page = NULL; + desc_ring[i].buffer_ptr = 0; + } + } + octep_oq_reset_indices(oq); +} + +/** + * octep_free_oq() - Free Rx queue resources. + * + * @oq: Octeon Rx queue data structure. + * + * Free all resources of a Rx queue. + */ +static int octep_free_oq(struct octep_oq *oq) +{ + struct octep_device *oct = oq->octep_dev; + int q_no = oq->q_no; + + octep_oq_free_ring_buffers(oq); + + if (oq->buff_info) + vfree(oq->buff_info); + + if (oq->desc_ring) + dma_free_coherent(oq->dev, + oq->max_count * OCTEP_OQ_DESC_SIZE, + oq->desc_ring, oq->desc_ring_dma); + + vfree(oq); + oct->oq[q_no] = NULL; + oct->num_oqs--; + return 0; +} + +/** + * octep_setup_oqs() - setup resources for all Rx queues. + * + * @oct: Octeon device private data structure. + */ +int octep_setup_oqs(struct octep_device *oct) +{ + int i, retval = 0; + + oct->num_oqs = 0; + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + retval = octep_setup_oq(oct, i); + if (retval) { + dev_err(&oct->pdev->dev, + "Failed to setup OQ(RxQ)-%d.\n", i); + goto oq_setup_err; + } + dev_dbg(&oct->pdev->dev, "Successfully setup OQ(RxQ)-%d.\n", i); + } + + return 0; + +oq_setup_err: + while (i) { + i--; + octep_free_oq(oct->oq[i]); + } + return -1; +} + +/** + * octep_oq_dbell_init() - Initialize Rx queue doorbell. + * + * @oct: Octeon device private data structure. + * + * Write number of descriptors to Rx queue doorbell register. + */ +void octep_oq_dbell_init(struct octep_device *oct) +{ + int i; + + for (i = 0; i < oct->num_oqs; i++) + writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg); +} + +/** + * octep_free_oqs() - Free resources of all Rx queues. + * + * @oct: Octeon device private data structure. + */ +void octep_free_oqs(struct octep_device *oct) +{ + int i; + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + if (!oct->oq[i]) + continue; + octep_free_oq(oct->oq[i]); + dev_dbg(&oct->pdev->dev, + "Successfully freed OQ(RxQ)-%d.\n", i); + } +} + +/** + * octep_oq_check_hw_for_pkts() - Check for new Rx packets. + * + * @oct: Octeon device private data structure. + * @oq: Octeon Rx queue data structure. + * + * Return: packets received after previous check. + */ +static int octep_oq_check_hw_for_pkts(struct octep_device *oct, + struct octep_oq *oq) +{ + u32 pkt_count, new_pkts; + + pkt_count = readl(oq->pkts_sent_reg); + new_pkts = pkt_count - oq->last_pkt_count; + + /* Clear the hardware packets counter register if the rx queue is + * being processed continuously with-in a single interrupt and + * reached half its max value. + * this counter is not cleared every time read, to save write cycles. + */ + if (unlikely(pkt_count > 0xF0000000U)) { + writel(pkt_count, oq->pkts_sent_reg); + pkt_count = readl(oq->pkts_sent_reg); + new_pkts += pkt_count; + } + oq->last_pkt_count = pkt_count; + oq->pkts_pending += new_pkts; + return new_pkts; +} + +/** + * __octep_oq_process_rx() - Process hardware Rx queue and push to stack. + * + * @oct: Octeon device private data structure. + * @oq: Octeon Rx queue data structure. + * @pkts_to_process: number of packets to be processed. + * + * Process the new packets in Rx queue. + * Packets larger than single Rx buffer arrive in consecutive descriptors. + * But, count returned by the API only accounts full packets, not fragments. + * + * Return: number of packets processed and pushed to stack. + */ +static int __octep_oq_process_rx(struct octep_device *oct, + struct octep_oq *oq, u16 pkts_to_process) +{ + struct octep_oq_resp_hw_ext *resp_hw_ext = NULL; + struct octep_rx_buffer *buff_info; + struct octep_oq_resp_hw *resp_hw; + u32 pkt, rx_bytes, desc_used; + struct sk_buff *skb; + u16 data_offset; + u32 read_idx; + + read_idx = oq->host_read_idx; + rx_bytes = 0; + desc_used = 0; + for (pkt = 0; pkt < pkts_to_process; pkt++) { + buff_info = (struct octep_rx_buffer *)&oq->buff_info[read_idx]; + dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr, + PAGE_SIZE, DMA_FROM_DEVICE); + resp_hw = page_address(buff_info->page); + buff_info->page = NULL; + + /* Swap the length field that is in Big-Endian to CPU */ + buff_info->len = be64_to_cpu(resp_hw->length); + if (oct->caps_enabled & OCTEP_CAP_RX_CHECKSUM) { + /* Extended response header is immediately after + * response header (resp_hw) + */ + resp_hw_ext = (struct octep_oq_resp_hw_ext *) + (resp_hw + 1); + buff_info->len -= OCTEP_OQ_RESP_HW_EXT_SIZE; + /* Packet Data is immediately after + * extended response header. + */ + data_offset = OCTEP_OQ_RESP_HW_SIZE + + OCTEP_OQ_RESP_HW_EXT_SIZE; + } else { + /* Data is immediately after + * Hardware Rx response header. + */ + data_offset = OCTEP_OQ_RESP_HW_SIZE; + } + rx_bytes += buff_info->len; + + if (buff_info->len <= oq->max_single_buffer_size) { + skb = build_skb((void *)resp_hw, PAGE_SIZE); + skb_reserve(skb, data_offset); + skb_put(skb, buff_info->len); + read_idx++; + desc_used++; + if (read_idx == oq->max_count) + read_idx = 0; + } else { + struct skb_shared_info *shinfo; + u16 data_len; + + skb = build_skb((void *)resp_hw, PAGE_SIZE); + skb_reserve(skb, data_offset); + /* Head fragment includes response header(s); + * subsequent fragments contains only data. + */ + skb_put(skb, oq->max_single_buffer_size); + read_idx++; + desc_used++; + if (read_idx == oq->max_count) + read_idx = 0; + + shinfo = skb_shinfo(skb); + data_len = buff_info->len - oq->max_single_buffer_size; + while (data_len) { + dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr, + PAGE_SIZE, DMA_FROM_DEVICE); + buff_info = (struct octep_rx_buffer *) + &oq->buff_info[read_idx]; + if (data_len < oq->buffer_size) { + buff_info->len = data_len; + data_len = 0; + } else { + buff_info->len = oq->buffer_size; + data_len -= oq->buffer_size; + } + + skb_add_rx_frag(skb, shinfo->nr_frags, + buff_info->page, 0, + buff_info->len, + buff_info->len); + buff_info->page = NULL; + read_idx++; + desc_used++; + if (read_idx == oq->max_count) + read_idx = 0; + } + } + + skb->dev = oq->netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + if (resp_hw_ext && + resp_hw_ext->csum_verified == OCTEP_CSUM_VERIFIED) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + napi_gro_receive(oq->napi, skb); + } + + oq->host_read_idx = read_idx; + oq->refill_count += desc_used; + oq->stats.packets += pkt; + oq->stats.bytes += rx_bytes; + + return pkt; +} + +/** + * octep_oq_process_rx() - Process Rx queue. + * + * @oq: Octeon Rx queue data structure. + * @budget: max number of packets can be processed in one invocation. + * + * Check for newly received packets and process them. + * Keeps checking for new packets until budget is used or no new packets seen. + * + * Return: number of packets processed. + */ +int octep_oq_process_rx(struct octep_oq *oq, int budget) +{ + u32 pkts_available, pkts_processed, total_pkts_processed; + struct octep_device *oct = oq->octep_dev; + + pkts_available = 0; + pkts_processed = 0; + total_pkts_processed = 0; + while (total_pkts_processed < budget) { + /* update pending count only when current one exhausted */ + if (oq->pkts_pending == 0) + octep_oq_check_hw_for_pkts(oct, oq); + pkts_available = min(budget - total_pkts_processed, + oq->pkts_pending); + if (!pkts_available) + break; + + pkts_processed = __octep_oq_process_rx(oct, oq, + pkts_available); + oq->pkts_pending -= pkts_processed; + total_pkts_processed += pkts_processed; + } + + if (oq->refill_count >= oq->refill_threshold) { + u32 desc_refilled = octep_oq_refill(oct, oq); + + /* flush pending writes before updating credits */ + wmb(); + writel(desc_refilled, oq->pkts_credit_reg); + } + + return total_pkts_processed; +} diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.h new file mode 100644 index 000000000000..782a24f27f3e --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.h @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_RX_H_ +#define _OCTEP_RX_H_ + +/* struct octep_oq_desc_hw - Octeon Hardware OQ descriptor format. + * + * The descriptor ring is made of descriptors which have 2 64-bit values: + * + * @buffer_ptr: DMA address of the skb->data + * @info_ptr: DMA address of host memory, used to update pkt count by hw. + * This is currently unused to save pci writes. + */ +struct octep_oq_desc_hw { + dma_addr_t buffer_ptr; + u64 info_ptr; +}; + +#define OCTEP_OQ_DESC_SIZE (sizeof(struct octep_oq_desc_hw)) + +#define OCTEP_CSUM_L4_VERIFIED 0x1 +#define OCTEP_CSUM_IP_VERIFIED 0x2 +#define OCTEP_CSUM_VERIFIED (OCTEP_CSUM_L4_VERIFIED | OCTEP_CSUM_IP_VERIFIED) + +/* Extended Response Header in packet data received from Hardware. + * Includes metadata like checksum status. + * this is valid only if hardware/firmware published support for this. + * This is at offset 0 of packet data (skb->data). + */ +struct octep_oq_resp_hw_ext { + /* Reserved. */ + u64 reserved:62; + + /* checksum verified. */ + u64 csum_verified:2; +}; + +#define OCTEP_OQ_RESP_HW_EXT_SIZE (sizeof(struct octep_oq_resp_hw_ext)) + +/* Length of Rx packet DMA'ed by Octeon to Host. + * this is in bigendian; so need to be converted to cpu endian. + * Octeon writes this at the beginning of Rx buffer (skb->data). + */ +struct octep_oq_resp_hw { + /* The Length of the packet. */ + __be64 length; +}; + +#define OCTEP_OQ_RESP_HW_SIZE (sizeof(struct octep_oq_resp_hw)) + +/* Pointer to data buffer. + * Driver keeps a pointer to the data buffer that it made available to + * the Octeon device. Since the descriptor ring keeps physical (bus) + * addresses, this field is required for the driver to keep track of + * the virtual address pointers. The fields are operated by + * OS-dependent routines. + */ +struct octep_rx_buffer { + struct page *page; + + /* length from rx hardware descriptor after converting to cpu endian */ + u64 len; +}; + +#define OCTEP_OQ_RECVBUF_SIZE (sizeof(struct octep_rx_buffer)) + +/* Output Queue statistics. Each output queue has four stats fields. */ +struct octep_oq_stats { + /* Number of packets received from the Device. */ + u64 packets; + + /* Number of bytes received from the Device. */ + u64 bytes; + + /* Number of times failed to allocate buffers. */ + u64 alloc_failures; +}; + +#define OCTEP_OQ_STATS_SIZE (sizeof(struct octep_oq_stats)) + +/* Hardware interface Rx statistics */ +struct octep_iface_rx_stats { + /* Received packets */ + u64 pkts; + + /* Octets of received packets */ + u64 octets; + + /* Received PAUSE and Control packets */ + u64 pause_pkts; + + /* Received PAUSE and Control octets */ + u64 pause_octets; + + /* Filtered DMAC0 packets */ + u64 dmac0_pkts; + + /* Filtered DMAC0 octets */ + u64 dmac0_octets; + + /* Packets dropped due to RX FIFO full */ + u64 dropped_pkts_fifo_full; + + /* Octets dropped due to RX FIFO full */ + u64 dropped_octets_fifo_full; + + /* Error packets */ + u64 err_pkts; + + /* Filtered DMAC1 packets */ + u64 dmac1_pkts; + + /* Filtered DMAC1 octets */ + u64 dmac1_octets; + + /* NCSI-bound packets dropped */ + u64 ncsi_dropped_pkts; + + /* NCSI-bound octets dropped */ + u64 ncsi_dropped_octets; + + /* Multicast packets received. */ + u64 mcast_pkts; + + /* Broadcast packets received. */ + u64 bcast_pkts; + +}; + +/* The Descriptor Ring Output Queue structure. + * This structure has all the information required to implement a + * Octeon OQ. + */ +struct octep_oq { + u32 q_no; + + struct octep_device *octep_dev; + struct net_device *netdev; + struct device *dev; + + struct napi_struct *napi; + + /* The receive buffer list. This list has the virtual addresses + * of the buffers. + */ + struct octep_rx_buffer *buff_info; + + /* Pointer to the mapped packet credit register. + * Host writes number of info/buffer ptrs available to this register + */ + u8 __iomem *pkts_credit_reg; + + /* Pointer to the mapped packet sent register. + * Octeon writes the number of packets DMA'ed to host memory + * in this register. + */ + u8 __iomem *pkts_sent_reg; + + /* Statistics for this OQ. */ + struct octep_oq_stats stats; + + /* Packets pending to be processed */ + u32 pkts_pending; + u32 last_pkt_count; + + /* Index in the ring where the driver should read the next packet */ + u32 host_read_idx; + + /* Number of descriptors in this ring. */ + u32 max_count; + u32 ring_size_mask; + + /* The number of descriptors pending refill. */ + u32 refill_count; + + /* Index in the ring where the driver will refill the + * descriptor's buffer + */ + u32 host_refill_idx; + u32 refill_threshold; + + /* The size of each buffer pointed by the buffer pointer. */ + u32 buffer_size; + u32 max_single_buffer_size; + + /* The 8B aligned descriptor ring starts at this address. */ + struct octep_oq_desc_hw *desc_ring; + + /* DMA mapped address of the OQ descriptor ring. */ + dma_addr_t desc_ring_dma; +}; + +#define OCTEP_OQ_SIZE (sizeof(struct octep_oq)) +#endif /* _OCTEP_RX_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c new file mode 100644 index 000000000000..511552bc3e87 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include <linux/pci.h> +#include <linux/etherdevice.h> +#include <linux/vmalloc.h> + +#include "octep_config.h" +#include "octep_main.h" + +/* Reset various index of Tx queue data structure. */ +static void octep_iq_reset_indices(struct octep_iq *iq) +{ + iq->fill_cnt = 0; + iq->host_write_index = 0; + iq->octep_read_index = 0; + iq->flush_index = 0; + iq->pkts_processed = 0; + iq->pkt_in_done = 0; + atomic_set(&iq->instr_pending, 0); +} + +/** + * octep_iq_process_completions() - Process Tx queue completions. + * + * @iq: Octeon Tx queue data structure. + * @budget: max number of completions to be processed in one invocation. + */ +int octep_iq_process_completions(struct octep_iq *iq, u16 budget) +{ + u32 compl_pkts, compl_bytes, compl_sg; + struct octep_device *oct = iq->octep_dev; + struct octep_tx_buffer *tx_buffer; + struct skb_shared_info *shinfo; + u32 fi = iq->flush_index; + struct sk_buff *skb; + u8 frags, i; + + compl_pkts = 0; + compl_sg = 0; + compl_bytes = 0; + iq->octep_read_index = oct->hw_ops.update_iq_read_idx(iq); + + while (likely(budget && (fi != iq->octep_read_index))) { + tx_buffer = iq->buff_info + fi; + skb = tx_buffer->skb; + + fi++; + if (unlikely(fi == iq->max_count)) + fi = 0; + compl_bytes += skb->len; + compl_pkts++; + budget--; + + if (!tx_buffer->gather) { + dma_unmap_single(iq->dev, tx_buffer->dma, + tx_buffer->skb->len, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + continue; + } + + /* Scatter/Gather */ + shinfo = skb_shinfo(skb); + frags = shinfo->nr_frags; + compl_sg++; + + dma_unmap_single(iq->dev, tx_buffer->sglist[0].dma_ptr[0], + tx_buffer->sglist[0].len[0], DMA_TO_DEVICE); + + i = 1; /* entry 0 is main skb, unmapped above */ + while (frags--) { + dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3], + tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE); + i++; + } + + dev_kfree_skb_any(skb); + } + + iq->pkts_processed += compl_pkts; + atomic_sub(compl_pkts, &iq->instr_pending); + iq->stats.instr_completed += compl_pkts; + iq->stats.bytes_sent += compl_bytes; + iq->stats.sgentry_sent += compl_sg; + iq->flush_index = fi; + + netdev_tx_completed_queue(iq->netdev_q, compl_pkts, compl_bytes); + + if (unlikely(__netif_subqueue_stopped(iq->netdev, iq->q_no)) && + ((iq->max_count - atomic_read(&iq->instr_pending)) > + OCTEP_WAKE_QUEUE_THRESHOLD)) + netif_wake_subqueue(iq->netdev, iq->q_no); + return !budget; +} + +/** + * octep_iq_free_pending() - Free Tx buffers for pending completions. + * + * @iq: Octeon Tx queue data structure. + */ +static void octep_iq_free_pending(struct octep_iq *iq) +{ + struct octep_tx_buffer *tx_buffer; + struct skb_shared_info *shinfo; + u32 fi = iq->flush_index; + struct sk_buff *skb; + u8 frags, i; + + while (fi != iq->host_write_index) { + tx_buffer = iq->buff_info + fi; + skb = tx_buffer->skb; + + fi++; + if (unlikely(fi == iq->max_count)) + fi = 0; + + if (!tx_buffer->gather) { + dma_unmap_single(iq->dev, tx_buffer->dma, + tx_buffer->skb->len, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + continue; + } + + /* Scatter/Gather */ + shinfo = skb_shinfo(skb); + frags = shinfo->nr_frags; + + dma_unmap_single(iq->dev, + tx_buffer->sglist[0].dma_ptr[0], + tx_buffer->sglist[0].len[0], + DMA_TO_DEVICE); + + i = 1; /* entry 0 is main skb, unmapped above */ + while (frags--) { + dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3], + tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE); + i++; + } + + dev_kfree_skb_any(skb); + } + + atomic_set(&iq->instr_pending, 0); + iq->flush_index = fi; + netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no)); +} + +/** + * octep_clean_iqs() - Clean Tx queues to shutdown the device. + * + * @oct: Octeon device private data structure. + * + * Free the buffers in Tx queue descriptors pending completion and + * reset queue indices + */ +void octep_clean_iqs(struct octep_device *oct) +{ + int i; + + for (i = 0; i < oct->num_iqs; i++) { + octep_iq_free_pending(oct->iq[i]); + octep_iq_reset_indices(oct->iq[i]); + } +} + +/** + * octep_setup_iq() - Setup a Tx queue. + * + * @oct: Octeon device private data structure. + * @q_no: Tx queue number to be setup. + * + * Allocate resources for a Tx queue. + */ +static int octep_setup_iq(struct octep_device *oct, int q_no) +{ + u32 desc_ring_size, buff_info_size, sglist_size; + struct octep_iq *iq; + int i; + + iq = vzalloc(sizeof(*iq)); + if (!iq) + goto iq_alloc_err; + oct->iq[q_no] = iq; + + iq->octep_dev = oct; + iq->netdev = oct->netdev; + iq->dev = &oct->pdev->dev; + iq->q_no = q_no; + iq->max_count = CFG_GET_IQ_NUM_DESC(oct->conf); + iq->ring_size_mask = iq->max_count - 1; + iq->fill_threshold = CFG_GET_IQ_DB_MIN(oct->conf); + iq->netdev_q = netdev_get_tx_queue(iq->netdev, q_no); + + /* Allocate memory for hardware queue descriptors */ + desc_ring_size = OCTEP_IQ_DESC_SIZE * CFG_GET_IQ_NUM_DESC(oct->conf); + iq->desc_ring = dma_alloc_coherent(iq->dev, desc_ring_size, + &iq->desc_ring_dma, GFP_KERNEL); + if (unlikely(!iq->desc_ring)) { + dev_err(iq->dev, + "Failed to allocate DMA memory for IQ-%d\n", q_no); + goto desc_dma_alloc_err; + } + + /* Allocate memory for hardware SGLIST descriptors */ + sglist_size = OCTEP_SGLIST_SIZE_PER_PKT * + CFG_GET_IQ_NUM_DESC(oct->conf); + iq->sglist = dma_alloc_coherent(iq->dev, sglist_size, + &iq->sglist_dma, GFP_KERNEL); + if (unlikely(!iq->sglist)) { + dev_err(iq->dev, + "Failed to allocate DMA memory for IQ-%d SGLIST\n", + q_no); + goto sglist_alloc_err; + } + + /* allocate memory to manage Tx packets pending completion */ + buff_info_size = OCTEP_IQ_TXBUFF_INFO_SIZE * iq->max_count; + iq->buff_info = vzalloc(buff_info_size); + if (!iq->buff_info) { + dev_err(iq->dev, + "Failed to allocate buff info for IQ-%d\n", q_no); + goto buff_info_err; + } + + /* Setup sglist addresses in tx_buffer entries */ + for (i = 0; i < CFG_GET_IQ_NUM_DESC(oct->conf); i++) { + struct octep_tx_buffer *tx_buffer; + + tx_buffer = &iq->buff_info[i]; + tx_buffer->sglist = + &iq->sglist[i * OCTEP_SGLIST_ENTRIES_PER_PKT]; + tx_buffer->sglist_dma = + iq->sglist_dma + (i * OCTEP_SGLIST_SIZE_PER_PKT); + } + + octep_iq_reset_indices(iq); + oct->hw_ops.setup_iq_regs(oct, q_no); + + oct->num_iqs++; + return 0; + +buff_info_err: + dma_free_coherent(iq->dev, sglist_size, iq->sglist, iq->sglist_dma); +sglist_alloc_err: + dma_free_coherent(iq->dev, desc_ring_size, + iq->desc_ring, iq->desc_ring_dma); +desc_dma_alloc_err: + vfree(iq); + oct->iq[q_no] = NULL; +iq_alloc_err: + return -1; +} + +/** + * octep_free_iq() - Free Tx queue resources. + * + * @iq: Octeon Tx queue data structure. + * + * Free all the resources allocated for a Tx queue. + */ +static void octep_free_iq(struct octep_iq *iq) +{ + struct octep_device *oct = iq->octep_dev; + u64 desc_ring_size, sglist_size; + int q_no = iq->q_no; + + desc_ring_size = OCTEP_IQ_DESC_SIZE * CFG_GET_IQ_NUM_DESC(oct->conf); + + if (iq->buff_info) + vfree(iq->buff_info); + + if (iq->desc_ring) + dma_free_coherent(iq->dev, desc_ring_size, + iq->desc_ring, iq->desc_ring_dma); + + sglist_size = OCTEP_SGLIST_SIZE_PER_PKT * + CFG_GET_IQ_NUM_DESC(oct->conf); + if (iq->sglist) + dma_free_coherent(iq->dev, sglist_size, + iq->sglist, iq->sglist_dma); + + vfree(iq); + oct->iq[q_no] = NULL; + oct->num_iqs--; +} + +/** + * octep_setup_iqs() - setup resources for all Tx queues. + * + * @oct: Octeon device private data structure. + */ +int octep_setup_iqs(struct octep_device *oct) +{ + int i; + + oct->num_iqs = 0; + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + if (octep_setup_iq(oct, i)) { + dev_err(&oct->pdev->dev, + "Failed to setup IQ(TxQ)-%d.\n", i); + goto iq_setup_err; + } + dev_dbg(&oct->pdev->dev, "Successfully setup IQ(TxQ)-%d.\n", i); + } + + return 0; + +iq_setup_err: + while (i) { + i--; + octep_free_iq(oct->iq[i]); + } + return -1; +} + +/** + * octep_free_iqs() - Free resources of all Tx queues. + * + * @oct: Octeon device private data structure. + */ +void octep_free_iqs(struct octep_device *oct) +{ + int i; + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + octep_free_iq(oct->iq[i]); + dev_dbg(&oct->pdev->dev, + "Successfully destroyed IQ(TxQ)-%d.\n", i); + } + oct->num_iqs = 0; +} diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h new file mode 100644 index 000000000000..2ef57980eb47 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h @@ -0,0 +1,284 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_TX_H_ +#define _OCTEP_TX_H_ + +#define IQ_SEND_OK 0 +#define IQ_SEND_STOP 1 +#define IQ_SEND_FAILED -1 + +#define TX_BUFTYPE_NONE 0 +#define TX_BUFTYPE_NET 1 +#define TX_BUFTYPE_NET_SG 2 +#define NUM_TX_BUFTYPES 3 + +/* Hardware format for Scatter/Gather list */ +struct octep_tx_sglist_desc { + u16 len[4]; + dma_addr_t dma_ptr[4]; +}; + +/* Each Scatter/Gather entry sent to hardwar hold four pointers. + * So, number of entries required is (MAX_SKB_FRAGS + 1)/4, where '+1' + * is for main skb which also goes as a gather buffer to Octeon hardware. + * To allocate sufficient SGLIST entries for a packet with max fragments, + * align by adding 3 before calcuating max SGLIST entries per packet. + */ +#define OCTEP_SGLIST_ENTRIES_PER_PKT ((MAX_SKB_FRAGS + 1 + 3) / 4) +#define OCTEP_SGLIST_SIZE_PER_PKT \ + (OCTEP_SGLIST_ENTRIES_PER_PKT * sizeof(struct octep_tx_sglist_desc)) + +struct octep_tx_buffer { + struct sk_buff *skb; + dma_addr_t dma; + struct octep_tx_sglist_desc *sglist; + dma_addr_t sglist_dma; + u8 gather; +}; + +#define OCTEP_IQ_TXBUFF_INFO_SIZE (sizeof(struct octep_tx_buffer)) + +/* Hardware interface Tx statistics */ +struct octep_iface_tx_stats { + /* Packets dropped due to excessive collisions */ + u64 xscol; + + /* Packets dropped due to excessive deferral */ + u64 xsdef; + + /* Packets sent that experienced multiple collisions before successful + * transmission + */ + u64 mcol; + + /* Packets sent that experienced a single collision before successful + * transmission + */ + u64 scol; + + /* Total octets sent on the interface */ + u64 octs; + + /* Total frames sent on the interface */ + u64 pkts; + + /* Packets sent with an octet count < 64 */ + u64 hist_lt64; + + /* Packets sent with an octet count == 64 */ + u64 hist_eq64; + + /* Packets sent with an octet count of 65–127 */ + u64 hist_65to127; + + /* Packets sent with an octet count of 128–255 */ + u64 hist_128to255; + + /* Packets sent with an octet count of 256–511 */ + u64 hist_256to511; + + /* Packets sent with an octet count of 512–1023 */ + u64 hist_512to1023; + + /* Packets sent with an octet count of 1024-1518 */ + u64 hist_1024to1518; + + /* Packets sent with an octet count of > 1518 */ + u64 hist_gt1518; + + /* Packets sent to a broadcast DMAC */ + u64 bcst; + + /* Packets sent to the multicast DMAC */ + u64 mcst; + + /* Packets sent that experienced a transmit underflow and were + * truncated + */ + u64 undflw; + + /* Control/PAUSE packets sent */ + u64 ctl; +}; + +/* Input Queue statistics. Each input queue has four stats fields. */ +struct octep_iq_stats { + /* Instructions posted to this queue. */ + u64 instr_posted; + + /* Instructions copied by hardware for processing. */ + u64 instr_completed; + + /* Instructions that could not be processed. */ + u64 instr_dropped; + + /* Bytes sent through this queue. */ + u64 bytes_sent; + + /* Gather entries sent through this queue. */ + u64 sgentry_sent; + + /* Number of transmit failures due to TX_BUSY */ + u64 tx_busy; + + /* Number of times the queue is restarted */ + u64 restart_cnt; +}; + +/* The instruction (input) queue. + * The input queue is used to post raw (instruction) mode data or packet + * data to Octeon device from the host. Each input queue (up to 4) for + * a Octeon device has one such structure to represent it. + */ +struct octep_iq { + u32 q_no; + + struct octep_device *octep_dev; + struct net_device *netdev; + struct device *dev; + struct netdev_queue *netdev_q; + + /* Index in input ring where driver should write the next packet */ + u16 host_write_index; + + /* Index in input ring where Octeon is expected to read next packet */ + u16 octep_read_index; + + /* This index aids in finding the window in the queue where Octeon + * has read the commands. + */ + u16 flush_index; + + /* Statistics for this input queue. */ + struct octep_iq_stats stats; + + /* This field keeps track of the instructions pending in this queue. */ + atomic_t instr_pending; + + /* Pointer to the Virtual Base addr of the input ring. */ + struct octep_tx_desc_hw *desc_ring; + + /* DMA mapped base address of the input descriptor ring. */ + dma_addr_t desc_ring_dma; + + /* Info of Tx buffers pending completion. */ + struct octep_tx_buffer *buff_info; + + /* Base pointer to Scatter/Gather lists for all ring descriptors. */ + struct octep_tx_sglist_desc *sglist; + + /* DMA mapped addr of Scatter Gather Lists */ + dma_addr_t sglist_dma; + + /* Octeon doorbell register for the ring. */ + u8 __iomem *doorbell_reg; + + /* Octeon instruction count register for this ring. */ + u8 __iomem *inst_cnt_reg; + + /* interrupt level register for this ring */ + u8 __iomem *intr_lvl_reg; + + /* Maximum no. of instructions in this queue. */ + u32 max_count; + u32 ring_size_mask; + + u32 pkt_in_done; + u32 pkts_processed; + + u32 status; + + /* Number of instructions pending to be posted to Octeon. */ + u32 fill_cnt; + + /* The max. number of instructions that can be held pending by the + * driver before ringing doorbell. + */ + u32 fill_threshold; +}; + +/* Hardware Tx Instruction Header */ +struct octep_instr_hdr { + /* Data Len */ + u64 tlen:16; + + /* Reserved */ + u64 rsvd:20; + + /* PKIND for SDP */ + u64 pkind:6; + + /* Front Data size */ + u64 fsz:6; + + /* No. of entries in gather list */ + u64 gsz:14; + + /* Gather indicator 1=gather*/ + u64 gather:1; + + /* Reserved3 */ + u64 reserved3:1; +}; + +/* Hardware Tx completion response header */ +struct octep_instr_resp_hdr { + /* Request ID */ + u64 rid:16; + + /* PCIe port to use for response */ + u64 pcie_port:3; + + /* Scatter indicator 1=scatter */ + u64 scatter:1; + + /* Size of Expected result OR no. of entries in scatter list */ + u64 rlenssz:14; + + /* Desired destination port for result */ + u64 dport:6; + + /* Opcode Specific parameters */ + u64 param:8; + + /* Opcode for the return packet */ + u64 opcode:16; +}; + +/* 64-byte Tx instruction format. + * Format of instruction for a 64-byte mode input queue. + * + * only first 16-bytes (dptr and ih) are mandatory; rest are optional + * and filled by the driver based on firmware/hardware capabilities. + * These optional headers together called Front Data and its size is + * described by ih->fsz. + */ +struct octep_tx_desc_hw { + /* Pointer where the input data is available. */ + u64 dptr; + + /* Instruction Header. */ + union { + struct octep_instr_hdr ih; + u64 ih64; + }; + + /* Pointer where the response for a RAW mode packet will be written + * by Octeon. + */ + u64 rptr; + + /* Input Instruction Response Header. */ + struct octep_instr_resp_hdr irh; + + /* Additional headers available in a 64-byte instruction. */ + u64 exhdr[4]; +}; + +#define OCTEP_IQ_DESC_SIZE (sizeof(struct octep_tx_desc_hw)) +#endif /* _OCTEP_TX_H_ */ diff --git a/drivers/net/ethernet/marvell/prestera/prestera_acl.c b/drivers/net/ethernet/marvell/prestera/prestera_acl.c index 47c899c08951..e5627782fac6 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_acl.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_acl.c @@ -421,13 +421,6 @@ int prestera_acl_rule_add(struct prestera_switch *sw, rule->re_arg.vtcam_id = ruleset->vtcam_id; rule->re_key.prio = rule->priority; - /* setup counter */ - rule->re_arg.count.valid = true; - err = prestera_acl_chain_to_client(ruleset->ht_key.chain_index, - &rule->re_arg.count.client); - if (err) - goto err_rule_add; - rule->re = prestera_acl_rule_entry_find(sw->acl, &rule->re_key); err = WARN_ON(rule->re) ? -EEXIST : 0; if (err) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c index 921959a980ee..c12b09ac6559 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c @@ -70,6 +70,24 @@ static int prestera_flower_parse_actions(struct prestera_flow_block *block, if (!flow_action_has_entries(flow_action)) return 0; + if (!flow_action_mixed_hw_stats_check(flow_action, extack)) + return -EOPNOTSUPP; + + act = flow_action_first_entry_get(flow_action); + if (act->hw_stats & FLOW_ACTION_HW_STATS_DISABLED) { + /* Nothing to do */ + } else if (act->hw_stats & FLOW_ACTION_HW_STATS_DELAYED) { + /* setup counter first */ + rule->re_arg.count.valid = true; + err = prestera_acl_chain_to_client(chain_index, + &rule->re_arg.count.client); + if (err) + return err; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type"); + return -EOPNOTSUPP; + } + flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_ACCEPT: diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 6c5618cf4f08..3754d8aec76d 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -4,6 +4,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/inetdevice.h> +#include <net/inet_dscp.h> #include <net/switchdev.h> #include <linux/rhashtable.h> @@ -26,7 +27,7 @@ struct prestera_kern_fib_cache { /* Indicate if route is not overlapped by another table */ struct rhash_head ht_node; /* node of prestera_router */ struct fib_info *fi; - u8 kern_tos; + dscp_t kern_dscp; u8 kern_type; bool reachable; }; @@ -88,7 +89,7 @@ prestera_kern_fib_cache_destroy(struct prestera_switch *sw, static struct prestera_kern_fib_cache * prestera_kern_fib_cache_create(struct prestera_switch *sw, struct prestera_kern_fib_cache_key *key, - struct fib_info *fi, u8 tos, u8 type) + struct fib_info *fi, dscp_t dscp, u8 type) { struct prestera_kern_fib_cache *fib_cache; int err; @@ -100,7 +101,7 @@ prestera_kern_fib_cache_create(struct prestera_switch *sw, memcpy(&fib_cache->key, key, sizeof(*key)); fib_info_hold(fi); fib_cache->fi = fi; - fib_cache->kern_tos = tos; + fib_cache->kern_dscp = dscp; fib_cache->kern_type = type; err = rhashtable_insert_fast(&sw->router->kern_fib_cache_ht, @@ -132,7 +133,7 @@ __prestera_k_arb_fib_lpm_offload_set(struct prestera_switch *sw, fri.tb_id = fc->key.kern_tb_id; fri.dst = fc->key.addr.u.ipv4; fri.dst_len = fc->key.prefix_len; - fri.tos = fc->kern_tos; + fri.dscp = fc->kern_dscp; fri.type = fc->kern_type; /* flags begin */ fri.offload = offload; @@ -305,7 +306,7 @@ prestera_k_arb_fib_evt(struct prestera_switch *sw, if (replace) { fib_cache = prestera_kern_fib_cache_create(sw, &fc_key, fen_info->fi, - fen_info->tos, + fen_info->dscp, fen_info->type); if (!fib_cache) { dev_err(sw->dev->dev, "fib_cache == NULL"); diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig index 86d356b4388d..da4ec235d146 100644 --- a/drivers/net/ethernet/mediatek/Kconfig +++ b/drivers/net/ethernet/mediatek/Kconfig @@ -7,6 +7,10 @@ config NET_VENDOR_MEDIATEK if NET_VENDOR_MEDIATEK +config NET_MEDIATEK_SOC_WED + depends on ARCH_MEDIATEK || COMPILE_TEST + def_bool NET_MEDIATEK_SOC != n + config NET_MEDIATEK_SOC tristate "MediaTek SoC Gigabit Ethernet support" depends on NET_DSA || !NET_DSA diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile index 79d4cdbbcbf5..45ba0970504a 100644 --- a/drivers/net/ethernet/mediatek/Makefile +++ b/drivers/net/ethernet/mediatek/Makefile @@ -5,4 +5,9 @@ obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o +mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed.o +ifdef CONFIG_DEBUG_FS +mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_debugfs.o +endif +obj-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_ops.o obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index f02d07ec5ccb..18eebcaa6a76 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -9,6 +9,7 @@ #include <linux/of_device.h> #include <linux/of_mdio.h> #include <linux/of_net.h> +#include <linux/of_address.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> #include <linux/clk.h> @@ -20,9 +21,11 @@ #include <linux/pinctrl/devinfo.h> #include <linux/phylink.h> #include <linux/jhash.h> +#include <linux/bitfield.h> #include <net/dsa.h> #include "mtk_eth_soc.h" +#include "mtk_wed.h" static int mtk_msg_level = -1; module_param_named(msg_level, mtk_msg_level, int, 0); @@ -786,7 +789,7 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) dma_addr_t dma_addr; int i; - eth->scratch_ring = dma_alloc_coherent(eth->dev, + eth->scratch_ring = dma_alloc_coherent(eth->dma_dev, cnt * sizeof(struct mtk_tx_dma), ð->phy_scratch_ring, GFP_ATOMIC); @@ -798,10 +801,10 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) if (unlikely(!eth->scratch_head)) return -ENOMEM; - dma_addr = dma_map_single(eth->dev, + dma_addr = dma_map_single(eth->dma_dev, eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(eth->dev, dma_addr))) + if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr))) return -ENOMEM; phy_ring_tail = eth->phy_scratch_ring + @@ -855,26 +858,26 @@ static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf, { if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) { if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) { - dma_unmap_single(eth->dev, + dma_unmap_single(eth->dma_dev, dma_unmap_addr(tx_buf, dma_addr0), dma_unmap_len(tx_buf, dma_len0), DMA_TO_DEVICE); } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) { - dma_unmap_page(eth->dev, + dma_unmap_page(eth->dma_dev, dma_unmap_addr(tx_buf, dma_addr0), dma_unmap_len(tx_buf, dma_len0), DMA_TO_DEVICE); } } else { if (dma_unmap_len(tx_buf, dma_len0)) { - dma_unmap_page(eth->dev, + dma_unmap_page(eth->dma_dev, dma_unmap_addr(tx_buf, dma_addr0), dma_unmap_len(tx_buf, dma_len0), DMA_TO_DEVICE); } if (dma_unmap_len(tx_buf, dma_len1)) { - dma_unmap_page(eth->dev, + dma_unmap_page(eth->dma_dev, dma_unmap_addr(tx_buf, dma_addr1), dma_unmap_len(tx_buf, dma_len1), DMA_TO_DEVICE); @@ -952,9 +955,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, if (skb_vlan_tag_present(skb)) txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb); - mapped_addr = dma_map_single(eth->dev, skb->data, + mapped_addr = dma_map_single(eth->dma_dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(eth->dev, mapped_addr))) + if (unlikely(dma_mapping_error(eth->dma_dev, mapped_addr))) return -ENOMEM; WRITE_ONCE(itxd->txd1, mapped_addr); @@ -993,10 +996,10 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN); - mapped_addr = skb_frag_dma_map(eth->dev, frag, offset, + mapped_addr = skb_frag_dma_map(eth->dma_dev, frag, offset, frag_map_size, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(eth->dev, mapped_addr))) + if (unlikely(dma_mapping_error(eth->dma_dev, mapped_addr))) goto err_dma; if (i == nr_frags - 1 && @@ -1237,7 +1240,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, struct net_device *netdev; unsigned int pktlen; dma_addr_t dma_addr; - u32 hash; + u32 hash, reason; int mac; ring = mtk_get_rx_ring(eth); @@ -1274,18 +1277,18 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, netdev->stats.rx_dropped++; goto release_desc; } - dma_addr = dma_map_single(eth->dev, + dma_addr = dma_map_single(eth->dma_dev, new_data + NET_SKB_PAD + eth->ip_align, ring->buf_size, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(eth->dev, dma_addr))) { + if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr))) { skb_free_frag(new_data); netdev->stats.rx_dropped++; goto release_desc; } - dma_unmap_single(eth->dev, trxd.rxd1, + dma_unmap_single(eth->dma_dev, trxd.rxd1, ring->buf_size, DMA_FROM_DEVICE); /* receive data */ @@ -1313,6 +1316,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, skb_set_hash(skb, hash, PKT_HASH_TYPE_L4); } + reason = FIELD_GET(MTK_RXD4_PPE_CPU_REASON, trxd.rxd4); + if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) + mtk_ppe_check_skb(eth->ppe, skb, + trxd.rxd4 & MTK_RXD4_FOE_ENTRY); + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX && (trxd.rxd2 & RX_DMA_VTAG)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), @@ -1558,7 +1566,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) if (!ring->buf) goto no_tx_mem; - ring->dma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz, + ring->dma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz, &ring->phys, GFP_ATOMIC); if (!ring->dma) goto no_tx_mem; @@ -1576,7 +1584,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) * descriptors in ring->dma_pdma. */ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) { - ring->dma_pdma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz, + ring->dma_pdma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz, &ring->phys_pdma, GFP_ATOMIC); if (!ring->dma_pdma) @@ -1635,7 +1643,7 @@ static void mtk_tx_clean(struct mtk_eth *eth) } if (ring->dma) { - dma_free_coherent(eth->dev, + dma_free_coherent(eth->dma_dev, MTK_DMA_SIZE * sizeof(*ring->dma), ring->dma, ring->phys); @@ -1643,7 +1651,7 @@ static void mtk_tx_clean(struct mtk_eth *eth) } if (ring->dma_pdma) { - dma_free_coherent(eth->dev, + dma_free_coherent(eth->dma_dev, MTK_DMA_SIZE * sizeof(*ring->dma_pdma), ring->dma_pdma, ring->phys_pdma); @@ -1688,18 +1696,18 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) return -ENOMEM; } - ring->dma = dma_alloc_coherent(eth->dev, + ring->dma = dma_alloc_coherent(eth->dma_dev, rx_dma_size * sizeof(*ring->dma), &ring->phys, GFP_ATOMIC); if (!ring->dma) return -ENOMEM; for (i = 0; i < rx_dma_size; i++) { - dma_addr_t dma_addr = dma_map_single(eth->dev, + dma_addr_t dma_addr = dma_map_single(eth->dma_dev, ring->data[i] + NET_SKB_PAD + eth->ip_align, ring->buf_size, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(eth->dev, dma_addr))) + if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr))) return -ENOMEM; ring->dma[i].rxd1 = (unsigned int)dma_addr; @@ -1735,7 +1743,7 @@ static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring) continue; if (!ring->dma[i].rxd1) continue; - dma_unmap_single(eth->dev, + dma_unmap_single(eth->dma_dev, ring->dma[i].rxd1, ring->buf_size, DMA_FROM_DEVICE); @@ -1746,7 +1754,7 @@ static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring) } if (ring->dma) { - dma_free_coherent(eth->dev, + dma_free_coherent(eth->dma_dev, ring->dma_size * sizeof(*ring->dma), ring->dma, ring->phys); @@ -2099,7 +2107,7 @@ static void mtk_dma_free(struct mtk_eth *eth) if (eth->netdev[i]) netdev_reset_queue(eth->netdev[i]); if (eth->scratch_ring) { - dma_free_coherent(eth->dev, + dma_free_coherent(eth->dma_dev, MTK_DMA_SIZE * sizeof(struct mtk_tx_dma), eth->scratch_ring, eth->phy_scratch_ring); @@ -2267,7 +2275,7 @@ static int mtk_open(struct net_device *dev) if (err) return err; - if (eth->soc->offload_version && mtk_ppe_start(ð->ppe) == 0) + if (eth->soc->offload_version && mtk_ppe_start(eth->ppe) == 0) gdm_config = MTK_GDMA_TO_PPE; mtk_gdm_config(eth, gdm_config); @@ -2341,7 +2349,7 @@ static int mtk_stop(struct net_device *dev) mtk_dma_free(eth); if (eth->soc->offload_version) - mtk_ppe_stop(ð->ppe); + mtk_ppe_stop(eth->ppe); return 0; } @@ -2448,6 +2456,8 @@ static void mtk_dim_tx(struct work_struct *work) static int mtk_hw_init(struct mtk_eth *eth) { + u32 dma_mask = ETHSYS_DMA_AG_MAP_PDMA | ETHSYS_DMA_AG_MAP_QDMA | + ETHSYS_DMA_AG_MAP_PPE; int i, val, ret; if (test_and_set_bit(MTK_HW_INIT, ð->state)) @@ -2460,6 +2470,10 @@ static int mtk_hw_init(struct mtk_eth *eth) if (ret) goto err_disable_pm; + if (eth->ethsys) + regmap_update_bits(eth->ethsys, ETHSYS_DMA_AG_MAP, dma_mask, + of_dma_is_coherent(eth->dma_dev->of_node) * dma_mask); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) { ret = device_reset(eth->dev); if (ret) { @@ -3040,6 +3054,35 @@ free_netdev: return err; } +void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev) +{ + struct net_device *dev, *tmp; + LIST_HEAD(dev_list); + int i; + + rtnl_lock(); + + for (i = 0; i < MTK_MAC_COUNT; i++) { + dev = eth->netdev[i]; + + if (!dev || !(dev->flags & IFF_UP)) + continue; + + list_add_tail(&dev->close_list, &dev_list); + } + + dev_close_many(&dev_list, false); + + eth->dma_dev = dma_dev; + + list_for_each_entry_safe(dev, tmp, &dev_list, close_list) { + list_del_init(&dev->close_list); + dev_open(dev, NULL); + } + + rtnl_unlock(); +} + static int mtk_probe(struct platform_device *pdev) { struct device_node *mac_np; @@ -3053,6 +3096,7 @@ static int mtk_probe(struct platform_device *pdev) eth->soc = of_device_get_match_data(&pdev->dev); eth->dev = &pdev->dev; + eth->dma_dev = &pdev->dev; eth->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(eth->base)) return PTR_ERR(eth->base); @@ -3101,6 +3145,16 @@ static int mtk_probe(struct platform_device *pdev) } } + if (of_dma_is_coherent(pdev->dev.of_node)) { + struct regmap *cci; + + cci = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "cci-control-port"); + /* enable CPU/bus coherency */ + if (!IS_ERR(cci)) + regmap_write(cci, 0, 3); + } + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) { eth->sgmii = devm_kzalloc(eth->dev, sizeof(*eth->sgmii), GFP_KERNEL); @@ -3123,6 +3177,22 @@ static int mtk_probe(struct platform_device *pdev) } } + for (i = 0;; i++) { + struct device_node *np = of_parse_phandle(pdev->dev.of_node, + "mediatek,wed", i); + static const u32 wdma_regs[] = { + MTK_WDMA0_BASE, + MTK_WDMA1_BASE + }; + void __iomem *wdma; + + if (!np || i >= ARRAY_SIZE(wdma_regs)) + break; + + wdma = eth->base + wdma_regs[i]; + mtk_wed_add_hw(np, eth, wdma, i); + } + for (i = 0; i < 3; i++) { if (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT) && i > 0) eth->irq[i] = eth->irq[0]; @@ -3198,10 +3268,11 @@ static int mtk_probe(struct platform_device *pdev) } if (eth->soc->offload_version) { - err = mtk_ppe_init(ð->ppe, eth->dev, - eth->base + MTK_ETH_PPE_BASE, 2); - if (err) + eth->ppe = mtk_ppe_init(eth, eth->base + MTK_ETH_PPE_BASE, 2); + if (!eth->ppe) { + err = -ENOMEM; goto err_free_dev; + } err = mtk_eth_offload_init(eth); if (err) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index c9d42be314b5..c98c7ee42c6f 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -295,6 +295,9 @@ #define MTK_GDM1_TX_GPCNT 0x2438 #define MTK_STAT_OFFSET 0x40 +#define MTK_WDMA0_BASE 0x2800 +#define MTK_WDMA1_BASE 0x2c00 + /* QDMA descriptor txd4 */ #define TX_DMA_CHKSUM (0x7 << 29) #define TX_DMA_TSO BIT(28) @@ -465,6 +468,12 @@ #define RSTCTRL_FE BIT(6) #define RSTCTRL_PPE BIT(31) +/* ethernet dma channel agent map */ +#define ETHSYS_DMA_AG_MAP 0x408 +#define ETHSYS_DMA_AG_MAP_PDMA BIT(0) +#define ETHSYS_DMA_AG_MAP_QDMA BIT(1) +#define ETHSYS_DMA_AG_MAP_PPE BIT(2) + /* SGMII subsystem config registers */ /* Register to auto-negotiation restart */ #define SGMSYS_PCS_CONTROL_1 0x0 @@ -882,6 +891,7 @@ struct mtk_sgmii { /* struct mtk_eth - This is the main datasructure for holding the state * of the driver * @dev: The device pointer + * @dev: The device pointer used for dma mapping/alloc * @base: The mapped register i/o base * @page_lock: Make sure that register operations are atomic * @tx_irq__lock: Make sure that IRQ register operations are atomic @@ -925,6 +935,7 @@ struct mtk_sgmii { struct mtk_eth { struct device *dev; + struct device *dma_dev; void __iomem *base; spinlock_t page_lock; spinlock_t tx_irq_lock; @@ -974,7 +985,7 @@ struct mtk_eth { u32 rx_dma_l4_valid; int ip_align; - struct mtk_ppe ppe; + struct mtk_ppe *ppe; struct rhashtable flow_table; }; @@ -1023,6 +1034,7 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id); int mtk_eth_offload_init(struct mtk_eth *eth); int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data); +void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev); #endif /* MTK_ETH_H */ diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 3ad10c793308..683f89f8e3b2 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -6,9 +6,22 @@ #include <linux/iopoll.h> #include <linux/etherdevice.h> #include <linux/platform_device.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <net/dsa.h> +#include "mtk_eth_soc.h" #include "mtk_ppe.h" #include "mtk_ppe_regs.h" +static DEFINE_SPINLOCK(ppe_lock); + +static const struct rhashtable_params mtk_flow_l2_ht_params = { + .head_offset = offsetof(struct mtk_flow_entry, l2_node), + .key_offset = offsetof(struct mtk_flow_entry, data.bridge), + .key_len = offsetof(struct mtk_foe_bridge, key_end), + .automatic_shrinking = true, +}; + static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val) { writel(val, ppe->base + reg); @@ -41,6 +54,11 @@ static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val) return ppe_m32(ppe, reg, val, 0); } +static u32 mtk_eth_timestamp(struct mtk_eth *eth) +{ + return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP; +} + static int mtk_ppe_wait_busy(struct mtk_ppe *ppe) { int ret; @@ -76,13 +94,6 @@ static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e) u32 hash; switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) { - case MTK_PPE_PKT_TYPE_BRIDGE: - hv1 = e->bridge.src_mac_lo; - hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16); - hv2 = e->bridge.src_mac_hi >> 16; - hv2 ^= e->bridge.dest_mac_lo; - hv3 = e->bridge.dest_mac_hi; - break; case MTK_PPE_PKT_TYPE_IPV4_ROUTE: case MTK_PPE_PKT_TYPE_IPV4_HNAPT: hv1 = e->ipv4.orig.ports; @@ -122,6 +133,9 @@ mtk_foe_entry_l2(struct mtk_foe_entry *entry) { int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); + if (type == MTK_PPE_PKT_TYPE_BRIDGE) + return &entry->bridge.l2; + if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) return &entry->ipv6.l2; @@ -133,6 +147,9 @@ mtk_foe_entry_ib2(struct mtk_foe_entry *entry) { int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); + if (type == MTK_PPE_PKT_TYPE_BRIDGE) + return &entry->bridge.ib2; + if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) return &entry->ipv6.ib2; @@ -167,7 +184,12 @@ int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto, if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T) entry->ipv6.ports = ports_pad; - if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) { + if (type == MTK_PPE_PKT_TYPE_BRIDGE) { + ether_addr_copy(entry->bridge.src_mac, src_mac); + ether_addr_copy(entry->bridge.dest_mac, dest_mac); + entry->bridge.ib2 = val; + l2 = &entry->bridge.l2; + } else if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) { entry->ipv6.ib2 = val; l2 = &entry->ipv6.l2; } else { @@ -329,32 +351,167 @@ int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid) return 0; } +int mtk_foe_entry_set_wdma(struct mtk_foe_entry *entry, int wdma_idx, int txq, + int bss, int wcid) +{ + struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry); + u32 *ib2 = mtk_foe_entry_ib2(entry); + + *ib2 &= ~MTK_FOE_IB2_PORT_MG; + *ib2 |= MTK_FOE_IB2_WDMA_WINFO; + if (wdma_idx) + *ib2 |= MTK_FOE_IB2_WDMA_DEVIDX; + + l2->vlan2 = FIELD_PREP(MTK_FOE_VLAN2_WINFO_BSS, bss) | + FIELD_PREP(MTK_FOE_VLAN2_WINFO_WCID, wcid) | + FIELD_PREP(MTK_FOE_VLAN2_WINFO_RING, txq); + + return 0; +} + static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry) { return !(entry->ib1 & MTK_FOE_IB1_STATIC) && FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND; } -int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry, - u16 timestamp) +static bool +mtk_flow_entry_match(struct mtk_flow_entry *entry, struct mtk_foe_entry *data) +{ + int type, len; + + if ((data->ib1 ^ entry->data.ib1) & MTK_FOE_IB1_UDP) + return false; + + type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->data.ib1); + if (type > MTK_PPE_PKT_TYPE_IPV4_DSLITE) + len = offsetof(struct mtk_foe_entry, ipv6._rsv); + else + len = offsetof(struct mtk_foe_entry, ipv4.ib2); + + return !memcmp(&entry->data.data, &data->data, len - 4); +} + +static void +__mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) +{ + struct hlist_head *head; + struct hlist_node *tmp; + + if (entry->type == MTK_FLOW_TYPE_L2) { + rhashtable_remove_fast(&ppe->l2_flows, &entry->l2_node, + mtk_flow_l2_ht_params); + + head = &entry->l2_flows; + hlist_for_each_entry_safe(entry, tmp, head, l2_data.list) + __mtk_foe_entry_clear(ppe, entry); + return; + } + + hlist_del_init(&entry->list); + if (entry->hash != 0xffff) { + ppe->foe_table[entry->hash].ib1 &= ~MTK_FOE_IB1_STATE; + ppe->foe_table[entry->hash].ib1 |= FIELD_PREP(MTK_FOE_IB1_STATE, + MTK_FOE_STATE_BIND); + dma_wmb(); + } + entry->hash = 0xffff; + + if (entry->type != MTK_FLOW_TYPE_L2_SUBFLOW) + return; + + hlist_del_init(&entry->l2_data.list); + kfree(entry); +} + +static int __mtk_foe_entry_idle_time(struct mtk_ppe *ppe, u32 ib1) +{ + u16 timestamp; + u16 now; + + now = mtk_eth_timestamp(ppe->eth) & MTK_FOE_IB1_BIND_TIMESTAMP; + timestamp = ib1 & MTK_FOE_IB1_BIND_TIMESTAMP; + + if (timestamp > now) + return MTK_FOE_IB1_BIND_TIMESTAMP + 1 - timestamp + now; + else + return now - timestamp; +} + +static void +mtk_flow_entry_update_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) { + struct mtk_flow_entry *cur; struct mtk_foe_entry *hwe; - u32 hash; + struct hlist_node *tmp; + int idle; + + idle = __mtk_foe_entry_idle_time(ppe, entry->data.ib1); + hlist_for_each_entry_safe(cur, tmp, &entry->l2_flows, l2_data.list) { + int cur_idle; + u32 ib1; + + hwe = &ppe->foe_table[cur->hash]; + ib1 = READ_ONCE(hwe->ib1); + + if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND) { + cur->hash = 0xffff; + __mtk_foe_entry_clear(ppe, cur); + continue; + } + + cur_idle = __mtk_foe_entry_idle_time(ppe, ib1); + if (cur_idle >= idle) + continue; + + idle = cur_idle; + entry->data.ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP; + entry->data.ib1 |= hwe->ib1 & MTK_FOE_IB1_BIND_TIMESTAMP; + } +} + +static void +mtk_flow_entry_update(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) +{ + struct mtk_foe_entry *hwe; + struct mtk_foe_entry foe; + + spin_lock_bh(&ppe_lock); + + if (entry->type == MTK_FLOW_TYPE_L2) { + mtk_flow_entry_update_l2(ppe, entry); + goto out; + } + + if (entry->hash == 0xffff) + goto out; + + hwe = &ppe->foe_table[entry->hash]; + memcpy(&foe, hwe, sizeof(foe)); + if (!mtk_flow_entry_match(entry, &foe)) { + entry->hash = 0xffff; + goto out; + } + + entry->data.ib1 = foe.ib1; + +out: + spin_unlock_bh(&ppe_lock); +} + +static void +__mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry, + u16 hash) +{ + struct mtk_foe_entry *hwe; + u16 timestamp; + timestamp = mtk_eth_timestamp(ppe->eth); timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP; entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP; entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp); - hash = mtk_ppe_hash_entry(entry); hwe = &ppe->foe_table[hash]; - if (!mtk_foe_entry_usable(hwe)) { - hwe++; - hash++; - - if (!mtk_foe_entry_usable(hwe)) - return -ENOSPC; - } - memcpy(&hwe->data, &entry->data, sizeof(hwe->data)); wmb(); hwe->ib1 = entry->ib1; @@ -362,32 +519,195 @@ int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry, dma_wmb(); mtk_ppe_cache_clear(ppe); +} - return hash; +void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) +{ + spin_lock_bh(&ppe_lock); + __mtk_foe_entry_clear(ppe, entry); + spin_unlock_bh(&ppe_lock); +} + +static int +mtk_foe_entry_commit_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) +{ + entry->type = MTK_FLOW_TYPE_L2; + + return rhashtable_insert_fast(&ppe->l2_flows, &entry->l2_node, + mtk_flow_l2_ht_params); +} + +int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) +{ + int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->data.ib1); + u32 hash; + + if (type == MTK_PPE_PKT_TYPE_BRIDGE) + return mtk_foe_entry_commit_l2(ppe, entry); + + hash = mtk_ppe_hash_entry(&entry->data); + entry->hash = 0xffff; + spin_lock_bh(&ppe_lock); + hlist_add_head(&entry->list, &ppe->foe_flow[hash / 2]); + spin_unlock_bh(&ppe_lock); + + return 0; +} + +static void +mtk_foe_entry_commit_subflow(struct mtk_ppe *ppe, struct mtk_flow_entry *entry, + u16 hash) +{ + struct mtk_flow_entry *flow_info; + struct mtk_foe_entry foe, *hwe; + struct mtk_foe_mac_info *l2; + u32 ib1_mask = MTK_FOE_IB1_PACKET_TYPE | MTK_FOE_IB1_UDP; + int type; + + flow_info = kzalloc(offsetof(struct mtk_flow_entry, l2_data.end), + GFP_ATOMIC); + if (!flow_info) + return; + + flow_info->l2_data.base_flow = entry; + flow_info->type = MTK_FLOW_TYPE_L2_SUBFLOW; + flow_info->hash = hash; + hlist_add_head(&flow_info->list, &ppe->foe_flow[hash / 2]); + hlist_add_head(&flow_info->l2_data.list, &entry->l2_flows); + + hwe = &ppe->foe_table[hash]; + memcpy(&foe, hwe, sizeof(foe)); + foe.ib1 &= ib1_mask; + foe.ib1 |= entry->data.ib1 & ~ib1_mask; + + l2 = mtk_foe_entry_l2(&foe); + memcpy(l2, &entry->data.bridge.l2, sizeof(*l2)); + + type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, foe.ib1); + if (type == MTK_PPE_PKT_TYPE_IPV4_HNAPT) + memcpy(&foe.ipv4.new, &foe.ipv4.orig, sizeof(foe.ipv4.new)); + else if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T && l2->etype == ETH_P_IP) + l2->etype = ETH_P_IPV6; + + *mtk_foe_entry_ib2(&foe) = entry->data.bridge.ib2; + + __mtk_foe_entry_commit(ppe, &foe, hash); } -int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base, +void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash) +{ + struct hlist_head *head = &ppe->foe_flow[hash / 2]; + struct mtk_foe_entry *hwe = &ppe->foe_table[hash]; + struct mtk_flow_entry *entry; + struct mtk_foe_bridge key = {}; + struct hlist_node *n; + struct ethhdr *eh; + bool found = false; + u8 *tag; + + spin_lock_bh(&ppe_lock); + + if (FIELD_GET(MTK_FOE_IB1_STATE, hwe->ib1) == MTK_FOE_STATE_BIND) + goto out; + + hlist_for_each_entry_safe(entry, n, head, list) { + if (entry->type == MTK_FLOW_TYPE_L2_SUBFLOW) { + if (unlikely(FIELD_GET(MTK_FOE_IB1_STATE, hwe->ib1) == + MTK_FOE_STATE_BIND)) + continue; + + entry->hash = 0xffff; + __mtk_foe_entry_clear(ppe, entry); + continue; + } + + if (found || !mtk_flow_entry_match(entry, hwe)) { + if (entry->hash != 0xffff) + entry->hash = 0xffff; + continue; + } + + entry->hash = hash; + __mtk_foe_entry_commit(ppe, &entry->data, hash); + found = true; + } + + if (found) + goto out; + + eh = eth_hdr(skb); + ether_addr_copy(key.dest_mac, eh->h_dest); + ether_addr_copy(key.src_mac, eh->h_source); + tag = skb->data - 2; + key.vlan = 0; + switch (skb->protocol) { +#if IS_ENABLED(CONFIG_NET_DSA) + case htons(ETH_P_XDSA): + if (!netdev_uses_dsa(skb->dev) || + skb->dev->dsa_ptr->tag_ops->proto != DSA_TAG_PROTO_MTK) + goto out; + + tag += 4; + if (get_unaligned_be16(tag) != ETH_P_8021Q) + break; + + fallthrough; +#endif + case htons(ETH_P_8021Q): + key.vlan = get_unaligned_be16(tag + 2) & VLAN_VID_MASK; + break; + default: + break; + } + + entry = rhashtable_lookup_fast(&ppe->l2_flows, &key, mtk_flow_l2_ht_params); + if (!entry) + goto out; + + mtk_foe_entry_commit_subflow(ppe, entry, hash); + +out: + spin_unlock_bh(&ppe_lock); +} + +int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) +{ + mtk_flow_entry_update(ppe, entry); + + return __mtk_foe_entry_idle_time(ppe, entry->data.ib1); +} + +struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int version) { + struct device *dev = eth->dev; struct mtk_foe_entry *foe; + struct mtk_ppe *ppe; + + ppe = devm_kzalloc(dev, sizeof(*ppe), GFP_KERNEL); + if (!ppe) + return NULL; + + rhashtable_init(&ppe->l2_flows, &mtk_flow_l2_ht_params); /* need to allocate a separate device, since it PPE DMA access is * not coherent. */ ppe->base = base; + ppe->eth = eth; ppe->dev = dev; ppe->version = version; foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe), &ppe->foe_phys, GFP_KERNEL); if (!foe) - return -ENOMEM; + return NULL; ppe->foe_table = foe; mtk_ppe_debugfs_init(ppe); - return 0; + return ppe; } static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) @@ -443,7 +763,6 @@ int mtk_ppe_start(struct mtk_ppe *ppe) MTK_PPE_FLOW_CFG_IP4_NAT | MTK_PPE_FLOW_CFG_IP4_NAPT | MTK_PPE_FLOW_CFG_IP4_DSLITE | - MTK_PPE_FLOW_CFG_L2_BRIDGE | MTK_PPE_FLOW_CFG_IP4_NAT_FRAG; ppe_w32(ppe, MTK_PPE_FLOW_CFG, val); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h index 242fb8f2ae65..1f5cf1c9a947 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.h +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/bitfield.h> +#include <linux/rhashtable.h> #define MTK_ETH_PPE_BASE 0xc00 @@ -48,9 +49,9 @@ enum { #define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5) #define MTK_FOE_IB2_MULTICAST BIT(8) -#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12) -#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16) -#define MTK_FOE_IB2_WHNAT_NAT BIT(17) +#define MTK_FOE_IB2_WDMA_QID2 GENMASK(13, 12) +#define MTK_FOE_IB2_WDMA_DEVIDX BIT(16) +#define MTK_FOE_IB2_WDMA_WINFO BIT(17) #define MTK_FOE_IB2_PORT_MG GENMASK(17, 12) @@ -58,9 +59,9 @@ enum { #define MTK_FOE_IB2_DSCP GENMASK(31, 24) -#define MTK_FOE_VLAN2_WHNAT_BSS GEMMASK(5, 0) -#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6) -#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14) +#define MTK_FOE_VLAN2_WINFO_BSS GENMASK(5, 0) +#define MTK_FOE_VLAN2_WINFO_WCID GENMASK(13, 6) +#define MTK_FOE_VLAN2_WINFO_RING GENMASK(15, 14) enum { MTK_FOE_STATE_INVALID, @@ -84,19 +85,16 @@ struct mtk_foe_mac_info { u16 src_mac_lo; }; +/* software-only entry type */ struct mtk_foe_bridge { - u32 dest_mac_hi; + u8 dest_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + u16 vlan; - u16 src_mac_lo; - u16 dest_mac_lo; - - u32 src_mac_hi; + struct {} key_end; u32 ib2; - u32 _rsv[5]; - - u32 udf_tsid; struct mtk_foe_mac_info l2; }; @@ -235,7 +233,37 @@ enum { MTK_PPE_CPU_REASON_INVALID = 0x1f, }; +enum { + MTK_FLOW_TYPE_L4, + MTK_FLOW_TYPE_L2, + MTK_FLOW_TYPE_L2_SUBFLOW, +}; + +struct mtk_flow_entry { + union { + struct hlist_node list; + struct { + struct rhash_head l2_node; + struct hlist_head l2_flows; + }; + }; + u8 type; + s8 wed_index; + u16 hash; + union { + struct mtk_foe_entry data; + struct { + struct mtk_flow_entry *base_flow; + struct hlist_node list; + struct {} end; + } l2_data; + }; + struct rhash_head node; + unsigned long cookie; +}; + struct mtk_ppe { + struct mtk_eth *eth; struct device *dev; void __iomem *base; int version; @@ -243,19 +271,35 @@ struct mtk_ppe { struct mtk_foe_entry *foe_table; dma_addr_t foe_phys; + u16 foe_check_time[MTK_PPE_ENTRIES]; + struct hlist_head foe_flow[MTK_PPE_ENTRIES / 2]; + + struct rhashtable l2_flows; + void *acct_table; }; -int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base, - int version); +struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int version); int mtk_ppe_start(struct mtk_ppe *ppe); int mtk_ppe_stop(struct mtk_ppe *ppe); +void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash); + static inline void -mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash) +mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash) { - ppe->foe_table[hash].ib1 = 0; - dma_wmb(); + u16 now, diff; + + if (!ppe) + return; + + now = (u16)jiffies; + diff = now - ppe->foe_check_time[hash]; + if (diff < HZ / 10) + return; + + ppe->foe_check_time[hash] = now; + __mtk_ppe_check_skb(ppe, skb, hash); } static inline int @@ -281,8 +325,11 @@ int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry, int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port); int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid); int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid); -int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry, - u16 timestamp); +int mtk_foe_entry_set_wdma(struct mtk_foe_entry *entry, int wdma_idx, int txq, + int bss, int wcid); +int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry); +void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry); +int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry); int mtk_ppe_debugfs_init(struct mtk_ppe *ppe); #endif diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c index d4b482340cb9..eb0b598f14e4 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c @@ -32,7 +32,6 @@ static const char *mtk_foe_pkt_type_str(int type) static const char * const type_str[] = { [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T", [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T", - [MTK_PPE_PKT_TYPE_BRIDGE] = "L2", [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE", [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T", [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T", diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index 7bb1f20002b5..1fe31058b0f2 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -6,10 +6,12 @@ #include <linux/if_ether.h> #include <linux/rhashtable.h> #include <linux/ip.h> +#include <linux/ipv6.h> #include <net/flow_offload.h> #include <net/pkt_cls.h> #include <net/dsa.h> #include "mtk_eth_soc.h" +#include "mtk_wed.h" struct mtk_flow_data { struct ethhdr eth; @@ -19,11 +21,18 @@ struct mtk_flow_data { __be32 src_addr; __be32 dst_addr; } v4; + + struct { + struct in6_addr src_addr; + struct in6_addr dst_addr; + } v6; }; __be16 src_port; __be16 dst_port; + u16 vlan_in; + struct { u16 id; __be16 proto; @@ -35,12 +44,6 @@ struct mtk_flow_data { } pppoe; }; -struct mtk_flow_entry { - struct rhash_head node; - unsigned long cookie; - u16 hash; -}; - static const struct rhashtable_params mtk_flow_ht_params = { .head_offset = offsetof(struct mtk_flow_entry, node), .key_offset = offsetof(struct mtk_flow_entry, cookie), @@ -48,12 +51,6 @@ static const struct rhashtable_params mtk_flow_ht_params = { .automatic_shrinking = true, }; -static u32 -mtk_eth_timestamp(struct mtk_eth *eth) -{ - return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP; -} - static int mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data, bool egress) @@ -63,6 +60,14 @@ mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data, data->v4.dst_addr, data->dst_port); } +static int +mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data) +{ + return mtk_foe_entry_set_ipv6_tuple(foe, + data->v6.src_addr.s6_addr32, data->src_port, + data->v6.dst_addr.s6_addr32, data->dst_port); +} + static void mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth) { @@ -80,6 +85,35 @@ mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth) memcpy(dest, src, act->mangle.mask ? 2 : 4); } +static int +mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_info *info) +{ + struct net_device_path_ctx ctx = { + .dev = dev, + .daddr = addr, + }; + struct net_device_path path = {}; + + if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED)) + return -1; + + if (!dev->netdev_ops->ndo_fill_forward_path) + return -1; + + if (dev->netdev_ops->ndo_fill_forward_path(&ctx, &path)) + return -1; + + if (path.type != DEV_PATH_MTK_WDMA) + return -1; + + info->wdma_idx = path.mtk_wdma.wdma_idx; + info->queue = path.mtk_wdma.queue; + info->bss = path.mtk_wdma.bss; + info->wcid = path.mtk_wdma.wcid; + + return 0; +} + static int mtk_flow_mangle_ports(const struct flow_action_entry *act, @@ -149,10 +183,20 @@ mtk_flow_get_dsa_port(struct net_device **dev) static int mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, - struct net_device *dev) + struct net_device *dev, const u8 *dest_mac, + int *wed_index) { + struct mtk_wdma_info info = {}; int pse_port, dsa_port; + if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) { + mtk_foe_entry_set_wdma(foe, info.wdma_idx, info.queue, info.bss, + info.wcid); + pse_port = 3; + *wed_index = info.wdma_idx; + goto out; + } + dsa_port = mtk_flow_get_dsa_port(&dev); if (dsa_port >= 0) mtk_foe_entry_set_dsa(foe, dsa_port); @@ -164,6 +208,7 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, else return -EOPNOTSUPP; +out: mtk_foe_entry_set_pse_port(foe, pse_port); return 0; @@ -179,11 +224,10 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) struct net_device *odev = NULL; struct mtk_flow_entry *entry; int offload_type = 0; + int wed_index = -1; u16 addr_type = 0; - u32 timestamp; u8 l4proto = 0; int err = 0; - int hash; int i; if (rhashtable_lookup(ð->flow_table, &f->cookie, mtk_flow_ht_params)) @@ -215,9 +259,45 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) return -EOPNOTSUPP; } + switch (addr_type) { + case 0: + offload_type = MTK_PPE_PKT_TYPE_BRIDGE; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + memcpy(data.eth.h_dest, match.key->dst, ETH_ALEN); + memcpy(data.eth.h_source, match.key->src, ETH_ALEN); + } else { + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + + if (match.key->vlan_tpid != cpu_to_be16(ETH_P_8021Q)) + return -EOPNOTSUPP; + + data.vlan_in = match.key->vlan_id; + } + break; + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT; + break; + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T; + break; + default: + return -EOPNOTSUPP; + } + flow_action_for_each(i, act, &rule->action) { switch (act->id) { case FLOW_ACTION_MANGLE: + if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE) + return -EOPNOTSUPP; if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH) mtk_flow_offload_mangle_eth(act, &data.eth); break; @@ -249,14 +329,6 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) } } - switch (addr_type) { - case FLOW_DISSECTOR_KEY_IPV4_ADDRS: - offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT; - break; - default: - return -EOPNOTSUPP; - } - if (!is_valid_ether_addr(data.eth.h_source) || !is_valid_ether_addr(data.eth.h_dest)) return -EINVAL; @@ -270,10 +342,13 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports ports; + if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE) + return -EOPNOTSUPP; + flow_rule_match_ports(rule, &ports); data.src_port = ports.key->src; data.dst_port = ports.key->dst; - } else { + } else if (offload_type != MTK_PPE_PKT_TYPE_BRIDGE) { return -EOPNOTSUPP; } @@ -288,10 +363,24 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) mtk_flow_set_ipv4_addr(&foe, &data, false); } + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs addrs; + + flow_rule_match_ipv6_addrs(rule, &addrs); + + data.v6.src_addr = addrs.key->src; + data.v6.dst_addr = addrs.key->dst; + + mtk_flow_set_ipv6_addr(&foe, &data); + } + flow_action_for_each(i, act, &rule->action) { if (act->id != FLOW_ACTION_MANGLE) continue; + if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE) + return -EOPNOTSUPP; + switch (act->mangle.htype) { case FLOW_ACT_MANGLE_HDR_TYPE_TCP: case FLOW_ACT_MANGLE_HDR_TYPE_UDP: @@ -317,6 +406,9 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) return err; } + if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE) + foe.bridge.vlan = data.vlan_in; + if (data.vlan.num == 1) { if (data.vlan.proto != htons(ETH_P_8021Q)) return -EOPNOTSUPP; @@ -326,33 +418,38 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) if (data.pppoe.num == 1) mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid); - err = mtk_flow_set_output_device(eth, &foe, odev); + err = mtk_flow_set_output_device(eth, &foe, odev, data.eth.h_dest, + &wed_index); if (err) return err; + if (wed_index >= 0 && (err = mtk_wed_flow_add(wed_index)) < 0) + return err; + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; entry->cookie = f->cookie; - timestamp = mtk_eth_timestamp(eth); - hash = mtk_foe_entry_commit(ð->ppe, &foe, timestamp); - if (hash < 0) { - err = hash; + memcpy(&entry->data, &foe, sizeof(entry->data)); + entry->wed_index = wed_index; + + if (mtk_foe_entry_commit(eth->ppe, entry) < 0) goto free; - } - entry->hash = hash; err = rhashtable_insert_fast(ð->flow_table, &entry->node, mtk_flow_ht_params); if (err < 0) - goto clear_flow; + goto clear; return 0; -clear_flow: - mtk_foe_entry_clear(ð->ppe, hash); + +clear: + mtk_foe_entry_clear(eth->ppe, entry); free: kfree(entry); + if (wed_index >= 0) + mtk_wed_flow_remove(wed_index); return err; } @@ -366,9 +463,11 @@ mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f) if (!entry) return -ENOENT; - mtk_foe_entry_clear(ð->ppe, entry->hash); + mtk_foe_entry_clear(eth->ppe, entry); rhashtable_remove_fast(ð->flow_table, &entry->node, mtk_flow_ht_params); + if (entry->wed_index >= 0) + mtk_wed_flow_remove(entry->wed_index); kfree(entry); return 0; @@ -378,7 +477,6 @@ static int mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f) { struct mtk_flow_entry *entry; - int timestamp; u32 idle; entry = rhashtable_lookup(ð->flow_table, &f->cookie, @@ -386,11 +484,7 @@ mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f) if (!entry) return -ENOENT; - timestamp = mtk_foe_entry_timestamp(ð->ppe, entry->hash); - if (timestamp < 0) - return -ETIMEDOUT; - - idle = mtk_eth_timestamp(eth) - timestamp; + idle = mtk_foe_entry_idle_time(eth->ppe, entry); f->stats.lastused = jiffies - idle * HZ; return 0; @@ -442,7 +536,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) struct flow_block_cb *block_cb; flow_setup_cb_t *cb; - if (!eth->ppe.foe_table) + if (!eth->ppe || !eth->ppe->foe_table) return -EOPNOTSUPP; if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) @@ -483,15 +577,18 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { - if (type == TC_SETUP_FT) + switch (type) { + case TC_SETUP_BLOCK: + case TC_SETUP_FT: return mtk_eth_setup_tc_block(dev, type_data); - - return -EOPNOTSUPP; + default: + return -EOPNOTSUPP; + } } int mtk_eth_offload_init(struct mtk_eth *eth) { - if (!eth->ppe.foe_table) + if (!eth->ppe || !eth->ppe->foe_table) return 0; return rhashtable_init(ð->flow_table, &mtk_flow_ht_params); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c new file mode 100644 index 000000000000..5530f7991d1d --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -0,0 +1,878 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/bitfield.h> +#include <linux/dma-mapping.h> +#include <linux/skbuff.h> +#include <linux/of_platform.h> +#include <linux/of_address.h> +#include <linux/mfd/syscon.h> +#include <linux/debugfs.h> +#include <linux/soc/mediatek/mtk_wed.h> +#include "mtk_eth_soc.h" +#include "mtk_wed_regs.h" +#include "mtk_wed.h" +#include "mtk_ppe.h" + +#define MTK_PCIE_BASE(n) (0x1a143000 + (n) * 0x2000) + +#define MTK_WED_PKT_SIZE 1900 +#define MTK_WED_BUF_SIZE 2048 +#define MTK_WED_BUF_PER_PAGE (PAGE_SIZE / 2048) + +#define MTK_WED_TX_RING_SIZE 2048 +#define MTK_WED_WDMA_RING_SIZE 1024 + +static struct mtk_wed_hw *hw_list[2]; +static DEFINE_MUTEX(hw_lock); + +static void +wed_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val) +{ + regmap_update_bits(dev->hw->regs, reg, mask | val, val); +} + +static void +wed_set(struct mtk_wed_device *dev, u32 reg, u32 mask) +{ + return wed_m32(dev, reg, 0, mask); +} + +static void +wed_clr(struct mtk_wed_device *dev, u32 reg, u32 mask) +{ + return wed_m32(dev, reg, mask, 0); +} + +static void +wdma_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val) +{ + wdma_w32(dev, reg, (wdma_r32(dev, reg) & ~mask) | val); +} + +static void +wdma_set(struct mtk_wed_device *dev, u32 reg, u32 mask) +{ + wdma_m32(dev, reg, 0, mask); +} + +static u32 +mtk_wed_read_reset(struct mtk_wed_device *dev) +{ + return wed_r32(dev, MTK_WED_RESET); +} + +static void +mtk_wed_reset(struct mtk_wed_device *dev, u32 mask) +{ + u32 status; + + wed_w32(dev, MTK_WED_RESET, mask); + if (readx_poll_timeout(mtk_wed_read_reset, dev, status, + !(status & mask), 0, 1000)) + WARN_ON_ONCE(1); +} + +static struct mtk_wed_hw * +mtk_wed_assign(struct mtk_wed_device *dev) +{ + struct mtk_wed_hw *hw; + + hw = hw_list[pci_domain_nr(dev->wlan.pci_dev->bus)]; + if (!hw || hw->wed_dev) + return NULL; + + hw->wed_dev = dev; + return hw; +} + +static int +mtk_wed_buffer_alloc(struct mtk_wed_device *dev) +{ + struct mtk_wdma_desc *desc; + dma_addr_t desc_phys; + void **page_list; + int token = dev->wlan.token_start; + int ring_size; + int n_pages; + int i, page_idx; + + ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1); + n_pages = ring_size / MTK_WED_BUF_PER_PAGE; + + page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + dev->buf_ring.size = ring_size; + dev->buf_ring.pages = page_list; + + desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc), + &desc_phys, GFP_KERNEL); + if (!desc) + return -ENOMEM; + + dev->buf_ring.desc = desc; + dev->buf_ring.desc_phys = desc_phys; + + for (i = 0, page_idx = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) { + dma_addr_t page_phys, buf_phys; + struct page *page; + void *buf; + int s; + + page = __dev_alloc_pages(GFP_KERNEL, 0); + if (!page) + return -ENOMEM; + + page_phys = dma_map_page(dev->hw->dev, page, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev->hw->dev, page_phys)) { + __free_page(page); + return -ENOMEM; + } + + page_list[page_idx++] = page; + dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + buf = page_to_virt(page); + buf_phys = page_phys; + + for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) { + u32 txd_size; + u32 ctrl; + + txd_size = dev->wlan.init_buf(buf, buf_phys, token++); + + desc->buf0 = cpu_to_le32(buf_phys); + desc->buf1 = cpu_to_le32(buf_phys + txd_size); + ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) | + FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1, + MTK_WED_BUF_SIZE - txd_size) | + MTK_WDMA_DESC_CTRL_LAST_SEG1; + desc->ctrl = cpu_to_le32(ctrl); + desc->info = 0; + desc++; + + buf += MTK_WED_BUF_SIZE; + buf_phys += MTK_WED_BUF_SIZE; + } + + dma_sync_single_for_device(dev->hw->dev, page_phys, PAGE_SIZE, + DMA_BIDIRECTIONAL); + } + + return 0; +} + +static void +mtk_wed_free_buffer(struct mtk_wed_device *dev) +{ + struct mtk_wdma_desc *desc = dev->buf_ring.desc; + void **page_list = dev->buf_ring.pages; + int page_idx; + int i; + + if (!page_list) + return; + + if (!desc) + goto free_pagelist; + + for (i = 0, page_idx = 0; i < dev->buf_ring.size; i += MTK_WED_BUF_PER_PAGE) { + void *page = page_list[page_idx++]; + dma_addr_t buf_addr; + + if (!page) + break; + + buf_addr = le32_to_cpu(desc[i].buf0); + dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + __free_page(page); + } + + dma_free_coherent(dev->hw->dev, dev->buf_ring.size * sizeof(*desc), + desc, dev->buf_ring.desc_phys); + +free_pagelist: + kfree(page_list); +} + +static void +mtk_wed_free_ring(struct mtk_wed_device *dev, struct mtk_wed_ring *ring) +{ + if (!ring->desc) + return; + + dma_free_coherent(dev->hw->dev, ring->size * sizeof(*ring->desc), + ring->desc, ring->desc_phys); +} + +static void +mtk_wed_free_tx_rings(struct mtk_wed_device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++) + mtk_wed_free_ring(dev, &dev->tx_ring[i]); + for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++) + mtk_wed_free_ring(dev, &dev->tx_wdma[i]); +} + +static void +mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en) +{ + u32 mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK; + + if (!dev->hw->num_flows) + mask &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD; + + wed_w32(dev, MTK_WED_EXT_INT_MASK, en ? mask : 0); + wed_r32(dev, MTK_WED_EXT_INT_MASK); +} + +static void +mtk_wed_stop(struct mtk_wed_device *dev) +{ + regmap_write(dev->hw->mirror, dev->hw->index * 4, 0); + mtk_wed_set_ext_int(dev, false); + + wed_clr(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WDMA_INT_AGENT_EN | + MTK_WED_CTRL_WPDMA_INT_AGENT_EN | + MTK_WED_CTRL_WED_TX_BM_EN | + MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); + wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0); + wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0); + wdma_w32(dev, MTK_WDMA_INT_MASK, 0); + wdma_w32(dev, MTK_WDMA_INT_GRP2, 0); + wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0); + + wed_clr(dev, MTK_WED_GLO_CFG, + MTK_WED_GLO_CFG_TX_DMA_EN | + MTK_WED_GLO_CFG_RX_DMA_EN); + wed_clr(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN); + wed_clr(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_RX_DRV_EN); +} + +static void +mtk_wed_detach(struct mtk_wed_device *dev) +{ + struct device_node *wlan_node = dev->wlan.pci_dev->dev.of_node; + struct mtk_wed_hw *hw = dev->hw; + + mutex_lock(&hw_lock); + + mtk_wed_stop(dev); + + wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX); + wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); + + mtk_wed_reset(dev, MTK_WED_RESET_WED); + + mtk_wed_free_buffer(dev); + mtk_wed_free_tx_rings(dev); + + if (of_dma_is_coherent(wlan_node)) + regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP, + BIT(hw->index), BIT(hw->index)); + + if (!hw_list[!hw->index]->wed_dev && + hw->eth->dma_dev != hw->eth->dev) + mtk_eth_set_dma_device(hw->eth, hw->eth->dev); + + memset(dev, 0, sizeof(*dev)); + module_put(THIS_MODULE); + + hw->wed_dev = NULL; + mutex_unlock(&hw_lock); +} + +static void +mtk_wed_hw_init_early(struct mtk_wed_device *dev) +{ + u32 mask, set; + u32 offset; + + mtk_wed_stop(dev); + mtk_wed_reset(dev, MTK_WED_RESET_WED); + + mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE | + MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE | + MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE; + set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2) | + MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP | + MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY; + wed_m32(dev, MTK_WED_WDMA_GLO_CFG, mask, set); + + wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_RX_INFO_PRERES); + + offset = dev->hw->index ? 0x04000400 : 0; + wed_w32(dev, MTK_WED_WDMA_OFFSET0, 0x2a042a20 + offset); + wed_w32(dev, MTK_WED_WDMA_OFFSET1, 0x29002800 + offset); + + wed_w32(dev, MTK_WED_PCIE_CFG_BASE, MTK_PCIE_BASE(dev->hw->index)); + wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys); +} + +static void +mtk_wed_hw_init(struct mtk_wed_device *dev) +{ + if (dev->init_done) + return; + + dev->init_done = true; + mtk_wed_set_ext_int(dev, false); + wed_w32(dev, MTK_WED_TX_BM_CTRL, + MTK_WED_TX_BM_CTRL_PAUSE | + FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM, + dev->buf_ring.size / 128) | + FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM, + MTK_WED_TX_RING_SIZE / 256)); + + wed_w32(dev, MTK_WED_TX_BM_BASE, dev->buf_ring.desc_phys); + + wed_w32(dev, MTK_WED_TX_BM_TKID, + FIELD_PREP(MTK_WED_TX_BM_TKID_START, + dev->wlan.token_start) | + FIELD_PREP(MTK_WED_TX_BM_TKID_END, + dev->wlan.token_start + dev->wlan.nbuf - 1)); + + wed_w32(dev, MTK_WED_TX_BM_BUF_LEN, MTK_WED_PKT_SIZE); + + wed_w32(dev, MTK_WED_TX_BM_DYN_THR, + FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, 1) | + MTK_WED_TX_BM_DYN_THR_HI); + + mtk_wed_reset(dev, MTK_WED_RESET_TX_BM); + + wed_set(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WED_TX_BM_EN | + MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); + + wed_clr(dev, MTK_WED_TX_BM_CTRL, MTK_WED_TX_BM_CTRL_PAUSE); +} + +static void +mtk_wed_ring_reset(struct mtk_wdma_desc *desc, int size) +{ + int i; + + for (i = 0; i < size; i++) { + desc[i].buf0 = 0; + desc[i].ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE); + desc[i].buf1 = 0; + desc[i].info = 0; + } +} + +static u32 +mtk_wed_check_busy(struct mtk_wed_device *dev) +{ + if (wed_r32(dev, MTK_WED_GLO_CFG) & MTK_WED_GLO_CFG_TX_DMA_BUSY) + return true; + + if (wed_r32(dev, MTK_WED_WPDMA_GLO_CFG) & + MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY) + return true; + + if (wed_r32(dev, MTK_WED_CTRL) & MTK_WED_CTRL_WDMA_INT_AGENT_BUSY) + return true; + + if (wed_r32(dev, MTK_WED_WDMA_GLO_CFG) & + MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY) + return true; + + if (wdma_r32(dev, MTK_WDMA_GLO_CFG) & + MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY) + return true; + + if (wed_r32(dev, MTK_WED_CTRL) & + (MTK_WED_CTRL_WED_TX_BM_BUSY | MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY)) + return true; + + return false; +} + +static int +mtk_wed_poll_busy(struct mtk_wed_device *dev) +{ + int sleep = 15000; + int timeout = 100 * sleep; + u32 val; + + return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep, + timeout, false, dev); +} + +static void +mtk_wed_reset_dma(struct mtk_wed_device *dev) +{ + bool busy = false; + u32 val; + int i; + + for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++) { + struct mtk_wdma_desc *desc = dev->tx_ring[i].desc; + + if (!desc) + continue; + + mtk_wed_ring_reset(desc, MTK_WED_TX_RING_SIZE); + } + + if (mtk_wed_poll_busy(dev)) + busy = mtk_wed_check_busy(dev); + + if (busy) { + mtk_wed_reset(dev, MTK_WED_RESET_WED_TX_DMA); + } else { + wed_w32(dev, MTK_WED_RESET_IDX, + MTK_WED_RESET_IDX_TX | + MTK_WED_RESET_IDX_RX); + wed_w32(dev, MTK_WED_RESET_IDX, 0); + } + + wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX); + wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); + + if (busy) { + mtk_wed_reset(dev, MTK_WED_RESET_WDMA_INT_AGENT); + mtk_wed_reset(dev, MTK_WED_RESET_WDMA_RX_DRV); + } else { + wed_w32(dev, MTK_WED_WDMA_RESET_IDX, + MTK_WED_WDMA_RESET_IDX_RX | MTK_WED_WDMA_RESET_IDX_DRV); + wed_w32(dev, MTK_WED_WDMA_RESET_IDX, 0); + + wed_set(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE); + + wed_clr(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE); + } + + for (i = 0; i < 100; i++) { + val = wed_r32(dev, MTK_WED_TX_BM_INTF); + if (FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP, val) == 0x40) + break; + } + + mtk_wed_reset(dev, MTK_WED_RESET_TX_FREE_AGENT); + mtk_wed_reset(dev, MTK_WED_RESET_TX_BM); + + if (busy) { + mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT); + mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_TX_DRV); + mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_DRV); + } else { + wed_w32(dev, MTK_WED_WPDMA_RESET_IDX, + MTK_WED_WPDMA_RESET_IDX_TX | + MTK_WED_WPDMA_RESET_IDX_RX); + wed_w32(dev, MTK_WED_WPDMA_RESET_IDX, 0); + } + +} + +static int +mtk_wed_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring, + int size) +{ + ring->desc = dma_alloc_coherent(dev->hw->dev, + size * sizeof(*ring->desc), + &ring->desc_phys, GFP_KERNEL); + if (!ring->desc) + return -ENOMEM; + + ring->size = size; + mtk_wed_ring_reset(ring->desc, size); + + return 0; +} + +static int +mtk_wed_wdma_ring_setup(struct mtk_wed_device *dev, int idx, int size) +{ + struct mtk_wed_ring *wdma = &dev->tx_wdma[idx]; + + if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE)) + return -ENOMEM; + + wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE, + wdma->desc_phys); + wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_COUNT, + size); + wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0); + + wed_w32(dev, MTK_WED_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE, + wdma->desc_phys); + wed_w32(dev, MTK_WED_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_COUNT, + size); + + return 0; +} + +static void +mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) +{ + u32 wdma_mask; + u32 val; + int i; + + for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++) + if (!dev->tx_wdma[i].desc) + mtk_wed_wdma_ring_setup(dev, i, 16); + + wdma_mask = FIELD_PREP(MTK_WDMA_INT_MASK_RX_DONE, GENMASK(1, 0)); + + mtk_wed_hw_init(dev); + + wed_set(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WDMA_INT_AGENT_EN | + MTK_WED_CTRL_WPDMA_INT_AGENT_EN | + MTK_WED_CTRL_WED_TX_BM_EN | + MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); + + wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, MTK_WED_PCIE_INT_TRIGGER_STATUS); + + wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, + MTK_WED_WPDMA_INT_TRIGGER_RX_DONE | + MTK_WED_WPDMA_INT_TRIGGER_TX_DONE); + + wed_set(dev, MTK_WED_WPDMA_INT_CTRL, + MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV); + + wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, wdma_mask); + wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask); + + wdma_w32(dev, MTK_WDMA_INT_MASK, wdma_mask); + wdma_w32(dev, MTK_WDMA_INT_GRP2, wdma_mask); + + wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask); + wed_w32(dev, MTK_WED_INT_MASK, irq_mask); + + wed_set(dev, MTK_WED_GLO_CFG, + MTK_WED_GLO_CFG_TX_DMA_EN | + MTK_WED_GLO_CFG_RX_DMA_EN); + wed_set(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN); + wed_set(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_RX_DRV_EN); + + mtk_wed_set_ext_int(dev, true); + val = dev->wlan.wpdma_phys | + MTK_PCIE_MIRROR_MAP_EN | + FIELD_PREP(MTK_PCIE_MIRROR_MAP_WED_ID, dev->hw->index); + + if (dev->hw->index) + val |= BIT(1); + val |= BIT(0); + regmap_write(dev->hw->mirror, dev->hw->index * 4, val); + + dev->running = true; +} + +static int +mtk_wed_attach(struct mtk_wed_device *dev) + __releases(RCU) +{ + struct mtk_wed_hw *hw; + int ret = 0; + + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), + "mtk_wed_attach without holding the RCU read lock"); + + if (pci_domain_nr(dev->wlan.pci_dev->bus) > 1 || + !try_module_get(THIS_MODULE)) + ret = -ENODEV; + + rcu_read_unlock(); + + if (ret) + return ret; + + mutex_lock(&hw_lock); + + hw = mtk_wed_assign(dev); + if (!hw) { + module_put(THIS_MODULE); + ret = -ENODEV; + goto out; + } + + dev_info(&dev->wlan.pci_dev->dev, "attaching wed device %d\n", hw->index); + + dev->hw = hw; + dev->dev = hw->dev; + dev->irq = hw->irq; + dev->wdma_idx = hw->index; + + if (hw->eth->dma_dev == hw->eth->dev && + of_dma_is_coherent(hw->eth->dev->of_node)) + mtk_eth_set_dma_device(hw->eth, hw->dev); + + ret = mtk_wed_buffer_alloc(dev); + if (ret) { + mtk_wed_detach(dev); + goto out; + } + + mtk_wed_hw_init_early(dev); + regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP, BIT(hw->index), 0); + +out: + mutex_unlock(&hw_lock); + + return ret; +} + +static int +mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs) +{ + struct mtk_wed_ring *ring = &dev->tx_ring[idx]; + + /* + * Tx ring redirection: + * Instead of configuring the WLAN PDMA TX ring directly, the WLAN + * driver allocated DMA ring gets configured into WED MTK_WED_RING_TX(n) + * registers. + * + * WED driver posts its own DMA ring as WLAN PDMA TX and configures it + * into MTK_WED_WPDMA_RING_TX(n) registers. + * It gets filled with packets picked up from WED TX ring and from + * WDMA RX. + */ + + BUG_ON(idx > ARRAY_SIZE(dev->tx_ring)); + + if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE)) + return -ENOMEM; + + if (mtk_wed_wdma_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE)) + return -ENOMEM; + + ring->reg_base = MTK_WED_RING_TX(idx); + ring->wpdma = regs; + + /* WED -> WPDMA */ + wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_BASE, ring->desc_phys); + wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_COUNT, MTK_WED_TX_RING_SIZE); + wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_CPU_IDX, 0); + + wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE, + ring->desc_phys); + wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_COUNT, + MTK_WED_TX_RING_SIZE); + wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0); + + return 0; +} + +static int +mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs) +{ + struct mtk_wed_ring *ring = &dev->txfree_ring; + int i; + + /* + * For txfree event handling, the same DMA ring is shared between WED + * and WLAN. The WLAN driver accesses the ring index registers through + * WED + */ + ring->reg_base = MTK_WED_RING_RX(1); + ring->wpdma = regs; + + for (i = 0; i < 12; i += 4) { + u32 val = readl(regs + i); + + wed_w32(dev, MTK_WED_RING_RX(1) + i, val); + wed_w32(dev, MTK_WED_WPDMA_RING_RX(1) + i, val); + } + + return 0; +} + +static u32 +mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask) +{ + u32 val; + + val = wed_r32(dev, MTK_WED_EXT_INT_STATUS); + wed_w32(dev, MTK_WED_EXT_INT_STATUS, val); + val &= MTK_WED_EXT_INT_STATUS_ERROR_MASK; + if (!dev->hw->num_flows) + val &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD; + if (val && net_ratelimit()) + pr_err("mtk_wed%d: error status=%08x\n", dev->hw->index, val); + + val = wed_r32(dev, MTK_WED_INT_STATUS); + val &= mask; + wed_w32(dev, MTK_WED_INT_STATUS, val); /* ACK */ + + return val; +} + +static void +mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask) +{ + if (!dev->running) + return; + + mtk_wed_set_ext_int(dev, !!mask); + wed_w32(dev, MTK_WED_INT_MASK, mask); +} + +int mtk_wed_flow_add(int index) +{ + struct mtk_wed_hw *hw = hw_list[index]; + int ret; + + if (!hw || !hw->wed_dev) + return -ENODEV; + + if (hw->num_flows) { + hw->num_flows++; + return 0; + } + + mutex_lock(&hw_lock); + if (!hw->wed_dev) { + ret = -ENODEV; + goto out; + } + + ret = hw->wed_dev->wlan.offload_enable(hw->wed_dev); + if (!ret) + hw->num_flows++; + mtk_wed_set_ext_int(hw->wed_dev, true); + +out: + mutex_unlock(&hw_lock); + + return ret; +} + +void mtk_wed_flow_remove(int index) +{ + struct mtk_wed_hw *hw = hw_list[index]; + + if (!hw) + return; + + if (--hw->num_flows) + return; + + mutex_lock(&hw_lock); + if (!hw->wed_dev) + goto out; + + hw->wed_dev->wlan.offload_disable(hw->wed_dev); + mtk_wed_set_ext_int(hw->wed_dev, true); + +out: + mutex_unlock(&hw_lock); +} + +void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, + void __iomem *wdma, int index) +{ + static const struct mtk_wed_ops wed_ops = { + .attach = mtk_wed_attach, + .tx_ring_setup = mtk_wed_tx_ring_setup, + .txfree_ring_setup = mtk_wed_txfree_ring_setup, + .start = mtk_wed_start, + .stop = mtk_wed_stop, + .reset_dma = mtk_wed_reset_dma, + .reg_read = wed_r32, + .reg_write = wed_w32, + .irq_get = mtk_wed_irq_get, + .irq_set_mask = mtk_wed_irq_set_mask, + .detach = mtk_wed_detach, + }; + struct device_node *eth_np = eth->dev->of_node; + struct platform_device *pdev; + struct mtk_wed_hw *hw; + struct regmap *regs; + int irq; + + if (!np) + return; + + pdev = of_find_device_by_node(np); + if (!pdev) + return; + + get_device(&pdev->dev); + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return; + + regs = syscon_regmap_lookup_by_phandle(np, NULL); + if (IS_ERR(regs)) + return; + + rcu_assign_pointer(mtk_soc_wed_ops, &wed_ops); + + mutex_lock(&hw_lock); + + if (WARN_ON(hw_list[index])) + goto unlock; + + hw = kzalloc(sizeof(*hw), GFP_KERNEL); + hw->node = np; + hw->regs = regs; + hw->eth = eth; + hw->dev = &pdev->dev; + hw->wdma = wdma; + hw->index = index; + hw->irq = irq; + hw->mirror = syscon_regmap_lookup_by_phandle(eth_np, + "mediatek,pcie-mirror"); + hw->hifsys = syscon_regmap_lookup_by_phandle(eth_np, + "mediatek,hifsys"); + if (IS_ERR(hw->mirror) || IS_ERR(hw->hifsys)) { + kfree(hw); + goto unlock; + } + + if (!index) { + regmap_write(hw->mirror, 0, 0); + regmap_write(hw->mirror, 4, 0); + } + mtk_wed_hw_add_debugfs(hw); + + hw_list[index] = hw; + +unlock: + mutex_unlock(&hw_lock); +} + +void mtk_wed_exit(void) +{ + int i; + + rcu_assign_pointer(mtk_soc_wed_ops, NULL); + + synchronize_rcu(); + + for (i = 0; i < ARRAY_SIZE(hw_list); i++) { + struct mtk_wed_hw *hw; + + hw = hw_list[i]; + if (!hw) + continue; + + hw_list[i] = NULL; + debugfs_remove(hw->debugfs_dir); + put_device(hw->dev); + kfree(hw); + } +} diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h new file mode 100644 index 000000000000..981ec613f4b0 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_wed.h @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */ + +#ifndef __MTK_WED_PRIV_H +#define __MTK_WED_PRIV_H + +#include <linux/soc/mediatek/mtk_wed.h> +#include <linux/debugfs.h> +#include <linux/regmap.h> +#include <linux/netdevice.h> + +struct mtk_eth; + +struct mtk_wed_hw { + struct device_node *node; + struct mtk_eth *eth; + struct regmap *regs; + struct regmap *hifsys; + struct device *dev; + void __iomem *wdma; + struct regmap *mirror; + struct dentry *debugfs_dir; + struct mtk_wed_device *wed_dev; + u32 debugfs_reg; + u32 num_flows; + char dirname[5]; + int irq; + int index; +}; + +struct mtk_wdma_info { + u8 wdma_idx; + u8 queue; + u16 wcid; + u8 bss; +}; + +#ifdef CONFIG_NET_MEDIATEK_SOC_WED +static inline void +wed_w32(struct mtk_wed_device *dev, u32 reg, u32 val) +{ + regmap_write(dev->hw->regs, reg, val); +} + +static inline u32 +wed_r32(struct mtk_wed_device *dev, u32 reg) +{ + unsigned int val; + + regmap_read(dev->hw->regs, reg, &val); + + return val; +} + +static inline void +wdma_w32(struct mtk_wed_device *dev, u32 reg, u32 val) +{ + writel(val, dev->hw->wdma + reg); +} + +static inline u32 +wdma_r32(struct mtk_wed_device *dev, u32 reg) +{ + return readl(dev->hw->wdma + reg); +} + +static inline u32 +wpdma_tx_r32(struct mtk_wed_device *dev, int ring, u32 reg) +{ + if (!dev->tx_ring[ring].wpdma) + return 0; + + return readl(dev->tx_ring[ring].wpdma + reg); +} + +static inline void +wpdma_tx_w32(struct mtk_wed_device *dev, int ring, u32 reg, u32 val) +{ + if (!dev->tx_ring[ring].wpdma) + return; + + writel(val, dev->tx_ring[ring].wpdma + reg); +} + +static inline u32 +wpdma_txfree_r32(struct mtk_wed_device *dev, u32 reg) +{ + if (!dev->txfree_ring.wpdma) + return 0; + + return readl(dev->txfree_ring.wpdma + reg); +} + +static inline void +wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val) +{ + if (!dev->txfree_ring.wpdma) + return; + + writel(val, dev->txfree_ring.wpdma + reg); +} + +void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, + void __iomem *wdma, int index); +void mtk_wed_exit(void); +int mtk_wed_flow_add(int index); +void mtk_wed_flow_remove(int index); +#else +static inline void +mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, + void __iomem *wdma, int index) +{ +} +static inline void +mtk_wed_exit(void) +{ +} +static inline int mtk_wed_flow_add(int index) +{ + return -EINVAL; +} +static inline void mtk_wed_flow_remove(int index) +{ +} +#endif + +#ifdef CONFIG_DEBUG_FS +void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw); +#else +static inline void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw) +{ +} +#endif + +#endif diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c new file mode 100644 index 000000000000..a81d3fd1a439 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */ + +#include <linux/seq_file.h> +#include "mtk_wed.h" +#include "mtk_wed_regs.h" + +struct reg_dump { + const char *name; + u16 offset; + u8 type; + u8 base; +}; + +enum { + DUMP_TYPE_STRING, + DUMP_TYPE_WED, + DUMP_TYPE_WDMA, + DUMP_TYPE_WPDMA_TX, + DUMP_TYPE_WPDMA_TXFREE, +}; + +#define DUMP_STR(_str) { _str, 0, DUMP_TYPE_STRING } +#define DUMP_REG(_reg, ...) { #_reg, MTK_##_reg, __VA_ARGS__ } +#define DUMP_RING(_prefix, _base, ...) \ + { _prefix " BASE", _base, __VA_ARGS__ }, \ + { _prefix " CNT", _base + 0x4, __VA_ARGS__ }, \ + { _prefix " CIDX", _base + 0x8, __VA_ARGS__ }, \ + { _prefix " DIDX", _base + 0xc, __VA_ARGS__ } + +#define DUMP_WED(_reg) DUMP_REG(_reg, DUMP_TYPE_WED) +#define DUMP_WED_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WED) + +#define DUMP_WDMA(_reg) DUMP_REG(_reg, DUMP_TYPE_WDMA) +#define DUMP_WDMA_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WDMA) + +#define DUMP_WPDMA_TX_RING(_n) DUMP_RING("WPDMA_TX" #_n, 0, DUMP_TYPE_WPDMA_TX, _n) +#define DUMP_WPDMA_TXFREE_RING DUMP_RING("WPDMA_RX1", 0, DUMP_TYPE_WPDMA_TXFREE) + +static void +print_reg_val(struct seq_file *s, const char *name, u32 val) +{ + seq_printf(s, "%-32s %08x\n", name, val); +} + +static void +dump_wed_regs(struct seq_file *s, struct mtk_wed_device *dev, + const struct reg_dump *regs, int n_regs) +{ + const struct reg_dump *cur; + u32 val; + + for (cur = regs; cur < ®s[n_regs]; cur++) { + switch (cur->type) { + case DUMP_TYPE_STRING: + seq_printf(s, "%s======== %s:\n", + cur > regs ? "\n" : "", + cur->name); + continue; + case DUMP_TYPE_WED: + val = wed_r32(dev, cur->offset); + break; + case DUMP_TYPE_WDMA: + val = wdma_r32(dev, cur->offset); + break; + case DUMP_TYPE_WPDMA_TX: + val = wpdma_tx_r32(dev, cur->base, cur->offset); + break; + case DUMP_TYPE_WPDMA_TXFREE: + val = wpdma_txfree_r32(dev, cur->offset); + break; + } + print_reg_val(s, cur->name, val); + } +} + + +static int +wed_txinfo_show(struct seq_file *s, void *data) +{ + static const struct reg_dump regs[] = { + DUMP_STR("WED TX"), + DUMP_WED(WED_TX_MIB(0)), + DUMP_WED_RING(WED_RING_TX(0)), + + DUMP_WED(WED_TX_MIB(1)), + DUMP_WED_RING(WED_RING_TX(1)), + + DUMP_STR("WPDMA TX"), + DUMP_WED(WED_WPDMA_TX_MIB(0)), + DUMP_WED_RING(WED_WPDMA_RING_TX(0)), + DUMP_WED(WED_WPDMA_TX_COHERENT_MIB(0)), + + DUMP_WED(WED_WPDMA_TX_MIB(1)), + DUMP_WED_RING(WED_WPDMA_RING_TX(1)), + DUMP_WED(WED_WPDMA_TX_COHERENT_MIB(1)), + + DUMP_STR("WPDMA TX"), + DUMP_WPDMA_TX_RING(0), + DUMP_WPDMA_TX_RING(1), + + DUMP_STR("WED WDMA RX"), + DUMP_WED(WED_WDMA_RX_MIB(0)), + DUMP_WED_RING(WED_WDMA_RING_RX(0)), + DUMP_WED(WED_WDMA_RX_THRES(0)), + DUMP_WED(WED_WDMA_RX_RECYCLE_MIB(0)), + DUMP_WED(WED_WDMA_RX_PROCESSED_MIB(0)), + + DUMP_WED(WED_WDMA_RX_MIB(1)), + DUMP_WED_RING(WED_WDMA_RING_RX(1)), + DUMP_WED(WED_WDMA_RX_THRES(1)), + DUMP_WED(WED_WDMA_RX_RECYCLE_MIB(1)), + DUMP_WED(WED_WDMA_RX_PROCESSED_MIB(1)), + + DUMP_STR("WDMA RX"), + DUMP_WDMA(WDMA_GLO_CFG), + DUMP_WDMA_RING(WDMA_RING_RX(0)), + DUMP_WDMA_RING(WDMA_RING_RX(1)), + }; + struct mtk_wed_hw *hw = s->private; + struct mtk_wed_device *dev = hw->wed_dev; + + if (!dev) + return 0; + + dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(wed_txinfo); + + +static int +mtk_wed_reg_set(void *data, u64 val) +{ + struct mtk_wed_hw *hw = data; + + regmap_write(hw->regs, hw->debugfs_reg, val); + + return 0; +} + +static int +mtk_wed_reg_get(void *data, u64 *val) +{ + struct mtk_wed_hw *hw = data; + unsigned int regval; + int ret; + + ret = regmap_read(hw->regs, hw->debugfs_reg, ®val); + if (ret) + return ret; + + *val = regval; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_regval, mtk_wed_reg_get, mtk_wed_reg_set, + "0x%08llx\n"); + +void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw) +{ + struct dentry *dir; + + snprintf(hw->dirname, sizeof(hw->dirname), "wed%d", hw->index); + dir = debugfs_create_dir(hw->dirname, NULL); + if (!dir) + return; + + hw->debugfs_dir = dir; + debugfs_create_u32("regidx", 0600, dir, &hw->debugfs_reg); + debugfs_create_file_unsafe("regval", 0600, dir, hw, &fops_regval); + debugfs_create_file_unsafe("txinfo", 0400, dir, hw, &wed_txinfo_fops); +} diff --git a/drivers/net/ethernet/mediatek/mtk_wed_ops.c b/drivers/net/ethernet/mediatek/mtk_wed_ops.c new file mode 100644 index 000000000000..a5d9d8a5bce2 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_wed_ops.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ + +#include <linux/kernel.h> +#include <linux/soc/mediatek/mtk_wed.h> + +const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; +EXPORT_SYMBOL_GPL(mtk_soc_wed_ops); diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h new file mode 100644 index 000000000000..0a0465ea58b4 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ + +#ifndef __MTK_WED_REGS_H +#define __MTK_WED_REGS_H + +#define MTK_WDMA_DESC_CTRL_LEN1 GENMASK(14, 0) +#define MTK_WDMA_DESC_CTRL_LAST_SEG1 BIT(15) +#define MTK_WDMA_DESC_CTRL_BURST BIT(16) +#define MTK_WDMA_DESC_CTRL_LEN0 GENMASK(29, 16) +#define MTK_WDMA_DESC_CTRL_LAST_SEG0 BIT(30) +#define MTK_WDMA_DESC_CTRL_DMA_DONE BIT(31) + +struct mtk_wdma_desc { + __le32 buf0; + __le32 ctrl; + __le32 buf1; + __le32 info; +} __packed __aligned(4); + +#define MTK_WED_RESET 0x008 +#define MTK_WED_RESET_TX_BM BIT(0) +#define MTK_WED_RESET_TX_FREE_AGENT BIT(4) +#define MTK_WED_RESET_WPDMA_TX_DRV BIT(8) +#define MTK_WED_RESET_WPDMA_RX_DRV BIT(9) +#define MTK_WED_RESET_WPDMA_INT_AGENT BIT(11) +#define MTK_WED_RESET_WED_TX_DMA BIT(12) +#define MTK_WED_RESET_WDMA_RX_DRV BIT(17) +#define MTK_WED_RESET_WDMA_INT_AGENT BIT(19) +#define MTK_WED_RESET_WED BIT(31) + +#define MTK_WED_CTRL 0x00c +#define MTK_WED_CTRL_WPDMA_INT_AGENT_EN BIT(0) +#define MTK_WED_CTRL_WPDMA_INT_AGENT_BUSY BIT(1) +#define MTK_WED_CTRL_WDMA_INT_AGENT_EN BIT(2) +#define MTK_WED_CTRL_WDMA_INT_AGENT_BUSY BIT(3) +#define MTK_WED_CTRL_WED_TX_BM_EN BIT(8) +#define MTK_WED_CTRL_WED_TX_BM_BUSY BIT(9) +#define MTK_WED_CTRL_WED_TX_FREE_AGENT_EN BIT(10) +#define MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY BIT(11) +#define MTK_WED_CTRL_RESERVE_EN BIT(12) +#define MTK_WED_CTRL_RESERVE_BUSY BIT(13) +#define MTK_WED_CTRL_FINAL_DIDX_READ BIT(24) +#define MTK_WED_CTRL_MIB_READ_CLEAR BIT(28) + +#define MTK_WED_EXT_INT_STATUS 0x020 +#define MTK_WED_EXT_INT_STATUS_TF_LEN_ERR BIT(0) +#define MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD BIT(1) +#define MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID BIT(4) +#define MTK_WED_EXT_INT_STATUS_TX_FBUF_LO_TH BIT(8) +#define MTK_WED_EXT_INT_STATUS_TX_FBUF_HI_TH BIT(9) +#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH BIT(12) +#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH BIT(13) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR BIT(16) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR BIT(17) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT BIT(18) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN BIT(19) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_BM_DMAD_COHERENT BIT(20) +#define MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR BIT(21) +#define MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR BIT(22) +#define MTK_WED_EXT_INT_STATUS_RX_DRV_DMA_RECYCLE BIT(24) +#define MTK_WED_EXT_INT_STATUS_ERROR_MASK (MTK_WED_EXT_INT_STATUS_TF_LEN_ERR | \ + MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD | \ + MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID | \ + MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR | \ + MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR | \ + MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN | \ + MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR | \ + MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR) + +#define MTK_WED_EXT_INT_MASK 0x028 + +#define MTK_WED_STATUS 0x060 +#define MTK_WED_STATUS_TX GENMASK(15, 8) + +#define MTK_WED_TX_BM_CTRL 0x080 +#define MTK_WED_TX_BM_CTRL_VLD_GRP_NUM GENMASK(6, 0) +#define MTK_WED_TX_BM_CTRL_RSV_GRP_NUM GENMASK(22, 16) +#define MTK_WED_TX_BM_CTRL_PAUSE BIT(28) + +#define MTK_WED_TX_BM_BASE 0x084 + +#define MTK_WED_TX_BM_TKID 0x088 +#define MTK_WED_TX_BM_TKID_START GENMASK(15, 0) +#define MTK_WED_TX_BM_TKID_END GENMASK(31, 16) + +#define MTK_WED_TX_BM_BUF_LEN 0x08c + +#define MTK_WED_TX_BM_INTF 0x09c +#define MTK_WED_TX_BM_INTF_TKID GENMASK(15, 0) +#define MTK_WED_TX_BM_INTF_TKFIFO_FDEP GENMASK(23, 16) +#define MTK_WED_TX_BM_INTF_TKID_VALID BIT(28) +#define MTK_WED_TX_BM_INTF_TKID_READ BIT(29) + +#define MTK_WED_TX_BM_DYN_THR 0x0a0 +#define MTK_WED_TX_BM_DYN_THR_LO GENMASK(6, 0) +#define MTK_WED_TX_BM_DYN_THR_HI GENMASK(22, 16) + +#define MTK_WED_INT_STATUS 0x200 +#define MTK_WED_INT_MASK 0x204 + +#define MTK_WED_GLO_CFG 0x208 +#define MTK_WED_GLO_CFG_TX_DMA_EN BIT(0) +#define MTK_WED_GLO_CFG_TX_DMA_BUSY BIT(1) +#define MTK_WED_GLO_CFG_RX_DMA_EN BIT(2) +#define MTK_WED_GLO_CFG_RX_DMA_BUSY BIT(3) +#define MTK_WED_GLO_CFG_RX_BT_SIZE GENMASK(5, 4) +#define MTK_WED_GLO_CFG_TX_WB_DDONE BIT(6) +#define MTK_WED_GLO_CFG_BIG_ENDIAN BIT(7) +#define MTK_WED_GLO_CFG_DIS_BT_SIZE_ALIGN BIT(8) +#define MTK_WED_GLO_CFG_TX_BT_SIZE_LO BIT(9) +#define MTK_WED_GLO_CFG_MULTI_DMA_EN GENMASK(11, 10) +#define MTK_WED_GLO_CFG_FIFO_LITTLE_ENDIAN BIT(12) +#define MTK_WED_GLO_CFG_MI_DEPTH_RD GENMASK(21, 13) +#define MTK_WED_GLO_CFG_TX_BT_SIZE_HI GENMASK(23, 22) +#define MTK_WED_GLO_CFG_SW_RESET BIT(24) +#define MTK_WED_GLO_CFG_FIRST_TOKEN_ONLY BIT(26) +#define MTK_WED_GLO_CFG_OMIT_RX_INFO BIT(27) +#define MTK_WED_GLO_CFG_OMIT_TX_INFO BIT(28) +#define MTK_WED_GLO_CFG_BYTE_SWAP BIT(29) +#define MTK_WED_GLO_CFG_RX_2B_OFFSET BIT(31) + +#define MTK_WED_RESET_IDX 0x20c +#define MTK_WED_RESET_IDX_TX GENMASK(3, 0) +#define MTK_WED_RESET_IDX_RX GENMASK(17, 16) + +#define MTK_WED_TX_MIB(_n) (0x2a0 + (_n) * 4) + +#define MTK_WED_RING_TX(_n) (0x300 + (_n) * 0x10) + +#define MTK_WED_RING_RX(_n) (0x400 + (_n) * 0x10) + +#define MTK_WED_WPDMA_INT_TRIGGER 0x504 +#define MTK_WED_WPDMA_INT_TRIGGER_RX_DONE BIT(1) +#define MTK_WED_WPDMA_INT_TRIGGER_TX_DONE GENMASK(5, 4) + +#define MTK_WED_WPDMA_GLO_CFG 0x508 +#define MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN BIT(0) +#define MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY BIT(1) +#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN BIT(2) +#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_BUSY BIT(3) +#define MTK_WED_WPDMA_GLO_CFG_RX_BT_SIZE GENMASK(5, 4) +#define MTK_WED_WPDMA_GLO_CFG_TX_WB_DDONE BIT(6) +#define MTK_WED_WPDMA_GLO_CFG_BIG_ENDIAN BIT(7) +#define MTK_WED_WPDMA_GLO_CFG_DIS_BT_SIZE_ALIGN BIT(8) +#define MTK_WED_WPDMA_GLO_CFG_TX_BT_SIZE_LO BIT(9) +#define MTK_WED_WPDMA_GLO_CFG_MULTI_DMA_EN GENMASK(11, 10) +#define MTK_WED_WPDMA_GLO_CFG_FIFO_LITTLE_ENDIAN BIT(12) +#define MTK_WED_WPDMA_GLO_CFG_MI_DEPTH_RD GENMASK(21, 13) +#define MTK_WED_WPDMA_GLO_CFG_TX_BT_SIZE_HI GENMASK(23, 22) +#define MTK_WED_WPDMA_GLO_CFG_SW_RESET BIT(24) +#define MTK_WED_WPDMA_GLO_CFG_FIRST_TOKEN_ONLY BIT(26) +#define MTK_WED_WPDMA_GLO_CFG_OMIT_RX_INFO BIT(27) +#define MTK_WED_WPDMA_GLO_CFG_OMIT_TX_INFO BIT(28) +#define MTK_WED_WPDMA_GLO_CFG_BYTE_SWAP BIT(29) +#define MTK_WED_WPDMA_GLO_CFG_RX_2B_OFFSET BIT(31) + +#define MTK_WED_WPDMA_RESET_IDX 0x50c +#define MTK_WED_WPDMA_RESET_IDX_TX GENMASK(3, 0) +#define MTK_WED_WPDMA_RESET_IDX_RX GENMASK(17, 16) + +#define MTK_WED_WPDMA_INT_CTRL 0x520 +#define MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV BIT(21) + +#define MTK_WED_WPDMA_INT_MASK 0x524 + +#define MTK_WED_PCIE_CFG_BASE 0x560 + +#define MTK_WED_PCIE_INT_TRIGGER 0x570 +#define MTK_WED_PCIE_INT_TRIGGER_STATUS BIT(16) + +#define MTK_WED_WPDMA_CFG_BASE 0x580 + +#define MTK_WED_WPDMA_TX_MIB(_n) (0x5a0 + (_n) * 4) +#define MTK_WED_WPDMA_TX_COHERENT_MIB(_n) (0x5d0 + (_n) * 4) + +#define MTK_WED_WPDMA_RING_TX(_n) (0x600 + (_n) * 0x10) +#define MTK_WED_WPDMA_RING_RX(_n) (0x700 + (_n) * 0x10) +#define MTK_WED_WDMA_RING_RX(_n) (0x900 + (_n) * 0x10) +#define MTK_WED_WDMA_RX_THRES(_n) (0x940 + (_n) * 0x4) + +#define MTK_WED_WDMA_GLO_CFG 0xa04 +#define MTK_WED_WDMA_GLO_CFG_TX_DRV_EN BIT(0) +#define MTK_WED_WDMA_GLO_CFG_RX_DRV_EN BIT(2) +#define MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY BIT(3) +#define MTK_WED_WDMA_GLO_CFG_BT_SIZE GENMASK(5, 4) +#define MTK_WED_WDMA_GLO_CFG_TX_WB_DDONE BIT(6) +#define MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE BIT(13) +#define MTK_WED_WDMA_GLO_CFG_WCOMPLETE_SEL BIT(16) +#define MTK_WED_WDMA_GLO_CFG_INIT_PHASE_RXDMA_BYPASS BIT(17) +#define MTK_WED_WDMA_GLO_CFG_INIT_PHASE_BYPASS BIT(18) +#define MTK_WED_WDMA_GLO_CFG_FSM_RETURN_IDLE BIT(19) +#define MTK_WED_WDMA_GLO_CFG_WAIT_COHERENT BIT(20) +#define MTK_WED_WDMA_GLO_CFG_AXI_W_AFTER_AW BIT(21) +#define MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY_SINGLE_W BIT(22) +#define MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY BIT(23) +#define MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP BIT(24) +#define MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE BIT(25) +#define MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE BIT(26) +#define MTK_WED_WDMA_GLO_CFG_RXDRV_CLKGATE_BYPASS BIT(30) + +#define MTK_WED_WDMA_RESET_IDX 0xa08 +#define MTK_WED_WDMA_RESET_IDX_RX GENMASK(17, 16) +#define MTK_WED_WDMA_RESET_IDX_DRV GENMASK(25, 24) + +#define MTK_WED_WDMA_INT_TRIGGER 0xa28 +#define MTK_WED_WDMA_INT_TRIGGER_RX_DONE GENMASK(17, 16) + +#define MTK_WED_WDMA_INT_CTRL 0xa2c +#define MTK_WED_WDMA_INT_CTRL_POLL_SRC_SEL GENMASK(17, 16) + +#define MTK_WED_WDMA_OFFSET0 0xaa4 +#define MTK_WED_WDMA_OFFSET1 0xaa8 + +#define MTK_WED_WDMA_RX_MIB(_n) (0xae0 + (_n) * 4) +#define MTK_WED_WDMA_RX_RECYCLE_MIB(_n) (0xae8 + (_n) * 4) +#define MTK_WED_WDMA_RX_PROCESSED_MIB(_n) (0xaf0 + (_n) * 4) + +#define MTK_WED_RING_OFS_BASE 0x00 +#define MTK_WED_RING_OFS_COUNT 0x04 +#define MTK_WED_RING_OFS_CPU_IDX 0x08 +#define MTK_WED_RING_OFS_DMA_IDX 0x0c + +#define MTK_WDMA_RING_RX(_n) (0x100 + (_n) * 0x10) + +#define MTK_WDMA_GLO_CFG 0x204 +#define MTK_WDMA_GLO_CFG_RX_INFO_PRERES GENMASK(28, 26) + +#define MTK_WDMA_RESET_IDX 0x208 +#define MTK_WDMA_RESET_IDX_TX GENMASK(3, 0) +#define MTK_WDMA_RESET_IDX_RX GENMASK(17, 16) + +#define MTK_WDMA_INT_MASK 0x228 +#define MTK_WDMA_INT_MASK_TX_DONE GENMASK(3, 0) +#define MTK_WDMA_INT_MASK_RX_DONE GENMASK(17, 16) +#define MTK_WDMA_INT_MASK_TX_DELAY BIT(28) +#define MTK_WDMA_INT_MASK_TX_COHERENT BIT(29) +#define MTK_WDMA_INT_MASK_RX_DELAY BIT(30) +#define MTK_WDMA_INT_MASK_RX_COHERENT BIT(31) + +#define MTK_WDMA_INT_GRP1 0x250 +#define MTK_WDMA_INT_GRP2 0x254 + +#define MTK_PCIE_MIRROR_MAP(n) ((n) ? 0x4 : 0x0) +#define MTK_PCIE_MIRROR_MAP_EN BIT(0) +#define MTK_PCIE_MIRROR_MAP_WED_ID BIT(1) + +/* DMA channel mapping */ +#define HIFSYS_DMA_AG_MAP 0x008 + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 4ba1a78c6515..bfc0cd5ec423 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -16,13 +16,9 @@ config MLX5_CORE Core driver for low level functionality of the ConnectX-4 and Connect-IB cards by Mellanox Technologies. -config MLX5_ACCEL - bool - config MLX5_FPGA bool "Mellanox Technologies Innova support" depends on MLX5_CORE - select MLX5_ACCEL help Build support for the Innova family of network cards by Mellanox Technologies. Innova network cards are comprised of a ConnectX chip @@ -143,71 +139,21 @@ config MLX5_CORE_IPOIB help MLX5 IPoIB offloads & acceleration support. -config MLX5_FPGA_IPSEC - bool "Mellanox Technologies IPsec Innova support" - depends on MLX5_CORE - depends on MLX5_FPGA - help - Build IPsec support for the Innova family of network cards by Mellanox - Technologies. Innova network cards are comprised of a ConnectX chip - and an FPGA chip on one board. If you select this option, the - mlx5_core driver will include the Innova FPGA core and allow building - sandbox-specific client drivers. - -config MLX5_IPSEC - bool "Mellanox Technologies IPsec Connect-X support" - depends on MLX5_CORE_EN - depends on XFRM_OFFLOAD - depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD - select MLX5_ACCEL - help - Build IPsec support for the Connect-X family of network cards by Mellanox - Technologies. - Note: If you select this option, the mlx5_core driver will include - IPsec support for the Connect-X family. - config MLX5_EN_IPSEC - bool "IPSec XFRM cryptography-offload acceleration" + bool "Mellanox Technologies IPsec Connect-X support" depends on MLX5_CORE_EN depends on XFRM_OFFLOAD depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD - depends on MLX5_FPGA_IPSEC || MLX5_IPSEC help Build support for IPsec cryptography-offload acceleration in the NIC. - Note: Support for hardware with this capability needs to be selected - for this option to become available. - -config MLX5_FPGA_TLS - bool "Mellanox Technologies TLS Innova support" - depends on TLS_DEVICE - depends on TLS=y || MLX5_CORE=m - depends on MLX5_CORE_EN - depends on MLX5_FPGA - select MLX5_EN_TLS - help - Build TLS support for the Innova family of network cards by Mellanox - Technologies. Innova network cards are comprised of a ConnectX chip - and an FPGA chip on one board. If you select this option, the - mlx5_core driver will include the Innova FPGA core and allow building - sandbox-specific client drivers. -config MLX5_TLS +config MLX5_EN_TLS bool "Mellanox Technologies TLS Connect-X support" depends on TLS_DEVICE depends on TLS=y || MLX5_CORE=m depends on MLX5_CORE_EN - select MLX5_ACCEL - select MLX5_EN_TLS - help - Build TLS support for the Connect-X family of network cards by Mellanox - Technologies. - -config MLX5_EN_TLS - bool help Build support for TLS cryptography-offload acceleration in the NIC. - Note: Support for hardware with this capability needs to be selected - for this option to become available. config MLX5_SW_STEERING bool "Mellanox Technologies software-managed steering" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 4bc666714a35..81620c25c77e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -28,7 +28,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ - en/qos.o en/trap.o en/fs_tt_redirect.o en/selq.o + en/qos.o en/trap.o en/fs_tt_redirect.o en/selq.o lib/crypto.o # # Netdev extra @@ -88,17 +88,13 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib # # Accelerations & FPGA # -mlx5_core-$(CONFIG_MLX5_IPSEC) += accel/ipsec_offload.o -mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o -mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o -mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o - mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ - en_accel/ipsec_stats.o en_accel/ipsec_fs.o + en_accel/ipsec_stats.o en_accel/ipsec_fs.o \ + en_accel/ipsec_offload.o -mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \ +mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \ en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \ en_accel/ktls_tx.o en_accel/ktls_rx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h deleted file mode 100644 index 82b185121edb..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef __MLX5E_ACCEL_H__ -#define __MLX5E_ACCEL_H__ - -#ifdef CONFIG_MLX5_ACCEL - -#include <linux/skbuff.h> -#include <linux/netdevice.h> - -static inline bool is_metadata_hdr_valid(struct sk_buff *skb) -{ - __be16 *ethtype; - - if (unlikely(skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN)) - return false; - ethtype = (__be16 *)(skb->data + ETH_ALEN * 2); - if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE)) - return false; - return true; -} - -static inline void remove_metadata_hdr(struct sk_buff *skb) -{ - struct ethhdr *old_eth; - struct ethhdr *new_eth; - - /* Remove the metadata from the buffer */ - old_eth = (struct ethhdr *)skb->data; - new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN); - memmove(new_eth, old_eth, 2 * ETH_ALEN); - /* Ethertype is already in its new place */ - skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN); -} - -#endif /* CONFIG_MLX5_ACCEL */ - -#endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c deleted file mode 100644 index 09f5ce97af46..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/mlx5/device.h> - -#include "accel/ipsec.h" -#include "mlx5_core.h" -#include "fpga/ipsec.h" -#include "accel/ipsec_offload.h" - -void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops; - int err = 0; - - ipsec_ops = (mlx5_ipsec_offload_ops(mdev)) ? - mlx5_ipsec_offload_ops(mdev) : - mlx5_fpga_ipsec_ops(mdev); - - if (!ipsec_ops || !ipsec_ops->init) { - mlx5_core_dbg(mdev, "IPsec ops is not supported\n"); - return; - } - - err = ipsec_ops->init(mdev); - if (err) { - mlx5_core_warn_once(mdev, "Failed to start IPsec device, err = %d\n", err); - return; - } - - mdev->ipsec_ops = ipsec_ops; -} - -void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->cleanup) - return; - - ipsec_ops->cleanup(mdev); -} - -u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->device_caps) - return 0; - - return ipsec_ops->device_caps(mdev); -} -EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps); - -unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->counters_count) - return -EOPNOTSUPP; - - return ipsec_ops->counters_count(mdev); -} - -int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, - unsigned int count) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->counters_read) - return -EOPNOTSUPP; - - return ipsec_ops->counters_read(mdev, counters, count); -} - -void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *xfrm, - u32 *sa_handle) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - __be32 saddr[4] = {}, daddr[4] = {}; - - if (!ipsec_ops || !ipsec_ops->create_hw_context) - return ERR_PTR(-EOPNOTSUPP); - - if (!xfrm->attrs.is_ipv6) { - saddr[3] = xfrm->attrs.saddr.a4; - daddr[3] = xfrm->attrs.daddr.a4; - } else { - memcpy(saddr, xfrm->attrs.saddr.a6, sizeof(saddr)); - memcpy(daddr, xfrm->attrs.daddr.a6, sizeof(daddr)); - } - - return ipsec_ops->create_hw_context(mdev, xfrm, saddr, daddr, xfrm->attrs.spi, - xfrm->attrs.is_ipv6, sa_handle); -} - -void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->free_hw_context) - return; - - ipsec_ops->free_hw_context(context); -} - -struct mlx5_accel_esp_xfrm * -mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops; - struct mlx5_accel_esp_xfrm *xfrm; - - if (!ipsec_ops || !ipsec_ops->esp_create_xfrm) - return ERR_PTR(-EOPNOTSUPP); - - xfrm = ipsec_ops->esp_create_xfrm(mdev, attrs, flags); - if (IS_ERR(xfrm)) - return xfrm; - - xfrm->mdev = mdev; - return xfrm; -} -EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm); - -void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->esp_destroy_xfrm) - return; - - ipsec_ops->esp_destroy_xfrm(xfrm); -} -EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm); - -int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops; - - if (!ipsec_ops || !ipsec_ops->esp_modify_xfrm) - return -EOPNOTSUPP; - - return ipsec_ops->esp_modify_xfrm(xfrm, attrs); -} -EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h deleted file mode 100644 index fbb9c5415d53..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_ACCEL_IPSEC_H__ -#define __MLX5_ACCEL_IPSEC_H__ - -#include <linux/mlx5/driver.h> -#include <linux/mlx5/accel.h> - -#ifdef CONFIG_MLX5_ACCEL - -#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ - MLX5_ACCEL_IPSEC_CAP_DEVICE) - -unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev); -int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, - unsigned int count); - -void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *xfrm, - u32 *sa_handle); -void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context); - -void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); -void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); - -struct mlx5_accel_ipsec_ops { - u32 (*device_caps)(struct mlx5_core_dev *mdev); - unsigned int (*counters_count)(struct mlx5_core_dev *mdev); - int (*counters_read)(struct mlx5_core_dev *mdev, u64 *counters, unsigned int count); - void* (*create_hw_context)(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *xfrm, - const __be32 saddr[4], const __be32 daddr[4], - const __be32 spi, bool is_ipv6, u32 *sa_handle); - void (*free_hw_context)(void *context); - int (*init)(struct mlx5_core_dev *mdev); - void (*cleanup)(struct mlx5_core_dev *mdev); - struct mlx5_accel_esp_xfrm* (*esp_create_xfrm)(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags); - int (*esp_modify_xfrm)(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs); - void (*esp_destroy_xfrm)(struct mlx5_accel_esp_xfrm *xfrm); -}; - -#else - -#define MLX5_IPSEC_DEV(mdev) false - -static inline void * -mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *xfrm, - u32 *sa_handle) -{ - return NULL; -} - -static inline void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context) {} - -static inline void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) {} - -static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) {} - -#endif /* CONFIG_MLX5_ACCEL */ - -#endif /* __MLX5_ACCEL_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h deleted file mode 100644 index 970c66d19c1d..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ - -#ifndef __MLX5_IPSEC_OFFLOAD_H__ -#define __MLX5_IPSEC_OFFLOAD_H__ - -#include <linux/mlx5/driver.h> -#include "accel/ipsec.h" - -#ifdef CONFIG_MLX5_IPSEC - -const struct mlx5_accel_ipsec_ops *mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev); -static inline bool mlx5_is_ipsec_device(struct mlx5_core_dev *mdev) -{ - if (!MLX5_CAP_GEN(mdev, ipsec_offload)) - return false; - - if (!MLX5_CAP_GEN(mdev, log_max_dek)) - return false; - - if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) - return false; - - return MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) && - MLX5_CAP_ETH(mdev, insert_trailer); -} - -#else -static inline const struct mlx5_accel_ipsec_ops * -mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev) { return NULL; } -static inline bool mlx5_is_ipsec_device(struct mlx5_core_dev *mdev) -{ - return false; -} - -#endif /* CONFIG_MLX5_IPSEC */ -#endif /* __MLX5_IPSEC_OFFLOAD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c deleted file mode 100644 index 6c2b86a26863..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/mlx5/device.h> - -#include "accel/tls.h" -#include "mlx5_core.h" -#include "lib/mlx5.h" - -#ifdef CONFIG_MLX5_FPGA_TLS -#include "fpga/tls.h" - -int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx) -{ - return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, - start_offload_tcp_sn, p_swid, - direction_sx); -} - -void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - bool direction_sx) -{ - mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx); -} - -int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn) -{ - return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn); -} - -bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) -{ - return mlx5_fpga_is_tls_device(mdev) || - mlx5_accel_is_ktls_device(mdev); -} - -u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) -{ - return mlx5_fpga_tls_device_caps(mdev); -} - -int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) -{ - return mlx5_fpga_tls_init(mdev); -} - -void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) -{ - mlx5_fpga_tls_cleanup(mdev); -} -#endif - -#ifdef CONFIG_MLX5_TLS -int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info, - u32 *p_key_id) -{ - u32 sz_bytes; - void *key; - - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: { - struct tls12_crypto_info_aes_gcm_128 *info = - (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; - - key = info->key; - sz_bytes = sizeof(info->key); - break; - } - case TLS_CIPHER_AES_GCM_256: { - struct tls12_crypto_info_aes_gcm_256 *info = - (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; - - key = info->key; - sz_bytes = sizeof(info->key); - break; - } - default: - return -EINVAL; - } - - return mlx5_create_encryption_key(mdev, key, sz_bytes, - MLX5_ACCEL_OBJ_TLS_KEY, - p_key_id); -} - -void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) -{ - mlx5_destroy_encryption_key(mdev, key_id); -} -#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h deleted file mode 100644 index fd874f0c380a..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_ACCEL_TLS_H__ -#define __MLX5_ACCEL_TLS_H__ - -#include <linux/mlx5/driver.h> -#include <linux/tls.h> - -#ifdef CONFIG_MLX5_TLS -int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info, - u32 *p_key_id); -void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); - -static inline bool mlx5_accel_is_ktls_tx(struct mlx5_core_dev *mdev) -{ - return MLX5_CAP_GEN(mdev, tls_tx); -} - -static inline bool mlx5_accel_is_ktls_rx(struct mlx5_core_dev *mdev) -{ - return MLX5_CAP_GEN(mdev, tls_rx); -} - -static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) -{ - if (!mlx5_accel_is_ktls_tx(mdev) && - !mlx5_accel_is_ktls_rx(mdev)) - return false; - - if (!MLX5_CAP_GEN(mdev, log_max_dek)) - return false; - - return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); -} - -static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info) -{ - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - if (crypto_info->version == TLS_1_2_VERSION) - return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); - break; - } - - return false; -} -#else -static inline bool mlx5_accel_is_ktls_tx(struct mlx5_core_dev *mdev) -{ return false; } - -static inline bool mlx5_accel_is_ktls_rx(struct mlx5_core_dev *mdev) -{ return false; } - -static inline int -mlx5_ktls_create_key(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info, - u32 *p_key_id) { return -ENOTSUPP; } -static inline void -mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {} - -static inline bool -mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; } -static inline bool -mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info) { return false; } -#endif - -enum { - MLX5_ACCEL_TLS_TX = BIT(0), - MLX5_ACCEL_TLS_RX = BIT(1), - MLX5_ACCEL_TLS_V12 = BIT(2), - MLX5_ACCEL_TLS_V13 = BIT(3), - MLX5_ACCEL_TLS_LRO = BIT(4), - MLX5_ACCEL_TLS_IPV6 = BIT(5), - MLX5_ACCEL_TLS_AES_GCM128 = BIT(30), - MLX5_ACCEL_TLS_AES_GCM256 = BIT(31), -}; - -struct mlx5_ifc_tls_flow_bits { - u8 src_port[0x10]; - u8 dst_port[0x10]; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; - u8 ipv6[0x1]; - u8 direction_sx[0x1]; - u8 reserved_at_2[0x1e]; -}; - -#ifdef CONFIG_MLX5_FPGA_TLS -int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx); -void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - bool direction_sx); -int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn); -bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev); -u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev); -int mlx5_accel_tls_init(struct mlx5_core_dev *mdev); -void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev); - -#else - -static inline int -mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx) { return -ENOTSUPP; } -static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - bool direction_sx) { } -static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn) { return 0; } -static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) -{ - return mlx5_accel_is_ktls_device(mdev); -} -static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; } -static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; } -static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { } -#endif - -#endif /* __MLX5_ACCEL_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8653ac0fd865..50818081bdc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -354,7 +354,6 @@ enum { MLX5E_RQ_STATE_AM, MLX5E_RQ_STATE_NO_CSUM_COMPLETE, MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ - MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */ MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */ MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */ }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 08fd1370a8b0..1e8700957280 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -5,8 +5,7 @@ #include "en/txrx.h" #include "en/port.h" #include "en_accel/en_accel.h" -#include "accel/ipsec.h" -#include "fpga/ipsec.h" +#include "en_accel/ipsec_offload.h" static bool mlx5e_rx_is_xdp(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) @@ -207,7 +206,7 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); u16 stop_room; - stop_room = mlx5e_tls_get_stop_room(mdev, params); + stop_room = mlx5e_ktls_get_stop_room(mdev, params); stop_room += mlx5e_stop_room_for_max_wqe(mdev); if (is_mpwqe) /* A MPWQE can take up to the maximum-sized WQE + all the normal @@ -327,9 +326,6 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) return false; - if (mlx5_fpga_is_ipsec_device(mdev)) - return false; - if (params->xdp_prog) { /* XSK params are not considered here. If striding RQ is in use, * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will @@ -423,9 +419,6 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, int max_mtu; int i; - if (mlx5_fpga_is_ipsec_device(mdev)) - byte_count += MLX5E_METADATA_ETHER_LEN; - if (mlx5e_rx_is_linear_skb(params, xsk)) { int frag_stride; @@ -696,8 +689,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); bool allow_swp; - allow_swp = mlx5_geneve_tx_allowed(mdev) || - !!MLX5_IPSEC_DEV(mdev); + allow_swp = + mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev); mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); @@ -804,7 +797,7 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev) { - if (mlx5e_accel_is_ktls_rx(mdev)) + if (mlx5e_is_ktls_rx(mdev)) return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; @@ -833,7 +826,7 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, mlx5e_build_sq_param_common(mdev, param); param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */ - param->is_tls = mlx5e_accel_is_ktls_rx(mdev); + param->is_tls = mlx5e_is_ktls_rx(mdev); if (param->is_tls) param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 62cde3e87c2e..04c0a5e1c89a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -37,8 +37,8 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include "en_accel/ipsec_rxtx.h" -#include "en_accel/tls.h" -#include "en_accel/tls_rxtx.h" +#include "en_accel/ktls.h" +#include "en_accel/ktls_txrx.h" #include "en.h" #include "en/txrx.h" @@ -124,8 +124,9 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, #ifdef CONFIG_MLX5_EN_TLS /* May send SKBs and WQEs. */ - if (mlx5e_tls_skb_offloaded(skb)) - if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls))) + if (mlx5e_ktls_skb_offloaded(skb)) + if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb, + &state->tls))) return false; #endif @@ -174,7 +175,7 @@ static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq, struct mlx5_wqe_inline_seg *inlseg) { #ifdef CONFIG_MLX5_EN_TLS - mlx5e_tls_handle_tx_wqe(&wqe->ctrl, &state->tls); + mlx5e_ktls_handle_tx_wqe(&wqe->ctrl, &state->tls); #endif #ifdef CONFIG_MLX5_EN_IPSEC diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 299e3f0fcb5c..c280a18ff002 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -226,8 +226,7 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) return -EINVAL; } if (x->props.flags & XFRM_STATE_ESN && - !(mlx5_accel_ipsec_device_caps(priv->mdev) & - MLX5_ACCEL_IPSEC_CAP_ESN)) { + !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_CAP_ESN)) { netdev_info(netdev, "Cannot offload ESN xfrm states\n"); return -EINVAL; } @@ -275,8 +274,7 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) return -EINVAL; } if (x->props.family == AF_INET6 && - !(mlx5_accel_ipsec_device_caps(priv->mdev) & - MLX5_ACCEL_IPSEC_CAP_IPV6)) { + !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_CAP_IPV6)) { netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n"); return -EINVAL; } @@ -286,9 +284,6 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) static int mlx5e_xfrm_fs_add_rule(struct mlx5e_priv *priv, struct mlx5e_ipsec_sa_entry *sa_entry) { - if (!mlx5_is_ipsec_device(priv->mdev)) - return 0; - return mlx5e_accel_ipsec_fs_add_rule(priv, &sa_entry->xfrm->attrs, sa_entry->ipsec_obj_id, &sa_entry->ipsec_rule); @@ -297,9 +292,6 @@ static int mlx5e_xfrm_fs_add_rule(struct mlx5e_priv *priv, static void mlx5e_xfrm_fs_del_rule(struct mlx5e_priv *priv, struct mlx5e_ipsec_sa_entry *sa_entry) { - if (!mlx5_is_ipsec_device(priv->mdev)) - return; - mlx5e_accel_ipsec_fs_del_rule(priv, &sa_entry->xfrm->attrs, &sa_entry->ipsec_rule); } @@ -333,9 +325,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x) /* create xfrm */ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); - sa_entry->xfrm = - mlx5_accel_esp_create_xfrm(priv->mdev, &attrs, - MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA); + sa_entry->xfrm = mlx5_accel_esp_create_xfrm(priv->mdev, &attrs); if (IS_ERR(sa_entry->xfrm)) { err = PTR_ERR(sa_entry->xfrm); goto err_sa_entry; @@ -414,7 +404,7 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv) { struct mlx5e_ipsec *ipsec = NULL; - if (!MLX5_IPSEC_DEV(priv->mdev)) { + if (!mlx5_ipsec_device_caps(priv->mdev)) { netdev_dbg(priv->netdev, "Not an IPSec offload device\n"); return 0; } @@ -425,10 +415,7 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv) hash_init(ipsec->sadb_rx); spin_lock_init(&ipsec->sadb_rx_lock); - ida_init(&ipsec->halloc); ipsec->en_priv = priv; - ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) & - MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER); ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0, priv->netdev->name); if (!ipsec->wq) { @@ -452,7 +439,6 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) mlx5e_accel_ipsec_fs_cleanup(priv); destroy_workqueue(ipsec->wq); - ida_destroy(&ipsec->halloc); kfree(ipsec); priv->ipsec = NULL; } @@ -531,7 +517,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; struct net_device *netdev = priv->netdev; - if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) || + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) || !MLX5_CAP_ETH(mdev, swp)) { mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n"); return; @@ -550,15 +536,13 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) netdev->features |= NETIF_F_HW_ESP_TX_CSUM; netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM; - if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) || + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) || !MLX5_CAP_ETH(mdev, swp_lso)) { mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n"); return; } - if (mlx5_is_ipsec_device(mdev)) - netdev->gso_partial_features |= NETIF_F_GSO_ESP; - + netdev->gso_partial_features |= NETIF_F_GSO_ESP; mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n"); netdev->features |= NETIF_F_GSO_ESP; netdev->hw_features |= NETIF_F_GSO_ESP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 6164c7f59efb..a0e9dade09e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -40,7 +40,7 @@ #include <net/xfrm.h> #include <linux/idr.h> -#include "accel/ipsec.h" +#include "ipsec_offload.h" #define MLX5E_IPSEC_SADB_RX_BITS 10 #define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L @@ -55,24 +55,6 @@ struct mlx5e_ipsec_sw_stats { atomic64_t ipsec_tx_drop_no_state; atomic64_t ipsec_tx_drop_not_ip; atomic64_t ipsec_tx_drop_trailer; - atomic64_t ipsec_tx_drop_metadata; -}; - -struct mlx5e_ipsec_stats { - u64 ipsec_dec_in_packets; - u64 ipsec_dec_out_packets; - u64 ipsec_dec_bypass_packets; - u64 ipsec_enc_in_packets; - u64 ipsec_enc_out_packets; - u64 ipsec_enc_bypass_packets; - u64 ipsec_dec_drop_packets; - u64 ipsec_dec_auth_fail_packets; - u64 ipsec_enc_drop_packets; - u64 ipsec_add_sa_success; - u64 ipsec_add_sa_fail; - u64 ipsec_del_sa_success; - u64 ipsec_del_sa_fail; - u64 ipsec_cmd_drop; }; struct mlx5e_accel_fs_esp; @@ -81,11 +63,8 @@ struct mlx5e_ipsec_tx; struct mlx5e_ipsec { struct mlx5e_priv *en_priv; DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS); - bool no_trailer; - spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */ - struct ida halloc; + spinlock_t sadb_rx_lock; /* Protects sadb_rx */ struct mlx5e_ipsec_sw_stats sw_stats; - struct mlx5e_ipsec_stats stats; struct workqueue_struct *wq; struct mlx5e_accel_fs_esp *rx_fs; struct mlx5e_ipsec_tx *tx_fs; @@ -116,7 +95,6 @@ struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_rule ipsec_rule; }; -void mlx5e_ipsec_build_inverse_table(void); int mlx5e_ipsec_init(struct mlx5e_priv *priv); void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv); void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv); @@ -125,11 +103,6 @@ struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev, unsigned int handle); #else - -static inline void mlx5e_ipsec_build_inverse_table(void) -{ -} - static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv) { return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 17da23dff0ed..66b529e36ea1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -2,7 +2,7 @@ /* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ #include <linux/netdevice.h> -#include "accel/ipsec_offload.h" +#include "ipsec_offload.h" #include "ipsec_fs.h" #include "fs_core.h" @@ -700,9 +700,6 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv) { int err; - if (!mlx5_is_ipsec_device(priv->mdev) || !priv->ipsec) - return -EOPNOTSUPP; - err = fs_init_tx(priv); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h index 3389b3bb3ef8..b70953979709 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h @@ -6,10 +6,9 @@ #include "en.h" #include "ipsec.h" -#include "accel/ipsec_offload.h" +#include "ipsec_offload.h" #include "en/fs.h" -#ifdef CONFIG_MLX5_EN_IPSEC void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv); int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv); int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, @@ -19,8 +18,4 @@ int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv, void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv, struct mlx5_accel_esp_xfrm_attrs *attrs, struct mlx5e_ipsec_rule *ipsec_rule); -#else -static inline void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv) {} -static inline int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv) { return 0; } -#endif #endif /* __MLX5_IPSEC_STEERING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index d6667d38e1de..37c9880719cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -1,14 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ +/* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */ #include "mlx5_core.h" #include "ipsec_offload.h" #include "lib/mlx5.h" #include "en_accel/ipsec_fs.h" -#define MLX5_IPSEC_DEV_BASIC_CAPS (MLX5_ACCEL_IPSEC_CAP_DEVICE | MLX5_ACCEL_IPSEC_CAP_IPV6 | \ - MLX5_ACCEL_IPSEC_CAP_LSO) - struct mlx5_ipsec_sa_ctx { struct rhash_head hash; u32 enc_key_id; @@ -25,25 +22,37 @@ struct mlx5_ipsec_esp_xfrm { struct mlx5_accel_esp_xfrm accel_xfrm; }; -static u32 mlx5_ipsec_offload_device_caps(struct mlx5_core_dev *mdev) +u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) { - u32 caps = MLX5_IPSEC_DEV_BASIC_CAPS; + u32 caps; + + if (!MLX5_CAP_GEN(mdev, ipsec_offload)) + return 0; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return 0; + + if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) + return 0; - if (!mlx5_is_ipsec_device(mdev)) + if (!MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) || + !MLX5_CAP_ETH(mdev, insert_trailer)) return 0; if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) || !MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt)) return 0; + caps = MLX5_ACCEL_IPSEC_CAP_DEVICE | MLX5_ACCEL_IPSEC_CAP_IPV6 | + MLX5_ACCEL_IPSEC_CAP_LSO; + if (MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_encrypt) && MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt)) caps |= MLX5_ACCEL_IPSEC_CAP_ESP; - if (MLX5_CAP_IPSEC(mdev, ipsec_esn)) { + if (MLX5_CAP_IPSEC(mdev, ipsec_esn)) caps |= MLX5_ACCEL_IPSEC_CAP_ESN; - caps |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN; - } /* We can accommodate up to 2^24 different IPsec objects * because we use up to 24 bit in flow table metadata @@ -52,6 +61,7 @@ static u32 mlx5_ipsec_offload_device_caps(struct mlx5_core_dev *mdev) WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24); return caps; } +EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps); static int mlx5_ipsec_offload_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev, @@ -94,8 +104,7 @@ mlx5_ipsec_offload_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev, static struct mlx5_accel_esp_xfrm * mlx5_ipsec_offload_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) + const struct mlx5_accel_esp_xfrm_attrs *attrs) { struct mlx5_ipsec_esp_xfrm *mxfrm; int err = 0; @@ -274,11 +283,6 @@ static void mlx5_ipsec_offload_delete_sa_ctx(void *context) mutex_unlock(&mxfrm->lock); } -static int mlx5_ipsec_offload_init(struct mlx5_core_dev *mdev) -{ - return 0; -} - static int mlx5_modify_ipsec_obj(struct mlx5_core_dev *mdev, struct mlx5_ipsec_obj_attrs *attrs, u32 ipsec_id) @@ -366,20 +370,51 @@ change_sw_xfrm_attrs: return err; } -static const struct mlx5_accel_ipsec_ops ipsec_offload_ops = { - .device_caps = mlx5_ipsec_offload_device_caps, - .create_hw_context = mlx5_ipsec_offload_create_sa_ctx, - .free_hw_context = mlx5_ipsec_offload_delete_sa_ctx, - .init = mlx5_ipsec_offload_init, - .esp_create_xfrm = mlx5_ipsec_offload_esp_create_xfrm, - .esp_destroy_xfrm = mlx5_ipsec_offload_esp_destroy_xfrm, - .esp_modify_xfrm = mlx5_ipsec_offload_esp_modify_xfrm, -}; +void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *xfrm, + u32 *sa_handle) +{ + __be32 saddr[4] = {}, daddr[4] = {}; + + if (!xfrm->attrs.is_ipv6) { + saddr[3] = xfrm->attrs.saddr.a4; + daddr[3] = xfrm->attrs.daddr.a4; + } else { + memcpy(saddr, xfrm->attrs.saddr.a6, sizeof(saddr)); + memcpy(daddr, xfrm->attrs.daddr.a6, sizeof(daddr)); + } + + return mlx5_ipsec_offload_create_sa_ctx(mdev, xfrm, saddr, daddr, + xfrm->attrs.spi, + xfrm->attrs.is_ipv6, sa_handle); +} + +void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context) +{ + mlx5_ipsec_offload_delete_sa_ctx(context); +} -const struct mlx5_accel_ipsec_ops *mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev) +struct mlx5_accel_esp_xfrm * +mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, + const struct mlx5_accel_esp_xfrm_attrs *attrs) { - if (!mlx5_ipsec_offload_device_caps(mdev)) - return NULL; + struct mlx5_accel_esp_xfrm *xfrm; - return &ipsec_offload_ops; + xfrm = mlx5_ipsec_offload_esp_create_xfrm(mdev, attrs); + if (IS_ERR(xfrm)) + return xfrm; + + xfrm->mdev = mdev; + return xfrm; +} + +void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) +{ + mlx5_ipsec_offload_esp_destroy_xfrm(xfrm); +} + +int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + return mlx5_ipsec_offload_esp_modify_xfrm(xfrm, attrs); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h new file mode 100644 index 000000000000..7dac104e6ef1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_IPSEC_OFFLOAD_H__ +#define __MLX5_IPSEC_OFFLOAD_H__ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/accel.h> + +void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, + struct mlx5_accel_esp_xfrm *xfrm, + u32 *sa_handle); +void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context); +#endif /* __MLX5_IPSEC_OFFLOAD_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index b56fea142c24..9b65c765cbd9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -34,78 +34,16 @@ #include <crypto/aead.h> #include <net/xfrm.h> #include <net/esp.h> -#include "accel/ipsec_offload.h" +#include "ipsec_offload.h" #include "en_accel/ipsec_rxtx.h" #include "en_accel/ipsec.h" -#include "accel/accel.h" #include "en.h" enum { - MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11, - MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12, - MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17, -}; - -struct mlx5e_ipsec_rx_metadata { - unsigned char nexthdr; - __be32 sa_handle; -} __packed; - -enum { MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8, MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9, }; -struct mlx5e_ipsec_tx_metadata { - __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */ - __be16 seq; /* LSBs of the first TCP seq, only for LSO */ - u8 esp_next_proto; /* Next protocol of ESP */ -} __packed; - -struct mlx5e_ipsec_metadata { - unsigned char syndrome; - union { - unsigned char raw[5]; - /* from FPGA to host, on successful decrypt */ - struct mlx5e_ipsec_rx_metadata rx; - /* from host to FPGA */ - struct mlx5e_ipsec_tx_metadata tx; - } __packed content; - /* packet type ID field */ - __be16 ethertype; -} __packed; - -#define MAX_LSO_MSS 2048 - -/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */ -static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS]; - -static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb) -{ - return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size]; -} - -static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb) -{ - struct mlx5e_ipsec_metadata *mdata; - struct ethhdr *eth; - - if (unlikely(skb_cow_head(skb, sizeof(*mdata)))) - return ERR_PTR(-ENOMEM); - - eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata)); - skb->mac_header -= sizeof(*mdata); - mdata = (struct mlx5e_ipsec_metadata *)(eth + 1); - - memmove(skb->data, skb->data + sizeof(*mdata), - 2 * ETH_ALEN); - - eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE); - - memset(mdata->content.raw, 0, sizeof(mdata->content.raw)); - return mdata; -} - static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) { unsigned int alen = crypto_aead_authsize(x->data); @@ -244,40 +182,6 @@ void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, skb_store_bits(skb, iv_offset, &seqno, 8); } -static void mlx5e_ipsec_set_metadata(struct sk_buff *skb, - struct mlx5e_ipsec_metadata *mdata, - struct xfrm_offload *xo) -{ - struct ip_esp_hdr *esph; - struct tcphdr *tcph; - - if (skb_is_gso(skb)) { - /* Add LSO metadata indication */ - esph = ip_esp_hdr(skb); - tcph = inner_tcp_hdr(skb); - netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n", - skb->network_header, - skb->transport_header, - skb->inner_network_header, - skb->inner_transport_header); - netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n", - skb->len, skb_shinfo(skb)->gso_size, - ntohs(tcph->source), ntohs(tcph->dest), - ntohl(tcph->seq), ntohl(esph->seq_no)); - mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP; - mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb); - mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF); - } else { - mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD; - } - mdata->content.tx.esp_next_proto = xo->proto; - - netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n", - mdata->syndrome, mdata->content.tx.esp_next_proto, - ntohs(mdata->content.tx.mss_inv), - ntohs(mdata->content.tx.seq)); -} - void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, struct mlx5e_accel_tx_ipsec_state *ipsec_st, struct mlx5_wqe_inline_seg *inlseg) @@ -298,16 +202,14 @@ static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv, ipsec_st->x = x; ipsec_st->xo = xo; - if (mlx5_is_ipsec_device(priv->mdev)) { - aead = x->data; - alen = crypto_aead_authsize(aead); - blksize = ALIGN(crypto_aead_blocksize(aead), 4); - clen = ALIGN(skb->len + 2, blksize); - plen = max_t(u32, clen - skb->len, 4); - tailen = plen + alen; - ipsec_st->plen = plen; - ipsec_st->tailen = tailen; - } + aead = x->data; + alen = crypto_aead_authsize(aead); + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + clen = ALIGN(skb->len + 2, blksize); + plen = max_t(u32, clen - skb->len, 4); + tailen = plen + alen; + ipsec_st->plen = plen; + ipsec_st->tailen = tailen; return 0; } @@ -340,19 +242,17 @@ void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, ((struct iphdr *)skb_network_header(skb))->protocol : ((struct ipv6hdr *)skb_network_header(skb))->nexthdr; - if (mlx5_is_ipsec_device(priv->mdev)) { - eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); - eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER); - encap = x->encap; - if (!encap) { - eseg->trailer |= (l3_proto == IPPROTO_ESP) ? - cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) : - cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC); - } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) { - eseg->trailer |= (l3_proto == IPPROTO_ESP) ? - cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) : - cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC); - } + eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); + eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER); + encap = x->encap; + if (!encap) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC); + } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) { + eseg->trailer |= (l3_proto == IPPROTO_ESP) ? + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) : + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC); } } @@ -363,7 +263,6 @@ bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct xfrm_offload *xo = xfrm_offload(skb); struct mlx5e_ipsec_sa_entry *sa_entry; - struct mlx5e_ipsec_metadata *mdata; struct xfrm_state *x; struct sec_path *sp; @@ -392,19 +291,8 @@ bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, goto drop; } - if (MLX5_CAP_GEN(priv->mdev, fpga)) { - mdata = mlx5e_ipsec_add_metadata(skb); - if (IS_ERR(mdata)) { - atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); - goto drop; - } - } - sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; sa_entry->set_iv_op(skb, x, xo); - if (MLX5_CAP_GEN(priv->mdev, fpga)) - mlx5e_ipsec_set_metadata(skb, mdata, xo); - mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st); return true; @@ -414,79 +302,6 @@ drop: return false; } -static inline struct xfrm_state * -mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, - struct mlx5e_ipsec_metadata *mdata) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - struct xfrm_offload *xo; - struct xfrm_state *xs; - struct sec_path *sp; - u32 sa_handle; - - sp = secpath_set(skb); - if (unlikely(!sp)) { - atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc); - return NULL; - } - - sa_handle = be32_to_cpu(mdata->content.rx.sa_handle); - xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle); - if (unlikely(!xs)) { - atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss); - return NULL; - } - - sp = skb_sec_path(skb); - sp->xvec[sp->len++] = xs; - sp->olen++; - - xo = xfrm_offload(skb); - xo->flags = CRYPTO_DONE; - switch (mdata->syndrome) { - case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED: - xo->status = CRYPTO_SUCCESS; - if (likely(priv->ipsec->no_trailer)) { - xo->flags |= XFRM_ESP_NO_TRAILER; - xo->proto = mdata->content.rx.nexthdr; - } - break; - case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED: - xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED; - break; - case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO: - xo->status = CRYPTO_INVALID_PROTOCOL; - break; - default: - atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome); - return NULL; - } - return xs; -} - -struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, - struct sk_buff *skb, u32 *cqe_bcnt) -{ - struct mlx5e_ipsec_metadata *mdata; - struct xfrm_state *xs; - - if (!is_metadata_hdr_valid(skb)) - return skb; - - /* Use the metadata */ - mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN); - xs = mlx5e_ipsec_build_sp(netdev, skb, mdata); - if (unlikely(!xs)) { - kfree_skb(skb); - return NULL; - } - - remove_metadata_hdr(skb); - *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN; - - return skb; -} - enum { MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED, MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED, @@ -528,8 +343,6 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) { case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED: xo->status = CRYPTO_SUCCESS; - if (WARN_ON_ONCE(priv->ipsec->no_trailer)) - xo->flags |= XFRM_ESP_NO_TRAILER; break; case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED: xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED; @@ -541,21 +354,3 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome); } } - -void mlx5e_ipsec_build_inverse_table(void) -{ - u16 mss_inv; - u32 mss; - - /* Calculate 1/x inverse table for use in GSO data path. - * Using this table, we provide the IPSec accelerator with the value of - * 1/gso_size so that it can infer the position of each segment inside - * the GSO, and increment the ESP sequence number, and generate the IV. - * The HW needs this value in Q0.16 fixed-point number format - */ - mlx5e_ipsec_inverse_table[1] = htons(0xFFFF); - for (mss = 2; mss < MAX_LSO_MSS; mss++) { - mss_inv = div_u64(1ULL << 32, mss) >> 16; - mlx5e_ipsec_inverse_table[mss] = htons(mss_inv); - } -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index 428881e0adcb..0ae4e12ce528 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -53,9 +53,6 @@ struct mlx5e_accel_tx_ipsec_state { #ifdef CONFIG_MLX5_EN_IPSEC -struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, - struct sk_buff *skb, u32 *cqe_bcnt); - void mlx5e_ipsec_inverse_table_init(void); void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_offload *xo); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c index 5cb936541b9e..3aace1c2a763 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -35,27 +35,9 @@ #include <net/sock.h> #include "en.h" -#include "accel/ipsec.h" +#include "ipsec_offload.h" #include "fpga/sdk.h" #include "en_accel/ipsec.h" -#include "fpga/ipsec.h" - -static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) }, -}; static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) }, @@ -65,13 +47,11 @@ static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) }, - { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) }, }; #define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) -#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc) #define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw) @@ -103,45 +83,4 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw) return idx; } -static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_hw) -{ - return (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) ? NUM_IPSEC_HW_COUNTERS : 0; -} - -static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw) -{ - int ret = 0; - - if (priv->ipsec) - ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats, - NUM_IPSEC_HW_COUNTERS); - if (ret) - memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats)); -} - -static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_hw) -{ - unsigned int i; - - if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) - for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - mlx5e_ipsec_hw_stats_desc[i].format); - - return idx; -} - -static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_hw) -{ - int i; - - if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) - for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats, - mlx5e_ipsec_hw_stats_desc, - i); - return idx; -} - MLX5E_DEFINE_STATS_GRP(ipsec_sw, 0); -MLX5E_DEFINE_STATS_GRP(ipsec_hw, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index d93aadbf10da..814f2a56f633 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -2,11 +2,49 @@ // Copyright (c) 2019 Mellanox Technologies. #include "en.h" -#include "en_accel/tls.h" +#include "lib/mlx5.h" #include "en_accel/ktls.h" #include "en_accel/ktls_utils.h" #include "en_accel/fs_tcp.h" +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) +{ + u32 sz_bytes; + void *key; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + default: + return -EINVAL; + } + + return mlx5_create_encryption_key(mdev, key, sz_bytes, + MLX5_ACCEL_OBJ_TLS_KEY, + p_key_id); +} + +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + mlx5_destroy_encryption_key(mdev, key_id); +} + static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, enum tls_offload_ctx_dir direction, struct tls_crypto_info *crypto_info, @@ -59,15 +97,15 @@ void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - if (!mlx5e_accel_is_ktls_tx(mdev) && !mlx5e_accel_is_ktls_rx(mdev)) + if (!mlx5e_is_ktls_tx(mdev) && !mlx5e_is_ktls_rx(mdev)) return; - if (mlx5e_accel_is_ktls_tx(mdev)) { + if (mlx5e_is_ktls_tx(mdev)) { netdev->hw_features |= NETIF_F_HW_TLS_TX; netdev->features |= NETIF_F_HW_TLS_TX; } - if (mlx5e_accel_is_ktls_rx(mdev)) + if (mlx5e_is_ktls_rx(mdev)) netdev->hw_features |= NETIF_F_HW_TLS_RX; netdev->tlsdev_ops = &mlx5e_ktls_ops; @@ -92,7 +130,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) { int err; - if (!mlx5e_accel_is_ktls_rx(priv->mdev)) + if (!mlx5e_is_ktls_rx(priv->mdev)) return 0; priv->tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx"); @@ -112,7 +150,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv) { - if (!mlx5e_accel_is_ktls_rx(priv->mdev)) + if (!mlx5e_is_ktls_rx(priv->mdev)) return; if (priv->netdev->features & NETIF_F_HW_TLS_RX) @@ -120,3 +158,24 @@ void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv) destroy_workqueue(priv->tls->rx_wq); } + +int mlx5e_ktls_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *tls; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return 0; + + tls = kzalloc(sizeof(*tls), GFP_KERNEL); + if (!tls) + return -ENOMEM; + + priv->tls = tls; + return 0; +} + +void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) +{ + kfree(priv->tls); + priv->tls = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index 5833deb2354c..d016624fbc9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -4,9 +4,42 @@ #ifndef __MLX5E_KTLS_H__ #define __MLX5E_KTLS_H__ +#include <linux/tls.h> +#include <net/tls.h> #include "en.h" #ifdef CONFIG_MLX5_EN_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id); +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); + +static inline bool mlx5e_is_ktls_device(struct mlx5_core_dev *mdev) +{ + if (is_kdump_kernel()) + return false; + + if (!MLX5_CAP_GEN(mdev, tls_tx) && !MLX5_CAP_GEN(mdev, tls_rx)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); +} + +static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) +{ + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); + break; + } + + return false; +} void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv); int mlx5e_ktls_init_rx(struct mlx5e_priv *priv); @@ -16,26 +49,36 @@ struct mlx5e_ktls_resync_resp * mlx5e_ktls_rx_resync_create_resp_list(void); void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list); -static inline bool mlx5e_accel_is_ktls_tx(struct mlx5_core_dev *mdev) +static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev) { - return !is_kdump_kernel() && - mlx5_accel_is_ktls_tx(mdev); + return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx); } -static inline bool mlx5e_accel_is_ktls_rx(struct mlx5_core_dev *mdev) +static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) { - return !is_kdump_kernel() && - mlx5_accel_is_ktls_rx(mdev); + return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_rx); } -static inline bool mlx5e_accel_is_ktls_device(struct mlx5_core_dev *mdev) -{ - return !is_kdump_kernel() && - mlx5_accel_is_ktls_device(mdev); -} +struct mlx5e_tls_sw_stats { + atomic64_t tx_tls_ctx; + atomic64_t tx_tls_del; + atomic64_t rx_tls_ctx; + atomic64_t rx_tls_del; +}; -#else +struct mlx5e_tls { + struct mlx5e_tls_sw_stats sw_stats; + struct workqueue_struct *rx_wq; +}; +int mlx5e_ktls_init(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup(struct mlx5e_priv *priv); + +int mlx5e_ktls_get_count(struct mlx5e_priv *priv); +int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data); +int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data); + +#else static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) { } @@ -64,10 +107,23 @@ mlx5e_ktls_rx_resync_create_resp_list(void) static inline void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {} -static inline bool mlx5e_accel_is_ktls_tx(struct mlx5_core_dev *mdev) { return false; } -static inline bool mlx5e_accel_is_ktls_rx(struct mlx5_core_dev *mdev) { return false; } -static inline bool mlx5e_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; } +static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) +{ + return false; +} + +static inline int mlx5e_ktls_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { } +static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { return 0; } +static inline int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + return 0; +} +static inline int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) +{ + return 0; +} #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 96064a2033f7..0bb0633b7542 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -3,7 +3,7 @@ #include <net/inet6_hashtables.h> #include "en_accel/en_accel.h" -#include "en_accel/tls.h" +#include "en_accel/ktls.h" #include "en_accel/ktls_txrx.h" #include "en_accel/ktls_utils.h" #include "en_accel/fs_tcp.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c index 56e7b2aee85f..2ab46c4247ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c @@ -36,14 +36,7 @@ #include "en.h" #include "fpga/sdk.h" -#include "en_accel/tls.h" - -static const struct counter_desc mlx5e_tls_sw_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) }, - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) }, - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) }, - { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) }, -}; +#include "en_accel/ktls.h" static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) }, @@ -55,51 +48,43 @@ static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { #define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) -static const struct counter_desc *get_tls_atomic_stats(struct mlx5e_priv *priv) -{ - if (!priv->tls) - return NULL; - if (mlx5e_accel_is_ktls_device(priv->mdev)) - return mlx5e_ktls_sw_stats_desc; - return mlx5e_tls_sw_stats_desc; -} - -int mlx5e_tls_get_count(struct mlx5e_priv *priv) +int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { if (!priv->tls) return 0; - if (mlx5e_accel_is_ktls_device(priv->mdev)) - return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); - return ARRAY_SIZE(mlx5e_tls_sw_stats_desc); + + return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); } -int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) +int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { - const struct counter_desc *stats_desc; unsigned int i, n, idx = 0; - stats_desc = get_tls_atomic_stats(priv); - n = mlx5e_tls_get_count(priv); + if (!priv->tls) + return 0; + + n = mlx5e_ktls_get_count(priv); for (i = 0; i < n; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - stats_desc[i].format); + mlx5e_ktls_sw_stats_desc[i].format); return n; } -int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) +int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data) { - const struct counter_desc *stats_desc; unsigned int i, n, idx = 0; - stats_desc = get_tls_atomic_stats(priv); - n = mlx5e_tls_get_count(priv); + if (!priv->tls) + return 0; + + n = mlx5e_ktls_get_count(priv); for (i = 0; i < n; i++) - data[idx++] = - MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, - stats_desc, i); + data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, + mlx5e_ktls_sw_stats_desc, + i); return n; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index aaf11c66bf4c..4b6f0d1ea59a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2019 Mellanox Technologies. -#include "en_accel/tls.h" +#include "en_accel/ktls.h" #include "en_accel/ktls_txrx.h" #include "en_accel/ktls_utils.h" @@ -27,7 +27,7 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *pa { u16 num_dumps, stop_room = 0; - if (!mlx5e_accel_is_ktls_tx(mdev)) + if (!mlx5e_is_ktls_tx(mdev)) return 0; num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); @@ -448,14 +448,26 @@ err_out: return MLX5E_KTLS_SYNC_FAIL; } -bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq, - struct sk_buff *skb, int datalen, +bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, + struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state) { struct mlx5e_ktls_offload_context_tx *priv_tx; struct mlx5e_sq_stats *stats = sq->stats; + struct tls_context *tls_ctx; + int datalen; u32 seq; + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + if (!datalen) + return true; + + mlx5e_tx_mpwqe_ensure_complete(sq); + + tls_ctx = tls_get_ctx(skb->sk); + if (WARN_ON_ONCE(tls_ctx->netdev != netdev)) + goto err_out; + priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index 08c9d5134479..2dd78dd4ad65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -16,8 +16,8 @@ struct mlx5e_accel_tx_tls_state { u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq, - struct sk_buff *skb, int datalen, +bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, + struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state); void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 *cqe_bcnt); @@ -48,6 +48,18 @@ mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) { return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state); } + +static inline bool mlx5e_ktls_skb_offloaded(struct sk_buff *skb) +{ + return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); +} + +static inline void +mlx5e_ktls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, + struct mlx5e_accel_tx_tls_state *state) +{ + cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); +} #else static inline bool mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, @@ -69,6 +81,18 @@ mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) return false; } +static inline u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + return 0; +} + +static inline void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe, + u32 *cqe_bcnt) +{ +} #endif /* CONFIG_MLX5_EN_TLS */ #endif /* __MLX5E_TLS_TXRX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h index e5c180f2403b..0dc715c4c10d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h @@ -6,7 +6,6 @@ #include <net/tls.h> #include "en.h" -#include "accel/tls.h" enum { MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c deleted file mode 100644 index b8fc863aa68d..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/netdevice.h> -#include <net/ipv6.h> -#include "en_accel/tls.h" -#include "accel/tls.h" - -static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - - MLX5_SET(tls_flow, flow, ipv6, 0); - memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4)); - memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4), - &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4)); -} - -#if IS_ENABLED(CONFIG_IPV6) -static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - - MLX5_SET(tls_flow, flow, ipv6, 1); - memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); - memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); -} -#endif - -static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - - memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport, - MLX5_FLD_SZ_BYTES(tls_flow, src_port)); - memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport, - MLX5_FLD_SZ_BYTES(tls_flow, dst_port)); -} - -static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps) -{ - switch (sk->sk_family) { - case AF_INET: - mlx5e_tls_set_ipv4_flow(flow, sk); - break; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - if (!sk->sk_ipv6only && - ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) { - mlx5e_tls_set_ipv4_flow(flow, sk); - break; - } - if (!(caps & MLX5_ACCEL_TLS_IPV6)) - goto error_out; - - mlx5e_tls_set_ipv6_flow(flow, sk); - break; -#endif - default: - goto error_out; - } - - mlx5e_tls_set_flow_tcp_ports(flow, sk); - return 0; -error_out: - return -EINVAL; -} - -static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk, - enum tls_offload_ctx_dir direction, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct mlx5_core_dev *mdev = priv->mdev; - u32 caps = mlx5_accel_tls_device_caps(mdev); - int ret = -ENOMEM; - void *flow; - u32 swid; - - flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL); - if (!flow) - return ret; - - ret = mlx5e_tls_set_flow(flow, sk, caps); - if (ret) - goto free_flow; - - ret = mlx5_accel_tls_add_flow(mdev, flow, crypto_info, - start_offload_tcp_sn, &swid, - direction == TLS_OFFLOAD_CTX_DIR_TX); - if (ret < 0) - goto free_flow; - - if (direction == TLS_OFFLOAD_CTX_DIR_TX) { - struct mlx5e_tls_offload_context_tx *tx_ctx = - mlx5e_get_tls_tx_context(tls_ctx); - - tx_ctx->swid = htonl(swid); - tx_ctx->expected_seq = start_offload_tcp_sn; - } else { - struct mlx5e_tls_offload_context_rx *rx_ctx = - mlx5e_get_tls_rx_context(tls_ctx); - - rx_ctx->handle = htonl(swid); - } - - return 0; -free_flow: - kfree(flow); - return ret; -} - -static void mlx5e_tls_del(struct net_device *netdev, - struct tls_context *tls_ctx, - enum tls_offload_ctx_dir direction) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - unsigned int handle; - - handle = ntohl((direction == TLS_OFFLOAD_CTX_DIR_TX) ? - mlx5e_get_tls_tx_context(tls_ctx)->swid : - mlx5e_get_tls_rx_context(tls_ctx)->handle); - - mlx5_accel_tls_del_flow(priv->mdev, handle, - direction == TLS_OFFLOAD_CTX_DIR_TX); -} - -static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk, - u32 seq, u8 *rcd_sn_data, - enum tls_offload_ctx_dir direction) -{ - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_tls_offload_context_rx *rx_ctx; - __be64 rcd_sn = *(__be64 *)rcd_sn_data; - - if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX)) - return -EINVAL; - rx_ctx = mlx5e_get_tls_rx_context(tls_ctx); - - netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq, - be64_to_cpu(rcd_sn)); - mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn); - atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply); - - return 0; -} - -static const struct tlsdev_ops mlx5e_tls_ops = { - .tls_dev_add = mlx5e_tls_add, - .tls_dev_del = mlx5e_tls_del, - .tls_dev_resync = mlx5e_tls_resync, -}; - -void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) -{ - struct net_device *netdev = priv->netdev; - u32 caps; - - if (mlx5e_accel_is_ktls_device(priv->mdev)) { - mlx5e_ktls_build_netdev(priv); - return; - } - - /* FPGA */ - if (!mlx5e_accel_is_tls_device(priv->mdev)) - return; - - caps = mlx5_accel_tls_device_caps(priv->mdev); - if (caps & MLX5_ACCEL_TLS_TX) { - netdev->features |= NETIF_F_HW_TLS_TX; - netdev->hw_features |= NETIF_F_HW_TLS_TX; - } - - if (caps & MLX5_ACCEL_TLS_RX) { - netdev->features |= NETIF_F_HW_TLS_RX; - netdev->hw_features |= NETIF_F_HW_TLS_RX; - } - - if (!(caps & MLX5_ACCEL_TLS_LRO)) { - netdev->features &= ~NETIF_F_LRO; - netdev->hw_features &= ~NETIF_F_LRO; - } - - netdev->tlsdev_ops = &mlx5e_tls_ops; -} - -int mlx5e_tls_init(struct mlx5e_priv *priv) -{ - struct mlx5e_tls *tls; - - if (!mlx5e_accel_is_tls_device(priv->mdev)) - return 0; - - tls = kzalloc(sizeof(*tls), GFP_KERNEL); - if (!tls) - return -ENOMEM; - - priv->tls = tls; - return 0; -} - -void mlx5e_tls_cleanup(struct mlx5e_priv *priv) -{ - struct mlx5e_tls *tls = priv->tls; - - if (!tls) - return; - - kfree(tls); - priv->tls = NULL; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h deleted file mode 100644 index 62ecf14bf86a..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ -#ifndef __MLX5E_TLS_H__ -#define __MLX5E_TLS_H__ - -#include "accel/tls.h" -#include "en_accel/ktls.h" - -#ifdef CONFIG_MLX5_EN_TLS -#include <net/tls.h> -#include "en.h" - -struct mlx5e_tls_sw_stats { - atomic64_t tx_tls_ctx; - atomic64_t tx_tls_del; - atomic64_t tx_tls_drop_metadata; - atomic64_t tx_tls_drop_resync_alloc; - atomic64_t tx_tls_drop_no_sync_data; - atomic64_t tx_tls_drop_bypass_required; - atomic64_t rx_tls_ctx; - atomic64_t rx_tls_del; - atomic64_t rx_tls_drop_resync_request; - atomic64_t rx_tls_resync_request; - atomic64_t rx_tls_resync_reply; - atomic64_t rx_tls_auth_fail; -}; - -struct mlx5e_tls { - struct mlx5e_tls_sw_stats sw_stats; - struct workqueue_struct *rx_wq; -}; - -struct mlx5e_tls_offload_context_tx { - struct tls_offload_context_tx base; - u32 expected_seq; - __be32 swid; -}; - -static inline struct mlx5e_tls_offload_context_tx * -mlx5e_get_tls_tx_context(struct tls_context *tls_ctx) -{ - BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_tx) > - TLS_OFFLOAD_CONTEXT_SIZE_TX); - return container_of(tls_offload_ctx_tx(tls_ctx), - struct mlx5e_tls_offload_context_tx, - base); -} - -struct mlx5e_tls_offload_context_rx { - struct tls_offload_context_rx base; - __be32 handle; -}; - -static inline struct mlx5e_tls_offload_context_rx * -mlx5e_get_tls_rx_context(struct tls_context *tls_ctx) -{ - BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_rx) > - TLS_OFFLOAD_CONTEXT_SIZE_RX); - return container_of(tls_offload_ctx_rx(tls_ctx), - struct mlx5e_tls_offload_context_rx, - base); -} - -static inline bool mlx5e_is_tls_on(struct mlx5e_priv *priv) -{ - return priv->tls; -} - -void mlx5e_tls_build_netdev(struct mlx5e_priv *priv); -int mlx5e_tls_init(struct mlx5e_priv *priv); -void mlx5e_tls_cleanup(struct mlx5e_priv *priv); - -int mlx5e_tls_get_count(struct mlx5e_priv *priv); -int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data); -int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); - -static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev) -{ - return !is_kdump_kernel() && - mlx5_accel_is_tls_device(mdev); -} - -#else - -static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) -{ - if (!is_kdump_kernel() && - mlx5_accel_is_ktls_device(priv->mdev)) - mlx5e_ktls_build_netdev(priv); -} - -static inline bool mlx5e_is_tls_on(struct mlx5e_priv *priv) { return false; } -static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; } -static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { } -static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } -static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; } -static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; } -static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; } - -#endif - -#endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c deleted file mode 100644 index a05580cea481..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include "en_accel/tls.h" -#include "en_accel/tls_rxtx.h" -#include "accel/accel.h" - -#include <net/inet6_hashtables.h> -#include <linux/ipv6.h> - -#define SYNDROM_DECRYPTED 0x30 -#define SYNDROM_RESYNC_REQUEST 0x31 -#define SYNDROM_AUTH_FAILED 0x32 - -#define SYNDROME_OFFLOAD_REQUIRED 32 -#define SYNDROME_SYNC 33 - -struct sync_info { - u64 rcd_sn; - s32 sync_len; - int nr_frags; - skb_frag_t frags[MAX_SKB_FRAGS]; -}; - -struct recv_metadata_content { - u8 syndrome; - u8 reserved; - __be32 sync_seq; -} __packed; - -struct send_metadata_content { - /* One byte of syndrome followed by 3 bytes of swid */ - __be32 syndrome_swid; - __be16 first_seq; -} __packed; - -struct mlx5e_tls_metadata { - union { - /* from fpga to host */ - struct recv_metadata_content recv; - /* from host to fpga */ - struct send_metadata_content send; - unsigned char raw[6]; - } __packed content; - /* packet type ID field */ - __be16 ethertype; -} __packed; - -static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid) -{ - struct mlx5e_tls_metadata *pet; - struct ethhdr *eth; - - if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata))) - return -ENOMEM; - - eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata)); - skb->mac_header -= sizeof(struct mlx5e_tls_metadata); - pet = (struct mlx5e_tls_metadata *)(eth + 1); - - memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata), - 2 * ETH_ALEN); - - eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE); - pet->content.send.syndrome_swid = - htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid; - - return 0; -} - -static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context_tx *context, - u32 tcp_seq, struct sync_info *info) -{ - int remaining, i = 0, ret = -EINVAL; - struct tls_record_info *record; - unsigned long flags; - s32 sync_size; - - spin_lock_irqsave(&context->base.lock, flags); - record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn); - - if (unlikely(!record)) - goto out; - - sync_size = tcp_seq - tls_record_start_seq(record); - info->sync_len = sync_size; - if (unlikely(sync_size < 0)) { - if (tls_record_is_start_marker(record)) - goto done; - - goto out; - } - - remaining = sync_size; - while (remaining > 0) { - info->frags[i] = record->frags[i]; - __skb_frag_ref(&info->frags[i]); - remaining -= skb_frag_size(&info->frags[i]); - - if (remaining < 0) - skb_frag_size_add(&info->frags[i], remaining); - - i++; - } - info->nr_frags = i; -done: - ret = 0; -out: - spin_unlock_irqrestore(&context->base.lock, flags); - return ret; -} - -static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb, - struct sk_buff *nskb, u32 tcp_seq, - int headln, __be64 rcd_sn) -{ - struct mlx5e_tls_metadata *pet; - u8 syndrome = SYNDROME_SYNC; - struct iphdr *iph; - struct tcphdr *th; - int data_len, mss; - - nskb->dev = skb->dev; - skb_reset_mac_header(nskb); - skb_set_network_header(nskb, skb_network_offset(skb)); - skb_set_transport_header(nskb, skb_transport_offset(skb)); - memcpy(nskb->data, skb->data, headln); - memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn)); - - iph = ip_hdr(nskb); - iph->tot_len = htons(nskb->len - skb_network_offset(nskb)); - th = tcp_hdr(nskb); - data_len = nskb->len - headln; - tcp_seq -= data_len; - th->seq = htonl(tcp_seq); - - mss = nskb->dev->mtu - (headln - skb_network_offset(nskb)); - skb_shinfo(nskb)->gso_size = 0; - if (data_len > mss) { - skb_shinfo(nskb)->gso_size = mss; - skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss); - } - skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; - - pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr)); - memcpy(pet, &syndrome, sizeof(syndrome)); - pet->content.send.first_seq = htons(tcp_seq); - - /* MLX5 devices don't care about the checksum partial start, offset - * and pseudo header - */ - nskb->ip_summed = CHECKSUM_PARTIAL; - - nskb->queue_mapping = skb->queue_mapping; -} - -static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, - struct mlx5e_txqsq *sq, struct sk_buff *skb, - struct mlx5e_tls *tls) -{ - u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); - struct sync_info info; - struct sk_buff *nskb; - int linear_len = 0; - int headln; - int i; - - sq->stats->tls_ooo++; - - if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) { - /* We might get here if a retransmission reaches the driver - * after the relevant record is acked. - * It should be safe to drop the packet in this case - */ - atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data); - goto err_out; - } - - if (unlikely(info.sync_len < 0)) { - u32 payload; - - headln = skb_transport_offset(skb) + tcp_hdrlen(skb); - payload = skb->len - headln; - if (likely(payload <= -info.sync_len)) - /* SKB payload doesn't require offload - */ - return true; - - atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required); - goto err_out; - } - - if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) { - atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata); - goto err_out; - } - - headln = skb_transport_offset(skb) + tcp_hdrlen(skb); - linear_len += headln + sizeof(info.rcd_sn); - nskb = alloc_skb(linear_len, GFP_ATOMIC); - if (unlikely(!nskb)) { - atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc); - goto err_out; - } - - context->expected_seq = tcp_seq + skb->len - headln; - skb_put(nskb, linear_len); - for (i = 0; i < info.nr_frags; i++) - skb_shinfo(nskb)->frags[i] = info.frags[i]; - - skb_shinfo(nskb)->nr_frags = info.nr_frags; - nskb->data_len = info.sync_len; - nskb->len += info.sync_len; - sq->stats->tls_resync_bytes += nskb->len; - mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, - cpu_to_be64(info.rcd_sn)); - mlx5e_sq_xmit_simple(sq, nskb, true); - - return true; - -err_out: - dev_kfree_skb_any(skb); - return false; -} - -bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, - struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_tls_offload_context_tx *context; - struct tls_context *tls_ctx; - u32 expected_seq; - int datalen; - u32 skb_seq; - - datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); - if (!datalen) - return true; - - mlx5e_tx_mpwqe_ensure_complete(sq); - - tls_ctx = tls_get_ctx(skb->sk); - if (WARN_ON_ONCE(tls_ctx->netdev != netdev)) - goto err_out; - - if (mlx5e_accel_is_ktls_tx(sq->mdev)) - return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, datalen, state); - - /* FPGA */ - skb_seq = ntohl(tcp_hdr(skb)->seq); - context = mlx5e_get_tls_tx_context(tls_ctx); - expected_seq = context->expected_seq; - - if (unlikely(expected_seq != skb_seq)) - return mlx5e_tls_handle_ooo(context, sq, skb, priv->tls); - - if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) { - atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata); - dev_kfree_skb_any(skb); - return false; - } - - context->expected_seq = skb_seq + datalen; - return true; - -err_out: - dev_kfree_skb_any(skb); - return false; -} - -static int tls_update_resync_sn(struct net_device *netdev, - struct sk_buff *skb, - struct mlx5e_tls_metadata *mdata) -{ - struct sock *sk = NULL; - struct iphdr *iph; - struct tcphdr *th; - __be32 seq; - - if (mdata->ethertype != htons(ETH_P_IP)) - return -EINVAL; - - iph = (struct iphdr *)(mdata + 1); - - th = ((void *)iph) + iph->ihl * 4; - - if (iph->version == 4) { - sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo, - iph->saddr, th->source, iph->daddr, - th->dest, netdev->ifindex); -#if IS_ENABLED(CONFIG_IPV6) - } else { - struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph; - - sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo, - &ipv6h->saddr, th->source, - &ipv6h->daddr, ntohs(th->dest), - netdev->ifindex, 0); -#endif - } - if (!sk || sk->sk_state == TCP_TIME_WAIT) { - struct mlx5e_priv *priv = netdev_priv(netdev); - - atomic64_inc(&priv->tls->sw_stats.rx_tls_drop_resync_request); - goto out; - } - - skb->sk = sk; - skb->destructor = sock_edemux; - - memcpy(&seq, &mdata->content.recv.sync_seq, sizeof(seq)); - tls_offload_rx_resync_request(sk, seq); -out: - return 0; -} - -/* FPGA tls rx handler */ -void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb, - u32 *cqe_bcnt) -{ - struct mlx5e_tls_metadata *mdata; - struct mlx5e_priv *priv; - - /* Use the metadata */ - mdata = (struct mlx5e_tls_metadata *)(skb->data + ETH_HLEN); - switch (mdata->content.recv.syndrome) { - case SYNDROM_DECRYPTED: - skb->decrypted = 1; - break; - case SYNDROM_RESYNC_REQUEST: - tls_update_resync_sn(rq->netdev, skb, mdata); - priv = netdev_priv(rq->netdev); - atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_request); - break; - case SYNDROM_AUTH_FAILED: - /* Authentication failure will be observed and verified by kTLS */ - priv = netdev_priv(rq->netdev); - atomic64_inc(&priv->tls->sw_stats.rx_tls_auth_fail); - break; - default: - /* Bypass the metadata header to others */ - return; - } - - remove_metadata_hdr(skb); - *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN; -} - -u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) -{ - if (!mlx5e_accel_is_tls_device(mdev)) - return 0; - - if (mlx5e_accel_is_ktls_device(mdev)) - return mlx5e_ktls_get_stop_room(mdev, params); - - /* FPGA */ - /* Resync SKB. */ - return mlx5e_stop_room_for_max_wqe(mdev); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h deleted file mode 100644 index 0ca0a023fb8d..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5E_TLS_RXTX_H__ -#define __MLX5E_TLS_RXTX_H__ - -#include "accel/accel.h" -#include "en_accel/ktls_txrx.h" - -#ifdef CONFIG_MLX5_EN_TLS - -#include <linux/skbuff.h> -#include "en.h" -#include "en/txrx.h" - -u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); - -bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, - struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state); - -static inline bool mlx5e_tls_skb_offloaded(struct sk_buff *skb) -{ - return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); -} - -static inline void -mlx5e_tls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, - struct mlx5e_accel_tx_tls_state *state) -{ - cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); -} - -void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb, - u32 *cqe_bcnt); - -static inline void -mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, - struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) -{ - if (unlikely(get_cqe_tls_offload(cqe))) /* cqe bit indicates a TLS device */ - return mlx5e_ktls_handle_rx_skb(rq, skb, cqe, cqe_bcnt); - - if (unlikely(test_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state) && is_metadata_hdr_valid(skb))) - return mlx5e_tls_handle_rx_skb_metadata(rq, skb, cqe_bcnt); -} - -#else - -static inline bool -mlx5e_accel_is_tls(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { return false; } -static inline void -mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, - struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) {} -static inline u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) -{ - return 0; -} - -#endif /* CONFIG_MLX5_EN_TLS */ - -#endif /* __MLX5E_TLS_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2f1dedc721d1..12b72a0bcb1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -47,9 +47,8 @@ #include "en_rep.h" #include "en_accel/ipsec.h" #include "en_accel/en_accel.h" -#include "en_accel/tls.h" -#include "accel/ipsec.h" -#include "accel/tls.h" +#include "en_accel/ktls.h" +#include "en_accel/ipsec_offload.h" #include "lib/vxlan.h" #include "lib/clock.h" #include "en/port.h" @@ -68,7 +67,6 @@ #include "en/ptp.h" #include "qos.h" #include "en/trap.h" -#include "fpga/ipsec.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -1036,9 +1034,6 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, if (err) goto err_destroy_rq; - if (mlx5e_is_tls_on(rq->priv) && !mlx5e_accel_is_ktls_device(mdev)) - __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */ - if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); @@ -1334,7 +1329,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); - if (MLX5_IPSEC_DEV(c->priv->mdev)) + if (mlx5_ipsec_device_caps(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (param->is_mpw) set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); @@ -4471,12 +4466,6 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return -EINVAL; } - if (mlx5_fpga_is_ipsec_device(priv->mdev)) { - netdev_warn(netdev, - "XDP is not available on Innova cards with IPsec support\n"); - return -EINVAL; - } - new_params = priv->channels.params; new_params.xdp_prog = prog; @@ -4934,7 +4923,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) mlx5e_set_netdev_dev_addr(netdev); mlx5e_ipsec_build_netdev(priv); - mlx5e_tls_build_netdev(priv); + mlx5e_ktls_build_netdev(priv); } void mlx5e_create_q_counters(struct mlx5e_priv *priv) @@ -4996,7 +4985,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err); - err = mlx5e_tls_init(priv); + err = mlx5e_ktls_init(priv); if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); @@ -5007,7 +4996,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); - mlx5e_tls_cleanup(priv); + mlx5e_ktls_cleanup(priv); mlx5e_ipsec_cleanup(priv); mlx5e_fs_cleanup(priv); } @@ -5704,7 +5693,6 @@ int mlx5e_init(void) { int ret; - mlx5e_ipsec_build_inverse_table(); mlx5e_build_ptys2ethtool_map(); ret = auxiliary_driver_register(&mlx5e_driver); if (ret) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 6b7e7ea6ded2..47f7b4c034cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1112,7 +1112,6 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { &MLX5E_STATS_GRP(per_port_buff_congest), #ifdef CONFIG_MLX5_EN_IPSEC &MLX5E_STATS_GRP(ipsec_sw), - &MLX5E_STATS_GRP(ipsec_hw), #endif &MLX5E_STATS_GRP(ptp), }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 56bb58704bf9..a5f6fd16b665 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -48,10 +48,9 @@ #include "en_rep.h" #include "en/rep/tc.h" #include "ipoib/ipoib.h" -#include "accel/ipsec.h" -#include "fpga/ipsec.h" +#include "en_accel/ipsec_offload.h" #include "en_accel/ipsec_rxtx.h" -#include "en_accel/tls_rxtx.h" +#include "en_accel/ktls_txrx.h" #include "en/xdp.h" #include "en/xsk/rx.h" #include "en/health.h" @@ -1416,7 +1415,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, skb->mac_len = ETH_HLEN; - mlx5e_tls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt); + if (unlikely(get_cqe_tls_offload(cqe))) + mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt); if (unlikely(mlx5_ipsec_is_rx_flow(cqe))) mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe); @@ -2383,46 +2383,6 @@ const struct mlx5e_rx_handlers mlx5i_rx_handlers = { }; #endif /* CONFIG_MLX5_CORE_IPOIB */ -#ifdef CONFIG_MLX5_EN_IPSEC - -static void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) -{ - struct mlx5_wq_cyc *wq = &rq->wqe.wq; - struct mlx5e_wqe_frag_info *wi; - struct sk_buff *skb; - u32 cqe_bcnt; - u16 ci; - - ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); - wi = get_frag(rq, ci); - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - - if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - rq->stats->wqe_err++; - goto wq_free_wqe; - } - - skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, - mlx5e_skb_from_cqe_linear, - mlx5e_skb_from_cqe_nonlinear, - rq, cqe, wi, cqe_bcnt); - if (unlikely(!skb)) /* a DROP, save the page-reuse checks */ - goto wq_free_wqe; - - skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt); - if (unlikely(!skb)) - goto wq_free_wqe; - - mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); - napi_gro_receive(rq->cq.napi, skb); - -wq_free_wqe: - mlx5e_free_rx_wqe(rq, wi, true); - mlx5_wq_cyc_pop(wq); -} - -#endif /* CONFIG_MLX5_EN_IPSEC */ - int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk) { struct net_device *netdev = rq->netdev; @@ -2439,10 +2399,6 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - if (mlx5_fpga_is_ipsec_device(mdev)) { - netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n"); - return -EINVAL; - } if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo; if (!rq->handle_rx_cqe) { @@ -2466,14 +2422,7 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool mlx5e_skb_from_cqe_nonlinear; rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - -#ifdef CONFIG_MLX5_EN_IPSEC - if ((mlx5_fpga_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) && - priv->ipsec) - rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe; - else -#endif - rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe; + rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe; if (!rq->handle_rx_cqe) { netdev_err(netdev, "RX handler of RQ is not set\n"); return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index bdc870f9c2f3..57fa0489eeb8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -32,7 +32,7 @@ #include "lib/mlx5.h" #include "en.h" -#include "en_accel/tls.h" +#include "en_accel/ktls.h" #include "en_accel/en_accel.h" #include "en/ptp.h" #include "en/port.h" @@ -1900,17 +1900,17 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls) { - return mlx5e_tls_get_count(priv); + return mlx5e_ktls_get_count(priv); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls) { - return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN); + return idx + mlx5e_ktls_get_strings(priv, data + idx * ETH_GSTRING_LEN); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls) { - return idx + mlx5e_tls_get_stats(priv, data + idx); + return idx + mlx5e_ktls_get_stats(priv, data + idx); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; } @@ -2443,7 +2443,6 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { &MLX5E_STATS_GRP(pme), #ifdef CONFIG_MLX5_EN_IPSEC &MLX5E_STATS_GRP(ipsec_sw), - &MLX5E_STATS_GRP(ipsec_hw), #endif &MLX5E_STATS_GRP(tls), &MLX5E_STATS_GRP(channels), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index a7a025d15c14..e48b15b55b6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -482,7 +482,6 @@ extern MLX5E_DECLARE_STATS_GRP(per_prio); extern MLX5E_DECLARE_STATS_GRP(pme); extern MLX5E_DECLARE_STATS_GRP(channels); extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest); -extern MLX5E_DECLARE_STATS_GRP(ipsec_hw); extern MLX5E_DECLARE_STATS_GRP(ipsec_sw); extern MLX5E_DECLARE_STATS_GRP(ptp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index 2a984e82ae16..750c32050165 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -57,9 +57,6 @@ struct mlx5_fpga_device { u32 mkey; struct mlx5_uars_page *uar; } conn_res; - - struct mlx5_fpga_ipsec *ipsec; - struct mlx5_fpga_tls *tls; }; #define mlx5_fpga_dbg(__adev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c deleted file mode 100644 index 8ec148010d62..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ /dev/null @@ -1,1582 +0,0 @@ -/* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/rhashtable.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/fs_helpers.h> -#include <linux/mlx5/fs.h> -#include <linux/rbtree.h> - -#include "mlx5_core.h" -#include "fs_cmd.h" -#include "fpga/ipsec.h" -#include "fpga/sdk.h" -#include "fpga/core.h" - -enum mlx5_fpga_ipsec_cmd_status { - MLX5_FPGA_IPSEC_CMD_PENDING, - MLX5_FPGA_IPSEC_CMD_SEND_FAIL, - MLX5_FPGA_IPSEC_CMD_COMPLETE, -}; - -struct mlx5_fpga_ipsec_cmd_context { - struct mlx5_fpga_dma_buf buf; - enum mlx5_fpga_ipsec_cmd_status status; - struct mlx5_ifc_fpga_ipsec_cmd_resp resp; - int status_code; - struct completion complete; - struct mlx5_fpga_device *dev; - struct list_head list; /* Item in pending_cmds */ - u8 command[]; -}; - -struct mlx5_fpga_esp_xfrm; - -struct mlx5_fpga_ipsec_sa_ctx { - struct rhash_head hash; - struct mlx5_ifc_fpga_ipsec_sa hw_sa; - u32 sa_handle; - struct mlx5_core_dev *dev; - struct mlx5_fpga_esp_xfrm *fpga_xfrm; -}; - -struct mlx5_fpga_esp_xfrm { - unsigned int num_rules; - struct mlx5_fpga_ipsec_sa_ctx *sa_ctx; - struct mutex lock; /* xfrm lock */ - struct mlx5_accel_esp_xfrm accel_xfrm; -}; - -struct mlx5_fpga_ipsec_rule { - struct rb_node node; - struct fs_fte *fte; - struct mlx5_fpga_ipsec_sa_ctx *ctx; -}; - -static const struct rhashtable_params rhash_sa = { - /* Keep out "cmd" field from the key as it's - * value is not constant during the lifetime - * of the key object. - */ - .key_len = sizeof_field(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) - - sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), - .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) + - sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd), - .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash), - .automatic_shrinking = true, - .min_size = 1, -}; - -struct mlx5_fpga_ipsec { - struct mlx5_fpga_device *fdev; - struct list_head pending_cmds; - spinlock_t pending_cmds_lock; /* Protects pending_cmds */ - u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)]; - struct mlx5_fpga_conn *conn; - - struct notifier_block fs_notifier_ingress_bypass; - struct notifier_block fs_notifier_egress; - - /* Map hardware SA --> SA context - * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx) - * We will use this hash to avoid SAs duplication in fpga which - * aren't allowed - */ - struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */ - struct mutex sa_hash_lock; - - /* Tree holding all rules for this fpga device - * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id) - */ - struct rb_root rules_rb; - struct mutex rules_rb_lock; /* rules lock */ - - struct ida halloc; -}; - -bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) -{ - if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) - return false; - - if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != - MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) - return false; - - if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC) - return false; - - return true; -} - -static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *buf, - u8 status) -{ - struct mlx5_fpga_ipsec_cmd_context *context; - - if (status) { - context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context, - buf); - mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n", - status); - context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL; - complete(&context->complete); - } -} - -static inline -int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome) -{ - switch (syndrome) { - case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS: - return 0; - case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE: - return -EEXIST; - case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST: - return -EINVAL; - case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE: - return -EIO; - } - return -EIO; -} - -static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf) -{ - struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data; - struct mlx5_fpga_ipsec_cmd_context *context; - enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome; - struct mlx5_fpga_device *fdev = cb_arg; - unsigned long flags; - - if (buf->sg[0].size < sizeof(*resp)) { - mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n", - buf->sg[0].size, sizeof(*resp)); - return; - } - - mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n", - ntohl(resp->syndrome)); - - spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); - context = list_first_entry_or_null(&fdev->ipsec->pending_cmds, - struct mlx5_fpga_ipsec_cmd_context, - list); - if (context) - list_del(&context->list); - spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); - - if (!context) { - mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n"); - return; - } - mlx5_fpga_dbg(fdev, "Handling response for %p\n", context); - - syndrome = ntohl(resp->syndrome); - context->status_code = syndrome_to_errno(syndrome); - context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE; - memcpy(&context->resp, resp, sizeof(*resp)); - - if (context->status_code) - mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n", - syndrome); - - complete(&context->complete); -} - -static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev, - const void *cmd, int cmd_size) -{ - struct mlx5_fpga_ipsec_cmd_context *context; - struct mlx5_fpga_device *fdev = mdev->fpga; - unsigned long flags; - int res; - - if (!fdev || !fdev->ipsec) - return ERR_PTR(-EOPNOTSUPP); - - if (cmd_size & 3) - return ERR_PTR(-EINVAL); - - context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC); - if (!context) - return ERR_PTR(-ENOMEM); - - context->status = MLX5_FPGA_IPSEC_CMD_PENDING; - context->dev = fdev; - context->buf.complete = mlx5_fpga_ipsec_send_complete; - init_completion(&context->complete); - memcpy(&context->command, cmd, cmd_size); - context->buf.sg[0].size = cmd_size; - context->buf.sg[0].data = &context->command; - - spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags); - res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf); - if (!res) - list_add_tail(&context->list, &fdev->ipsec->pending_cmds); - spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags); - - if (res) { - mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res); - kfree(context); - return ERR_PTR(res); - } - - /* Context should be freed by the caller after completion. */ - return context; -} - -static int mlx5_fpga_ipsec_cmd_wait(void *ctx) -{ - struct mlx5_fpga_ipsec_cmd_context *context = ctx; - unsigned long timeout = - msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC); - int res; - - res = wait_for_completion_timeout(&context->complete, timeout); - if (!res) { - mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n"); - return -ETIMEDOUT; - } - - if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE) - res = context->status_code; - else - res = -EIO; - - return res; -} - -static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec) -{ - if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command)) - return true; - return false; -} - -static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev, - struct mlx5_ifc_fpga_ipsec_sa *hw_sa, - int opcode) -{ - struct mlx5_core_dev *dev = fdev->mdev; - struct mlx5_ifc_fpga_ipsec_sa *sa; - struct mlx5_fpga_ipsec_cmd_context *cmd_context; - size_t sa_cmd_size; - int err; - - hw_sa->ipsec_sa_v1.cmd = htonl(opcode); - if (is_v2_sadb_supported(fdev->ipsec)) - sa_cmd_size = sizeof(*hw_sa); - else - sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1); - - cmd_context = (struct mlx5_fpga_ipsec_cmd_context *) - mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size); - if (IS_ERR(cmd_context)) - return PTR_ERR(cmd_context); - - err = mlx5_fpga_ipsec_cmd_wait(cmd_context); - if (err) - goto out; - - sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command; - if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) { - mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n", - ntohl(sa->ipsec_sa_v1.sw_sa_handle), - ntohl(cmd_context->resp.sw_sa_handle)); - err = -EIO; - } - -out: - kfree(cmd_context); - return err; -} - -u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - u32 ret = 0; - - if (mlx5_fpga_is_ipsec_device(mdev)) { - ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE; - ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA; - } else { - return ret; - } - - if (!fdev->ipsec) - return ret; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp)) - ret |= MLX5_ACCEL_IPSEC_CAP_ESP; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6)) - ret |= MLX5_ACCEL_IPSEC_CAP_IPV6; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso)) - ret |= MLX5_ACCEL_IPSEC_CAP_LSO; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer)) - ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER; - - if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) { - ret |= MLX5_ACCEL_IPSEC_CAP_ESN; - ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN; - } - - return ret; -} - -static unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - - if (!fdev || !fdev->ipsec) - return 0; - - return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, - number_of_ipsec_counters); -} - -static int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, - unsigned int counters_count) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - unsigned int i; - __be32 *data; - u32 count; - u64 addr; - int ret; - - if (!fdev || !fdev->ipsec) - return 0; - - addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, - ipsec_counters_addr_low) + - ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, - ipsec_counters_addr_high) << 32); - - count = mlx5_fpga_ipsec_counters_count(mdev); - - data = kzalloc(array3_size(sizeof(*data), count, 2), GFP_KERNEL); - if (!data) { - ret = -ENOMEM; - goto out; - } - - ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data, - MLX5_FPGA_ACCESS_TYPE_DONTCARE); - if (ret < 0) { - mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n", - ret); - goto out; - } - ret = 0; - - if (count > counters_count) - count = counters_count; - - /* Each counter is low word, then high. But each word is big-endian */ - for (i = 0; i < count; i++) - counters[i] = (u64)ntohl(data[i * 2]) | - ((u64)ntohl(data[i * 2 + 1]) << 32); - -out: - kfree(data); - return ret; -} - -static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags) -{ - struct mlx5_fpga_ipsec_cmd_context *context; - struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0}; - int err; - - cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP); - cmd.flags = htonl(flags); - context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd)); - if (IS_ERR(context)) - return PTR_ERR(context); - - err = mlx5_fpga_ipsec_cmd_wait(context); - if (err) - goto out; - - if ((context->resp.flags & cmd.flags) != cmd.flags) { - mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n", - cmd.flags, - context->resp.flags); - err = -EIO; - } - -out: - kfree(context); - return err; -} - -static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev) -{ - u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev); - u32 flags = 0; - - if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER) - flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER; - - return mlx5_fpga_ipsec_set_caps(mdev, flags); -} - -static void -mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs, - struct mlx5_ifc_fpga_ipsec_sa *hw_sa) -{ - const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm; - - /* key */ - memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key, - aes_gcm->key_len / 8); - /* Duplicate 128 bit key twice according to HW layout */ - if (aes_gcm->key_len == 128) - memcpy(&hw_sa->ipsec_sa_v1.key_enc[16], - aes_gcm->aes_key, aes_gcm->key_len / 8); - - /* salt and seq_iv */ - memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv, - sizeof(aes_gcm->seq_iv)); - memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt, - sizeof(aes_gcm->salt)); - - /* esn */ - if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) { - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN; - hw_sa->ipsec_sa_v1.flags |= - (xfrm_attrs->flags & - MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ? - MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0; - hw_sa->esn = htonl(xfrm_attrs->esn); - } else { - hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN; - hw_sa->ipsec_sa_v1.flags &= - ~(xfrm_attrs->flags & - MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ? - MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0; - hw_sa->esn = 0; - } - - /* rx handle */ - hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle); - - /* enc mode */ - switch (aes_gcm->key_len) { - case 128: - hw_sa->ipsec_sa_v1.enc_mode = - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128; - break; - case 256: - hw_sa->ipsec_sa_v1.enc_mode = - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128; - break; - } - - /* flags */ - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID | - MLX5_FPGA_IPSEC_SA_SPI_EN | - MLX5_FPGA_IPSEC_SA_IP_ESP; - - if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT) - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX; - else - hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX; -} - -static void -mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs, - const __be32 saddr[4], - const __be32 daddr[4], - const __be32 spi, bool is_ipv6, - struct mlx5_ifc_fpga_ipsec_sa *hw_sa) -{ - mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa); - - /* IPs */ - memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip)); - memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip)); - - /* SPI */ - hw_sa->ipsec_sa_v1.spi = spi; - - /* flags */ - if (is_ipv6) - hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6; -} - -static bool is_full_mask(const void *p, size_t len) -{ - WARN_ON(len % 4); - - return !memchr_inv(p, 0xff, len); -} - -static bool validate_fpga_full_mask(struct mlx5_core_dev *dev, - const u32 *match_c, - const u32 *match_v) -{ - const void *misc_params_c = MLX5_ADDR_OF(fte_match_param, - match_c, - misc_parameters); - const void *headers_c = MLX5_ADDR_OF(fte_match_param, - match_c, - outer_headers); - const void *headers_v = MLX5_ADDR_OF(fte_match_param, - match_v, - outer_headers); - - if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) { - const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - src_ipv4_src_ipv6.ipv4_layout.ipv4); - const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout, - ipv4)) || - !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout, - ipv4))) - return false; - } else { - const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - src_ipv4_src_ipv6.ipv6_layout.ipv6); - const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4, - headers_c, - dst_ipv4_dst_ipv6.ipv6_layout.ipv6); - - if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6)) || - !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout, - ipv6))) - return false; - } - - if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - outer_esp_spi), - MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi))) - return false; - - return true; -} - -static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev, - u8 match_criteria_enable, - const u32 *match_c, - const u32 *match_v) -{ - u32 ipsec_dev_caps = mlx5_fpga_ipsec_device_caps(dev); - bool ipv6_flow; - - ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v); - - if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) || - mlx5_fs_is_outer_udp_flow(match_c, match_v) || - mlx5_fs_is_outer_tcp_flow(match_c, match_v) || - mlx5_fs_is_vxlan_flow(match_c) || - !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) || - ipv6_flow)) - return false; - - if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE)) - return false; - - if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) && - mlx5_fs_is_outer_ipsec_flow(match_c)) - return false; - - if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) && - ipv6_flow) - return false; - - if (!validate_fpga_full_mask(dev, match_c, match_v)) - return false; - - return true; -} - -static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, - u8 match_criteria_enable, - const u32 *match_c, - const u32 *match_v, - struct mlx5_flow_act *flow_act, - struct mlx5_flow_context *flow_context) -{ - const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) || - MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0); - bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) || - MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0); - int ret; - - ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c, - match_v); - if (!ret) - return ret; - - if (is_dmac || is_smac || - (match_criteria_enable & - ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || - (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - (flow_context->flags & FLOW_CONTEXT_HAS_TAG)) - return false; - - return true; -} - -static void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *accel_xfrm, - const __be32 saddr[4], const __be32 daddr[4], - const __be32 spi, bool is_ipv6, u32 *sa_handle) -{ - struct mlx5_fpga_ipsec_sa_ctx *sa_ctx; - struct mlx5_fpga_esp_xfrm *fpga_xfrm = - container_of(accel_xfrm, typeof(*fpga_xfrm), - accel_xfrm); - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - int opcode, err; - void *context; - - /* alloc SA */ - sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL); - if (!sa_ctx) - return ERR_PTR(-ENOMEM); - - sa_ctx->dev = mdev; - - /* build candidate SA */ - mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs, - saddr, daddr, spi, is_ipv6, - &sa_ctx->hw_sa); - - mutex_lock(&fpga_xfrm->lock); - - if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */ - /* all rules must be with same IPs and SPI */ - if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa, - sizeof(sa_ctx->hw_sa))) { - context = ERR_PTR(-EINVAL); - goto exists; - } - - ++fpga_xfrm->num_rules; - context = fpga_xfrm->sa_ctx; - goto exists; - } - - if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) { - err = ida_alloc_min(&fipsec->halloc, 1, GFP_KERNEL); - if (err < 0) { - context = ERR_PTR(err); - goto exists; - } - - sa_ctx->sa_handle = err; - if (sa_handle) - *sa_handle = sa_ctx->sa_handle; - } - /* This is unbounded fpga_xfrm, try to add to hash */ - mutex_lock(&fipsec->sa_hash_lock); - - err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash, - rhash_sa); - if (err) { - /* Can't bound different accel_xfrm to already existing sa_ctx. - * This is because we can't support multiple ketmats for - * same IPs and SPI - */ - context = ERR_PTR(-EEXIST); - goto unlock_hash; - } - - /* Bound accel_xfrm to sa_ctx */ - opcode = is_v2_sadb_supported(fdev->ipsec) ? - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 : - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA; - err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode); - sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; - if (err) { - context = ERR_PTR(err); - goto delete_hash; - } - - mutex_unlock(&fipsec->sa_hash_lock); - - ++fpga_xfrm->num_rules; - fpga_xfrm->sa_ctx = sa_ctx; - sa_ctx->fpga_xfrm = fpga_xfrm; - - mutex_unlock(&fpga_xfrm->lock); - - return sa_ctx; - -delete_hash: - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, - rhash_sa)); -unlock_hash: - mutex_unlock(&fipsec->sa_hash_lock); - if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) - ida_free(&fipsec->halloc, sa_ctx->sa_handle); -exists: - mutex_unlock(&fpga_xfrm->lock); - kfree(sa_ctx); - return context; -} - -static void * -mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, - struct fs_fte *fte, - bool is_egress) -{ - struct mlx5_accel_esp_xfrm *accel_xfrm; - __be32 saddr[4], daddr[4], spi; - struct mlx5_flow_group *fg; - bool is_ipv6 = false; - - fs_get_obj(fg, fte->node.parent); - /* validate */ - if (is_egress && - !mlx5_is_fpga_egress_ipsec_rule(mdev, - fg->mask.match_criteria_enable, - fg->mask.match_criteria, - fte->val, - &fte->action, - &fte->flow_context)) - return ERR_PTR(-EINVAL); - else if (!mlx5_is_fpga_ipsec_rule(mdev, - fg->mask.match_criteria_enable, - fg->mask.match_criteria, - fte->val)) - return ERR_PTR(-EINVAL); - - /* get xfrm context */ - accel_xfrm = - (struct mlx5_accel_esp_xfrm *)fte->action.esp_id; - - /* IPs */ - if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria, - fte->val)) { - memcpy(&saddr[3], - MLX5_ADDR_OF(fte_match_set_lyr_2_4, - fte->val, - src_ipv4_src_ipv6.ipv4_layout.ipv4), - sizeof(saddr[3])); - memcpy(&daddr[3], - MLX5_ADDR_OF(fte_match_set_lyr_2_4, - fte->val, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - sizeof(daddr[3])); - } else { - memcpy(saddr, - MLX5_ADDR_OF(fte_match_param, - fte->val, - outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), - sizeof(saddr)); - memcpy(daddr, - MLX5_ADDR_OF(fte_match_param, - fte->val, - outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - sizeof(daddr)); - is_ipv6 = true; - } - - /* SPI */ - spi = MLX5_GET_BE(typeof(spi), - fte_match_param, fte->val, - misc_parameters.outer_esp_spi); - - /* create */ - return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm, - saddr, daddr, - spi, is_ipv6, NULL); -} - -static void -mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx) -{ - struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - int opcode = is_v2_sadb_supported(fdev->ipsec) ? - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 : - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA; - int err; - - err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode); - sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; - if (err) { - WARN_ON(err); - return; - } - - if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action == - MLX5_ACCEL_ESP_ACTION_DECRYPT) - ida_free(&fipsec->halloc, sa_ctx->sa_handle); - - mutex_lock(&fipsec->sa_hash_lock); - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, - rhash_sa)); - mutex_unlock(&fipsec->sa_hash_lock); -} - -static void mlx5_fpga_ipsec_delete_sa_ctx(void *context) -{ - struct mlx5_fpga_esp_xfrm *fpga_xfrm = - ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm; - - mutex_lock(&fpga_xfrm->lock); - if (!--fpga_xfrm->num_rules) { - mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx); - kfree(fpga_xfrm->sa_ctx); - fpga_xfrm->sa_ctx = NULL; - } - mutex_unlock(&fpga_xfrm->lock); -} - -static inline struct mlx5_fpga_ipsec_rule * -_rule_search(struct rb_root *root, struct fs_fte *fte) -{ - struct rb_node *node = root->rb_node; - - while (node) { - struct mlx5_fpga_ipsec_rule *rule = - container_of(node, struct mlx5_fpga_ipsec_rule, - node); - - if (rule->fte < fte) - node = node->rb_left; - else if (rule->fte > fte) - node = node->rb_right; - else - return rule; - } - return NULL; -} - -static struct mlx5_fpga_ipsec_rule * -rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte) -{ - struct mlx5_fpga_ipsec_rule *rule; - - mutex_lock(&ipsec_dev->rules_rb_lock); - rule = _rule_search(&ipsec_dev->rules_rb, fte); - mutex_unlock(&ipsec_dev->rules_rb_lock); - - return rule; -} - -static inline int _rule_insert(struct rb_root *root, - struct mlx5_fpga_ipsec_rule *rule) -{ - struct rb_node **new = &root->rb_node, *parent = NULL; - - /* Figure out where to put new node */ - while (*new) { - struct mlx5_fpga_ipsec_rule *this = - container_of(*new, struct mlx5_fpga_ipsec_rule, - node); - - parent = *new; - if (rule->fte < this->fte) - new = &((*new)->rb_left); - else if (rule->fte > this->fte) - new = &((*new)->rb_right); - else - return -EEXIST; - } - - /* Add new node and rebalance tree. */ - rb_link_node(&rule->node, parent, new); - rb_insert_color(&rule->node, root); - - return 0; -} - -static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev, - struct mlx5_fpga_ipsec_rule *rule) -{ - int ret; - - mutex_lock(&ipsec_dev->rules_rb_lock); - ret = _rule_insert(&ipsec_dev->rules_rb, rule); - mutex_unlock(&ipsec_dev->rules_rb_lock); - - return ret; -} - -static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev, - struct mlx5_fpga_ipsec_rule *rule) -{ - struct rb_root *root = &ipsec_dev->rules_rb; - - mutex_lock(&ipsec_dev->rules_rb_lock); - rb_erase(&rule->node, root); - mutex_unlock(&ipsec_dev->rules_rb_lock); -} - -static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev, - struct mlx5_fpga_ipsec_rule *rule) -{ - _rule_delete(ipsec_dev, rule); - kfree(rule); -} - -struct mailbox_mod { - uintptr_t saved_esp_id; - u32 saved_action; - u32 saved_outer_esp_spi_value; -}; - -static void restore_spec_mailbox(struct fs_fte *fte, - struct mailbox_mod *mbox_mod) -{ - char *misc_params_v = MLX5_ADDR_OF(fte_match_param, - fte->val, - misc_parameters); - - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, - mbox_mod->saved_outer_esp_spi_value); - fte->action.action |= mbox_mod->saved_action; - fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id; -} - -static void modify_spec_mailbox(struct mlx5_core_dev *mdev, - struct fs_fte *fte, - struct mailbox_mod *mbox_mod) -{ - char *misc_params_v = MLX5_ADDR_OF(fte_match_param, - fte->val, - misc_parameters); - - mbox_mod->saved_esp_id = fte->action.esp_id; - mbox_mod->saved_action = fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT); - mbox_mod->saved_outer_esp_spi_value = - MLX5_GET(fte_match_set_misc, misc_params_v, - outer_esp_spi); - - fte->action.esp_id = 0; - fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT); - if (!MLX5_CAP_FLOWTABLE(mdev, - flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0); -} - -static enum fs_flow_table_type egress_to_fs_ft(bool egress) -{ - return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX; -} - -static int fpga_ipsec_fs_create_flow_group(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - u32 *in, - struct mlx5_flow_group *fg, - bool is_egress) -{ - int (*create_flow_group)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, u32 *in, - struct mlx5_flow_group *fg) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group; - char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in, - match_criteria.misc_parameters); - struct mlx5_core_dev *dev = ns->dev; - u32 saved_outer_esp_spi_mask; - u8 match_criteria_enable; - int ret; - - if (MLX5_CAP_FLOWTABLE(dev, - flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - return create_flow_group(ns, ft, in, fg); - - match_criteria_enable = - MLX5_GET(create_flow_group_in, in, match_criteria_enable); - saved_outer_esp_spi_mask = - MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi); - if (!match_criteria_enable || !saved_outer_esp_spi_mask) - return create_flow_group(ns, ft, in, fg); - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0); - - if (!(*misc_params_c) && - !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) - MLX5_SET(create_flow_group_in, in, match_criteria_enable, - match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS); - - ret = create_flow_group(ns, ft, in, fg); - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask); - MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable); - - return ret; -} - -static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte, - bool is_egress) -{ - int (*create_fte)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte; - struct mlx5_core_dev *dev = ns->dev; - struct mlx5_fpga_device *fdev = dev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - struct mlx5_fpga_ipsec_rule *rule; - bool is_esp = fte->action.esp_id; - struct mailbox_mod mbox_mod; - int ret; - - if (!is_esp || - !(fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return create_fte(ns, ft, fg, fte); - - rule = kzalloc(sizeof(*rule), GFP_KERNEL); - if (!rule) - return -ENOMEM; - - rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress); - if (IS_ERR(rule->ctx)) { - int err = PTR_ERR(rule->ctx); - - kfree(rule); - return err; - } - - rule->fte = fte; - WARN_ON(rule_insert(fipsec, rule)); - - modify_spec_mailbox(dev, fte, &mbox_mod); - ret = create_fte(ns, ft, fg, fte); - restore_spec_mailbox(fte, &mbox_mod); - if (ret) { - _rule_delete(fipsec, rule); - mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx); - kfree(rule); - } - - return ret; -} - -static int fpga_ipsec_fs_update_fte(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte, - bool is_egress) -{ - int (*update_fte)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte; - struct mlx5_core_dev *dev = ns->dev; - bool is_esp = fte->action.esp_id; - struct mailbox_mod mbox_mod; - int ret; - - if (!is_esp || - !(fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return update_fte(ns, ft, fg, modify_mask, fte); - - modify_spec_mailbox(dev, fte, &mbox_mod); - ret = update_fte(ns, ft, fg, modify_mask, fte); - restore_spec_mailbox(fte, &mbox_mod); - - return ret; -} - -static int fpga_ipsec_fs_delete_fte(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte, - bool is_egress) -{ - int (*delete_fte)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte) = - mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte; - struct mlx5_core_dev *dev = ns->dev; - struct mlx5_fpga_device *fdev = dev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - struct mlx5_fpga_ipsec_rule *rule; - bool is_esp = fte->action.esp_id; - struct mailbox_mod mbox_mod; - int ret; - - if (!is_esp || - !(fte->action.action & - (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT))) - return delete_fte(ns, ft, fte); - - rule = rule_search(fipsec, fte); - if (!rule) - return -ENOENT; - - mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx); - rule_delete(fipsec, rule); - - modify_spec_mailbox(dev, fte, &mbox_mod); - ret = delete_fte(ns, ft, fte); - restore_spec_mailbox(fte, &mbox_mod); - - return ret; -} - -static int -mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - u32 *in, - struct mlx5_flow_group *fg) -{ - return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, true); -} - -static int -mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, true); -} - -static int -mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte, - true); -} - -static int -mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_delete_fte(ns, ft, fte, true); -} - -static int -mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - u32 *in, - struct mlx5_flow_group *fg) -{ - return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, false); -} - -static int -mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, false); -} - -static int -mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct mlx5_flow_group *fg, - int modify_mask, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte, - false); -} - -static int -mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_table *ft, - struct fs_fte *fte) -{ - return fpga_ipsec_fs_delete_fte(ns, ft, fte, false); -} - -static struct mlx5_flow_cmds fpga_ipsec_ingress; -static struct mlx5_flow_cmds fpga_ipsec_egress; - -const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) -{ - switch (type) { - case FS_FT_NIC_RX: - return &fpga_ipsec_ingress; - case FS_FT_NIC_TX: - return &fpga_ipsec_egress; - default: - WARN_ON(true); - return NULL; - } -} - -static int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_conn_attr init_attr = {0}; - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_conn *conn; - int err; - - if (!mlx5_fpga_is_ipsec_device(mdev)) - return 0; - - fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL); - if (!fdev->ipsec) - return -ENOMEM; - - fdev->ipsec->fdev = fdev; - - err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps), - fdev->ipsec->caps); - if (err) { - mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n", - err); - goto error; - } - - INIT_LIST_HEAD(&fdev->ipsec->pending_cmds); - spin_lock_init(&fdev->ipsec->pending_cmds_lock); - - init_attr.rx_size = SBU_QP_QUEUE_SIZE; - init_attr.tx_size = SBU_QP_QUEUE_SIZE; - init_attr.recv_cb = mlx5_fpga_ipsec_recv; - init_attr.cb_arg = fdev; - conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); - if (IS_ERR(conn)) { - err = PTR_ERR(conn); - mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n", - err); - goto error; - } - fdev->ipsec->conn = conn; - - err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa); - if (err) - goto err_destroy_conn; - mutex_init(&fdev->ipsec->sa_hash_lock); - - fdev->ipsec->rules_rb = RB_ROOT; - mutex_init(&fdev->ipsec->rules_rb_lock); - - err = mlx5_fpga_ipsec_enable_supported_caps(mdev); - if (err) { - mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n", - err); - goto err_destroy_hash; - } - - ida_init(&fdev->ipsec->halloc); - - return 0; - -err_destroy_hash: - rhashtable_destroy(&fdev->ipsec->sa_hash); - -err_destroy_conn: - mlx5_fpga_sbu_conn_destroy(conn); - -error: - kfree(fdev->ipsec); - fdev->ipsec = NULL; - return err; -} - -static void destroy_rules_rb(struct rb_root *root) -{ - struct mlx5_fpga_ipsec_rule *r, *tmp; - - rbtree_postorder_for_each_entry_safe(r, tmp, root, node) { - rb_erase(&r->node, root); - mlx5_fpga_ipsec_delete_sa_ctx(r->ctx); - kfree(r); - } -} - -static void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - - if (!mlx5_fpga_is_ipsec_device(mdev)) - return; - - ida_destroy(&fdev->ipsec->halloc); - destroy_rules_rb(&fdev->ipsec->rules_rb); - rhashtable_destroy(&fdev->ipsec->sa_hash); - - mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn); - kfree(fdev->ipsec); - fdev->ipsec = NULL; -} - -void mlx5_fpga_ipsec_build_fs_cmds(void) -{ - /* ingress */ - fpga_ipsec_ingress.create_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table; - fpga_ipsec_ingress.destroy_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table; - fpga_ipsec_ingress.modify_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table; - fpga_ipsec_ingress.create_flow_group = - mlx5_fpga_ipsec_fs_create_flow_group_ingress; - fpga_ipsec_ingress.destroy_flow_group = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group; - fpga_ipsec_ingress.create_fte = - mlx5_fpga_ipsec_fs_create_fte_ingress; - fpga_ipsec_ingress.update_fte = - mlx5_fpga_ipsec_fs_update_fte_ingress; - fpga_ipsec_ingress.delete_fte = - mlx5_fpga_ipsec_fs_delete_fte_ingress; - fpga_ipsec_ingress.update_root_ft = - mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft; - - /* egress */ - fpga_ipsec_egress.create_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table; - fpga_ipsec_egress.destroy_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table; - fpga_ipsec_egress.modify_flow_table = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table; - fpga_ipsec_egress.create_flow_group = - mlx5_fpga_ipsec_fs_create_flow_group_egress; - fpga_ipsec_egress.destroy_flow_group = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group; - fpga_ipsec_egress.create_fte = - mlx5_fpga_ipsec_fs_create_fte_egress; - fpga_ipsec_egress.update_fte = - mlx5_fpga_ipsec_fs_update_fte_egress; - fpga_ipsec_egress.delete_fte = - mlx5_fpga_ipsec_fs_delete_fte_egress; - fpga_ipsec_egress.update_root_ft = - mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft; -} - -static int -mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - if (attrs->tfc_pad) { - mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n"); - return -EOPNOTSUPP; - } - - if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) { - mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) { - mlx5_core_err(mdev, "Only aes gcm keymat is supported\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.iv_algo != - MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) { - mlx5_core_err(mdev, "Only iv sequence algo is supported\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.icv_len != 128) { - mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n"); - return -EOPNOTSUPP; - } - - if (attrs->keymat.aes_gcm.key_len != 128 && - attrs->keymat.aes_gcm.key_len != 256) { - mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); - return -EOPNOTSUPP; - } - - if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) && - (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps, - v2_command))) { - mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); - return -EOPNOTSUPP; - } - - return 0; -} - -static struct mlx5_accel_esp_xfrm * -mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) -{ - struct mlx5_fpga_esp_xfrm *fpga_xfrm; - - if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) { - mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n"); - return ERR_PTR(-EINVAL); - } - - if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { - mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); - return ERR_PTR(-EOPNOTSUPP); - } - - fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL); - if (!fpga_xfrm) - return ERR_PTR(-ENOMEM); - - mutex_init(&fpga_xfrm->lock); - memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs, - sizeof(fpga_xfrm->accel_xfrm.attrs)); - - return &fpga_xfrm->accel_xfrm; -} - -static void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) -{ - struct mlx5_fpga_esp_xfrm *fpga_xfrm = - container_of(xfrm, struct mlx5_fpga_esp_xfrm, - accel_xfrm); - /* assuming no sa_ctx are connected to this xfrm_ctx */ - kfree(fpga_xfrm); -} - -static int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - struct mlx5_core_dev *mdev = xfrm->mdev; - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_ipsec *fipsec = fdev->ipsec; - struct mlx5_fpga_esp_xfrm *fpga_xfrm; - struct mlx5_ifc_fpga_ipsec_sa org_hw_sa; - - int err = 0; - - if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs))) - return 0; - - if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { - mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); - return -EOPNOTSUPP; - } - - if (is_v2_sadb_supported(fipsec)) { - mlx5_core_warn(mdev, "Modify esp is not supported\n"); - return -EOPNOTSUPP; - } - - fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm); - - mutex_lock(&fpga_xfrm->lock); - - if (!fpga_xfrm->sa_ctx) - /* Unbounded xfrm, change only sw attrs */ - goto change_sw_xfrm_attrs; - - /* copy original hw sa */ - memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa)); - mutex_lock(&fipsec->sa_hash_lock); - /* remove original hw sa from hash */ - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, rhash_sa)); - /* update hw_sa with new xfrm attrs*/ - mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs, - &fpga_xfrm->sa_ctx->hw_sa); - /* try to insert new hw_sa to hash */ - err = rhashtable_insert_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, rhash_sa); - if (err) - goto rollback_sa; - - /* modify device with new hw_sa */ - err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa, - MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2); - fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0; - if (err) - WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, - rhash_sa)); -rollback_sa: - if (err) { - /* return original hw_sa to hash */ - memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa, - sizeof(org_hw_sa)); - WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash, - &fpga_xfrm->sa_ctx->hash, - rhash_sa)); - } - mutex_unlock(&fipsec->sa_hash_lock); - -change_sw_xfrm_attrs: - if (!err) - memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs)); - mutex_unlock(&fpga_xfrm->lock); - return err; -} - -static const struct mlx5_accel_ipsec_ops fpga_ipsec_ops = { - .device_caps = mlx5_fpga_ipsec_device_caps, - .counters_count = mlx5_fpga_ipsec_counters_count, - .counters_read = mlx5_fpga_ipsec_counters_read, - .create_hw_context = mlx5_fpga_ipsec_create_sa_ctx, - .free_hw_context = mlx5_fpga_ipsec_delete_sa_ctx, - .init = mlx5_fpga_ipsec_init, - .cleanup = mlx5_fpga_ipsec_cleanup, - .esp_create_xfrm = mlx5_fpga_esp_create_xfrm, - .esp_modify_xfrm = mlx5_fpga_esp_modify_xfrm, - .esp_destroy_xfrm = mlx5_fpga_esp_destroy_xfrm, -}; - -const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev) -{ - if (!mlx5_fpga_is_ipsec_device(mdev)) - return NULL; - - return &fpga_ipsec_ops; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h deleted file mode 100644 index 8931b5584477..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_FPGA_IPSEC_H__ -#define __MLX5_FPGA_IPSEC_H__ - -#include "accel/ipsec.h" -#include "fs_cmd.h" - -#ifdef CONFIG_MLX5_FPGA_IPSEC -const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev); -u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); -const struct mlx5_flow_cmds * -mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); -void mlx5_fpga_ipsec_build_fs_cmds(void); -bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev); -#else -static inline -const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev) -{ return NULL; } -static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; } -static inline const struct mlx5_flow_cmds * -mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) -{ - return mlx5_fs_cmd_get_default(type); -} - -static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {}; -static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; } - -#endif /* CONFIG_MLX5_FPGA_IPSEC */ -#endif /* __MLX5_FPGA_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c deleted file mode 100644 index 29b7339ebfa3..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ /dev/null @@ -1,622 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include <linux/mlx5/device.h> -#include "fpga/tls.h" -#include "fpga/cmd.h" -#include "fpga/sdk.h" -#include "fpga/core.h" -#include "accel/tls.h" - -struct mlx5_fpga_tls_command_context; - -typedef void (*mlx5_fpga_tls_command_complete) - (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *ctx, - struct mlx5_fpga_dma_buf *resp); - -struct mlx5_fpga_tls_command_context { - struct list_head list; - /* There is no guarantee on the order between the TX completion - * and the command response. - * The TX completion is going to touch cmd->buf even in - * the case of successful transmission. - * So instead of requiring separate allocations for cmd - * and cmd->buf we've decided to use a reference counter - */ - refcount_t ref; - struct mlx5_fpga_dma_buf buf; - mlx5_fpga_tls_command_complete complete; -}; - -static void -mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx) -{ - if (refcount_dec_and_test(&ctx->ref)) - kfree(ctx); -} - -static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *resp) -{ - struct mlx5_fpga_conn *conn = fdev->tls->conn; - struct mlx5_fpga_tls_command_context *ctx; - struct mlx5_fpga_tls *tls = fdev->tls; - unsigned long flags; - - spin_lock_irqsave(&tls->pending_cmds_lock, flags); - ctx = list_first_entry(&tls->pending_cmds, - struct mlx5_fpga_tls_command_context, list); - list_del(&ctx->list); - spin_unlock_irqrestore(&tls->pending_cmds_lock, flags); - ctx->complete(conn, fdev, ctx, resp); -} - -static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *buf, - u8 status) -{ - struct mlx5_fpga_tls_command_context *ctx = - container_of(buf, struct mlx5_fpga_tls_command_context, buf); - - mlx5_fpga_tls_put_command_ctx(ctx); - - if (unlikely(status)) - mlx5_fpga_tls_cmd_complete(fdev, NULL); -} - -static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *cmd, - mlx5_fpga_tls_command_complete complete) -{ - struct mlx5_fpga_tls *tls = fdev->tls; - unsigned long flags; - int ret; - - refcount_set(&cmd->ref, 2); - cmd->complete = complete; - cmd->buf.complete = mlx5_fpga_cmd_send_complete; - - spin_lock_irqsave(&tls->pending_cmds_lock, flags); - /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock - * to make sure commands are inserted to the tls->pending_cmds list - * and the command QP in the same order. - */ - ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf); - if (likely(!ret)) - list_add_tail(&cmd->list, &tls->pending_cmds); - else - complete(tls->conn, fdev, cmd, NULL); - spin_unlock_irqrestore(&tls->pending_cmds_lock, flags); -} - -/* Start of context identifiers range (inclusive) */ -#define SWID_START 0 -/* End of context identifiers range (exclusive) */ -#define SWID_END BIT(24) - -static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, - void *ptr) -{ - unsigned long flags; - int ret; - - /* TLS metadata format is 1 byte for syndrome followed - * by 3 bytes of swid (software ID) - * swid must not exceed 3 bytes. - * See tls_rxtx.c:insert_pet() for details - */ - BUILD_BUG_ON((SWID_END - 1) & 0xFF000000); - - idr_preload(GFP_KERNEL); - spin_lock_irqsave(idr_spinlock, flags); - ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC); - spin_unlock_irqrestore(idr_spinlock, flags); - idr_preload_end(); - - return ret; -} - -static void *mlx5_fpga_tls_release_swid(struct idr *idr, - spinlock_t *idr_spinlock, u32 swid) -{ - unsigned long flags; - void *ptr; - - spin_lock_irqsave(idr_spinlock, flags); - ptr = idr_remove(idr, swid); - spin_unlock_irqrestore(idr_spinlock, flags); - return ptr; -} - -static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_dma_buf *buf, u8 status) -{ - kfree(buf); -} - -static void -mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *cmd, - struct mlx5_fpga_dma_buf *resp) -{ - if (resp) { - u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); - - if (syndrome) - mlx5_fpga_err(fdev, - "Teardown stream failed with syndrome = %d", - syndrome); - } - mlx5_fpga_tls_put_command_ctx(cmd); -} - -static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd) -{ - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow, - MLX5_BYTE_OFF(tls_flow, ipv6)); - - MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6)); - MLX5_SET(tls_cmd, cmd, direction_sx, - MLX5_GET(tls_flow, flow, direction_sx)); -} - -int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn) -{ - struct mlx5_fpga_dma_buf *buf; - int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE; - void *flow; - void *cmd; - int ret; - - buf = kzalloc(size, GFP_ATOMIC); - if (!buf) - return -ENOMEM; - - cmd = (buf + 1); - - rcu_read_lock(); - flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - if (unlikely(!flow)) { - rcu_read_unlock(); - WARN_ONCE(1, "Received NULL pointer for handle\n"); - kfree(buf); - return -EINVAL; - } - mlx5_fpga_tls_flow_to_cmd(flow, cmd); - rcu_read_unlock(); - - MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); - MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); - MLX5_SET(tls_cmd, cmd, tcp_sn, seq); - MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX); - - buf->sg[0].data = cmd; - buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - buf->complete = mlx_tls_kfree_complete; - - ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf); - if (ret < 0) - kfree(buf); - - return ret; -} - -static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, - void *flow, u32 swid, gfp_t flags) -{ - struct mlx5_fpga_tls_command_context *ctx; - struct mlx5_fpga_dma_buf *buf; - void *cmd; - - ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags); - if (!ctx) - return; - - buf = &ctx->buf; - cmd = (ctx + 1); - MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); - MLX5_SET(tls_cmd, cmd, swid, swid); - - mlx5_fpga_tls_flow_to_cmd(flow, cmd); - kfree(flow); - - buf->sg[0].data = cmd; - buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - - mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, - mlx5_fpga_tls_teardown_completion); -} - -void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - gfp_t flags, bool direction_sx) -{ - struct mlx5_fpga_tls *tls = mdev->fpga->tls; - void *flow; - - if (direction_sx) - flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, - &tls->tx_idr_spinlock, - swid); - else - flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, - &tls->rx_idr_spinlock, - swid); - - if (!flow) { - mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n", - swid); - return; - } - - synchronize_rcu(); /* before kfree(flow) */ - mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); -} - -enum mlx5_fpga_setup_stream_status { - MLX5_FPGA_CMD_PENDING, - MLX5_FPGA_CMD_SEND_FAILED, - MLX5_FPGA_CMD_RESPONSE_RECEIVED, - MLX5_FPGA_CMD_ABANDONED, -}; - -struct mlx5_setup_stream_context { - struct mlx5_fpga_tls_command_context cmd; - atomic_t status; - u32 syndrome; - struct completion comp; -}; - -static void -mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn, - struct mlx5_fpga_device *fdev, - struct mlx5_fpga_tls_command_context *cmd, - struct mlx5_fpga_dma_buf *resp) -{ - struct mlx5_setup_stream_context *ctx = - container_of(cmd, struct mlx5_setup_stream_context, cmd); - int status = MLX5_FPGA_CMD_SEND_FAILED; - void *tls_cmd = ctx + 1; - - /* If we failed to send to command resp == NULL */ - if (resp) { - ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); - status = MLX5_FPGA_CMD_RESPONSE_RECEIVED; - } - - status = atomic_xchg_release(&ctx->status, status); - if (likely(status != MLX5_FPGA_CMD_ABANDONED)) { - complete(&ctx->comp); - return; - } - - mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n", - ctx->syndrome); - - if (!ctx->syndrome) { - /* The process was killed while waiting for the context to be - * added, and the add completed successfully. - * We need to destroy the HW context, and we can't can't reuse - * the command context because we might not have received - * the tx completion yet. - */ - mlx5_fpga_tls_del_flow(fdev->mdev, - MLX5_GET(tls_cmd, tls_cmd, swid), - GFP_ATOMIC, - MLX5_GET(tls_cmd, tls_cmd, - direction_sx)); - } - - mlx5_fpga_tls_put_command_ctx(cmd); -} - -static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev, - struct mlx5_setup_stream_context *ctx) -{ - struct mlx5_fpga_dma_buf *buf; - void *cmd = ctx + 1; - int status, ret = 0; - - buf = &ctx->cmd.buf; - buf->sg[0].data = cmd; - buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM); - - init_completion(&ctx->comp); - atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING); - ctx->syndrome = -1; - - mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, - mlx5_fpga_tls_setup_completion); - wait_for_completion_killable(&ctx->comp); - - status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED); - if (unlikely(status == MLX5_FPGA_CMD_PENDING)) - /* ctx is going to be released in mlx5_fpga_tls_setup_completion */ - return -EINTR; - - if (unlikely(ctx->syndrome)) - ret = -ENOMEM; - - mlx5_fpga_tls_put_command_ctx(&ctx->cmd); - return ret; -} - -static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg, - struct mlx5_fpga_dma_buf *buf) -{ - struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg; - - mlx5_fpga_tls_cmd_complete(fdev, buf); -} - -bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev) -{ - if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) - return false; - - if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) != - MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX) - return false; - - if (MLX5_CAP_FPGA(mdev, sandbox_product_id) != - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS) - return false; - - if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0) - return false; - - return true; -} - -static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev, - u32 *p_caps) -{ - int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap); - u32 caps = 0; - void *buf; - - buf = kzalloc(cap_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf); - if (err) - goto out; - - if (MLX5_GET(tls_extended_cap, buf, tx)) - caps |= MLX5_ACCEL_TLS_TX; - if (MLX5_GET(tls_extended_cap, buf, rx)) - caps |= MLX5_ACCEL_TLS_RX; - if (MLX5_GET(tls_extended_cap, buf, tls_v12)) - caps |= MLX5_ACCEL_TLS_V12; - if (MLX5_GET(tls_extended_cap, buf, tls_v13)) - caps |= MLX5_ACCEL_TLS_V13; - if (MLX5_GET(tls_extended_cap, buf, lro)) - caps |= MLX5_ACCEL_TLS_LRO; - if (MLX5_GET(tls_extended_cap, buf, ipv6)) - caps |= MLX5_ACCEL_TLS_IPV6; - - if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128)) - caps |= MLX5_ACCEL_TLS_AES_GCM128; - if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256)) - caps |= MLX5_ACCEL_TLS_AES_GCM256; - - *p_caps = caps; - err = 0; -out: - kfree(buf); - return err; -} - -int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - struct mlx5_fpga_conn_attr init_attr = {0}; - struct mlx5_fpga_conn *conn; - struct mlx5_fpga_tls *tls; - int err = 0; - - if (!mlx5_fpga_is_tls_device(mdev) || !fdev) - return 0; - - tls = kzalloc(sizeof(*tls), GFP_KERNEL); - if (!tls) - return -ENOMEM; - - err = mlx5_fpga_tls_get_caps(fdev, &tls->caps); - if (err) - goto error; - - if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) { - err = -ENOTSUPP; - goto error; - } - - init_attr.rx_size = SBU_QP_QUEUE_SIZE; - init_attr.tx_size = SBU_QP_QUEUE_SIZE; - init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb; - init_attr.cb_arg = fdev; - conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr); - if (IS_ERR(conn)) { - err = PTR_ERR(conn); - mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n", - err); - goto error; - } - - tls->conn = conn; - spin_lock_init(&tls->pending_cmds_lock); - INIT_LIST_HEAD(&tls->pending_cmds); - - idr_init(&tls->tx_idr); - idr_init(&tls->rx_idr); - spin_lock_init(&tls->tx_idr_spinlock); - spin_lock_init(&tls->rx_idr_spinlock); - fdev->tls = tls; - return 0; - -error: - kfree(tls); - return err; -} - -void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev) -{ - struct mlx5_fpga_device *fdev = mdev->fpga; - - if (!fdev || !fdev->tls) - return; - - mlx5_fpga_sbu_conn_destroy(fdev->tls->conn); - kfree(fdev->tls); - fdev->tls = NULL; -} - -static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd, - struct tls_crypto_info *info, - __be64 *rcd_sn) -{ - struct tls12_crypto_info_aes_gcm_128 *crypto_info = - (struct tls12_crypto_info_aes_gcm_128 *)info; - - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq, - TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); - - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv), - crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key), - crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); - - /* in AES-GCM 128 we need to write the key twice */ - memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) + - TLS_CIPHER_AES_GCM_128_KEY_SIZE, - crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); - - MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128); -} - -static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps, - struct tls_crypto_info *crypto_info) -{ - __be64 rcd_sn; - - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - if (!(caps & MLX5_ACCEL_TLS_AES_GCM128)) - return -EINVAL; - mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn); - break; - default: - return -EINVAL; - } - - return 0; -} - -static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 swid, u32 tcp_sn) -{ - u32 caps = mlx5_fpga_tls_device_caps(mdev); - struct mlx5_setup_stream_context *ctx; - int ret = -ENOMEM; - size_t cmd_size; - void *cmd; - - cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx); - ctx = kzalloc(cmd_size, GFP_KERNEL); - if (!ctx) - goto out; - - cmd = ctx + 1; - ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info); - if (ret) - goto free_ctx; - - mlx5_fpga_tls_flow_to_cmd(flow, cmd); - - MLX5_SET(tls_cmd, cmd, swid, swid); - MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn); - - return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx); - -free_ctx: - kfree(ctx); -out: - return ret; -} - -int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx) -{ - struct mlx5_fpga_tls *tls = mdev->fpga->tls; - int ret = -ENOMEM; - u32 swid; - - if (direction_sx) - ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr, - &tls->tx_idr_spinlock, flow); - else - ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr, - &tls->rx_idr_spinlock, flow); - - if (ret < 0) - return ret; - - swid = ret; - MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0); - - ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid, - start_offload_tcp_sn); - if (ret && ret != -EINTR) - goto free_swid; - - *p_swid = swid; - return 0; -free_swid: - if (direction_sx) - mlx5_fpga_tls_release_swid(&tls->tx_idr, - &tls->tx_idr_spinlock, swid); - else - mlx5_fpga_tls_release_swid(&tls->rx_idr, - &tls->rx_idr_spinlock, swid); - - return ret; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h deleted file mode 100644 index 5714cf391d1b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef __MLX5_FPGA_TLS_H__ -#define __MLX5_FPGA_TLS_H__ - -#include <linux/mlx5/driver.h> - -#include <net/tls.h> -#include "fpga/core.h" - -struct mlx5_fpga_tls { - struct list_head pending_cmds; - spinlock_t pending_cmds_lock; /* Protects pending_cmds */ - u32 caps; - struct mlx5_fpga_conn *conn; - - struct idr tx_idr; - struct idr rx_idr; - spinlock_t tx_idr_spinlock; /* protects the IDR */ - spinlock_t rx_idr_spinlock; /* protects the IDR */ -}; - -int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, - struct tls_crypto_info *crypto_info, - u32 start_offload_tcp_sn, u32 *p_swid, - bool direction_sx); - -void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, - gfp_t flags, bool direction_sx); - -bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev); -int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev); -void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev); - -static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev) -{ - return mdev->fpga->tls->caps; -} - -int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle, - u32 seq, __be64 rcd_sn); - -#endif /* __MLX5_FPGA_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index a0ac17c3f12f..33e9f86cf7d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -878,9 +878,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, table_type = FS_FT_NIC_RX; break; case MLX5_FLOW_NAMESPACE_EGRESS: -#ifdef CONFIG_MLX5_IPSEC case MLX5_FLOW_NAMESPACE_EGRESS_KERNEL: -#endif max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); table_type = FS_FT_NIC_TX; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 816d991f7621..297e6a468a3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,8 +40,6 @@ #include "fs_cmd.h" #include "fs_ft_pool.h" #include "diag/fs_tracepoint.h" -#include "accel/ipsec.h" -#include "fpga/ipsec.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -188,24 +186,18 @@ static struct init_tree_node { static struct init_tree_node egress_root_fs = { .type = FS_TYPE_NAMESPACE, -#ifdef CONFIG_MLX5_IPSEC .ar_size = 2, -#else - .ar_size = 1, -#endif .children = (struct init_tree_node[]) { ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, FS_CHAINING_CAPS_EGRESS, ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), -#ifdef CONFIG_MLX5_IPSEC ADD_PRIO(0, KERNEL_TX_MIN_LEVEL, 0, FS_CHAINING_CAPS_EGRESS, ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS, KERNEL_TX_IPSEC_NUM_LEVELS))), -#endif } }; @@ -2519,10 +2511,6 @@ static struct mlx5_flow_root_namespace struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_namespace *ns; - if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && - (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX)) - cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type); - /* Create the root namespace */ root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL); if (!root_ns) @@ -3172,8 +3160,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } - if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE || - MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { err = init_egress_root_ns(steering); if (err) goto err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 614687e0e3d9..cfb8bedba512 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -35,7 +35,6 @@ #include "mlx5_core.h" #include "../../mlxfw/mlxfw.h" #include "lib/tout.h" -#include "accel/tls.h" enum { MCQS_IDENTIFIER_BOOT_IMG = 0x1, @@ -249,7 +248,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } - if (mlx5_accel_is_ktls_tx(dev) || mlx5_accel_is_ktls_rx(dev)) { + if (MLX5_CAP_GEN(dev, tls_tx) || MLX5_CAP_GEN(dev, tls_rx)) { err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2589e39eb9c7..d504c8cb8f96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -62,9 +62,7 @@ #include "lib/mlx5.h" #include "lib/tout.h" #include "fpga/core.h" -#include "fpga/ipsec.h" -#include "accel/ipsec.h" -#include "accel/tls.h" +#include "en_accel/ipsec_offload.h" #include "lib/clock.h" #include "lib/vxlan.h" #include "lib/geneve.h" @@ -1183,14 +1181,6 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_fpga_start; } - mlx5_accel_ipsec_init(dev); - - err = mlx5_accel_tls_init(dev); - if (err) { - mlx5_core_err(dev, "TLS device start failed %d\n", err); - goto err_tls_start; - } - err = mlx5_init_fs(dev); if (err) { mlx5_core_err(dev, "Failed to init flow steering\n"); @@ -1238,9 +1228,6 @@ err_vhca: err_set_hca: mlx5_cleanup_fs(dev); err_fs: - mlx5_accel_tls_cleanup(dev); -err_tls_start: - mlx5_accel_ipsec_cleanup(dev); mlx5_fpga_device_stop(dev); err_fpga_start: mlx5_rsc_dump_cleanup(dev); @@ -1266,8 +1253,6 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_sf_hw_table_destroy(dev); mlx5_vhca_event_stop(dev); mlx5_cleanup_fs(dev); - mlx5_accel_ipsec_cleanup(dev); - mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); mlx5_rsc_dump_cleanup(dev); mlx5_hv_vhca_cleanup(dev->hv_vhca); @@ -1947,7 +1932,6 @@ static int __init init(void) get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); mlx5_core_verify_params(); - mlx5_fpga_ipsec_build_fs_cmds(); mlx5_register_debugfs(); err = pci_register_driver(&mlx5_core_driver); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 29a74b8bd5b5..f1bb243dfb8c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -69,8 +69,8 @@ mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp, if (err) return err; - mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1, - MLXSW_REG_MCIA_I2C_ADDR_LOW); + mlxsw_reg_mcia_pack(mcia_pl, 0, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, + 1, MLXSW_REG_MCIA_I2C_ADDR_LOW); err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); if (err) return err; @@ -145,7 +145,8 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module, } } - mlxsw_reg_mcia_pack(mcia_pl, module, 0, page, offset, size, i2c_addr); + mlxsw_reg_mcia_pack(mcia_pl, 0, module, 0, page, offset, size, + i2c_addr); err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl); if (err) @@ -177,7 +178,7 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, int page; int err; - mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, + mlxsw_reg_mtmp_pack(mtmp_pl, 0, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, false, false); err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); if (err) @@ -219,12 +220,12 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, page = MLXSW_REG_MCIA_TH_PAGE_CMIS_NUM; else page = MLXSW_REG_MCIA_TH_PAGE_NUM; - mlxsw_reg_mcia_pack(mcia_pl, module, 0, page, + mlxsw_reg_mcia_pack(mcia_pl, 0, module, 0, page, MLXSW_REG_MCIA_TH_PAGE_OFF + off, MLXSW_REG_MCIA_TH_ITEM_SIZE, MLXSW_REG_MCIA_I2C_ADDR_LOW); } else { - mlxsw_reg_mcia_pack(mcia_pl, module, 0, + mlxsw_reg_mcia_pack(mcia_pl, 0, module, 0, MLXSW_REG_MCIA_PAGE0_LO, off, MLXSW_REG_MCIA_TH_ITEM_SIZE, MLXSW_REG_MCIA_I2C_ADDR_HIGH); @@ -419,7 +420,7 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module, size = min_t(u8, page->length - bytes_read, MLXSW_REG_MCIA_EEPROM_SIZE); - mlxsw_reg_mcia_pack(mcia_pl, module, 0, page->page, + mlxsw_reg_mcia_pack(mcia_pl, 0, module, 0, page->page, device_addr + bytes_read, size, page->i2c_address); mlxsw_reg_mcia_bank_number_set(mcia_pl, page->bank); @@ -447,7 +448,7 @@ static int mlxsw_env_module_reset(struct mlxsw_core *mlxsw_core, u8 module) { char pmaos_pl[MLXSW_REG_PMAOS_LEN]; - mlxsw_reg_pmaos_pack(pmaos_pl, module); + mlxsw_reg_pmaos_pack(pmaos_pl, 0, module); mlxsw_reg_pmaos_rst_set(pmaos_pl, true); return mlxsw_reg_write(mlxsw_core, MLXSW_REG(pmaos), pmaos_pl); @@ -519,7 +520,7 @@ mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module, params->policy = mlxsw_env->module_info[module].power_mode_policy; - mlxsw_reg_mcion_pack(mcion_pl, module); + mlxsw_reg_mcion_pack(mcion_pl, 0, module); err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcion), mcion_pl); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to retrieve module's power mode"); @@ -547,7 +548,7 @@ static int mlxsw_env_module_enable_set(struct mlxsw_core *mlxsw_core, enum mlxsw_reg_pmaos_admin_status admin_status; char pmaos_pl[MLXSW_REG_PMAOS_LEN]; - mlxsw_reg_pmaos_pack(pmaos_pl, module); + mlxsw_reg_pmaos_pack(pmaos_pl, 0, module); admin_status = enable ? MLXSW_REG_PMAOS_ADMIN_STATUS_ENABLED : MLXSW_REG_PMAOS_ADMIN_STATUS_DISABLED; mlxsw_reg_pmaos_admin_status_set(pmaos_pl, admin_status); @@ -562,7 +563,7 @@ static int mlxsw_env_module_low_power_set(struct mlxsw_core *mlxsw_core, u16 eeprom_override_mask, eeprom_override; char pmmp_pl[MLXSW_REG_PMMP_LEN]; - mlxsw_reg_pmmp_pack(pmmp_pl, module); + mlxsw_reg_pmmp_pack(pmmp_pl, 0, module); mlxsw_reg_pmmp_sticky_set(pmmp_pl, true); /* Mask all the bits except low power mode. */ eeprom_override_mask = ~MLXSW_REG_PMMP_EEPROM_OVERRIDE_LOW_POWER_MASK; @@ -660,8 +661,8 @@ static int mlxsw_env_module_has_temp_sensor(struct mlxsw_core *mlxsw_core, u16 temp; int err; - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, - 1); + mlxsw_reg_mtbr_pack(mtbr_pl, 0, + MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, 1); err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mtbr), mtbr_pl); if (err) return err; @@ -930,7 +931,7 @@ mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core) for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) { char pmaos_pl[MLXSW_REG_PMAOS_LEN]; - mlxsw_reg_pmaos_pack(pmaos_pl, i); + mlxsw_reg_pmaos_pack(pmaos_pl, 0, i); mlxsw_reg_pmaos_e_set(pmaos_pl, MLXSW_REG_PMAOS_E_GENERATE_EVENT); mlxsw_reg_pmaos_ee_set(pmaos_pl, true); @@ -1059,12 +1060,12 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env) u8 module_count; int i, err; - mlxsw_reg_mgpir_pack(mgpir_pl); + mlxsw_reg_mgpir_pack(mgpir_pl, 0); err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl); if (err) return err; - mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &module_count); + mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &module_count, NULL); env = kzalloc(struct_size(env, module_info, module_count), GFP_KERNEL); if (!env) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 8b170ad92302..2bc4c4556895 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -66,7 +66,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, mlxsw_hwmon->module_sensor_max); - mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); + mlxsw_reg_mtmp_pack(mtmp_pl, 0, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); @@ -89,7 +89,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index, mlxsw_hwmon->module_sensor_max); - mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); + mlxsw_reg_mtmp_pack(mtmp_pl, 0, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); @@ -232,8 +232,9 @@ static int mlxsw_hwmon_module_temp_get(struct device *dev, int err; module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; - mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, - false, false); + mlxsw_reg_mtmp_pack(mtmp_pl, 0, + MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, false, + false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(dev, "Failed to query module temperature\n"); @@ -270,8 +271,8 @@ static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, int err; module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, - 1); + mlxsw_reg_mtbr_pack(mtbr_pl, 0, + MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, 1); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); if (err) { dev_err(dev, "Failed to query module temperature sensor\n"); @@ -655,13 +656,13 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) u8 module_sensor_max; int i, err; - mlxsw_reg_mgpir_pack(mgpir_pl); + mlxsw_reg_mgpir_pack(mgpir_pl, 0); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); if (err) return err; mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, - &module_sensor_max); + &module_sensor_max, NULL); /* Add extra attributes for module temperature. Sensor index is * assigned to sensor_count value, while all indexed before @@ -706,12 +707,13 @@ static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) u8 gbox_num; int err; - mlxsw_reg_mgpir_pack(mgpir_pl); + mlxsw_reg_mgpir_pack(mgpir_pl, 0); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); if (err) return err; - mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, NULL); + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, NULL, + NULL); if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE || !gbox_num) return 0; @@ -721,7 +723,7 @@ static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) while (index < max_index) { sensor_index = index % mlxsw_hwmon->module_sensor_max + MLXSW_REG_MTMP_GBOX_INDEX_MIN; - mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true); + mlxsw_reg_mtmp_pack(mtmp_pl, 0, sensor_index, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 05f54bd982c0..adb2820430b1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -271,7 +271,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, int temp; int err; - mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false); + mlxsw_reg_mtmp_pack(mtmp_pl, 0, 0, false, false); err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { @@ -431,7 +431,7 @@ mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core, int err; /* Read module temperature and thresholds. */ - mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, false, false); + mlxsw_reg_mtmp_pack(mtmp_pl, 0, sensor_index, false, false); err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); if (err) { /* Set temperature and thresholds to zero to avoid passing @@ -576,7 +576,7 @@ static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, int err; index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module; - mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); + mlxsw_reg_mtmp_pack(mtmp_pl, 0, index, false, false); err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); if (err) @@ -746,13 +746,13 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, char mgpir_pl[MLXSW_REG_MGPIR_LEN]; int i, err; - mlxsw_reg_mgpir_pack(mgpir_pl); + mlxsw_reg_mgpir_pack(mgpir_pl, 0); err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); if (err) return err; mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, - &thermal->tz_module_num); + &thermal->tz_module_num, NULL); thermal->tz_module_arr = kcalloc(thermal->tz_module_num, sizeof(*thermal->tz_module_arr), @@ -837,13 +837,13 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, int i; int err; - mlxsw_reg_mgpir_pack(mgpir_pl); + mlxsw_reg_mgpir_pack(mgpir_pl, 0); err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); if (err) return err; mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, - NULL); + NULL, NULL); if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE || !gbox_num) return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 67b1a2f8397f..b8a236872fea 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5769,9 +5769,10 @@ enum mlxsw_reg_pmaos_e { */ MLXSW_ITEM32(reg, pmaos, e, 0x04, 0, 2); -static inline void mlxsw_reg_pmaos_pack(char *payload, u8 module) +static inline void mlxsw_reg_pmaos_pack(char *payload, u8 slot_index, u8 module) { MLXSW_REG_ZERO(pmaos, payload); + mlxsw_reg_pmaos_slot_index_set(payload, slot_index); mlxsw_reg_pmaos_module_set(payload, module); } @@ -5984,6 +5985,12 @@ MLXSW_REG_DEFINE(pmmp, MLXSW_REG_PMMP_ID, MLXSW_REG_PMMP_LEN); */ MLXSW_ITEM32(reg, pmmp, module, 0x00, 16, 8); +/* reg_pmmp_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, pmmp, slot_index, 0x00, 24, 4); + /* reg_pmmp_sticky * When set, will keep eeprom_override values after plug-out event. * Access: OP @@ -6011,9 +6018,10 @@ enum { */ MLXSW_ITEM32(reg, pmmp, eeprom_override, 0x04, 0, 16); -static inline void mlxsw_reg_pmmp_pack(char *payload, u8 module) +static inline void mlxsw_reg_pmmp_pack(char *payload, u8 slot_index, u8 module) { MLXSW_REG_ZERO(pmmp, payload); + mlxsw_reg_pmmp_slot_index_set(payload, slot_index); mlxsw_reg_pmmp_module_set(payload, module); } @@ -9721,6 +9729,12 @@ MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7); MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN); +/* reg_mtmp_slot_index + * Slot index (0: Main board). + * Access: Index + */ +MLXSW_ITEM32(reg, mtmp, slot_index, 0x00, 16, 4); + #define MLXSW_REG_MTMP_MODULE_INDEX_MIN 64 #define MLXSW_REG_MTMP_GBOX_INDEX_MIN 256 /* reg_mtmp_sensor_index @@ -9810,11 +9824,12 @@ MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16); */ MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE); -static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index, - bool max_temp_enable, +static inline void mlxsw_reg_mtmp_pack(char *payload, u8 slot_index, + u16 sensor_index, bool max_temp_enable, bool max_temp_reset) { MLXSW_REG_ZERO(mtmp, payload); + mlxsw_reg_mtmp_slot_index_set(payload, slot_index); mlxsw_reg_mtmp_sensor_index_set(payload, sensor_index); mlxsw_reg_mtmp_mte_set(payload, max_temp_enable); mlxsw_reg_mtmp_mtr_set(payload, max_temp_reset); @@ -9880,6 +9895,12 @@ MLXSW_ITEM_BIT_ARRAY(reg, mtwe, sensor_warning, 0x0, 0x10, 1); MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN); +/* reg_mtbr_slot_index + * Slot index (0: Main board). + * Access: Index + */ +MLXSW_ITEM32(reg, mtbr, slot_index, 0x00, 16, 4); + /* reg_mtbr_base_sensor_index * Base sensors index to access (0 - ASIC sensor, 1-63 - ambient sensors, * 64-127 are mapped to the SFP+/QSFP modules sequentially). @@ -9912,10 +9933,11 @@ MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16, MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16, MLXSW_REG_MTBR_REC_LEN, 0x00, false); -static inline void mlxsw_reg_mtbr_pack(char *payload, u16 base_sensor_index, - u8 num_rec) +static inline void mlxsw_reg_mtbr_pack(char *payload, u8 slot_index, + u16 base_sensor_index, u8 num_rec) { MLXSW_REG_ZERO(mtbr, payload); + mlxsw_reg_mtbr_slot_index_set(payload, slot_index); mlxsw_reg_mtbr_base_sensor_index_set(payload, base_sensor_index); mlxsw_reg_mtbr_num_rec_set(payload, num_rec); } @@ -9964,6 +9986,12 @@ MLXSW_ITEM32(reg, mcia, l, 0x00, 31, 1); */ MLXSW_ITEM32(reg, mcia, module, 0x00, 16, 8); +/* reg_mcia_slot_index + * Slot index (0: Main board) + * Access: Index + */ +MLXSW_ITEM32(reg, mcia, slot, 0x00, 12, 4); + enum { MLXSW_REG_MCIA_STATUS_GOOD = 0, /* No response from module's EEPROM. */ @@ -10063,11 +10091,13 @@ MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE); MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) / \ MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH + 1) -static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock, - u8 page_number, u16 device_addr, - u8 size, u8 i2c_device_addr) +static inline void mlxsw_reg_mcia_pack(char *payload, u8 slot_index, u8 module, + u8 lock, u8 page_number, + u16 device_addr, u8 size, + u8 i2c_device_addr) { MLXSW_REG_ZERO(mcia, payload); + mlxsw_reg_mcia_slot_set(payload, slot_index); mlxsw_reg_mcia_module_set(payload, module); mlxsw_reg_mcia_l_set(payload, lock); mlxsw_reg_mcia_page_number_set(payload, page_number); @@ -10499,6 +10529,12 @@ MLXSW_REG_DEFINE(mcion, MLXSW_REG_MCION_ID, MLXSW_REG_MCION_LEN); */ MLXSW_ITEM32(reg, mcion, module, 0x00, 16, 8); +/* reg_mcion_slot_index + * Slot index. + * Access: Index + */ +MLXSW_ITEM32(reg, mcion, slot_index, 0x00, 12, 4); + enum { MLXSW_REG_MCION_MODULE_STATUS_BITS_PRESENT_MASK = BIT(0), MLXSW_REG_MCION_MODULE_STATUS_BITS_LOW_POWER_MASK = BIT(8), @@ -10510,9 +10546,10 @@ enum { */ MLXSW_ITEM32(reg, mcion, module_status_bits, 0x04, 0, 16); -static inline void mlxsw_reg_mcion_pack(char *payload, u8 module) +static inline void mlxsw_reg_mcion_pack(char *payload, u8 slot_index, u8 module) { MLXSW_REG_ZERO(mcion, payload); + mlxsw_reg_mcion_slot_index_set(payload, slot_index); mlxsw_reg_mcion_module_set(payload, module); } @@ -11326,6 +11363,12 @@ enum mlxsw_reg_mgpir_device_type { MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE, }; +/* mgpir_slot_index + * Slot index (0: Main board). + * Access: Index + */ +MLXSW_ITEM32(reg, mgpir, slot_index, 0x00, 28, 4); + /* mgpir_device_type * Access: RO */ @@ -11343,21 +11386,35 @@ MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8); */ MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8); +/* max_modules_per_slot + * Maximum number of modules that can be connected per slot. + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, max_modules_per_slot, 0x04, 16, 8); + +/* mgpir_num_of_slots + * Number of slots in the system. + * Access: RO + */ +MLXSW_ITEM32(reg, mgpir, num_of_slots, 0x04, 8, 8); + /* mgpir_num_of_modules * Number of modules. * Access: RO */ MLXSW_ITEM32(reg, mgpir, num_of_modules, 0x04, 0, 8); -static inline void mlxsw_reg_mgpir_pack(char *payload) +static inline void mlxsw_reg_mgpir_pack(char *payload, u8 slot_index) { MLXSW_REG_ZERO(mgpir, payload); + mlxsw_reg_mgpir_slot_index_set(payload, slot_index); } static inline void mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, enum mlxsw_reg_mgpir_device_type *device_type, - u8 *devices_per_flash, u8 *num_of_modules) + u8 *devices_per_flash, u8 *num_of_modules, + u8 *num_of_slots) { if (num_of_devices) *num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload); @@ -11368,6 +11425,8 @@ mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices, mlxsw_reg_mgpir_devices_per_flash_get(payload); if (num_of_modules) *num_of_modules = mlxsw_reg_mgpir_num_of_modules_get(payload); + if (num_of_slots) + *num_of_slots = mlxsw_reg_mgpir_num_of_slots_get(payload); } /* MFDE - Monitoring FW Debug Register diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 79deb19e3a19..dc820d9f2696 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -21,6 +21,7 @@ #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> +#include <net/inet_dscp.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/nexthop.h> @@ -507,7 +508,7 @@ struct mlxsw_sp_fib4_entry { struct mlxsw_sp_fib_entry common; struct fib_info *fi; u32 tb_id; - u8 tos; + dscp_t dscp; u8 type; }; @@ -5559,7 +5560,7 @@ mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, common); - return !fib4_entry->tos; + return !fib4_entry->dscp; } static bool @@ -5620,7 +5621,7 @@ mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp, fri.tb_id = fen_info->tb_id; fri.dst = cpu_to_be32(*p_dst); fri.dst_len = fen_info->dst_len; - fri.tos = fen_info->tos; + fri.dscp = fen_info->dscp; fri.type = fen_info->type; fri.offload = false; fri.trap = false; @@ -5645,7 +5646,7 @@ mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, fri.tb_id = fib4_entry->tb_id; fri.dst = cpu_to_be32(*p_dst); fri.dst_len = dst_len; - fri.tos = fib4_entry->tos; + fri.dscp = fib4_entry->dscp; fri.type = fib4_entry->type; fri.offload = should_offload; fri.trap = !should_offload; @@ -5668,7 +5669,7 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, fri.tb_id = fib4_entry->tb_id; fri.dst = cpu_to_be32(*p_dst); fri.dst_len = dst_len; - fri.tos = fib4_entry->tos; + fri.dscp = fib4_entry->dscp; fri.type = fib4_entry->type; fri.offload = false; fri.trap = false; @@ -6250,7 +6251,7 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, fib_info_hold(fib4_entry->fi); fib4_entry->tb_id = fen_info->tb_id; fib4_entry->type = fen_info->type; - fib4_entry->tos = fen_info->tos; + fib4_entry->dscp = fen_info->dscp; fib_entry->fib_node = fib_node; @@ -6304,7 +6305,7 @@ mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, fib4_entry = container_of(fib_node->fib_entry, struct mlxsw_sp_fib4_entry, common); if (fib4_entry->tb_id == fen_info->tb_id && - fib4_entry->tos == fen_info->tos && + fib4_entry->dscp == fen_info->dscp && fib4_entry->type == fen_info->type && fib4_entry->fi == fen_info->fi) return fib4_entry; @@ -7010,7 +7011,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); if (IS_ERR(mlxsw_sp_rt6)) { err = PTR_ERR(mlxsw_sp_rt6); - goto err_rt6_create; + goto err_rt6_unwind; } list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); @@ -7019,14 +7020,12 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry); if (err) - goto err_nexthop6_group_update; + goto err_rt6_unwind; return 0; -err_nexthop6_group_update: - i = nrt6; -err_rt6_create: - for (i--; i >= 0; i--) { +err_rt6_unwind: + for (; i > 0; i--) { fib6_entry->nrt6--; mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, list); @@ -7154,7 +7153,7 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); if (IS_ERR(mlxsw_sp_rt6)) { err = PTR_ERR(mlxsw_sp_rt6); - goto err_rt6_create; + goto err_rt6_unwind; } list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); fib6_entry->nrt6++; @@ -7162,7 +7161,7 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); if (err) - goto err_nexthop6_group_get; + goto err_rt6_unwind; err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group, fib_node->fib); @@ -7181,10 +7180,8 @@ err_fib6_entry_type_set: mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib); err_nexthop_group_vr_link: mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry); -err_nexthop6_group_get: - i = nrt6; -err_rt6_create: - for (i--; i >= 0; i--) { +err_rt6_unwind: + for (; i > 0; i--) { fib6_entry->nrt6--; mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, list); diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile index a9ffc719aa0e..fd2e0ebb2427 100644 --- a/drivers/net/ethernet/microchip/lan966x/Makefile +++ b/drivers/net/ethernet/microchip/lan966x/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \ lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \ lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \ - lan966x_ptp.o + lan966x_ptp.o lan966x_fdma.o diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c new file mode 100644 index 000000000000..9e2a7323eaf0 --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -0,0 +1,842 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "lan966x_main.h" + +static int lan966x_fdma_channel_active(struct lan966x *lan966x) +{ + return lan_rd(lan966x, FDMA_CH_ACTIVE); +} + +static struct page *lan966x_fdma_rx_alloc_page(struct lan966x_rx *rx, + struct lan966x_db *db) +{ + struct lan966x *lan966x = rx->lan966x; + dma_addr_t dma_addr; + struct page *page; + + page = dev_alloc_pages(rx->page_order); + if (unlikely(!page)) + return NULL; + + dma_addr = dma_map_page(lan966x->dev, page, 0, + PAGE_SIZE << rx->page_order, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(lan966x->dev, dma_addr))) + goto free_page; + + db->dataptr = dma_addr; + + return page; + +free_page: + __free_pages(page, rx->page_order); + return NULL; +} + +static void lan966x_fdma_rx_free_pages(struct lan966x_rx *rx) +{ + struct lan966x *lan966x = rx->lan966x; + struct lan966x_rx_dcb *dcb; + struct lan966x_db *db; + int i, j; + + for (i = 0; i < FDMA_DCB_MAX; ++i) { + dcb = &rx->dcbs[i]; + + for (j = 0; j < FDMA_RX_DCB_MAX_DBS; ++j) { + db = &dcb->db[j]; + dma_unmap_single(lan966x->dev, + (dma_addr_t)db->dataptr, + PAGE_SIZE << rx->page_order, + DMA_FROM_DEVICE); + __free_pages(rx->page[i][j], rx->page_order); + } + } +} + +static void lan966x_fdma_rx_add_dcb(struct lan966x_rx *rx, + struct lan966x_rx_dcb *dcb, + u64 nextptr) +{ + struct lan966x_db *db; + int i; + + for (i = 0; i < FDMA_RX_DCB_MAX_DBS; ++i) { + db = &dcb->db[i]; + db->status = FDMA_DCB_STATUS_INTR; + } + + dcb->nextptr = FDMA_DCB_INVALID_DATA; + dcb->info = FDMA_DCB_INFO_DATAL(PAGE_SIZE << rx->page_order); + + rx->last_entry->nextptr = nextptr; + rx->last_entry = dcb; +} + +static int lan966x_fdma_rx_alloc(struct lan966x_rx *rx) +{ + struct lan966x *lan966x = rx->lan966x; + struct lan966x_rx_dcb *dcb; + struct lan966x_db *db; + struct page *page; + int i, j; + int size; + + /* calculate how many pages are needed to allocate the dcbs */ + size = sizeof(struct lan966x_rx_dcb) * FDMA_DCB_MAX; + size = ALIGN(size, PAGE_SIZE); + + rx->dcbs = dma_alloc_coherent(lan966x->dev, size, &rx->dma, GFP_KERNEL); + if (!rx->dcbs) + return -ENOMEM; + + rx->last_entry = rx->dcbs; + rx->db_index = 0; + rx->dcb_index = 0; + + /* Now for each dcb allocate the dbs */ + for (i = 0; i < FDMA_DCB_MAX; ++i) { + dcb = &rx->dcbs[i]; + dcb->info = 0; + + /* For each db allocate a page and map it to the DB dataptr. */ + for (j = 0; j < FDMA_RX_DCB_MAX_DBS; ++j) { + db = &dcb->db[j]; + page = lan966x_fdma_rx_alloc_page(rx, db); + if (!page) + return -ENOMEM; + + db->status = 0; + rx->page[i][j] = page; + } + + lan966x_fdma_rx_add_dcb(rx, dcb, rx->dma + sizeof(*dcb) * i); + } + + return 0; +} + +static void lan966x_fdma_rx_free(struct lan966x_rx *rx) +{ + struct lan966x *lan966x = rx->lan966x; + u32 size; + + /* Now it is possible to do the cleanup of dcb */ + size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX; + size = ALIGN(size, PAGE_SIZE); + dma_free_coherent(lan966x->dev, size, rx->dcbs, rx->dma); +} + +static void lan966x_fdma_rx_start(struct lan966x_rx *rx) +{ + struct lan966x *lan966x = rx->lan966x; + u32 mask; + + /* When activating a channel, first is required to write the first DCB + * address and then to activate it + */ + lan_wr(lower_32_bits((u64)rx->dma), lan966x, + FDMA_DCB_LLP(rx->channel_id)); + lan_wr(upper_32_bits((u64)rx->dma), lan966x, + FDMA_DCB_LLP1(rx->channel_id)); + + lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_RX_DCB_MAX_DBS) | + FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) | + FDMA_CH_CFG_CH_INJ_PORT_SET(0) | + FDMA_CH_CFG_CH_MEM_SET(1), + lan966x, FDMA_CH_CFG(rx->channel_id)); + + /* Start fdma */ + lan_rmw(FDMA_PORT_CTRL_XTR_STOP_SET(0), + FDMA_PORT_CTRL_XTR_STOP, + lan966x, FDMA_PORT_CTRL(0)); + + /* Enable interrupts */ + mask = lan_rd(lan966x, FDMA_INTR_DB_ENA); + mask = FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(mask); + mask |= BIT(rx->channel_id); + lan_rmw(FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(mask), + FDMA_INTR_DB_ENA_INTR_DB_ENA, + lan966x, FDMA_INTR_DB_ENA); + + /* Activate the channel */ + lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(rx->channel_id)), + FDMA_CH_ACTIVATE_CH_ACTIVATE, + lan966x, FDMA_CH_ACTIVATE); +} + +static void lan966x_fdma_rx_disable(struct lan966x_rx *rx) +{ + struct lan966x *lan966x = rx->lan966x; + u32 val; + + /* Disable the channel */ + lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(rx->channel_id)), + FDMA_CH_DISABLE_CH_DISABLE, + lan966x, FDMA_CH_DISABLE); + + readx_poll_timeout_atomic(lan966x_fdma_channel_active, lan966x, + val, !(val & BIT(rx->channel_id)), + READL_SLEEP_US, READL_TIMEOUT_US); + + lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(rx->channel_id)), + FDMA_CH_DB_DISCARD_DB_DISCARD, + lan966x, FDMA_CH_DB_DISCARD); +} + +static void lan966x_fdma_rx_reload(struct lan966x_rx *rx) +{ + struct lan966x *lan966x = rx->lan966x; + + lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(rx->channel_id)), + FDMA_CH_RELOAD_CH_RELOAD, + lan966x, FDMA_CH_RELOAD); +} + +static void lan966x_fdma_tx_add_dcb(struct lan966x_tx *tx, + struct lan966x_tx_dcb *dcb) +{ + dcb->nextptr = FDMA_DCB_INVALID_DATA; + dcb->info = 0; +} + +static int lan966x_fdma_tx_alloc(struct lan966x_tx *tx) +{ + struct lan966x *lan966x = tx->lan966x; + struct lan966x_tx_dcb *dcb; + struct lan966x_db *db; + int size; + int i, j; + + tx->dcbs_buf = kcalloc(FDMA_DCB_MAX, sizeof(struct lan966x_tx_dcb_buf), + GFP_KERNEL); + if (!tx->dcbs_buf) + return -ENOMEM; + + /* calculate how many pages are needed to allocate the dcbs */ + size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX; + size = ALIGN(size, PAGE_SIZE); + tx->dcbs = dma_alloc_coherent(lan966x->dev, size, &tx->dma, GFP_KERNEL); + if (!tx->dcbs) + goto out; + + /* Now for each dcb allocate the db */ + for (i = 0; i < FDMA_DCB_MAX; ++i) { + dcb = &tx->dcbs[i]; + + for (j = 0; j < FDMA_TX_DCB_MAX_DBS; ++j) { + db = &dcb->db[j]; + db->dataptr = 0; + db->status = 0; + } + + lan966x_fdma_tx_add_dcb(tx, dcb); + } + + return 0; + +out: + kfree(tx->dcbs_buf); + return -ENOMEM; +} + +static void lan966x_fdma_tx_free(struct lan966x_tx *tx) +{ + struct lan966x *lan966x = tx->lan966x; + int size; + + kfree(tx->dcbs_buf); + + size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX; + size = ALIGN(size, PAGE_SIZE); + dma_free_coherent(lan966x->dev, size, tx->dcbs, tx->dma); +} + +static void lan966x_fdma_tx_activate(struct lan966x_tx *tx) +{ + struct lan966x *lan966x = tx->lan966x; + u32 mask; + + /* When activating a channel, first is required to write the first DCB + * address and then to activate it + */ + lan_wr(lower_32_bits((u64)tx->dma), lan966x, + FDMA_DCB_LLP(tx->channel_id)); + lan_wr(upper_32_bits((u64)tx->dma), lan966x, + FDMA_DCB_LLP1(tx->channel_id)); + + lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_TX_DCB_MAX_DBS) | + FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) | + FDMA_CH_CFG_CH_INJ_PORT_SET(0) | + FDMA_CH_CFG_CH_MEM_SET(1), + lan966x, FDMA_CH_CFG(tx->channel_id)); + + /* Start fdma */ + lan_rmw(FDMA_PORT_CTRL_INJ_STOP_SET(0), + FDMA_PORT_CTRL_INJ_STOP, + lan966x, FDMA_PORT_CTRL(0)); + + /* Enable interrupts */ + mask = lan_rd(lan966x, FDMA_INTR_DB_ENA); + mask = FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(mask); + mask |= BIT(tx->channel_id); + lan_rmw(FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(mask), + FDMA_INTR_DB_ENA_INTR_DB_ENA, + lan966x, FDMA_INTR_DB_ENA); + + /* Activate the channel */ + lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(tx->channel_id)), + FDMA_CH_ACTIVATE_CH_ACTIVATE, + lan966x, FDMA_CH_ACTIVATE); +} + +static void lan966x_fdma_tx_disable(struct lan966x_tx *tx) +{ + struct lan966x *lan966x = tx->lan966x; + u32 val; + + /* Disable the channel */ + lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(tx->channel_id)), + FDMA_CH_DISABLE_CH_DISABLE, + lan966x, FDMA_CH_DISABLE); + + readx_poll_timeout_atomic(lan966x_fdma_channel_active, lan966x, + val, !(val & BIT(tx->channel_id)), + READL_SLEEP_US, READL_TIMEOUT_US); + + lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(tx->channel_id)), + FDMA_CH_DB_DISCARD_DB_DISCARD, + lan966x, FDMA_CH_DB_DISCARD); + + tx->activated = false; +} + +static void lan966x_fdma_tx_reload(struct lan966x_tx *tx) +{ + struct lan966x *lan966x = tx->lan966x; + + /* Write the registers to reload the channel */ + lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(tx->channel_id)), + FDMA_CH_RELOAD_CH_RELOAD, + lan966x, FDMA_CH_RELOAD); +} + +static void lan966x_fdma_wakeup_netdev(struct lan966x *lan966x) +{ + struct lan966x_port *port; + int i; + + for (i = 0; i < lan966x->num_phys_ports; ++i) { + port = lan966x->ports[i]; + if (!port) + continue; + + if (netif_queue_stopped(port->dev)) + netif_wake_queue(port->dev); + } +} + +static void lan966x_fdma_stop_netdev(struct lan966x *lan966x) +{ + struct lan966x_port *port; + int i; + + for (i = 0; i < lan966x->num_phys_ports; ++i) { + port = lan966x->ports[i]; + if (!port) + continue; + + netif_stop_queue(port->dev); + } +} + +static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight) +{ + struct lan966x_tx *tx = &lan966x->tx; + struct lan966x_tx_dcb_buf *dcb_buf; + struct lan966x_db *db; + unsigned long flags; + bool clear = false; + int i; + + spin_lock_irqsave(&lan966x->tx_lock, flags); + for (i = 0; i < FDMA_DCB_MAX; ++i) { + dcb_buf = &tx->dcbs_buf[i]; + + if (!dcb_buf->used) + continue; + + db = &tx->dcbs[i].db[0]; + if (!(db->status & FDMA_DCB_STATUS_DONE)) + continue; + + dcb_buf->dev->stats.tx_packets++; + dcb_buf->dev->stats.tx_bytes += dcb_buf->skb->len; + + dcb_buf->used = false; + dma_unmap_single(lan966x->dev, + dcb_buf->dma_addr, + dcb_buf->skb->len, + DMA_TO_DEVICE); + if (!dcb_buf->ptp) + dev_kfree_skb_any(dcb_buf->skb); + + clear = true; + } + + if (clear) + lan966x_fdma_wakeup_netdev(lan966x); + + spin_unlock_irqrestore(&lan966x->tx_lock, flags); +} + +static bool lan966x_fdma_rx_more_frames(struct lan966x_rx *rx) +{ + struct lan966x_db *db; + + /* Check if there is any data */ + db = &rx->dcbs[rx->dcb_index].db[rx->db_index]; + if (unlikely(!(db->status & FDMA_DCB_STATUS_DONE))) + return false; + + return true; +} + +static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx) +{ + struct lan966x *lan966x = rx->lan966x; + u64 src_port, timestamp; + struct lan966x_db *db; + struct sk_buff *skb; + struct page *page; + + /* Get the received frame and unmap it */ + db = &rx->dcbs[rx->dcb_index].db[rx->db_index]; + page = rx->page[rx->dcb_index][rx->db_index]; + skb = build_skb(page_address(page), PAGE_SIZE << rx->page_order); + if (unlikely(!skb)) + goto unmap_page; + + dma_unmap_single(lan966x->dev, (dma_addr_t)db->dataptr, + FDMA_DCB_STATUS_BLOCKL(db->status), + DMA_FROM_DEVICE); + skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status)); + + lan966x_ifh_get_src_port(skb->data, &src_port); + lan966x_ifh_get_timestamp(skb->data, ×tamp); + + WARN_ON(src_port >= lan966x->num_phys_ports); + + skb->dev = lan966x->ports[src_port]->dev; + skb_pull(skb, IFH_LEN * sizeof(u32)); + + if (likely(!(skb->dev->features & NETIF_F_RXFCS))) + skb_trim(skb, skb->len - ETH_FCS_LEN); + + lan966x_ptp_rxtstamp(lan966x, skb, timestamp); + skb->protocol = eth_type_trans(skb, skb->dev); + + if (lan966x->bridge_mask & BIT(src_port)) { + skb->offload_fwd_mark = 1; + + skb_reset_network_header(skb); + if (!lan966x_hw_offload(lan966x, src_port, skb)) + skb->offload_fwd_mark = 0; + } + + skb->dev->stats.rx_bytes += skb->len; + skb->dev->stats.rx_packets++; + + return skb; + +unmap_page: + dma_unmap_page(lan966x->dev, (dma_addr_t)db->dataptr, + FDMA_DCB_STATUS_BLOCKL(db->status), + DMA_FROM_DEVICE); + __free_pages(page, rx->page_order); + + return NULL; +} + +static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight) +{ + struct lan966x *lan966x = container_of(napi, struct lan966x, napi); + struct lan966x_rx *rx = &lan966x->rx; + int dcb_reload = rx->dcb_index; + struct lan966x_rx_dcb *old_dcb; + struct lan966x_db *db; + struct sk_buff *skb; + struct page *page; + int counter = 0; + u64 nextptr; + + lan966x_fdma_tx_clear_buf(lan966x, weight); + + /* Get all received skb */ + while (counter < weight) { + if (!lan966x_fdma_rx_more_frames(rx)) + break; + + skb = lan966x_fdma_rx_get_frame(rx); + + rx->page[rx->dcb_index][rx->db_index] = NULL; + rx->dcb_index++; + rx->dcb_index &= FDMA_DCB_MAX - 1; + + if (!skb) + break; + + napi_gro_receive(&lan966x->napi, skb); + counter++; + } + + /* Allocate new pages and map them */ + while (dcb_reload != rx->dcb_index) { + db = &rx->dcbs[dcb_reload].db[rx->db_index]; + page = lan966x_fdma_rx_alloc_page(rx, db); + if (unlikely(!page)) + break; + rx->page[dcb_reload][rx->db_index] = page; + + old_dcb = &rx->dcbs[dcb_reload]; + dcb_reload++; + dcb_reload &= FDMA_DCB_MAX - 1; + + nextptr = rx->dma + ((unsigned long)old_dcb - + (unsigned long)rx->dcbs); + lan966x_fdma_rx_add_dcb(rx, old_dcb, nextptr); + lan966x_fdma_rx_reload(rx); + } + + if (counter < weight && napi_complete_done(napi, counter)) + lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA); + + return counter; +} + +irqreturn_t lan966x_fdma_irq_handler(int irq, void *args) +{ + struct lan966x *lan966x = args; + u32 db, err, err_type; + + db = lan_rd(lan966x, FDMA_INTR_DB); + err = lan_rd(lan966x, FDMA_INTR_ERR); + + if (db) { + lan_wr(0, lan966x, FDMA_INTR_DB_ENA); + lan_wr(db, lan966x, FDMA_INTR_DB); + + napi_schedule(&lan966x->napi); + } + + if (err) { + err_type = lan_rd(lan966x, FDMA_ERRORS); + + WARN(1, "Unexpected error: %d, error_type: %d\n", err, err_type); + + lan_wr(err, lan966x, FDMA_INTR_ERR); + lan_wr(err_type, lan966x, FDMA_ERRORS); + } + + return IRQ_HANDLED; +} + +static int lan966x_fdma_get_next_dcb(struct lan966x_tx *tx) +{ + struct lan966x_tx_dcb_buf *dcb_buf; + int i; + + for (i = 0; i < FDMA_DCB_MAX; ++i) { + dcb_buf = &tx->dcbs_buf[i]; + if (!dcb_buf->used && i != tx->last_in_use) + return i; + } + + return -1; +} + +int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + struct lan966x_tx_dcb_buf *next_dcb_buf; + struct lan966x_tx_dcb *next_dcb, *dcb; + struct lan966x_tx *tx = &lan966x->tx; + struct lan966x_db *next_db; + int needed_headroom; + int needed_tailroom; + dma_addr_t dma_addr; + int next_to_use; + int err; + + /* Get next index */ + next_to_use = lan966x_fdma_get_next_dcb(tx); + if (next_to_use < 0) { + netif_stop_queue(dev); + return NETDEV_TX_BUSY; + } + + if (skb_put_padto(skb, ETH_ZLEN)) { + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + /* skb processing */ + needed_headroom = max_t(int, IFH_LEN * sizeof(u32) - skb_headroom(skb), 0); + needed_tailroom = max_t(int, ETH_FCS_LEN - skb_tailroom(skb), 0); + if (needed_headroom || needed_tailroom || skb_header_cloned(skb)) { + err = pskb_expand_head(skb, needed_headroom, needed_tailroom, + GFP_ATOMIC); + if (unlikely(err)) { + dev->stats.tx_dropped++; + err = NETDEV_TX_OK; + goto release; + } + } + + skb_tx_timestamp(skb); + skb_push(skb, IFH_LEN * sizeof(u32)); + memcpy(skb->data, ifh, IFH_LEN * sizeof(u32)); + skb_put(skb, 4); + + dma_addr = dma_map_single(lan966x->dev, skb->data, skb->len, + DMA_TO_DEVICE); + if (dma_mapping_error(lan966x->dev, dma_addr)) { + dev->stats.tx_dropped++; + err = NETDEV_TX_OK; + goto release; + } + + /* Setup next dcb */ + next_dcb = &tx->dcbs[next_to_use]; + next_dcb->nextptr = FDMA_DCB_INVALID_DATA; + + next_db = &next_dcb->db[0]; + next_db->dataptr = dma_addr; + next_db->status = FDMA_DCB_STATUS_SOF | + FDMA_DCB_STATUS_EOF | + FDMA_DCB_STATUS_INTR | + FDMA_DCB_STATUS_BLOCKO(0) | + FDMA_DCB_STATUS_BLOCKL(skb->len); + + /* Fill up the buffer */ + next_dcb_buf = &tx->dcbs_buf[next_to_use]; + next_dcb_buf->skb = skb; + next_dcb_buf->dma_addr = dma_addr; + next_dcb_buf->used = true; + next_dcb_buf->ptp = false; + next_dcb_buf->dev = dev; + + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + next_dcb_buf->ptp = true; + + if (likely(lan966x->tx.activated)) { + /* Connect current dcb to the next db */ + dcb = &tx->dcbs[tx->last_in_use]; + dcb->nextptr = tx->dma + (next_to_use * + sizeof(struct lan966x_tx_dcb)); + + lan966x_fdma_tx_reload(tx); + } else { + /* Because it is first time, then just activate */ + lan966x->tx.activated = true; + lan966x_fdma_tx_activate(tx); + } + + /* Move to next dcb because this last in use */ + tx->last_in_use = next_to_use; + + return NETDEV_TX_OK; + +release: + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + lan966x_ptp_txtstamp_release(port, skb); + + dev_kfree_skb_any(skb); + return err; +} + +static int lan966x_fdma_get_max_mtu(struct lan966x *lan966x) +{ + int max_mtu = 0; + int i; + + for (i = 0; i < lan966x->num_phys_ports; ++i) { + int mtu; + + if (!lan966x->ports[i]) + continue; + + mtu = lan966x->ports[i]->dev->mtu; + if (mtu > max_mtu) + max_mtu = mtu; + } + + return max_mtu; +} + +static int lan966x_qsys_sw_status(struct lan966x *lan966x) +{ + return lan_rd(lan966x, QSYS_SW_STATUS(CPU_PORT)); +} + +static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) +{ + void *rx_dcbs, *tx_dcbs, *tx_dcbs_buf; + dma_addr_t rx_dma, tx_dma; + u32 size; + int err; + + /* Store these for later to free them */ + rx_dma = lan966x->rx.dma; + tx_dma = lan966x->tx.dma; + rx_dcbs = lan966x->rx.dcbs; + tx_dcbs = lan966x->tx.dcbs; + tx_dcbs_buf = lan966x->tx.dcbs_buf; + + napi_synchronize(&lan966x->napi); + napi_disable(&lan966x->napi); + lan966x_fdma_stop_netdev(lan966x); + + lan966x_fdma_rx_disable(&lan966x->rx); + lan966x_fdma_rx_free_pages(&lan966x->rx); + lan966x->rx.page_order = round_up(new_mtu, PAGE_SIZE) / PAGE_SIZE - 1; + err = lan966x_fdma_rx_alloc(&lan966x->rx); + if (err) + goto restore; + lan966x_fdma_rx_start(&lan966x->rx); + + size = sizeof(struct lan966x_rx_dcb) * FDMA_DCB_MAX; + size = ALIGN(size, PAGE_SIZE); + dma_free_coherent(lan966x->dev, size, rx_dcbs, rx_dma); + + lan966x_fdma_tx_disable(&lan966x->tx); + err = lan966x_fdma_tx_alloc(&lan966x->tx); + if (err) + goto restore_tx; + + size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX; + size = ALIGN(size, PAGE_SIZE); + dma_free_coherent(lan966x->dev, size, tx_dcbs, tx_dma); + + kfree(tx_dcbs_buf); + + lan966x_fdma_wakeup_netdev(lan966x); + napi_enable(&lan966x->napi); + + return err; +restore: + lan966x->rx.dma = rx_dma; + lan966x->tx.dma = tx_dma; + lan966x_fdma_rx_start(&lan966x->rx); + +restore_tx: + lan966x->rx.dcbs = rx_dcbs; + lan966x->tx.dcbs = tx_dcbs; + lan966x->tx.dcbs_buf = tx_dcbs_buf; + + return err; +} + +int lan966x_fdma_change_mtu(struct lan966x *lan966x) +{ + int max_mtu; + int err; + u32 val; + + max_mtu = lan966x_fdma_get_max_mtu(lan966x); + max_mtu += IFH_LEN * sizeof(u32); + + if (round_up(max_mtu, PAGE_SIZE) / PAGE_SIZE - 1 == + lan966x->rx.page_order) + return 0; + + /* Disable the CPU port */ + lan_rmw(QSYS_SW_PORT_MODE_PORT_ENA_SET(0), + QSYS_SW_PORT_MODE_PORT_ENA, + lan966x, QSYS_SW_PORT_MODE(CPU_PORT)); + + /* Flush the CPU queues */ + readx_poll_timeout(lan966x_qsys_sw_status, lan966x, + val, !(QSYS_SW_STATUS_EQ_AVAIL_GET(val)), + READL_SLEEP_US, READL_TIMEOUT_US); + + /* Add a sleep in case there are frames between the queues and the CPU + * port + */ + usleep_range(1000, 2000); + + err = lan966x_fdma_reload(lan966x, max_mtu); + + /* Enable back the CPU port */ + lan_rmw(QSYS_SW_PORT_MODE_PORT_ENA_SET(1), + QSYS_SW_PORT_MODE_PORT_ENA, + lan966x, QSYS_SW_PORT_MODE(CPU_PORT)); + + return err; +} + +void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev) +{ + if (lan966x->fdma_ndev) + return; + + lan966x->fdma_ndev = dev; + netif_napi_add(dev, &lan966x->napi, lan966x_fdma_napi_poll, + NAPI_POLL_WEIGHT); + napi_enable(&lan966x->napi); +} + +void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev) +{ + if (lan966x->fdma_ndev == dev) { + netif_napi_del(&lan966x->napi); + lan966x->fdma_ndev = NULL; + } +} + +int lan966x_fdma_init(struct lan966x *lan966x) +{ + int err; + + if (!lan966x->fdma) + return 0; + + lan966x->rx.lan966x = lan966x; + lan966x->rx.channel_id = FDMA_XTR_CHANNEL; + lan966x->tx.lan966x = lan966x; + lan966x->tx.channel_id = FDMA_INJ_CHANNEL; + lan966x->tx.last_in_use = -1; + + err = lan966x_fdma_rx_alloc(&lan966x->rx); + if (err) + return err; + + err = lan966x_fdma_tx_alloc(&lan966x->tx); + if (err) { + lan966x_fdma_rx_free(&lan966x->rx); + return err; + } + + lan966x_fdma_rx_start(&lan966x->rx); + + return 0; +} + +void lan966x_fdma_deinit(struct lan966x *lan966x) +{ + if (!lan966x->fdma) + return; + + lan966x_fdma_rx_disable(&lan966x->rx); + lan966x_fdma_tx_disable(&lan966x->tx); + + napi_synchronize(&lan966x->napi); + napi_disable(&lan966x->napi); + + lan966x_fdma_rx_free_pages(&lan966x->rx); + lan966x_fdma_rx_free(&lan966x->rx); + lan966x_fdma_tx_free(&lan966x->tx); +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 958e55596b82..106d8c83544d 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -24,9 +24,6 @@ #define XTR_NOT_READY 0x07000080U #define XTR_VALID_BYTES(x) (4 - (((x) >> 24) & 3)) -#define READL_SLEEP_US 10 -#define READL_TIMEOUT_US 100000000 - #define IO_RANGES 2 static const struct of_device_id lan966x_match[] = { @@ -43,6 +40,7 @@ struct lan966x_main_io_resource { static const struct lan966x_main_io_resource lan966x_main_iomap[] = { { TARGET_CPU, 0xc0000, 0 }, /* 0xe00c0000 */ + { TARGET_FDMA, 0xc0400, 0 }, /* 0xe00c0400 */ { TARGET_ORG, 0, 1 }, /* 0xe2000000 */ { TARGET_GCB, 0x4000, 1 }, /* 0xe2004000 */ { TARGET_QS, 0x8000, 1 }, /* 0xe2008000 */ @@ -343,7 +341,10 @@ static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev) } spin_lock(&lan966x->tx_lock); - err = lan966x_port_ifh_xmit(skb, ifh, dev); + if (port->lan966x->fdma) + err = lan966x_fdma_xmit(skb, ifh, dev); + else + err = lan966x_port_ifh_xmit(skb, ifh, dev); spin_unlock(&lan966x->tx_lock); return err; @@ -353,12 +354,24 @@ static int lan966x_port_change_mtu(struct net_device *dev, int new_mtu) { struct lan966x_port *port = netdev_priv(dev); struct lan966x *lan966x = port->lan966x; + int old_mtu = dev->mtu; + int err; lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(new_mtu), lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port)); dev->mtu = new_mtu; - return 0; + if (!lan966x->fdma) + return 0; + + err = lan966x_fdma_change_mtu(lan966x); + if (err) { + lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(old_mtu), + lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port)); + dev->mtu = old_mtu; + } + + return err; } static int lan966x_mc_unsync(struct net_device *dev, const unsigned char *addr) @@ -432,8 +445,7 @@ bool lan966x_netdevice_check(const struct net_device *dev) return dev->netdev_ops == &lan966x_port_netdev_ops; } -static bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, - struct sk_buff *skb) +bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, struct sk_buff *skb) { u32 val; @@ -520,7 +532,7 @@ static int lan966x_rx_frame_word(struct lan966x *lan966x, u8 grp, u32 *rval) } } -static void lan966x_ifh_get_src_port(void *ifh, u64 *src_port) +void lan966x_ifh_get_src_port(void *ifh, u64 *src_port) { packing(ifh, src_port, IFH_POS_SRCPORT + IFH_WID_SRCPORT - 1, IFH_POS_SRCPORT, IFH_LEN * 4, UNPACK, 0); @@ -532,7 +544,7 @@ static void lan966x_ifh_get_len(void *ifh, u64 *len) IFH_POS_LEN, IFH_LEN * 4, UNPACK, 0); } -static void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp) +void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp) { packing(ifh, timestamp, IFH_POS_TIMESTAMP + IFH_WID_TIMESTAMP - 1, IFH_POS_TIMESTAMP, IFH_LEN * 4, UNPACK, 0); @@ -652,6 +664,9 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x) if (port->dev) unregister_netdev(port->dev); + if (lan966x->fdma && lan966x->fdma_ndev == port->dev) + lan966x_fdma_netdev_deinit(lan966x, port->dev); + if (port->phylink) { rtnl_lock(); lan966x_port_stop(port->dev); @@ -671,6 +686,9 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x) disable_irq(lan966x->ana_irq); lan966x->ana_irq = -ENXIO; } + + if (lan966x->fdma) + devm_free_irq(lan966x->dev, lan966x->fdma_irq, lan966x); } static int lan966x_probe_port(struct lan966x *lan966x, u32 p, @@ -799,12 +817,12 @@ static void lan966x_init(struct lan966x *lan966x) /* Do byte-swap and expect status after last data word * Extraction: Mode: manual extraction) | Byte_swap */ - lan_wr(QS_XTR_GRP_CFG_MODE_SET(1) | + lan_wr(QS_XTR_GRP_CFG_MODE_SET(lan966x->fdma ? 2 : 1) | QS_XTR_GRP_CFG_BYTE_SWAP_SET(1), lan966x, QS_XTR_GRP_CFG(0)); /* Injection: Mode: manual injection | Byte_swap */ - lan_wr(QS_INJ_GRP_CFG_MODE_SET(1) | + lan_wr(QS_INJ_GRP_CFG_MODE_SET(lan966x->fdma ? 2 : 1) | QS_INJ_GRP_CFG_BYTE_SWAP_SET(1), lan966x, QS_INJ_GRP_CFG(0)); @@ -1026,6 +1044,17 @@ static int lan966x_probe(struct platform_device *pdev) lan966x->ptp = 1; } + lan966x->fdma_irq = platform_get_irq_byname(pdev, "fdma"); + if (lan966x->fdma_irq > 0) { + err = devm_request_irq(&pdev->dev, lan966x->fdma_irq, + lan966x_fdma_irq_handler, 0, + "fdma irq", lan966x); + if (err) + return dev_err_probe(&pdev->dev, err, "Unable to use fdma irq"); + + lan966x->fdma = true; + } + /* init switch */ lan966x_init(lan966x); lan966x_stats_init(lan966x); @@ -1064,8 +1093,15 @@ static int lan966x_probe(struct platform_device *pdev) if (err) goto cleanup_fdb; + err = lan966x_fdma_init(lan966x); + if (err) + goto cleanup_ptp; + return 0; +cleanup_ptp: + lan966x_ptp_deinit(lan966x); + cleanup_fdb: lan966x_fdb_deinit(lan966x); @@ -1085,6 +1121,7 @@ static int lan966x_remove(struct platform_device *pdev) { struct lan966x *lan966x = platform_get_drvdata(pdev); + lan966x_fdma_deinit(lan966x); lan966x_cleanup_ports(lan966x); cancel_delayed_work_sync(&lan966x->stats_work); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index ae282da1da74..5213263c4e87 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -17,6 +17,9 @@ #define TABLE_UPDATE_SLEEP_US 10 #define TABLE_UPDATE_TIMEOUT_US 100000 +#define READL_SLEEP_US 10 +#define READL_TIMEOUT_US 100000000 + #define LAN966X_BUFFER_CELL_SZ 64 #define LAN966X_BUFFER_MEMORY (160 * 1024) #define LAN966X_BUFFER_MIN_SZ 60 @@ -58,6 +61,22 @@ #define IFH_REW_OP_ONE_STEP_PTP 0x3 #define IFH_REW_OP_TWO_STEP_PTP 0x4 +#define FDMA_RX_DCB_MAX_DBS 1 +#define FDMA_TX_DCB_MAX_DBS 1 +#define FDMA_DCB_INFO_DATAL(x) ((x) & GENMASK(15, 0)) + +#define FDMA_DCB_STATUS_BLOCKL(x) ((x) & GENMASK(15, 0)) +#define FDMA_DCB_STATUS_SOF BIT(16) +#define FDMA_DCB_STATUS_EOF BIT(17) +#define FDMA_DCB_STATUS_INTR BIT(18) +#define FDMA_DCB_STATUS_DONE BIT(19) +#define FDMA_DCB_STATUS_BLOCKO(x) (((x) << 20) & GENMASK(31, 20)) +#define FDMA_DCB_INVALID_DATA 0x1 + +#define FDMA_XTR_CHANNEL 6 +#define FDMA_INJ_CHANNEL 0 +#define FDMA_DCB_MAX 512 + /* MAC table entry types. * ENTRYTYPE_NORMAL is subject to aging. * ENTRYTYPE_LOCKED is not subject to aging. @@ -73,6 +92,83 @@ enum macaccess_entry_type { struct lan966x_port; +struct lan966x_db { + u64 dataptr; + u64 status; +}; + +struct lan966x_rx_dcb { + u64 nextptr; + u64 info; + struct lan966x_db db[FDMA_RX_DCB_MAX_DBS]; +}; + +struct lan966x_tx_dcb { + u64 nextptr; + u64 info; + struct lan966x_db db[FDMA_TX_DCB_MAX_DBS]; +}; + +struct lan966x_rx { + struct lan966x *lan966x; + + /* Pointer to the array of hardware dcbs. */ + struct lan966x_rx_dcb *dcbs; + + /* Pointer to the last address in the dcbs. */ + struct lan966x_rx_dcb *last_entry; + + /* For each DB, there is a page */ + struct page *page[FDMA_DCB_MAX][FDMA_RX_DCB_MAX_DBS]; + + /* Represents the db_index, it can have a value between 0 and + * FDMA_RX_DCB_MAX_DBS, once it reaches the value of FDMA_RX_DCB_MAX_DBS + * it means that the DCB can be reused. + */ + int db_index; + + /* Represents the index in the dcbs. It has a value between 0 and + * FDMA_DCB_MAX + */ + int dcb_index; + + /* Represents the dma address to the dcbs array */ + dma_addr_t dma; + + /* Represents the page order that is used to allocate the pages for the + * RX buffers. This value is calculated based on max MTU of the devices. + */ + u8 page_order; + + u8 channel_id; +}; + +struct lan966x_tx_dcb_buf { + struct net_device *dev; + struct sk_buff *skb; + dma_addr_t dma_addr; + bool used; + bool ptp; +}; + +struct lan966x_tx { + struct lan966x *lan966x; + + /* Pointer to the dcb list */ + struct lan966x_tx_dcb *dcbs; + u16 last_in_use; + + /* Represents the DMA address to the first entry of the dcb entries. */ + dma_addr_t dma; + + /* Array of dcbs that are given to the HW */ + struct lan966x_tx_dcb_buf *dcbs_buf; + + u8 channel_id; + + bool activated; +}; + struct lan966x_stat_layout { u32 offset; char name[ETH_GSTRING_LEN]; @@ -134,6 +230,7 @@ struct lan966x { int xtr_irq; int ana_irq; int ptp_irq; + int fdma_irq; /* worqueue for fdb */ struct workqueue_struct *fdb_work; @@ -150,6 +247,13 @@ struct lan966x { spinlock_t ptp_ts_id_lock; /* lock for ts_id */ struct mutex ptp_lock; /* lock for ptp interface state */ u16 ptp_skbs; + + /* fdma */ + bool fdma; + struct net_device *fdma_ndev; + struct lan966x_rx rx; + struct lan966x_tx tx; + struct napi_struct napi; }; struct lan966x_port_config { @@ -195,6 +299,11 @@ bool lan966x_netdevice_check(const struct net_device *dev); void lan966x_register_notifier_blocks(void); void lan966x_unregister_notifier_blocks(void); +bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, struct sk_buff *skb); + +void lan966x_ifh_get_src_port(void *ifh, u64 *src_port); +void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp); + void lan966x_stats_get(struct net_device *dev, struct rtnl_link_stats64 *stats); int lan966x_stats_init(struct lan966x *lan966x); @@ -284,6 +393,14 @@ void lan966x_ptp_txtstamp_release(struct lan966x_port *port, struct sk_buff *skb); irqreturn_t lan966x_ptp_irq_handler(int irq, void *args); +int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev); +int lan966x_fdma_change_mtu(struct lan966x *lan966x); +void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev); +void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev); +int lan966x_fdma_init(struct lan966x *lan966x); +void lan966x_fdma_deinit(struct lan966x *lan966x); +irqreturn_t lan966x_fdma_irq_handler(int irq, void *args); + static inline void __iomem *lan_addr(void __iomem *base[], int id, int tinst, int tcnt, int gbase, int ginst, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 237555845a52..f141644e4372 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -393,6 +393,9 @@ void lan966x_port_init(struct lan966x_port *port) lan966x_port_config_down(port); + if (lan966x->fdma) + lan966x_fdma_netdev_init(lan966x, port->dev); + if (config->portmode != PHY_INTERFACE_MODE_QSGMII) return; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index 0c0b3e173d53..2f59285bef29 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -17,6 +17,7 @@ enum lan966x_target { TARGET_CHIP_TOP = 5, TARGET_CPU = 6, TARGET_DEV = 13, + TARGET_FDMA = 21, TARGET_GCB = 27, TARGET_ORG = 36, TARGET_PTP = 41, @@ -578,6 +579,111 @@ enum lan966x_target { #define DEV_PCS1G_STICKY_LINK_DOWN_STICKY_GET(x)\ FIELD_GET(DEV_PCS1G_STICKY_LINK_DOWN_STICKY, x) +/* FDMA:FDMA:FDMA_CH_ACTIVATE */ +#define FDMA_CH_ACTIVATE __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 0, 0, 1, 4) + +#define FDMA_CH_ACTIVATE_CH_ACTIVATE GENMASK(7, 0) +#define FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(x)\ + FIELD_PREP(FDMA_CH_ACTIVATE_CH_ACTIVATE, x) +#define FDMA_CH_ACTIVATE_CH_ACTIVATE_GET(x)\ + FIELD_GET(FDMA_CH_ACTIVATE_CH_ACTIVATE, x) + +/* FDMA:FDMA:FDMA_CH_RELOAD */ +#define FDMA_CH_RELOAD __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 4, 0, 1, 4) + +#define FDMA_CH_RELOAD_CH_RELOAD GENMASK(7, 0) +#define FDMA_CH_RELOAD_CH_RELOAD_SET(x)\ + FIELD_PREP(FDMA_CH_RELOAD_CH_RELOAD, x) +#define FDMA_CH_RELOAD_CH_RELOAD_GET(x)\ + FIELD_GET(FDMA_CH_RELOAD_CH_RELOAD, x) + +/* FDMA:FDMA:FDMA_CH_DISABLE */ +#define FDMA_CH_DISABLE __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 8, 0, 1, 4) + +#define FDMA_CH_DISABLE_CH_DISABLE GENMASK(7, 0) +#define FDMA_CH_DISABLE_CH_DISABLE_SET(x)\ + FIELD_PREP(FDMA_CH_DISABLE_CH_DISABLE, x) +#define FDMA_CH_DISABLE_CH_DISABLE_GET(x)\ + FIELD_GET(FDMA_CH_DISABLE_CH_DISABLE, x) + +/* FDMA:FDMA:FDMA_CH_DB_DISCARD */ +#define FDMA_CH_DB_DISCARD __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 16, 0, 1, 4) + +#define FDMA_CH_DB_DISCARD_DB_DISCARD GENMASK(7, 0) +#define FDMA_CH_DB_DISCARD_DB_DISCARD_SET(x)\ + FIELD_PREP(FDMA_CH_DB_DISCARD_DB_DISCARD, x) +#define FDMA_CH_DB_DISCARD_DB_DISCARD_GET(x)\ + FIELD_GET(FDMA_CH_DB_DISCARD_DB_DISCARD, x) + +/* FDMA:FDMA:FDMA_DCB_LLP */ +#define FDMA_DCB_LLP(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 52, r, 8, 4) + +/* FDMA:FDMA:FDMA_DCB_LLP1 */ +#define FDMA_DCB_LLP1(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 84, r, 8, 4) + +/* FDMA:FDMA:FDMA_CH_ACTIVE */ +#define FDMA_CH_ACTIVE __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 180, 0, 1, 4) + +/* FDMA:FDMA:FDMA_CH_CFG */ +#define FDMA_CH_CFG(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 224, r, 8, 4) + +#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY BIT(4) +#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(x)\ + FIELD_PREP(FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY, x) +#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_GET(x)\ + FIELD_GET(FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY, x) + +#define FDMA_CH_CFG_CH_INJ_PORT BIT(3) +#define FDMA_CH_CFG_CH_INJ_PORT_SET(x)\ + FIELD_PREP(FDMA_CH_CFG_CH_INJ_PORT, x) +#define FDMA_CH_CFG_CH_INJ_PORT_GET(x)\ + FIELD_GET(FDMA_CH_CFG_CH_INJ_PORT, x) + +#define FDMA_CH_CFG_CH_DCB_DB_CNT GENMASK(2, 1) +#define FDMA_CH_CFG_CH_DCB_DB_CNT_SET(x)\ + FIELD_PREP(FDMA_CH_CFG_CH_DCB_DB_CNT, x) +#define FDMA_CH_CFG_CH_DCB_DB_CNT_GET(x)\ + FIELD_GET(FDMA_CH_CFG_CH_DCB_DB_CNT, x) + +#define FDMA_CH_CFG_CH_MEM BIT(0) +#define FDMA_CH_CFG_CH_MEM_SET(x)\ + FIELD_PREP(FDMA_CH_CFG_CH_MEM, x) +#define FDMA_CH_CFG_CH_MEM_GET(x)\ + FIELD_GET(FDMA_CH_CFG_CH_MEM, x) + +/* FDMA:FDMA:FDMA_PORT_CTRL */ +#define FDMA_PORT_CTRL(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 376, r, 2, 4) + +#define FDMA_PORT_CTRL_INJ_STOP BIT(4) +#define FDMA_PORT_CTRL_INJ_STOP_SET(x)\ + FIELD_PREP(FDMA_PORT_CTRL_INJ_STOP, x) +#define FDMA_PORT_CTRL_INJ_STOP_GET(x)\ + FIELD_GET(FDMA_PORT_CTRL_INJ_STOP, x) + +#define FDMA_PORT_CTRL_XTR_STOP BIT(2) +#define FDMA_PORT_CTRL_XTR_STOP_SET(x)\ + FIELD_PREP(FDMA_PORT_CTRL_XTR_STOP, x) +#define FDMA_PORT_CTRL_XTR_STOP_GET(x)\ + FIELD_GET(FDMA_PORT_CTRL_XTR_STOP, x) + +/* FDMA:FDMA:FDMA_INTR_DB */ +#define FDMA_INTR_DB __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 392, 0, 1, 4) + +/* FDMA:FDMA:FDMA_INTR_DB_ENA */ +#define FDMA_INTR_DB_ENA __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 396, 0, 1, 4) + +#define FDMA_INTR_DB_ENA_INTR_DB_ENA GENMASK(7, 0) +#define FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(x)\ + FIELD_PREP(FDMA_INTR_DB_ENA_INTR_DB_ENA, x) +#define FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(x)\ + FIELD_GET(FDMA_INTR_DB_ENA_INTR_DB_ENA, x) + +/* FDMA:FDMA:FDMA_INTR_ERR */ +#define FDMA_INTR_ERR __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 400, 0, 1, 4) + +/* FDMA:FDMA:FDMA_ERRORS */ +#define FDMA_ERRORS __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 412, 0, 1, 4) + /* PTP:PTP_CFG:PTP_DOM_CFG */ #define PTP_DOM_CFG __REG(TARGET_PTP, 0, 1, 512, 0, 1, 16, 12, 0, 1, 4) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h index 3d379e937184..ddb34bfb9bef 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h @@ -13,22 +13,36 @@ #include <linux/ctype.h> #include <linux/types.h> #include <linux/sizes.h> +#include <linux/stringify.h> #ifndef NFP_SUBSYS #define NFP_SUBSYS "nfp" #endif -#define nfp_err(cpp, fmt, args...) \ +#define string_format(x) __FILE__ ":" __stringify(__LINE__) ": " x + +#define __nfp_err(cpp, fmt, args...) \ dev_err(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) -#define nfp_warn(cpp, fmt, args...) \ +#define __nfp_warn(cpp, fmt, args...) \ dev_warn(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) -#define nfp_info(cpp, fmt, args...) \ +#define __nfp_info(cpp, fmt, args...) \ dev_info(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) -#define nfp_dbg(cpp, fmt, args...) \ +#define __nfp_dbg(cpp, fmt, args...) \ dev_dbg(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args) +#define __nfp_printk(level, cpp, fmt, args...) \ + dev_printk(level, nfp_cpp_device(cpp)->parent, \ + NFP_SUBSYS ": " fmt, ## args) + +#define nfp_err(cpp, fmt, args...) \ + __nfp_err(cpp, string_format(fmt), ## args) +#define nfp_warn(cpp, fmt, args...) \ + __nfp_warn(cpp, string_format(fmt), ## args) +#define nfp_info(cpp, fmt, args...) \ + __nfp_info(cpp, string_format(fmt), ## args) +#define nfp_dbg(cpp, fmt, args...) \ + __nfp_dbg(cpp, string_format(fmt), ## args) #define nfp_printk(level, cpp, fmt, args...) \ - dev_printk(level, nfp_cpp_device(cpp)->parent, \ - NFP_SUBSYS ": " fmt, ## args) + __nfp_printk(level, cpp, string_format(fmt), ## args) #define PCI_64BIT_BAR_COUNT 3 diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c index 96a2077fd315..7e286cddbedb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c +++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c @@ -161,11 +161,11 @@ EXPORT_SYMBOL(qed_vlan_get_ndev); struct pci_dev *qed_validate_ndev(struct net_device *ndev) { - struct pci_dev *pdev = NULL; struct net_device *upper; + struct pci_dev *pdev; for_each_pci_dev(pdev) { - if (pdev && pdev->driver && + if (pdev->driver && !strcmp(pdev->driver->name, "qede")) { upper = pci_get_drvdata(pdev); if (upper->ifindex == ndev->ifindex) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 50d535981a35..c9ee5011803f 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2256,7 +2256,7 @@ int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb, * guaranteed to satisfy the second as we only attempt TSO if * inner_network_header <= 208. */ - ip_tot_len = -EFX_TSO2_MAX_HDRLEN; + ip_tot_len = 0x10000 - EFX_TSO2_MAX_HDRLEN; EFX_WARN_ON_ONCE_PARANOID(mss + EFX_TSO2_MAX_HDRLEN + (tcp->doff << 2u) > ip_tot_len); diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index daf0c00c1242..c05a83da9e44 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -28,7 +28,6 @@ static inline netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct ef100_enqueue_skb, __efx_enqueue_skb, tx_queue, skb); } -void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); void efx_xmit_done_single(struct efx_tx_queue *tx_queue); int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, void *type_data); diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index 377df8b7f015..eec80b024195 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -51,28 +51,7 @@ MODULE_PARM_DESC(irq_adapt_high_thresh, */ static int napi_weight = 64; -/*************** - * Housekeeping - ***************/ - -int efx_channel_dummy_op_int(struct efx_channel *channel) -{ - return 0; -} - -void efx_channel_dummy_op_void(struct efx_channel *channel) -{ -} - -static const struct efx_channel_type efx_default_channel_type = { - .pre_probe = efx_channel_dummy_op_int, - .post_remove = efx_channel_dummy_op_void, - .get_name = efx_get_channel_name, - .copy = efx_copy_channel, - .want_txqs = efx_default_channel_want_txqs, - .keep_eventq = false, - .want_pio = true, -}; +static const struct efx_channel_type efx_default_channel_type; /************* * INTERRUPTS @@ -619,6 +598,7 @@ void efx_fini_channels(struct efx_nic *efx) /* Allocate and initialise a channel structure, copying parameters * (but not resources) from an old channel structure. */ +static struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel) { struct efx_rx_queue *rx_queue; @@ -696,7 +676,8 @@ fail: return rc; } -void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) +static void efx_get_channel_name(struct efx_channel *channel, char *buf, + size_t len) { struct efx_nic *efx = channel->efx; const char *type; @@ -1004,7 +985,7 @@ int efx_set_channels(struct efx_nic *efx) return netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels); } -bool efx_default_channel_want_txqs(struct efx_channel *channel) +static bool efx_default_channel_want_txqs(struct efx_channel *channel) { return channel->channel - channel->efx->tx_channel_offset < channel->efx->n_tx_channels; @@ -1362,3 +1343,26 @@ void efx_fini_napi(struct efx_nic *efx) efx_for_each_channel(channel, efx) efx_fini_napi_channel(channel); } + +/*************** + * Housekeeping + ***************/ + +static int efx_channel_dummy_op_int(struct efx_channel *channel) +{ + return 0; +} + +void efx_channel_dummy_op_void(struct efx_channel *channel) +{ +} + +static const struct efx_channel_type efx_default_channel_type = { + .pre_probe = efx_channel_dummy_op_int, + .post_remove = efx_channel_dummy_op_void, + .get_name = efx_get_channel_name, + .copy = efx_copy_channel, + .want_txqs = efx_default_channel_want_txqs, + .keep_eventq = false, + .want_pio = true, +}; diff --git a/drivers/net/ethernet/sfc/efx_channels.h b/drivers/net/ethernet/sfc/efx_channels.h index d77ec1f77fb1..64abb99a56b8 100644 --- a/drivers/net/ethernet/sfc/efx_channels.h +++ b/drivers/net/ethernet/sfc/efx_channels.h @@ -32,16 +32,13 @@ void efx_fini_eventq(struct efx_channel *channel); void efx_remove_eventq(struct efx_channel *channel); int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries); -void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len); void efx_set_channel_names(struct efx_nic *efx); int efx_init_channels(struct efx_nic *efx); int efx_probe_channels(struct efx_nic *efx); int efx_set_channels(struct efx_nic *efx); -bool efx_default_channel_want_txqs(struct efx_channel *channel); void efx_remove_channel(struct efx_channel *channel); void efx_remove_channels(struct efx_nic *efx); void efx_fini_channels(struct efx_nic *efx); -struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel); void efx_start_channels(struct efx_nic *efx); void efx_stop_channels(struct efx_nic *efx); @@ -50,7 +47,6 @@ void efx_init_napi(struct efx_nic *efx); void efx_fini_napi_channel(struct efx_channel *channel); void efx_fini_napi(struct efx_nic *efx); -int efx_channel_dummy_op_int(struct efx_channel *channel); void efx_channel_dummy_op_void(struct efx_channel *channel); #endif diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index af37c990217e..f6577e74d6e6 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -51,8 +51,8 @@ static unsigned int efx_monitor_interval = 1 * HZ; /* Default stats update time */ #define STATS_PERIOD_MS_DEFAULT 1000 -const unsigned int efx_reset_type_max = RESET_TYPE_MAX; -const char *const efx_reset_type_names[] = { +static const unsigned int efx_reset_type_max = RESET_TYPE_MAX; +static const char *const efx_reset_type_names[] = { [RESET_TYPE_INVISIBLE] = "INVISIBLE", [RESET_TYPE_ALL] = "ALL", [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 0c6cc2191369..6bbdb5d2eebf 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c @@ -718,12 +718,14 @@ static void ef4_init_rx_recycle_ring(struct ef4_nic *efx, struct ef4_rx_queue *rx_queue) { unsigned int bufs_in_recycle_ring, page_ring_size; + struct iommu_domain __maybe_unused *domain; /* Set the RX recycle ring size */ #ifdef CONFIG_PPC64 bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_IOMMU; #else - if (iommu_present(&pci_bus_type)) + domain = iommu_get_domain_for_dev(&efx->pci_dev->dev); + if (domain && domain->type != IOMMU_DOMAIN_IDENTITY) bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_IOMMU; else bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_NOIOMMU; diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 148dcd48b58d..9599123bc28d 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -16,6 +16,7 @@ #include "bitfield.h" #include "efx.h" #include "rx_common.h" +#include "tx_common.h" #include "nic.h" #include "farch_regs.h" #include "sriov.h" diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index d3fcbf930dba..ff617b1b38d3 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -73,8 +73,8 @@ * \------------------------------ Resync (always set) * * The client writes it's request into MC shared memory, and rings the - * doorbell. Each request is completed by either by the MC writting - * back into shared memory, or by writting out an event. + * doorbell. Each request is completed by either by the MC writing + * back into shared memory, or by writing out an event. * * All MCDI commands support completion by shared memory response. Each * request may also contain additional data (accounted for by HEADER.LEN), diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index c75dc75e2857..318db906a154 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -612,11 +612,6 @@ extern const unsigned int efx_loopback_mode_max; #define LOOPBACK_MODE(efx) \ STRING_TABLE_LOOKUP((efx)->loopback_mode, efx_loopback_mode) -extern const char *const efx_reset_type_names[]; -extern const unsigned int efx_reset_type_max; -#define RESET_TYPE(type) \ - STRING_TABLE_LOOKUP(type, efx_reset_type) - enum efx_int_mode { /* Be careful if altering to correct macro below */ EFX_INT_MODE_MSIX = 0, diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 6983799e1c05..138bca611341 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -527,7 +527,8 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, /* PTP "event" packet */ if (unlikely(efx_xmit_with_hwtstamp(skb)) && - unlikely(efx_ptp_is_ptp_tx(efx, skb))) { + ((efx_ptp_use_mac_tx_timestamps(efx) && efx->ptp_data) || + unlikely(efx_ptp_is_ptp_tx(efx, skb)))) { /* There may be existing transmits on the channel that are * waiting for this packet to trigger the doorbell write. * We need to send the packets at this point. diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index a5d150c5f3d8..9bc625fccca0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -88,11 +88,9 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) u32 tmp, addr, value = MII_XGMAC_BUSY; int ret; - ret = pm_runtime_get_sync(priv->device); - if (ret < 0) { - pm_runtime_put_noidle(priv->device); + ret = pm_runtime_resume_and_get(priv->device); + if (ret < 0) return ret; - } /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, @@ -156,11 +154,9 @@ static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr, u32 addr, tmp, value = MII_XGMAC_BUSY; int ret; - ret = pm_runtime_get_sync(priv->device); - if (ret < 0) { - pm_runtime_put_noidle(priv->device); + ret = pm_runtime_resume_and_get(priv->device); + if (ret < 0) return ret; - } /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, @@ -229,11 +225,9 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) int data = 0; u32 v; - data = pm_runtime_get_sync(priv->device); - if (data < 0) { - pm_runtime_put_noidle(priv->device); + data = pm_runtime_resume_and_get(priv->device); + if (data < 0) return data; - } value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; @@ -297,11 +291,9 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, u32 value = MII_BUSY; u32 v; - ret = pm_runtime_get_sync(priv->device); - if (ret < 0) { - pm_runtime_put_noidle(priv->device); + ret = pm_runtime_resume_and_get(priv->device); + if (ret < 0) return ret; - } value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index affcf92cd3aa..fb30bc5d56cb 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -94,6 +94,7 @@ config TI_K3_AM65_CPSW_NUSS depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER select NET_DEVLINK select TI_DAVINCI_MDIO + select PHYLINK imply PHY_TI_GMII_SEL depends on TI_K3_AM65_CPTS || !TI_K3_AM65_CPTS help diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index d2747e9db286..b7ebd741f284 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -173,11 +173,9 @@ static int am65_cpsw_nuss_ndo_slave_add_vid(struct net_device *ndev, if (!netif_running(ndev) || !vid) return 0; - ret = pm_runtime_get_sync(common->dev); - if (ret < 0) { - pm_runtime_put_noidle(common->dev); + ret = pm_runtime_resume_and_get(common->dev); + if (ret < 0) return ret; - } port_mask = BIT(port->port_id) | ALE_PORT_HOST; if (!vid) @@ -203,11 +201,9 @@ static int am65_cpsw_nuss_ndo_slave_kill_vid(struct net_device *ndev, if (!netif_running(ndev) || !vid) return 0; - ret = pm_runtime_get_sync(common->dev); - if (ret < 0) { - pm_runtime_put_noidle(common->dev); + ret = pm_runtime_resume_and_get(common->dev); + if (ret < 0) return ret; - } dev_info(common->dev, "Removing vlan %d from vlan filter\n", vid); ret = cpsw_ale_del_vlan(common->ale, vid, @@ -557,11 +553,9 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev) struct am65_cpsw_port *port = am65_ndev_to_port(ndev); int ret, i; - ret = pm_runtime_get_sync(common->dev); - if (ret < 0) { - pm_runtime_put_noidle(common->dev); + ret = pm_runtime_resume_and_get(common->dev); + if (ret < 0) return ret; - } /* Notify the stack of the actual queue counts. */ ret = netif_set_real_num_tx_queues(ndev, common->tx_ch_num); @@ -1214,11 +1208,9 @@ static int am65_cpsw_nuss_ndo_slave_set_mac_address(struct net_device *ndev, if (ret < 0) return ret; - ret = pm_runtime_get_sync(common->dev); - if (ret < 0) { - pm_runtime_put_noidle(common->dev); + ret = pm_runtime_resume_and_get(common->dev); + if (ret < 0) return ret; - } cpsw_ale_del_ucast(common->ale, ndev->dev_addr, HOST_PORT_NUM, 0, 0); @@ -2692,9 +2684,8 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) common->bus_freq = clk_get_rate(clk); pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) { - pm_runtime_put_noidle(dev); pm_runtime_disable(dev); return ret; } @@ -2789,11 +2780,9 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev) common = dev_get_drvdata(dev); - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) { - pm_runtime_put_noidle(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) return ret; - } am65_cpsw_nuss_phylink_cleanup(common); am65_cpsw_unregister_devlink(common); diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index ebcc6386cc34..aa32dd905e2b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -8,10 +8,12 @@ #include <linux/pm_runtime.h> #include <linux/time.h> +#include <net/pkt_cls.h> #include "am65-cpsw-nuss.h" #include "am65-cpsw-qos.h" #include "am65-cpts.h" +#include "cpsw_ale.h" #define AM65_CPSW_REG_CTL 0x004 #define AM65_CPSW_PN_REG_CTL 0x004 @@ -588,12 +590,190 @@ static int am65_cpsw_setup_taprio(struct net_device *ndev, void *type_data) return am65_cpsw_set_taprio(ndev, type_data); } +static int am65_cpsw_qos_clsflower_add_policer(struct am65_cpsw_port *port, + struct netlink_ext_ack *extack, + struct flow_cls_offload *cls, + u64 rate_pkt_ps) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + static const u8 mc_mac[] = {0x01, 0x00, 0x00, 0x00, 0x00, 0x00}; + struct am65_cpsw_qos *qos = &port->qos; + struct flow_match_eth_addrs match; + int ret; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS))) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported keys used"); + return -EOPNOTSUPP; + } + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + NL_SET_ERR_MSG_MOD(extack, "Not matching on eth address"); + return -EOPNOTSUPP; + } + + flow_rule_match_eth_addrs(rule, &match); + + if (!is_zero_ether_addr(match.mask->src)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on source MAC not supported"); + return -EOPNOTSUPP; + } + + if (is_broadcast_ether_addr(match.key->dst) && + is_broadcast_ether_addr(match.mask->dst)) { + ret = cpsw_ale_rx_ratelimit_bc(port->common->ale, port->port_id, rate_pkt_ps); + if (ret) + return ret; + + qos->ale_bc_ratelimit.cookie = cls->cookie; + qos->ale_bc_ratelimit.rate_packet_ps = rate_pkt_ps; + } else if (ether_addr_equal_unaligned(match.key->dst, mc_mac) && + ether_addr_equal_unaligned(match.mask->dst, mc_mac)) { + ret = cpsw_ale_rx_ratelimit_mc(port->common->ale, port->port_id, rate_pkt_ps); + if (ret) + return ret; + + qos->ale_mc_ratelimit.cookie = cls->cookie; + qos->ale_mc_ratelimit.rate_packet_ps = rate_pkt_ps; + } else { + NL_SET_ERR_MSG_MOD(extack, "Not supported matching key"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int am65_cpsw_qos_clsflower_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.rate_bytes_ps || act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when bytes per second/peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int am65_cpsw_qos_configure_clsflower(struct am65_cpsw_port *port, + struct flow_cls_offload *cls) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct netlink_ext_ack *extack = cls->common.extack; + const struct flow_action_entry *act; + int i, ret; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_POLICE: + ret = am65_cpsw_qos_clsflower_policer_validate(&rule->action, act, extack); + if (ret) + return ret; + + return am65_cpsw_qos_clsflower_add_policer(port, extack, cls, + act->police.rate_pkt_ps); + default: + NL_SET_ERR_MSG_MOD(extack, + "Action not supported"); + return -EOPNOTSUPP; + } + } + return -EOPNOTSUPP; +} + +static int am65_cpsw_qos_delete_clsflower(struct am65_cpsw_port *port, struct flow_cls_offload *cls) +{ + struct am65_cpsw_qos *qos = &port->qos; + + if (cls->cookie == qos->ale_bc_ratelimit.cookie) { + qos->ale_bc_ratelimit.cookie = 0; + qos->ale_bc_ratelimit.rate_packet_ps = 0; + cpsw_ale_rx_ratelimit_bc(port->common->ale, port->port_id, 0); + } + + if (cls->cookie == qos->ale_mc_ratelimit.cookie) { + qos->ale_mc_ratelimit.cookie = 0; + qos->ale_mc_ratelimit.rate_packet_ps = 0; + cpsw_ale_rx_ratelimit_mc(port->common->ale, port->port_id, 0); + } + + return 0; +} + +static int am65_cpsw_qos_setup_tc_clsflower(struct am65_cpsw_port *port, + struct flow_cls_offload *cls_flower) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return am65_cpsw_qos_configure_clsflower(port, cls_flower); + case FLOW_CLS_DESTROY: + return am65_cpsw_qos_delete_clsflower(port, cls_flower); + default: + return -EOPNOTSUPP; + } +} + +static int am65_cpsw_qos_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ + struct am65_cpsw_port *port = cb_priv; + + if (!tc_cls_can_offload_and_chain0(port->ndev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return am65_cpsw_qos_setup_tc_clsflower(port, type_data); + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(am65_cpsw_qos_block_cb_list); + +static int am65_cpsw_qos_setup_tc_block(struct net_device *ndev, struct flow_block_offload *f) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + + return flow_block_cb_setup_simple(f, &am65_cpsw_qos_block_cb_list, + am65_cpsw_qos_setup_tc_block_cb, + port, port, true); +} + int am65_cpsw_qos_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, void *type_data) { switch (type) { case TC_SETUP_QDISC_TAPRIO: return am65_cpsw_setup_taprio(ndev, type_data); + case TC_SETUP_BLOCK: + return am65_cpsw_qos_setup_tc_block(ndev, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.h b/drivers/net/ethernet/ti/am65-cpsw-qos.h index e8f1b6b59e93..fb223b43b196 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.h +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.h @@ -14,11 +14,19 @@ struct am65_cpsw_est { struct tc_taprio_qopt_offload taprio; }; +struct am65_cpsw_ale_ratelimit { + unsigned long cookie; + u64 rate_packet_ps; +}; + struct am65_cpsw_qos { struct am65_cpsw_est *est_admin; struct am65_cpsw_est *est_oper; ktime_t link_down_time; int link_speed; + + struct am65_cpsw_ale_ratelimit ale_bc_ratelimit; + struct am65_cpsw_ale_ratelimit ale_mc_ratelimit; }; int am65_cpsw_qos_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 03575c017500..e6ad2e53f1cd 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -335,7 +335,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) static unsigned int cpsw_rxbuf_total_len(unsigned int len) { - len += CPSW_HEADROOM; + len += CPSW_HEADROOM_NA; len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); return SKB_DATA_ALIGN(len); @@ -756,11 +756,9 @@ static int cpsw_ndo_open(struct net_device *ndev) int ret; u32 reg; - ret = pm_runtime_get_sync(cpsw->dev); - if (ret < 0) { - pm_runtime_put_noidle(cpsw->dev); + ret = pm_runtime_resume_and_get(cpsw->dev); + if (ret < 0) return ret; - } netif_carrier_off(ndev); @@ -968,11 +966,9 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ret = pm_runtime_get_sync(cpsw->dev); - if (ret < 0) { - pm_runtime_put_noidle(cpsw->dev); + ret = pm_runtime_resume_and_get(cpsw->dev); + if (ret < 0) return ret; - } if (cpsw->data.dual_emac) { vid = cpsw->slaves[priv->emac_port].port_vlan; @@ -1052,11 +1048,9 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, if (vid == cpsw->data.default_vlan) return 0; - ret = pm_runtime_get_sync(cpsw->dev); - if (ret < 0) { - pm_runtime_put_noidle(cpsw->dev); + ret = pm_runtime_resume_and_get(cpsw->dev); + if (ret < 0) return ret; - } if (cpsw->data.dual_emac) { /* In dual EMAC, reserved VLAN id should not be used for @@ -1090,11 +1084,9 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, if (vid == cpsw->data.default_vlan) return 0; - ret = pm_runtime_get_sync(cpsw->dev); - if (ret < 0) { - pm_runtime_put_noidle(cpsw->dev); + ret = pm_runtime_resume_and_get(cpsw->dev); + if (ret < 0) return ret; - } if (cpsw->data.dual_emac) { int i; @@ -1567,11 +1559,9 @@ static int cpsw_probe(struct platform_device *pdev) /* Need to enable clocks with runtime PM api to access module * registers */ - ret = pm_runtime_get_sync(dev); - if (ret < 0) { - pm_runtime_put_noidle(dev); + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) goto clean_runtime_disable_ret; - } ret = cpsw_probe_dt(&cpsw->data, pdev); if (ret) @@ -1734,11 +1724,9 @@ static int cpsw_remove(struct platform_device *pdev) struct cpsw_common *cpsw = platform_get_drvdata(pdev); int i, ret; - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) { - pm_runtime_put_noidle(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) return ret; - } for (i = 0; i < cpsw->data.slaves; i++) if (cpsw->slaves[i].ndev) diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 1ef0aaef5c61..231370e9a801 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -50,6 +50,8 @@ /* ALE_AGING_TIMER */ #define ALE_AGING_TIMER_MASK GENMASK(23, 0) +#define ALE_RATE_LIMIT_MIN_PPS 1000 + /** * struct ale_entry_fld - The ALE tbl entry field description * @start_bit: field start bit @@ -1136,6 +1138,50 @@ int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control) return tmp & BITMASK(info->bits); } +int cpsw_ale_rx_ratelimit_mc(struct cpsw_ale *ale, int port, unsigned int ratelimit_pps) + +{ + int val = ratelimit_pps / ALE_RATE_LIMIT_MIN_PPS; + u32 remainder = ratelimit_pps % ALE_RATE_LIMIT_MIN_PPS; + + if (ratelimit_pps && !val) { + dev_err(ale->params.dev, "ALE MC port:%d ratelimit min value 1000pps\n", port); + return -EINVAL; + } + + if (remainder) + dev_info(ale->params.dev, "ALE port:%d MC ratelimit set to %dpps (requested %d)\n", + port, ratelimit_pps - remainder, ratelimit_pps); + + cpsw_ale_control_set(ale, port, ALE_PORT_MCAST_LIMIT, val); + + dev_dbg(ale->params.dev, "ALE port:%d MC ratelimit set %d\n", + port, val * ALE_RATE_LIMIT_MIN_PPS); + return 0; +} + +int cpsw_ale_rx_ratelimit_bc(struct cpsw_ale *ale, int port, unsigned int ratelimit_pps) + +{ + int val = ratelimit_pps / ALE_RATE_LIMIT_MIN_PPS; + u32 remainder = ratelimit_pps % ALE_RATE_LIMIT_MIN_PPS; + + if (ratelimit_pps && !val) { + dev_err(ale->params.dev, "ALE port:%d BC ratelimit min value 1000pps\n", port); + return -EINVAL; + } + + if (remainder) + dev_info(ale->params.dev, "ALE port:%d BC ratelimit set to %dpps (requested %d)\n", + port, ratelimit_pps - remainder, ratelimit_pps); + + cpsw_ale_control_set(ale, port, ALE_PORT_BCAST_LIMIT, val); + + dev_dbg(ale->params.dev, "ALE port:%d BC ratelimit set %d\n", + port, val * ALE_RATE_LIMIT_MIN_PPS); + return 0; +} + static void cpsw_ale_timer(struct timer_list *t) { struct cpsw_ale *ale = from_timer(ale, t, timer); @@ -1199,6 +1245,26 @@ static void cpsw_ale_aging_stop(struct cpsw_ale *ale) void cpsw_ale_start(struct cpsw_ale *ale) { + unsigned long ale_prescale; + + /* configure Broadcast and Multicast Rate Limit + * number_of_packets = (Fclk / ALE_PRESCALE) * port.BCAST/MCAST_LIMIT + * ALE_PRESCALE width is 19bit and min value 0x10 + * port.BCAST/MCAST_LIMIT is 8bit + * + * For multi port configuration support the ALE_PRESCALE is configured to 1ms interval, + * which allows to configure port.BCAST/MCAST_LIMIT per port and achieve: + * min number_of_packets = 1000 when port.BCAST/MCAST_LIMIT = 1 + * max number_of_packets = 1000 * 255 = 255000 when port.BCAST/MCAST_LIMIT = 0xFF + */ + ale_prescale = ale->params.bus_freq / ALE_RATE_LIMIT_MIN_PPS; + writel((u32)ale_prescale, ale->params.ale_regs + ALE_PRESCALE); + + /* Allow MC/BC rate limiting globally. + * The actual Rate Limit cfg enabled per-port by port.BCAST/MCAST_LIMIT + */ + cpsw_ale_control_set(ale, 0, ALE_RATE_LIMIT, 1); + cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1); cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1); diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index 13fe47687fde..aba4572cfa3b 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -120,6 +120,8 @@ int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port, int untag, int reg_mcast, int unreg_mcast); int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port); void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti, int port); +int cpsw_ale_rx_ratelimit_bc(struct cpsw_ale *ale, int port, unsigned int ratelimit_pps); +int cpsw_ale_rx_ratelimit_mc(struct cpsw_ale *ale, int port, unsigned int ratelimit_pps); int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control); int cpsw_ale_control_set(struct cpsw_ale *ale, int port, diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index bd4b1528cf99..dfa0a9cf9d89 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -273,7 +273,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) static unsigned int cpsw_rxbuf_total_len(unsigned int len) { - len += CPSW_HEADROOM; + len += CPSW_HEADROOM_NA; len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); return SKB_DATA_ALIGN(len); @@ -498,6 +498,8 @@ static void cpsw_restore(struct cpsw_priv *priv) /* restore CBS offload */ cpsw_cbs_resume(&cpsw->slaves[priv->emac_port - 1], priv); + + cpsw_qos_clsflower_resume(priv); } static void cpsw_init_stp_ale_entry(struct cpsw_common *cpsw) @@ -1407,7 +1409,7 @@ static int cpsw_create_ports(struct cpsw_common *cpsw) cpsw->slaves[i].ndev = ndev; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_NETNS_LOCAL; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_NETNS_LOCAL | NETIF_F_HW_TC; ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 8f6817f346ba..4dbd327c6a4d 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -502,6 +502,7 @@ int cpsw_init_common(struct cpsw_common *cpsw, void __iomem *ss_regs, ale_params.ale_ageout = ale_ageout; ale_params.ale_ports = CPSW_ALE_PORTS_NUM; ale_params.dev_id = "cpsw"; + ale_params.bus_freq = cpsw->bus_freq_mhz * 1000000; cpsw->ale = cpsw_ale_create(&ale_params); if (IS_ERR(cpsw->ale)) { @@ -1048,6 +1049,8 @@ static int cpsw_set_mqprio(struct net_device *ndev, void *type_data) return 0; } +static int cpsw_qos_setup_tc_block(struct net_device *ndev, struct flow_block_offload *f); + int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, void *type_data) { @@ -1058,6 +1061,9 @@ int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, case TC_SETUP_QDISC_MQPRIO: return cpsw_set_mqprio(ndev, type_data); + case TC_SETUP_BLOCK: + return cpsw_qos_setup_tc_block(ndev, type_data); + default: return -EOPNOTSUPP; } @@ -1381,3 +1387,202 @@ drop: page_pool_recycle_direct(cpsw->page_pool[ch], page); return ret; } + +static int cpsw_qos_clsflower_add_policer(struct cpsw_priv *priv, + struct netlink_ext_ack *extack, + struct flow_cls_offload *cls, + u64 rate_pkt_ps) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + static const u8 mc_mac[] = {0x01, 0x00, 0x00, 0x00, 0x00, 0x00}; + struct flow_match_eth_addrs match; + u32 port_id; + int ret; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS))) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported keys used"); + return -EOPNOTSUPP; + } + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + NL_SET_ERR_MSG_MOD(extack, "Not matching on eth address"); + return -EOPNOTSUPP; + } + + flow_rule_match_eth_addrs(rule, &match); + + if (!is_zero_ether_addr(match.mask->src)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on source MAC not supported"); + return -EOPNOTSUPP; + } + + port_id = cpsw_slave_index(priv->cpsw, priv) + 1; + + if (is_broadcast_ether_addr(match.key->dst) && + is_broadcast_ether_addr(match.mask->dst)) { + ret = cpsw_ale_rx_ratelimit_bc(priv->cpsw->ale, port_id, rate_pkt_ps); + if (ret) + return ret; + + priv->ale_bc_ratelimit.cookie = cls->cookie; + priv->ale_bc_ratelimit.rate_packet_ps = rate_pkt_ps; + } else if (ether_addr_equal_unaligned(match.key->dst, mc_mac) && + ether_addr_equal_unaligned(match.mask->dst, mc_mac)) { + ret = cpsw_ale_rx_ratelimit_mc(priv->cpsw->ale, port_id, rate_pkt_ps); + if (ret) + return ret; + + priv->ale_mc_ratelimit.cookie = cls->cookie; + priv->ale_mc_ratelimit.rate_packet_ps = rate_pkt_ps; + } else { + NL_SET_ERR_MSG_MOD(extack, "Not supported matching key"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int cpsw_qos_clsflower_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.rate_bytes_ps || act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when bytes per second/peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int cpsw_qos_configure_clsflower(struct cpsw_priv *priv, struct flow_cls_offload *cls) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct netlink_ext_ack *extack = cls->common.extack; + const struct flow_action_entry *act; + int i, ret; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_POLICE: + ret = cpsw_qos_clsflower_policer_validate(&rule->action, act, extack); + if (ret) + return ret; + + return cpsw_qos_clsflower_add_policer(priv, extack, cls, + act->police.rate_pkt_ps); + default: + NL_SET_ERR_MSG_MOD(extack, "Action not supported"); + return -EOPNOTSUPP; + } + } + return -EOPNOTSUPP; +} + +static int cpsw_qos_delete_clsflower(struct cpsw_priv *priv, struct flow_cls_offload *cls) +{ + u32 port_id = cpsw_slave_index(priv->cpsw, priv) + 1; + + if (cls->cookie == priv->ale_bc_ratelimit.cookie) { + priv->ale_bc_ratelimit.cookie = 0; + priv->ale_bc_ratelimit.rate_packet_ps = 0; + cpsw_ale_rx_ratelimit_bc(priv->cpsw->ale, port_id, 0); + } + + if (cls->cookie == priv->ale_mc_ratelimit.cookie) { + priv->ale_mc_ratelimit.cookie = 0; + priv->ale_mc_ratelimit.rate_packet_ps = 0; + cpsw_ale_rx_ratelimit_mc(priv->cpsw->ale, port_id, 0); + } + + return 0; +} + +static int cpsw_qos_setup_tc_clsflower(struct cpsw_priv *priv, struct flow_cls_offload *cls_flower) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return cpsw_qos_configure_clsflower(priv, cls_flower); + case FLOW_CLS_DESTROY: + return cpsw_qos_delete_clsflower(priv, cls_flower); + default: + return -EOPNOTSUPP; + } +} + +static int cpsw_qos_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ + struct cpsw_priv *priv = cb_priv; + int ret; + + if (!tc_cls_can_offload_and_chain0(priv->ndev, type_data)) + return -EOPNOTSUPP; + + ret = pm_runtime_get_sync(priv->dev); + if (ret < 0) { + pm_runtime_put_noidle(priv->dev); + return ret; + } + + switch (type) { + case TC_SETUP_CLSFLOWER: + ret = cpsw_qos_setup_tc_clsflower(priv, type_data); + break; + default: + ret = -EOPNOTSUPP; + } + + pm_runtime_put(priv->dev); + return ret; +} + +static LIST_HEAD(cpsw_qos_block_cb_list); + +static int cpsw_qos_setup_tc_block(struct net_device *ndev, struct flow_block_offload *f) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + + return flow_block_cb_setup_simple(f, &cpsw_qos_block_cb_list, + cpsw_qos_setup_tc_block_cb, + priv, priv, true); +} + +void cpsw_qos_clsflower_resume(struct cpsw_priv *priv) +{ + u32 port_id = cpsw_slave_index(priv->cpsw, priv) + 1; + + if (priv->ale_bc_ratelimit.cookie) + cpsw_ale_rx_ratelimit_bc(priv->cpsw->ale, port_id, + priv->ale_bc_ratelimit.rate_packet_ps); + + if (priv->ale_mc_ratelimit.cookie) + cpsw_ale_rx_ratelimit_mc(priv->cpsw->ale, port_id, + priv->ale_mc_ratelimit.rate_packet_ps); +} diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 74555970730c..fc591f5ebe18 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -364,6 +364,11 @@ struct cpsw_common { u8 base_mac[ETH_ALEN]; }; +struct cpsw_ale_ratelimit { + unsigned long cookie; + u64 rate_packet_ps; +}; + struct cpsw_priv { struct net_device *ndev; struct device *dev; @@ -384,6 +389,8 @@ struct cpsw_priv { struct cpsw_common *cpsw; int offload_fwd_mark; u32 tx_packet_min; + struct cpsw_ale_ratelimit ale_bc_ratelimit; + struct cpsw_ale_ratelimit ale_mc_ratelimit; }; #define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw) @@ -411,7 +418,6 @@ struct __aligned(sizeof(long)) cpsw_meta_xdp { /* The buf includes headroom compatible with both skb and xdpf */ #define CPSW_HEADROOM_NA (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + NET_IP_ALIGN) -#define CPSW_HEADROOM ALIGN(CPSW_HEADROOM_NA, sizeof(long)) static inline int cpsw_is_xdpf_handle(void *handle) { @@ -462,6 +468,7 @@ int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, bool cpsw_shp_is_off(struct cpsw_priv *priv); void cpsw_cbs_resume(struct cpsw_slave *slave, struct cpsw_priv *priv); void cpsw_mqprio_resume(struct cpsw_slave *slave, struct cpsw_priv *priv); +void cpsw_qos_clsflower_resume(struct cpsw_priv *priv); /* ethtool */ u32 cpsw_get_msglevel(struct net_device *ndev); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 869e362e09c1..3f6b9dfca095 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1515,7 +1515,7 @@ static int temac_probe(struct platform_device *pdev) of_node_put(dma_np); return PTR_ERR(lp->sdma_regs); } - if (of_get_property(dma_np, "little-endian", NULL)) { + if (of_property_read_bool(dma_np, "little-endian")) { lp->dma_in = temac_dma_in32_le; lp->dma_out = temac_dma_out32_le; } else { diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c index 1f382777aa5a..9abbdb71e629 100644 --- a/drivers/net/ethernet/xscale/ptp_ixp46x.c +++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c @@ -271,7 +271,7 @@ static int ptp_ixp_probe(struct platform_device *pdev) ixp_clock.master_irq = platform_get_irq(pdev, 0); ixp_clock.slave_irq = platform_get_irq(pdev, 1); if (IS_ERR(ixp_clock.regs) || - !ixp_clock.master_irq || !ixp_clock.slave_irq) + ixp_clock.master_irq < 0 || ixp_clock.slave_irq < 0) return -ENXIO; ixp_clock.caps = ptp_ixp_caps; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index cf69da0e296c..25b38a374e3c 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -15,6 +15,7 @@ #include <linux/list.h> #include <linux/hyperv.h> #include <linux/rndis.h> +#include <linux/jhash.h> /* RSS related */ #define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */ @@ -237,6 +238,7 @@ int netvsc_recv_callback(struct net_device *net, void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); +void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev); u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, struct xdp_buff *xdp); unsigned int netvsc_xdp_fraglen(unsigned int len); @@ -246,6 +248,8 @@ int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog, struct netvsc_device *nvdev); int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog); int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf); +int netvsc_ndoxdp_xmit(struct net_device *ndev, int n, + struct xdp_frame **frames, u32 flags); int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev, @@ -942,12 +946,21 @@ struct nvsc_rsc { #define NVSC_RSC_CSUM_INFO BIT(1) /* valid/present bit for 'csum_info' */ #define NVSC_RSC_HASH_INFO BIT(2) /* valid/present bit for 'hash_info' */ -struct netvsc_stats { +struct netvsc_stats_tx { + u64 packets; + u64 bytes; + u64 xdp_xmit; + struct u64_stats_sync syncp; +}; + +struct netvsc_stats_rx { u64 packets; u64 bytes; u64 broadcast; u64 multicast; u64 xdp_drop; + u64 xdp_redirect; + u64 xdp_tx; struct u64_stats_sync syncp; }; @@ -1046,6 +1059,55 @@ struct net_device_context { struct netvsc_device_info *saved_netvsc_dev_info; }; +/* Azure hosts don't support non-TCP port numbers in hashing for fragmented + * packets. We can use ethtool to change UDP hash level when necessary. + */ +static inline u32 netvsc_get_hash(struct sk_buff *skb, + const struct net_device_context *ndc) +{ + struct flow_keys flow; + u32 hash, pkt_proto = 0; + static u32 hashrnd __read_mostly; + + net_get_random_once(&hashrnd, sizeof(hashrnd)); + + if (!skb_flow_dissect_flow_keys(skb, &flow, 0)) + return 0; + + switch (flow.basic.ip_proto) { + case IPPROTO_TCP: + if (flow.basic.n_proto == htons(ETH_P_IP)) + pkt_proto = HV_TCP4_L4HASH; + else if (flow.basic.n_proto == htons(ETH_P_IPV6)) + pkt_proto = HV_TCP6_L4HASH; + + break; + + case IPPROTO_UDP: + if (flow.basic.n_proto == htons(ETH_P_IP)) + pkt_proto = HV_UDP4_L4HASH; + else if (flow.basic.n_proto == htons(ETH_P_IPV6)) + pkt_proto = HV_UDP6_L4HASH; + + break; + } + + if (pkt_proto & ndc->l4_hash) { + return skb_get_hash(skb); + } else { + if (flow.basic.n_proto == htons(ETH_P_IP)) + hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd); + else if (flow.basic.n_proto == htons(ETH_P_IPV6)) + hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd); + else + return 0; + + __skb_set_sw_hash(skb, hash, false); + } + + return hash; +} + /* Per channel data */ struct netvsc_channel { struct vmbus_channel *channel; @@ -1060,9 +1122,10 @@ struct netvsc_channel { struct bpf_prog __rcu *bpf_prog; struct xdp_rxq_info xdp_rxq; + bool xdp_flush; - struct netvsc_stats tx_stats; - struct netvsc_stats rx_stats; + struct netvsc_stats_tx tx_stats; + struct netvsc_stats_rx rx_stats; }; /* Per netvsc device */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 9442f751ad3a..6e42cb03e226 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -20,6 +20,7 @@ #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <linux/prefetch.h> +#include <linux/filter.h> #include <asm/sync_bitops.h> #include <asm/mshyperv.h> @@ -792,9 +793,9 @@ static void netvsc_send_tx_complete(struct net_device *ndev, int queue_sends; u64 cmd_rqst; - cmd_rqst = channel->request_addr_callback(channel, (u64)desc->trans_id); + cmd_rqst = channel->request_addr_callback(channel, desc->trans_id); if (cmd_rqst == VMBUS_RQST_ERROR) { - netdev_err(ndev, "Incorrect transaction id\n"); + netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id); return; } @@ -805,7 +806,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev, struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)skb->cb; u32 send_index = packet->send_buf_index; - struct netvsc_stats *tx_stats; + struct netvsc_stats_tx *tx_stats; if (send_index != NETVSC_INVALID_INDEX) netvsc_free_send_slot(net_device, send_index); @@ -854,9 +855,9 @@ static void netvsc_send_completion(struct net_device *ndev, /* First check if this is a VMBUS completion without data payload */ if (!msglen) { cmd_rqst = incoming_channel->request_addr_callback(incoming_channel, - (u64)desc->trans_id); + desc->trans_id); if (cmd_rqst == VMBUS_RQST_ERROR) { - netdev_err(ndev, "Invalid transaction id\n"); + netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id); return; } @@ -1670,12 +1671,17 @@ int netvsc_poll(struct napi_struct *napi, int budget) if (!nvchan->desc) nvchan->desc = hv_pkt_iter_first(channel); + nvchan->xdp_flush = false; + while (nvchan->desc && work_done < budget) { work_done += netvsc_process_raw_pkt(device, nvchan, net_device, ndev, nvchan->desc, budget); nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc); } + if (nvchan->xdp_flush) + xdp_do_flush(); + /* Send any pending receive completions */ ret = send_recv_completions(ndev, net_device, nvchan); diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c index 7856905414eb..4a9522689fa4 100644 --- a/drivers/net/hyperv/netvsc_bpf.c +++ b/drivers/net/hyperv/netvsc_bpf.c @@ -10,6 +10,7 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> +#include <linux/netpoll.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/kernel.h> @@ -23,11 +24,13 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, struct xdp_buff *xdp) { + struct netvsc_stats_rx *rx_stats = &nvchan->rx_stats; void *data = nvchan->rsc.data[0]; u32 len = nvchan->rsc.len[0]; struct page *page = NULL; struct bpf_prog *prog; u32 act = XDP_PASS; + bool drop = true; xdp->data_hard_start = NULL; @@ -60,9 +63,34 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, switch (act) { case XDP_PASS: case XDP_TX: + drop = false; + break; + case XDP_DROP: break; + case XDP_REDIRECT: + if (!xdp_do_redirect(ndev, xdp, prog)) { + nvchan->xdp_flush = true; + drop = false; + + u64_stats_update_begin(&rx_stats->syncp); + + rx_stats->xdp_redirect++; + rx_stats->packets++; + rx_stats->bytes += nvchan->rsc.pktlen; + + u64_stats_update_end(&rx_stats->syncp); + + break; + } else { + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->xdp_drop++; + u64_stats_update_end(&rx_stats->syncp); + } + + fallthrough; + case XDP_ABORTED: trace_xdp_exception(ndev, prog, act); break; @@ -74,7 +102,7 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, out: rcu_read_unlock(); - if (page && act != XDP_PASS && act != XDP_TX) { + if (page && drop) { __free_page(page); xdp->data_hard_start = NULL; } @@ -137,7 +165,6 @@ int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog, int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog) { struct netdev_bpf xdp; - bpf_op_t ndo_bpf; int ret; ASSERT_RTNL(); @@ -145,8 +172,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog) if (!vf_netdev) return 0; - ndo_bpf = vf_netdev->netdev_ops->ndo_bpf; - if (!ndo_bpf) + if (!vf_netdev->netdev_ops->ndo_bpf) return 0; memset(&xdp, 0, sizeof(xdp)); @@ -157,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog) xdp.command = XDP_SETUP_PROG; xdp.prog = prog; - ret = ndo_bpf(vf_netdev, &xdp); + ret = vf_netdev->netdev_ops->ndo_bpf(vf_netdev, &xdp); if (ret && prog) bpf_prog_put(prog); @@ -199,3 +225,68 @@ int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf) return -EINVAL; } } + +static int netvsc_ndoxdp_xmit_fm(struct net_device *ndev, + struct xdp_frame *frame, u16 q_idx) +{ + struct sk_buff *skb; + + skb = xdp_build_skb_from_frame(frame, ndev); + if (unlikely(!skb)) + return -ENOMEM; + + netvsc_get_hash(skb, netdev_priv(ndev)); + + skb_record_rx_queue(skb, q_idx); + + netvsc_xdp_xmit(skb, ndev); + + return 0; +} + +int netvsc_ndoxdp_xmit(struct net_device *ndev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct net_device_context *ndev_ctx = netdev_priv(ndev); + const struct net_device_ops *vf_ops; + struct netvsc_stats_tx *tx_stats; + struct netvsc_device *nvsc_dev; + struct net_device *vf_netdev; + int i, count = 0; + u16 q_idx; + + /* Don't transmit if netvsc_device is gone */ + nvsc_dev = rcu_dereference_bh(ndev_ctx->nvdev); + if (unlikely(!nvsc_dev || nvsc_dev->destroy)) + return 0; + + /* If VF is present and up then redirect packets to it. + * Skip the VF if it is marked down or has no carrier. + * If netpoll is in uses, then VF can not be used either. + */ + vf_netdev = rcu_dereference_bh(ndev_ctx->vf_netdev); + if (vf_netdev && netif_running(vf_netdev) && + netif_carrier_ok(vf_netdev) && !netpoll_tx_running(ndev) && + vf_netdev->netdev_ops->ndo_xdp_xmit && + ndev_ctx->data_path_is_vf) { + vf_ops = vf_netdev->netdev_ops; + return vf_ops->ndo_xdp_xmit(vf_netdev, n, frames, flags); + } + + q_idx = smp_processor_id() % ndev->real_num_tx_queues; + + for (i = 0; i < n; i++) { + if (netvsc_ndoxdp_xmit_fm(ndev, frames[i], q_idx)) + break; + + count++; + } + + tx_stats = &nvsc_dev->chan_table[q_idx].tx_stats; + + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->xdp_xmit += count; + u64_stats_update_end(&tx_stats->syncp); + + return count; +} diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index fde1c492ca02..27f6bbca6619 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -242,56 +242,6 @@ static inline void *init_ppi_data(struct rndis_message *msg, return ppi + 1; } -/* Azure hosts don't support non-TCP port numbers in hashing for fragmented - * packets. We can use ethtool to change UDP hash level when necessary. - */ -static inline u32 netvsc_get_hash( - struct sk_buff *skb, - const struct net_device_context *ndc) -{ - struct flow_keys flow; - u32 hash, pkt_proto = 0; - static u32 hashrnd __read_mostly; - - net_get_random_once(&hashrnd, sizeof(hashrnd)); - - if (!skb_flow_dissect_flow_keys(skb, &flow, 0)) - return 0; - - switch (flow.basic.ip_proto) { - case IPPROTO_TCP: - if (flow.basic.n_proto == htons(ETH_P_IP)) - pkt_proto = HV_TCP4_L4HASH; - else if (flow.basic.n_proto == htons(ETH_P_IPV6)) - pkt_proto = HV_TCP6_L4HASH; - - break; - - case IPPROTO_UDP: - if (flow.basic.n_proto == htons(ETH_P_IP)) - pkt_proto = HV_UDP4_L4HASH; - else if (flow.basic.n_proto == htons(ETH_P_IPV6)) - pkt_proto = HV_UDP6_L4HASH; - - break; - } - - if (pkt_proto & ndc->l4_hash) { - return skb_get_hash(skb); - } else { - if (flow.basic.n_proto == htons(ETH_P_IP)) - hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd); - else if (flow.basic.n_proto == htons(ETH_P_IPV6)) - hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd); - else - return 0; - - __skb_set_sw_hash(skb, hash, false); - } - - return hash; -} - static inline int netvsc_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, int old_idx) { @@ -804,7 +754,7 @@ void netvsc_linkstatus_callback(struct net_device *net, } /* This function should only be called after skb_record_rx_queue() */ -static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev) +void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev) { int rc; @@ -925,7 +875,7 @@ int netvsc_recv_callback(struct net_device *net, struct vmbus_channel *channel = nvchan->channel; u16 q_idx = channel->offermsg.offer.sub_channel_index; struct sk_buff *skb; - struct netvsc_stats *rx_stats = &nvchan->rx_stats; + struct netvsc_stats_rx *rx_stats = &nvchan->rx_stats; struct xdp_buff xdp; u32 act; @@ -934,6 +884,9 @@ int netvsc_recv_callback(struct net_device *net, act = netvsc_run_xdp(net, nvchan, &xdp); + if (act == XDP_REDIRECT) + return NVSP_STAT_SUCCESS; + if (act != XDP_PASS && act != XDP_TX) { u64_stats_update_begin(&rx_stats->syncp); rx_stats->xdp_drop++; @@ -958,6 +911,9 @@ int netvsc_recv_callback(struct net_device *net, * statistics will not work correctly. */ u64_stats_update_begin(&rx_stats->syncp); + if (act == XDP_TX) + rx_stats->xdp_tx++; + rx_stats->packets++; rx_stats->bytes += nvchan->rsc.pktlen; @@ -1353,28 +1309,29 @@ static void netvsc_get_pcpu_stats(struct net_device *net, /* fetch percpu stats of netvsc */ for (i = 0; i < nvdev->num_chn; i++) { const struct netvsc_channel *nvchan = &nvdev->chan_table[i]; - const struct netvsc_stats *stats; + const struct netvsc_stats_tx *tx_stats; + const struct netvsc_stats_rx *rx_stats; struct netvsc_ethtool_pcpu_stats *this_tot = &pcpu_tot[nvchan->channel->target_cpu]; u64 packets, bytes; unsigned int start; - stats = &nvchan->tx_stats; + tx_stats = &nvchan->tx_stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); this_tot->tx_bytes += bytes; this_tot->tx_packets += packets; - stats = &nvchan->rx_stats; + rx_stats = &nvchan->rx_stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); this_tot->rx_bytes += bytes; this_tot->rx_packets += packets; @@ -1406,27 +1363,28 @@ static void netvsc_get_stats64(struct net_device *net, for (i = 0; i < nvdev->num_chn; i++) { const struct netvsc_channel *nvchan = &nvdev->chan_table[i]; - const struct netvsc_stats *stats; + const struct netvsc_stats_tx *tx_stats; + const struct netvsc_stats_rx *rx_stats; u64 packets, bytes, multicast; unsigned int start; - stats = &nvchan->tx_stats; + tx_stats = &nvchan->tx_stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); t->tx_bytes += bytes; t->tx_packets += packets; - stats = &nvchan->rx_stats; + rx_stats = &nvchan->rx_stats; do { - start = u64_stats_fetch_begin_irq(&stats->syncp); - packets = stats->packets; - bytes = stats->bytes; - multicast = stats->multicast + stats->broadcast; - } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + multicast = rx_stats->multicast + rx_stats->broadcast; + } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); t->rx_bytes += bytes; t->rx_packets += packets; @@ -1515,8 +1473,8 @@ static const struct { /* statistics per queue (rx/tx packets/bytes) */ #define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats)) -/* 5 statistics per queue (rx/tx packets/bytes, rx xdp_drop) */ -#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 5) +/* 8 statistics per queue (rx/tx packets/bytes, XDP actions) */ +#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 8) static int netvsc_get_sset_count(struct net_device *dev, int string_set) { @@ -1543,12 +1501,16 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, struct net_device_context *ndc = netdev_priv(dev); struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev); const void *nds = &ndc->eth_stats; - const struct netvsc_stats *qstats; + const struct netvsc_stats_tx *tx_stats; + const struct netvsc_stats_rx *rx_stats; struct netvsc_vf_pcpu_stats sum; struct netvsc_ethtool_pcpu_stats *pcpu_sum; unsigned int start; u64 packets, bytes; u64 xdp_drop; + u64 xdp_redirect; + u64 xdp_tx; + u64 xdp_xmit; int i, j, cpu; if (!nvdev) @@ -1562,26 +1524,32 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset); for (j = 0; j < nvdev->num_chn; j++) { - qstats = &nvdev->chan_table[j].tx_stats; + tx_stats = &nvdev->chan_table[j].tx_stats; do { - start = u64_stats_fetch_begin_irq(&qstats->syncp); - packets = qstats->packets; - bytes = qstats->bytes; - } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); + start = u64_stats_fetch_begin_irq(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + xdp_xmit = tx_stats->xdp_xmit; + } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; + data[i++] = xdp_xmit; - qstats = &nvdev->chan_table[j].rx_stats; + rx_stats = &nvdev->chan_table[j].rx_stats; do { - start = u64_stats_fetch_begin_irq(&qstats->syncp); - packets = qstats->packets; - bytes = qstats->bytes; - xdp_drop = qstats->xdp_drop; - } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); + start = u64_stats_fetch_begin_irq(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + xdp_drop = rx_stats->xdp_drop; + xdp_redirect = rx_stats->xdp_redirect; + xdp_tx = rx_stats->xdp_tx; + } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); data[i++] = packets; data[i++] = bytes; data[i++] = xdp_drop; + data[i++] = xdp_redirect; + data[i++] = xdp_tx; } pcpu_sum = kvmalloc_array(num_possible_cpus(), @@ -1622,9 +1590,12 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) for (i = 0; i < nvdev->num_chn; i++) { ethtool_sprintf(&p, "tx_queue_%u_packets", i); ethtool_sprintf(&p, "tx_queue_%u_bytes", i); + ethtool_sprintf(&p, "tx_queue_%u_xdp_xmit", i); ethtool_sprintf(&p, "rx_queue_%u_packets", i); ethtool_sprintf(&p, "rx_queue_%u_bytes", i); ethtool_sprintf(&p, "rx_queue_%u_xdp_drop", i); + ethtool_sprintf(&p, "rx_queue_%u_xdp_redirect", i); + ethtool_sprintf(&p, "rx_queue_%u_xdp_tx", i); } for_each_present_cpu(cpu) { @@ -2057,6 +2028,7 @@ static const struct net_device_ops device_ops = { .ndo_select_queue = netvsc_select_queue, .ndo_get_stats64 = netvsc_get_stats64, .ndo_bpf = netvsc_bpf, + .ndo_xdp_xmit = netvsc_ndoxdp_xmit, }; /* diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c index e2273588c75b..7aa49827196f 100644 --- a/drivers/net/mdio/mdio-aspeed.c +++ b/drivers/net/mdio/mdio-aspeed.c @@ -21,6 +21,10 @@ #define ASPEED_MDIO_CTRL_OP GENMASK(27, 26) #define MDIO_C22_OP_WRITE 0b01 #define MDIO_C22_OP_READ 0b10 +#define MDIO_C45_OP_ADDR 0b00 +#define MDIO_C45_OP_WRITE 0b01 +#define MDIO_C45_OP_PREAD 0b10 +#define MDIO_C45_OP_READ 0b11 #define ASPEED_MDIO_CTRL_PHYAD GENMASK(25, 21) #define ASPEED_MDIO_CTRL_REGAD GENMASK(20, 16) #define ASPEED_MDIO_CTRL_MIIWDATA GENMASK(15, 0) @@ -39,34 +43,35 @@ struct aspeed_mdio { void __iomem *base; }; -static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum) +static int aspeed_mdio_op(struct mii_bus *bus, u8 st, u8 op, u8 phyad, u8 regad, + u16 data) { struct aspeed_mdio *ctx = bus->priv; u32 ctrl; - u32 data; - int rc; - - dev_dbg(&bus->dev, "%s: addr: %d, regnum: %d\n", __func__, addr, - regnum); - /* Just clause 22 for the moment */ - if (regnum & MII_ADDR_C45) - return -EOPNOTSUPP; + dev_dbg(&bus->dev, "%s: st: %u op: %u, phyad: %u, regad: %u, data: %u\n", + __func__, st, op, phyad, regad, data); ctrl = ASPEED_MDIO_CTRL_FIRE - | FIELD_PREP(ASPEED_MDIO_CTRL_ST, ASPEED_MDIO_CTRL_ST_C22) - | FIELD_PREP(ASPEED_MDIO_CTRL_OP, MDIO_C22_OP_READ) - | FIELD_PREP(ASPEED_MDIO_CTRL_PHYAD, addr) - | FIELD_PREP(ASPEED_MDIO_CTRL_REGAD, regnum); + | FIELD_PREP(ASPEED_MDIO_CTRL_ST, st) + | FIELD_PREP(ASPEED_MDIO_CTRL_OP, op) + | FIELD_PREP(ASPEED_MDIO_CTRL_PHYAD, phyad) + | FIELD_PREP(ASPEED_MDIO_CTRL_REGAD, regad) + | FIELD_PREP(ASPEED_MDIO_DATA_MIIRDATA, data); iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL); - rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl, + return readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl, !(ctrl & ASPEED_MDIO_CTRL_FIRE), ASPEED_MDIO_INTERVAL_US, ASPEED_MDIO_TIMEOUT_US); - if (rc < 0) - return rc; +} + +static int aspeed_mdio_get_data(struct mii_bus *bus) +{ + struct aspeed_mdio *ctx = bus->priv; + u32 data; + int rc; rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_DATA, data, data & ASPEED_MDIO_DATA_IDLE, @@ -78,31 +83,80 @@ static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum) return FIELD_GET(ASPEED_MDIO_DATA_MIIRDATA, data); } -static int aspeed_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val) +static int aspeed_mdio_read_c22(struct mii_bus *bus, int addr, int regnum) { - struct aspeed_mdio *ctx = bus->priv; - u32 ctrl; + int rc; - dev_dbg(&bus->dev, "%s: addr: %d, regnum: %d, val: 0x%x\n", - __func__, addr, regnum, val); + rc = aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C22, MDIO_C22_OP_READ, + addr, regnum, 0); + if (rc < 0) + return rc; + + return aspeed_mdio_get_data(bus); +} + +static int aspeed_mdio_write_c22(struct mii_bus *bus, int addr, int regnum, + u16 val) +{ + return aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C22, MDIO_C22_OP_WRITE, + addr, regnum, val); +} + +static int aspeed_mdio_read_c45(struct mii_bus *bus, int addr, int regnum) +{ + u8 c45_dev = (regnum >> 16) & 0x1F; + u16 c45_addr = regnum & 0xFFFF; + int rc; + + rc = aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C45, MDIO_C45_OP_ADDR, + addr, c45_dev, c45_addr); + if (rc < 0) + return rc; + + rc = aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C45, MDIO_C45_OP_READ, + addr, c45_dev, 0); + if (rc < 0) + return rc; + + return aspeed_mdio_get_data(bus); +} + +static int aspeed_mdio_write_c45(struct mii_bus *bus, int addr, int regnum, + u16 val) +{ + u8 c45_dev = (regnum >> 16) & 0x1F; + u16 c45_addr = regnum & 0xFFFF; + int rc; + + rc = aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C45, MDIO_C45_OP_ADDR, + addr, c45_dev, c45_addr); + if (rc < 0) + return rc; + + return aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C45, MDIO_C45_OP_WRITE, + addr, c45_dev, val); +} + +static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum) +{ + dev_dbg(&bus->dev, "%s: addr: %d, regnum: %d\n", __func__, addr, + regnum); - /* Just clause 22 for the moment */ if (regnum & MII_ADDR_C45) - return -EOPNOTSUPP; + return aspeed_mdio_read_c45(bus, addr, regnum); - ctrl = ASPEED_MDIO_CTRL_FIRE - | FIELD_PREP(ASPEED_MDIO_CTRL_ST, ASPEED_MDIO_CTRL_ST_C22) - | FIELD_PREP(ASPEED_MDIO_CTRL_OP, MDIO_C22_OP_WRITE) - | FIELD_PREP(ASPEED_MDIO_CTRL_PHYAD, addr) - | FIELD_PREP(ASPEED_MDIO_CTRL_REGAD, regnum) - | FIELD_PREP(ASPEED_MDIO_CTRL_MIIWDATA, val); + return aspeed_mdio_read_c22(bus, addr, regnum); +} - iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL); +static int aspeed_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val) +{ + dev_dbg(&bus->dev, "%s: addr: %d, regnum: %d, val: 0x%x\n", + __func__, addr, regnum, val); - return readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl, - !(ctrl & ASPEED_MDIO_CTRL_FIRE), - ASPEED_MDIO_INTERVAL_US, - ASPEED_MDIO_TIMEOUT_US); + if (regnum & MII_ADDR_C45) + return aspeed_mdio_write_c45(bus, addr, regnum, val); + + return aspeed_mdio_write_c22(bus, addr, regnum, val); } static int aspeed_mdio_probe(struct platform_device *pdev) @@ -125,6 +179,7 @@ static int aspeed_mdio_probe(struct platform_device *pdev) bus->parent = &pdev->dev; bus->read = aspeed_mdio_read; bus->write = aspeed_mdio_write; + bus->probe_capabilities = MDIOBUS_C22_C45; rc = of_mdiobus_register(bus, pdev->dev.of_node); if (rc) { diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c index 582969751b4c..08541007b18a 100644 --- a/drivers/net/mdio/mdio-mscc-miim.c +++ b/drivers/net/mdio/mdio-mscc-miim.c @@ -7,6 +7,7 @@ */ #include <linux/bitops.h> +#include <linux/clk.h> #include <linux/io.h> #include <linux/iopoll.h> #include <linux/kernel.h> @@ -30,6 +31,8 @@ #define MSCC_MIIM_CMD_VLD BIT(31) #define MSCC_MIIM_REG_DATA 0xC #define MSCC_MIIM_DATA_ERROR (BIT(16) | BIT(17)) +#define MSCC_MIIM_REG_CFG 0x10 +#define MSCC_MIIM_CFG_PRESCALE_MASK GENMASK(7, 0) #define MSCC_PHY_REG_PHY_CFG 0x0 #define PHY_CFG_PHY_ENA (BIT(0) | BIT(1) | BIT(2) | BIT(3)) @@ -50,6 +53,8 @@ struct mscc_miim_dev { int mii_status_offset; struct regmap *phy_regs; const struct mscc_miim_info *info; + struct clk *clk; + u32 bus_freq; }; /* When high resolution timers aren't built-in: we can't use usleep_range() as @@ -241,9 +246,33 @@ int mscc_miim_setup(struct device *dev, struct mii_bus **pbus, const char *name, } EXPORT_SYMBOL(mscc_miim_setup); +static int mscc_miim_clk_set(struct mii_bus *bus) +{ + struct mscc_miim_dev *miim = bus->priv; + unsigned long rate; + u32 div; + + /* Keep the current settings */ + if (!miim->bus_freq) + return 0; + + rate = clk_get_rate(miim->clk); + + div = DIV_ROUND_UP(rate, 2 * miim->bus_freq) - 1; + if (div == 0 || div & ~MSCC_MIIM_CFG_PRESCALE_MASK) { + dev_err(&bus->dev, "Incorrect MDIO clock frequency\n"); + return -EINVAL; + } + + return regmap_update_bits(miim->regs, MSCC_MIIM_REG_CFG, + MSCC_MIIM_CFG_PRESCALE_MASK, div); +} + static int mscc_miim_probe(struct platform_device *pdev) { struct regmap *mii_regmap, *phy_regmap = NULL; + struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; void __iomem *regs, *phy_regs; struct mscc_miim_dev *miim; struct resource *res; @@ -252,63 +281,87 @@ static int mscc_miim_probe(struct platform_device *pdev) regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(regs)) { - dev_err(&pdev->dev, "Unable to map MIIM registers\n"); + dev_err(dev, "Unable to map MIIM registers\n"); return PTR_ERR(regs); } - mii_regmap = devm_regmap_init_mmio(&pdev->dev, regs, - &mscc_miim_regmap_config); + mii_regmap = devm_regmap_init_mmio(dev, regs, &mscc_miim_regmap_config); if (IS_ERR(mii_regmap)) { - dev_err(&pdev->dev, "Unable to create MIIM regmap\n"); + dev_err(dev, "Unable to create MIIM regmap\n"); return PTR_ERR(mii_regmap); } /* This resource is optional */ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (res) { - phy_regs = devm_ioremap_resource(&pdev->dev, res); + phy_regs = devm_ioremap_resource(dev, res); if (IS_ERR(phy_regs)) { - dev_err(&pdev->dev, "Unable to map internal phy registers\n"); + dev_err(dev, "Unable to map internal phy registers\n"); return PTR_ERR(phy_regs); } - phy_regmap = devm_regmap_init_mmio(&pdev->dev, phy_regs, + phy_regmap = devm_regmap_init_mmio(dev, phy_regs, &mscc_miim_phy_regmap_config); if (IS_ERR(phy_regmap)) { - dev_err(&pdev->dev, "Unable to create phy register regmap\n"); + dev_err(dev, "Unable to create phy register regmap\n"); return PTR_ERR(phy_regmap); } } - ret = mscc_miim_setup(&pdev->dev, &bus, "mscc_miim", mii_regmap, 0); + ret = mscc_miim_setup(dev, &bus, "mscc_miim", mii_regmap, 0); if (ret < 0) { - dev_err(&pdev->dev, "Unable to setup the MDIO bus\n"); + dev_err(dev, "Unable to setup the MDIO bus\n"); return ret; } miim = bus->priv; miim->phy_regs = phy_regmap; - miim->info = device_get_match_data(&pdev->dev); + miim->info = device_get_match_data(dev); if (!miim->info) return -EINVAL; - ret = of_mdiobus_register(bus, pdev->dev.of_node); - if (ret < 0) { - dev_err(&pdev->dev, "Cannot register MDIO bus (%d)\n", ret); + miim->clk = devm_clk_get_optional(dev, NULL); + if (IS_ERR(miim->clk)) + return PTR_ERR(miim->clk); + + of_property_read_u32(np, "clock-frequency", &miim->bus_freq); + + if (miim->bus_freq && !miim->clk) { + dev_err(dev, "cannot use clock-frequency without a clock\n"); + return -EINVAL; + } + + ret = clk_prepare_enable(miim->clk); + if (ret) return ret; + + ret = mscc_miim_clk_set(bus); + if (ret) + goto out_disable_clk; + + ret = of_mdiobus_register(bus, np); + if (ret < 0) { + dev_err(dev, "Cannot register MDIO bus (%d)\n", ret); + goto out_disable_clk; } platform_set_drvdata(pdev, bus); return 0; + +out_disable_clk: + clk_disable_unprepare(miim->clk); + return ret; } static int mscc_miim_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); + struct mscc_miim_dev *miim = bus->priv; + clk_disable_unprepare(miim->clk); mdiobus_unregister(bus); return 0; diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 378ee779061c..c8f398f5bc5b 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -22,6 +22,7 @@ #include <linux/spinlock_types.h> #include <linux/types.h> #include <net/fib_notifier.h> +#include <net/inet_dscp.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/fib_rules.h> @@ -78,7 +79,7 @@ struct nsim_fib_rt { struct nsim_fib4_rt { struct nsim_fib_rt common; struct fib_info *fi; - u8 tos; + dscp_t dscp; u8 type; }; @@ -283,7 +284,7 @@ nsim_fib4_rt_create(struct nsim_fib_data *data, fib4_rt->fi = fen_info->fi; fib_info_hold(fib4_rt->fi); - fib4_rt->tos = fen_info->tos; + fib4_rt->dscp = fen_info->dscp; fib4_rt->type = fen_info->type; return fib4_rt; @@ -322,7 +323,7 @@ nsim_fib4_rt_offload_failed_flag_set(struct net *net, fri.tb_id = fen_info->tb_id; fri.dst = cpu_to_be32(*p_dst); fri.dst_len = fen_info->dst_len; - fri.tos = fen_info->tos; + fri.dscp = fen_info->dscp; fri.type = fen_info->type; fri.offload = false; fri.trap = false; @@ -342,7 +343,7 @@ static void nsim_fib4_rt_hw_flags_set(struct net *net, fri.tb_id = fib4_rt->common.key.tb_id; fri.dst = cpu_to_be32(*p_dst); fri.dst_len = dst_len; - fri.tos = fib4_rt->tos; + fri.dscp = fib4_rt->dscp; fri.type = fib4_rt->type; fri.offload = false; fri.trap = trap; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index fc53b71dc872..96840695debd 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -70,6 +70,27 @@ #define KSZ8081_LMD_SHORT_INDICATOR BIT(12) #define KSZ8081_LMD_DELTA_TIME_MASK GENMASK(8, 0) +#define KSZ9x31_LMD 0x12 +#define KSZ9x31_LMD_VCT_EN BIT(15) +#define KSZ9x31_LMD_VCT_DIS_TX BIT(14) +#define KSZ9x31_LMD_VCT_PAIR(n) (((n) & 0x3) << 12) +#define KSZ9x31_LMD_VCT_SEL_RESULT 0 +#define KSZ9x31_LMD_VCT_SEL_THRES_HI BIT(10) +#define KSZ9x31_LMD_VCT_SEL_THRES_LO BIT(11) +#define KSZ9x31_LMD_VCT_SEL_MASK GENMASK(11, 10) +#define KSZ9x31_LMD_VCT_ST_NORMAL 0 +#define KSZ9x31_LMD_VCT_ST_OPEN 1 +#define KSZ9x31_LMD_VCT_ST_SHORT 2 +#define KSZ9x31_LMD_VCT_ST_FAIL 3 +#define KSZ9x31_LMD_VCT_ST_MASK GENMASK(9, 8) +#define KSZ9x31_LMD_VCT_DATA_REFLECTED_INVALID BIT(7) +#define KSZ9x31_LMD_VCT_DATA_SIG_WAIT_TOO_LONG BIT(6) +#define KSZ9x31_LMD_VCT_DATA_MASK100 BIT(5) +#define KSZ9x31_LMD_VCT_DATA_NLP_FLP BIT(4) +#define KSZ9x31_LMD_VCT_DATA_LO_PULSE_MASK GENMASK(3, 2) +#define KSZ9x31_LMD_VCT_DATA_HI_PULSE_MASK GENMASK(1, 0) +#define KSZ9x31_LMD_VCT_DATA_MASK GENMASK(7, 0) + /* Lan8814 general Interrupt control/status reg in GPHY specific block. */ #define LAN8814_INTC 0x18 #define LAN8814_INTS 0x1B @@ -280,6 +301,7 @@ struct kszphy_priv { struct kszphy_ptp_priv ptp_priv; const struct kszphy_type *type; int led_mode; + u16 vct_ctrl1000; bool rmii_ref_clk_sel; bool rmii_ref_clk_sel_val; u64 stats[ARRAY_SIZE(kszphy_hw_stats)]; @@ -1326,6 +1348,199 @@ static int ksz9031_read_status(struct phy_device *phydev) return 0; } +static int ksz9x31_cable_test_start(struct phy_device *phydev) +{ + struct kszphy_priv *priv = phydev->priv; + int ret; + + /* KSZ9131RNX, DS00002841B-page 38, 4.14 LinkMD (R) Cable Diagnostic + * Prior to running the cable diagnostics, Auto-negotiation should + * be disabled, full duplex set and the link speed set to 1000Mbps + * via the Basic Control Register. + */ + ret = phy_modify(phydev, MII_BMCR, + BMCR_SPEED1000 | BMCR_FULLDPLX | + BMCR_ANENABLE | BMCR_SPEED100, + BMCR_SPEED1000 | BMCR_FULLDPLX); + if (ret) + return ret; + + /* KSZ9131RNX, DS00002841B-page 38, 4.14 LinkMD (R) Cable Diagnostic + * The Master-Slave configuration should be set to Slave by writing + * a value of 0x1000 to the Auto-Negotiation Master Slave Control + * Register. + */ + ret = phy_read(phydev, MII_CTRL1000); + if (ret < 0) + return ret; + + /* Cache these bits, they need to be restored once LinkMD finishes. */ + priv->vct_ctrl1000 = ret & (CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER); + ret &= ~(CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER); + ret |= CTL1000_ENABLE_MASTER; + + return phy_write(phydev, MII_CTRL1000, ret); +} + +static int ksz9x31_cable_test_result_trans(u16 status) +{ + switch (FIELD_GET(KSZ9x31_LMD_VCT_ST_MASK, status)) { + case KSZ9x31_LMD_VCT_ST_NORMAL: + return ETHTOOL_A_CABLE_RESULT_CODE_OK; + case KSZ9x31_LMD_VCT_ST_OPEN: + return ETHTOOL_A_CABLE_RESULT_CODE_OPEN; + case KSZ9x31_LMD_VCT_ST_SHORT: + return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT; + case KSZ9x31_LMD_VCT_ST_FAIL: + fallthrough; + default: + return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC; + } +} + +static bool ksz9x31_cable_test_failed(u16 status) +{ + int stat = FIELD_GET(KSZ9x31_LMD_VCT_ST_MASK, status); + + return stat == KSZ9x31_LMD_VCT_ST_FAIL; +} + +static bool ksz9x31_cable_test_fault_length_valid(u16 status) +{ + switch (FIELD_GET(KSZ9x31_LMD_VCT_ST_MASK, status)) { + case KSZ9x31_LMD_VCT_ST_OPEN: + fallthrough; + case KSZ9x31_LMD_VCT_ST_SHORT: + return true; + } + return false; +} + +static int ksz9x31_cable_test_fault_length(struct phy_device *phydev, u16 stat) +{ + int dt = FIELD_GET(KSZ9x31_LMD_VCT_DATA_MASK, stat); + + /* KSZ9131RNX, DS00002841B-page 38, 4.14 LinkMD (R) Cable Diagnostic + * + * distance to fault = (VCT_DATA - 22) * 4 / cable propagation velocity + */ + if ((phydev->phy_id & MICREL_PHY_ID_MASK) == PHY_ID_KSZ9131) + dt = clamp(dt - 22, 0, 255); + + return (dt * 400) / 10; +} + +static int ksz9x31_cable_test_wait_for_completion(struct phy_device *phydev) +{ + int val, ret; + + ret = phy_read_poll_timeout(phydev, KSZ9x31_LMD, val, + !(val & KSZ9x31_LMD_VCT_EN), + 30000, 100000, true); + + return ret < 0 ? ret : 0; +} + +static int ksz9x31_cable_test_get_pair(int pair) +{ + static const int ethtool_pair[] = { + ETHTOOL_A_CABLE_PAIR_A, + ETHTOOL_A_CABLE_PAIR_B, + ETHTOOL_A_CABLE_PAIR_C, + ETHTOOL_A_CABLE_PAIR_D, + }; + + return ethtool_pair[pair]; +} + +static int ksz9x31_cable_test_one_pair(struct phy_device *phydev, int pair) +{ + int ret, val; + + /* KSZ9131RNX, DS00002841B-page 38, 4.14 LinkMD (R) Cable Diagnostic + * To test each individual cable pair, set the cable pair in the Cable + * Diagnostics Test Pair (VCT_PAIR[1:0]) field of the LinkMD Cable + * Diagnostic Register, along with setting the Cable Diagnostics Test + * Enable (VCT_EN) bit. The Cable Diagnostics Test Enable (VCT_EN) bit + * will self clear when the test is concluded. + */ + ret = phy_write(phydev, KSZ9x31_LMD, + KSZ9x31_LMD_VCT_EN | KSZ9x31_LMD_VCT_PAIR(pair)); + if (ret) + return ret; + + ret = ksz9x31_cable_test_wait_for_completion(phydev); + if (ret) + return ret; + + val = phy_read(phydev, KSZ9x31_LMD); + if (val < 0) + return val; + + if (ksz9x31_cable_test_failed(val)) + return -EAGAIN; + + ret = ethnl_cable_test_result(phydev, + ksz9x31_cable_test_get_pair(pair), + ksz9x31_cable_test_result_trans(val)); + if (ret) + return ret; + + if (!ksz9x31_cable_test_fault_length_valid(val)) + return 0; + + return ethnl_cable_test_fault_length(phydev, + ksz9x31_cable_test_get_pair(pair), + ksz9x31_cable_test_fault_length(phydev, val)); +} + +static int ksz9x31_cable_test_get_status(struct phy_device *phydev, + bool *finished) +{ + struct kszphy_priv *priv = phydev->priv; + unsigned long pair_mask = 0xf; + int retries = 20; + int pair, ret, rv; + + *finished = false; + + /* Try harder if link partner is active */ + while (pair_mask && retries--) { + for_each_set_bit(pair, &pair_mask, 4) { + ret = ksz9x31_cable_test_one_pair(phydev, pair); + if (ret == -EAGAIN) + continue; + if (ret < 0) + return ret; + clear_bit(pair, &pair_mask); + } + /* If link partner is in autonegotiation mode it will send 2ms + * of FLPs with at least 6ms of silence. + * Add 2ms sleep to have better chances to hit this silence. + */ + if (pair_mask) + usleep_range(2000, 3000); + } + + /* Report remaining unfinished pair result as unknown. */ + for_each_set_bit(pair, &pair_mask, 4) { + ret = ethnl_cable_test_result(phydev, + ksz9x31_cable_test_get_pair(pair), + ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC); + } + + *finished = true; + + /* Restore cached bits from before LinkMD got started. */ + rv = phy_modify(phydev, MII_CTRL1000, + CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER, + priv->vct_ctrl1000); + if (rv) + return rv; + + return ret; +} + static int ksz8873mll_config_aneg(struct phy_device *phydev) { return 0; @@ -2806,6 +3021,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id = PHY_ID_KSZ9031, .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ9031 Gigabit PHY", + .flags = PHY_POLL_CABLE_TEST, .driver_data = &ksz9021_type, .probe = kszphy_probe, .get_features = ksz9031_get_features, @@ -2819,6 +3035,8 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = kszphy_suspend, .resume = kszphy_resume, + .cable_test_start = ksz9x31_cable_test_start, + .cable_test_get_status = ksz9x31_cable_test_get_status, }, { .phy_id = PHY_ID_LAN8814, .phy_id_mask = MICREL_PHY_ID_MASK, @@ -2853,6 +3071,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Microchip KSZ9131 Gigabit PHY", /* PHY_GBIT_FEATURES */ + .flags = PHY_POLL_CABLE_TEST, .driver_data = &ksz9021_type, .probe = kszphy_probe, .config_init = ksz9131_config_init, @@ -2863,6 +3082,8 @@ static struct phy_driver ksphy_driver[] = { .get_stats = kszphy_get_stats, .suspend = kszphy_suspend, .resume = kszphy_resume, + .cable_test_start = ksz9x31_cable_test_start, + .cable_test_get_status = ksz9x31_cable_test_get_status, }, { .phy_id = PHY_ID_KSZ8873MLL, .phy_id_mask = MICREL_PHY_ID_MASK, diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 06943889d747..33c285252584 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2778,34 +2778,6 @@ static const struct sfp_upstream_ops sfp_phylink_ops = { /* Helpers for MAC drivers */ -/** - * phylink_helper_basex_speed() - 1000BaseX/2500BaseX helper - * @state: a pointer to a &struct phylink_link_state - * - * Inspect the interface mode, advertising mask or forced speed and - * decide whether to run at 2.5Gbit or 1Gbit appropriately, switching - * the interface mode to suit. @state->interface is appropriately - * updated, and the advertising mask has the "other" baseX_Full flag - * cleared. - */ -void phylink_helper_basex_speed(struct phylink_link_state *state) -{ - if (phy_interface_mode_is_8023z(state->interface)) { - bool want_2500 = state->an_enabled ? - phylink_test(state->advertising, 2500baseX_Full) : - state->speed == SPEED_2500; - - if (want_2500) { - phylink_clear(state->advertising, 1000baseX_Full); - state->interface = PHY_INTERFACE_MODE_2500BASEX; - } else { - phylink_clear(state->advertising, 2500baseX_Full); - state->interface = PHY_INTERFACE_MODE_1000BASEX; - } - } -} -EXPORT_SYMBOL_GPL(phylink_helper_basex_speed); - static void phylink_decode_c37_word(struct phylink_link_state *state, uint16_t config_reg, int speed) { diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 3619520340b7..1b41cd9732d7 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1011,8 +1011,7 @@ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, goto end; } - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &error); + skb = skb_recv_datagram(sk, flags, &error); if (error < 0) goto end; diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 9b4dfa3001d6..2de09ad5bac0 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -479,7 +479,7 @@ static int usbnet_cdc_zte_bind(struct usbnet *dev, struct usb_interface *intf) * device MAC address has been updated). Always set MAC address to that of the * device. */ -static int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb) +int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { if (skb->len < ETH_HLEN || !(skb->data[0] & 0x02)) return 1; @@ -489,6 +489,7 @@ static int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb) return 1; } +EXPORT_SYMBOL_GPL(usbnet_cdc_zte_rx_fixup); /* Ensure correct link state * diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 3353e761016d..9e2f48c2e85e 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -190,7 +190,6 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) skbn = netdev_alloc_skb(net, pkt_len + LL_MAX_HEADER); if (!skbn) return 0; - skbn->dev = net; switch (skb->data[offset + qmimux_hdr_sz] & 0xf0) { case 0x40: @@ -1358,6 +1357,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 247f58cb0f84..4e70dec30e5a 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -418,10 +418,7 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags) goto halt_fail_and_release; } - if (bp[0] & 0x02) - eth_hw_addr_random(net); - else - eth_hw_addr_set(net, bp); + eth_hw_addr_set(net, bp); /* set a nonzero filter to enable data transfers */ memset(u.set, 0, sizeof *u.set); @@ -463,6 +460,16 @@ static int rndis_bind(struct usbnet *dev, struct usb_interface *intf) return generic_rndis_bind(dev, intf, FLAG_RNDIS_PHYM_NOT_WIRELESS); } +static int zte_rndis_bind(struct usbnet *dev, struct usb_interface *intf) +{ + int status = rndis_bind(dev, intf); + + if (!status && (dev->net->dev_addr[0] & 0x02)) + eth_hw_addr_random(dev->net); + + return status; +} + void rndis_unbind(struct usbnet *dev, struct usb_interface *intf) { struct rndis_halt *halt; @@ -485,10 +492,14 @@ EXPORT_SYMBOL_GPL(rndis_unbind); */ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { + bool dst_mac_fixup; + /* This check is no longer done by usbnet */ if (skb->len < dev->net->hard_header_len) return 0; + dst_mac_fixup = !!(dev->driver_info->data & RNDIS_DRIVER_DATA_DST_MAC_FIXUP); + /* peripheral may have batched packets to us... */ while (likely(skb->len)) { struct rndis_data_hdr *hdr = (void *)skb->data; @@ -523,10 +534,17 @@ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb) break; skb_pull(skb, msg_len - sizeof *hdr); skb_trim(skb2, data_len); + + if (unlikely(dst_mac_fixup)) + usbnet_cdc_zte_rx_fixup(dev, skb2); + usbnet_skb_return(dev, skb2); } /* caller will usbnet_skb_return the remaining packet */ + if (unlikely(dst_mac_fixup)) + usbnet_cdc_zte_rx_fixup(dev, skb); + return 1; } EXPORT_SYMBOL_GPL(rndis_rx_fixup); @@ -600,6 +618,17 @@ static const struct driver_info rndis_poll_status_info = { .tx_fixup = rndis_tx_fixup, }; +static const struct driver_info zte_rndis_info = { + .description = "ZTE RNDIS device", + .flags = FLAG_ETHER | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .data = RNDIS_DRIVER_DATA_DST_MAC_FIXUP, + .bind = zte_rndis_bind, + .unbind = rndis_unbind, + .status = rndis_status, + .rx_fixup = rndis_rx_fixup, + .tx_fixup = rndis_tx_fixup, +}; + /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { @@ -614,6 +643,16 @@ static const struct usb_device_id products [] = { USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long)&rndis_info, }, { + /* ZTE WWAN modules */ + USB_VENDOR_AND_INTERFACE_INFO(0x19d2, + USB_CLASS_WIRELESS_CONTROLLER, 1, 3), + .driver_info = (unsigned long)&zte_rndis_info, +}, { + /* ZTE WWAN modules, ACM flavour */ + USB_VENDOR_AND_INTERFACE_INFO(0x19d2, + USB_CLASS_COMM, 2 /* ACM */, 0x0ff), + .driver_info = (unsigned long)&zte_rndis_info, +}, { /* RNDIS is MSFT's un-official variant of CDC ACM */ USB_INTERFACE_INFO(USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long) &rndis_info, diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 140780ac1745..588b2333cdb8 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -57,34 +57,6 @@ config COSA The driver will be compiled as a module: the module will be called cosa. -# -# Lan Media's board. Currently 1000, 1200, 5200, 5245 -# -config LANMEDIA - tristate "LanMedia Corp. SSI/V.35, T1/E1, HSSI, T3 boards" - depends on PCI && VIRT_TO_BUS && HDLC - help - Driver for the following Lan Media family of serial boards: - - - LMC 1000 board allows you to connect synchronous serial devices - (for example base-band modems, or any other device with the X.21, - V.24, V.35 or V.36 interface) to your Linux box. - - - LMC 1200 with on board DSU board allows you to connect your Linux - box directly to a T1 or E1 circuit. - - - LMC 5200 board provides a HSSI interface capable of running up to - 52 Mbits per second. - - - LMC 5245 board connects directly to a T3 circuit saving the - additional external hardware. - - To change setting such as clock source you will need lmcctl. - It is available at <ftp://ftp.lanmedia.com/> (broken link). - - To compile this driver as a module, choose M here: the - module will be called lmc. - # There is no way to detect a Sealevel board. Force it modular config SEALEVEL_4021 tristate "Sealevel Systems 4021 support" diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index 480bcd1f6c1c..1cd42147b34f 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -19,8 +19,6 @@ obj-$(CONFIG_SEALEVEL_4021) += z85230.o sealevel.o obj-$(CONFIG_COSA) += cosa.o obj-$(CONFIG_FARSYNC) += farsync.o -obj-$(CONFIG_LANMEDIA) += lmc/ - obj-$(CONFIG_LAPBETHER) += lapbether.o obj-$(CONFIG_N2) += n2.o obj-$(CONFIG_C101) += c101.o diff --git a/drivers/net/wan/lmc/Makefile b/drivers/net/wan/lmc/Makefile deleted file mode 100644 index f00fe4491d69..000000000000 --- a/drivers/net/wan/lmc/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Makefile for the Lan Media 21140 based WAN cards -# Specifically the 1000,1200,5200,5245 -# - -obj-$(CONFIG_LANMEDIA) += lmc.o - -lmc-objs := lmc_debug.o lmc_media.o lmc_main.o lmc_proto.o - -# Like above except every packet gets echoed to KERN_DEBUG -# in hex -# -# DBDEF = \ -# -DDEBUG \ -# -DLMC_PACKET_LOG - -ccflags-y := $(DBGDEF) diff --git a/drivers/net/wan/lmc/lmc.h b/drivers/net/wan/lmc/lmc.h deleted file mode 100644 index d7d59b4595f9..000000000000 --- a/drivers/net/wan/lmc/lmc.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LMC_H_ -#define _LMC_H_ - -#include "lmc_var.h" - -/* - * prototypes for everyone - */ -int lmc_probe(struct net_device * dev); -unsigned lmc_mii_readreg(lmc_softc_t * const sc, unsigned - devaddr, unsigned regno); -void lmc_mii_writereg(lmc_softc_t * const sc, unsigned devaddr, - unsigned regno, unsigned data); -void lmc_led_on(lmc_softc_t * const, u32); -void lmc_led_off(lmc_softc_t * const, u32); -unsigned lmc_mii_readreg(lmc_softc_t * const, unsigned, unsigned); -void lmc_mii_writereg(lmc_softc_t * const, unsigned, unsigned, unsigned); -void lmc_gpio_mkinput(lmc_softc_t * const sc, u32 bits); -void lmc_gpio_mkoutput(lmc_softc_t * const sc, u32 bits); - -int lmc_ioctl(struct net_device *dev, struct if_settings *ifs); - -extern lmc_media_t lmc_ds3_media; -extern lmc_media_t lmc_ssi_media; -extern lmc_media_t lmc_t1_media; -extern lmc_media_t lmc_hssi_media; - -#ifdef _DBG_EVENTLOG -static void lmcEventLog(u32 EventNum, u32 arg2, u32 arg3); -#endif - -#endif diff --git a/drivers/net/wan/lmc/lmc_debug.c b/drivers/net/wan/lmc/lmc_debug.c deleted file mode 100644 index 2b6051bda3fb..000000000000 --- a/drivers/net/wan/lmc/lmc_debug.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/types.h> -#include <linux/netdevice.h> -#include <linux/interrupt.h> - -#include "lmc_debug.h" - -/* - * Prints out len, max to 80 octets using printk, 20 per line - */ -#ifdef DEBUG -#ifdef LMC_PACKET_LOG -void lmcConsoleLog(char *type, unsigned char *ucData, int iLen) -{ - int iNewLine = 1; - char str[80], *pstr; - - sprintf(str, KERN_DEBUG "lmc: %s: ", type); - pstr = str+strlen(str); - - if(iLen > 240){ - printk(KERN_DEBUG "lmc: Printing 240 chars... out of: %d\n", iLen); - iLen = 240; - } - else{ - printk(KERN_DEBUG "lmc: Printing %d chars\n", iLen); - } - - while(iLen > 0) - { - sprintf(pstr, "%02x ", *ucData); - pstr+=3; - ucData++; - if( !(iNewLine % 20)) - { - sprintf(pstr, "\n"); - printk(str); - sprintf(str, KERN_DEBUG "lmc: %s: ", type); - pstr=str+strlen(str); - } - iNewLine++; - iLen--; - } - sprintf(pstr, "\n"); - printk(str); -} -#endif -#endif - -#ifdef DEBUG -u32 lmcEventLogIndex; -u32 lmcEventLogBuf[LMC_EVENTLOGSIZE * LMC_EVENTLOGARGS]; - -void lmcEventLog(u32 EventNum, u32 arg2, u32 arg3) -{ - lmcEventLogBuf[lmcEventLogIndex++] = EventNum; - lmcEventLogBuf[lmcEventLogIndex++] = arg2; - lmcEventLogBuf[lmcEventLogIndex++] = arg3; - lmcEventLogBuf[lmcEventLogIndex++] = jiffies; - - lmcEventLogIndex &= (LMC_EVENTLOGSIZE * LMC_EVENTLOGARGS) - 1; -} -#endif /* DEBUG */ - -/* --------------------------- end if_lmc_linux.c ------------------------ */ diff --git a/drivers/net/wan/lmc/lmc_debug.h b/drivers/net/wan/lmc/lmc_debug.h deleted file mode 100644 index cfae9eddf003..000000000000 --- a/drivers/net/wan/lmc/lmc_debug.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LMC_DEBUG_H_ -#define _LMC_DEBUG_H_ - -#ifdef DEBUG -#ifdef LMC_PACKET_LOG -#define LMC_CONSOLE_LOG(x,y,z) lmcConsoleLog((x), (y), (z)) -#else -#define LMC_CONSOLE_LOG(x,y,z) -#endif -#else -#define LMC_CONSOLE_LOG(x,y,z) -#endif - - - -/* Debug --- Event log definitions --- */ -/* EVENTLOGSIZE*EVENTLOGARGS needs to be a power of 2 */ -#define LMC_EVENTLOGSIZE 1024 /* number of events in eventlog */ -#define LMC_EVENTLOGARGS 4 /* number of args for each event */ - -/* event indicators */ -#define LMC_EVENT_XMT 1 -#define LMC_EVENT_XMTEND 2 -#define LMC_EVENT_XMTINT 3 -#define LMC_EVENT_RCVINT 4 -#define LMC_EVENT_RCVEND 5 -#define LMC_EVENT_INT 6 -#define LMC_EVENT_XMTINTTMO 7 -#define LMC_EVENT_XMTPRCTMO 8 -#define LMC_EVENT_INTEND 9 -#define LMC_EVENT_RESET1 10 -#define LMC_EVENT_RESET2 11 -#define LMC_EVENT_FORCEDRESET 12 -#define LMC_EVENT_WATCHDOG 13 -#define LMC_EVENT_BADPKTSURGE 14 -#define LMC_EVENT_TBUSY0 15 -#define LMC_EVENT_TBUSY1 16 - - -#ifdef DEBUG -extern u32 lmcEventLogIndex; -extern u32 lmcEventLogBuf[LMC_EVENTLOGSIZE * LMC_EVENTLOGARGS]; -#define LMC_EVENT_LOG(x, y, z) lmcEventLog((x), (y), (z)) -#else -#define LMC_EVENT_LOG(x,y,z) -#endif /* end ifdef _DBG_EVENTLOG */ - -void lmcConsoleLog(char *type, unsigned char *ucData, int iLen); -void lmcEventLog(u32 EventNum, u32 arg2, u32 arg3); - -#endif diff --git a/drivers/net/wan/lmc/lmc_ioctl.h b/drivers/net/wan/lmc/lmc_ioctl.h deleted file mode 100644 index 8c65e2176e94..000000000000 --- a/drivers/net/wan/lmc/lmc_ioctl.h +++ /dev/null @@ -1,255 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _LMC_IOCTL_H_ -#define _LMC_IOCTL_H_ -/* $Id: lmc_ioctl.h,v 1.15 2000/04/06 12:16:43 asj Exp $ */ - - /* - * Copyright (c) 1997-2000 LAN Media Corporation (LMC) - * All rights reserved. www.lanmedia.com - * - * This code is written by: - * Andrew Stanley-Jones (asj@cban.com) - * Rob Braun (bbraun@vix.com), - * Michael Graff (explorer@vix.com) and - * Matt Thomas (matt@3am-software.com). - */ - -#define LMCIOCGINFO SIOCDEVPRIVATE+3 /* get current state */ -#define LMCIOCSINFO SIOCDEVPRIVATE+4 /* set state to user values */ -#define LMCIOCGETLMCSTATS SIOCDEVPRIVATE+5 -#define LMCIOCCLEARLMCSTATS SIOCDEVPRIVATE+6 -#define LMCIOCDUMPEVENTLOG SIOCDEVPRIVATE+7 -#define LMCIOCGETXINFO SIOCDEVPRIVATE+8 -#define LMCIOCSETCIRCUIT SIOCDEVPRIVATE+9 -#define LMCIOCUNUSEDATM SIOCDEVPRIVATE+10 -#define LMCIOCRESET SIOCDEVPRIVATE+11 -#define LMCIOCT1CONTROL SIOCDEVPRIVATE+12 -#define LMCIOCIFTYPE SIOCDEVPRIVATE+13 -#define LMCIOCXILINX SIOCDEVPRIVATE+14 - -#define LMC_CARDTYPE_UNKNOWN -1 -#define LMC_CARDTYPE_HSSI 1 /* probed card is a HSSI card */ -#define LMC_CARDTYPE_DS3 2 /* probed card is a DS3 card */ -#define LMC_CARDTYPE_SSI 3 /* probed card is a SSI card */ -#define LMC_CARDTYPE_T1 4 /* probed card is a T1 card */ - -#define LMC_CTL_CARDTYPE_LMC5200 0 /* HSSI */ -#define LMC_CTL_CARDTYPE_LMC5245 1 /* DS3 */ -#define LMC_CTL_CARDTYPE_LMC1000 2 /* SSI, V.35 */ -#define LMC_CTL_CARDTYPE_LMC1200 3 /* DS1 */ - -#define LMC_CTL_OFF 0 /* generic OFF value */ -#define LMC_CTL_ON 1 /* generic ON value */ - -#define LMC_CTL_CLOCK_SOURCE_EXT 0 /* clock off line */ -#define LMC_CTL_CLOCK_SOURCE_INT 1 /* internal clock */ - -#define LMC_CTL_CRC_LENGTH_16 16 -#define LMC_CTL_CRC_LENGTH_32 32 -#define LMC_CTL_CRC_BYTESIZE_2 2 -#define LMC_CTL_CRC_BYTESIZE_4 4 - - -#define LMC_CTL_CABLE_LENGTH_LT_100FT 0 /* DS3 cable < 100 feet */ -#define LMC_CTL_CABLE_LENGTH_GT_100FT 1 /* DS3 cable >= 100 feet */ - -#define LMC_CTL_CIRCUIT_TYPE_E1 0 -#define LMC_CTL_CIRCUIT_TYPE_T1 1 - -/* - * IFTYPE defines - */ -#define LMC_PPP 1 /* use generic HDLC interface */ -#define LMC_NET 2 /* use direct net interface */ -#define LMC_RAW 3 /* use direct net interface */ - -/* - * These are not in the least IOCTL related, but I want them common. - */ -/* - * assignments for the GPIO register on the DEC chip (common) - */ -#define LMC_GEP_INIT 0x01 /* 0: */ -#define LMC_GEP_RESET 0x02 /* 1: */ -#define LMC_GEP_MODE 0x10 /* 4: */ -#define LMC_GEP_DP 0x20 /* 5: */ -#define LMC_GEP_DATA 0x40 /* 6: serial out */ -#define LMC_GEP_CLK 0x80 /* 7: serial clock */ - -/* - * HSSI GPIO assignments - */ -#define LMC_GEP_HSSI_ST 0x04 /* 2: receive timing sense (deprecated) */ -#define LMC_GEP_HSSI_CLOCK 0x08 /* 3: clock source */ - -/* - * T1 GPIO assignments - */ -#define LMC_GEP_SSI_GENERATOR 0x04 /* 2: enable prog freq gen serial i/f */ -#define LMC_GEP_SSI_TXCLOCK 0x08 /* 3: provide clock on TXCLOCK output */ - -/* - * Common MII16 bits - */ -#define LMC_MII16_LED0 0x0080 -#define LMC_MII16_LED1 0x0100 -#define LMC_MII16_LED2 0x0200 -#define LMC_MII16_LED3 0x0400 /* Error, and the red one */ -#define LMC_MII16_LED_ALL 0x0780 /* LED bit mask */ -#define LMC_MII16_FIFO_RESET 0x0800 - -/* - * definitions for HSSI - */ -#define LMC_MII16_HSSI_TA 0x0001 -#define LMC_MII16_HSSI_CA 0x0002 -#define LMC_MII16_HSSI_LA 0x0004 -#define LMC_MII16_HSSI_LB 0x0008 -#define LMC_MII16_HSSI_LC 0x0010 -#define LMC_MII16_HSSI_TM 0x0020 -#define LMC_MII16_HSSI_CRC 0x0040 - -/* - * assignments for the MII register 16 (DS3) - */ -#define LMC_MII16_DS3_ZERO 0x0001 -#define LMC_MII16_DS3_TRLBK 0x0002 -#define LMC_MII16_DS3_LNLBK 0x0004 -#define LMC_MII16_DS3_RAIS 0x0008 -#define LMC_MII16_DS3_TAIS 0x0010 -#define LMC_MII16_DS3_BIST 0x0020 -#define LMC_MII16_DS3_DLOS 0x0040 -#define LMC_MII16_DS3_CRC 0x1000 -#define LMC_MII16_DS3_SCRAM 0x2000 -#define LMC_MII16_DS3_SCRAM_LARS 0x4000 - -/* Note: 2 pairs of LEDs where swapped by mistake - * in Xilinx code for DS3 & DS1 adapters */ -#define LMC_DS3_LED0 0x0100 /* bit 08 yellow */ -#define LMC_DS3_LED1 0x0080 /* bit 07 blue */ -#define LMC_DS3_LED2 0x0400 /* bit 10 green */ -#define LMC_DS3_LED3 0x0200 /* bit 09 red */ - -/* - * framer register 0 and 7 (7 is latched and reset on read) - */ -#define LMC_FRAMER_REG0_DLOS 0x80 /* digital loss of service */ -#define LMC_FRAMER_REG0_OOFS 0x40 /* out of frame sync */ -#define LMC_FRAMER_REG0_AIS 0x20 /* alarm indication signal */ -#define LMC_FRAMER_REG0_CIS 0x10 /* channel idle */ -#define LMC_FRAMER_REG0_LOC 0x08 /* loss of clock */ - -/* - * Framer register 9 contains the blue alarm signal - */ -#define LMC_FRAMER_REG9_RBLUE 0x02 /* Blue alarm failure */ - -/* - * Framer register 0x10 contains xbit error - */ -#define LMC_FRAMER_REG10_XBIT 0x01 /* X bit error alarm failure */ - -/* - * And SSI, LMC1000 - */ -#define LMC_MII16_SSI_DTR 0x0001 /* DTR output RW */ -#define LMC_MII16_SSI_DSR 0x0002 /* DSR input RO */ -#define LMC_MII16_SSI_RTS 0x0004 /* RTS output RW */ -#define LMC_MII16_SSI_CTS 0x0008 /* CTS input RO */ -#define LMC_MII16_SSI_DCD 0x0010 /* DCD input RO */ -#define LMC_MII16_SSI_RI 0x0020 /* RI input RO */ -#define LMC_MII16_SSI_CRC 0x1000 /* CRC select - RW */ - -/* - * bits 0x0080 through 0x0800 are generic, and described - * above with LMC_MII16_LED[0123] _LED_ALL, and _FIFO_RESET - */ -#define LMC_MII16_SSI_LL 0x1000 /* LL output RW */ -#define LMC_MII16_SSI_RL 0x2000 /* RL output RW */ -#define LMC_MII16_SSI_TM 0x4000 /* TM input RO */ -#define LMC_MII16_SSI_LOOP 0x8000 /* loopback enable RW */ - -/* - * Some of the MII16 bits are mirrored in the MII17 register as well, - * but let's keep thing separate for now, and get only the cable from - * the MII17. - */ -#define LMC_MII17_SSI_CABLE_MASK 0x0038 /* mask to extract the cable type */ -#define LMC_MII17_SSI_CABLE_SHIFT 3 /* shift to extract the cable type */ - -/* - * And T1, LMC1200 - */ -#define LMC_MII16_T1_UNUSED1 0x0003 -#define LMC_MII16_T1_XOE 0x0004 -#define LMC_MII16_T1_RST 0x0008 /* T1 chip reset - RW */ -#define LMC_MII16_T1_Z 0x0010 /* output impedance T1=1, E1=0 output - RW */ -#define LMC_MII16_T1_INTR 0x0020 /* interrupt from 8370 - RO */ -#define LMC_MII16_T1_ONESEC 0x0040 /* one second square wave - ro */ - -#define LMC_MII16_T1_LED0 0x0100 -#define LMC_MII16_T1_LED1 0x0080 -#define LMC_MII16_T1_LED2 0x0400 -#define LMC_MII16_T1_LED3 0x0200 -#define LMC_MII16_T1_FIFO_RESET 0x0800 - -#define LMC_MII16_T1_CRC 0x1000 /* CRC select - RW */ -#define LMC_MII16_T1_UNUSED2 0xe000 - - -/* 8370 framer registers */ - -#define T1FRAMER_ALARM1_STATUS 0x47 -#define T1FRAMER_ALARM2_STATUS 0x48 -#define T1FRAMER_FERR_LSB 0x50 -#define T1FRAMER_FERR_MSB 0x51 /* framing bit error counter */ -#define T1FRAMER_LCV_LSB 0x54 -#define T1FRAMER_LCV_MSB 0x55 /* line code violation counter */ -#define T1FRAMER_AERR 0x5A - -/* mask for the above AERR register */ -#define T1FRAMER_LOF_MASK (0x0f0) /* receive loss of frame */ -#define T1FRAMER_COFA_MASK (0x0c0) /* change of frame alignment */ -#define T1FRAMER_SEF_MASK (0x03) /* severely errored frame */ - -/* 8370 framer register ALM1 (0x47) values - * used to determine link status - */ - -#define T1F_SIGFRZ 0x01 /* signaling freeze */ -#define T1F_RLOF 0x02 /* receive loss of frame alignment */ -#define T1F_RLOS 0x04 /* receive loss of signal */ -#define T1F_RALOS 0x08 /* receive analog loss of signal or RCKI loss of clock */ -#define T1F_RAIS 0x10 /* receive alarm indication signal */ -#define T1F_UNUSED 0x20 -#define T1F_RYEL 0x40 /* receive yellow alarm */ -#define T1F_RMYEL 0x80 /* receive multiframe yellow alarm */ - -#define LMC_T1F_WRITE 0 -#define LMC_T1F_READ 1 - -typedef struct lmc_st1f_control { - int command; - int address; - int value; - char __user *data; -} lmc_t1f_control; - -enum lmc_xilinx_c { - lmc_xilinx_reset = 1, - lmc_xilinx_load_prom = 2, - lmc_xilinx_load = 3 -}; - -struct lmc_xilinx_control { - enum lmc_xilinx_c command; - int len; - char __user *data; -}; - -/* ------------------ end T1 defs ------------------- */ - -#define LMC_MII_LedMask 0x0780 -#define LMC_MII_LedBitPos 7 - -#endif diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c deleted file mode 100644 index 76c6b4f89890..000000000000 --- a/drivers/net/wan/lmc/lmc_main.c +++ /dev/null @@ -1,2009 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - /* - * Copyright (c) 1997-2000 LAN Media Corporation (LMC) - * All rights reserved. www.lanmedia.com - * Generic HDLC port Copyright (C) 2008 Krzysztof Halasa <khc@pm.waw.pl> - * - * This code is written by: - * Andrew Stanley-Jones (asj@cban.com) - * Rob Braun (bbraun@vix.com), - * Michael Graff (explorer@vix.com) and - * Matt Thomas (matt@3am-software.com). - * - * With Help By: - * David Boggs - * Ron Crane - * Alan Cox - * - * Driver for the LanMedia LMC5200, LMC5245, LMC1000, LMC1200 cards. - * - * To control link specific options lmcctl is required. - * It can be obtained from ftp.lanmedia.com. - * - * Linux driver notes: - * Linux uses the device struct lmc_private to pass private information - * around. - * - * The initialization portion of this driver (the lmc_reset() and the - * lmc_dec_reset() functions, as well as the led controls and the - * lmc_initcsrs() functions. - * - * The watchdog function runs every second and checks to see if - * we still have link, and that the timing source is what we expected - * it to be. If link is lost, the interface is marked down, and - * we no longer can transmit. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/timer.h> -#include <linux/ptrace.h> -#include <linux/errno.h> -#include <linux/ioport.h> -#include <linux/slab.h> -#include <linux/interrupt.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/hdlc.h> -#include <linux/in.h> -#include <linux/if_arp.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/inet.h> -#include <linux/bitops.h> -#include <asm/processor.h> /* Processor type for cache alignment. */ -#include <asm/io.h> -#include <asm/dma.h> -#include <linux/uaccess.h> -#include <linux/jiffies.h> -//#include <asm/spinlock.h> - -#define DRIVER_MAJOR_VERSION 1 -#define DRIVER_MINOR_VERSION 34 -#define DRIVER_SUB_VERSION 0 - -#define DRIVER_VERSION ((DRIVER_MAJOR_VERSION << 8) + DRIVER_MINOR_VERSION) - -#include "lmc.h" -#include "lmc_var.h" -#include "lmc_ioctl.h" -#include "lmc_debug.h" -#include "lmc_proto.h" - -static int LMC_PKT_BUF_SZ = 1542; - -static const struct pci_device_id lmc_pci_tbl[] = { - { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_TULIP_FAST, - PCI_VENDOR_ID_LMC, PCI_ANY_ID }, - { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_TULIP_FAST, - PCI_ANY_ID, PCI_VENDOR_ID_LMC }, - { 0 } -}; - -MODULE_DEVICE_TABLE(pci, lmc_pci_tbl); -MODULE_LICENSE("GPL v2"); - - -static netdev_tx_t lmc_start_xmit(struct sk_buff *skb, - struct net_device *dev); -static int lmc_rx (struct net_device *dev); -static int lmc_open(struct net_device *dev); -static int lmc_close(struct net_device *dev); -static struct net_device_stats *lmc_get_stats(struct net_device *dev); -static irqreturn_t lmc_interrupt(int irq, void *dev_instance); -static void lmc_initcsrs(lmc_softc_t * const sc, lmc_csrptr_t csr_base, size_t csr_size); -static void lmc_softreset(lmc_softc_t * const); -static void lmc_running_reset(struct net_device *dev); -static int lmc_ifdown(struct net_device * const); -static void lmc_watchdog(struct timer_list *t); -static void lmc_reset(lmc_softc_t * const sc); -static void lmc_dec_reset(lmc_softc_t * const sc); -static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue); - -/* - * linux reserves 16 device specific IOCTLs. We call them - * LMCIOC* to control various bits of our world. - */ -static int lmc_siocdevprivate(struct net_device *dev, struct ifreq *ifr, - void __user *data, int cmd) /*fold00*/ -{ - lmc_softc_t *sc = dev_to_sc(dev); - lmc_ctl_t ctl; - int ret = -EOPNOTSUPP; - u16 regVal; - unsigned long flags; - - /* - * Most functions mess with the structure - * Disable interrupts while we do the polling - */ - - switch (cmd) { - /* - * Return current driver state. Since we keep this up - * To date internally, just copy this out to the user. - */ - case LMCIOCGINFO: /*fold01*/ - if (copy_to_user(data, &sc->ictl, sizeof(lmc_ctl_t))) - ret = -EFAULT; - else - ret = 0; - break; - - case LMCIOCSINFO: /*fold01*/ - if (!capable(CAP_NET_ADMIN)) { - ret = -EPERM; - break; - } - - if(dev->flags & IFF_UP){ - ret = -EBUSY; - break; - } - - if (copy_from_user(&ctl, data, sizeof(lmc_ctl_t))) { - ret = -EFAULT; - break; - } - - spin_lock_irqsave(&sc->lmc_lock, flags); - sc->lmc_media->set_status (sc, &ctl); - - if(ctl.crc_length != sc->ictl.crc_length) { - sc->lmc_media->set_crc_length(sc, ctl.crc_length); - if (sc->ictl.crc_length == LMC_CTL_CRC_LENGTH_16) - sc->TxDescriptControlInit |= LMC_TDES_ADD_CRC_DISABLE; - else - sc->TxDescriptControlInit &= ~LMC_TDES_ADD_CRC_DISABLE; - } - spin_unlock_irqrestore(&sc->lmc_lock, flags); - - ret = 0; - break; - - case LMCIOCIFTYPE: /*fold01*/ - { - u16 old_type = sc->if_type; - u16 new_type; - - if (!capable(CAP_NET_ADMIN)) { - ret = -EPERM; - break; - } - - if (copy_from_user(&new_type, data, sizeof(u16))) { - ret = -EFAULT; - break; - } - - - if (new_type == old_type) - { - ret = 0 ; - break; /* no change */ - } - - spin_lock_irqsave(&sc->lmc_lock, flags); - lmc_proto_close(sc); - - sc->if_type = new_type; - lmc_proto_attach(sc); - ret = lmc_proto_open(sc); - spin_unlock_irqrestore(&sc->lmc_lock, flags); - break; - } - - case LMCIOCGETXINFO: /*fold01*/ - spin_lock_irqsave(&sc->lmc_lock, flags); - sc->lmc_xinfo.Magic0 = 0xBEEFCAFE; - - sc->lmc_xinfo.PciCardType = sc->lmc_cardtype; - sc->lmc_xinfo.PciSlotNumber = 0; - sc->lmc_xinfo.DriverMajorVersion = DRIVER_MAJOR_VERSION; - sc->lmc_xinfo.DriverMinorVersion = DRIVER_MINOR_VERSION; - sc->lmc_xinfo.DriverSubVersion = DRIVER_SUB_VERSION; - sc->lmc_xinfo.XilinxRevisionNumber = - lmc_mii_readreg (sc, 0, 3) & 0xf; - sc->lmc_xinfo.MaxFrameSize = LMC_PKT_BUF_SZ; - sc->lmc_xinfo.link_status = sc->lmc_media->get_link_status (sc); - sc->lmc_xinfo.mii_reg16 = lmc_mii_readreg (sc, 0, 16); - spin_unlock_irqrestore(&sc->lmc_lock, flags); - - sc->lmc_xinfo.Magic1 = 0xDEADBEEF; - - if (copy_to_user(data, &sc->lmc_xinfo, sizeof(struct lmc_xinfo))) - ret = -EFAULT; - else - ret = 0; - - break; - - case LMCIOCGETLMCSTATS: - spin_lock_irqsave(&sc->lmc_lock, flags); - if (sc->lmc_cardtype == LMC_CARDTYPE_T1) { - lmc_mii_writereg(sc, 0, 17, T1FRAMER_FERR_LSB); - sc->extra_stats.framingBitErrorCount += - lmc_mii_readreg(sc, 0, 18) & 0xff; - lmc_mii_writereg(sc, 0, 17, T1FRAMER_FERR_MSB); - sc->extra_stats.framingBitErrorCount += - (lmc_mii_readreg(sc, 0, 18) & 0xff) << 8; - lmc_mii_writereg(sc, 0, 17, T1FRAMER_LCV_LSB); - sc->extra_stats.lineCodeViolationCount += - lmc_mii_readreg(sc, 0, 18) & 0xff; - lmc_mii_writereg(sc, 0, 17, T1FRAMER_LCV_MSB); - sc->extra_stats.lineCodeViolationCount += - (lmc_mii_readreg(sc, 0, 18) & 0xff) << 8; - lmc_mii_writereg(sc, 0, 17, T1FRAMER_AERR); - regVal = lmc_mii_readreg(sc, 0, 18) & 0xff; - - sc->extra_stats.lossOfFrameCount += - (regVal & T1FRAMER_LOF_MASK) >> 4; - sc->extra_stats.changeOfFrameAlignmentCount += - (regVal & T1FRAMER_COFA_MASK) >> 2; - sc->extra_stats.severelyErroredFrameCount += - regVal & T1FRAMER_SEF_MASK; - } - spin_unlock_irqrestore(&sc->lmc_lock, flags); - if (copy_to_user(data, &sc->lmc_device->stats, - sizeof(sc->lmc_device->stats)) || - copy_to_user(data + sizeof(sc->lmc_device->stats), - &sc->extra_stats, sizeof(sc->extra_stats))) - ret = -EFAULT; - else - ret = 0; - break; - - case LMCIOCCLEARLMCSTATS: - if (!capable(CAP_NET_ADMIN)) { - ret = -EPERM; - break; - } - - spin_lock_irqsave(&sc->lmc_lock, flags); - memset(&sc->lmc_device->stats, 0, sizeof(sc->lmc_device->stats)); - memset(&sc->extra_stats, 0, sizeof(sc->extra_stats)); - sc->extra_stats.check = STATCHECK; - sc->extra_stats.version_size = (DRIVER_VERSION << 16) + - sizeof(sc->lmc_device->stats) + sizeof(sc->extra_stats); - sc->extra_stats.lmc_cardtype = sc->lmc_cardtype; - spin_unlock_irqrestore(&sc->lmc_lock, flags); - ret = 0; - break; - - case LMCIOCSETCIRCUIT: /*fold01*/ - if (!capable(CAP_NET_ADMIN)){ - ret = -EPERM; - break; - } - - if(dev->flags & IFF_UP){ - ret = -EBUSY; - break; - } - - if (copy_from_user(&ctl, data, sizeof(lmc_ctl_t))) { - ret = -EFAULT; - break; - } - spin_lock_irqsave(&sc->lmc_lock, flags); - sc->lmc_media->set_circuit_type(sc, ctl.circuit_type); - sc->ictl.circuit_type = ctl.circuit_type; - spin_unlock_irqrestore(&sc->lmc_lock, flags); - ret = 0; - - break; - - case LMCIOCRESET: /*fold01*/ - if (!capable(CAP_NET_ADMIN)){ - ret = -EPERM; - break; - } - - spin_lock_irqsave(&sc->lmc_lock, flags); - /* Reset driver and bring back to current state */ - printk (" REG16 before reset +%04x\n", lmc_mii_readreg (sc, 0, 16)); - lmc_running_reset (dev); - printk (" REG16 after reset +%04x\n", lmc_mii_readreg (sc, 0, 16)); - - LMC_EVENT_LOG(LMC_EVENT_FORCEDRESET, LMC_CSR_READ (sc, csr_status), lmc_mii_readreg (sc, 0, 16)); - spin_unlock_irqrestore(&sc->lmc_lock, flags); - - ret = 0; - break; - -#ifdef DEBUG - case LMCIOCDUMPEVENTLOG: - if (copy_to_user(data, &lmcEventLogIndex, sizeof(u32))) { - ret = -EFAULT; - break; - } - if (copy_to_user(data + sizeof(u32), lmcEventLogBuf, - sizeof(lmcEventLogBuf))) - ret = -EFAULT; - else - ret = 0; - - break; -#endif /* end ifdef _DBG_EVENTLOG */ - case LMCIOCT1CONTROL: /*fold01*/ - if (sc->lmc_cardtype != LMC_CARDTYPE_T1){ - ret = -EOPNOTSUPP; - break; - } - break; - case LMCIOCXILINX: /*fold01*/ - { - struct lmc_xilinx_control xc; /*fold02*/ - - if (!capable(CAP_NET_ADMIN)){ - ret = -EPERM; - break; - } - - /* - * Stop the xwitter whlie we restart the hardware - */ - netif_stop_queue(dev); - - if (copy_from_user(&xc, data, sizeof(struct lmc_xilinx_control))) { - ret = -EFAULT; - break; - } - switch(xc.command){ - case lmc_xilinx_reset: /*fold02*/ - { - spin_lock_irqsave(&sc->lmc_lock, flags); - lmc_mii_readreg (sc, 0, 16); - - /* - * Make all of them 0 and make input - */ - lmc_gpio_mkinput(sc, 0xff); - - /* - * make the reset output - */ - lmc_gpio_mkoutput(sc, LMC_GEP_RESET); - - /* - * RESET low to force configuration. This also forces - * the transmitter clock to be internal, but we expect to reset - * that later anyway. - */ - - sc->lmc_gpio &= ~LMC_GEP_RESET; - LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio); - - - /* - * hold for more than 10 microseconds - */ - udelay(50); - - sc->lmc_gpio |= LMC_GEP_RESET; - LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio); - - - /* - * stop driving Xilinx-related signals - */ - lmc_gpio_mkinput(sc, 0xff); - - /* Reset the frammer hardware */ - sc->lmc_media->set_link_status (sc, 1); - sc->lmc_media->set_status (sc, NULL); -// lmc_softreset(sc); - - { - int i; - for(i = 0; i < 5; i++){ - lmc_led_on(sc, LMC_DS3_LED0); - mdelay(100); - lmc_led_off(sc, LMC_DS3_LED0); - lmc_led_on(sc, LMC_DS3_LED1); - mdelay(100); - lmc_led_off(sc, LMC_DS3_LED1); - lmc_led_on(sc, LMC_DS3_LED3); - mdelay(100); - lmc_led_off(sc, LMC_DS3_LED3); - lmc_led_on(sc, LMC_DS3_LED2); - mdelay(100); - lmc_led_off(sc, LMC_DS3_LED2); - } - } - spin_unlock_irqrestore(&sc->lmc_lock, flags); - - - - ret = 0x0; - - } - - break; - case lmc_xilinx_load_prom: /*fold02*/ - { - int timeout = 500000; - spin_lock_irqsave(&sc->lmc_lock, flags); - lmc_mii_readreg (sc, 0, 16); - - /* - * Make all of them 0 and make input - */ - lmc_gpio_mkinput(sc, 0xff); - - /* - * make the reset output - */ - lmc_gpio_mkoutput(sc, LMC_GEP_DP | LMC_GEP_RESET); - - /* - * RESET low to force configuration. This also forces - * the transmitter clock to be internal, but we expect to reset - * that later anyway. - */ - - sc->lmc_gpio &= ~(LMC_GEP_RESET | LMC_GEP_DP); - LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio); - - - /* - * hold for more than 10 microseconds - */ - udelay(50); - - sc->lmc_gpio |= LMC_GEP_DP | LMC_GEP_RESET; - LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio); - - /* - * busy wait for the chip to reset - */ - while( (LMC_CSR_READ(sc, csr_gp) & LMC_GEP_INIT) == 0 && - (timeout-- > 0)) - cpu_relax(); - - - /* - * stop driving Xilinx-related signals - */ - lmc_gpio_mkinput(sc, 0xff); - spin_unlock_irqrestore(&sc->lmc_lock, flags); - - ret = 0x0; - - - break; - - } - - case lmc_xilinx_load: /*fold02*/ - { - char *data; - int pos; - int timeout = 500000; - - if (!xc.data) { - ret = -EINVAL; - break; - } - - data = memdup_user(xc.data, xc.len); - if (IS_ERR(data)) { - ret = PTR_ERR(data); - break; - } - - printk("%s: Starting load of data Len: %d at 0x%p == 0x%p\n", dev->name, xc.len, xc.data, data); - - spin_lock_irqsave(&sc->lmc_lock, flags); - lmc_gpio_mkinput(sc, 0xff); - - /* - * Clear the Xilinx and start prgramming from the DEC - */ - - /* - * Set ouput as: - * Reset: 0 (active) - * DP: 0 (active) - * Mode: 1 - * - */ - sc->lmc_gpio = 0x00; - sc->lmc_gpio &= ~LMC_GEP_DP; - sc->lmc_gpio &= ~LMC_GEP_RESET; - sc->lmc_gpio |= LMC_GEP_MODE; - LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio); - - lmc_gpio_mkoutput(sc, LMC_GEP_MODE | LMC_GEP_DP | LMC_GEP_RESET); - - /* - * Wait at least 10 us 20 to be safe - */ - udelay(50); - - /* - * Clear reset and activate programming lines - * Reset: Input - * DP: Input - * Clock: Output - * Data: Output - * Mode: Output - */ - lmc_gpio_mkinput(sc, LMC_GEP_DP | LMC_GEP_RESET); - - /* - * Set LOAD, DATA, Clock to 1 - */ - sc->lmc_gpio = 0x00; - sc->lmc_gpio |= LMC_GEP_MODE; - sc->lmc_gpio |= LMC_GEP_DATA; - sc->lmc_gpio |= LMC_GEP_CLK; - LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio); - - lmc_gpio_mkoutput(sc, LMC_GEP_DATA | LMC_GEP_CLK | LMC_GEP_MODE ); - - /* - * busy wait for the chip to reset - */ - while( (LMC_CSR_READ(sc, csr_gp) & LMC_GEP_INIT) == 0 && - (timeout-- > 0)) - cpu_relax(); - - printk(KERN_DEBUG "%s: Waited %d for the Xilinx to clear its memory\n", dev->name, 500000-timeout); - - for(pos = 0; pos < xc.len; pos++){ - switch(data[pos]){ - case 0: - sc->lmc_gpio &= ~LMC_GEP_DATA; /* Data is 0 */ - break; - case 1: - sc->lmc_gpio |= LMC_GEP_DATA; /* Data is 1 */ - break; - default: - printk(KERN_WARNING "%s Bad data in xilinx programming data at %d, got %d wanted 0 or 1\n", dev->name, pos, data[pos]); - sc->lmc_gpio |= LMC_GEP_DATA; /* Assume it's 1 */ - } - sc->lmc_gpio &= ~LMC_GEP_CLK; /* Clock to zero */ - sc->lmc_gpio |= LMC_GEP_MODE; - LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio); - udelay(1); - - sc->lmc_gpio |= LMC_GEP_CLK; /* Put the clack back to one */ - sc->lmc_gpio |= LMC_GEP_MODE; - LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio); - udelay(1); - } - if((LMC_CSR_READ(sc, csr_gp) & LMC_GEP_INIT) == 0){ - printk(KERN_WARNING "%s: Reprogramming FAILED. Needs to be reprogrammed. (corrupted data)\n", dev->name); - } - else if((LMC_CSR_READ(sc, csr_gp) & LMC_GEP_DP) == 0){ - printk(KERN_WARNING "%s: Reprogramming FAILED. Needs to be reprogrammed. (done)\n", dev->name); - } - else { - printk(KERN_DEBUG "%s: Done reprogramming Xilinx, %d bits, good luck!\n", dev->name, pos); - } - - lmc_gpio_mkinput(sc, 0xff); - - sc->lmc_miireg16 |= LMC_MII16_FIFO_RESET; - lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16); - - sc->lmc_miireg16 &= ~LMC_MII16_FIFO_RESET; - lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16); - spin_unlock_irqrestore(&sc->lmc_lock, flags); - - kfree(data); - - ret = 0; - - break; - } - default: /*fold02*/ - ret = -EBADE; - break; - } - - netif_wake_queue(dev); - sc->lmc_txfull = 0; - - } - break; - default: - break; - } - - return ret; -} - - -/* the watchdog process that cruises around */ -static void lmc_watchdog(struct timer_list *t) /*fold00*/ -{ - lmc_softc_t *sc = from_timer(sc, t, timer); - struct net_device *dev = sc->lmc_device; - int link_status; - u32 ticks; - unsigned long flags; - - spin_lock_irqsave(&sc->lmc_lock, flags); - - if(sc->check != 0xBEAFCAFE){ - printk("LMC: Corrupt net_device struct, breaking out\n"); - spin_unlock_irqrestore(&sc->lmc_lock, flags); - return; - } - - - /* Make sure the tx jabber and rx watchdog are off, - * and the transmit and receive processes are running. - */ - - LMC_CSR_WRITE (sc, csr_15, 0x00000011); - sc->lmc_cmdmode |= TULIP_CMD_TXRUN | TULIP_CMD_RXRUN; - LMC_CSR_WRITE (sc, csr_command, sc->lmc_cmdmode); - - if (sc->lmc_ok == 0) - goto kick_timer; - - LMC_EVENT_LOG(LMC_EVENT_WATCHDOG, LMC_CSR_READ (sc, csr_status), lmc_mii_readreg (sc, 0, 16)); - - /* --- begin time out check ----------------------------------- - * check for a transmit interrupt timeout - * Has the packet xmt vs xmt serviced threshold been exceeded */ - if (sc->lmc_taint_tx == sc->lastlmc_taint_tx && - sc->lmc_device->stats.tx_packets > sc->lasttx_packets && - sc->tx_TimeoutInd == 0) - { - - /* wait for the watchdog to come around again */ - sc->tx_TimeoutInd = 1; - } - else if (sc->lmc_taint_tx == sc->lastlmc_taint_tx && - sc->lmc_device->stats.tx_packets > sc->lasttx_packets && - sc->tx_TimeoutInd) - { - - LMC_EVENT_LOG(LMC_EVENT_XMTINTTMO, LMC_CSR_READ (sc, csr_status), 0); - - sc->tx_TimeoutDisplay = 1; - sc->extra_stats.tx_TimeoutCnt++; - - /* DEC chip is stuck, hit it with a RESET!!!! */ - lmc_running_reset (dev); - - - /* look at receive & transmit process state to make sure they are running */ - LMC_EVENT_LOG(LMC_EVENT_RESET1, LMC_CSR_READ (sc, csr_status), 0); - - /* look at: DSR - 02 for Reg 16 - * CTS - 08 - * DCD - 10 - * RI - 20 - * for Reg 17 - */ - LMC_EVENT_LOG(LMC_EVENT_RESET2, lmc_mii_readreg (sc, 0, 16), lmc_mii_readreg (sc, 0, 17)); - - /* reset the transmit timeout detection flag */ - sc->tx_TimeoutInd = 0; - sc->lastlmc_taint_tx = sc->lmc_taint_tx; - sc->lasttx_packets = sc->lmc_device->stats.tx_packets; - } else { - sc->tx_TimeoutInd = 0; - sc->lastlmc_taint_tx = sc->lmc_taint_tx; - sc->lasttx_packets = sc->lmc_device->stats.tx_packets; - } - - /* --- end time out check ----------------------------------- */ - - - link_status = sc->lmc_media->get_link_status (sc); - - /* - * hardware level link lost, but the interface is marked as up. - * Mark it as down. - */ - if ((link_status == 0) && (sc->last_link_status != 0)) { - printk(KERN_WARNING "%s: hardware/physical link down\n", dev->name); - sc->last_link_status = 0; - /* lmc_reset (sc); Why reset??? The link can go down ok */ - - /* Inform the world that link has been lost */ - netif_carrier_off(dev); - } - - /* - * hardware link is up, but the interface is marked as down. - * Bring it back up again. - */ - if (link_status != 0 && sc->last_link_status == 0) { - printk(KERN_WARNING "%s: hardware/physical link up\n", dev->name); - sc->last_link_status = 1; - /* lmc_reset (sc); Again why reset??? */ - - netif_carrier_on(dev); - } - - /* Call media specific watchdog functions */ - sc->lmc_media->watchdog(sc); - - /* - * Poke the transmitter to make sure it - * never stops, even if we run out of mem - */ - LMC_CSR_WRITE(sc, csr_rxpoll, 0); - - /* - * Check for code that failed - * and try and fix it as appropriate - */ - if(sc->failed_ring == 1){ - /* - * Failed to setup the recv/xmit rin - * Try again - */ - sc->failed_ring = 0; - lmc_softreset(sc); - } - if(sc->failed_recv_alloc == 1){ - /* - * We failed to alloc mem in the - * interrupt handler, go through the rings - * and rebuild them - */ - sc->failed_recv_alloc = 0; - lmc_softreset(sc); - } - - - /* - * remember the timer value - */ -kick_timer: - - ticks = LMC_CSR_READ (sc, csr_gp_timer); - LMC_CSR_WRITE (sc, csr_gp_timer, 0xffffffffUL); - sc->ictl.ticks = 0x0000ffff - (ticks & 0x0000ffff); - - /* - * restart this timer. - */ - sc->timer.expires = jiffies + (HZ); - add_timer (&sc->timer); - - spin_unlock_irqrestore(&sc->lmc_lock, flags); -} - -static int lmc_attach(struct net_device *dev, unsigned short encoding, - unsigned short parity) -{ - if (encoding == ENCODING_NRZ && parity == PARITY_CRC16_PR1_CCITT) - return 0; - return -EINVAL; -} - -static const struct net_device_ops lmc_ops = { - .ndo_open = lmc_open, - .ndo_stop = lmc_close, - .ndo_start_xmit = hdlc_start_xmit, - .ndo_siocwandev = hdlc_ioctl, - .ndo_siocdevprivate = lmc_siocdevprivate, - .ndo_tx_timeout = lmc_driver_timeout, - .ndo_get_stats = lmc_get_stats, -}; - -static int lmc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) -{ - lmc_softc_t *sc; - struct net_device *dev; - u16 subdevice; - u16 AdapModelNum; - int err; - static int cards_found; - - err = pcim_enable_device(pdev); - if (err) { - printk(KERN_ERR "lmc: pci enable failed: %d\n", err); - return err; - } - - err = pci_request_regions(pdev, "lmc"); - if (err) { - printk(KERN_ERR "lmc: pci_request_region failed\n"); - return err; - } - - /* - * Allocate our own device structure - */ - sc = devm_kzalloc(&pdev->dev, sizeof(lmc_softc_t), GFP_KERNEL); - if (!sc) - return -ENOMEM; - - dev = alloc_hdlcdev(sc); - if (!dev) { - printk(KERN_ERR "lmc:alloc_netdev for device failed\n"); - return -ENOMEM; - } - - - dev->type = ARPHRD_HDLC; - dev_to_hdlc(dev)->xmit = lmc_start_xmit; - dev_to_hdlc(dev)->attach = lmc_attach; - dev->netdev_ops = &lmc_ops; - dev->watchdog_timeo = HZ; /* 1 second */ - dev->tx_queue_len = 100; - sc->lmc_device = dev; - sc->name = dev->name; - sc->if_type = LMC_PPP; - sc->check = 0xBEAFCAFE; - dev->base_addr = pci_resource_start(pdev, 0); - dev->irq = pdev->irq; - pci_set_drvdata(pdev, dev); - SET_NETDEV_DEV(dev, &pdev->dev); - - /* - * This will get the protocol layer ready and do any 1 time init's - * Must have a valid sc and dev structure - */ - lmc_proto_attach(sc); - - /* Init the spin lock so can call it latter */ - - spin_lock_init(&sc->lmc_lock); - pci_set_master(pdev); - - printk(KERN_INFO "hdlc: detected at %lx, irq %d\n", - dev->base_addr, dev->irq); - - err = register_hdlc_device(dev); - if (err) { - printk(KERN_ERR "%s: register_netdev failed.\n", dev->name); - free_netdev(dev); - return err; - } - - sc->lmc_cardtype = LMC_CARDTYPE_UNKNOWN; - sc->lmc_timing = LMC_CTL_CLOCK_SOURCE_EXT; - - /* - * - * Check either the subvendor or the subdevice, some systems reverse - * the setting in the bois, seems to be version and arch dependent? - * Fix the error, exchange the two values - */ - if ((subdevice = pdev->subsystem_device) == PCI_VENDOR_ID_LMC) - subdevice = pdev->subsystem_vendor; - - switch (subdevice) { - case PCI_DEVICE_ID_LMC_HSSI: - printk(KERN_INFO "%s: LMC HSSI\n", dev->name); - sc->lmc_cardtype = LMC_CARDTYPE_HSSI; - sc->lmc_media = &lmc_hssi_media; - break; - case PCI_DEVICE_ID_LMC_DS3: - printk(KERN_INFO "%s: LMC DS3\n", dev->name); - sc->lmc_cardtype = LMC_CARDTYPE_DS3; - sc->lmc_media = &lmc_ds3_media; - break; - case PCI_DEVICE_ID_LMC_SSI: - printk(KERN_INFO "%s: LMC SSI\n", dev->name); - sc->lmc_cardtype = LMC_CARDTYPE_SSI; - sc->lmc_media = &lmc_ssi_media; - break; - case PCI_DEVICE_ID_LMC_T1: - printk(KERN_INFO "%s: LMC T1\n", dev->name); - sc->lmc_cardtype = LMC_CARDTYPE_T1; - sc->lmc_media = &lmc_t1_media; - break; - default: - printk(KERN_WARNING "%s: LMC UNKNOWN CARD!\n", dev->name); - unregister_hdlc_device(dev); - return -EIO; - break; - } - - lmc_initcsrs (sc, dev->base_addr, 8); - - lmc_gpio_mkinput (sc, 0xff); - sc->lmc_gpio = 0; /* drive no signals yet */ - - sc->lmc_media->defaults (sc); - - sc->lmc_media->set_link_status (sc, LMC_LINK_UP); - - /* verify that the PCI Sub System ID matches the Adapter Model number - * from the MII register - */ - AdapModelNum = (lmc_mii_readreg (sc, 0, 3) & 0x3f0) >> 4; - - if ((AdapModelNum != LMC_ADAP_T1 || /* detect LMC1200 */ - subdevice != PCI_DEVICE_ID_LMC_T1) && - (AdapModelNum != LMC_ADAP_SSI || /* detect LMC1000 */ - subdevice != PCI_DEVICE_ID_LMC_SSI) && - (AdapModelNum != LMC_ADAP_DS3 || /* detect LMC5245 */ - subdevice != PCI_DEVICE_ID_LMC_DS3) && - (AdapModelNum != LMC_ADAP_HSSI || /* detect LMC5200 */ - subdevice != PCI_DEVICE_ID_LMC_HSSI)) - printk(KERN_WARNING "%s: Model number (%d) miscompare for PCI" - " Subsystem ID = 0x%04x\n", - dev->name, AdapModelNum, subdevice); - - /* - * reset clock - */ - LMC_CSR_WRITE (sc, csr_gp_timer, 0xFFFFFFFFUL); - - sc->board_idx = cards_found++; - sc->extra_stats.check = STATCHECK; - sc->extra_stats.version_size = (DRIVER_VERSION << 16) + - sizeof(sc->lmc_device->stats) + sizeof(sc->extra_stats); - sc->extra_stats.lmc_cardtype = sc->lmc_cardtype; - - sc->lmc_ok = 0; - sc->last_link_status = 0; - - return 0; -} - -/* - * Called from pci when removing module. - */ -static void lmc_remove_one(struct pci_dev *pdev) -{ - struct net_device *dev = pci_get_drvdata(pdev); - - if (dev) { - printk(KERN_DEBUG "%s: removing...\n", dev->name); - unregister_hdlc_device(dev); - free_netdev(dev); - } -} - -/* After this is called, packets can be sent. - * Does not initialize the addresses - */ -static int lmc_open(struct net_device *dev) -{ - lmc_softc_t *sc = dev_to_sc(dev); - int err; - - lmc_led_on(sc, LMC_DS3_LED0); - - lmc_dec_reset(sc); - lmc_reset(sc); - - LMC_EVENT_LOG(LMC_EVENT_RESET1, LMC_CSR_READ(sc, csr_status), 0); - LMC_EVENT_LOG(LMC_EVENT_RESET2, lmc_mii_readreg(sc, 0, 16), - lmc_mii_readreg(sc, 0, 17)); - - if (sc->lmc_ok) - return 0; - - lmc_softreset (sc); - - /* Since we have to use PCI bus, this should work on x86,alpha,ppc */ - if (request_irq (dev->irq, lmc_interrupt, IRQF_SHARED, dev->name, dev)){ - printk(KERN_WARNING "%s: could not get irq: %d\n", dev->name, dev->irq); - return -EAGAIN; - } - sc->got_irq = 1; - - /* Assert Terminal Active */ - sc->lmc_miireg16 |= LMC_MII16_LED_ALL; - sc->lmc_media->set_link_status (sc, LMC_LINK_UP); - - /* - * reset to last state. - */ - sc->lmc_media->set_status (sc, NULL); - - /* setup default bits to be used in tulip_desc_t transmit descriptor - * -baz */ - sc->TxDescriptControlInit = ( - LMC_TDES_INTERRUPT_ON_COMPLETION - | LMC_TDES_FIRST_SEGMENT - | LMC_TDES_LAST_SEGMENT - | LMC_TDES_SECOND_ADDR_CHAINED - | LMC_TDES_DISABLE_PADDING - ); - - if (sc->ictl.crc_length == LMC_CTL_CRC_LENGTH_16) { - /* disable 32 bit CRC generated by ASIC */ - sc->TxDescriptControlInit |= LMC_TDES_ADD_CRC_DISABLE; - } - sc->lmc_media->set_crc_length(sc, sc->ictl.crc_length); - /* Acknoledge the Terminal Active and light LEDs */ - - /* dev->flags |= IFF_UP; */ - - if ((err = lmc_proto_open(sc)) != 0) - return err; - - netif_start_queue(dev); - sc->extra_stats.tx_tbusy0++; - - /* - * select what interrupts we want to get - */ - sc->lmc_intrmask = 0; - /* Should be using the default interrupt mask defined in the .h file. */ - sc->lmc_intrmask |= (TULIP_STS_NORMALINTR - | TULIP_STS_RXINTR - | TULIP_STS_TXINTR - | TULIP_STS_ABNRMLINTR - | TULIP_STS_SYSERROR - | TULIP_STS_TXSTOPPED - | TULIP_STS_TXUNDERFLOW - | TULIP_STS_RXSTOPPED - | TULIP_STS_RXNOBUF - ); - LMC_CSR_WRITE (sc, csr_intr, sc->lmc_intrmask); - - sc->lmc_cmdmode |= TULIP_CMD_TXRUN; - sc->lmc_cmdmode |= TULIP_CMD_RXRUN; - LMC_CSR_WRITE (sc, csr_command, sc->lmc_cmdmode); - - sc->lmc_ok = 1; /* Run watchdog */ - - /* - * Set the if up now - pfb - */ - - sc->last_link_status = 1; - - /* - * Setup a timer for the watchdog on probe, and start it running. - * Since lmc_ok == 0, it will be a NOP for now. - */ - timer_setup(&sc->timer, lmc_watchdog, 0); - sc->timer.expires = jiffies + HZ; - add_timer (&sc->timer); - - return 0; -} - -/* Total reset to compensate for the AdTran DSU doing bad things - * under heavy load - */ - -static void lmc_running_reset (struct net_device *dev) /*fold00*/ -{ - lmc_softc_t *sc = dev_to_sc(dev); - - /* stop interrupts */ - /* Clear the interrupt mask */ - LMC_CSR_WRITE (sc, csr_intr, 0x00000000); - - lmc_dec_reset (sc); - lmc_reset (sc); - lmc_softreset (sc); - /* sc->lmc_miireg16 |= LMC_MII16_LED_ALL; */ - sc->lmc_media->set_link_status (sc, 1); - sc->lmc_media->set_status (sc, NULL); - - netif_wake_queue(dev); - - sc->lmc_txfull = 0; - sc->extra_stats.tx_tbusy0++; - - sc->lmc_intrmask = TULIP_DEFAULT_INTR_MASK; - LMC_CSR_WRITE (sc, csr_intr, sc->lmc_intrmask); - - sc->lmc_cmdmode |= (TULIP_CMD_TXRUN | TULIP_CMD_RXRUN); - LMC_CSR_WRITE (sc, csr_command, sc->lmc_cmdmode); -} - - -/* This is what is called when you ifconfig down a device. - * This disables the timer for the watchdog and keepalives, - * and disables the irq for dev. - */ -static int lmc_close(struct net_device *dev) -{ - /* not calling release_region() as we should */ - lmc_softc_t *sc = dev_to_sc(dev); - - sc->lmc_ok = 0; - sc->lmc_media->set_link_status (sc, 0); - del_timer (&sc->timer); - lmc_proto_close(sc); - lmc_ifdown (dev); - - return 0; -} - -/* Ends the transfer of packets */ -/* When the interface goes down, this is called */ -static int lmc_ifdown (struct net_device *dev) /*fold00*/ -{ - lmc_softc_t *sc = dev_to_sc(dev); - u32 csr6; - int i; - - /* Don't let anything else go on right now */ - // dev->start = 0; - netif_stop_queue(dev); - sc->extra_stats.tx_tbusy1++; - - /* stop interrupts */ - /* Clear the interrupt mask */ - LMC_CSR_WRITE (sc, csr_intr, 0x00000000); - - /* Stop Tx and Rx on the chip */ - csr6 = LMC_CSR_READ (sc, csr_command); - csr6 &= ~LMC_DEC_ST; /* Turn off the Transmission bit */ - csr6 &= ~LMC_DEC_SR; /* Turn off the Receive bit */ - LMC_CSR_WRITE (sc, csr_command, csr6); - - sc->lmc_device->stats.rx_missed_errors += - LMC_CSR_READ(sc, csr_missed_frames) & 0xffff; - - /* release the interrupt */ - if(sc->got_irq == 1){ - free_irq (dev->irq, dev); - sc->got_irq = 0; - } - - /* free skbuffs in the Rx queue */ - for (i = 0; i < LMC_RXDESCS; i++) - { - struct sk_buff *skb = sc->lmc_rxq[i]; - sc->lmc_rxq[i] = NULL; - sc->lmc_rxring[i].status = 0; - sc->lmc_rxring[i].length = 0; - sc->lmc_rxring[i].buffer1 = 0xDEADBEEF; - if (skb != NULL) - dev_kfree_skb(skb); - sc->lmc_rxq[i] = NULL; - } - - for (i = 0; i < LMC_TXDESCS; i++) - { - if (sc->lmc_txq[i] != NULL) - dev_kfree_skb(sc->lmc_txq[i]); - sc->lmc_txq[i] = NULL; - } - - lmc_led_off (sc, LMC_MII16_LED_ALL); - - netif_wake_queue(dev); - sc->extra_stats.tx_tbusy0++; - - return 0; -} - -/* Interrupt handling routine. This will take an incoming packet, or clean - * up after a trasmit. - */ -static irqreturn_t lmc_interrupt (int irq, void *dev_instance) /*fold00*/ -{ - struct net_device *dev = (struct net_device *) dev_instance; - lmc_softc_t *sc = dev_to_sc(dev); - u32 csr; - int i; - s32 stat; - unsigned int badtx; - int max_work = LMC_RXDESCS; - int handled = 0; - - spin_lock(&sc->lmc_lock); - - /* - * Read the csr to find what interrupts we have (if any) - */ - csr = LMC_CSR_READ (sc, csr_status); - - /* - * Make sure this is our interrupt - */ - if ( ! (csr & sc->lmc_intrmask)) { - goto lmc_int_fail_out; - } - - /* always go through this loop at least once */ - while (csr & sc->lmc_intrmask) { - handled = 1; - - /* - * Clear interrupt bits, we handle all case below - */ - LMC_CSR_WRITE (sc, csr_status, csr); - - /* - * One of - * - Transmit process timed out CSR5<1> - * - Transmit jabber timeout CSR5<3> - * - Transmit underflow CSR5<5> - * - Transmit Receiver buffer unavailable CSR5<7> - * - Receive process stopped CSR5<8> - * - Receive watchdog timeout CSR5<9> - * - Early transmit interrupt CSR5<10> - * - * Is this really right? Should we do a running reset for jabber? - * (being a WAN card and all) - */ - if (csr & TULIP_STS_ABNRMLINTR){ - lmc_running_reset (dev); - break; - } - - if (csr & TULIP_STS_RXINTR) - lmc_rx (dev); - - if (csr & (TULIP_STS_TXINTR | TULIP_STS_TXNOBUF | TULIP_STS_TXSTOPPED)) { - - int n_compl = 0 ; - /* reset the transmit timeout detection flag -baz */ - sc->extra_stats.tx_NoCompleteCnt = 0; - - badtx = sc->lmc_taint_tx; - i = badtx % LMC_TXDESCS; - - while ((badtx < sc->lmc_next_tx)) { - stat = sc->lmc_txring[i].status; - - LMC_EVENT_LOG (LMC_EVENT_XMTINT, stat, - sc->lmc_txring[i].length); - /* - * If bit 31 is 1 the tulip owns it break out of the loop - */ - if (stat & 0x80000000) - break; - - n_compl++ ; /* i.e., have an empty slot in ring */ - /* - * If we have no skbuff or have cleared it - * Already continue to the next buffer - */ - if (sc->lmc_txq[i] == NULL) - continue; - - /* - * Check the total error summary to look for any errors - */ - if (stat & 0x8000) { - sc->lmc_device->stats.tx_errors++; - if (stat & 0x4104) - sc->lmc_device->stats.tx_aborted_errors++; - if (stat & 0x0C00) - sc->lmc_device->stats.tx_carrier_errors++; - if (stat & 0x0200) - sc->lmc_device->stats.tx_window_errors++; - if (stat & 0x0002) - sc->lmc_device->stats.tx_fifo_errors++; - } else { - sc->lmc_device->stats.tx_bytes += sc->lmc_txring[i].length & 0x7ff; - - sc->lmc_device->stats.tx_packets++; - } - - dev_consume_skb_irq(sc->lmc_txq[i]); - sc->lmc_txq[i] = NULL; - - badtx++; - i = badtx % LMC_TXDESCS; - } - - if (sc->lmc_next_tx - badtx > LMC_TXDESCS) - { - printk ("%s: out of sync pointer\n", dev->name); - badtx += LMC_TXDESCS; - } - LMC_EVENT_LOG(LMC_EVENT_TBUSY0, n_compl, 0); - sc->lmc_txfull = 0; - netif_wake_queue(dev); - sc->extra_stats.tx_tbusy0++; - - -#ifdef DEBUG - sc->extra_stats.dirtyTx = badtx; - sc->extra_stats.lmc_next_tx = sc->lmc_next_tx; - sc->extra_stats.lmc_txfull = sc->lmc_txfull; -#endif - sc->lmc_taint_tx = badtx; - - /* - * Why was there a break here??? - */ - } /* end handle transmit interrupt */ - - if (csr & TULIP_STS_SYSERROR) { - u32 error; - printk (KERN_WARNING "%s: system bus error csr: %#8.8x\n", dev->name, csr); - error = csr>>23 & 0x7; - switch(error){ - case 0x000: - printk(KERN_WARNING "%s: Parity Fault (bad)\n", dev->name); - break; - case 0x001: - printk(KERN_WARNING "%s: Master Abort (naughty)\n", dev->name); - break; - case 0x002: - printk(KERN_WARNING "%s: Target Abort (not so naughty)\n", dev->name); - break; - default: - printk(KERN_WARNING "%s: This bus error code was supposed to be reserved!\n", dev->name); - } - lmc_dec_reset (sc); - lmc_reset (sc); - LMC_EVENT_LOG(LMC_EVENT_RESET1, LMC_CSR_READ (sc, csr_status), 0); - LMC_EVENT_LOG(LMC_EVENT_RESET2, - lmc_mii_readreg (sc, 0, 16), - lmc_mii_readreg (sc, 0, 17)); - - } - - - if(max_work-- <= 0) - break; - - /* - * Get current csr status to make sure - * we've cleared all interrupts - */ - csr = LMC_CSR_READ (sc, csr_status); - } /* end interrupt loop */ - LMC_EVENT_LOG(LMC_EVENT_INT, firstcsr, csr); - -lmc_int_fail_out: - - spin_unlock(&sc->lmc_lock); - - return IRQ_RETVAL(handled); -} - -static netdev_tx_t lmc_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - lmc_softc_t *sc = dev_to_sc(dev); - u32 flag; - int entry; - unsigned long flags; - - spin_lock_irqsave(&sc->lmc_lock, flags); - - /* normal path, tbusy known to be zero */ - - entry = sc->lmc_next_tx % LMC_TXDESCS; - - sc->lmc_txq[entry] = skb; - sc->lmc_txring[entry].buffer1 = virt_to_bus (skb->data); - - LMC_CONSOLE_LOG("xmit", skb->data, skb->len); - -#ifndef GCOM - /* If the queue is less than half full, don't interrupt */ - if (sc->lmc_next_tx - sc->lmc_taint_tx < LMC_TXDESCS / 2) - { - /* Do not interrupt on completion of this packet */ - flag = 0x60000000; - netif_wake_queue(dev); - } - else if (sc->lmc_next_tx - sc->lmc_taint_tx == LMC_TXDESCS / 2) - { - /* This generates an interrupt on completion of this packet */ - flag = 0xe0000000; - netif_wake_queue(dev); - } - else if (sc->lmc_next_tx - sc->lmc_taint_tx < LMC_TXDESCS - 1) - { - /* Do not interrupt on completion of this packet */ - flag = 0x60000000; - netif_wake_queue(dev); - } - else - { - /* This generates an interrupt on completion of this packet */ - flag = 0xe0000000; - sc->lmc_txfull = 1; - netif_stop_queue(dev); - } -#else - flag = LMC_TDES_INTERRUPT_ON_COMPLETION; - - if (sc->lmc_next_tx - sc->lmc_taint_tx >= LMC_TXDESCS - 1) - { /* ring full, go busy */ - sc->lmc_txfull = 1; - netif_stop_queue(dev); - sc->extra_stats.tx_tbusy1++; - LMC_EVENT_LOG(LMC_EVENT_TBUSY1, entry, 0); - } -#endif - - - if (entry == LMC_TXDESCS - 1) /* last descriptor in ring */ - flag |= LMC_TDES_END_OF_RING; /* flag as such for Tulip */ - - /* don't pad small packets either */ - flag = sc->lmc_txring[entry].length = (skb->len) | flag | - sc->TxDescriptControlInit; - - /* set the transmit timeout flag to be checked in - * the watchdog timer handler. -baz - */ - - sc->extra_stats.tx_NoCompleteCnt++; - sc->lmc_next_tx++; - - /* give ownership to the chip */ - LMC_EVENT_LOG(LMC_EVENT_XMT, flag, entry); - sc->lmc_txring[entry].status = 0x80000000; - - /* send now! */ - LMC_CSR_WRITE (sc, csr_txpoll, 0); - - spin_unlock_irqrestore(&sc->lmc_lock, flags); - - return NETDEV_TX_OK; -} - - -static int lmc_rx(struct net_device *dev) -{ - lmc_softc_t *sc = dev_to_sc(dev); - int i; - int rx_work_limit = LMC_RXDESCS; - int rxIntLoopCnt; /* debug -baz */ - int localLengthErrCnt = 0; - long stat; - struct sk_buff *skb, *nsb; - u16 len; - - lmc_led_on(sc, LMC_DS3_LED3); - - rxIntLoopCnt = 0; /* debug -baz */ - - i = sc->lmc_next_rx % LMC_RXDESCS; - - while (((stat = sc->lmc_rxring[i].status) & LMC_RDES_OWN_BIT) != DESC_OWNED_BY_DC21X4) - { - rxIntLoopCnt++; /* debug -baz */ - len = ((stat & LMC_RDES_FRAME_LENGTH) >> RDES_FRAME_LENGTH_BIT_NUMBER); - if ((stat & 0x0300) != 0x0300) { /* Check first segment and last segment */ - if ((stat & 0x0000ffff) != 0x7fff) { - /* Oversized frame */ - sc->lmc_device->stats.rx_length_errors++; - goto skip_packet; - } - } - - if (stat & 0x00000008) { /* Catch a dribbling bit error */ - sc->lmc_device->stats.rx_errors++; - sc->lmc_device->stats.rx_frame_errors++; - goto skip_packet; - } - - - if (stat & 0x00000004) { /* Catch a CRC error by the Xilinx */ - sc->lmc_device->stats.rx_errors++; - sc->lmc_device->stats.rx_crc_errors++; - goto skip_packet; - } - - if (len > LMC_PKT_BUF_SZ) { - sc->lmc_device->stats.rx_length_errors++; - localLengthErrCnt++; - goto skip_packet; - } - - if (len < sc->lmc_crcSize + 2) { - sc->lmc_device->stats.rx_length_errors++; - sc->extra_stats.rx_SmallPktCnt++; - localLengthErrCnt++; - goto skip_packet; - } - - if(stat & 0x00004000){ - printk(KERN_WARNING "%s: Receiver descriptor error, receiver out of sync?\n", dev->name); - } - - len -= sc->lmc_crcSize; - - skb = sc->lmc_rxq[i]; - - /* - * We ran out of memory at some point - * just allocate an skb buff and continue. - */ - - if (!skb) { - nsb = dev_alloc_skb (LMC_PKT_BUF_SZ + 2); - if (nsb) { - sc->lmc_rxq[i] = nsb; - nsb->dev = dev; - sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb)); - } - sc->failed_recv_alloc = 1; - goto skip_packet; - } - - sc->lmc_device->stats.rx_packets++; - sc->lmc_device->stats.rx_bytes += len; - - LMC_CONSOLE_LOG("recv", skb->data, len); - - /* - * I'm not sure of the sanity of this - * Packets could be arriving at a constant - * 44.210mbits/sec and we're going to copy - * them into a new buffer?? - */ - - if(len > (LMC_MTU - (LMC_MTU>>2))){ /* len > LMC_MTU * 0.75 */ - /* - * If it's a large packet don't copy it just hand it up - */ - give_it_anyways: - - sc->lmc_rxq[i] = NULL; - sc->lmc_rxring[i].buffer1 = 0x0; - - skb_put (skb, len); - skb->protocol = lmc_proto_type(sc, skb); - skb_reset_mac_header(skb); - /* skb_reset_network_header(skb); */ - skb->dev = dev; - lmc_proto_netif(sc, skb); - - /* - * This skb will be destroyed by the upper layers, make a new one - */ - nsb = dev_alloc_skb (LMC_PKT_BUF_SZ + 2); - if (nsb) { - sc->lmc_rxq[i] = nsb; - nsb->dev = dev; - sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb)); - /* Transferred to 21140 below */ - } - else { - /* - * We've run out of memory, stop trying to allocate - * memory and exit the interrupt handler - * - * The chip may run out of receivers and stop - * in which care we'll try to allocate the buffer - * again. (once a second) - */ - sc->extra_stats.rx_BuffAllocErr++; - LMC_EVENT_LOG(LMC_EVENT_RCVINT, stat, len); - sc->failed_recv_alloc = 1; - goto skip_out_of_mem; - } - } - else { - nsb = dev_alloc_skb(len); - if(!nsb) { - goto give_it_anyways; - } - skb_copy_from_linear_data(skb, skb_put(nsb, len), len); - - nsb->protocol = lmc_proto_type(sc, nsb); - skb_reset_mac_header(nsb); - /* skb_reset_network_header(nsb); */ - nsb->dev = dev; - lmc_proto_netif(sc, nsb); - } - - skip_packet: - LMC_EVENT_LOG(LMC_EVENT_RCVINT, stat, len); - sc->lmc_rxring[i].status = DESC_OWNED_BY_DC21X4; - - sc->lmc_next_rx++; - i = sc->lmc_next_rx % LMC_RXDESCS; - rx_work_limit--; - if (rx_work_limit < 0) - break; - } - - /* detect condition for LMC1000 where DSU cable attaches and fills - * descriptors with bogus packets - * - if (localLengthErrCnt > LMC_RXDESCS - 3) { - sc->extra_stats.rx_BadPktSurgeCnt++; - LMC_EVENT_LOG(LMC_EVENT_BADPKTSURGE, localLengthErrCnt, - sc->extra_stats.rx_BadPktSurgeCnt); - } */ - - /* save max count of receive descriptors serviced */ - if (rxIntLoopCnt > sc->extra_stats.rxIntLoopCnt) - sc->extra_stats.rxIntLoopCnt = rxIntLoopCnt; /* debug -baz */ - -#ifdef DEBUG - if (rxIntLoopCnt == 0) - { - for (i = 0; i < LMC_RXDESCS; i++) - { - if ((sc->lmc_rxring[i].status & LMC_RDES_OWN_BIT) - != DESC_OWNED_BY_DC21X4) - { - rxIntLoopCnt++; - } - } - LMC_EVENT_LOG(LMC_EVENT_RCVEND, rxIntLoopCnt, 0); - } -#endif - - - lmc_led_off(sc, LMC_DS3_LED3); - -skip_out_of_mem: - return 0; -} - -static struct net_device_stats *lmc_get_stats(struct net_device *dev) -{ - lmc_softc_t *sc = dev_to_sc(dev); - unsigned long flags; - - spin_lock_irqsave(&sc->lmc_lock, flags); - - sc->lmc_device->stats.rx_missed_errors += LMC_CSR_READ(sc, csr_missed_frames) & 0xffff; - - spin_unlock_irqrestore(&sc->lmc_lock, flags); - - return &sc->lmc_device->stats; -} - -static struct pci_driver lmc_driver = { - .name = "lmc", - .id_table = lmc_pci_tbl, - .probe = lmc_init_one, - .remove = lmc_remove_one, -}; - -module_pci_driver(lmc_driver); - -unsigned lmc_mii_readreg (lmc_softc_t * const sc, unsigned devaddr, unsigned regno) /*fold00*/ -{ - int i; - int command = (0xf6 << 10) | (devaddr << 5) | regno; - int retval = 0; - - LMC_MII_SYNC (sc); - - for (i = 15; i >= 0; i--) - { - int dataval = (command & (1 << i)) ? 0x20000 : 0; - - LMC_CSR_WRITE (sc, csr_9, dataval); - lmc_delay (); - /* __SLOW_DOWN_IO; */ - LMC_CSR_WRITE (sc, csr_9, dataval | 0x10000); - lmc_delay (); - /* __SLOW_DOWN_IO; */ - } - - for (i = 19; i > 0; i--) - { - LMC_CSR_WRITE (sc, csr_9, 0x40000); - lmc_delay (); - /* __SLOW_DOWN_IO; */ - retval = (retval << 1) | ((LMC_CSR_READ (sc, csr_9) & 0x80000) ? 1 : 0); - LMC_CSR_WRITE (sc, csr_9, 0x40000 | 0x10000); - lmc_delay (); - /* __SLOW_DOWN_IO; */ - } - - return (retval >> 1) & 0xffff; -} - -void lmc_mii_writereg (lmc_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data) /*fold00*/ -{ - int i = 32; - int command = (0x5002 << 16) | (devaddr << 23) | (regno << 18) | data; - - LMC_MII_SYNC (sc); - - i = 31; - while (i >= 0) - { - int datav; - - if (command & (1 << i)) - datav = 0x20000; - else - datav = 0x00000; - - LMC_CSR_WRITE (sc, csr_9, datav); - lmc_delay (); - /* __SLOW_DOWN_IO; */ - LMC_CSR_WRITE (sc, csr_9, (datav | 0x10000)); - lmc_delay (); - /* __SLOW_DOWN_IO; */ - i--; - } - - i = 2; - while (i > 0) - { - LMC_CSR_WRITE (sc, csr_9, 0x40000); - lmc_delay (); - /* __SLOW_DOWN_IO; */ - LMC_CSR_WRITE (sc, csr_9, 0x50000); - lmc_delay (); - /* __SLOW_DOWN_IO; */ - i--; - } -} - -static void lmc_softreset (lmc_softc_t * const sc) /*fold00*/ -{ - int i; - - /* Initialize the receive rings and buffers. */ - sc->lmc_txfull = 0; - sc->lmc_next_rx = 0; - sc->lmc_next_tx = 0; - sc->lmc_taint_rx = 0; - sc->lmc_taint_tx = 0; - - /* - * Setup each one of the receiver buffers - * allocate an skbuff for each one, setup the descriptor table - * and point each buffer at the next one - */ - - for (i = 0; i < LMC_RXDESCS; i++) - { - struct sk_buff *skb; - - if (sc->lmc_rxq[i] == NULL) - { - skb = dev_alloc_skb (LMC_PKT_BUF_SZ + 2); - if(skb == NULL){ - printk(KERN_WARNING "%s: Failed to allocate receiver ring, will try again\n", sc->name); - sc->failed_ring = 1; - break; - } - else{ - sc->lmc_rxq[i] = skb; - } - } - else - { - skb = sc->lmc_rxq[i]; - } - - skb->dev = sc->lmc_device; - - /* owned by 21140 */ - sc->lmc_rxring[i].status = 0x80000000; - - /* used to be PKT_BUF_SZ now uses skb since we lose some to head room */ - sc->lmc_rxring[i].length = skb_tailroom(skb); - - /* use to be tail which is dumb since you're thinking why write - * to the end of the packj,et but since there's nothing there tail == data - */ - sc->lmc_rxring[i].buffer1 = virt_to_bus (skb->data); - - /* This is fair since the structure is static and we have the next address */ - sc->lmc_rxring[i].buffer2 = virt_to_bus (&sc->lmc_rxring[i + 1]); - - } - - /* - * Sets end of ring - */ - if (i != 0) { - sc->lmc_rxring[i - 1].length |= 0x02000000; /* Set end of buffers flag */ - sc->lmc_rxring[i - 1].buffer2 = virt_to_bus(&sc->lmc_rxring[0]); /* Point back to the start */ - } - LMC_CSR_WRITE (sc, csr_rxlist, virt_to_bus (sc->lmc_rxring)); /* write base address */ - - /* Initialize the transmit rings and buffers */ - for (i = 0; i < LMC_TXDESCS; i++) - { - if (sc->lmc_txq[i] != NULL){ /* have buffer */ - dev_kfree_skb(sc->lmc_txq[i]); /* free it */ - sc->lmc_device->stats.tx_dropped++; /* We just dropped a packet */ - } - sc->lmc_txq[i] = NULL; - sc->lmc_txring[i].status = 0x00000000; - sc->lmc_txring[i].buffer2 = virt_to_bus (&sc->lmc_txring[i + 1]); - } - sc->lmc_txring[i - 1].buffer2 = virt_to_bus (&sc->lmc_txring[0]); - LMC_CSR_WRITE (sc, csr_txlist, virt_to_bus (sc->lmc_txring)); -} - -void lmc_gpio_mkinput(lmc_softc_t * const sc, u32 bits) /*fold00*/ -{ - sc->lmc_gpio_io &= ~bits; - LMC_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET | (sc->lmc_gpio_io)); -} - -void lmc_gpio_mkoutput(lmc_softc_t * const sc, u32 bits) /*fold00*/ -{ - sc->lmc_gpio_io |= bits; - LMC_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET | (sc->lmc_gpio_io)); -} - -void lmc_led_on(lmc_softc_t * const sc, u32 led) /*fold00*/ -{ - if ((~sc->lmc_miireg16) & led) /* Already on! */ - return; - - sc->lmc_miireg16 &= ~led; - lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16); -} - -void lmc_led_off(lmc_softc_t * const sc, u32 led) /*fold00*/ -{ - if (sc->lmc_miireg16 & led) /* Already set don't do anything */ - return; - - sc->lmc_miireg16 |= led; - lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16); -} - -static void lmc_reset(lmc_softc_t * const sc) /*fold00*/ -{ - sc->lmc_miireg16 |= LMC_MII16_FIFO_RESET; - lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16); - - sc->lmc_miireg16 &= ~LMC_MII16_FIFO_RESET; - lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16); - - /* - * make some of the GPIO pins be outputs - */ - lmc_gpio_mkoutput(sc, LMC_GEP_RESET); - - /* - * RESET low to force state reset. This also forces - * the transmitter clock to be internal, but we expect to reset - * that later anyway. - */ - sc->lmc_gpio &= ~(LMC_GEP_RESET); - LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio); - - /* - * hold for more than 10 microseconds - */ - udelay(50); - - /* - * stop driving Xilinx-related signals - */ - lmc_gpio_mkinput(sc, LMC_GEP_RESET); - - /* - * Call media specific init routine - */ - sc->lmc_media->init(sc); - - sc->extra_stats.resetCount++; -} - -static void lmc_dec_reset(lmc_softc_t * const sc) /*fold00*/ -{ - u32 val; - - /* - * disable all interrupts - */ - sc->lmc_intrmask = 0; - LMC_CSR_WRITE(sc, csr_intr, sc->lmc_intrmask); - - /* - * Reset the chip with a software reset command. - * Wait 10 microseconds (actually 50 PCI cycles but at - * 33MHz that comes to two microseconds but wait a - * bit longer anyways) - */ - LMC_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); - udelay(25); -#ifdef __sparc__ - sc->lmc_busmode = LMC_CSR_READ(sc, csr_busmode); - sc->lmc_busmode = 0x00100000; - sc->lmc_busmode &= ~TULIP_BUSMODE_SWRESET; - LMC_CSR_WRITE(sc, csr_busmode, sc->lmc_busmode); -#endif - sc->lmc_cmdmode = LMC_CSR_READ(sc, csr_command); - - /* - * We want: - * no ethernet address in frames we write - * disable padding (txdesc, padding disable) - * ignore runt frames (rdes0 bit 15) - * no receiver watchdog or transmitter jabber timer - * (csr15 bit 0,14 == 1) - * if using 16-bit CRC, turn off CRC (trans desc, crc disable) - */ - - sc->lmc_cmdmode |= ( TULIP_CMD_PROMISCUOUS - | TULIP_CMD_FULLDUPLEX - | TULIP_CMD_PASSBADPKT - | TULIP_CMD_NOHEARTBEAT - | TULIP_CMD_PORTSELECT - | TULIP_CMD_RECEIVEALL - | TULIP_CMD_MUSTBEONE - ); - sc->lmc_cmdmode &= ~( TULIP_CMD_OPERMODE - | TULIP_CMD_THRESHOLDCTL - | TULIP_CMD_STOREFWD - | TULIP_CMD_TXTHRSHLDCTL - ); - - LMC_CSR_WRITE(sc, csr_command, sc->lmc_cmdmode); - - /* - * disable receiver watchdog and transmit jabber - */ - val = LMC_CSR_READ(sc, csr_sia_general); - val |= (TULIP_WATCHDOG_TXDISABLE | TULIP_WATCHDOG_RXDISABLE); - LMC_CSR_WRITE(sc, csr_sia_general, val); -} - -static void lmc_initcsrs(lmc_softc_t * const sc, lmc_csrptr_t csr_base, /*fold00*/ - size_t csr_size) -{ - sc->lmc_csrs.csr_busmode = csr_base + 0 * csr_size; - sc->lmc_csrs.csr_txpoll = csr_base + 1 * csr_size; - sc->lmc_csrs.csr_rxpoll = csr_base + 2 * csr_size; - sc->lmc_csrs.csr_rxlist = csr_base + 3 * csr_size; - sc->lmc_csrs.csr_txlist = csr_base + 4 * csr_size; - sc->lmc_csrs.csr_status = csr_base + 5 * csr_size; - sc->lmc_csrs.csr_command = csr_base + 6 * csr_size; - sc->lmc_csrs.csr_intr = csr_base + 7 * csr_size; - sc->lmc_csrs.csr_missed_frames = csr_base + 8 * csr_size; - sc->lmc_csrs.csr_9 = csr_base + 9 * csr_size; - sc->lmc_csrs.csr_10 = csr_base + 10 * csr_size; - sc->lmc_csrs.csr_11 = csr_base + 11 * csr_size; - sc->lmc_csrs.csr_12 = csr_base + 12 * csr_size; - sc->lmc_csrs.csr_13 = csr_base + 13 * csr_size; - sc->lmc_csrs.csr_14 = csr_base + 14 * csr_size; - sc->lmc_csrs.csr_15 = csr_base + 15 * csr_size; -} - -static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue) -{ - lmc_softc_t *sc = dev_to_sc(dev); - u32 csr6; - unsigned long flags; - - spin_lock_irqsave(&sc->lmc_lock, flags); - - printk("%s: Xmitter busy|\n", dev->name); - - sc->extra_stats.tx_tbusy_calls++; - if (time_is_before_jiffies(dev_trans_start(dev) + TX_TIMEOUT)) - goto bug_out; - - /* - * Chip seems to have locked up - * Reset it - * This whips out all our descriptor - * table and starts from scartch - */ - - LMC_EVENT_LOG(LMC_EVENT_XMTPRCTMO, - LMC_CSR_READ (sc, csr_status), - sc->extra_stats.tx_ProcTimeout); - - lmc_running_reset (dev); - - LMC_EVENT_LOG(LMC_EVENT_RESET1, LMC_CSR_READ (sc, csr_status), 0); - LMC_EVENT_LOG(LMC_EVENT_RESET2, - lmc_mii_readreg (sc, 0, 16), - lmc_mii_readreg (sc, 0, 17)); - - /* restart the tx processes */ - csr6 = LMC_CSR_READ (sc, csr_command); - LMC_CSR_WRITE (sc, csr_command, csr6 | 0x0002); - LMC_CSR_WRITE (sc, csr_command, csr6 | 0x2002); - - /* immediate transmit */ - LMC_CSR_WRITE (sc, csr_txpoll, 0); - - sc->lmc_device->stats.tx_errors++; - sc->extra_stats.tx_ProcTimeout++; /* -baz */ - - netif_trans_update(dev); /* prevent tx timeout */ - -bug_out: - - spin_unlock_irqrestore(&sc->lmc_lock, flags); -} diff --git a/drivers/net/wan/lmc/lmc_media.c b/drivers/net/wan/lmc/lmc_media.c deleted file mode 100644 index ec1ac7b1f3fd..000000000000 --- a/drivers/net/wan/lmc/lmc_media.c +++ /dev/null @@ -1,1206 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* $Id: lmc_media.c,v 1.13 2000/04/11 05:25:26 asj Exp $ */ - -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/timer.h> -#include <linux/ptrace.h> -#include <linux/errno.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/in.h> -#include <linux/if_arp.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/inet.h> -#include <linux/bitops.h> - -#include <asm/processor.h> /* Processor type for cache alignment. */ -#include <asm/io.h> -#include <asm/dma.h> - -#include <linux/uaccess.h> - -#include "lmc.h" -#include "lmc_var.h" -#include "lmc_ioctl.h" -#include "lmc_debug.h" - -#define CONFIG_LMC_IGNORE_HARDWARE_HANDSHAKE 1 - - /* - * Copyright (c) 1997-2000 LAN Media Corporation (LMC) - * All rights reserved. www.lanmedia.com - * - * This code is written by: - * Andrew Stanley-Jones (asj@cban.com) - * Rob Braun (bbraun@vix.com), - * Michael Graff (explorer@vix.com) and - * Matt Thomas (matt@3am-software.com). - */ - -/* - * protocol independent method. - */ -static void lmc_set_protocol (lmc_softc_t * const, lmc_ctl_t *); - -/* - * media independent methods to check on media status, link, light LEDs, - * etc. - */ -static void lmc_ds3_init (lmc_softc_t * const); -static void lmc_ds3_default (lmc_softc_t * const); -static void lmc_ds3_set_status (lmc_softc_t * const, lmc_ctl_t *); -static void lmc_ds3_set_100ft (lmc_softc_t * const, int); -static int lmc_ds3_get_link_status (lmc_softc_t * const); -static void lmc_ds3_set_crc_length (lmc_softc_t * const, int); -static void lmc_ds3_set_scram (lmc_softc_t * const, int); -static void lmc_ds3_watchdog (lmc_softc_t * const); - -static void lmc_hssi_init (lmc_softc_t * const); -static void lmc_hssi_default (lmc_softc_t * const); -static void lmc_hssi_set_status (lmc_softc_t * const, lmc_ctl_t *); -static void lmc_hssi_set_clock (lmc_softc_t * const, int); -static int lmc_hssi_get_link_status (lmc_softc_t * const); -static void lmc_hssi_set_link_status (lmc_softc_t * const, int); -static void lmc_hssi_set_crc_length (lmc_softc_t * const, int); -static void lmc_hssi_watchdog (lmc_softc_t * const); - -static void lmc_ssi_init (lmc_softc_t * const); -static void lmc_ssi_default (lmc_softc_t * const); -static void lmc_ssi_set_status (lmc_softc_t * const, lmc_ctl_t *); -static void lmc_ssi_set_clock (lmc_softc_t * const, int); -static void lmc_ssi_set_speed (lmc_softc_t * const, lmc_ctl_t *); -static int lmc_ssi_get_link_status (lmc_softc_t * const); -static void lmc_ssi_set_link_status (lmc_softc_t * const, int); -static void lmc_ssi_set_crc_length (lmc_softc_t * const, int); -static void lmc_ssi_watchdog (lmc_softc_t * const); - -static void lmc_t1_init (lmc_softc_t * const); -static void lmc_t1_default (lmc_softc_t * const); -static void lmc_t1_set_status (lmc_softc_t * const, lmc_ctl_t *); -static int lmc_t1_get_link_status (lmc_softc_t * const); -static void lmc_t1_set_circuit_type (lmc_softc_t * const, int); -static void lmc_t1_set_crc_length (lmc_softc_t * const, int); -static void lmc_t1_set_clock (lmc_softc_t * const, int); -static void lmc_t1_watchdog (lmc_softc_t * const); - -static void lmc_dummy_set_1 (lmc_softc_t * const, int); -static void lmc_dummy_set2_1 (lmc_softc_t * const, lmc_ctl_t *); - -static inline void write_av9110_bit (lmc_softc_t *, int); -static void write_av9110(lmc_softc_t *, u32, u32, u32, u32, u32); - -lmc_media_t lmc_ds3_media = { - .init = lmc_ds3_init, /* special media init stuff */ - .defaults = lmc_ds3_default, /* reset to default state */ - .set_status = lmc_ds3_set_status, /* reset status to state provided */ - .set_clock_source = lmc_dummy_set_1, /* set clock source */ - .set_speed = lmc_dummy_set2_1, /* set line speed */ - .set_cable_length = lmc_ds3_set_100ft, /* set cable length */ - .set_scrambler = lmc_ds3_set_scram, /* set scrambler */ - .get_link_status = lmc_ds3_get_link_status, /* get link status */ - .set_link_status = lmc_dummy_set_1, /* set link status */ - .set_crc_length = lmc_ds3_set_crc_length, /* set CRC length */ - .set_circuit_type = lmc_dummy_set_1, /* set T1 or E1 circuit type */ - .watchdog = lmc_ds3_watchdog -}; - -lmc_media_t lmc_hssi_media = { - .init = lmc_hssi_init, /* special media init stuff */ - .defaults = lmc_hssi_default, /* reset to default state */ - .set_status = lmc_hssi_set_status, /* reset status to state provided */ - .set_clock_source = lmc_hssi_set_clock, /* set clock source */ - .set_speed = lmc_dummy_set2_1, /* set line speed */ - .set_cable_length = lmc_dummy_set_1, /* set cable length */ - .set_scrambler = lmc_dummy_set_1, /* set scrambler */ - .get_link_status = lmc_hssi_get_link_status, /* get link status */ - .set_link_status = lmc_hssi_set_link_status, /* set link status */ - .set_crc_length = lmc_hssi_set_crc_length, /* set CRC length */ - .set_circuit_type = lmc_dummy_set_1, /* set T1 or E1 circuit type */ - .watchdog = lmc_hssi_watchdog -}; - -lmc_media_t lmc_ssi_media = { - .init = lmc_ssi_init, /* special media init stuff */ - .defaults = lmc_ssi_default, /* reset to default state */ - .set_status = lmc_ssi_set_status, /* reset status to state provided */ - .set_clock_source = lmc_ssi_set_clock, /* set clock source */ - .set_speed = lmc_ssi_set_speed, /* set line speed */ - .set_cable_length = lmc_dummy_set_1, /* set cable length */ - .set_scrambler = lmc_dummy_set_1, /* set scrambler */ - .get_link_status = lmc_ssi_get_link_status, /* get link status */ - .set_link_status = lmc_ssi_set_link_status, /* set link status */ - .set_crc_length = lmc_ssi_set_crc_length, /* set CRC length */ - .set_circuit_type = lmc_dummy_set_1, /* set T1 or E1 circuit type */ - .watchdog = lmc_ssi_watchdog -}; - -lmc_media_t lmc_t1_media = { - .init = lmc_t1_init, /* special media init stuff */ - .defaults = lmc_t1_default, /* reset to default state */ - .set_status = lmc_t1_set_status, /* reset status to state provided */ - .set_clock_source = lmc_t1_set_clock, /* set clock source */ - .set_speed = lmc_dummy_set2_1, /* set line speed */ - .set_cable_length = lmc_dummy_set_1, /* set cable length */ - .set_scrambler = lmc_dummy_set_1, /* set scrambler */ - .get_link_status = lmc_t1_get_link_status, /* get link status */ - .set_link_status = lmc_dummy_set_1, /* set link status */ - .set_crc_length = lmc_t1_set_crc_length, /* set CRC length */ - .set_circuit_type = lmc_t1_set_circuit_type, /* set T1 or E1 circuit type */ - .watchdog = lmc_t1_watchdog -}; - -static void -lmc_dummy_set_1 (lmc_softc_t * const sc, int a) -{ -} - -static void -lmc_dummy_set2_1 (lmc_softc_t * const sc, lmc_ctl_t * a) -{ -} - -/* - * HSSI methods - */ - -static void -lmc_hssi_init (lmc_softc_t * const sc) -{ - sc->ictl.cardtype = LMC_CTL_CARDTYPE_LMC5200; - - lmc_gpio_mkoutput (sc, LMC_GEP_HSSI_CLOCK); -} - -static void -lmc_hssi_default (lmc_softc_t * const sc) -{ - sc->lmc_miireg16 = LMC_MII16_LED_ALL; - - sc->lmc_media->set_link_status (sc, LMC_LINK_DOWN); - sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_EXT); - sc->lmc_media->set_crc_length (sc, LMC_CTL_CRC_LENGTH_16); -} - -/* - * Given a user provided state, set ourselves up to match it. This will - * always reset the card if needed. - */ -static void -lmc_hssi_set_status (lmc_softc_t * const sc, lmc_ctl_t * ctl) -{ - if (ctl == NULL) - { - sc->lmc_media->set_clock_source (sc, sc->ictl.clock_source); - lmc_set_protocol (sc, NULL); - - return; - } - - /* - * check for change in clock source - */ - if (ctl->clock_source && !sc->ictl.clock_source) - { - sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_INT); - sc->lmc_timing = LMC_CTL_CLOCK_SOURCE_INT; - } - else if (!ctl->clock_source && sc->ictl.clock_source) - { - sc->lmc_timing = LMC_CTL_CLOCK_SOURCE_EXT; - sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_EXT); - } - - lmc_set_protocol (sc, ctl); -} - -/* - * 1 == internal, 0 == external - */ -static void -lmc_hssi_set_clock (lmc_softc_t * const sc, int ie) -{ - int old; - old = sc->ictl.clock_source; - if (ie == LMC_CTL_CLOCK_SOURCE_EXT) - { - sc->lmc_gpio |= LMC_GEP_HSSI_CLOCK; - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); - sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_EXT; - if(old != ie) - printk (LMC_PRINTF_FMT ": clock external\n", LMC_PRINTF_ARGS); - } - else - { - sc->lmc_gpio &= ~(LMC_GEP_HSSI_CLOCK); - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); - sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_INT; - if(old != ie) - printk (LMC_PRINTF_FMT ": clock internal\n", LMC_PRINTF_ARGS); - } -} - -/* - * return hardware link status. - * 0 == link is down, 1 == link is up. - */ -static int -lmc_hssi_get_link_status (lmc_softc_t * const sc) -{ - /* - * We're using the same code as SSI since - * they're practically the same - */ - return lmc_ssi_get_link_status(sc); -} - -static void -lmc_hssi_set_link_status (lmc_softc_t * const sc, int state) -{ - if (state == LMC_LINK_UP) - sc->lmc_miireg16 |= LMC_MII16_HSSI_TA; - else - sc->lmc_miireg16 &= ~LMC_MII16_HSSI_TA; - - lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16); -} - -/* - * 0 == 16bit, 1 == 32bit - */ -static void -lmc_hssi_set_crc_length (lmc_softc_t * const sc, int state) -{ - if (state == LMC_CTL_CRC_LENGTH_32) - { - /* 32 bit */ - sc->lmc_miireg16 |= LMC_MII16_HSSI_CRC; - sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_32; - } - else - { - /* 16 bit */ - sc->lmc_miireg16 &= ~LMC_MII16_HSSI_CRC; - sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_16; - } - - lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16); -} - -static void -lmc_hssi_watchdog (lmc_softc_t * const sc) -{ - /* HSSI is blank */ -} - -/* - * DS3 methods - */ - -/* - * Set cable length - */ -static void -lmc_ds3_set_100ft (lmc_softc_t * const sc, int ie) -{ - if (ie == LMC_CTL_CABLE_LENGTH_GT_100FT) - { - sc->lmc_miireg16 &= ~LMC_MII16_DS3_ZERO; - sc->ictl.cable_length = LMC_CTL_CABLE_LENGTH_GT_100FT; - } - else if (ie == LMC_CTL_CABLE_LENGTH_LT_100FT) - { - sc->lmc_miireg16 |= LMC_MII16_DS3_ZERO; - sc->ictl.cable_length = LMC_CTL_CABLE_LENGTH_LT_100FT; - } - lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16); -} - -static void -lmc_ds3_default (lmc_softc_t * const sc) -{ - sc->lmc_miireg16 = LMC_MII16_LED_ALL; - - sc->lmc_media->set_link_status (sc, LMC_LINK_DOWN); - sc->lmc_media->set_cable_length (sc, LMC_CTL_CABLE_LENGTH_LT_100FT); - sc->lmc_media->set_scrambler (sc, LMC_CTL_OFF); - sc->lmc_media->set_crc_length (sc, LMC_CTL_CRC_LENGTH_16); -} - -/* - * Given a user provided state, set ourselves up to match it. This will - * always reset the card if needed. - */ -static void -lmc_ds3_set_status (lmc_softc_t * const sc, lmc_ctl_t * ctl) -{ - if (ctl == NULL) - { - sc->lmc_media->set_cable_length (sc, sc->ictl.cable_length); - sc->lmc_media->set_scrambler (sc, sc->ictl.scrambler_onoff); - lmc_set_protocol (sc, NULL); - - return; - } - - /* - * check for change in cable length setting - */ - if (ctl->cable_length && !sc->ictl.cable_length) - lmc_ds3_set_100ft (sc, LMC_CTL_CABLE_LENGTH_GT_100FT); - else if (!ctl->cable_length && sc->ictl.cable_length) - lmc_ds3_set_100ft (sc, LMC_CTL_CABLE_LENGTH_LT_100FT); - - /* - * Check for change in scrambler setting (requires reset) - */ - if (ctl->scrambler_onoff && !sc->ictl.scrambler_onoff) - lmc_ds3_set_scram (sc, LMC_CTL_ON); - else if (!ctl->scrambler_onoff && sc->ictl.scrambler_onoff) - lmc_ds3_set_scram (sc, LMC_CTL_OFF); - - lmc_set_protocol (sc, ctl); -} - -static void -lmc_ds3_init (lmc_softc_t * const sc) -{ - int i; - - sc->ictl.cardtype = LMC_CTL_CARDTYPE_LMC5245; - - /* writes zeros everywhere */ - for (i = 0; i < 21; i++) - { - lmc_mii_writereg (sc, 0, 17, i); - lmc_mii_writereg (sc, 0, 18, 0); - } - - /* set some essential bits */ - lmc_mii_writereg (sc, 0, 17, 1); - lmc_mii_writereg (sc, 0, 18, 0x25); /* ser, xtx */ - - lmc_mii_writereg (sc, 0, 17, 5); - lmc_mii_writereg (sc, 0, 18, 0x80); /* emode */ - - lmc_mii_writereg (sc, 0, 17, 14); - lmc_mii_writereg (sc, 0, 18, 0x30); /* rcgen, tcgen */ - - /* clear counters and latched bits */ - for (i = 0; i < 21; i++) - { - lmc_mii_writereg (sc, 0, 17, i); - lmc_mii_readreg (sc, 0, 18); - } -} - -/* - * 1 == DS3 payload scrambled, 0 == not scrambled - */ -static void -lmc_ds3_set_scram (lmc_softc_t * const sc, int ie) -{ - if (ie == LMC_CTL_ON) - { - sc->lmc_miireg16 |= LMC_MII16_DS3_SCRAM; - sc->ictl.scrambler_onoff = LMC_CTL_ON; - } - else - { - sc->lmc_miireg16 &= ~LMC_MII16_DS3_SCRAM; - sc->ictl.scrambler_onoff = LMC_CTL_OFF; - } - lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16); -} - -/* - * return hardware link status. - * 0 == link is down, 1 == link is up. - */ -static int -lmc_ds3_get_link_status (lmc_softc_t * const sc) -{ - u16 link_status, link_status_11; - int ret = 1; - - lmc_mii_writereg (sc, 0, 17, 7); - link_status = lmc_mii_readreg (sc, 0, 18); - - /* LMC5245 (DS3) & LMC1200 (DS1) LED definitions - * led0 yellow = far-end adapter is in Red alarm condition - * led1 blue = received an Alarm Indication signal - * (upstream failure) - * led2 Green = power to adapter, Gate Array loaded & driver - * attached - * led3 red = Loss of Signal (LOS) or out of frame (OOF) - * conditions detected on T3 receive signal - */ - - lmc_led_on(sc, LMC_DS3_LED2); - - if ((link_status & LMC_FRAMER_REG0_DLOS) || - (link_status & LMC_FRAMER_REG0_OOFS)){ - ret = 0; - if(sc->last_led_err[3] != 1){ - u16 r1; - lmc_mii_writereg (sc, 0, 17, 01); /* Turn on Xbit error as our cisco does */ - r1 = lmc_mii_readreg (sc, 0, 18); - r1 &= 0xfe; - lmc_mii_writereg(sc, 0, 18, r1); - printk(KERN_WARNING "%s: Red Alarm - Loss of Signal or Loss of Framing\n", sc->name); - } - lmc_led_on(sc, LMC_DS3_LED3); /* turn on red LED */ - sc->last_led_err[3] = 1; - } - else { - lmc_led_off(sc, LMC_DS3_LED3); /* turn on red LED */ - if(sc->last_led_err[3] == 1){ - u16 r1; - lmc_mii_writereg (sc, 0, 17, 01); /* Turn off Xbit error */ - r1 = lmc_mii_readreg (sc, 0, 18); - r1 |= 0x01; - lmc_mii_writereg(sc, 0, 18, r1); - } - sc->last_led_err[3] = 0; - } - - lmc_mii_writereg(sc, 0, 17, 0x10); - link_status_11 = lmc_mii_readreg(sc, 0, 18); - if((link_status & LMC_FRAMER_REG0_AIS) || - (link_status_11 & LMC_FRAMER_REG10_XBIT)) { - ret = 0; - if(sc->last_led_err[0] != 1){ - printk(KERN_WARNING "%s: AIS Alarm or XBit Error\n", sc->name); - printk(KERN_WARNING "%s: Remote end has loss of signal or framing\n", sc->name); - } - lmc_led_on(sc, LMC_DS3_LED0); - sc->last_led_err[0] = 1; - } - else { - lmc_led_off(sc, LMC_DS3_LED0); - sc->last_led_err[0] = 0; - } - - lmc_mii_writereg (sc, 0, 17, 9); - link_status = lmc_mii_readreg (sc, 0, 18); - - if(link_status & LMC_FRAMER_REG9_RBLUE){ - ret = 0; - if(sc->last_led_err[1] != 1){ - printk(KERN_WARNING "%s: Blue Alarm - Receiving all 1's\n", sc->name); - } - lmc_led_on(sc, LMC_DS3_LED1); - sc->last_led_err[1] = 1; - } - else { - lmc_led_off(sc, LMC_DS3_LED1); - sc->last_led_err[1] = 0; - } - - return ret; -} - -/* - * 0 == 16bit, 1 == 32bit - */ -static void -lmc_ds3_set_crc_length (lmc_softc_t * const sc, int state) -{ - if (state == LMC_CTL_CRC_LENGTH_32) - { - /* 32 bit */ - sc->lmc_miireg16 |= LMC_MII16_DS3_CRC; - sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_32; - } - else - { - /* 16 bit */ - sc->lmc_miireg16 &= ~LMC_MII16_DS3_CRC; - sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_16; - } - - lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16); -} - -static void -lmc_ds3_watchdog (lmc_softc_t * const sc) -{ - -} - - -/* - * SSI methods - */ - -static void lmc_ssi_init(lmc_softc_t * const sc) -{ - u16 mii17; - int cable; - - sc->ictl.cardtype = LMC_CTL_CARDTYPE_LMC1000; - - mii17 = lmc_mii_readreg(sc, 0, 17); - - cable = (mii17 & LMC_MII17_SSI_CABLE_MASK) >> LMC_MII17_SSI_CABLE_SHIFT; - sc->ictl.cable_type = cable; - - lmc_gpio_mkoutput(sc, LMC_GEP_SSI_TXCLOCK); -} - -static void -lmc_ssi_default (lmc_softc_t * const sc) -{ - sc->lmc_miireg16 = LMC_MII16_LED_ALL; - - /* - * make TXCLOCK always be an output - */ - lmc_gpio_mkoutput (sc, LMC_GEP_SSI_TXCLOCK); - - sc->lmc_media->set_link_status (sc, LMC_LINK_DOWN); - sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_EXT); - sc->lmc_media->set_speed (sc, NULL); - sc->lmc_media->set_crc_length (sc, LMC_CTL_CRC_LENGTH_16); -} - -/* - * Given a user provided state, set ourselves up to match it. This will - * always reset the card if needed. - */ -static void -lmc_ssi_set_status (lmc_softc_t * const sc, lmc_ctl_t * ctl) -{ - if (ctl == NULL) - { - sc->lmc_media->set_clock_source (sc, sc->ictl.clock_source); - sc->lmc_media->set_speed (sc, &sc->ictl); - lmc_set_protocol (sc, NULL); - - return; - } - - /* - * check for change in clock source - */ - if (ctl->clock_source == LMC_CTL_CLOCK_SOURCE_INT - && sc->ictl.clock_source == LMC_CTL_CLOCK_SOURCE_EXT) - { - sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_INT); - sc->lmc_timing = LMC_CTL_CLOCK_SOURCE_INT; - } - else if (ctl->clock_source == LMC_CTL_CLOCK_SOURCE_EXT - && sc->ictl.clock_source == LMC_CTL_CLOCK_SOURCE_INT) - { - sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_EXT); - sc->lmc_timing = LMC_CTL_CLOCK_SOURCE_EXT; - } - - if (ctl->clock_rate != sc->ictl.clock_rate) - sc->lmc_media->set_speed (sc, ctl); - - lmc_set_protocol (sc, ctl); -} - -/* - * 1 == internal, 0 == external - */ -static void -lmc_ssi_set_clock (lmc_softc_t * const sc, int ie) -{ - int old; - old = ie; - if (ie == LMC_CTL_CLOCK_SOURCE_EXT) - { - sc->lmc_gpio &= ~(LMC_GEP_SSI_TXCLOCK); - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); - sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_EXT; - if(ie != old) - printk (LMC_PRINTF_FMT ": clock external\n", LMC_PRINTF_ARGS); - } - else - { - sc->lmc_gpio |= LMC_GEP_SSI_TXCLOCK; - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); - sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_INT; - if(ie != old) - printk (LMC_PRINTF_FMT ": clock internal\n", LMC_PRINTF_ARGS); - } -} - -static void -lmc_ssi_set_speed (lmc_softc_t * const sc, lmc_ctl_t * ctl) -{ - lmc_ctl_t *ictl = &sc->ictl; - lmc_av9110_t *av; - - /* original settings for clock rate of: - * 100 Khz (8,25,0,0,2) were incorrect - * they should have been 80,125,1,3,3 - * There are 17 param combinations to produce this freq. - * For 1.5 Mhz use 120,100,1,1,2 (226 param. combinations) - */ - if (ctl == NULL) - { - av = &ictl->cardspec.ssi; - ictl->clock_rate = 1500000; - av->f = ictl->clock_rate; - av->n = 120; - av->m = 100; - av->v = 1; - av->x = 1; - av->r = 2; - - write_av9110 (sc, av->n, av->m, av->v, av->x, av->r); - return; - } - - av = &ctl->cardspec.ssi; - - if (av->f == 0) - return; - - ictl->clock_rate = av->f; /* really, this is the rate we are */ - ictl->cardspec.ssi = *av; - - write_av9110 (sc, av->n, av->m, av->v, av->x, av->r); -} - -/* - * return hardware link status. - * 0 == link is down, 1 == link is up. - */ -static int -lmc_ssi_get_link_status (lmc_softc_t * const sc) -{ - u16 link_status; - u32 ticks; - int ret = 1; - int hw_hdsk = 1; - - /* - * missing CTS? Hmm. If we require CTS on, we may never get the - * link to come up, so omit it in this test. - * - * Also, it seems that with a loopback cable, DCD isn't asserted, - * so just check for things like this: - * DSR _must_ be asserted. - * One of DCD or CTS must be asserted. - */ - - /* LMC 1000 (SSI) LED definitions - * led0 Green = power to adapter, Gate Array loaded & - * driver attached - * led1 Green = DSR and DTR and RTS and CTS are set - * led2 Green = Cable detected - * led3 red = No timing is available from the - * cable or the on-board frequency - * generator. - */ - - link_status = lmc_mii_readreg (sc, 0, 16); - - /* Is the transmit clock still available */ - ticks = LMC_CSR_READ (sc, csr_gp_timer); - ticks = 0x0000ffff - (ticks & 0x0000ffff); - - lmc_led_on (sc, LMC_MII16_LED0); - - /* ====== transmit clock determination ===== */ - if (sc->lmc_timing == LMC_CTL_CLOCK_SOURCE_INT) { - lmc_led_off(sc, LMC_MII16_LED3); - } - else if (ticks == 0 ) { /* no clock found ? */ - ret = 0; - if (sc->last_led_err[3] != 1) { - sc->extra_stats.tx_lossOfClockCnt++; - printk(KERN_WARNING "%s: Lost Clock, Link Down\n", sc->name); - } - sc->last_led_err[3] = 1; - lmc_led_on (sc, LMC_MII16_LED3); /* turn ON red LED */ - } - else { - if(sc->last_led_err[3] == 1) - printk(KERN_WARNING "%s: Clock Returned\n", sc->name); - sc->last_led_err[3] = 0; - lmc_led_off (sc, LMC_MII16_LED3); /* turn OFF red LED */ - } - - if ((link_status & LMC_MII16_SSI_DSR) == 0) { /* Also HSSI CA */ - ret = 0; - hw_hdsk = 0; - } - -#ifdef CONFIG_LMC_IGNORE_HARDWARE_HANDSHAKE - if ((link_status & (LMC_MII16_SSI_CTS | LMC_MII16_SSI_DCD)) == 0){ - ret = 0; - hw_hdsk = 0; - } -#endif - - if(hw_hdsk == 0){ - if(sc->last_led_err[1] != 1) - printk(KERN_WARNING "%s: DSR not asserted\n", sc->name); - sc->last_led_err[1] = 1; - lmc_led_off(sc, LMC_MII16_LED1); - } - else { - if(sc->last_led_err[1] != 0) - printk(KERN_WARNING "%s: DSR now asserted\n", sc->name); - sc->last_led_err[1] = 0; - lmc_led_on(sc, LMC_MII16_LED1); - } - - if(ret == 1) { - lmc_led_on(sc, LMC_MII16_LED2); /* Over all good status? */ - } - - return ret; -} - -static void -lmc_ssi_set_link_status (lmc_softc_t * const sc, int state) -{ - if (state == LMC_LINK_UP) - { - sc->lmc_miireg16 |= (LMC_MII16_SSI_DTR | LMC_MII16_SSI_RTS); - printk (LMC_PRINTF_FMT ": asserting DTR and RTS\n", LMC_PRINTF_ARGS); - } - else - { - sc->lmc_miireg16 &= ~(LMC_MII16_SSI_DTR | LMC_MII16_SSI_RTS); - printk (LMC_PRINTF_FMT ": deasserting DTR and RTS\n", LMC_PRINTF_ARGS); - } - - lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16); - -} - -/* - * 0 == 16bit, 1 == 32bit - */ -static void -lmc_ssi_set_crc_length (lmc_softc_t * const sc, int state) -{ - if (state == LMC_CTL_CRC_LENGTH_32) - { - /* 32 bit */ - sc->lmc_miireg16 |= LMC_MII16_SSI_CRC; - sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_32; - sc->lmc_crcSize = LMC_CTL_CRC_BYTESIZE_4; - - } - else - { - /* 16 bit */ - sc->lmc_miireg16 &= ~LMC_MII16_SSI_CRC; - sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_16; - sc->lmc_crcSize = LMC_CTL_CRC_BYTESIZE_2; - } - - lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16); -} - -/* - * These are bits to program the ssi frequency generator - */ -static inline void -write_av9110_bit (lmc_softc_t * sc, int c) -{ - /* - * set the data bit as we need it. - */ - sc->lmc_gpio &= ~(LMC_GEP_CLK); - if (c & 0x01) - sc->lmc_gpio |= LMC_GEP_DATA; - else - sc->lmc_gpio &= ~(LMC_GEP_DATA); - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); - - /* - * set the clock to high - */ - sc->lmc_gpio |= LMC_GEP_CLK; - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); - - /* - * set the clock to low again. - */ - sc->lmc_gpio &= ~(LMC_GEP_CLK); - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); -} - -static void write_av9110(lmc_softc_t *sc, u32 n, u32 m, u32 v, u32 x, u32 r) -{ - int i; - -#if 0 - printk (LMC_PRINTF_FMT ": speed %u, %d %d %d %d %d\n", - LMC_PRINTF_ARGS, sc->ictl.clock_rate, n, m, v, x, r); -#endif - - sc->lmc_gpio |= LMC_GEP_SSI_GENERATOR; - sc->lmc_gpio &= ~(LMC_GEP_DATA | LMC_GEP_CLK); - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); - - /* - * Set the TXCLOCK, GENERATOR, SERIAL, and SERIALCLK - * as outputs. - */ - lmc_gpio_mkoutput (sc, (LMC_GEP_DATA | LMC_GEP_CLK - | LMC_GEP_SSI_GENERATOR)); - - sc->lmc_gpio &= ~(LMC_GEP_SSI_GENERATOR); - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); - - /* - * a shifting we will go... - */ - for (i = 0; i < 7; i++) - write_av9110_bit (sc, n >> i); - for (i = 0; i < 7; i++) - write_av9110_bit (sc, m >> i); - for (i = 0; i < 1; i++) - write_av9110_bit (sc, v >> i); - for (i = 0; i < 2; i++) - write_av9110_bit (sc, x >> i); - for (i = 0; i < 2; i++) - write_av9110_bit (sc, r >> i); - for (i = 0; i < 5; i++) - write_av9110_bit (sc, 0x17 >> i); - - /* - * stop driving serial-related signals - */ - lmc_gpio_mkinput (sc, - (LMC_GEP_DATA | LMC_GEP_CLK - | LMC_GEP_SSI_GENERATOR)); -} - -static void lmc_ssi_watchdog(lmc_softc_t * const sc) -{ - u16 mii17 = lmc_mii_readreg(sc, 0, 17); - if (((mii17 >> 3) & 7) == 7) - lmc_led_off(sc, LMC_MII16_LED2); - else - lmc_led_on(sc, LMC_MII16_LED2); -} - -/* - * T1 methods - */ - -/* - * The framer regs are multiplexed through MII regs 17 & 18 - * write the register address to MII reg 17 and the * data to MII reg 18. */ -static void -lmc_t1_write (lmc_softc_t * const sc, int a, int d) -{ - lmc_mii_writereg (sc, 0, 17, a); - lmc_mii_writereg (sc, 0, 18, d); -} - -/* Save a warning -static int -lmc_t1_read (lmc_softc_t * const sc, int a) -{ - lmc_mii_writereg (sc, 0, 17, a); - return lmc_mii_readreg (sc, 0, 18); -} -*/ - - -static void -lmc_t1_init (lmc_softc_t * const sc) -{ - u16 mii16; - int i; - - sc->ictl.cardtype = LMC_CTL_CARDTYPE_LMC1200; - mii16 = lmc_mii_readreg (sc, 0, 16); - - /* reset 8370 */ - mii16 &= ~LMC_MII16_T1_RST; - lmc_mii_writereg (sc, 0, 16, mii16 | LMC_MII16_T1_RST); - lmc_mii_writereg (sc, 0, 16, mii16); - - /* set T1 or E1 line. Uses sc->lmcmii16 reg in function so update it */ - sc->lmc_miireg16 = mii16; - lmc_t1_set_circuit_type(sc, LMC_CTL_CIRCUIT_TYPE_T1); - mii16 = sc->lmc_miireg16; - - lmc_t1_write (sc, 0x01, 0x1B); /* CR0 - primary control */ - lmc_t1_write (sc, 0x02, 0x42); /* JAT_CR - jitter atten config */ - lmc_t1_write (sc, 0x14, 0x00); /* LOOP - loopback config */ - lmc_t1_write (sc, 0x15, 0x00); /* DL3_TS - external data link timeslot */ - lmc_t1_write (sc, 0x18, 0xFF); /* PIO - programmable I/O */ - lmc_t1_write (sc, 0x19, 0x30); /* POE - programmable OE */ - lmc_t1_write (sc, 0x1A, 0x0F); /* CMUX - clock input mux */ - lmc_t1_write (sc, 0x20, 0x41); /* LIU_CR - RX LIU config */ - lmc_t1_write (sc, 0x22, 0x76); /* RLIU_CR - RX LIU config */ - lmc_t1_write (sc, 0x40, 0x03); /* RCR0 - RX config */ - lmc_t1_write (sc, 0x45, 0x00); /* RALM - RX alarm config */ - lmc_t1_write (sc, 0x46, 0x05); /* LATCH - RX alarm/err/cntr latch */ - lmc_t1_write (sc, 0x68, 0x40); /* TLIU_CR - TX LIU config */ - lmc_t1_write (sc, 0x70, 0x0D); /* TCR0 - TX framer config */ - lmc_t1_write (sc, 0x71, 0x05); /* TCR1 - TX config */ - lmc_t1_write (sc, 0x72, 0x0B); /* TFRM - TX frame format */ - lmc_t1_write (sc, 0x73, 0x00); /* TERROR - TX error insert */ - lmc_t1_write (sc, 0x74, 0x00); /* TMAN - TX manual Sa/FEBE config */ - lmc_t1_write (sc, 0x75, 0x00); /* TALM - TX alarm signal config */ - lmc_t1_write (sc, 0x76, 0x00); /* TPATT - TX test pattern config */ - lmc_t1_write (sc, 0x77, 0x00); /* TLB - TX inband loopback config */ - lmc_t1_write (sc, 0x90, 0x05); /* CLAD_CR - clock rate adapter config */ - lmc_t1_write (sc, 0x91, 0x05); /* CSEL - clad freq sel */ - lmc_t1_write (sc, 0xA6, 0x00); /* DL1_CTL - DL1 control */ - lmc_t1_write (sc, 0xB1, 0x00); /* DL2_CTL - DL2 control */ - lmc_t1_write (sc, 0xD0, 0x47); /* SBI_CR - sys bus iface config */ - lmc_t1_write (sc, 0xD1, 0x70); /* RSB_CR - RX sys bus config */ - lmc_t1_write (sc, 0xD4, 0x30); /* TSB_CR - TX sys bus config */ - for (i = 0; i < 32; i++) - { - lmc_t1_write (sc, 0x0E0 + i, 0x00); /* SBCn - sys bus per-channel ctl */ - lmc_t1_write (sc, 0x100 + i, 0x00); /* TPCn - TX per-channel ctl */ - lmc_t1_write (sc, 0x180 + i, 0x00); /* RPCn - RX per-channel ctl */ - } - for (i = 1; i < 25; i++) - { - lmc_t1_write (sc, 0x0E0 + i, 0x0D); /* SBCn - sys bus per-channel ctl */ - } - - mii16 |= LMC_MII16_T1_XOE; - lmc_mii_writereg (sc, 0, 16, mii16); - sc->lmc_miireg16 = mii16; -} - -static void -lmc_t1_default (lmc_softc_t * const sc) -{ - sc->lmc_miireg16 = LMC_MII16_LED_ALL; - sc->lmc_media->set_link_status (sc, LMC_LINK_DOWN); - sc->lmc_media->set_circuit_type (sc, LMC_CTL_CIRCUIT_TYPE_T1); - sc->lmc_media->set_crc_length (sc, LMC_CTL_CRC_LENGTH_16); - /* Right now we can only clock from out internal source */ - sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_INT; -} -/* * Given a user provided state, set ourselves up to match it. This will * always reset the card if needed. - */ -static void -lmc_t1_set_status (lmc_softc_t * const sc, lmc_ctl_t * ctl) -{ - if (ctl == NULL) - { - sc->lmc_media->set_circuit_type (sc, sc->ictl.circuit_type); - lmc_set_protocol (sc, NULL); - - return; - } - /* - * check for change in circuit type */ - if (ctl->circuit_type == LMC_CTL_CIRCUIT_TYPE_T1 - && sc->ictl.circuit_type == - LMC_CTL_CIRCUIT_TYPE_E1) sc->lmc_media->set_circuit_type (sc, - LMC_CTL_CIRCUIT_TYPE_E1); - else if (ctl->circuit_type == LMC_CTL_CIRCUIT_TYPE_E1 - && sc->ictl.circuit_type == LMC_CTL_CIRCUIT_TYPE_T1) - sc->lmc_media->set_circuit_type (sc, LMC_CTL_CIRCUIT_TYPE_T1); - lmc_set_protocol (sc, ctl); -} -/* - * return hardware link status. - * 0 == link is down, 1 == link is up. - */ static int -lmc_t1_get_link_status (lmc_softc_t * const sc) -{ - u16 link_status; - int ret = 1; - - /* LMC5245 (DS3) & LMC1200 (DS1) LED definitions - * led0 yellow = far-end adapter is in Red alarm condition - * led1 blue = received an Alarm Indication signal - * (upstream failure) - * led2 Green = power to adapter, Gate Array loaded & driver - * attached - * led3 red = Loss of Signal (LOS) or out of frame (OOF) - * conditions detected on T3 receive signal - */ - lmc_led_on(sc, LMC_DS3_LED2); - - lmc_mii_writereg (sc, 0, 17, T1FRAMER_ALARM1_STATUS); - link_status = lmc_mii_readreg (sc, 0, 18); - - - if (link_status & T1F_RAIS) { /* turn on blue LED */ - ret = 0; - if(sc->last_led_err[1] != 1){ - printk(KERN_WARNING "%s: Receive AIS/Blue Alarm. Far end in RED alarm\n", sc->name); - } - lmc_led_on(sc, LMC_DS3_LED1); - sc->last_led_err[1] = 1; - } - else { - if(sc->last_led_err[1] != 0){ - printk(KERN_WARNING "%s: End AIS/Blue Alarm\n", sc->name); - } - lmc_led_off (sc, LMC_DS3_LED1); - sc->last_led_err[1] = 0; - } - - /* - * Yellow Alarm is nasty evil stuff, looks at data patterns - * inside the channel and confuses it with HDLC framing - * ignore all yellow alarms. - * - * Do listen to MultiFrame Yellow alarm which while implemented - * different ways isn't in the channel and hence somewhat - * more reliable - */ - - if (link_status & T1F_RMYEL) { - ret = 0; - if(sc->last_led_err[0] != 1){ - printk(KERN_WARNING "%s: Receive Yellow AIS Alarm\n", sc->name); - } - lmc_led_on(sc, LMC_DS3_LED0); - sc->last_led_err[0] = 1; - } - else { - if(sc->last_led_err[0] != 0){ - printk(KERN_WARNING "%s: End of Yellow AIS Alarm\n", sc->name); - } - lmc_led_off(sc, LMC_DS3_LED0); - sc->last_led_err[0] = 0; - } - - /* - * Loss of signal and los of frame - * Use the green bit to identify which one lit the led - */ - if(link_status & T1F_RLOF){ - ret = 0; - if(sc->last_led_err[3] != 1){ - printk(KERN_WARNING "%s: Local Red Alarm: Loss of Framing\n", sc->name); - } - lmc_led_on(sc, LMC_DS3_LED3); - sc->last_led_err[3] = 1; - - } - else { - if(sc->last_led_err[3] != 0){ - printk(KERN_WARNING "%s: End Red Alarm (LOF)\n", sc->name); - } - if( ! (link_status & T1F_RLOS)) - lmc_led_off(sc, LMC_DS3_LED3); - sc->last_led_err[3] = 0; - } - - if(link_status & T1F_RLOS){ - ret = 0; - if(sc->last_led_err[2] != 1){ - printk(KERN_WARNING "%s: Local Red Alarm: Loss of Signal\n", sc->name); - } - lmc_led_on(sc, LMC_DS3_LED3); - sc->last_led_err[2] = 1; - - } - else { - if(sc->last_led_err[2] != 0){ - printk(KERN_WARNING "%s: End Red Alarm (LOS)\n", sc->name); - } - if( ! (link_status & T1F_RLOF)) - lmc_led_off(sc, LMC_DS3_LED3); - sc->last_led_err[2] = 0; - } - - sc->lmc_xinfo.t1_alarm1_status = link_status; - - lmc_mii_writereg (sc, 0, 17, T1FRAMER_ALARM2_STATUS); - sc->lmc_xinfo.t1_alarm2_status = lmc_mii_readreg (sc, 0, 18); - - return ret; -} - -/* - * 1 == T1 Circuit Type , 0 == E1 Circuit Type - */ -static void -lmc_t1_set_circuit_type (lmc_softc_t * const sc, int ie) -{ - if (ie == LMC_CTL_CIRCUIT_TYPE_T1) { - sc->lmc_miireg16 |= LMC_MII16_T1_Z; - sc->ictl.circuit_type = LMC_CTL_CIRCUIT_TYPE_T1; - printk(KERN_INFO "%s: In T1 Mode\n", sc->name); - } - else { - sc->lmc_miireg16 &= ~LMC_MII16_T1_Z; - sc->ictl.circuit_type = LMC_CTL_CIRCUIT_TYPE_E1; - printk(KERN_INFO "%s: In E1 Mode\n", sc->name); - } - - lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16); - -} - -/* - * 0 == 16bit, 1 == 32bit */ -static void -lmc_t1_set_crc_length (lmc_softc_t * const sc, int state) -{ - if (state == LMC_CTL_CRC_LENGTH_32) - { - /* 32 bit */ - sc->lmc_miireg16 |= LMC_MII16_T1_CRC; - sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_32; - sc->lmc_crcSize = LMC_CTL_CRC_BYTESIZE_4; - - } - else - { - /* 16 bit */ sc->lmc_miireg16 &= ~LMC_MII16_T1_CRC; - sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_16; - sc->lmc_crcSize = LMC_CTL_CRC_BYTESIZE_2; - - } - - lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16); -} - -/* - * 1 == internal, 0 == external - */ -static void -lmc_t1_set_clock (lmc_softc_t * const sc, int ie) -{ - int old; - old = ie; - if (ie == LMC_CTL_CLOCK_SOURCE_EXT) - { - sc->lmc_gpio &= ~(LMC_GEP_SSI_TXCLOCK); - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); - sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_EXT; - if(old != ie) - printk (LMC_PRINTF_FMT ": clock external\n", LMC_PRINTF_ARGS); - } - else - { - sc->lmc_gpio |= LMC_GEP_SSI_TXCLOCK; - LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio); - sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_INT; - if(old != ie) - printk (LMC_PRINTF_FMT ": clock internal\n", LMC_PRINTF_ARGS); - } -} - -static void -lmc_t1_watchdog (lmc_softc_t * const sc) -{ -} - -static void -lmc_set_protocol (lmc_softc_t * const sc, lmc_ctl_t * ctl) -{ - if (!ctl) - sc->ictl.keepalive_onoff = LMC_CTL_ON; -} diff --git a/drivers/net/wan/lmc/lmc_proto.c b/drivers/net/wan/lmc/lmc_proto.c deleted file mode 100644 index e5487616a816..000000000000 --- a/drivers/net/wan/lmc/lmc_proto.c +++ /dev/null @@ -1,106 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - /* - * Copyright (c) 1997-2000 LAN Media Corporation (LMC) - * All rights reserved. www.lanmedia.com - * - * This code is written by: - * Andrew Stanley-Jones (asj@cban.com) - * Rob Braun (bbraun@vix.com), - * Michael Graff (explorer@vix.com) and - * Matt Thomas (matt@3am-software.com). - * - * With Help By: - * David Boggs - * Ron Crane - * Allan Cox - * - * Driver for the LanMedia LMC5200, LMC5245, LMC1000, LMC1200 cards. - */ - -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/timer.h> -#include <linux/ptrace.h> -#include <linux/errno.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/in.h> -#include <linux/if_arp.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/inet.h> -#include <linux/workqueue.h> -#include <linux/proc_fs.h> -#include <linux/bitops.h> -#include <asm/processor.h> /* Processor type for cache alignment. */ -#include <asm/io.h> -#include <asm/dma.h> -#include <linux/smp.h> - -#include "lmc.h" -#include "lmc_var.h" -#include "lmc_debug.h" -#include "lmc_ioctl.h" -#include "lmc_proto.h" - -// attach -void lmc_proto_attach(lmc_softc_t *sc) /*FOLD00*/ -{ - if (sc->if_type == LMC_NET) { - struct net_device *dev = sc->lmc_device; - /* - * They set a few basics because they don't use HDLC - */ - dev->flags |= IFF_POINTOPOINT; - dev->hard_header_len = 0; - dev->addr_len = 0; - } -} - -int lmc_proto_open(lmc_softc_t *sc) -{ - int ret = 0; - - if (sc->if_type == LMC_PPP) { - ret = hdlc_open(sc->lmc_device); - if (ret < 0) - printk(KERN_WARNING "%s: HDLC open failed: %d\n", - sc->name, ret); - } - return ret; -} - -void lmc_proto_close(lmc_softc_t *sc) -{ - if (sc->if_type == LMC_PPP) - hdlc_close(sc->lmc_device); -} - -__be16 lmc_proto_type(lmc_softc_t *sc, struct sk_buff *skb) /*FOLD00*/ -{ - switch(sc->if_type){ - case LMC_PPP: - return hdlc_type_trans(skb, sc->lmc_device); - case LMC_NET: - return htons(ETH_P_802_2); - case LMC_RAW: /* Packet type for skbuff kind of useless */ - return htons(ETH_P_802_2); - default: - printk(KERN_WARNING "%s: No protocol set for this interface, assuming 802.2 (which is wrong!!)\n", sc->name); - return htons(ETH_P_802_2); - } -} - -void lmc_proto_netif(lmc_softc_t *sc, struct sk_buff *skb) /*FOLD00*/ -{ - switch(sc->if_type){ - case LMC_PPP: - case LMC_NET: - default: - netif_rx(skb); - break; - case LMC_RAW: - break; - } -} diff --git a/drivers/net/wan/lmc/lmc_proto.h b/drivers/net/wan/lmc/lmc_proto.h deleted file mode 100644 index e56e7072de44..000000000000 --- a/drivers/net/wan/lmc/lmc_proto.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LMC_PROTO_H_ -#define _LMC_PROTO_H_ - -#include <linux/hdlc.h> - -void lmc_proto_attach(lmc_softc_t *sc); -int lmc_proto_open(lmc_softc_t *sc); -void lmc_proto_close(lmc_softc_t *sc); -__be16 lmc_proto_type(lmc_softc_t *sc, struct sk_buff *skb); -void lmc_proto_netif(lmc_softc_t *sc, struct sk_buff *skb); - -static inline lmc_softc_t* dev_to_sc(struct net_device *dev) -{ - return (lmc_softc_t *)dev_to_hdlc(dev)->priv; -} - -#endif diff --git a/drivers/net/wan/lmc/lmc_var.h b/drivers/net/wan/lmc/lmc_var.h deleted file mode 100644 index 99f0aa787a35..000000000000 --- a/drivers/net/wan/lmc/lmc_var.h +++ /dev/null @@ -1,468 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _LMC_VAR_H_ -#define _LMC_VAR_H_ - - /* - * Copyright (c) 1997-2000 LAN Media Corporation (LMC) - * All rights reserved. www.lanmedia.com - * - * This code is written by: - * Andrew Stanley-Jones (asj@cban.com) - * Rob Braun (bbraun@vix.com), - * Michael Graff (explorer@vix.com) and - * Matt Thomas (matt@3am-software.com). - */ - -#include <linux/timer.h> - -/* - * basic definitions used in lmc include files - */ - -typedef struct lmc___softc lmc_softc_t; -typedef struct lmc___media lmc_media_t; -typedef struct lmc___ctl lmc_ctl_t; - -#define lmc_csrptr_t unsigned long - -#define LMC_REG_RANGE 0x80 - -#define LMC_PRINTF_FMT "%s" -#define LMC_PRINTF_ARGS (sc->lmc_device->name) - -#define TX_TIMEOUT (2*HZ) - -#define LMC_TXDESCS 32 -#define LMC_RXDESCS 32 - -#define LMC_LINK_UP 1 -#define LMC_LINK_DOWN 0 - -/* These macros for generic read and write to and from the dec chip */ -#define LMC_CSR_READ(sc, csr) \ - inl((sc)->lmc_csrs.csr) -#define LMC_CSR_WRITE(sc, reg, val) \ - outl((val), (sc)->lmc_csrs.reg) - -//#ifdef _LINUX_DELAY_H -// #define SLOW_DOWN_IO udelay(2); -// #undef __SLOW_DOWN_IO -// #define __SLOW_DOWN_IO udelay(2); -//#endif - -#define DELAY(n) SLOW_DOWN_IO - -#define lmc_delay() inl(sc->lmc_csrs.csr_9) - -/* This macro sync's up with the mii so that reads and writes can take place */ -#define LMC_MII_SYNC(sc) do {int n=32; while( n >= 0 ) { \ - LMC_CSR_WRITE((sc), csr_9, 0x20000); \ - lmc_delay(); \ - LMC_CSR_WRITE((sc), csr_9, 0x30000); \ - lmc_delay(); \ - n--; }} while(0) - -struct lmc_regfile_t { - lmc_csrptr_t csr_busmode; /* CSR0 */ - lmc_csrptr_t csr_txpoll; /* CSR1 */ - lmc_csrptr_t csr_rxpoll; /* CSR2 */ - lmc_csrptr_t csr_rxlist; /* CSR3 */ - lmc_csrptr_t csr_txlist; /* CSR4 */ - lmc_csrptr_t csr_status; /* CSR5 */ - lmc_csrptr_t csr_command; /* CSR6 */ - lmc_csrptr_t csr_intr; /* CSR7 */ - lmc_csrptr_t csr_missed_frames; /* CSR8 */ - lmc_csrptr_t csr_9; /* CSR9 */ - lmc_csrptr_t csr_10; /* CSR10 */ - lmc_csrptr_t csr_11; /* CSR11 */ - lmc_csrptr_t csr_12; /* CSR12 */ - lmc_csrptr_t csr_13; /* CSR13 */ - lmc_csrptr_t csr_14; /* CSR14 */ - lmc_csrptr_t csr_15; /* CSR15 */ -}; - -#define csr_enetrom csr_9 /* 21040 */ -#define csr_reserved csr_10 /* 21040 */ -#define csr_full_duplex csr_11 /* 21040 */ -#define csr_bootrom csr_10 /* 21041/21140A/?? */ -#define csr_gp csr_12 /* 21140* */ -#define csr_watchdog csr_15 /* 21140* */ -#define csr_gp_timer csr_11 /* 21041/21140* */ -#define csr_srom_mii csr_9 /* 21041/21140* */ -#define csr_sia_status csr_12 /* 2104x */ -#define csr_sia_connectivity csr_13 /* 2104x */ -#define csr_sia_tx_rx csr_14 /* 2104x */ -#define csr_sia_general csr_15 /* 2104x */ - -/* tulip length/control transmit descriptor definitions - * used to define bits in the second tulip_desc_t field (length) - * for the transmit descriptor -baz */ - -#define LMC_TDES_FIRST_BUFFER_SIZE ((u32)(0x000007FF)) -#define LMC_TDES_SECOND_BUFFER_SIZE ((u32)(0x003FF800)) -#define LMC_TDES_HASH_FILTERING ((u32)(0x00400000)) -#define LMC_TDES_DISABLE_PADDING ((u32)(0x00800000)) -#define LMC_TDES_SECOND_ADDR_CHAINED ((u32)(0x01000000)) -#define LMC_TDES_END_OF_RING ((u32)(0x02000000)) -#define LMC_TDES_ADD_CRC_DISABLE ((u32)(0x04000000)) -#define LMC_TDES_SETUP_PACKET ((u32)(0x08000000)) -#define LMC_TDES_INVERSE_FILTERING ((u32)(0x10000000)) -#define LMC_TDES_FIRST_SEGMENT ((u32)(0x20000000)) -#define LMC_TDES_LAST_SEGMENT ((u32)(0x40000000)) -#define LMC_TDES_INTERRUPT_ON_COMPLETION ((u32)(0x80000000)) - -#define TDES_SECOND_BUFFER_SIZE_BIT_NUMBER 11 -#define TDES_COLLISION_COUNT_BIT_NUMBER 3 - -/* Constants for the RCV descriptor RDES */ - -#define LMC_RDES_OVERFLOW ((u32)(0x00000001)) -#define LMC_RDES_CRC_ERROR ((u32)(0x00000002)) -#define LMC_RDES_DRIBBLING_BIT ((u32)(0x00000004)) -#define LMC_RDES_REPORT_ON_MII_ERR ((u32)(0x00000008)) -#define LMC_RDES_RCV_WATCHDOG_TIMEOUT ((u32)(0x00000010)) -#define LMC_RDES_FRAME_TYPE ((u32)(0x00000020)) -#define LMC_RDES_COLLISION_SEEN ((u32)(0x00000040)) -#define LMC_RDES_FRAME_TOO_LONG ((u32)(0x00000080)) -#define LMC_RDES_LAST_DESCRIPTOR ((u32)(0x00000100)) -#define LMC_RDES_FIRST_DESCRIPTOR ((u32)(0x00000200)) -#define LMC_RDES_MULTICAST_FRAME ((u32)(0x00000400)) -#define LMC_RDES_RUNT_FRAME ((u32)(0x00000800)) -#define LMC_RDES_DATA_TYPE ((u32)(0x00003000)) -#define LMC_RDES_LENGTH_ERROR ((u32)(0x00004000)) -#define LMC_RDES_ERROR_SUMMARY ((u32)(0x00008000)) -#define LMC_RDES_FRAME_LENGTH ((u32)(0x3FFF0000)) -#define LMC_RDES_OWN_BIT ((u32)(0x80000000)) - -#define RDES_FRAME_LENGTH_BIT_NUMBER 16 - -#define LMC_RDES_ERROR_MASK ( (u32)( \ - LMC_RDES_OVERFLOW \ - | LMC_RDES_DRIBBLING_BIT \ - | LMC_RDES_REPORT_ON_MII_ERR \ - | LMC_RDES_COLLISION_SEEN ) ) - - -/* - * Ioctl info - */ - -typedef struct { - u32 n; - u32 m; - u32 v; - u32 x; - u32 r; - u32 f; - u32 exact; -} lmc_av9110_t; - -/* - * Common structure passed to the ioctl code. - */ -struct lmc___ctl { - u32 cardtype; - u32 clock_source; /* HSSI, T1 */ - u32 clock_rate; /* T1 */ - u32 crc_length; - u32 cable_length; /* DS3 */ - u32 scrambler_onoff; /* DS3 */ - u32 cable_type; /* T1 */ - u32 keepalive_onoff; /* protocol */ - u32 ticks; /* ticks/sec */ - union { - lmc_av9110_t ssi; - } cardspec; - u32 circuit_type; /* T1 or E1 */ -}; - - -/* - * Careful, look at the data sheet, there's more to this - * structure than meets the eye. It should probably be: - * - * struct tulip_desc_t { - * u8 own:1; - * u32 status:31; - * u32 control:10; - * u32 buffer1; - * u32 buffer2; - * }; - * You could also expand status control to provide more bit information - */ - -struct tulip_desc_t { - s32 status; - s32 length; - u32 buffer1; - u32 buffer2; -}; - -/* - * media independent methods to check on media status, link, light LEDs, - * etc. - */ -struct lmc___media { - void (* init)(lmc_softc_t * const); - void (* defaults)(lmc_softc_t * const); - void (* set_status)(lmc_softc_t * const, lmc_ctl_t *); - void (* set_clock_source)(lmc_softc_t * const, int); - void (* set_speed)(lmc_softc_t * const, lmc_ctl_t *); - void (* set_cable_length)(lmc_softc_t * const, int); - void (* set_scrambler)(lmc_softc_t * const, int); - int (* get_link_status)(lmc_softc_t * const); - void (* set_link_status)(lmc_softc_t * const, int); - void (* set_crc_length)(lmc_softc_t * const, int); - void (* set_circuit_type)(lmc_softc_t * const, int); - void (* watchdog)(lmc_softc_t * const); -}; - - -#define STATCHECK 0xBEEFCAFE - -struct lmc_extra_statistics -{ - u32 version_size; - u32 lmc_cardtype; - - u32 tx_ProcTimeout; - u32 tx_IntTimeout; - u32 tx_NoCompleteCnt; - u32 tx_MaxXmtsB4Int; - u32 tx_TimeoutCnt; - u32 tx_OutOfSyncPtr; - u32 tx_tbusy0; - u32 tx_tbusy1; - u32 tx_tbusy_calls; - u32 resetCount; - u32 lmc_txfull; - u32 tbusy; - u32 dirtyTx; - u32 lmc_next_tx; - u32 otherTypeCnt; - u32 lastType; - u32 lastTypeOK; - u32 txLoopCnt; - u32 usedXmtDescripCnt; - u32 txIndexCnt; - u32 rxIntLoopCnt; - - u32 rx_SmallPktCnt; - u32 rx_BadPktSurgeCnt; - u32 rx_BuffAllocErr; - u32 tx_lossOfClockCnt; - - /* T1 error counters */ - u32 framingBitErrorCount; - u32 lineCodeViolationCount; - - u32 lossOfFrameCount; - u32 changeOfFrameAlignmentCount; - u32 severelyErroredFrameCount; - - u32 check; -}; - -typedef struct lmc_xinfo { - u32 Magic0; /* BEEFCAFE */ - - u32 PciCardType; - u32 PciSlotNumber; /* PCI slot number */ - - u16 DriverMajorVersion; - u16 DriverMinorVersion; - u16 DriverSubVersion; - - u16 XilinxRevisionNumber; - u16 MaxFrameSize; - - u16 t1_alarm1_status; - u16 t1_alarm2_status; - - int link_status; - u32 mii_reg16; - - u32 Magic1; /* DEADBEEF */ -} LMC_XINFO; - - -/* - * forward decl - */ -struct lmc___softc { - char *name; - u8 board_idx; - struct lmc_extra_statistics extra_stats; - struct net_device *lmc_device; - - int hang, rxdesc, bad_packet, some_counter; - u32 txgo; - struct lmc_regfile_t lmc_csrs; - volatile u32 lmc_txtick; - volatile u32 lmc_rxtick; - u32 lmc_flags; - u32 lmc_intrmask; /* our copy of csr_intr */ - u32 lmc_cmdmode; /* our copy of csr_cmdmode */ - u32 lmc_busmode; /* our copy of csr_busmode */ - u32 lmc_gpio_io; /* state of in/out settings */ - u32 lmc_gpio; /* state of outputs */ - struct sk_buff* lmc_txq[LMC_TXDESCS]; - struct sk_buff* lmc_rxq[LMC_RXDESCS]; - volatile - struct tulip_desc_t lmc_rxring[LMC_RXDESCS]; - volatile - struct tulip_desc_t lmc_txring[LMC_TXDESCS]; - unsigned int lmc_next_rx, lmc_next_tx; - volatile - unsigned int lmc_taint_tx, lmc_taint_rx; - int lmc_tx_start, lmc_txfull; - int lmc_txbusy; - u16 lmc_miireg16; - int lmc_ok; - int last_link_status; - int lmc_cardtype; - u32 last_frameerr; - lmc_media_t *lmc_media; - struct timer_list timer; - lmc_ctl_t ictl; - u32 TxDescriptControlInit; - - int tx_TimeoutInd; /* additional driver state */ - int tx_TimeoutDisplay; - unsigned int lastlmc_taint_tx; - int lasttx_packets; - u32 tx_clockState; - u32 lmc_crcSize; - LMC_XINFO lmc_xinfo; - char lmc_yel, lmc_blue, lmc_red; /* for T1 and DS3 */ - char lmc_timing; /* for HSSI and SSI */ - int got_irq; - - char last_led_err[4]; - - u32 last_int; - u32 num_int; - - spinlock_t lmc_lock; - u16 if_type; /* HDLC/PPP or NET */ - - /* Failure cases */ - u8 failed_ring; - u8 failed_recv_alloc; - - /* Structure check */ - u32 check; -}; - -#define LMC_PCI_TIME 1 -#define LMC_EXT_TIME 0 - -#define PKT_BUF_SZ 1542 /* was 1536 */ - -/* CSR5 settings */ -#define TIMER_INT 0x00000800 -#define TP_LINK_FAIL 0x00001000 -#define TP_LINK_PASS 0x00000010 -#define NORMAL_INT 0x00010000 -#define ABNORMAL_INT 0x00008000 -#define RX_JABBER_INT 0x00000200 -#define RX_DIED 0x00000100 -#define RX_NOBUFF 0x00000080 -#define RX_INT 0x00000040 -#define TX_FIFO_UNDER 0x00000020 -#define TX_JABBER 0x00000008 -#define TX_NOBUFF 0x00000004 -#define TX_DIED 0x00000002 -#define TX_INT 0x00000001 - -/* CSR6 settings */ -#define OPERATION_MODE 0x00000200 /* Full Duplex */ -#define PROMISC_MODE 0x00000040 /* Promiscuous Mode */ -#define RECEIVE_ALL 0x40000000 /* Receive All */ -#define PASS_BAD_FRAMES 0x00000008 /* Pass Bad Frames */ - -/* Dec control registers CSR6 as well */ -#define LMC_DEC_ST 0x00002000 -#define LMC_DEC_SR 0x00000002 - -/* CSR15 settings */ -#define RECV_WATCHDOG_DISABLE 0x00000010 -#define JABBER_DISABLE 0x00000001 - -/* More settings */ -/* - * aSR6 -- Command (Operation Mode) Register - */ -#define TULIP_CMD_RECEIVEALL 0x40000000L /* (RW) Receivel all frames? */ -#define TULIP_CMD_MUSTBEONE 0x02000000L /* (RW) Must Be One (21140) */ -#define TULIP_CMD_TXTHRSHLDCTL 0x00400000L /* (RW) Transmit Threshold Mode (21140) */ -#define TULIP_CMD_STOREFWD 0x00200000L /* (RW) Store and Forward (21140) */ -#define TULIP_CMD_NOHEARTBEAT 0x00080000L /* (RW) No Heartbeat (21140) */ -#define TULIP_CMD_PORTSELECT 0x00040000L /* (RW) Post Select (100Mb) (21140) */ -#define TULIP_CMD_FULLDUPLEX 0x00000200L /* (RW) Full Duplex Mode */ -#define TULIP_CMD_OPERMODE 0x00000C00L /* (RW) Operating Mode */ -#define TULIP_CMD_PROMISCUOUS 0x00000041L /* (RW) Promiscuous Mode */ -#define TULIP_CMD_PASSBADPKT 0x00000008L /* (RW) Pass Bad Frames */ -#define TULIP_CMD_THRESHOLDCTL 0x0000C000L /* (RW) Threshold Control */ - -#define TULIP_GP_PINSET 0x00000100L -#define TULIP_BUSMODE_SWRESET 0x00000001L -#define TULIP_WATCHDOG_TXDISABLE 0x00000001L -#define TULIP_WATCHDOG_RXDISABLE 0x00000010L - -#define TULIP_STS_NORMALINTR 0x00010000L /* (RW) Normal Interrupt */ -#define TULIP_STS_ABNRMLINTR 0x00008000L /* (RW) Abnormal Interrupt */ -#define TULIP_STS_ERI 0x00004000L /* (RW) Early Receive Interrupt */ -#define TULIP_STS_SYSERROR 0x00002000L /* (RW) System Error */ -#define TULIP_STS_GTE 0x00000800L /* (RW) General Pupose Timer Exp */ -#define TULIP_STS_ETI 0x00000400L /* (RW) Early Transmit Interrupt */ -#define TULIP_STS_RXWT 0x00000200L /* (RW) Receiver Watchdog Timeout */ -#define TULIP_STS_RXSTOPPED 0x00000100L /* (RW) Receiver Process Stopped */ -#define TULIP_STS_RXNOBUF 0x00000080L /* (RW) Receive Buf Unavail */ -#define TULIP_STS_RXINTR 0x00000040L /* (RW) Receive Interrupt */ -#define TULIP_STS_TXUNDERFLOW 0x00000020L /* (RW) Transmit Underflow */ -#define TULIP_STS_TXJABER 0x00000008L /* (RW) Jabber timeout */ -#define TULIP_STS_TXNOBUF 0x00000004L -#define TULIP_STS_TXSTOPPED 0x00000002L /* (RW) Transmit Process Stopped */ -#define TULIP_STS_TXINTR 0x00000001L /* (RW) Transmit Interrupt */ - -#define TULIP_STS_RXS_STOPPED 0x00000000L /* 000 - Stopped */ - -#define TULIP_STS_RXSTOPPED 0x00000100L /* (RW) Receive Process Stopped */ -#define TULIP_STS_RXNOBUF 0x00000080L - -#define TULIP_CMD_TXRUN 0x00002000L /* (RW) Start/Stop Transmitter */ -#define TULIP_CMD_RXRUN 0x00000002L /* (RW) Start/Stop Receive Filtering */ -#define TULIP_DSTS_TxDEFERRED 0x00000001 /* Initially Deferred */ -#define TULIP_DSTS_OWNER 0x80000000 /* Owner (1 = 21040) */ -#define TULIP_DSTS_RxMIIERR 0x00000008 -#define LMC_DSTS_ERRSUM (TULIP_DSTS_RxMIIERR) - -#define TULIP_DEFAULT_INTR_MASK (TULIP_STS_NORMALINTR \ - | TULIP_STS_RXINTR \ - | TULIP_STS_TXINTR \ - | TULIP_STS_ABNRMLINTR \ - | TULIP_STS_SYSERROR \ - | TULIP_STS_TXSTOPPED \ - | TULIP_STS_TXUNDERFLOW\ - | TULIP_STS_RXSTOPPED ) - -#define DESC_OWNED_BY_SYSTEM ((u32)(0x00000000)) -#define DESC_OWNED_BY_DC21X4 ((u32)(0x80000000)) - -#ifndef TULIP_CMD_RECEIVEALL -#define TULIP_CMD_RECEIVEALL 0x40000000L -#endif - -/* Adapter module number */ -#define LMC_ADAP_HSSI 2 -#define LMC_ADAP_DS3 3 -#define LMC_ADAP_SSI 4 -#define LMC_ADAP_T1 5 - -#define LMC_MTU 1500 - -#define LMC_CRC_LEN_16 2 /* 16-bit CRC */ -#define LMC_CRC_LEN_32 4 - -#endif /* _LMC_VAR_H_ */ diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 9055cb380ee2..db0f4fcfdaf4 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -79,8 +79,9 @@ extern int icmpv6_init(void); extern int icmpv6_err_convert(u8 type, u8 code, int *err); extern void icmpv6_cleanup(void); -extern void icmpv6_param_prob(struct sk_buff *skb, - u8 code, int pos); +extern void icmpv6_param_prob_reason(struct sk_buff *skb, + u8 code, int pos, + enum skb_drop_reason reason); struct flowi6; struct in6_addr; @@ -91,6 +92,12 @@ extern void icmpv6_flow_init(struct sock *sk, const struct in6_addr *daddr, int oif); +static inline void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) +{ + icmpv6_param_prob_reason(skb, code, pos, + SKB_DROP_REASON_NOT_SPECIFIED); +} + static inline bool icmpv6_is_err(int type) { switch (type) { diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index dacf69516002..0f2596297f6a 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -111,46 +111,43 @@ struct mlx5_accel_esp_xfrm { struct mlx5_accel_esp_xfrm_attrs attrs; }; -enum { - MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA = 1UL << 0, -}; - enum mlx5_accel_ipsec_cap { MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0, - MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA = 1 << 1, - MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 2, - MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3, - MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4, - MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER = 1 << 5, - MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 6, - MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7, + MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 1, + MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 2, + MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 3, + MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 4, }; -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_EN_IPSEC -u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev); +u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev); struct mlx5_accel_esp_xfrm * mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags); + const struct mlx5_accel_esp_xfrm_attrs *attrs); void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm); int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, const struct mlx5_accel_esp_xfrm_attrs *attrs); #else -static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; } +static inline u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) +{ + return 0; +} static inline struct mlx5_accel_esp_xfrm * mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) { return ERR_PTR(-EOPNOTSUPP); } + const struct mlx5_accel_esp_xfrm_attrs *attrs) +{ + return ERR_PTR(-EOPNOTSUPP); +} static inline void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {} static inline int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; } -#endif /* CONFIG_MLX5_ACCEL */ +#endif /* CONFIG_MLX5_EN_IPSEC */ #endif /* __MLX5_ACCEL_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9424503eb8d3..ff47d49d8be4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -778,9 +778,6 @@ struct mlx5_core_dev { #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif -#ifdef CONFIG_MLX5_ACCEL - const struct mlx5_accel_ipsec_ops *ipsec_ops; -#endif struct mlx5_clock clock; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h index 07d77323f78a..45c7c0d67635 100644 --- a/include/linux/mlx5/mlx5_ifc_fpga.h +++ b/include/linux/mlx5/mlx5_ifc_fpga.h @@ -54,7 +54,6 @@ enum { enum { MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2, - MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS = 0x3, }; struct mlx5_ifc_fpga_shell_caps_bits { @@ -387,89 +386,6 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_tls_extended_cap_bits { - u8 aes_gcm_128[0x1]; - u8 aes_gcm_256[0x1]; - u8 reserved_at_2[0x1e]; - u8 reserved_at_20[0x20]; - u8 context_capacity_total[0x20]; - u8 context_capacity_rx[0x20]; - u8 context_capacity_tx[0x20]; - u8 reserved_at_a0[0x10]; - u8 tls_counter_size[0x10]; - u8 tls_counters_addr_low[0x20]; - u8 tls_counters_addr_high[0x20]; - u8 rx[0x1]; - u8 tx[0x1]; - u8 tls_v12[0x1]; - u8 tls_v13[0x1]; - u8 lro[0x1]; - u8 ipv6[0x1]; - u8 reserved_at_106[0x1a]; -}; - -struct mlx5_ifc_ipsec_extended_cap_bits { - u8 encapsulation[0x20]; - - u8 reserved_0[0x12]; - u8 v2_command[0x1]; - u8 udp_encap[0x1]; - u8 rx_no_trailer[0x1]; - u8 ipv4_fragment[0x1]; - u8 ipv6[0x1]; - u8 esn[0x1]; - u8 lso[0x1]; - u8 transport_and_tunnel_mode[0x1]; - u8 tunnel_mode[0x1]; - u8 transport_mode[0x1]; - u8 ah_esp[0x1]; - u8 esp[0x1]; - u8 ah[0x1]; - u8 ipv4_options[0x1]; - - u8 auth_alg[0x20]; - - u8 enc_alg[0x20]; - - u8 sa_cap[0x20]; - - u8 reserved_1[0x10]; - u8 number_of_ipsec_counters[0x10]; - - u8 ipsec_counters_addr_low[0x20]; - u8 ipsec_counters_addr_high[0x20]; -}; - -struct mlx5_ifc_ipsec_counters_bits { - u8 dec_in_packets[0x40]; - - u8 dec_out_packets[0x40]; - - u8 dec_bypass_packets[0x40]; - - u8 enc_in_packets[0x40]; - - u8 enc_out_packets[0x40]; - - u8 enc_bypass_packets[0x40]; - - u8 drop_dec_packets[0x40]; - - u8 failed_auth_dec_packets[0x40]; - - u8 drop_enc_packets[0x40]; - - u8 success_add_sa[0x40]; - - u8 fail_add_sa[0x40]; - - u8 success_delete_sa[0x40]; - - u8 fail_delete_sa[0x40]; - - u8 dropped_cmd[0x40]; -}; - enum { MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RETRY_COUNTER_EXPIRED = 0x1, MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RNR_EXPIRED = 0x2, @@ -486,131 +402,4 @@ struct mlx5_ifc_fpga_qp_error_event_bits { u8 reserved_at_c0[0x8]; u8 fpga_qpn[0x18]; }; -enum mlx5_ifc_fpga_ipsec_response_syndrome { - MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0, - MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1, - MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE = 2, - MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3, -}; - -struct mlx5_ifc_fpga_ipsec_cmd_resp { - __be32 syndrome; - union { - __be32 sw_sa_handle; - __be32 flags; - }; - u8 reserved[24]; -} __packed; - -enum mlx5_ifc_fpga_ipsec_cmd_opcode { - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA = 0, - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA = 1, - MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 = 2, - MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 = 3, - MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2 = 4, - MLX5_FPGA_IPSEC_CMD_OP_SET_CAP = 5, -}; - -enum mlx5_ifc_fpga_ipsec_cap { - MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0), -}; - -struct mlx5_ifc_fpga_ipsec_cmd_cap { - __be32 cmd; - __be32 flags; - u8 reserved[24]; -} __packed; - -enum mlx5_ifc_fpga_ipsec_sa_flags { - MLX5_FPGA_IPSEC_SA_ESN_EN = BIT(0), - MLX5_FPGA_IPSEC_SA_ESN_OVERLAP = BIT(1), - MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2), - MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3), - MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4), - MLX5_FPGA_IPSEC_SA_SA_VALID = BIT(5), - MLX5_FPGA_IPSEC_SA_IP_ESP = BIT(6), - MLX5_FPGA_IPSEC_SA_IP_AH = BIT(7), -}; - -enum mlx5_ifc_fpga_ipsec_sa_enc_mode { - MLX5_FPGA_IPSEC_SA_ENC_MODE_NONE = 0, - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128 = 1, - MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128 = 3, -}; - -struct mlx5_ifc_fpga_ipsec_sa_v1 { - __be32 cmd; - u8 key_enc[32]; - u8 key_auth[32]; - __be32 sip[4]; - __be32 dip[4]; - union { - struct { - __be32 reserved; - u8 salt_iv[8]; - __be32 salt; - } __packed gcm; - struct { - u8 salt[16]; - } __packed cbc; - }; - __be32 spi; - __be32 sw_sa_handle; - __be16 tfclen; - u8 enc_mode; - u8 reserved1[2]; - u8 flags; - u8 reserved2[2]; -}; - -struct mlx5_ifc_fpga_ipsec_sa { - struct mlx5_ifc_fpga_ipsec_sa_v1 ipsec_sa_v1; - __be16 udp_sp; - __be16 udp_dp; - u8 reserved1[4]; - __be32 esn; - __be16 vid; /* only 12 bits, rest is reserved */ - __be16 reserved2; -} __packed; - -enum fpga_tls_cmds { - CMD_SETUP_STREAM = 0x1001, - CMD_TEARDOWN_STREAM = 0x1002, - CMD_RESYNC_RX = 0x1003, -}; - -#define MLX5_TLS_1_2 (0) - -#define MLX5_TLS_ALG_AES_GCM_128 (0) -#define MLX5_TLS_ALG_AES_GCM_256 (1) - -struct mlx5_ifc_tls_cmd_bits { - u8 command_type[0x20]; - u8 ipv6[0x1]; - u8 direction_sx[0x1]; - u8 tls_version[0x2]; - u8 reserved[0x1c]; - u8 swid[0x20]; - u8 src_port[0x10]; - u8 dst_port[0x10]; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6; - union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6; - u8 tls_rcd_sn[0x40]; - u8 tcp_sn[0x20]; - u8 tls_implicit_iv[0x20]; - u8 tls_xor_iv[0x40]; - u8 encryption_key[0x100]; - u8 alg[4]; - u8 reserved2[0x1c]; - u8 reserved3[0x4a0]; -}; - -struct mlx5_ifc_tls_resp_bits { - u8 syndrome[0x20]; - u8 stream_id[0x20]; - u8 reserved[0x40]; -}; - -#define MLX5_TLS_COMMAND_SIZE (0x100) - #endif /* MLX5_IFC_FPGA_H */ diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 28a928b0684b..e96ee1e348cb 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -141,7 +141,7 @@ enum mlx5_ptys_width { MLX5_PTYS_WIDTH_12X = 1 << 4, }; -#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) +#define MLX5E_PROT_MASK(link_mode) (1U << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? MLX5_GET(reg, out, ext_##field) : \ MLX5_GET(reg, out, field)) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 59e27a2b7bf0..a602f29365b0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -59,7 +59,8 @@ struct dsa_port; struct ip_tunnel_parm; struct macsec_context; struct macsec_ops; - +struct netdev_name_node; +struct sd_flow_limit; struct sfp_bus; /* 802.11 specific */ struct wireless_dev; @@ -202,6 +203,7 @@ struct net_device_core_stats { local_t rx_dropped; local_t tx_dropped; local_t rx_nohandler; + local_t rx_otherhost_dropped; } __aligned(4 * sizeof(local_t)); #include <linux/cache.h> @@ -862,6 +864,7 @@ enum net_device_path_type { DEV_PATH_BRIDGE, DEV_PATH_PPPOE, DEV_PATH_DSA, + DEV_PATH_MTK_WDMA, }; struct net_device_path { @@ -887,6 +890,12 @@ struct net_device_path { int port; u16 proto; } dsa; + struct { + u8 wdma_idx; + u8 queue; + u16 wcid; + u8 bss; + } mtk_wdma; }; }; @@ -1013,16 +1022,6 @@ struct dev_ifalias { struct devlink; struct tlsdev_ops; -struct netdev_name_node { - struct hlist_node hlist; - struct list_head list; - struct net_device *dev; - const char *name; -}; - -int netdev_name_node_alt_create(struct net_device *dev, const char *name); -int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); - struct netdev_net_notifier { struct list_head list; struct notifier_block *nb; @@ -1261,6 +1260,10 @@ struct netdev_net_notifier { * struct net_device *dev, * const unsigned char *addr, u16 vid) * Deletes the FDB entry from dev coresponding to addr. + * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[], + * struct net_device *dev, + * u16 vid, + * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, * int *idx) @@ -1511,6 +1514,11 @@ struct net_device_ops { struct net_device *dev, const unsigned char *addr, u16 vid); + int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + u16 vid, + struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, @@ -2968,7 +2976,6 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); -int netdev_get_name(struct net *net, char *name, int ifindex); int dev_restart(struct net_device *dev); @@ -3027,19 +3034,6 @@ static inline bool dev_has_header(const struct net_device *dev) return dev->header_ops && dev->header_ops->create; } -#ifdef CONFIG_NET_FLOW_LIMIT -#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ -struct sd_flow_limit { - u64 count; - unsigned int num_buckets; - unsigned int history_head; - u16 history[FLOW_LIMIT_HISTORY]; - u8 buckets[]; -}; - -extern int netdev_flow_limit_table_len; -#endif /* CONFIG_NET_FLOW_LIMIT */ - /* * Incoming packets are placed on per-CPU queues */ @@ -3763,7 +3757,6 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); void __dev_notify_flags(struct net_device *, unsigned int old_flags, unsigned int gchanges); -int dev_change_name(struct net_device *, const char *); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, @@ -3775,13 +3768,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, return __dev_change_net_namespace(dev, net, pat, 0); } int __dev_set_mtu(struct net_device *, int); -int dev_validate_mtu(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); -int dev_set_mtu_ext(struct net_device *dev, int mtu, - struct netlink_ext_ack *extack); int dev_set_mtu(struct net_device *, int); -int dev_change_tx_queue_len(struct net_device *, unsigned long); -void dev_set_group(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, @@ -3789,24 +3776,13 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); -int dev_change_carrier(struct net_device *, bool new_carrier); -int dev_get_phys_port_id(struct net_device *dev, - struct netdev_phys_item_id *ppid); -int dev_get_phys_port_name(struct net_device *dev, - char *name, size_t len); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); -int dev_change_proto_down(struct net_device *dev, bool proto_down); -void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, - u32 value); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); -int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, int expected_fd, u32 flags); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); @@ -3871,6 +3847,7 @@ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) +DEV_CORE_STATS_INC(rx_otherhost_dropped) static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, @@ -3891,12 +3868,6 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, bool dev_nit_active(struct net_device *dev); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); -extern int netdev_budget; -extern unsigned int netdev_budget_usecs; - -/* Called by rtnetlink.c:rtnl_unlock() */ -void netdev_run_todo(void); - static inline void __dev_put(struct net_device *dev) { if (dev) { @@ -4013,10 +3984,7 @@ static inline void dev_replace_track(struct net_device *odev, * called netif_lowerlayer_*() because they represent the state of any * kind of lower layer not just hardware media. */ - -void linkwatch_init_dev(struct net_device *dev); void linkwatch_fire_event(struct net_device *dev); -void linkwatch_forget_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4462,9 +4430,6 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); -void dev_addr_flush(struct net_device *dev); -int dev_addr_init(struct net_device *dev); -void dev_addr_check(struct net_device *dev); /* Functions used for unicast addresses handling */ int dev_uc_add(struct net_device *dev, const unsigned char *addr); @@ -4554,7 +4519,6 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); -void __dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int dev_set_allmulti(struct net_device *dev, int inc); void netdev_state_change(struct net_device *dev); @@ -4572,11 +4536,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); extern int netdev_max_backlog; -extern int netdev_tstamp_prequeue; -extern int netdev_unregister_timeout_secs; -extern int weight_p; -extern int dev_weight_rx_bias; -extern int dev_weight_tx_bias; extern int dev_rx_weight; extern int dev_tx_weight; extern int gro_normal_batch; @@ -4764,12 +4723,6 @@ static inline void netdev_rx_csum_fault(struct net_device *dev, void net_enable_timestamp(void); void net_disable_timestamp(void); -#ifdef CONFIG_PROC_FS -int __init dev_proc_init(void); -#else -#define dev_proc_init() 0 -#endif - static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev, bool more) @@ -4805,8 +4758,6 @@ extern const struct kobj_ns_type_operations net_ns_type_operations; const char *netdev_drivername(const struct net_device *dev); -void linkwatch_run_queue(void); - static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 223781622b33..6d06896fc20d 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -160,11 +160,6 @@ struct phylink_mac_ops { * clearing unsupported speeds and duplex settings. The port modes * should not be cleared; phylink_set_port_modes() will help with this. * - * If the @state->interface mode is %PHY_INTERFACE_MODE_1000BASEX - * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode - * based on @state->advertising and/or @state->speed and update - * @state->interface accordingly. See phylink_helper_basex_speed(). - * * When @config->supported_interfaces has been set, phylink will iterate * over the supported interfaces to determine the full capability of the * MAC. The validation function must not print errors if @state->interface @@ -579,7 +574,6 @@ int phylink_speed_up(struct phylink *pl); #define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) void phylink_set_port_modes(unsigned long *bits); -void phylink_helper_basex_speed(struct phylink_link_state *state); void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, u16 bmsr, u16 lpa); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a30cae8b0a5..0ef11df1bc67 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -408,11 +408,9 @@ enum skb_drop_reason { */ SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ - SKB_DROP_REASON_PTYPE_ABSENT, /* not packet_type found to handle - * the skb. For an etner packet, - * this means that L3 protocol is - * not supported - */ + SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented + * or not supported + */ SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation * error */ @@ -444,9 +442,35 @@ enum skb_drop_reason { SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented * at tun/tap, e.g., check_filter() */ + SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ + SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC + * 2211, such as a broadcasts + * ICMP_TIMESTAMP + */ + SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding + * to IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding + * to IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed + * the MTU) + */ SKB_DROP_REASON_MAX, }; +#define SKB_DR_INIT(name, reason) \ + enum skb_drop_reason name = SKB_DROP_REASON_##reason +#define SKB_DR(name) \ + SKB_DR_INIT(name, NOT_SPECIFIED) +#define SKB_DR_SET(name, reason) \ + (name = SKB_DROP_REASON_##reason) +#define SKB_DR_OR(name, reason) \ + do { \ + if (name == SKB_DROP_REASON_NOT_SPECIFIED) \ + SKB_DR_SET(name, reason); \ + } while (0) + /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. @@ -3836,8 +3860,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); -struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, - int *err); +struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h new file mode 100644 index 000000000000..7e00cca06709 --- /dev/null +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -0,0 +1,131 @@ +#ifndef __MTK_WED_H +#define __MTK_WED_H + +#include <linux/kernel.h> +#include <linux/rcupdate.h> +#include <linux/regmap.h> +#include <linux/pci.h> + +#define MTK_WED_TX_QUEUES 2 + +struct mtk_wed_hw; +struct mtk_wdma_desc; + +struct mtk_wed_ring { + struct mtk_wdma_desc *desc; + dma_addr_t desc_phys; + int size; + + u32 reg_base; + void __iomem *wpdma; +}; + +struct mtk_wed_device { +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + const struct mtk_wed_ops *ops; + struct device *dev; + struct mtk_wed_hw *hw; + bool init_done, running; + int wdma_idx; + int irq; + + struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES]; + struct mtk_wed_ring txfree_ring; + struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; + + struct { + int size; + void **pages; + struct mtk_wdma_desc *desc; + dma_addr_t desc_phys; + } buf_ring; + + /* filled by driver: */ + struct { + struct pci_dev *pci_dev; + + u32 wpdma_phys; + + u16 token_start; + unsigned int nbuf; + + u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); + int (*offload_enable)(struct mtk_wed_device *wed); + void (*offload_disable)(struct mtk_wed_device *wed); + } wlan; +#endif +}; + +struct mtk_wed_ops { + int (*attach)(struct mtk_wed_device *dev); + int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + int (*txfree_ring_setup)(struct mtk_wed_device *dev, + void __iomem *regs); + void (*detach)(struct mtk_wed_device *dev); + + void (*stop)(struct mtk_wed_device *dev); + void (*start)(struct mtk_wed_device *dev, u32 irq_mask); + void (*reset_dma)(struct mtk_wed_device *dev); + + u32 (*reg_read)(struct mtk_wed_device *dev, u32 reg); + void (*reg_write)(struct mtk_wed_device *dev, u32 reg, u32 val); + + u32 (*irq_get)(struct mtk_wed_device *dev, u32 mask); + void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); +}; + +extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; + +static inline int +mtk_wed_device_attach(struct mtk_wed_device *dev) +{ + int ret = -ENODEV; + +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + rcu_read_lock(); + dev->ops = rcu_dereference(mtk_soc_wed_ops); + if (dev->ops) + ret = dev->ops->attach(dev); + else + rcu_read_unlock(); + + if (ret) + dev->ops = NULL; +#endif + + return ret; +} + +#ifdef CONFIG_NET_MEDIATEK_SOC_WED +#define mtk_wed_device_active(_dev) !!(_dev)->ops +#define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) +#define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask) +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->tx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_txfree_ring_setup(_dev, _regs) \ + (_dev)->ops->txfree_ring_setup(_dev, _regs) +#define mtk_wed_device_reg_read(_dev, _reg) \ + (_dev)->ops->reg_read(_dev, _reg) +#define mtk_wed_device_reg_write(_dev, _reg, _val) \ + (_dev)->ops->reg_write(_dev, _reg, _val) +#define mtk_wed_device_irq_get(_dev, _mask) \ + (_dev)->ops->irq_get(_dev, _mask) +#define mtk_wed_device_irq_set_mask(_dev, _mask) \ + (_dev)->ops->irq_set_mask(_dev, _mask) +#else +static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) +{ + return false; +} +#define mtk_wed_device_detach(_dev) do {} while (0) +#define mtk_wed_device_start(_dev, _mask) do {} while (0) +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_txfree_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_reg_read(_dev, _reg) 0 +#define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) +#define mtk_wed_device_irq_get(_dev, _mask) 0 +#define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0) +#endif + +#endif diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h index 809bccd08455..cc42db51bbba 100644 --- a/include/linux/usb/rndis_host.h +++ b/include/linux/usb/rndis_host.h @@ -197,6 +197,7 @@ struct rndis_keepalive_c { /* IN (optionally OUT) */ /* Flags for driver_info::data */ #define RNDIS_DRIVER_DATA_POLL_STATUS 1 /* poll status before control */ +#define RNDIS_DRIVER_DATA_DST_MAC_FIXUP 2 /* device ignores configured MAC address */ extern void rndis_status(struct usbnet *dev, struct urb *urb); extern int diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 8336e86ce606..1b4d72d5e891 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -214,6 +214,7 @@ extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf) extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_status(struct usbnet *, struct urb *); +extern int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb); /* CDC and RNDIS support the same host-chosen packet filters for IN transfers */ #define DEFAULT_FILTER (USB_CDC_PACKET_TYPE_BROADCAST \ diff --git a/include/net/act_api.h b/include/net/act_api.h index 3049cb69c025..9cf6870b526e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -134,7 +134,8 @@ struct tc_action_ops { (*get_psample_group)(const struct tc_action *a, tc_action_priv_destructor *destructor); int (*offload_act_setup)(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind); + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack); }; struct tc_action_net { diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 4cfdef6ca4f6..c8490729b4ae 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -64,6 +64,14 @@ struct inet6_ifaddr { struct hlist_node addr_lst; struct list_head if_list; + /* + * Used to safely traverse idev->addr_list in process context + * if the idev->lock needed to protect idev->addr_list cannot be held. + * In that case, add the items to this list temporarily and iterate + * without holding idev->lock. + * See addrconf_ifdown and dev_forward_change. + */ + struct list_head if_list_aux; struct list_head tmp_list; struct inet6_ifaddr *ifpub; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 6a82bcb8813b..a378eff827c7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -212,7 +212,7 @@ struct fib_rt_info { u32 tb_id; __be32 dst; int dst_len; - u8 tos; + dscp_t dscp; u8 type; u8 offload:1, trap:1, @@ -225,7 +225,7 @@ struct fib_entry_notifier_info { u32 dst; int dst_len; struct fib_info *fi; - u8 tos; + dscp_t dscp; u8 type; u32 tb_id; }; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b08b70989d2c..69e6c6a218be 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -43,6 +43,11 @@ union nf_conntrack_expect_proto { /* insert expect proto private data here */ }; +struct nf_conntrack_net_ecache { + struct delayed_work dwork; + struct netns_ct *ct_net; +}; + struct nf_conntrack_net { /* only used when new connection is allocated: */ atomic_t count; @@ -58,8 +63,7 @@ struct nf_conntrack_net { struct ctl_table_header *sysctl_header; #endif #ifdef CONFIG_NF_CONNTRACK_EVENTS - struct delayed_work ecache_dwork; - struct netns_ct *ct_net; + struct nf_conntrack_net_ecache ecache; #endif }; diff --git a/include/net/ping.h b/include/net/ping.h index 2fe78874318c..e4ff3911cbf5 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -71,12 +71,12 @@ void ping_err(struct sk_buff *skb, int offset, u32 info); int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *); -int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -bool ping_rcv(struct sk_buff *skb); +enum skb_drop_reason ping_rcv(struct sk_buff *skb); #ifdef CONFIG_PROC_FS void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a3b57a93228a..8cf001aed858 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -547,10 +547,12 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } int tc_setup_offload_action(struct flow_action *flow_action, - const struct tcf_exts *exts); + const struct tcf_exts *exts, + struct netlink_ext_ack *extack); void tc_cleanup_offload_action(struct flow_action *flow_action); int tc_setup_action(struct flow_action *flow_action, - struct tc_action *actions[]); + struct tc_action *actions[], + struct netlink_ext_ack *extack); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 9f48733bfd21..bf8bb3357825 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -10,9 +10,23 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { - RTNL_FLAG_DOIT_UNLOCKED = 1, + RTNL_FLAG_DOIT_UNLOCKED = BIT(0), + RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), }; +enum rtnl_kinds { + RTNL_KIND_NEW, + RTNL_KIND_DEL, + RTNL_KIND_GET, + RTNL_KIND_SET +}; +#define RTNL_KIND_MASK 0x3 + +static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype) +{ + return msgtype & RTNL_KIND_MASK; +} + void rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_register_module(struct module *owner, int protocol, int msgtype, diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index bf3716fe83e0..a04999ee99b0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,7 +103,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc); extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); -struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int *); typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); diff --git a/include/net/sock.h b/include/net/sock.h index c4b91fc19b9c..a01d6c421aa2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1202,8 +1202,7 @@ struct proto { int (*sendmsg)(struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, - int *addr_len); + size_t len, int flags, int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*bind)(struct sock *sk, @@ -2392,7 +2391,14 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, void (*destructor)(struct sock *sk, struct sk_buff *skb)); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); + +int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason); + +static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + return sock_queue_rcv_skb_reason(sk, skb, NULL); +} int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); struct sk_buff *sock_dequeue_err_skb(struct sock *sk); diff --git a/include/net/strparser.h b/include/net/strparser.h index 732b7097d78e..a191486eb1e4 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -70,6 +70,10 @@ struct sk_skb_cb { * when dst_reg == src_reg. */ u64 temp_reg; + struct tls_msg { + u8 control; + u8 decrypted; + } tls; }; static inline struct strp_msg *strp_msg(struct sk_buff *skb) diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index eb8f01c819e6..832efd40e023 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -59,4 +59,19 @@ static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK; } +static inline bool is_tcf_gact_continue(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_UNSPEC, false); +} + +static inline bool is_tcf_gact_reclassify(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_RECLASSIFY, false); +} + +static inline bool is_tcf_gact_pipe(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_PIPE, false); +} + #endif /* __NET_TC_GACT_H */ diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 00bfee70609e..cab8229b9bed 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -94,4 +94,16 @@ static inline u32 tcf_skbedit_priority(const struct tc_action *a) return priority; } +/* Return true iff action is queue_mapping */ +static inline bool is_tcf_skbedit_queue_mapping(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_QUEUE_MAPPING); +} + +/* Return true iff action is inheritdsfield */ +static inline bool is_tcf_skbedit_inheritdsfield(const struct tc_action *a) +{ + return is_tcf_skbedit_with_flag(a, SKBEDIT_F_INHERITDSFIELD); +} + #endif /* __NET_TC_SKBEDIT_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 70ca4a5e330a..679b1964d494 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -407,7 +407,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); -int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); int tcp_set_rcvlowat(struct sock *sk, int val); int tcp_set_window_clamp(struct sock *sk, int val); @@ -1139,15 +1139,6 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk) return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; } -static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_ca_ops->set_state) - icsk->icsk_ca_ops->set_state(sk, ca_state); - icsk->icsk_ca_state = ca_state; -} - static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1156,6 +1147,9 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) icsk->icsk_ca_ops->cwnd_event(sk, event); } +/* From tcp_cong.c */ +void tcp_set_ca_state(struct sock *sk, const u8 ca_state); + /* From tcp_rate.c */ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, @@ -1207,9 +1201,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) #define TCP_INFINITE_SSTHRESH 0x7fffffff +static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) +{ + return tp->snd_cwnd; +} + +static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) +{ + WARN_ON_ONCE((int)val <= 0); + tp->snd_cwnd = val; +} + static inline bool tcp_in_slow_start(const struct tcp_sock *tp) { - return tp->snd_cwnd < tp->snd_ssthresh; + return tcp_snd_cwnd(tp) < tp->snd_ssthresh; } static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) @@ -1235,8 +1240,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, - ((tp->snd_cwnd >> 1) + - (tp->snd_cwnd >> 2))); + ((tcp_snd_cwnd(tp) >> 1) + + (tcp_snd_cwnd(tp) >> 2))); } /* Use define here intentionally to get WARN_ON location shown at the caller */ @@ -1278,7 +1283,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) /* If in slow start, ensure cwnd grows to twice what was ACKed. */ if (tcp_in_slow_start(tp)) - return tp->snd_cwnd < 2 * tp->max_packets_out; + return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out; return tp->is_cwnd_limited; } diff --git a/include/net/tls.h b/include/net/tls.h index b6968a5b5538..b59f0a63292b 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -64,6 +64,7 @@ #define TLS_AAD_SPACE_SIZE 13 #define MAX_IV_SIZE 16 +#define TLS_TAG_SIZE 16 #define TLS_MAX_REC_SEQ_SIZE 8 /* For CCM mode, the full 16-bytes of IV is made of '4' fields of given sizes. @@ -117,11 +118,6 @@ struct tls_rec { u8 aead_req_ctx[]; }; -struct tls_msg { - struct strp_msg rxm; - u8 control; -}; - struct tx_work { struct delayed_work work; struct sock *sk; @@ -152,13 +148,10 @@ struct tls_sw_context_rx { void (*saved_data_ready)(struct sock *sk); struct sk_buff *recv_pkt; - u8 control; u8 async_capable:1; - u8 decrypted:1; atomic_t decrypt_pending; /* protect crypto_wait with decrypt_pending*/ spinlock_t decrypt_compl_lock; - bool async_notify; }; struct tls_record_info { @@ -378,7 +371,7 @@ void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len); + int flags, int *addr_len); bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, @@ -411,7 +404,9 @@ void tls_free_partial_record(struct sock *sk, struct tls_context *ctx); static inline struct tls_msg *tls_msg(struct sk_buff *skb) { - return (struct tls_msg *)strp_msg(skb); + struct sk_skb_cb *scb = (struct sk_skb_cb *)skb->cb; + + return &scb->tls; } static inline bool tls_is_partially_sent_record(struct tls_context *ctx) diff --git a/include/net/udp.h b/include/net/udp.h index f1c2a88c9005..b83a00330566 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -250,14 +250,14 @@ void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); -struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *off, int *err); +struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, + int *err); static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *err) + int *err) { int off = 0; - return __skb_recv_udp(sk, flags, noblock, &off, err); + return __skb_recv_udp(sk, flags, &off, err); } int udp_v4_early_demux(struct sk_buff *skb); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 69d883f7fb41..11ee4eaf84bd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2497,15 +2497,7 @@ struct ib_device_ops { struct ib_flow_attr *flow_attr, struct ib_udata *udata); int (*destroy_flow)(struct ib_flow *flow_id); - struct ib_flow_action *(*create_flow_action_esp)( - struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs); int (*destroy_flow_action)(struct ib_flow_action *action); - int (*modify_flow_action_esp)( - struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs); int (*set_vf_link_state)(struct ib_device *device, int vf, u32 port, int state); int (*get_vf_config)(struct ib_device *device, int vf, u32 port, diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index e1670e1e4934..2da72a9a5764 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -50,7 +50,7 @@ EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ EM(SKB_DROP_REASON_XDP, XDP) \ EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ - EM(SKB_DROP_REASON_PTYPE_ABSENT, PTYPE_ABSENT) \ + EM(SKB_DROP_REASON_UNHANDLED_PROTO, UNHANDLED_PROTO) \ EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM) \ EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ @@ -61,6 +61,11 @@ EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ + EM(SKB_DROP_REASON_ICMP_CSUM, ICMP_CSUM) \ + EM(SKB_DROP_REASON_INVALID_PROTO, INVALID_PROTO) \ + EM(SKB_DROP_REASON_IP_INADDRERRORS, IP_INADDRERRORS) \ + EM(SKB_DROP_REASON_IP_INNOROUTES, IP_INNOROUTES) \ + EM(SKB_DROP_REASON_PKT_TOO_BIG, PKT_TOO_BIG) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 521059d8dc0a..901b440238d5 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -279,7 +279,7 @@ TRACE_EVENT(tcp_probe, __entry->data_len = skb->len - __tcp_hdrlen(th); __entry->snd_nxt = tp->snd_nxt; __entry->snd_una = tp->snd_una; - __entry->snd_cwnd = tp->snd_cwnd; + __entry->snd_cwnd = tcp_snd_cwnd(tp); __entry->snd_wnd = tp->snd_wnd; __entry->rcv_wnd = tp->rcv_wnd; __entry->ssthresh = tcp_current_ssthresh(sk); @@ -371,6 +371,51 @@ DEFINE_EVENT(tcp_event_skb, tcp_bad_csum, TP_ARGS(skb) ); +TRACE_EVENT(tcp_cong_state_set, + + TP_PROTO(struct sock *sk, const u8 ca_state), + + TP_ARGS(sk, ca_state), + + TP_STRUCT__entry( + __field(const void *, skaddr) + __field(__u16, sport) + __field(__u16, dport) + __array(__u8, saddr, 4) + __array(__u8, daddr, 4) + __array(__u8, saddr_v6, 16) + __array(__u8, daddr_v6, 16) + __field(__u8, cong_state) + ), + + TP_fast_assign( + struct inet_sock *inet = inet_sk(sk); + __be32 *p32; + + __entry->skaddr = sk; + + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + + p32 = (__be32 *) __entry->saddr; + *p32 = inet->inet_saddr; + + p32 = (__be32 *) __entry->daddr; + *p32 = inet->inet_daddr; + + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); + + __entry->cong_state = ca_state; + ), + + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u", + __entry->sport, __entry->dport, + __entry->saddr, __entry->daddr, + __entry->saddr_v6, __entry->daddr_v6, + __entry->cong_state) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index b0d8fea1951d..a9162a6c0284 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -33,8 +33,8 @@ struct btf_type { /* "info" bits arrangement * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused - * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-30: unused + * bits 24-28: kind (e.g. int, ptr, array...etc) + * bits 29-30: unused * bit 31: kind_flag, currently used by * struct, union and fwd */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cc284c048e69..d1e600816b82 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -211,6 +211,9 @@ struct rtnl_link_stats { * @rx_nohandler: Number of packets received on the interface * but dropped by the networking stack because the device is * not designated to receive packets (e.g. backup link in a bond). + * + * @rx_otherhost_dropped: Number of packets dropped due to mismatch + * in destination MAC address. */ struct rtnl_link_stats64 { __u64 rx_packets; @@ -243,6 +246,8 @@ struct rtnl_link_stats64 { __u64 rx_compressed; __u64 tx_compressed; __u64 rx_nohandler; + + __u64 rx_otherhost_dropped; }; /* Subset of link stats useful for in-HW collection. Meaning of the fields is as diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index db05fb55055e..39c565e460c7 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -32,6 +32,8 @@ enum { NDA_NH_ID, NDA_FDB_EXT_ATTRS, NDA_FLAGS_EXT, + NDA_NDM_STATE_MASK, + NDA_NDM_FLAGS_MASK, __NDA_MAX }; diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 4c0cde075c27..855dffb4c1c3 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -72,6 +72,7 @@ struct nlmsghdr { /* Modifiers to DELETE request */ #define NLM_F_NONREC 0x100 /* Do not delete recursively */ +#define NLM_F_BULK 0x200 /* Delete multiple objects */ /* Flags for ACK message */ #define NLM_F_CAPPED 0x100 /* request was capped */ diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 4dfc05651c98..c00adf2fe868 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -43,10 +43,6 @@ #include <linux/tipc.h> #include <asm/byteorder.h> -#ifndef __KERNEL__ -#include <arpa/inet.h> /* for ntohs etc. */ -#endif - /* * Configuration * @@ -269,33 +265,33 @@ static inline int TLV_OK(const void *tlv, __u16 space) */ return (space >= TLV_SPACE(0)) && - (ntohs(((struct tlv_desc *)tlv)->tlv_len) <= space); + (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_len) <= space); } static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type) { return TLV_OK(tlv, space) && - (ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type); + (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_type) == exp_type); } static inline int TLV_GET_LEN(struct tlv_desc *tlv) { - return ntohs(tlv->tlv_len); + return __be16_to_cpu(tlv->tlv_len); } static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len) { - tlv->tlv_len = htons(len); + tlv->tlv_len = __cpu_to_be16(len); } static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type) { - return (ntohs(tlv->tlv_type) == type); + return (__be16_to_cpu(tlv->tlv_type) == type); } static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type) { - tlv->tlv_type = htons(type); + tlv->tlv_type = __cpu_to_be16(type); } static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) @@ -305,8 +301,8 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) tlv_len = TLV_LENGTH(len); tlv_ptr = (struct tlv_desc *)tlv; - tlv_ptr->tlv_type = htons(type); - tlv_ptr->tlv_len = htons(tlv_len); + tlv_ptr->tlv_type = __cpu_to_be16(type); + tlv_ptr->tlv_len = __cpu_to_be16(tlv_len); if (len && data) { memcpy(TLV_DATA(tlv_ptr), data, len); memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len); @@ -348,7 +344,7 @@ static inline void *TLV_LIST_DATA(struct tlv_list_desc *list) static inline void TLV_LIST_STEP(struct tlv_list_desc *list) { - __u16 tlv_space = TLV_ALIGN(ntohs(list->tlv_ptr->tlv_len)); + __u16 tlv_space = TLV_ALIGN(__be16_to_cpu(list->tlv_ptr->tlv_len)); list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space); list->tlv_space -= tlv_space; @@ -404,9 +400,9 @@ static inline int TCM_SET(void *msg, __u16 cmd, __u16 flags, msg_len = TCM_LENGTH(data_len); tcm_hdr = (struct tipc_cfg_msg_hdr *)msg; - tcm_hdr->tcm_len = htonl(msg_len); - tcm_hdr->tcm_type = htons(cmd); - tcm_hdr->tcm_flags = htons(flags); + tcm_hdr->tcm_len = __cpu_to_be32(msg_len); + tcm_hdr->tcm_type = __cpu_to_be16(cmd); + tcm_hdr->tcm_flags = __cpu_to_be16(flags); if (data_len && data) { memcpy(TCM_DATA(msg), data, data_len); memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len); diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 110029ede71e..dea920b3b840 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -330,35 +330,34 @@ static void cache_btf_id(struct bpf_iter_target_info *tinfo, bool bpf_iter_prog_supported(struct bpf_prog *prog) { const char *attach_fname = prog->aux->attach_func_name; + struct bpf_iter_target_info *tinfo = NULL, *iter; u32 prog_btf_id = prog->aux->attach_btf_id; const char *prefix = BPF_ITER_FUNC_PREFIX; - struct bpf_iter_target_info *tinfo; int prefix_len = strlen(prefix); - bool supported = false; if (strncmp(attach_fname, prefix, prefix_len)) return false; mutex_lock(&targets_mutex); - list_for_each_entry(tinfo, &targets, list) { - if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) { - supported = true; + list_for_each_entry(iter, &targets, list) { + if (iter->btf_id && iter->btf_id == prog_btf_id) { + tinfo = iter; break; } - if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) { - cache_btf_id(tinfo, prog); - supported = true; + if (!strcmp(attach_fname + prefix_len, iter->reg_info->target)) { + cache_btf_id(iter, prog); + tinfo = iter; break; } } mutex_unlock(&targets_mutex); - if (supported) { + if (tinfo) { prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size; prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info; } - return supported; + return tinfo != NULL; } const struct bpf_func_proto * @@ -499,12 +498,11 @@ bool bpf_link_is_iter(struct bpf_link *link) int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog) { + struct bpf_iter_target_info *tinfo = NULL, *iter; struct bpf_link_primer link_primer; - struct bpf_iter_target_info *tinfo; union bpf_iter_link_info linfo; struct bpf_iter_link *link; u32 prog_btf_id, linfo_len; - bool existed = false; bpfptr_t ulinfo; int err; @@ -530,14 +528,14 @@ int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, prog_btf_id = prog->aux->attach_btf_id; mutex_lock(&targets_mutex); - list_for_each_entry(tinfo, &targets, list) { - if (tinfo->btf_id == prog_btf_id) { - existed = true; + list_for_each_entry(iter, &targets, list) { + if (iter->btf_id == prog_btf_id) { + tinfo = iter; break; } } mutex_unlock(&targets_mutex); - if (!existed) + if (!tinfo) return -ENOENT; link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 34725bfa1e97..1dd5266fbebb 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -100,13 +100,11 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) return ERR_PTR(-E2BIG); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); - cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); if (!smap) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&smap->map, attr); - smap->map.value_size = value_size; smap->n_buckets = n_buckets; err = get_callchain_buffers(sysctl_perf_event_max_stack); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d175b70067b3..9c1a02b82ecd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4861,6 +4861,11 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); case PTR_TO_MAP_KEY: + if (meta && meta->raw_mode) { + verbose(env, "R%d cannot write into %s\n", regno, + reg_type_str(env, reg->type)); + return -EACCES; + } return check_mem_region_access(env, regno, reg->off, access_size, reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: @@ -4871,13 +4876,23 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_map_access(env, regno, reg->off, access_size, zero_size_allowed); case PTR_TO_MEM: + if (type_is_rdonly_mem(reg->type)) { + if (meta && meta->raw_mode) { + verbose(env, "R%d cannot write into %s\n", regno, + reg_type_str(env, reg->type)); + return -EACCES; + } + } return check_mem_region_access(env, regno, reg->off, access_size, reg->mem_size, zero_size_allowed); case PTR_TO_BUF: if (type_is_rdonly_mem(reg->type)) { - if (meta && meta->raw_mode) + if (meta && meta->raw_mode) { + verbose(env, "R%d cannot write into %s\n", regno, + reg_type_str(env, reg->type)); return -EACCES; + } max_access = &env->prog->aux->max_rdonly_access; } else { @@ -4919,8 +4934,7 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, * out. Only upper bounds can be learned because retval is an * int type and negative retvals are allowed. */ - if (meta) - meta->msize_max_value = reg->umax_value; + meta->msize_max_value = reg->umax_value; /* The register is SCALAR_VALUE; the access check * happens using its boundaries. @@ -4963,24 +4977,33 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size) { + bool may_be_null = type_may_be_null(reg->type); + struct bpf_reg_state saved_reg; + struct bpf_call_arg_meta meta; + int err; + if (register_is_null(reg)) return 0; - if (type_may_be_null(reg->type)) { - /* Assuming that the register contains a value check if the memory - * access is safe. Temporarily save and restore the register's state as - * the conversion shouldn't be visible to a caller. - */ - const struct bpf_reg_state saved_reg = *reg; - int rv; - + memset(&meta, 0, sizeof(meta)); + /* Assuming that the register contains a value check if the memory + * access is safe. Temporarily save and restore the register's state as + * the conversion shouldn't be visible to a caller. + */ + if (may_be_null) { + saved_reg = *reg; mark_ptr_not_null_reg(reg); - rv = check_helper_mem_access(env, regno, mem_size, true, NULL); - *reg = saved_reg; - return rv; } - return check_helper_mem_access(env, regno, mem_size, true, NULL); + err = check_helper_mem_access(env, regno, mem_size, true, &meta); + /* Check access for BPF_WRITE */ + meta.raw_mode = true; + err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta); + + if (may_be_null) + *reg = saved_reg; + + return err; } int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, @@ -4989,16 +5012,22 @@ int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1]; bool may_be_null = type_may_be_null(mem_reg->type); struct bpf_reg_state saved_reg; + struct bpf_call_arg_meta meta; int err; WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5); + memset(&meta, 0, sizeof(meta)); + if (may_be_null) { saved_reg = *mem_reg; mark_ptr_not_null_reg(mem_reg); } - err = check_mem_size_reg(env, reg, regno, true, NULL); + err = check_mem_size_reg(env, reg, regno, true, &meta); + /* Check access for BPF_WRITE */ + meta.raw_mode = true; + err = err ?: check_mem_size_reg(env, reg, regno, true, &meta); if (may_be_null) *mem_reg = saved_reg; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d8553f46caa2..b26f3da943de 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2254,15 +2254,13 @@ static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void const struct bpf_kprobe_multi_link *link = priv; unsigned long *addr_a = a, *addr_b = b; u64 *cookie_a, *cookie_b; - unsigned long tmp1; - u64 tmp2; cookie_a = link->cookies + (addr_a - link->addrs); cookie_b = link->cookies + (addr_b - link->addrs); /* swap addr_a/addr_b and cookie_a/cookie_b values */ - tmp1 = *addr_a; *addr_a = *addr_b; *addr_b = tmp1; - tmp2 = *cookie_a; *cookie_a = *cookie_b; *cookie_b = tmp2; + swap(*addr_a, *addr_b); + swap(*cookie_a, *cookie_b); } static int __bpf_kprobe_multi_cookie_cmp(const void *a, const void *b) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 0c5cb2d6436a..2a7836e115b4 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -53,6 +53,7 @@ #define FLAG_EXPECTED_FAIL BIT(1) #define FLAG_SKB_FRAG BIT(2) #define FLAG_VERIFIER_ZEXT BIT(3) +#define FLAG_LARGE_MEM BIT(4) enum { CLASSIC = BIT(6), /* Old BPF instructions only. */ @@ -7838,7 +7839,7 @@ static struct bpf_test tests[] = { }, /* BPF_LDX_MEM B/H/W/DW */ { - "BPF_LDX_MEM | BPF_B", + "BPF_LDX_MEM | BPF_B, base", .u.insns_int = { BPF_LD_IMM64(R1, 0x0102030405060708ULL), BPF_LD_IMM64(R2, 0x0000000000000008ULL), @@ -7878,7 +7879,56 @@ static struct bpf_test tests[] = { .stack_depth = 8, }, { - "BPF_LDX_MEM | BPF_H", + "BPF_LDX_MEM | BPF_B, negative offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000000000088ULL), + BPF_ALU64_IMM(BPF_ADD, R1, 512), + BPF_STX_MEM(BPF_B, R1, R2, -256), + BPF_LDX_MEM(BPF_B, R0, R1, -256), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 512, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_B, small positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000000000088ULL), + BPF_STX_MEM(BPF_B, R1, R2, 256), + BPF_LDX_MEM(BPF_B, R0, R1, 256), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 512, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_B, large positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000000000088ULL), + BPF_STX_MEM(BPF_B, R1, R2, 4096), + BPF_LDX_MEM(BPF_B, R0, R1, 4096), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 4096 + 16, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_H, base", .u.insns_int = { BPF_LD_IMM64(R1, 0x0102030405060708ULL), BPF_LD_IMM64(R2, 0x0000000000000708ULL), @@ -7918,7 +7968,72 @@ static struct bpf_test tests[] = { .stack_depth = 8, }, { - "BPF_LDX_MEM | BPF_W", + "BPF_LDX_MEM | BPF_H, negative offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000000008788ULL), + BPF_ALU64_IMM(BPF_ADD, R1, 512), + BPF_STX_MEM(BPF_H, R1, R2, -256), + BPF_LDX_MEM(BPF_H, R0, R1, -256), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 512, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_H, small positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000000008788ULL), + BPF_STX_MEM(BPF_H, R1, R2, 256), + BPF_LDX_MEM(BPF_H, R0, R1, 256), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 512, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_H, large positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000000008788ULL), + BPF_STX_MEM(BPF_H, R1, R2, 8192), + BPF_LDX_MEM(BPF_H, R0, R1, 8192), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 8192 + 16, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_H, unaligned positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000000008788ULL), + BPF_STX_MEM(BPF_H, R1, R2, 13), + BPF_LDX_MEM(BPF_H, R0, R1, 13), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 32, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_W, base", .u.insns_int = { BPF_LD_IMM64(R1, 0x0102030405060708ULL), BPF_LD_IMM64(R2, 0x0000000005060708ULL), @@ -7957,6 +8072,162 @@ static struct bpf_test tests[] = { { { 0, 0 } }, .stack_depth = 8, }, + { + "BPF_LDX_MEM | BPF_W, negative offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000085868788ULL), + BPF_ALU64_IMM(BPF_ADD, R1, 512), + BPF_STX_MEM(BPF_W, R1, R2, -256), + BPF_LDX_MEM(BPF_W, R0, R1, -256), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 512, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_W, small positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000085868788ULL), + BPF_STX_MEM(BPF_W, R1, R2, 256), + BPF_LDX_MEM(BPF_W, R0, R1, 256), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 512, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_W, large positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000085868788ULL), + BPF_STX_MEM(BPF_W, R1, R2, 16384), + BPF_LDX_MEM(BPF_W, R0, R1, 16384), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 16384 + 16, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_W, unaligned positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_LD_IMM64(R3, 0x0000000085868788ULL), + BPF_STX_MEM(BPF_W, R1, R2, 13), + BPF_LDX_MEM(BPF_W, R0, R1, 13), + BPF_JMP_REG(BPF_JNE, R0, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 32, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_DW, base", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x0102030405060708ULL), + BPF_STX_MEM(BPF_DW, R10, R1, -8), + BPF_LDX_MEM(BPF_DW, R0, R10, -8), + BPF_JMP_REG(BPF_JNE, R0, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 8, + }, + { + "BPF_LDX_MEM | BPF_DW, MSB set", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x8182838485868788ULL), + BPF_STX_MEM(BPF_DW, R10, R1, -8), + BPF_LDX_MEM(BPF_DW, R0, R10, -8), + BPF_JMP_REG(BPF_JNE, R0, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 8, + }, + { + "BPF_LDX_MEM | BPF_DW, negative offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_ALU64_IMM(BPF_ADD, R1, 512), + BPF_STX_MEM(BPF_DW, R1, R2, -256), + BPF_LDX_MEM(BPF_DW, R0, R1, -256), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 512, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_DW, small positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_STX_MEM(BPF_DW, R1, R2, 256), + BPF_LDX_MEM(BPF_DW, R0, R1, 256), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 512, 0 } }, + .stack_depth = 8, + }, + { + "BPF_LDX_MEM | BPF_DW, large positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_STX_MEM(BPF_DW, R1, R2, 32760), + BPF_LDX_MEM(BPF_DW, R0, R1, 32760), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 32768, 0 } }, + .stack_depth = 0, + }, + { + "BPF_LDX_MEM | BPF_DW, unaligned positive offset", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x8182838485868788ULL), + BPF_STX_MEM(BPF_DW, R1, R2, 13), + BPF_LDX_MEM(BPF_DW, R0, R1, 13), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL | FLAG_LARGE_MEM, + { }, + { { 32, 0 } }, + .stack_depth = 0, + }, /* BPF_STX_MEM B/H/W/DW */ { "BPF_STX_MEM | BPF_B", @@ -14094,6 +14365,9 @@ static void *generate_test_data(struct bpf_test *test, int sub) if (test->aux & FLAG_NO_DATA) return NULL; + if (test->aux & FLAG_LARGE_MEM) + return kmalloc(test->test[sub].data_size, GFP_KERNEL); + /* Test case expects an skb, so populate one. Various * subtests generate skbs of different sizes based on * the same data. @@ -14137,7 +14411,10 @@ static void release_test_data(const struct bpf_test *test, void *data) if (test->aux & FLAG_NO_DATA) return; - kfree_skb(data); + if (test->aux & FLAG_LARGE_MEM) + kfree(data); + else + kfree_skb(data); } static int filter_length(int which) @@ -14674,6 +14951,36 @@ static struct tail_call_test tail_call_tests[] = { .result = 10, }, { + "Tail call load/store leaf", + .insns = { + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU64_REG(BPF_MOV, R3, BPF_REG_FP), + BPF_STX_MEM(BPF_DW, R3, R1, -8), + BPF_STX_MEM(BPF_DW, R3, R2, -16), + BPF_LDX_MEM(BPF_DW, R0, BPF_REG_FP, -8), + BPF_JMP_REG(BPF_JNE, R0, R1, 3), + BPF_LDX_MEM(BPF_DW, R0, BPF_REG_FP, -16), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + .result = 0, + .stack_depth = 32, + }, + { + "Tail call load/store", + .insns = { + BPF_ALU64_IMM(BPF_MOV, R0, 3), + BPF_STX_MEM(BPF_DW, BPF_REG_FP, R0, -8), + TAIL_CALL(-1), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + .result = 0, + .stack_depth = 16, + }, + { "Tail call error path, max count reached", .insns = { BPF_LDX_MEM(BPF_W, R2, R1, 0), diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index bf5736c1d458..a06f4d4a6f47 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1753,8 +1753,7 @@ static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int err = 0; struct sk_buff *skb; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); lock_sock(sk); if (!skb) diff --git a/net/atm/common.c b/net/atm/common.c index 1cfa9bf1d187..d0c8ab7ff8f6 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -540,7 +540,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, !test_bit(ATM_VF_READY, &vcc->flags)) return 0; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &error); + skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 363d47f94532..116481e4da82 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1669,8 +1669,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } /* Now we can treat all alike */ - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index a0cb2e3da8d4..62705734343b 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -251,7 +251,6 @@ EXPORT_SYMBOL(bt_accept_dequeue); int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct sk_buff *skb; size_t copied; @@ -263,7 +262,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 33b3c0ffc339..189e3115c8c6 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1453,7 +1453,6 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; @@ -1470,7 +1469,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (sk->sk_state == BT_CLOSED) return 0; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 8d6bab244c4a..58a4f70e01e3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -465,6 +465,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fix_features = br_fix_features, .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, + .ndo_fdb_del_bulk = br_fdb_delete_bulk, .ndo_fdb_dump = br_fdb_dump, .ndo_fdb_get = br_fdb_get, .ndo_bridge_getlink = br_getlink, diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6ccda68bd473..1a3d583fbc8e 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -558,18 +558,161 @@ void br_fdb_cleanup(struct work_struct *work) mod_delayed_work(system_long_wq, &br->gc_work, work_delay); } -/* Completely flush all dynamic entries in forwarding database.*/ -void br_fdb_flush(struct net_bridge *br) +static bool __fdb_flush_matches(const struct net_bridge *br, + const struct net_bridge_fdb_entry *f, + const struct net_bridge_fdb_flush_desc *desc) +{ + const struct net_bridge_port *dst = READ_ONCE(f->dst); + int port_ifidx = dst ? dst->dev->ifindex : br->dev->ifindex; + + if (desc->vlan_id && desc->vlan_id != f->key.vlan_id) + return false; + if (desc->port_ifindex && desc->port_ifindex != port_ifidx) + return false; + if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags) + return false; + + return true; +} + +/* Flush forwarding database entries matching the description */ +void br_fdb_flush(struct net_bridge *br, + const struct net_bridge_fdb_flush_desc *desc) { struct net_bridge_fdb_entry *f; - struct hlist_node *tmp; - spin_lock_bh(&br->hash_lock); - hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) { - if (!test_bit(BR_FDB_STATIC, &f->flags)) + rcu_read_lock(); + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { + if (!__fdb_flush_matches(br, f, desc)) + continue; + + spin_lock_bh(&br->hash_lock); + if (!hlist_unhashed(&f->fdb_node)) fdb_delete(br, f, true); + spin_unlock_bh(&br->hash_lock); } - spin_unlock_bh(&br->hash_lock); + rcu_read_unlock(); +} + +static unsigned long __ndm_state_to_fdb_flags(u16 ndm_state) +{ + unsigned long flags = 0; + + if (ndm_state & NUD_PERMANENT) + __set_bit(BR_FDB_LOCAL, &flags); + if (ndm_state & NUD_NOARP) + __set_bit(BR_FDB_STATIC, &flags); + + return flags; +} + +static unsigned long __ndm_flags_to_fdb_flags(u8 ndm_flags) +{ + unsigned long flags = 0; + + if (ndm_flags & NTF_USE) + __set_bit(BR_FDB_ADDED_BY_USER, &flags); + if (ndm_flags & NTF_EXT_LEARNED) + __set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &flags); + if (ndm_flags & NTF_OFFLOADED) + __set_bit(BR_FDB_OFFLOADED, &flags); + if (ndm_flags & NTF_STICKY) + __set_bit(BR_FDB_STICKY, &flags); + + return flags; +} + +static int __fdb_flush_validate_ifindex(const struct net_bridge *br, + int ifindex, + struct netlink_ext_ack *extack) +{ + const struct net_device *dev; + + dev = __dev_get_by_index(dev_net(br->dev), ifindex); + if (!dev) { + NL_SET_ERR_MSG_MOD(extack, "Unknown flush device ifindex"); + return -ENODEV; + } + if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Flush device is not a bridge or bridge port"); + return -EINVAL; + } + if (netif_is_bridge_master(dev) && dev != br->dev) { + NL_SET_ERR_MSG_MOD(extack, + "Flush bridge device does not match target bridge device"); + return -EINVAL; + } + if (netif_is_bridge_port(dev)) { + struct net_bridge_port *p = br_port_get_rtnl(dev); + + if (p->br != br) { + NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device"); + return -EINVAL; + } + } + + return 0; +} + +int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, u16 vid, + struct netlink_ext_ack *extack) +{ + u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS; + struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid }; + struct net_bridge_port *p = NULL; + struct net_bridge *br; + + if (netif_is_bridge_master(dev)) { + br = netdev_priv(dev); + } else { + p = br_port_get_rtnl(dev); + if (!p) { + NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge port"); + return -EINVAL; + } + br = p->br; + } + + if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) { + NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set"); + return -EINVAL; + } + if (ndm->ndm_state & ~FDB_FLUSH_ALLOWED_NDM_STATES) { + NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set"); + return -EINVAL; + } + + desc.flags |= __ndm_state_to_fdb_flags(ndm->ndm_state); + desc.flags |= __ndm_flags_to_fdb_flags(ndm_flags); + if (tb[NDA_NDM_STATE_MASK]) { + u16 ndm_state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]); + + desc.flags_mask |= __ndm_state_to_fdb_flags(ndm_state_mask); + } + if (tb[NDA_NDM_FLAGS_MASK]) { + u8 ndm_flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]); + + desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask); + } + if (tb[NDA_IFINDEX]) { + int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]); + + err = __fdb_flush_validate_ifindex(br, ifidx, extack); + if (err) + return err; + desc.port_ifindex = ifidx; + } else if (p) { + /* flush was invoked with port device and NTF_MASTER */ + desc.port_ifindex = p->dev->ifindex; + } + + br_debug(br, "flushing port ifindex: %d vlan id: %u flags: 0x%lx flags mask: 0x%lx\n", + desc.port_ifindex, desc.vlan_id, desc.flags, desc.flags_mask); + + br_fdb_flush(br, &desc); + + return 0; } /* Flush all entries referring to a specific port. diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 4556d913955b..fdcc641fc89a 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -251,14 +251,16 @@ static int __mdb_fill_info(struct sk_buff *skb, __mdb_entry_fill_flags(&e, flags); e.ifindex = ifindex; e.vid = mp->addr.vid; - if (mp->addr.proto == htons(ETH_P_IP)) + if (mp->addr.proto == htons(ETH_P_IP)) { e.addr.u.ip4 = mp->addr.dst.ip4; #if IS_ENABLED(CONFIG_IPV6) - else if (mp->addr.proto == htons(ETH_P_IPV6)) + } else if (mp->addr.proto == htons(ETH_P_IPV6)) { e.addr.u.ip6 = mp->addr.dst.ip6; #endif - else + } else { ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr); + e.state = MDB_PG_FLAGS_PERMANENT; + } e.addr.proto = mp->addr.proto; nest_ent = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO); @@ -873,8 +875,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return -EINVAL; /* host join errors which can happen before creating the group */ - if (!port) { - /* don't allow any flags for host-joined groups */ + if (!port && !br_group_is_l2(&group)) { + /* don't allow any flags for host-joined IP groups */ if (entry->state) { NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups"); return -EINVAL; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 200ad05b296f..bb01776d2d88 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1326,8 +1326,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], br_recalculate_fwd_mask(br); } - if (data[IFLA_BR_FDB_FLUSH]) - br_fdb_flush(br); + if (data[IFLA_BR_FDB_FLUSH]) { + struct net_bridge_fdb_flush_desc desc = { + .flags_mask = BR_FDB_STATIC + }; + + br_fdb_flush(br, &desc); + } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING if (data[IFLA_BR_MCAST_ROUTER]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 18ccc3d5d296..6ae882cfae1c 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -274,6 +274,13 @@ struct net_bridge_fdb_entry { struct rcu_head rcu; }; +struct net_bridge_fdb_flush_desc { + unsigned long flags; + unsigned long flags_mask; + int port_ifindex; + u16 vlan_id; +}; + #define MDB_PG_FLAGS_PERMANENT BIT(0) #define MDB_PG_FLAGS_OFFLOAD BIT(1) #define MDB_PG_FLAGS_FAST_LEAVE BIT(2) @@ -755,11 +762,17 @@ static inline void br_netpoll_disable(struct net_bridge_port *p) #endif /* br_fdb.c */ +#define FDB_FLUSH_IGNORED_NDM_FLAGS (NTF_MASTER | NTF_SELF) +#define FDB_FLUSH_ALLOWED_NDM_STATES (NUD_PERMANENT | NUD_NOARP) +#define FDB_FLUSH_ALLOWED_NDM_FLAGS (NTF_USE | NTF_EXT_LEARNED | \ + NTF_STICKY | NTF_OFFLOADED) + int br_fdb_init(void); void br_fdb_fini(void); int br_fdb_hash_init(struct net_bridge *br); void br_fdb_hash_fini(struct net_bridge *br); -void br_fdb_flush(struct net_bridge *br); +void br_fdb_flush(struct net_bridge *br, + const struct net_bridge_fdb_flush_desc *desc); void br_fdb_find_delete_local(struct net_bridge *br, const struct net_bridge_port *p, const unsigned char *addr, u16 vid); @@ -781,6 +794,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); +int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, u16 vid, + struct netlink_ext_ack *extack); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, struct netlink_ext_ack *extack); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 8cc44c367231..81400e0b26ac 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -72,7 +72,8 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, /* Flags that can be offloaded to hardware */ #define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \ - BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED) + BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED | \ + BR_HAIRPIN_MODE | BR_ISOLATED | BR_MULTICAST_TO_UNICAST) int br_switchdev_set_port_flag(struct net_bridge_port *p, unsigned long flags, diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 3f7ca88c2aa3..612e367fff20 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -344,7 +344,11 @@ static DEVICE_ATTR_RW(group_addr); static int set_flush(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br_fdb_flush(br); + struct net_bridge_fdb_flush_desc desc = { + .flags_mask = BR_FDB_STATIC + }; + + br_fdb_flush(br, &desc); return 0; } diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 2b8892d502f7..251e666ba9a2 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -282,7 +282,7 @@ static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m, if (flags & MSG_OOB) goto read_error; - skb = skb_recv_datagram(sk, flags, 0 , &ret); + skb = skb_recv_datagram(sk, flags, &ret); if (!skb) goto read_error; copylen = skb->len; diff --git a/net/can/bcm.c b/net/can/bcm.c index 95d209b52e6a..64c07e650bb4 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1632,12 +1632,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, struct sock *sk = sock->sk; struct sk_buff *skb; int error = 0; - int noblock; int err; - noblock = flags & MSG_DONTWAIT; - flags &= ~MSG_DONTWAIT; - skb = skb_recv_datagram(sk, flags, noblock, &error); + skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; diff --git a/net/can/isotp.c b/net/can/isotp.c index bafb0fb5f0e0..02d81effaa54 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1047,7 +1047,6 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, struct sock *sk = sock->sk; struct sk_buff *skb; struct isotp_sock *so = isotp_sk(sk); - int noblock = flags & MSG_DONTWAIT; int ret = 0; if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) @@ -1056,8 +1055,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (!so->bound) return -EADDRNOTAVAIL; - flags &= ~MSG_DONTWAIT; - skb = skb_recv_datagram(sk, flags, noblock, &ret); + skb = skb_recv_datagram(sk, flags, &ret); if (!skb) return ret; diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 6dff4510687a..0bb4fd3f6264 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -802,7 +802,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg, return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939, SCM_J1939_ERRQUEUE); - skb = skb_recv_datagram(sk, flags, 0, &ret); + skb = skb_recv_datagram(sk, flags, &ret); if (!skb) return ret; diff --git a/net/can/raw.c b/net/can/raw.c index 7105fa4824e4..0cf728dcff36 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -846,16 +846,12 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, struct sock *sk = sock->sk; struct sk_buff *skb; int err = 0; - int noblock; - - noblock = flags & MSG_DONTWAIT; - flags &= ~MSG_DONTWAIT; if (flags & MSG_ERRQUEUE) return sock_recv_errqueue(sk, msg, size, SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) return err; diff --git a/net/core/datagram.c b/net/core/datagram.c index ee290776c661..70126d15ca6e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -310,12 +310,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, EXPORT_SYMBOL(__skb_recv_datagram); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, - int noblock, int *err) + int *err) { int off = 0; - return __skb_recv_datagram(sk, &sk->sk_receive_queue, - flags | (noblock ? MSG_DONTWAIT : 0), + return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 8c6c08446556..ba853e878007 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -151,6 +151,7 @@ #include <linux/prandom.h> #include <linux/once_lite.h> +#include "dev.h" #include "net-sysfs.h" @@ -701,6 +702,10 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, if (WARN_ON_ONCE(last_dev == ctx.dev)) return -1; } + + if (!ctx.dev) + return ret; + path = dev_fwd_path(stack); if (!path) return -1; @@ -5370,13 +5375,11 @@ check_vlan_id: *ppt_prev = pt_prev; } else { drop: - if (!deliver_exact) { + if (!deliver_exact) dev_core_stats_rx_dropped_inc(skb->dev); - kfree_skb_reason(skb, SKB_DROP_REASON_PTYPE_ABSENT); - } else { + else dev_core_stats_rx_nohandler_inc(skb->dev); - kfree_skb(skb); - } + kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO); /* Jamal, now you will not able to escape explaining * me how you were going to use this. :-) */ @@ -8641,7 +8644,6 @@ void dev_set_group(struct net_device *dev, int new_group) { dev->group = new_group; } -EXPORT_SYMBOL(dev_set_group); /** * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR. @@ -8756,7 +8758,6 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier) return -ENODEV; return ops->ndo_change_carrier(dev, new_carrier); } -EXPORT_SYMBOL(dev_change_carrier); /** * dev_get_phys_port_id - Get device physical port ID @@ -8774,7 +8775,6 @@ int dev_get_phys_port_id(struct net_device *dev, return -EOPNOTSUPP; return ops->ndo_get_phys_port_id(dev, ppid); } -EXPORT_SYMBOL(dev_get_phys_port_id); /** * dev_get_phys_port_name - Get device physical port name @@ -8797,7 +8797,6 @@ int dev_get_phys_port_name(struct net_device *dev, } return devlink_compat_phys_port_name_get(dev, name, len); } -EXPORT_SYMBOL(dev_get_phys_port_name); /** * dev_get_port_parent_id - Get the device's port parent identifier @@ -8879,7 +8878,6 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) dev->proto_down = proto_down; return 0; } -EXPORT_SYMBOL(dev_change_proto_down); /** * dev_change_proto_down_reason - proto down reason @@ -8904,7 +8902,6 @@ void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, } } } -EXPORT_SYMBOL(dev_change_proto_down_reason); struct bpf_xdp_link { struct bpf_link link; @@ -9431,7 +9428,7 @@ static int dev_new_index(struct net *net) } /* Delayed registration/unregisteration */ -static LIST_HEAD(net_todo_list); +LIST_HEAD(net_todo_list); DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) @@ -10359,6 +10356,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, storage->rx_dropped += local_read(&core_stats->rx_dropped); storage->tx_dropped += local_read(&core_stats->tx_dropped); storage->rx_nohandler += local_read(&core_stats->rx_nohandler); + storage->rx_otherhost_dropped += local_read(&core_stats->rx_otherhost_dropped); } } return storage; diff --git a/net/core/dev.h b/net/core/dev.h new file mode 100644 index 000000000000..27923df00637 --- /dev/null +++ b/net/core/dev.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_CORE_DEV_H +#define _NET_CORE_DEV_H + +#include <linux/types.h> + +struct net; +struct net_device; +struct netdev_bpf; +struct netdev_phys_item_id; +struct netlink_ext_ack; + +/* Random bits of netdevice that don't need to be exposed */ +#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ +struct sd_flow_limit { + u64 count; + unsigned int num_buckets; + unsigned int history_head; + u16 history[FLOW_LIMIT_HISTORY]; + u8 buckets[]; +}; + +extern int netdev_flow_limit_table_len; + +#ifdef CONFIG_PROC_FS +int __init dev_proc_init(void); +#else +#define dev_proc_init() 0 +#endif + +void linkwatch_init_dev(struct net_device *dev); +void linkwatch_forget_dev(struct net_device *dev); +void linkwatch_run_queue(void); + +void dev_addr_flush(struct net_device *dev); +int dev_addr_init(struct net_device *dev); +void dev_addr_check(struct net_device *dev); + +/* sysctls not referred to from outside net/core/ */ +extern int netdev_budget; +extern unsigned int netdev_budget_usecs; + +extern int netdev_tstamp_prequeue; +extern int netdev_unregister_timeout_secs; +extern int weight_p; +extern int dev_weight_rx_bias; +extern int dev_weight_tx_bias; + +/* rtnl helpers */ +extern struct list_head net_todo_list; +void netdev_run_todo(void); + +/* netdev management, shared between various uAPI entry points */ +struct netdev_name_node { + struct hlist_node hlist; + struct list_head list; + struct net_device *dev; + const char *name; +}; + +int netdev_get_name(struct net *net, char *name, int ifindex); +int dev_change_name(struct net_device *dev, const char *newname); + +int netdev_name_node_alt_create(struct net_device *dev, const char *name); +int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); + +int dev_validate_mtu(struct net_device *dev, int mtu, + struct netlink_ext_ack *extack); +int dev_set_mtu_ext(struct net_device *dev, int mtu, + struct netlink_ext_ack *extack); + +int dev_get_phys_port_id(struct net_device *dev, + struct netdev_phys_item_id *ppid); +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len); + +int dev_change_proto_down(struct net_device *dev, bool proto_down); +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, + u32 value); + +typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, int expected_fd, u32 flags); + +int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len); +void dev_set_group(struct net_device *dev, int new_group); +int dev_change_carrier(struct net_device *dev, bool new_carrier); + +void __dev_set_rx_mode(struct net_device *dev); + +#endif diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index bead38ca50bd..baa63dee2829 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -12,6 +12,8 @@ #include <linux/export.h> #include <linux/list.h> +#include "dev.h" + /* * General list handling functions */ diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 1b807d119da5..4f6be442ae7e 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -10,6 +10,8 @@ #include <net/dsa.h> #include <net/wext.h> +#include "dev.h" + /* * Map an interface index to its name (SIOCGIFNAME) */ diff --git a/net/core/filter.c b/net/core/filter.c index 64470a727ef7..143f442a9505 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5173,7 +5173,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, if (val <= 0 || tp->data_segs_out > tp->syn_data) ret = -EINVAL; else - tp->snd_cwnd = val; + tcp_snd_cwnd_set(tp, val); break; case TCP_BPF_SNDCWND_CLAMP: if (val <= 0) { diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 95098d1a49bd..a244d3bade7d 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -18,6 +18,7 @@ #include <linux/bitops.h> #include <linux/types.h> +#include "dev.h" enum lw_bits { LW_URGENT = 0, diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 88cc0ad7d386..1ec23bf8b05c 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -4,6 +4,8 @@ #include <linux/seq_file.h> #include <net/wext.h> +#include "dev.h" + #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) #define get_bucket(x) ((x) >> BUCKET_SPACE) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9cbc1c8289bc..4980c3a50475 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -24,6 +24,7 @@ #include <linux/of_net.h> #include <linux/cpu.h> +#include "dev.h" #include "net-sysfs.h" #ifdef CONFIG_SYSFS diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 1943c0f0307d..4af55d28ffa3 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -36,6 +36,12 @@ this_cpu_inc(s->__stat); \ } while (0) +#define recycle_stat_add(pool, __stat, val) \ + do { \ + struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \ + this_cpu_add(s->__stat, val); \ + } while (0) + bool page_pool_get_stats(struct page_pool *pool, struct page_pool_stats *stats) { @@ -63,6 +69,7 @@ EXPORT_SYMBOL(page_pool_get_stats); #else #define alloc_stat_inc(pool, __stat) #define recycle_stat_inc(pool, __stat) +#define recycle_stat_add(pool, __stat, val) #endif static int page_pool_init(struct page_pool *pool, @@ -566,9 +573,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, /* Bulk producer into ptr_ring page_pool cache */ page_pool_ring_lock(pool); for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, data[i])) - break; /* ring full */ + if (__ptr_ring_produce(&pool->ring, data[i])) { + /* ring full */ + recycle_stat_inc(pool, ring_full); + break; + } } + recycle_stat_add(pool, ring, i); page_pool_ring_unlock(pool); /* Hopefully all pages was return into ptr_ring */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d1381ea6d52e..8bf770a7261e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -54,6 +54,8 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> +#include "dev.h" + #define RTNL_MAX_TYPE 50 #define RTNL_SLAVE_MAX_TYPE 40 @@ -95,6 +97,39 @@ void __rtnl_unlock(void) defer_kfree_skb_list = NULL; + /* Ensure that we didn't actually add any TODO item when __rtnl_unlock() + * is used. In some places, e.g. in cfg80211, we have code that will do + * something like + * rtnl_lock() + * wiphy_lock() + * ... + * rtnl_unlock() + * + * and because netdev_run_todo() acquires the RTNL for items on the list + * we could cause a situation such as this: + * Thread 1 Thread 2 + * rtnl_lock() + * unregister_netdevice() + * __rtnl_unlock() + * rtnl_lock() + * wiphy_lock() + * rtnl_unlock() + * netdev_run_todo() + * __rtnl_unlock() + * + * // list not empty now + * // because of thread 2 + * rtnl_lock() + * while (!list_empty(...)) + * rtnl_lock() + * wiphy_lock() + * **** DEADLOCK **** + * + * However, usage of __rtnl_unlock() is rare, and so we can ensure that + * it's not used in cases where something is added to do the list. + */ + WARN_ON(!list_empty(&net_todo_list)); + mutex_unlock(&rtnl_mutex); while (head) { @@ -214,6 +249,8 @@ static int rtnl_register_internal(struct module *owner, if (dumpit) link->dumpit = dumpit; + WARN_ON(rtnl_msgtype_kind(msgtype) != RTNL_KIND_DEL && + (flags & RTNL_FLAG_BULK_DEL_SUPPORTED)); link->flags |= flags; /* publish protocol:msgtype */ @@ -4132,22 +4169,36 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_del); +static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = { + [NDA_VLAN] = { .type = NLA_U16 }, + [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), + [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, + [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, +}; + static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); struct net *net = sock_net(skb->sk); + const struct net_device_ops *ops; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; - __u8 *addr; + __u8 *addr = NULL; int err; u16 vid; if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, - extack); + if (!del_bulk) { + err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, + NULL, extack); + } else { + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, + fdb_del_bulk_policy, extack); + } if (err < 0) return err; @@ -4163,9 +4214,12 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { - NL_SET_ERR_MSG(extack, "invalid address"); - return -EINVAL; + if (!del_bulk) { + if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { + NL_SET_ERR_MSG(extack, "invalid address"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); } if (dev->type != ARPHRD_ETHER) { @@ -4173,8 +4227,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } - addr = nla_data(tb[NDA_LLADDR]); - err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; @@ -4185,10 +4237,16 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); - const struct net_device_ops *ops = br_dev->netdev_ops; - if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid); + ops = br_dev->netdev_ops; + if (!del_bulk) { + if (ops->ndo_fdb_del) + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid); + } else { + if (ops->ndo_fdb_del_bulk) + err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, + extack); + } if (err) goto out; @@ -4198,15 +4256,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* Embedded bridge, macvlan, and any other device support */ if (ndm->ndm_flags & NTF_SELF) { - if (dev->netdev_ops->ndo_fdb_del) - err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr, - vid); - else - err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); + ops = dev->netdev_ops; + if (!del_bulk) { + if (ops->ndo_fdb_del) + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid); + else + err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); + } else { + /* in case err was cleared by NTF_MASTER call */ + err = -EOPNOTSUPP; + if (ops->ndo_fdb_del_bulk) + err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, + extack); + } if (!err) { - rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, - ndm->ndm_state); + if (!del_bulk) + rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, + ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } @@ -5896,11 +5963,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct rtnl_link *link; + enum rtnl_kinds kind; struct module *owner; int err = -EOPNOTSUPP; rtnl_doit_func doit; unsigned int flags; - int kind; int family; int type; @@ -5915,13 +5982,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; - kind = type&3; + kind = rtnl_msgtype_kind(type); - if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) + if (kind != RTNL_KIND_GET && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; rcu_read_lock(); - if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + if (kind == RTNL_KIND_GET && (nlh->nlmsg_flags & NLM_F_DUMP)) { struct sock *rtnl; rtnl_dumpit_func dumpit; u32 min_dump_alloc = 0; @@ -5977,6 +6044,12 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, } flags = link->flags; + if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) && + !(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) { + NL_SET_ERR_MSG(extack, "Bulk delete is not supported"); + goto err_unlock; + } + if (flags & RTNL_FLAG_DOIT_UNLOCKED) { doit = link->doit; rcu_read_unlock(); @@ -6105,7 +6178,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0); rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, + RTNL_FLAG_BULK_DEL_SUPPORTED); rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0); rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0); diff --git a/net/core/sock.c b/net/core/sock.c index 1180a0cb0110..29abec3eabd8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -141,6 +141,8 @@ #include <linux/ethtool.h> +#include "dev.h" + static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); @@ -503,17 +505,35 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(__sock_queue_rcv_skb); -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason *reason) { + enum skb_drop_reason drop_reason; int err; err = sk_filter(sk, skb); - if (err) - return err; - - return __sock_queue_rcv_skb(sk, skb); + if (err) { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + goto out; + } + err = __sock_queue_rcv_skb(sk, skb); + switch (err) { + case -ENOMEM: + drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; + break; + case -ENOBUFS: + drop_reason = SKB_DROP_REASON_PROTO_MEM; + break; + default: + drop_reason = SKB_NOT_DROPPED_YET; + break; + } +out: + if (reason) + *reason = drop_reason; + return err; } -EXPORT_SYMBOL(sock_queue_rcv_skb); +EXPORT_SYMBOL(sock_queue_rcv_skb_reason); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, unsigned int trim_cap, bool refcounted) @@ -3486,8 +3506,7 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int addr_len = 0; int err; - err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, &addr_len); + err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7123fe7feeac..8295e5877eb3 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -23,6 +23,8 @@ #include <net/busy_poll.h> #include <net/pkt_sched.h> +#include "dev.h" + static int two = 2; static int three = 3; static int int_3600 = 3600; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 671c377f0889..7dfc00c9fb32 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -293,8 +293,8 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); -int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - int flags, int *addr_len); +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len); void dccp_shutdown(struct sock *sk, int how); int inet_dccp_listen(struct socket *sock, int backlog); __poll_t dccp_poll(struct file *file, struct socket *sock, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a976b4d29892..58421f94427e 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -791,8 +791,8 @@ out_discard: EXPORT_SYMBOL_GPL(dccp_sendmsg); -int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - int flags, int *addr_len) +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len) { const struct dccp_hdr *dh; long timeo; @@ -804,7 +804,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, goto out; } - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index ebcc812735a4..62b89d6f54fd 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -391,7 +391,7 @@ EXPORT_SYMBOL(ether_setup); struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, unsigned int rxqs) { - return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN, + return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_ENUM, ether_setup, txqs, rxqs); } EXPORT_SYMBOL(alloc_etherdev_mqs); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 3b2366a88c3c..f24852814fa3 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -308,13 +308,13 @@ out: } static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; struct sk_buff *skb; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -695,7 +695,7 @@ out: } static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -703,7 +703,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct dgram_sock *ro = dgram_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 87983e70f03f..e983bb0c5012 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -321,7 +321,6 @@ config NET_UDP_TUNNEL config NET_FOU tristate "IP: Foo (IP protocols) over UDP" - select XFRM select NET_UDP_TUNNEL help Foo over UDP allows any IP protocol to be directly encapsulated diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 72fde2888ad2..195ecfa2f000 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -836,7 +836,7 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, EXPORT_SYMBOL(inet_sendpage); INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *, - size_t, int, int, int *)); + size_t, int, int *)); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -848,8 +848,7 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, sock_rps_record_flow(sk); err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg, - sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, &addr_len); + sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ccb62038f6a4..a57ba23571c9 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -524,7 +524,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, fri.tb_id = tb_id; fri.dst = key; fri.dst_len = dst_len; - fri.tos = inet_dscp_to_dsfield(fa->fa_dscp); + fri.dscp = fa->fa_dscp; fri.type = fa->fa_type; fri.offload = READ_ONCE(fa->offload); fri.trap = READ_ONCE(fa->trap); @@ -1781,7 +1781,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, rtm->rtm_family = AF_INET; rtm->rtm_dst_len = fri->dst_len; rtm->rtm_src_len = 0; - rtm->rtm_tos = fri->tos; + rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp); if (tb_id < 256) rtm->rtm_table = tb_id; else diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index fb0e49c36c2e..1f7f25532fa1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -82,7 +82,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, - .tos = inet_dscp_to_dsfield(fa->fa_dscp), + .dscp = fa->fa_dscp, .type = fa->fa_type, .tb_id = fa->tb_id, }; @@ -99,7 +99,7 @@ static int call_fib_entry_notifiers(struct net *net, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, - .tos = inet_dscp_to_dsfield(fa->fa_dscp), + .dscp = fa->fa_dscp, .type = fa->fa_type, .tb_id = fa->tb_id, }; @@ -1032,8 +1032,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { if (fa->fa_slen == slen && fa->tb_id == fri->tb_id && - fa->fa_dscp == inet_dsfield_to_dscp(fri->tos) && - fa->fa_info == fri->fi && fa->fa_type == fri->type) + fa->fa_dscp == fri->dscp && fa->fa_info == fri->fi && + fa->fa_type == fri->type) return fa; } @@ -2305,7 +2305,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, fri.tb_id = tb->tb_id; fri.dst = xkey; fri.dst_len = KEYLENGTH - fa->fa_slen; - fri.tos = inet_dscp_to_dsfield(fa->fa_dscp); + fri.dscp = fa->fa_dscp; fri.type = fa->fa_type; fri.offload = READ_ONCE(fa->offload); fri.trap = READ_ONCE(fa->trap); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 0d085cc8d96c..025a33c1b04d 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -16,7 +16,6 @@ #include <net/protocol.h> #include <net/udp.h> #include <net/udp_tunnel.h> -#include <net/xfrm.h> #include <uapi/linux/fou.h> #include <uapi/linux/genetlink.h> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 72a375c7f417..236debd9fded 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -186,7 +186,7 @@ EXPORT_SYMBOL(icmp_err_convert); */ struct icmp_control { - bool (*handler)(struct sk_buff *skb); + enum skb_drop_reason (*handler)(struct sk_buff *skb); short error; /* This ICMP is classed as an error message */ }; @@ -839,8 +839,9 @@ static bool icmp_tag_validation(int proto) * ICMP_PARAMETERPROB. */ -static bool icmp_unreach(struct sk_buff *skb) +static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; const struct iphdr *iph; struct icmphdr *icmph; struct net *net; @@ -860,8 +861,10 @@ static bool icmp_unreach(struct sk_buff *skb) icmph = icmp_hdr(skb); iph = (const struct iphdr *)skb->data; - if (iph->ihl < 5) /* Mangled header, drop. */ + if (iph->ihl < 5) { /* Mangled header, drop. */ + reason = SKB_DROP_REASON_IP_INHDR; goto out_err; + } switch (icmph->type) { case ICMP_DEST_UNREACH: @@ -941,10 +944,10 @@ static bool icmp_unreach(struct sk_buff *skb) icmp_socket_deliver(skb, info); out: - return true; + return reason; out_err: __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return false; + return reason ?: SKB_DROP_REASON_NOT_SPECIFIED; } @@ -952,20 +955,20 @@ out_err: * Handle ICMP_REDIRECT. */ -static bool icmp_redirect(struct sk_buff *skb) +static enum skb_drop_reason icmp_redirect(struct sk_buff *skb) { if (skb->len < sizeof(struct iphdr)) { __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); - return false; + return SKB_DROP_REASON_PKT_TOO_SMALL; } if (!pskb_may_pull(skb, sizeof(struct iphdr))) { /* there aught to be a stat */ - return false; + return SKB_DROP_REASON_NOMEM; } icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway)); - return true; + return SKB_NOT_DROPPED_YET; } /* @@ -982,7 +985,7 @@ static bool icmp_redirect(struct sk_buff *skb) * See also WRT handling of options once they are done and working. */ -static bool icmp_echo(struct sk_buff *skb) +static enum skb_drop_reason icmp_echo(struct sk_buff *skb) { struct icmp_bxm icmp_param; struct net *net; @@ -990,7 +993,7 @@ static bool icmp_echo(struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ if (net->ipv4.sysctl_icmp_echo_ignore_all) - return true; + return SKB_NOT_DROPPED_YET; icmp_param.data.icmph = *icmp_hdr(skb); icmp_param.skb = skb; @@ -1001,10 +1004,10 @@ static bool icmp_echo(struct sk_buff *skb) if (icmp_param.data.icmph.type == ICMP_ECHO) icmp_param.data.icmph.type = ICMP_ECHOREPLY; else if (!icmp_build_probe(skb, &icmp_param.data.icmph)) - return true; + return SKB_NOT_DROPPED_YET; icmp_reply(&icmp_param, skb); - return true; + return SKB_NOT_DROPPED_YET; } /* Helper for icmp_echo and icmpv6_echo_reply. @@ -1122,7 +1125,7 @@ EXPORT_SYMBOL_GPL(icmp_build_probe); * MUST be accurate to a few minutes. * MUST be updated at least at 15Hz. */ -static bool icmp_timestamp(struct sk_buff *skb) +static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb) { struct icmp_bxm icmp_param; /* @@ -1147,17 +1150,17 @@ static bool icmp_timestamp(struct sk_buff *skb) icmp_param.data_len = 0; icmp_param.head_len = sizeof(struct icmphdr) + 12; icmp_reply(&icmp_param, skb); - return true; + return SKB_NOT_DROPPED_YET; out_err: __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); - return false; + return SKB_DROP_REASON_PKT_TOO_SMALL; } -static bool icmp_discard(struct sk_buff *skb) +static enum skb_drop_reason icmp_discard(struct sk_buff *skb) { /* pretend it was a success */ - return true; + return SKB_NOT_DROPPED_YET; } /* @@ -1165,18 +1168,20 @@ static bool icmp_discard(struct sk_buff *skb) */ int icmp_rcv(struct sk_buff *skb) { - struct icmphdr *icmph; + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(rt->dst.dev); - bool success; + struct icmphdr *icmph; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); int nh; if (!(sp && sp->xvec[sp->len - 1]->props.flags & - XFRM_STATE_ICMP)) + XFRM_STATE_ICMP)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; + } if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr))) goto drop; @@ -1184,8 +1189,11 @@ int icmp_rcv(struct sk_buff *skb) nh = skb_network_offset(skb); skb_set_network_header(skb, sizeof(*icmph)); - if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) + if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, + skb)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; + } skb_set_network_header(skb, nh); } @@ -1207,13 +1215,13 @@ int icmp_rcv(struct sk_buff *skb) /* We can't use icmp_pointers[].handler() because it is an array of * size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42. */ - success = icmp_echo(skb); - goto success_check; + reason = icmp_echo(skb); + goto reason_check; } if (icmph->type == ICMP_EXT_ECHOREPLY) { - success = ping_rcv(skb); - goto success_check; + reason = ping_rcv(skb); + goto reason_check; } /* @@ -1222,8 +1230,10 @@ int icmp_rcv(struct sk_buff *skb) * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently * discarded. */ - if (icmph->type > NR_ICMP_TYPES) + if (icmph->type > NR_ICMP_TYPES) { + reason = SKB_DROP_REASON_UNHANDLED_PROTO; goto error; + } /* * Parse the ICMP message @@ -1239,27 +1249,30 @@ int icmp_rcv(struct sk_buff *skb) if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { + reason = SKB_DROP_REASON_INVALID_PROTO; goto error; } if (icmph->type != ICMP_ECHO && icmph->type != ICMP_TIMESTAMP && icmph->type != ICMP_ADDRESS && icmph->type != ICMP_ADDRESSREPLY) { + reason = SKB_DROP_REASON_INVALID_PROTO; goto error; } } - success = icmp_pointers[icmph->type].handler(skb); -success_check: - if (success) { + reason = icmp_pointers[icmph->type].handler(skb); +reason_check: + if (!reason) { consume_skb(skb); return NET_RX_SUCCESS; } drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return NET_RX_DROP; csum_error: + reason = SKB_DROP_REASON_ICMP_CSUM; __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS); error: __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 92ba3350274b..e3aa436a1bdf 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -90,6 +90,7 @@ int ip_forward(struct sk_buff *skb) struct rtable *rt; /* Route we use */ struct ip_options *opt = &(IPCB(skb)->opt); struct net *net; + SKB_DR(reason); /* that should never happen */ if (skb->pkt_type != PACKET_HOST) @@ -101,8 +102,10 @@ int ip_forward(struct sk_buff *skb) if (skb_warn_if_lro(skb)) goto drop; - if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) + if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) { + SKB_DR_SET(reason, XFRM_POLICY); goto drop; + } if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb)) return NET_RX_SUCCESS; @@ -118,8 +121,10 @@ int ip_forward(struct sk_buff *skb) if (ip_hdr(skb)->ttl <= 1) goto too_many_hops; - if (!xfrm4_route_forward(skb)) + if (!xfrm4_route_forward(skb)) { + SKB_DR_SET(reason, XFRM_POLICY); goto drop; + } rt = skb_rtable(skb); @@ -132,6 +137,7 @@ int ip_forward(struct sk_buff *skb) IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + SKB_DR_SET(reason, PKT_TOO_BIG); goto drop; } @@ -169,7 +175,8 @@ too_many_hops: /* Tell the sender its packet died... */ __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); + SKB_DR_SET(reason, IP_INHDR); drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return NET_RX_DROP; } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 95f7bb052784..b1165f717cd1 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -451,6 +451,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) * that it receives, do not try to analyse it. */ if (skb->pkt_type == PACKET_OTHERHOST) { + dev_core_stats_rx_otherhost_dropped_inc(skb->dev); drop_reason = SKB_DROP_REASON_OTHERHOST; goto drop; } diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 4151eb1262dd..b75cac69bd7e 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -112,6 +112,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, fl4.daddr = iph->daddr; fl4.saddr = get_saddr(iph->saddr); } else { + if (nft_hook(pkt) == NF_INET_FORWARD && + priv->flags & NFTA_FIB_F_IIF) + fl4.flowi4_iif = nft_out(pkt)->ifindex; + fl4.daddr = iph->saddr; fl4.saddr = get_saddr(iph->daddr); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3ee947557b88..319c181bfbb6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -844,8 +844,8 @@ do_confirm: goto out; } -int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len) { struct inet_sock *isk = inet_sk(sk); int family = sk->sk_family; @@ -861,7 +861,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, if (flags & MSG_ERRQUEUE) return inet_recv_error(sk, msg, len, addr_len); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -934,16 +934,24 @@ out: } EXPORT_SYMBOL_GPL(ping_recvmsg); -int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk, + struct sk_buff *skb) { + enum skb_drop_reason reason; + pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); - if (sock_queue_rcv_skb(sk, skb) < 0) { - kfree_skb(skb); + if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + kfree_skb_reason(skb, reason); pr_debug("ping_queue_rcv_skb -> failed\n"); - return -1; + return reason; } - return 0; + return SKB_NOT_DROPPED_YET; +} + +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + return __ping_queue_rcv_skb(sk, skb) ? -1 : 0; } EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); @@ -952,12 +960,12 @@ EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); * All we need to do is get the socket. */ -bool ping_rcv(struct sk_buff *skb) +enum skb_drop_reason ping_rcv(struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET; struct sock *sk; struct net *net = dev_net(skb->dev); struct icmphdr *icmph = icmp_hdr(skb); - bool rc = false; /* We assume the packet has already been checked by icmp_rcv */ @@ -972,15 +980,17 @@ bool ping_rcv(struct sk_buff *skb) struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); pr_debug("rcv on socket %p\n", sk); - if (skb2 && !ping_queue_rcv_skb(sk, skb2)) - rc = true; + if (skb2) + reason = __ping_queue_rcv_skb(sk, skb2); + else + reason = SKB_DROP_REASON_NOMEM; sock_put(sk); } - if (!rc) + if (reason) pr_debug("no socket, dropping\n"); - return rc; + return reason; } EXPORT_SYMBOL_GPL(ping_rcv); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 9f97b9cbf7b3..4056b0da85ea 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -753,7 +753,7 @@ out: */ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; @@ -769,7 +769,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; } - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98c6f3429593..e839d424b861 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -945,6 +945,7 @@ static int ip_error(struct sk_buff *skb) struct inet_peer *peer; unsigned long now; struct net *net; + SKB_DR(reason); bool send; int code; @@ -964,10 +965,12 @@ static int ip_error(struct sk_buff *skb) if (!IN_DEV_FORWARD(in_dev)) { switch (rt->dst.error) { case EHOSTUNREACH: + SKB_DR_SET(reason, IP_INADDRERRORS); __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); break; case ENETUNREACH: + SKB_DR_SET(reason, IP_INNOROUTES); __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; } @@ -983,6 +986,7 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; + SKB_DR_SET(reason, IP_INNOROUTES); __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; case EACCES: @@ -1009,7 +1013,7 @@ static int ip_error(struct sk_buff *skb) if (send) icmp_send(skb, ICMP_DEST_UNREACH, code, 0); -out: kfree_skb(skb); +out: kfree_skb_reason(skb, reason); return 0; } @@ -3394,7 +3398,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fri.tb_id = table_id; fri.dst = res.prefix; fri.dst_len = res.prefixlen; - fri.tos = fl4.flowi4_tos; + fri.dscp = inet_dsfield_to_dscp(fl4.flowi4_tos); fri.type = rt->rt_type; fri.offload = 0; fri.trap = 0; @@ -3407,7 +3411,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (fa->fa_slen == slen && fa->tb_id == fri.tb_id && - fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) && + fa->fa_dscp == fri.dscp && fa->fa_info == res.fi && fa->fa_type == fri.type) { fri.offload = READ_ONCE(fa->offload); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf18fbcbf123..e20b87b3bf90 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -429,7 +429,7 @@ void tcp_init_sock(struct sock *sk) * algorithms that we must have the following bandaid to talk * efficiently to them. -DaveM */ - tp->snd_cwnd = TCP_INIT_CWND; + tcp_snd_cwnd_set(tp, TCP_INIT_CWND); /* There's a bubble in the pipe until at least the first ACK. */ tp->app_limited = ~0U; @@ -1877,8 +1877,7 @@ static void tcp_zerocopy_set_hint_for_skb(struct sock *sk, } static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, - struct scm_timestamping_internal *tss, + int flags, struct scm_timestamping_internal *tss, int *cmsg_flags); static int receive_fallback_to_copy(struct sock *sk, struct tcp_zerocopy_receive *zc, int inq, @@ -1900,7 +1899,7 @@ static int receive_fallback_to_copy(struct sock *sk, if (err) return err; - err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0, + err = tcp_recvmsg_locked(sk, &msg, inq, MSG_DONTWAIT, tss, &zc->msg_flags); if (err < 0) return err; @@ -2316,8 +2315,7 @@ static int tcp_inq_hint(struct sock *sk) */ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, - struct scm_timestamping_internal *tss, + int flags, struct scm_timestamping_internal *tss, int *cmsg_flags) { struct tcp_sock *tp = tcp_sk(sk); @@ -2337,7 +2335,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, if (tp->recvmsg_inq) *cmsg_flags = TCP_CMSG_INQ; - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); /* Urgent data needs to be handled specially. */ if (flags & MSG_OOB) @@ -2556,8 +2554,8 @@ recv_sndq: goto out; } -int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - int flags, int *addr_len) +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len) { int cmsg_flags = 0, ret, inq; struct scm_timestamping_internal tss; @@ -2568,11 +2566,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && sk->sk_state == TCP_ESTABLISHED) - sk_busy_loop(sk, nonblock); + sk_busy_loop(sk, flags & MSG_DONTWAIT); lock_sock(sk); - ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss, - &cmsg_flags); + ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags); release_sock(sk); sk_defer_free_flush(sk); @@ -3033,7 +3030,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_rto_min = TCP_RTO_MIN; icsk->icsk_delack_max = TCP_DELACK_MAX; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - tp->snd_cwnd = TCP_INIT_CWND; + tcp_snd_cwnd_set(tp, TCP_INIT_CWND); tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; tp->delivered = 0; @@ -3744,7 +3741,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_max_pacing_rate = rate64; info->tcpi_reordering = tp->reordering; - info->tcpi_snd_cwnd = tp->snd_cwnd; + info->tcpi_snd_cwnd = tcp_snd_cwnd(tp); if (info->tcpi_state == TCP_LISTEN) { /* listeners aliased fields : @@ -3915,7 +3912,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, rate64 = tcp_compute_delivery_rate(tp); nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD); - nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd); + nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp)); nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 02e8626ccb27..c7d30a3bbd81 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -276,7 +276,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) } else { /* no RTT sample yet */ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ } - bw = (u64)tp->snd_cwnd * BW_UNIT; + bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; do_div(bw, rtt_us); sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); } @@ -323,9 +323,9 @@ static void bbr_save_cwnd(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) - bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */ + bbr->prior_cwnd = tcp_snd_cwnd(tp); /* this cwnd is good enough */ else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ - bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd); + bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp)); } static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) @@ -482,7 +482,7 @@ static bool bbr_set_cwnd_to_recover_or_restore( struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; - u32 cwnd = tp->snd_cwnd; + u32 cwnd = tcp_snd_cwnd(tp); /* An ACK for P pkts should release at most 2*P packets. We do this * in two steps. First, here we deduct the number of lost packets. @@ -520,7 +520,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd = tp->snd_cwnd, target_cwnd = 0; + u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0; if (!acked) goto done; /* no packet fully ACKed; just apply caps */ @@ -544,9 +544,9 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, cwnd = max(cwnd, bbr_cwnd_min_target); done: - tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */ + tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */ if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ - tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target); + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target)); } /* End cycle phase if it's time and/or we hit the phase's in-flight target. */ @@ -856,7 +856,7 @@ static void bbr_update_ack_aggregation(struct sock *sk, bbr->ack_epoch_acked = min_t(u32, 0xFFFFF, bbr->ack_epoch_acked + rs->acked_sacked); extra_acked = bbr->ack_epoch_acked - expected_acked; - extra_acked = min(extra_acked, tp->snd_cwnd); + extra_acked = min(extra_acked, tcp_snd_cwnd(tp)); if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx]) bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; } @@ -914,7 +914,7 @@ static void bbr_check_probe_rtt_done(struct sock *sk) return; bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ - tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); bbr_reset_mode(sk); } @@ -1093,7 +1093,7 @@ static u32 bbr_undo_cwnd(struct sock *sk) bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ bbr->full_bw_cnt = 0; bbr_reset_lt_bw_sampling(sk); - return tcp_sk(sk)->snd_cwnd; + return tcp_snd_cwnd(tcp_sk(sk)); } /* Entering loss recovery, so save cwnd for when we exit or undo recovery. */ diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index f5f588b1f6e9..58358bf92e1b 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!acked) return; } - bictcp_update(ca, tp->snd_cwnd); + bictcp_update(ca, tcp_snd_cwnd(tp)); tcp_cong_avoid_ai(tp, ca->cnt, acked); } @@ -166,16 +166,16 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk) ca->epoch_start = 0; /* end of epoch */ /* Wmax and fast convergence */ - if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) - ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) + if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence) + ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta)) / (2 * BICTCP_BETA_SCALE); else - ca->last_max_cwnd = tp->snd_cwnd; + ca->last_max_cwnd = tcp_snd_cwnd(tp); - if (tp->snd_cwnd <= low_window) - return max(tp->snd_cwnd >> 1U, 2U); + if (tcp_snd_cwnd(tp) <= low_window) + return max(tcp_snd_cwnd(tp) >> 1U, 2U); else - return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); + return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U); } static void bictcp_state(struct sock *sk, u8 new_state) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 1cdcb4df0eb7..be3947e70fec 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -174,7 +174,6 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, static int tcp_bpf_recvmsg_parser(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) { @@ -186,7 +185,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, psock = sk_psock_get(sk); if (unlikely(!psock)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags, addr_len); lock_sock(sk); msg_bytes_ready: @@ -211,7 +210,7 @@ msg_bytes_ready: goto out; } - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); if (!timeo) { copied = -EAGAIN; goto out; @@ -234,7 +233,7 @@ out: } static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) + int flags, int *addr_len) { struct sk_psock *psock; int copied, ret; @@ -244,11 +243,11 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, psock = sk_psock_get(sk); if (unlikely(!psock)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags, addr_len); if (!skb_queue_empty(&sk->sk_receive_queue) && sk_psock_queue_empty(psock)) { sk_psock_put(sk, psock); - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags, addr_len); } lock_sock(sk); msg_bytes_ready: @@ -257,14 +256,14 @@ msg_bytes_ready: long timeo; int data; - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); data = tcp_msg_wait_data(sk, psock, timeo); if (data) { if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; release_sock(sk); sk_psock_put(sk, psock); - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags, addr_len); } copied = -EAGAIN; } diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 709d23801823..ddc7ba0554bd 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -161,8 +161,8 @@ static void tcp_cdg_hystart_update(struct sock *sk) LINUX_MIB_TCPHYSTARTTRAINDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, - tp->snd_cwnd); - tp->snd_ssthresh = tp->snd_cwnd; + tcp_snd_cwnd(tp)); + tp->snd_ssthresh = tcp_snd_cwnd(tp); return; } } @@ -180,8 +180,8 @@ static void tcp_cdg_hystart_update(struct sock *sk) LINUX_MIB_TCPHYSTARTDELAYDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, - tp->snd_cwnd); - tp->snd_ssthresh = tp->snd_cwnd; + tcp_snd_cwnd(tp)); + tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } @@ -252,7 +252,7 @@ static bool tcp_cdg_backoff(struct sock *sk, u32 grad) return false; } - ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd); + ca->shadow_wnd = max(ca->shadow_wnd, tcp_snd_cwnd(tp)); ca->state = CDG_BACKOFF; tcp_enter_cwr(sk); return true; @@ -285,14 +285,14 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked) } if (!tcp_is_cwnd_limited(sk)) { - ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd); + ca->shadow_wnd = min(ca->shadow_wnd, tcp_snd_cwnd(tp)); return; } - prior_snd_cwnd = tp->snd_cwnd; + prior_snd_cwnd = tcp_snd_cwnd(tp); tcp_reno_cong_avoid(sk, ack, acked); - incr = tp->snd_cwnd - prior_snd_cwnd; + incr = tcp_snd_cwnd(tp) - prior_snd_cwnd; ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr); } @@ -331,15 +331,15 @@ static u32 tcp_cdg_ssthresh(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (ca->state == CDG_BACKOFF) - return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10); + return max(2U, (tcp_snd_cwnd(tp) * min(1024U, backoff_beta)) >> 10); if (ca->state == CDG_NONFULL && use_tolerance) - return tp->snd_cwnd; + return tcp_snd_cwnd(tp); - ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd); + ca->shadow_wnd = min(ca->shadow_wnd >> 1, tcp_snd_cwnd(tp)); if (use_shadow) - return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1); - return max(2U, tp->snd_cwnd >> 1); + return max3(2U, ca->shadow_wnd, tcp_snd_cwnd(tp) >> 1); + return max(2U, tcp_snd_cwnd(tp) >> 1); } static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) @@ -357,7 +357,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) ca->gradients = gradients; ca->rtt_seq = tp->snd_nxt; - ca->shadow_wnd = tp->snd_cwnd; + ca->shadow_wnd = tcp_snd_cwnd(tp); break; case CA_EVENT_COMPLETE_CWR: ca->state = CDG_UNKNOWN; @@ -380,7 +380,7 @@ static void tcp_cdg_init(struct sock *sk) ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), GFP_NOWAIT | __GFP_NOWARN); ca->rtt_seq = tp->snd_nxt; - ca->shadow_wnd = tp->snd_cwnd; + ca->shadow_wnd = tcp_snd_cwnd(tp); } static void tcp_cdg_release(struct sock *sk) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index dc95572163df..d3cae40749e8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include <linux/jhash.h> #include <net/tcp.h> +#include <trace/events/tcp.h> static DEFINE_SPINLOCK(tcp_cong_list_lock); static LIST_HEAD(tcp_cong_list); @@ -33,6 +34,17 @@ struct tcp_congestion_ops *tcp_ca_find(const char *name) return NULL; } +void tcp_set_ca_state(struct sock *sk, const u8 ca_state) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + trace_tcp_cong_state_set(sk, ca_state); + + if (icsk->icsk_ca_ops->set_state) + icsk->icsk_ca_ops->set_state(sk, ca_state); + icsk->icsk_ca_state = ca_state; +} + /* Must be called with rcu lock held */ static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net, const char *name) @@ -393,10 +405,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, */ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) { - u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); + u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh); - acked -= cwnd - tp->snd_cwnd; - tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); + acked -= cwnd - tcp_snd_cwnd(tp); + tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); return acked; } @@ -410,7 +422,7 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) /* If credits accumulated at a higher w, apply them gently now. */ if (tp->snd_cwnd_cnt >= w) { tp->snd_cwnd_cnt = 0; - tp->snd_cwnd++; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } tp->snd_cwnd_cnt += acked; @@ -418,9 +430,9 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) u32 delta = tp->snd_cwnd_cnt / w; tp->snd_cwnd_cnt -= delta * w; - tp->snd_cwnd += delta; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta); } - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp)); } EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); @@ -445,7 +457,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; } /* In dangerous area, increase slowly. */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked); } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); @@ -454,7 +466,7 @@ u32 tcp_reno_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return max(tp->snd_cwnd >> 1U, 2U); + return max(tcp_snd_cwnd(tp) >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); @@ -462,7 +474,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return max(tp->snd_cwnd, tp->prior_cwnd); + return max(tcp_snd_cwnd(tp), tp->prior_cwnd); } EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 24d562dd6225..b0918839bee7 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -334,7 +334,7 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!acked) return; } - bictcp_update(ca, tp->snd_cwnd, acked); + bictcp_update(ca, tcp_snd_cwnd(tp), acked); tcp_cong_avoid_ai(tp, ca->cnt, acked); } @@ -346,13 +346,13 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk) ca->epoch_start = 0; /* end of epoch */ /* Wmax and fast convergence */ - if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) - ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) + if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence) + ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta)) / (2 * BICTCP_BETA_SCALE); else - ca->last_max_cwnd = tp->snd_cwnd; + ca->last_max_cwnd = tcp_snd_cwnd(tp); - return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); + return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U); } static void cubictcp_state(struct sock *sk, u8 new_state) @@ -413,13 +413,13 @@ static void hystart_update(struct sock *sk, u32 delay) ca->found = 1; pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n", now - ca->round_start, threshold, - ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd); + ca->delay_min, hystart_ack_delay(sk), tcp_snd_cwnd(tp)); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, - tp->snd_cwnd); - tp->snd_ssthresh = tp->snd_cwnd; + tcp_snd_cwnd(tp)); + tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } @@ -438,8 +438,8 @@ static void hystart_update(struct sock *sk, u32 delay) LINUX_MIB_TCPHYSTARTDELAYDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, - tp->snd_cwnd); - tp->snd_ssthresh = tp->snd_cwnd; + tcp_snd_cwnd(tp)); + tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } @@ -469,7 +469,7 @@ static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) /* hystart triggers when cwnd is larger than some threshold */ if (!ca->found && tcp_in_slow_start(tp) && hystart && - tp->snd_cwnd >= hystart_low_window) + tcp_snd_cwnd(tp) >= hystart_low_window) hystart_update(sk, delay); } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 1943a6630341..ab034a4e9324 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -106,8 +106,8 @@ static u32 dctcp_ssthresh(struct sock *sk) struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - ca->loss_cwnd = tp->snd_cwnd; - return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); + ca->loss_cwnd = tcp_snd_cwnd(tp); + return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U); } static void dctcp_update_alpha(struct sock *sk, u32 flags) @@ -148,8 +148,8 @@ static void dctcp_react_to_loss(struct sock *sk) struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - ca->loss_cwnd = tp->snd_cwnd; - tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); + ca->loss_cwnd = tcp_snd_cwnd(tp); + tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U); } static void dctcp_state(struct sock *sk, u8 new_state) @@ -211,8 +211,9 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, static u32 dctcp_cwnd_undo(struct sock *sk) { const struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); + return max(tcp_snd_cwnd(tp), ca->loss_cwnd); } static struct tcp_congestion_ops dctcp __read_mostly = { diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 349069d6cd0a..c6de5ce79ad3 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -127,22 +127,22 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) * snd_cwnd <= * hstcp_aimd_vals[ca->ai].cwnd */ - if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) { - while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && + if (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd) { + while (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd && ca->ai < HSTCP_AIMD_MAX - 1) ca->ai++; - } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) { - while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) + } else if (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) { + while (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) ca->ai--; } /* Do additive increase */ - if (tp->snd_cwnd < tp->snd_cwnd_clamp) { + if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) { /* cwnd = cwnd + a(w) / cwnd */ tp->snd_cwnd_cnt += ca->ai + 1; - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - tp->snd_cwnd_cnt -= tp->snd_cwnd; - tp->snd_cwnd++; + if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) { + tp->snd_cwnd_cnt -= tcp_snd_cwnd(tp); + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } } } @@ -154,7 +154,7 @@ static u32 hstcp_ssthresh(struct sock *sk) struct hstcp *ca = inet_csk_ca(sk); /* Do multiplicative decrease */ - return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); + return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); } static struct tcp_congestion_ops tcp_highspeed __read_mostly = { diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 55adcfcf96fe..52b1f2665dfa 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -124,7 +124,7 @@ static void measure_achieved_throughput(struct sock *sk, ca->packetcount += sample->pkts_acked; - if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) && + if (ca->packetcount >= tcp_snd_cwnd(tp) - (ca->alpha >> 7 ? : 1) && now - ca->lasttime >= ca->minRTT && ca->minRTT > 0) { __u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime); @@ -225,7 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) const struct htcp *ca = inet_csk_ca(sk); htcp_param_update(sk); - return max((tp->snd_cwnd * ca->beta) >> 7, 2U); + return max((tcp_snd_cwnd(tp) * ca->beta) >> 7, 2U); } static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) @@ -242,9 +242,9 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* In dangerous area, increase slowly. * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd */ - if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tp->snd_cwnd) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; + if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tcp_snd_cwnd(tp)) { + if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); tp->snd_cwnd_cnt = 0; htcp_alpha_update(ca); } else diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index be39327e04e6..abd7d91807e5 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -54,7 +54,7 @@ static void hybla_init(struct sock *sk) ca->rho2_7ls = 0; ca->snd_cwnd_cents = 0; ca->hybla_en = true; - tp->snd_cwnd = 2; + tcp_snd_cwnd_set(tp, 2); tp->snd_cwnd_clamp = 65535; /* 1st Rho measurement based on initial srtt */ @@ -62,7 +62,7 @@ static void hybla_init(struct sock *sk) /* set minimum rtt as this is the 1st ever seen */ ca->minrtt_us = tp->srtt_us; - tp->snd_cwnd = ca->rho; + tcp_snd_cwnd_set(tp, ca->rho); } static void hybla_state(struct sock *sk, u8 ca_state) @@ -137,31 +137,31 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked) * as long as increment is estimated as (rho<<7)/window * it already is <<7 and we can easily count its fractions. */ - increment = ca->rho2_7ls / tp->snd_cwnd; + increment = ca->rho2_7ls / tcp_snd_cwnd(tp); if (increment < 128) tp->snd_cwnd_cnt++; } odd = increment % 128; - tp->snd_cwnd += increment >> 7; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + (increment >> 7)); ca->snd_cwnd_cents += odd; /* check when fractions goes >=128 and increase cwnd by 1. */ while (ca->snd_cwnd_cents >= 128) { - tp->snd_cwnd++; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); ca->snd_cwnd_cents -= 128; tp->snd_cwnd_cnt = 0; } /* check when cwnd has not been incremented for a while */ - if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) { - tp->snd_cwnd++; + if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) { + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); tp->snd_cwnd_cnt = 0; } /* clamp down slowstart cwnd to ssthresh value. */ if (is_slowstart) - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_ssthresh)); - tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp)); } static struct tcp_congestion_ops tcp_hybla __read_mostly = { diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 00e54873213e..c0c81a2c77fa 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -224,7 +224,7 @@ static void update_params(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); - if (tp->snd_cwnd < win_thresh) { + if (tcp_snd_cwnd(tp) < win_thresh) { ca->alpha = ALPHA_BASE; ca->beta = BETA_BASE; } else if (ca->cnt_rtt > 0) { @@ -284,9 +284,9 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) * tp->snd_cwnd += alpha/tp->snd_cwnd */ delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT; - if (delta >= tp->snd_cwnd) { - tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd, - (u32)tp->snd_cwnd_clamp); + if (delta >= tcp_snd_cwnd(tp)) { + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp) + delta / tcp_snd_cwnd(tp), + (u32)tp->snd_cwnd_clamp)); tp->snd_cwnd_cnt = 0; } } @@ -296,9 +296,11 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); + u32 decr; /* Multiplicative decrease */ - return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); + decr = (tcp_snd_cwnd(tp) * ca->beta) >> BETA_SHIFT; + return max(tcp_snd_cwnd(tp) - decr, 2U); } /* Extract info for Tcp socket info provided via netlink. */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2088f93fa37b..1595b76ea2be 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -414,7 +414,7 @@ static void tcp_sndbuf_expand(struct sock *sk) per_mss = roundup_pow_of_two(per_mss) + SKB_DATA_ALIGN(sizeof(struct sk_buff)); - nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); + nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp)); nr_segs = max_t(u32, nr_segs, tp->reordering + 1); /* Fast Recovery (RFC 5681 3.2) : @@ -909,12 +909,12 @@ static void tcp_update_pacing_rate(struct sock *sk) * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching * end of slow start and should slow down. */ - if (tp->snd_cwnd < tp->snd_ssthresh / 2) + if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2) rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio; else rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio; - rate *= max(tp->snd_cwnd, tp->packets_out); + rate *= max(tcp_snd_cwnd(tp), tp->packets_out); if (likely(tp->srtt_us)) do_div(rate, tp->srtt_us); @@ -2147,12 +2147,12 @@ void tcp_enter_loss(struct sock *sk) !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); - tp->prior_cwnd = tp->snd_cwnd; + tp->prior_cwnd = tcp_snd_cwnd(tp); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); tcp_init_undo(tp); } - tp->snd_cwnd = tcp_packets_in_flight(tp) + 1; + tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1); tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_jiffies32; @@ -2458,7 +2458,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, &inet->inet_daddr, ntohs(inet->inet_dport), - tp->snd_cwnd, tcp_left_out(tp), + tcp_snd_cwnd(tp), tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } @@ -2467,7 +2467,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, &sk->sk_v6_daddr, ntohs(inet->inet_dport), - tp->snd_cwnd, tcp_left_out(tp), + tcp_snd_cwnd(tp), tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } @@ -2492,7 +2492,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); - tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); + tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk)); if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; @@ -2599,7 +2599,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk) tp->high_seq = tp->snd_nxt; tp->tlp_high_seq = 0; tp->snd_cwnd_cnt = 0; - tp->prior_cwnd = tp->snd_cwnd; + tp->prior_cwnd = tcp_snd_cwnd(tp); tp->prr_delivered = 0; tp->prr_out = 0; tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); @@ -2629,7 +2629,7 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, } /* Force a fast retransmit upon entering fast recovery */ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1)); - tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; + tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt); } static inline void tcp_end_cwnd_reduction(struct sock *sk) @@ -2642,7 +2642,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { - tp->snd_cwnd = tp->snd_ssthresh; + tcp_snd_cwnd_set(tp, tp->snd_ssthresh); tp->snd_cwnd_stamp = tcp_jiffies32; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2709,9 +2709,9 @@ static void tcp_mtup_probe_success(struct sock *sk) /* FIXME: breaks with very large cwnd */ tp->prior_ssthresh = tcp_current_ssthresh(sk); - tp->snd_cwnd = tp->snd_cwnd * - tcp_mss_to_mtu(sk, tp->mss_cache) / - icsk->icsk_mtup.probe_size; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) * + tcp_mss_to_mtu(sk, tp->mss_cache) / + icsk->icsk_mtup.probe_size); tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_ssthresh = tcp_current_ssthresh(sk); @@ -3034,7 +3034,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, tp->snd_una == tp->mtu_probe.probe_seq_start) { tcp_mtup_probe_failed(sk); /* Restores the reduction we did in tcp_mtup_probe() */ - tp->snd_cwnd++; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); tcp_simple_retransmit(sk); return; } @@ -5436,7 +5436,7 @@ static bool tcp_should_expand_sndbuf(struct sock *sk) return false; /* If we filled the congestion window, do not expand. */ - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) + if (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp)) return false; return true; @@ -5998,9 +5998,9 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb) * retransmission has occurred. */ if (tp->total_retrans > 1 && tp->undo_marker) - tp->snd_cwnd = 1; + tcp_snd_cwnd_set(tp, 1); else - tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk)); + tcp_snd_cwnd_set(tp, tcp_init_cwnd(tp, __sk_dst_get(sk))); tp->snd_cwnd_stamp = tcp_jiffies32; bpf_skops_established(sk, bpf_op, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f9cec624068d..157265aecbed 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2621,7 +2621,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk), - tp->snd_cwnd, + tcp_snd_cwnd(tp), state == TCP_LISTEN ? fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 82b36ec3f2f8..ae36780977d2 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -297,7 +297,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) lp->flag &= ~LP_WITHIN_THR; pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag, - tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max, + tcp_snd_cwnd(tp), lp->remote_hz, lp->owd_min, lp->owd_max, lp->sowd >> 3); if (lp->flag & LP_WITHIN_THR) @@ -313,12 +313,12 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) /* happened within inference * drop snd_cwnd into 1 */ if (lp->flag & LP_WITHIN_INF) - tp->snd_cwnd = 1U; + tcp_snd_cwnd_set(tp, 1U); /* happened after inference * cut snd_cwnd into half */ else - tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U); + tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1U, 1U)); /* record this drop time */ lp->last_drop = now; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0588b004ddac..7029b0e98edb 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -388,15 +388,15 @@ void tcp_update_metrics(struct sock *sk) if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); - if (val && (tp->snd_cwnd >> 1) > val) + if (val && (tcp_snd_cwnd(tp) >> 1) > val) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, - tp->snd_cwnd >> 1); + tcp_snd_cwnd(tp) >> 1); } if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { val = tcp_metric_get(tm, TCP_METRIC_CWND); - if (tp->snd_cwnd > val) + if (tcp_snd_cwnd(tp) > val) tcp_metric_set(tm, TCP_METRIC_CWND, - tp->snd_cwnd); + tcp_snd_cwnd(tp)); } } else if (!tcp_in_slow_start(tp) && icsk->icsk_ca_state == TCP_CA_Open) { @@ -404,10 +404,10 @@ void tcp_update_metrics(struct sock *sk) if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) tcp_metric_set(tm, TCP_METRIC_SSTHRESH, - max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); + max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh)); if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { val = tcp_metric_get(tm, TCP_METRIC_CWND); - tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1); + tcp_metric_set(tm, TCP_METRIC_CWND, (val + tcp_snd_cwnd(tp)) >> 1); } } else { /* Else slow start did not finish, cwnd is non-sense, diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index ab552356bdba..a60662f4bdf9 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -197,10 +197,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) } if (ca->cwnd_growth_factor < 0) { - cnt = tp->snd_cwnd << -ca->cwnd_growth_factor; + cnt = tcp_snd_cwnd(tp) << -ca->cwnd_growth_factor; tcp_cong_avoid_ai(tp, cnt, acked); } else { - cnt = max(4U, tp->snd_cwnd >> ca->cwnd_growth_factor); + cnt = max(4U, tcp_snd_cwnd(tp) >> ca->cwnd_growth_factor); tcp_cong_avoid_ai(tp, cnt, acked); } } @@ -209,7 +209,7 @@ static u32 tcpnv_recalc_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U); + return max((tcp_snd_cwnd(tp) * nv_loss_dec_factor) >> 10, 2U); } static void tcpnv_state(struct sock *sk, u8 new_state) @@ -257,7 +257,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) return; /* Stop cwnd growth if we were in catch up mode */ - if (ca->nv_catchup && tp->snd_cwnd >= nv_min_cwnd) { + if (ca->nv_catchup && tcp_snd_cwnd(tp) >= nv_min_cwnd) { ca->nv_catchup = 0; ca->nv_allow_cwnd_growth = 0; } @@ -371,7 +371,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) * if cwnd < max_win, grow cwnd * else leave the same */ - if (tp->snd_cwnd > max_win) { + if (tcp_snd_cwnd(tp) > max_win) { /* there is congestion, check that it is ok * to make a CA decision * 1. We should have at least nv_dec_eval_min_calls @@ -398,20 +398,20 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) ca->nv_allow_cwnd_growth = 0; tp->snd_ssthresh = (nv_ssthresh_factor * max_win) >> 3; - if (tp->snd_cwnd - max_win > 2) { + if (tcp_snd_cwnd(tp) - max_win > 2) { /* gap > 2, we do exponential cwnd decrease */ int dec; - dec = max(2U, ((tp->snd_cwnd - max_win) * + dec = max(2U, ((tcp_snd_cwnd(tp) - max_win) * nv_cong_dec_mult) >> 7); - tp->snd_cwnd -= dec; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - dec); } else if (nv_cong_dec_mult > 0) { - tp->snd_cwnd = max_win; + tcp_snd_cwnd_set(tp, max_win); } if (ca->cwnd_growth_factor > 0) ca->cwnd_growth_factor = 0; ca->nv_no_cong_cnt = 0; - } else if (tp->snd_cwnd <= max_win - nv_pad_buffer) { + } else if (tcp_snd_cwnd(tp) <= max_win - nv_pad_buffer) { /* There is no congestion, grow cwnd if allowed*/ if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls) return; @@ -444,8 +444,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) * (it wasn't before, if it is now is because nv * decreased it). */ - if (tp->snd_cwnd < nv_min_cwnd) - tp->snd_cwnd = nv_min_cwnd; + if (tcp_snd_cwnd(tp) < nv_min_cwnd) + tcp_snd_cwnd_set(tp, nv_min_cwnd); } } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9ede847f4199..c221f3bce975 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -142,7 +142,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta) { struct tcp_sock *tp = tcp_sk(sk); u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk)); - u32 cwnd = tp->snd_cwnd; + u32 cwnd = tcp_snd_cwnd(tp); tcp_ca_event(sk, CA_EVENT_CWND_RESTART); @@ -151,7 +151,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta) while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) cwnd >>= 1; - tp->snd_cwnd = max(cwnd, restart_cwnd); + tcp_snd_cwnd_set(tp, max(cwnd, restart_cwnd)); tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_cwnd_used = 0; } @@ -1013,7 +1013,7 @@ static void tcp_tsq_write(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tp->lost_out > tp->retrans_out && - tp->snd_cwnd > tcp_packets_in_flight(tp)) { + tcp_snd_cwnd(tp) > tcp_packets_in_flight(tp)) { tcp_mstamp_refresh(tp); tcp_xmit_retransmit_queue(sk); } @@ -1860,9 +1860,9 @@ static void tcp_cwnd_application_limited(struct sock *sk) /* Limited by application or receiver window. */ u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); u32 win_used = max(tp->snd_cwnd_used, init_win); - if (win_used < tp->snd_cwnd) { + if (win_used < tcp_snd_cwnd(tp)) { tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; + tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1); } tp->snd_cwnd_used = 0; } @@ -2043,7 +2043,7 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, return 1; in_flight = tcp_packets_in_flight(tp); - cwnd = tp->snd_cwnd; + cwnd = tcp_snd_cwnd(tp); if (in_flight >= cwnd) return 0; @@ -2196,12 +2196,12 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, in_flight = tcp_packets_in_flight(tp); BUG_ON(tcp_skb_pcount(skb) <= 1); - BUG_ON(tp->snd_cwnd <= in_flight); + BUG_ON(tcp_snd_cwnd(tp) <= in_flight); send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; /* From in_flight test above, we know that cwnd > in_flight. */ - cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache; + cong_win = (tcp_snd_cwnd(tp) - in_flight) * tp->mss_cache; limit = min(send_win, cong_win); @@ -2215,7 +2215,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor); if (win_divisor) { - u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); + u32 chunk = min(tp->snd_wnd, tcp_snd_cwnd(tp) * tp->mss_cache); /* If at least some fraction of a window is available, * just use it. @@ -2345,7 +2345,7 @@ static int tcp_mtu_probe(struct sock *sk) if (likely(!icsk->icsk_mtup.enabled || icsk->icsk_mtup.probe_size || inet_csk(sk)->icsk_ca_state != TCP_CA_Open || - tp->snd_cwnd < 11 || + tcp_snd_cwnd(tp) < 11 || tp->rx_opt.num_sacks || tp->rx_opt.dsack)) return -1; @@ -2381,7 +2381,7 @@ static int tcp_mtu_probe(struct sock *sk) return 0; /* Do we need to wait to drain cwnd? With none in flight, don't stall */ - if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) { + if (tcp_packets_in_flight(tp) + 2 > tcp_snd_cwnd(tp)) { if (!tcp_packets_in_flight(tp)) return -1; else @@ -2450,7 +2450,7 @@ static int tcp_mtu_probe(struct sock *sk) if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { /* Decrement cwnd here because we are sending * effectively two packets. */ - tp->snd_cwnd--; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1); tcp_event_new_data_sent(sk, nskb); icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); @@ -2708,7 +2708,7 @@ repair: else tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED); - is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); + is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp)); if (likely(sent_pkts || is_cwnd_limited)) tcp_cwnd_validate(sk, is_cwnd_limited); @@ -2818,7 +2818,7 @@ void tcp_send_loss_probe(struct sock *sk) if (unlikely(!skb)) { WARN_ONCE(tp->packets_out, "invalid inflight: %u state %u cwnd %u mss %d\n", - tp->packets_out, sk->sk_state, tp->snd_cwnd, mss); + tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss); inet_csk(sk)->icsk_pending = 0; return; } @@ -3302,7 +3302,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (!hole) tp->retransmit_skb_hint = skb; - segs = tp->snd_cwnd - tcp_packets_in_flight(tp); + segs = tcp_snd_cwnd(tp) - tcp_packets_in_flight(tp); if (segs <= 0) break; sacked = TCP_SKB_CB(skb)->sacked; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index fbab921670cc..617b8187c03d 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -195,7 +195,7 @@ void tcp_rate_check_app_limited(struct sock *sk) /* Nothing in sending host's qdisc queues or NIC tx queue. */ sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) && /* We are not limited by CWND. */ - tcp_packets_in_flight(tp) < tp->snd_cwnd && + tcp_packets_in_flight(tp) < tcp_snd_cwnd(tp) && /* All lost packets have been retransmitted. */ tp->lost_out <= tp->retrans_out) tp->app_limited = diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 5842081bc8a2..862b96248a92 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -27,7 +27,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!acked) return; } - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT), acked); } @@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); + return max(tcp_snd_cwnd(tp) - (tcp_snd_cwnd(tp)>>TCP_SCALABLE_MD_SCALE), 2U); } static struct tcp_congestion_ops tcp_scalable __read_mostly = { diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index c8003c8aad2c..786848ad37ea 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { - return min(tp->snd_ssthresh, tp->snd_cwnd); + return min(tp->snd_ssthresh, tcp_snd_cwnd(tp)); } static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) @@ -217,14 +217,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) * This is: * (actual rate in segments) * baseRTT */ - target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT; + target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT; do_div(target_cwnd, rtt); /* Calculate the difference between the window we had, * and the window we would like to have. This quantity * is the "Diff" from the Arizona Vegas papers. */ - diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; + diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT; if (diff > gamma && tcp_in_slow_start(tp)) { /* Going too fast. Time to slow down @@ -238,7 +238,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) * truncation robs us of full link * utilization. */ - tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); + tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), + (u32)target_cwnd + 1)); tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (tcp_in_slow_start(tp)) { @@ -254,14 +255,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* The old window was too fast, so * we slow down. */ - tp->snd_cwnd--; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1); tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. */ - tp->snd_cwnd++; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } else { /* Sending just as fast as we * should be. @@ -269,10 +270,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) } } - if (tp->snd_cwnd < 2) - tp->snd_cwnd = 2; - else if (tp->snd_cwnd > tp->snd_cwnd_clamp) - tp->snd_cwnd = tp->snd_cwnd_clamp; + if (tcp_snd_cwnd(tp) < 2) + tcp_snd_cwnd_set(tp, 2); + else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp) + tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp); tp->snd_ssthresh = tcp_current_ssthresh(sk); } diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index cd50a61c9976..366ff6f214b2 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -146,11 +146,11 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) rtt = veno->minrtt; - target_cwnd = (u64)tp->snd_cwnd * veno->basertt; + target_cwnd = (u64)tcp_snd_cwnd(tp) * veno->basertt; target_cwnd <<= V_PARAM_SHIFT; do_div(target_cwnd, rtt); - veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd; + veno->diff = (tcp_snd_cwnd(tp) << V_PARAM_SHIFT) - target_cwnd; if (tcp_in_slow_start(tp)) { /* Slow start. */ @@ -164,15 +164,15 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* In the "non-congestive state", increase cwnd * every rtt. */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked); } else { /* In the "congestive state", increase cwnd * every other rtt. */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) { if (veno->inc && - tp->snd_cwnd < tp->snd_cwnd_clamp) { - tp->snd_cwnd++; + tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) { + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); veno->inc = 0; } else veno->inc = 1; @@ -181,10 +181,10 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) tp->snd_cwnd_cnt += acked; } done: - if (tp->snd_cwnd < 2) - tp->snd_cwnd = 2; - else if (tp->snd_cwnd > tp->snd_cwnd_clamp) - tp->snd_cwnd = tp->snd_cwnd_clamp; + if (tcp_snd_cwnd(tp) < 2) + tcp_snd_cwnd_set(tp, 2); + else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp) + tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp); } /* Wipe the slate clean for the next rtt. */ /* veno->cntrtt = 0; */ @@ -199,10 +199,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk) if (veno->diff < beta) /* in "non-congestive state", cut cwnd by 1/5 */ - return max(tp->snd_cwnd * 4 / 5, 2U); + return max(tcp_snd_cwnd(tp) * 4 / 5, 2U); else /* in "congestive state", cut cwnd by 1/2 */ - return max(tp->snd_cwnd >> 1U, 2U); + return max(tcp_snd_cwnd(tp) >> 1U, 2U); } static struct tcp_congestion_ops tcp_veno __read_mostly = { diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index b2e05c4cea00..c6e97141eef2 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -244,7 +244,8 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) switch (event) { case CA_EVENT_COMPLETE_CWR: - tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); + tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); + tcp_snd_cwnd_set(tp, tp->snd_ssthresh); break; case CA_EVENT_LOSS: tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 07c4c93b9fdb..18b07ff5d20e 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -71,11 +71,11 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!yeah->doing_reno_now) { /* Scalable */ - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT), acked); } else { /* Reno */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. @@ -130,7 +130,7 @@ do_vegas: /* Compute excess number of packets above bandwidth * Avoid doing full 64 bit divide. */ - bw = tp->snd_cwnd; + bw = tcp_snd_cwnd(tp); bw *= rtt - yeah->vegas.baseRTT; do_div(bw, rtt); queue = bw; @@ -138,20 +138,20 @@ do_vegas: if (queue > TCP_YEAH_ALPHA || rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) { if (queue > TCP_YEAH_ALPHA && - tp->snd_cwnd > yeah->reno_count) { + tcp_snd_cwnd(tp) > yeah->reno_count) { u32 reduction = min(queue / TCP_YEAH_GAMMA , - tp->snd_cwnd >> TCP_YEAH_EPSILON); + tcp_snd_cwnd(tp) >> TCP_YEAH_EPSILON); - tp->snd_cwnd -= reduction; + tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - reduction); - tp->snd_cwnd = max(tp->snd_cwnd, - yeah->reno_count); + tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), + yeah->reno_count)); - tp->snd_ssthresh = tp->snd_cwnd; + tp->snd_ssthresh = tcp_snd_cwnd(tp); } if (yeah->reno_count <= 2) - yeah->reno_count = max(tp->snd_cwnd>>1, 2U); + yeah->reno_count = max(tcp_snd_cwnd(tp)>>1, 2U); else yeah->reno_count++; @@ -176,7 +176,7 @@ do_vegas: */ yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt; yeah->vegas.beg_snd_nxt = tp->snd_nxt; - yeah->vegas.beg_snd_cwnd = tp->snd_cwnd; + yeah->vegas.beg_snd_cwnd = tcp_snd_cwnd(tp); /* Wipe the slate clean for the next RTT. */ yeah->vegas.cntRTT = 0; @@ -193,16 +193,16 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) if (yeah->doing_reno_now < TCP_YEAH_RHO) { reduction = yeah->lastQ; - reduction = min(reduction, max(tp->snd_cwnd>>1, 2U)); + reduction = min(reduction, max(tcp_snd_cwnd(tp)>>1, 2U)); - reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA); + reduction = max(reduction, tcp_snd_cwnd(tp) >> TCP_YEAH_DELTA); } else - reduction = max(tp->snd_cwnd>>1, 2U); + reduction = max(tcp_snd_cwnd(tp)>>1, 2U); yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); - return max_t(int, tp->snd_cwnd - reduction, 2); + return max_t(int, tcp_snd_cwnd(tp) - reduction, 2); } static struct tcp_congestion_ops tcp_yeah __read_mostly = { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6b4d8361560f..aa8545ca6964 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1726,7 +1726,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) EXPORT_SYMBOL(udp_ioctl); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, - int noblock, int *off, int *err) + int *off, int *err) { struct sk_buff_head *sk_queue = &sk->sk_receive_queue; struct sk_buff_head *queue; @@ -1735,7 +1735,6 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int error; queue = &udp_sk(sk)->reader_queue; - flags |= noblock ? MSG_DONTWAIT : 0; timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { struct sk_buff *skb; @@ -1805,7 +1804,7 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc, struct sk_buff *skb; int err, used; - skb = skb_recv_udp(sk, 0, 1, &err); + skb = skb_recv_udp(sk, MSG_DONTWAIT, &err); if (!skb) return err; @@ -1843,8 +1842,8 @@ EXPORT_SYMBOL(udp_read_sock); * return it, otherwise we block. */ -int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len) { struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); @@ -1859,7 +1858,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, try_again: off = sk_peek_offset(sk, flags); - skb = __skb_recv_udp(sk, flags, noblock, &off, &err); + skb = __skb_recv_udp(sk, flags, &off, &err); if (!skb) return err; diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index bbe6569c9ad3..ff15918b7bdc 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -11,14 +11,13 @@ static struct proto *udpv6_prot_saved __read_mostly; static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - return udpv6_prot_saved->recvmsg(sk, msg, len, noblock, flags, - addr_len); + return udpv6_prot_saved->recvmsg(sk, msg, len, flags, addr_len); #endif - return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len); + return udp_prot.recvmsg(sk, msg, len, flags, addr_len); } static bool udp_sk_has_data(struct sock *sk) @@ -61,7 +60,7 @@ static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock, } static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) + int flags, int *addr_len) { struct sk_psock *psock; int copied, ret; @@ -71,10 +70,10 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, psock = sk_psock_get(sk); if (unlikely(!psock)) - return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return sk_udp_recvmsg(sk, msg, len, flags, addr_len); if (!psock_has_data(psock)) { - ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len); goto out; } @@ -84,12 +83,12 @@ msg_bytes_ready: long timeo; int data; - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); data = udp_msg_wait_data(sk, psock, timeo); if (data) { if (psock_has_data(psock)) goto msg_bytes_ready; - ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len); goto out; } copied = -EAGAIN; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 2878d8285caf..4ba7a88a1b1d 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -17,8 +17,8 @@ int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len); +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void udp_destroy_sock(struct sock *sk); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b22504176588..1afc4c024981 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -797,6 +797,7 @@ static void dev_forward_change(struct inet6_dev *idev) { struct net_device *dev; struct inet6_ifaddr *ifa; + LIST_HEAD(tmp_addr_list); if (!idev) return; @@ -815,14 +816,24 @@ static void dev_forward_change(struct inet6_dev *idev) } } + read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa->flags&IFA_F_TENTATIVE) continue; + list_add_tail(&ifa->if_list_aux, &tmp_addr_list); + } + read_unlock_bh(&idev->lock); + + while (!list_empty(&tmp_addr_list)) { + ifa = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifa->if_list_aux); if (idev->cnf.forwarding) addrconf_join_anycast(ifa); else addrconf_leave_anycast(ifa); } + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_FORWARDING, dev->ifindex, &idev->cnf); @@ -3728,7 +3739,8 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN; struct net *net = dev_net(dev); struct inet6_dev *idev; - struct inet6_ifaddr *ifa, *tmp; + struct inet6_ifaddr *ifa; + LIST_HEAD(tmp_addr_list); bool keep_addr = false; bool was_ready; int state, i; @@ -3820,16 +3832,23 @@ restart: write_lock_bh(&idev->lock); } - list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + list_for_each_entry(ifa, &idev->addr_list, if_list) + list_add_tail(&ifa->if_list_aux, &tmp_addr_list); + write_unlock_bh(&idev->lock); + + while (!list_empty(&tmp_addr_list)) { struct fib6_info *rt = NULL; bool keep; + ifa = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifa->if_list_aux); + addrconf_del_dad_work(ifa); keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && !addr_is_local(&ifa->addr); - write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); if (keep) { @@ -3860,15 +3879,14 @@ restart: addrconf_leave_solict(ifa->idev, &ifa->addr); } - write_lock_bh(&idev->lock); if (!keep) { + write_lock_bh(&idev->lock); list_del_rcu(&ifa->if_list); + write_unlock_bh(&idev->lock); in6_ifa_put(ifa); } } - write_unlock_bh(&idev->lock); - /* Step 5: Discard anycast and multicast list */ if (unregister) { ipv6_ac_destroy_dev(idev); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7d7b7523d126..6595a78672c8 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -654,7 +654,7 @@ int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *, - size_t, int, int, int *)); + size_t, int, int *)); int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -669,8 +669,7 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, - sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, &addr_len); + sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 658d5eabaf7e..a8d961d3a477 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -90,12 +90,13 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff, break; fallthrough; case 2: /* send ICMP PARM PROB regardless and drop packet */ - icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); + icmpv6_param_prob_reason(skb, ICMPV6_UNK_OPTION, optoff, + SKB_DROP_REASON_UNHANDLED_PROTO); return false; } drop: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO); return false; } @@ -218,7 +219,7 @@ static bool ip6_parse_tlv(bool hopbyhop, if (len == 0) return true; bad: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } @@ -232,6 +233,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); + SKB_DR(reason); int ret; if (opt->dsthao) { @@ -246,19 +248,23 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) if (hao->length != 16) { net_dbg_ratelimited("hao invalid option length = %d\n", hao->length); + SKB_DR_SET(reason, IP_INHDR); goto discard; } if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { net_dbg_ratelimited("hao is not an unicast addr: %pI6\n", &hao->addr); + SKB_DR_SET(reason, INVALID_PROTO); goto discard; } ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS); - if (unlikely(ret < 0)) + if (unlikely(ret < 0)) { + SKB_DR_SET(reason, XFRM_POLICY); goto discard; + } if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) @@ -281,7 +287,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) return true; discard: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return false; } #endif @@ -487,7 +493,6 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct inet6_dev *idev; struct ipv6hdr *oldhdr; - struct in6_addr addr; unsigned char *buf; int accept_rpl_seg; int i, err; @@ -616,9 +621,7 @@ looped_back: return -1; } - addr = ipv6_hdr(skb)->daddr; - ipv6_hdr(skb)->daddr = ohdr->rpl_segaddr[i]; - ohdr->rpl_segaddr[i] = addr; + swap(ipv6_hdr(skb)->daddr, ohdr->rpl_segaddr[i]); ipv6_rpl_srh_compress(chdr, ohdr, &ipv6_hdr(skb)->daddr, n); @@ -934,7 +937,7 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) } net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n", nh[optoff + 1]); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } @@ -988,7 +991,7 @@ ignore: return true; drop: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } @@ -997,31 +1000,30 @@ drop: static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); - struct inet6_dev *idev = __in6_dev_get_safely(skb->dev); - struct net *net = ipv6_skb_net(skb); + SKB_DR(reason); u32 pkt_len; if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); + SKB_DR_SET(reason, IP_INHDR); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); if (pkt_len <= IPV6_MAXPLEN) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); + icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff + 2, + SKB_DROP_REASON_IP_INHDR); return false; } if (ipv6_hdr(skb)->payload_len) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); + icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff, + SKB_DROP_REASON_IP_INHDR); return false; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS); + SKB_DR_SET(reason, PKT_TOO_SMALL); goto drop; } @@ -1032,7 +1034,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) return true; drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return false; } @@ -1054,7 +1056,7 @@ static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff) return true; drop: - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e6b978ea0e87..61770220774e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -629,12 +629,13 @@ out_bh_enable: } EXPORT_SYMBOL(icmp6_send); -/* Slightly more convenient version of icmp6_send. +/* Slightly more convenient version of icmp6_send with drop reasons. */ -void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) +void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos, + enum skb_drop_reason reason) { icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb)); - kfree_skb(skb); + kfree_skb_reason(skb, reason); } /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH @@ -864,21 +865,23 @@ out: static int icmpv6_rcv(struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct net *net = dev_net(skb->dev); struct net_device *dev = icmp6_dev(skb); struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; u8 type; - bool success = false; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); int nh; if (!(sp && sp->xvec[sp->len - 1]->props.flags & - XFRM_STATE_ICMP)) + XFRM_STATE_ICMP)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop_no_count; + } if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; @@ -886,8 +889,11 @@ static int icmpv6_rcv(struct sk_buff *skb) nh = skb_network_offset(skb); skb_set_network_header(skb, sizeof(*hdr)); - if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) + if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, + skb)) { + reason = SKB_DROP_REASON_XFRM_POLICY; goto drop_no_count; + } skb_set_network_header(skb, nh); } @@ -924,11 +930,11 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - success = ping_rcv(skb); + reason = ping_rcv(skb); break; case ICMPV6_EXT_ECHO_REPLY: - success = ping_rcv(skb); + reason = ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: @@ -994,19 +1000,20 @@ static int icmpv6_rcv(struct sk_buff *skb) /* until the v6 path can be better sorted assume failure and * preserve the status quo behaviour for the rest of the paths to here */ - if (success) - consume_skb(skb); + if (reason) + kfree_skb_reason(skb, reason); else - kfree_skb(skb); + consume_skb(skb); return 0; csum_error: + reason = SKB_DROP_REASON_ICMP_CSUM; __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5b5ea35635f9..126ae3aa67e1 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -145,12 +145,14 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk, static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, struct net *net) { + enum skb_drop_reason reason; const struct ipv6hdr *hdr; u32 pkt_len; struct inet6_dev *idev; if (skb->pkt_type == PACKET_OTHERHOST) { - kfree_skb(skb); + dev_core_stats_rx_otherhost_dropped_inc(skb->dev); + kfree_skb_reason(skb, SKB_DROP_REASON_OTHERHOST); return NULL; } @@ -160,9 +162,12 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len); + SKB_DR_SET(reason, NOT_SPECIFIED); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || !idev || unlikely(idev->cnf.disable_ipv6)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); + if (unlikely(idev->cnf.disable_ipv6)) + SKB_DR_SET(reason, IPV6DISABLED); goto drop; } @@ -186,8 +191,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, hdr = ipv6_hdr(skb); - if (hdr->version != 6) + if (hdr->version != 6) { + SKB_DR_SET(reason, UNHANDLED_PROTO); goto err; + } __IP6_ADD_STATS(net, idev, IPSTATS_MIB_NOECTPKTS + @@ -225,8 +232,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, if (!ipv6_addr_is_multicast(&hdr->daddr) && (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) && - idev->cnf.drop_unicast_in_l2_multicast) + idev->cnf.drop_unicast_in_l2_multicast) { + SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST); goto err; + } /* RFC4291 2.7 * Nodes must not originate a packet to a multicast address whose scope @@ -255,12 +264,11 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS); + SKB_DR_SET(reason, PKT_TOO_SMALL); goto drop; } - if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - goto drop; - } + if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) + goto err; hdr = ipv6_hdr(skb); } @@ -281,9 +289,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, return skb; err: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); + SKB_DR_OR(reason, IP_INHDR); drop: rcu_read_unlock(); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return NULL; } @@ -353,6 +362,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; + SKB_DR(reason); bool raw; /* @@ -412,12 +422,16 @@ resubmit_final: if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(dev, &hdr->daddr, &hdr->saddr) && - !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) + !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) { + SKB_DR_SET(reason, IP_INADDRERRORS); goto discard; + } } if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && - !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + SKB_DR_SET(reason, XFRM_POLICY); goto discard; + } ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv, skb); @@ -443,8 +457,11 @@ resubmit_final: IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff); + SKB_DR_SET(reason, IP_NOPROTO); + } else { + SKB_DR_SET(reason, XFRM_POLICY); } - kfree_skb(skb); + kfree_skb_reason(skb, reason); } else { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); consume_skb(skb); @@ -454,7 +471,7 @@ resubmit_final: discard: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - kfree_skb(skb); + kfree_skb_reason(skb, reason); } static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fa63ef2bd99c..1f3d777e7694 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -469,6 +469,7 @@ int ip6_forward(struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); struct inet6_dev *idev; + SKB_DR(reason); u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); @@ -518,7 +519,7 @@ int ip6_forward(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return -ETIMEDOUT; } @@ -537,6 +538,7 @@ int ip6_forward(struct sk_buff *skb) if (!xfrm6_route_forward(skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); + SKB_DR_SET(reason, XFRM_POLICY); goto drop; } dst = skb_dst(skb); @@ -596,7 +598,7 @@ int ip6_forward(struct sk_buff *skb) __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); return -EMSGSIZE; } @@ -618,8 +620,9 @@ int ip6_forward(struct sk_buff *skb) error: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); + SKB_DR_SET(reason, IP_INADDRERRORS); drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return -EINVAL; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 53f632a560ec..19325b7600bb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -257,8 +257,6 @@ static int ip6_tnl_create2(struct net_device *dev) struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); int err; - t = netdev_priv(dev); - dev->rtnl_link_ops = &ip6_link_ops; err = register_netdevice(dev); if (err < 0) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index b3f163b40c2b..8970d0b4faeb 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -30,6 +30,10 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, fl6->daddr = iph->daddr; fl6->saddr = iph->saddr; } else { + if (nft_hook(pkt) == NF_INET_FORWARD && + priv->flags & NFTA_FIB_F_IIF) + fl6->flowi6_iif = nft_out(pkt)->ifindex; + fl6->daddr = iph->saddr; fl6->saddr = iph->daddr; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c51d5ce3711c..0d7c13d33d1a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -460,7 +460,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) */ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -477,7 +477,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (np->rxpmtu && np->rxopt.bits.rxpmtu) return ipv6_recv_rxpmtu(sk, msg, len, addr_len); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 169e9df6d172..9471ab4421c8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4482,6 +4482,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(dst->dev); struct inet6_dev *idev; + SKB_DR(reason); int type; if (netif_is_l3_master(skb->dev) || @@ -4494,11 +4495,14 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY) { + SKB_DR_SET(reason, IP_INADDRERRORS); IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); break; } + SKB_DR_SET(reason, IP_INNOROUTES); fallthrough; case IPSTATS_MIB_OUTNOROUTES: + SKB_DR_OR(reason, IP_OUTNOROUTES); IP6_INC_STATS(net, idev, ipstats_mib_noroutes); break; } @@ -4508,7 +4512,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) skb_dst_drop(skb); icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 13678d3908fa..782df529ff69 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2044,7 +2044,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), - tp->snd_cwnd, + tcp_snd_cwnd(tp), state == TCP_LISTEN ? fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7f0fa9bd9ffe..db9449b52dbe 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -322,7 +322,7 @@ static int udp6_skb_len(struct sk_buff *skb) */ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -342,7 +342,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, try_again: off = sk_peek_offset(sk, flags); - skb = __skb_recv_udp(sk, flags, noblock, &off, &err); + skb = __skb_recv_udp(sk, flags, &off, &err); if (!skb) return err; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index b2fcc46c1630..4251e49d32a0 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -20,8 +20,8 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); -int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len); +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, + int *addr_len); void udpv6_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a1760add5bf1..a0385ddbffcf 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1223,7 +1223,6 @@ static void iucv_process_message_q(struct sock *sk) static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); unsigned int copied, rlen; @@ -1242,7 +1241,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, /* receive/dequeue next skb: * the function understands MSG_PEEK and, thus, does not dequeue skb */ - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; diff --git a/net/key/af_key.c b/net/key/af_key.c index fd51db3be91c..d09ec26b1081 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3696,7 +3696,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index b3edafa5fba4..6af09e188e52 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -515,7 +515,7 @@ no_route: } static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) + size_t len, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; @@ -526,7 +526,7 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 96f975777438..217c7192691e 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -657,7 +657,7 @@ do_confirm: } static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); @@ -671,7 +671,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index bf35710127dd..8be1fdc68a0b 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -191,8 +191,7 @@ static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg, goto end; err = 0; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto end; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index e22b0cbb2f35..221863afc4b1 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -216,7 +216,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) return -EOPNOTSUPP; - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc); + skb = skb_recv_datagram(sk, flags, &rc); if (!skb) return rc; diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 61205cf40074..24df29e135ed 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -352,7 +352,7 @@ static void mctp_test_route_input_sk(struct kunit *test) if (params->deliver) { KUNIT_EXPECT_EQ(test, rc, 0); - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); KUNIT_EXPECT_EQ(test, skb->len, 1); @@ -360,7 +360,7 @@ static void mctp_test_route_input_sk(struct kunit *test) } else { KUNIT_EXPECT_NE(test, rc, 0); - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); KUNIT_EXPECT_PTR_EQ(test, skb2, NULL); } @@ -423,7 +423,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) rc = mctp_route_input(&rt->rt, skb); } - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); if (params->rx_len) { KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); @@ -582,7 +582,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) rc = mctp_route_input(&rt->rt, skb); /* (potentially) receive message */ - skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc); + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); if (params->deliver) KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index d6fdc5782d33..35b5f806fdda 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1527,10 +1527,9 @@ static int mpls_ifdown(struct net_device *dev, int event) rt->rt_nh_size; struct mpls_route *orig = rt; - rt = kmalloc(size, GFP_KERNEL); + rt = kmemdup(orig, size, GFP_KERNEL); if (!rt) return -ENOMEM; - memcpy(rt, orig, size); } } diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index f44125dd6697..dbb6d876a203 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -66,20 +66,103 @@ out_nosk: return err; } +struct mptcp_diag_ctx { + long s_slot; + long s_num; + unsigned int l_slot; + unsigned int l_num; +}; + +static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + bool net_admin) +{ + struct inet_diag_dump_data *cb_data = cb->data; + struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx; + struct nlattr *bc = cb_data->inet_diag_nla_bc; + struct net *net = sock_net(skb->sk); + int i; + + for (i = diag_ctx->l_slot; i < INET_LHTABLE_SIZE; i++) { + struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; + struct sock *sk; + int num = 0; + + ilb = &tcp_hashinfo.listening_hash[i]; + + rcu_read_lock(); + spin_lock(&ilb->lock); + sk_nulls_for_each(sk, node, &ilb->nulls_head) { + const struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk); + struct inet_sock *inet = inet_sk(sk); + int ret; + + if (num < diag_ctx->l_num) + goto next_listen; + + if (!ctx || strcmp(inet_csk(sk)->icsk_ulp_ops->name, "mptcp")) + goto next_listen; + + sk = ctx->conn; + if (!sk || !net_eq(sock_net(sk), net)) + goto next_listen; + + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_listen; + + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next_listen; + + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + goto next_listen; + + ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin); + + sock_put(sk); + + if (ret < 0) { + spin_unlock(&ilb->lock); + rcu_read_unlock(); + diag_ctx->l_slot = i; + diag_ctx->l_num = num; + return; + } + diag_ctx->l_num = num + 1; + num = 0; +next_listen: + ++num; + } + spin_unlock(&ilb->lock); + rcu_read_unlock(); + + cond_resched(); + diag_ctx->l_num = 0; + } + + diag_ctx->l_num = 0; + diag_ctx->l_slot = i; +} + static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); struct inet_diag_dump_data *cb_data; struct mptcp_sock *msk; struct nlattr *bc; + BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx)); + cb_data = cb->data; bc = cb_data->inet_diag_nla_bc; - while ((msk = mptcp_token_iter_next(net, &cb->args[0], &cb->args[1])) != - NULL) { + while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot, + &diag_ctx->s_num)) != NULL) { struct inet_sock *inet = (struct inet_sock *)msk; struct sock *sk = (struct sock *)msk; int ret = 0; @@ -101,11 +184,14 @@ next: sock_put(sk); if (ret < 0) { /* will retry on the same position */ - cb->args[1]--; + diag_ctx->s_num--; break; } cond_resched(); } + + if ((r->idiag_states & TCPF_LISTEN) && r->id.idiag_dport == 0) + mptcp_diag_dump_listeners(skb, cb, r, net_admin); } static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, @@ -116,6 +202,19 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_rqueue = sk_rmem_alloc_get(sk); r->idiag_wqueue = sk_wmem_alloc_get(sk); + + if (inet_sk_state_load(sk) == TCP_LISTEN) { + struct sock *lsk = READ_ONCE(msk->first); + + if (lsk) { + /* override with settings from tcp listener, + * so Send-Q will show accept queue. + */ + r->idiag_rqueue = READ_ONCE(lsk->sk_ack_backlog); + r->idiag_wqueue = READ_ONCE(lsk->sk_max_ack_backlog); + } + } + if (!info) return; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 01809eef29b4..8aa0cdb7ad46 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -262,14 +262,25 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, spin_unlock_bh(&pm->lock); } -void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) +void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = subflow->conn; + struct mptcp_sock *msk; pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); - subflow->backup = bkup; + msk = mptcp_sk(sk); + if (subflow->backup != bkup) { + subflow->backup = bkup; + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + msk->last_snd = NULL; + else + __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags); + mptcp_data_unlock(sk); + } - mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC); + mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC); } void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index b5e8de6f7507..c20261b612e9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -55,6 +55,17 @@ struct pm_nl_pernet { #define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3 +static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) +{ + return net_generic(net, pm_nl_pernet_id); +} + +static struct pm_nl_pernet * +pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) +{ + return pm_nl_get_pernet(sock_net((struct sock *)msk)); +} + static bool addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port) { @@ -206,43 +217,39 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) { - const struct pm_nl_pernet *pernet; + const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - pernet = net_generic(sock_net((const struct sock *)msk), pm_nl_pernet_id); return READ_ONCE(pernet->add_addr_signal_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) { - struct pm_nl_pernet *pernet; + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); return READ_ONCE(pernet->add_addr_accept_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) { - struct pm_nl_pernet *pernet; + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); return READ_ONCE(pernet->subflows_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) { - struct pm_nl_pernet *pernet; + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); - pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); return READ_ONCE(pernet->local_addr_max); } EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) { - struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, @@ -508,7 +515,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) struct pm_nl_pernet *pernet; unsigned int subflows_max; - pernet = net_generic(sock_net(sk), pm_nl_pernet_id); + pernet = pm_nl_get_pernet(sock_net(sk)); add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); local_addr_max = mptcp_pm_get_local_addr_max(msk); @@ -604,7 +611,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, unsigned int subflows_max; int i = 0; - pernet = net_generic(sock_net(sk), pm_nl_pernet_id); + pernet = pm_nl_get_pernet_from_msk(msk); subflows_max = mptcp_pm_get_subflows_max(msk); rcu_read_lock(); @@ -727,6 +734,8 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, if (!addresses_equal(&local, addr, addr->port)) continue; + if (subflow->backup != bkup) + msk->last_snd = NULL; subflow->backup = bkup; subflow->send_mp_prio = 1; subflow->request_bkup = bkup; @@ -1021,7 +1030,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) if (addresses_equal(&msk_local, &skc_local, false)) return 0; - pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); + pernet = pm_nl_get_pernet_from_msk(msk); rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { @@ -1212,7 +1221,7 @@ skip_family: static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) { - return net_generic(genl_info_net(info), pm_nl_pernet_id); + return pm_nl_get_pernet(genl_info_net(info)); } static int mptcp_nl_add_subflow_or_signal_addr(struct net *net) @@ -1306,7 +1315,7 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, if (id) { rcu_read_lock(); - entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id); + entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); if (entry) { *flags = entry->flags; *ifindex = entry->ifindex; @@ -1653,7 +1662,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg, void *hdr; int i; - pernet = net_generic(net, pm_nl_pernet_id); + pernet = pm_nl_get_pernet(net); spin_lock_bh(&pernet->lock); for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { @@ -2165,7 +2174,7 @@ static struct genl_family mptcp_genl_family __ro_after_init = { static int __net_init pm_nl_init_net(struct net *net) { - struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id); + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); INIT_LIST_HEAD_RCU(&pernet->local_addr_list); @@ -2187,7 +2196,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list) struct net *net; list_for_each_entry(net, net_list, exit_list) { - struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id); + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); /* net is removed from namespace list, can't race with * other modifiers, also netns core already waited for a diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0cbea3b6d0a4..0492aa9308c7 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2012,7 +2012,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk) } static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) + int flags, int *addr_len) { struct mptcp_sock *msk = mptcp_sk(sk); struct scm_timestamping_internal tss; @@ -2030,7 +2030,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out_err; } - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); len = min_t(size_t, len, INT_MAX); target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -3092,15 +3092,19 @@ static void mptcp_release_cb(struct sock *sk) spin_lock_bh(&sk->sk_lock.slock); } - /* be sure to set the current sk state before tacking actions - * depending on sk_state - */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags)) - __mptcp_set_connected(sk); if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) __mptcp_clean_una_wakeup(sk); - if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) - __mptcp_error_report(sk); + if (unlikely(&msk->cb_flags)) { + /* be sure to set the current sk state before tacking actions + * depending on sk_state, that is processing MPTCP_ERROR_REPORT + */ + if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags)) + __mptcp_set_connected(sk); + if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) + __mptcp_error_report(sk); + if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags)) + msk->last_snd = NULL; + } __mptcp_update_rmem(sk); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3c1a3036550f..aca1fb56523f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -124,6 +124,7 @@ #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 #define MPTCP_CONNECTED 6 +#define MPTCP_RESET_SCHEDULER 7 static inline bool before64(__u64 seq1, __u64 seq2) { diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index f949d22f52bd..826b0c1dae98 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -853,15 +853,11 @@ out: void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) { - struct sock *sk = &msk->sk.icsk_inet.sk; u32 flags = 0; - bool slow; u8 val; memset(info, 0, sizeof(*info)); - slow = lock_sock_fast(sk); - info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); @@ -882,8 +878,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_snd_una = READ_ONCE(msk->snd_una); info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); - - unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index fe98673dd5ac..bc4d5cd63a94 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -38,6 +38,7 @@ * @l4proto - Layer 4 protocol * Values: * IPPROTO_TCP, IPPROTO_UDP + * @dir: - connection tracking tuple direction. * @reserved - Reserved member, will be reused for more options in future * Values: * 0 @@ -46,7 +47,8 @@ struct bpf_ct_opts { s32 netns_id; s32 error; u8 l4proto; - u8 reserved[3]; + u8 dir; + u8 reserved[2]; }; enum { @@ -56,10 +58,11 @@ enum { static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, struct bpf_sock_tuple *bpf_tuple, u32 tuple_len, u8 protonum, - s32 netns_id) + s32 netns_id, u8 *dir) { struct nf_conntrack_tuple_hash *hash; struct nf_conntrack_tuple tuple; + struct nf_conn *ct; if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP)) return ERR_PTR(-EPROTO); @@ -99,7 +102,12 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, put_net(net); if (!hash) return ERR_PTR(-ENOENT); - return nf_ct_tuplehash_to_ctrack(hash); + + ct = nf_ct_tuplehash_to_ctrack(hash); + if (dir) + *dir = NF_CT_DIRECTION(hash); + + return ct; } __diag_push(); @@ -135,13 +143,13 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, if (!opts) return NULL; if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || - opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts__sz != NF_BPF_CT_OPTS_SZ) { opts->error = -EINVAL; return NULL; } caller_net = dev_net(ctx->rxq->dev); nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, - opts->netns_id); + opts->netns_id, &opts->dir); if (IS_ERR(nfct)) { opts->error = PTR_ERR(nfct); return NULL; @@ -178,13 +186,13 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, if (!opts) return NULL; if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || - opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { + opts__sz != NF_BPF_CT_OPTS_SZ) { opts->error = -EINVAL; return NULL; } caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, - opts->netns_id); + opts->netns_id, &opts->dir); if (IS_ERR(nfct)) { opts->error = PTR_ERR(nfct); return NULL; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 07e65b4e92f8..0cb2da0a759a 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -96,8 +96,8 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) static void ecache_work(struct work_struct *work) { - struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work); - struct netns_ct *ctnet = cnet->ct_net; + struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work); + struct netns_ct *ctnet = cnet->ecache.ct_net; int cpu, delay = -1; struct ct_pcpu *pcpu; @@ -127,7 +127,7 @@ static void ecache_work(struct work_struct *work) ctnet->ecache_dwork_pending = delay > 0; if (delay >= 0) - schedule_delayed_work(&cnet->ecache_dwork, delay); + schedule_delayed_work(&cnet->ecache.dwork, delay); } static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, @@ -293,12 +293,12 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) struct nf_conntrack_net *cnet = nf_ct_pernet(net); if (state == NFCT_ECACHE_DESTROY_FAIL && - !delayed_work_pending(&cnet->ecache_dwork)) { - schedule_delayed_work(&cnet->ecache_dwork, HZ); + !delayed_work_pending(&cnet->ecache.dwork)) { + schedule_delayed_work(&cnet->ecache.dwork, HZ); net->ct.ecache_dwork_pending = true; } else if (state == NFCT_ECACHE_DESTROY_SENT) { net->ct.ecache_dwork_pending = false; - mod_delayed_work(system_wq, &cnet->ecache_dwork, 0); + mod_delayed_work(system_wq, &cnet->ecache.dwork, 0); } } @@ -310,8 +310,9 @@ void nf_conntrack_ecache_pernet_init(struct net *net) struct nf_conntrack_net *cnet = nf_ct_pernet(net); net->ct.sysctl_events = nf_ct_events; - cnet->ct_net = &net->ct; - INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work); + + cnet->ecache.ct_net = &net->ct; + INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work); BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */ } @@ -320,5 +321,5 @@ void nf_conntrack_ecache_pernet_fini(struct net *net) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); - cancel_delayed_work_sync(&cnet->ecache_dwork); + cancel_delayed_work_sync(&cnet->ecache.dwork); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1ea2ad732d57..924d766e6c53 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1708,6 +1708,47 @@ static int ctnetlink_done_list(struct netlink_callback *cb) return 0; } +static int ctnetlink_dump_one_entry(struct sk_buff *skb, + struct netlink_callback *cb, + struct nf_conn *ct, + bool dying) +{ + struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + u8 l3proto = nfmsg->nfgen_family; + int res; + + if (l3proto && nf_ct_l3num(ct) != l3proto) + return 0; + + if (ctx->last) { + if (ct != ctx->last) + return 0; + + ctx->last = NULL; + } + + /* We can't dump extension info for the unconfirmed + * list because unconfirmed conntracks can have + * ct->ext reallocated (and thus freed). + * + * In the dying list case ct->ext can't be free'd + * until after we drop pcpu->lock. + */ + res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + ct, dying, 0); + if (res < 0) { + if (!refcount_inc_not_zero(&ct->ct_general.use)) + return 0; + + ctx->last = ct; + } + + return res; +} + static int ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) { @@ -1715,12 +1756,9 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - u_int8_t l3proto = nfmsg->nfgen_family; - int res; - int cpu; struct hlist_nulls_head *list; struct net *net = sock_net(skb->sk); + int res, cpu; if (ctx->done) return 0; @@ -1739,30 +1777,10 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying restart: hlist_nulls_for_each_entry(h, n, list, hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); - if (l3proto && nf_ct_l3num(ct) != l3proto) - continue; - if (ctx->last) { - if (ct != last) - continue; - ctx->last = NULL; - } - /* We can't dump extension info for the unconfirmed - * list because unconfirmed conntracks can have - * ct->ext reallocated (and thus freed). - * - * In the dying list case ct->ext can't be free'd - * until after we drop pcpu->lock. - */ - res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NFNL_MSG_TYPE(cb->nlh->nlmsg_type), - ct, dying, 0); + res = ctnetlink_dump_one_entry(skb, cb, ct, dying); if (res < 0) { - if (!refcount_inc_not_zero(&ct->ct_general.use)) - continue; ctx->cpu = cpu; - ctx->last = ct; spin_unlock_bh(&pcpu->lock); goto out; } diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 13234641cdb3..77bcb10fc586 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -40,6 +40,12 @@ struct arppayload { unsigned char ip_dst[4]; }; +/* Guard against containers flooding syslog. */ +static bool nf_log_allowed(const struct net *net) +{ + return net_eq(net, &init_net) || sysctl_nf_log_all_netns; +} + static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb) { u16 vid; @@ -133,8 +139,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf, { struct nf_log_buf *m; - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) + if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); @@ -766,9 +771,9 @@ dump_ipv6_packet(struct net *net, struct nf_log_buf *m, nf_log_buf_add(m, "MARK=0x%x ", skb->mark); } -static void dump_ipv4_mac_header(struct nf_log_buf *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) +static void dump_mac_header(struct nf_log_buf *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) { struct net_device *dev = skb->dev; unsigned int logflags = 0; @@ -798,9 +803,26 @@ fallback: const unsigned char *p = skb_mac_header(skb); unsigned int i; - nf_log_buf_add(m, "%02x", *p++); - for (i = 1; i < dev->hard_header_len; i++, p++) - nf_log_buf_add(m, ":%02x", *p); + if (dev->type == ARPHRD_SIT) { + p -= ETH_HLEN; + + if (p < skb->head) + p = NULL; + } + + if (p) { + nf_log_buf_add(m, "%02x", *p++); + for (i = 1; i < dev->hard_header_len; i++) + nf_log_buf_add(m, ":%02x", *p++); + } + + if (dev->type == ARPHRD_SIT) { + const struct iphdr *iph = + (struct iphdr *)skb_mac_header(skb); + + nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr, + &iph->daddr); + } } nf_log_buf_add(m, " "); } @@ -814,8 +836,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, { struct nf_log_buf *m; - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) + if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); @@ -827,7 +848,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, out, loginfo, prefix); if (in) - dump_ipv4_mac_header(m, loginfo, skb); + dump_mac_header(m, loginfo, skb); dump_ipv4_packet(net, m, loginfo, skb, 0); @@ -841,64 +862,6 @@ static struct nf_logger nf_ip_logger __read_mostly = { .me = THIS_MODULE, }; -static void dump_ipv6_mac_header(struct nf_log_buf *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & NF_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); - nf_log_dump_vlan(m, skb); - nf_log_buf_add(m, "MACPROTO=%04x ", - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - nf_log_buf_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int len = dev->hard_header_len; - unsigned int i; - - if (dev->type == ARPHRD_SIT) { - p -= ETH_HLEN; - - if (p < skb->head) - p = NULL; - } - - if (p) { - nf_log_buf_add(m, "%02x", *p++); - for (i = 1; i < len; i++) - nf_log_buf_add(m, ":%02x", *p++); - } - nf_log_buf_add(m, " "); - - if (dev->type == ARPHRD_SIT) { - const struct iphdr *iph = - (struct iphdr *)skb_mac_header(skb); - nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, - &iph->daddr); - } - } else { - nf_log_buf_add(m, " "); - } -} - static void nf_log_ip6_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -908,8 +871,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf, { struct nf_log_buf *m; - /* FIXME: Disabled from containers until syslog ns is supported */ - if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns) + if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); @@ -921,7 +883,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf, loginfo, prefix); if (in) - dump_ipv6_mac_header(m, loginfo, skb); + dump_mac_header(m, loginfo, skb); dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1); @@ -935,6 +897,32 @@ static struct nf_logger nf_ip6_logger __read_mostly = { .me = THIS_MODULE, }; +static void nf_log_unknown_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct nf_log_buf *m; + + if (!nf_log_allowed(net)) + return; + + m = nf_log_buf_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, + prefix); + + dump_mac_header(m, loginfo, skb); + + nf_log_buf_close(m); +} + static void nf_log_netdev_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, @@ -954,6 +942,10 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf, case htons(ETH_P_RARP): nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); break; + default: + nf_log_unknown_packet(net, pf, hooknum, skb, + in, out, loginfo, prefix); + break; } } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 16c3a39689f4..f3ad02a399f8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8367,10 +8367,8 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha if (chain->blob_next || !nft_is_active_next(net, chain)) return 0; - rule = list_entry(&chain->rules, struct nft_rule, list); - data_size = 0; - list_for_each_entry_continue(rule, &chain->rules, list) { + list_for_each_entry(rule, &chain->rules, list) { if (nft_is_active_next(net, rule)) { data_size += sizeof(*prule) + rule->dlen; if (data_size > INT_MAX) @@ -8387,7 +8385,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha data_boundary = data + data_size; size = 0; - list_for_each_entry_continue(rule, &chain->rules, list) { + list_for_each_entry(rule, &chain->rules, list) { if (!nft_is_active_next(net, rule)) continue; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index b0d8888a539b..eea486f32971 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -158,6 +158,7 @@ static int cttimeout_new_timeout(struct sk_buff *skb, timeout->timeout.l3num = l3num; timeout->timeout.l4proto = l4proto; refcount_set(&timeout->refcnt, 1); + __module_get(THIS_MODULE); list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list); return 0; @@ -506,13 +507,8 @@ static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net, if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; - if (!try_module_get(THIS_MODULE)) + if (!refcount_inc_not_zero(&timeout->refcnt)) goto err; - - if (!refcount_inc_not_zero(&timeout->refcnt)) { - module_put(THIS_MODULE); - goto err; - } matching = timeout; break; } @@ -525,10 +521,10 @@ static void ctnl_timeout_put(struct nf_ct_timeout *t) struct ctnl_timeout *timeout = container_of(t, struct ctnl_timeout, timeout); - if (refcount_dec_and_test(&timeout->refcnt)) + if (refcount_dec_and_test(&timeout->refcnt)) { kfree_rcu(timeout, rcu_head); - - module_put(THIS_MODULE); + module_put(THIS_MODULE); + } } static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index f590ee1c8a1b..83590afe3768 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -30,7 +30,7 @@ static void nft_bitwise_eval_bool(u32 *dst, const u32 *src, { unsigned int i; - for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) + for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++) dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i]; } @@ -109,22 +109,23 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv, return err; if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) { err = -EINVAL; - goto err1; + goto err_mask_release; } err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor, tb[NFTA_BITWISE_XOR]); if (err < 0) - goto err1; + goto err_mask_release; if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) { err = -EINVAL; - goto err2; + goto err_xor_release; } return 0; -err2: + +err_xor_release: nft_data_release(&priv->xor, xor.type); -err1: +err_mask_release: nft_data_release(&priv->mask, mask.type); return err; } diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index f198f2d9ef90..1f12d7ade606 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -35,6 +35,10 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, case NFT_FIB_RESULT_OIF: case NFT_FIB_RESULT_OIFNAME: hooks = (1 << NF_INET_PRE_ROUTING); + if (priv->flags & NFTA_FIB_F_IIF) { + hooks |= (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD); + } break; case NFT_FIB_RESULT_ADDRTYPE: if (priv->flags & NFTA_FIB_F_IIF) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 47a876ccd288..9fa85bb36c0e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1931,7 +1931,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - int noblock = flags & MSG_DONTWAIT; size_t copied; struct sk_buff *skb, *data_skb; int err, ret; @@ -1941,7 +1940,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, copied = 0; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index fa9dc2ba3941..6f7f4392cffb 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1159,7 +1159,8 @@ static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } /* Now we can treat all alike */ - if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL) { + skb = skb_recv_datagram(sk, flags, &er); + if (!skb) { release_sock(sk); return er; } diff --git a/net/nfc/core.c b/net/nfc/core.c index dc7a2404efdf..67524982b89b 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1165,6 +1165,7 @@ void nfc_unregister_device(struct nfc_dev *dev) if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); + dev->rfkill = NULL; } device_unlock(&dev->dev); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 4ca35791c93b..77642d18a3b4 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -821,7 +821,6 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; unsigned int copied, rlen; struct sk_buff *skb, *cskb; @@ -842,7 +841,7 @@ static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (flags & (MSG_OOB)) return -EOPNOTSUPP; - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { pr_err("Recv datagram failed state %d %d %d", sk->sk_state, err, sock_error(sk)); diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 0ca214ab5aef..8dd569765f96 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -238,7 +238,6 @@ static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; struct sk_buff *skb; int copied; @@ -246,7 +245,7 @@ static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, pr_debug("sock=%p sk=%p len=%zu flags=%d\n", sock, sk, len, flags); - skb = skb_recv_datagram(sk, flags, noblock, &rc); + skb = skb_recv_datagram(sk, flags, &rc); if (!skb) return rc; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c39c09899fd0..d3caaf4d4b3e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3421,7 +3421,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, * but then it will block. */ - skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); + skb = skb_recv_datagram(sk, flags, &err); /* * An error occurred so return it. Because skb_recv_datagram() diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 393e6aa7a592..ff5f49ab236e 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -112,7 +112,7 @@ static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; @@ -123,7 +123,7 @@ static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, MSG_CMSG_COMPAT)) goto out_nofree; - skb = skb_recv_datagram(sk, flags, noblock, &rval); + skb = skb_recv_datagram(sk, flags, &rval); if (skb == NULL) goto out_nofree; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 65d463ad8770..83ea13a50690 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -772,7 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, u8 pipe_handle, enabled, n_sb; u8 aligned = 0; - skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp); + skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + errp); if (!skb) return NULL; @@ -1238,7 +1239,7 @@ struct sk_buff *pep_read(struct sock *sk) } static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct sk_buff *skb; int err; @@ -1267,7 +1268,7 @@ static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return -EINVAL; } - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = skb_recv_datagram(sk, flags, &err); lock_sock(sk); if (skb == NULL) { if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT) diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index ec2322529727..5c2fb992803b 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -1035,8 +1035,7 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, return -EADDRNOTAVAIL; } - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &rc); + skb = skb_recv_datagram(sk, flags, &rc); if (!skb) { release_sock(sk); return rc; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 30a1cf4c16c6..bf2d986a6bc3 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1230,7 +1230,8 @@ static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return -ENOTCONN; /* Now we can treat all alike */ - if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL) + skb = skb_recv_datagram(sk, flags, &er); + if (!skb) return er; qbit = (skb->data[0] & ROSE_Q_BIT) == ROSE_Q_BIT; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 4f51094da9da..da9733da9868 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -195,7 +195,7 @@ static int offload_action_init(struct flow_offload_action *fl_action, if (act->ops->offload_act_setup) { spin_lock_bh(&act->tcfa_lock); err = act->ops->offload_act_setup(act, fl_action, NULL, - false); + false, extack); spin_unlock_bh(&act->tcfa_lock); return err; } @@ -271,7 +271,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action, if (err) goto fl_err; - err = tc_setup_action(&fl_action->action, actions); + err = tc_setup_action(&fl_action->action, actions, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to setup tc actions for offload"); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index e0f515b774ca..22847ee009ef 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -696,7 +696,8 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) } static int tcf_csum_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b1f502fce595..8af9d6e5ba61 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1584,7 +1584,8 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, } static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index bde6a6c01e64..ac29d1065232 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -253,7 +253,8 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) } static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -267,7 +268,17 @@ static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data, } else if (is_tcf_gact_goto_chain(act)) { entry->id = FLOW_ACTION_GOTO; entry->chain_index = tcf_gact_goto_chain_index(act); + } else if (is_tcf_gact_continue(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload of \"continue\" action is not supported"); + return -EOPNOTSUPP; + } else if (is_tcf_gact_reclassify(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload of \"reclassify\" action is not supported"); + return -EOPNOTSUPP; + } else if (is_tcf_gact_pipe(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload of \"pipe\" action is not supported"); + return -EOPNOTSUPP; } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported generic action offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index d56e73843a4b..fd5155274733 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -619,7 +619,8 @@ static int tcf_gate_get_entries(struct flow_action_entry *entry, } static int tcf_gate_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { int err; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 39acd1d18609..ebb92fb072ab 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -460,7 +460,8 @@ static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry, } static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -478,6 +479,7 @@ static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, entry->id = FLOW_ACTION_MIRRED_INGRESS; tcf_offload_mirred_get_dev(entry, act); } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported mirred offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index b9ff3459fdab..adabeccb63e1 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -385,7 +385,8 @@ static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index) } static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -410,7 +411,14 @@ static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data, entry->mpls_mangle.bos = tcf_mpls_bos(act); entry->mpls_mangle.ttl = tcf_mpls_ttl(act); break; + case TCA_MPLS_ACT_DEC_TTL: + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"dec_ttl\" option is used"); + return -EOPNOTSUPP; + case TCA_MPLS_ACT_MAC_PUSH: + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"mac_push\" option is used"); + return -EOPNOTSUPP; default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported MPLS mode offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 31fcd279c177..e01ef7f109f4 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -488,7 +488,8 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) } static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -503,6 +504,7 @@ static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data, entry->id = FLOW_ACTION_ADD; break; default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload"); return -EOPNOTSUPP; } entry->mangle.htype = tcf_pedit_htype(act, k); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index f4d917705263..79c8901f66ab 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -419,7 +419,8 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } -static int tcf_police_act_to_flow_act(int tc_act, u32 *extval) +static int tcf_police_act_to_flow_act(int tc_act, u32 *extval, + struct netlink_ext_ack *extack) { int act_id = -EOPNOTSUPP; @@ -430,19 +431,28 @@ static int tcf_police_act_to_flow_act(int tc_act, u32 *extval) act_id = FLOW_ACTION_DROP; else if (tc_act == TC_ACT_PIPE) act_id = FLOW_ACTION_PIPE; + else if (tc_act == TC_ACT_RECLASSIFY) + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"reclassify\""); + else + NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload"); } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_GOTO_CHAIN)) { act_id = FLOW_ACTION_GOTO; *extval = tc_act & TC_ACT_EXT_VAL_MASK; } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_JUMP)) { act_id = FLOW_ACTION_JUMP; *extval = tc_act & TC_ACT_EXT_VAL_MASK; + } else if (tc_act == TC_ACT_UNSPEC) { + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"continue\""); + } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload"); } return act_id; } static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -466,14 +476,16 @@ static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data, entry->police.mtu = tcf_police_tcfp_mtu(act); act_id = tcf_police_act_to_flow_act(police->tcf_action, - &entry->police.exceed.extval); + &entry->police.exceed.extval, + extack); if (act_id < 0) return act_id; entry->police.exceed.act_id = act_id; act_id = tcf_police_act_to_flow_act(p->tcfp_result, - &entry->police.notexceed.extval); + &entry->police.notexceed.extval, + extack); if (act_id < 0) return act_id; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 9a22cdda6bbd..2f7f5e44d28c 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -291,7 +291,8 @@ static void tcf_offload_sample_get_group(struct flow_action_entry *entry, } static int tcf_sample_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index ceba11b198bb..92d0dc754207 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -328,7 +328,8 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) } static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -342,7 +343,14 @@ static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data } else if (is_tcf_skbedit_priority(act)) { entry->id = FLOW_ACTION_PRIORITY; entry->priority = tcf_skbedit_priority(act); + } else if (is_tcf_skbedit_queue_mapping(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used"); + return -EOPNOTSUPP; + } else if (is_tcf_skbedit_inheritdsfield(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used"); + return -EOPNOTSUPP; } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported skbedit option offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 23aba03d26a8..856dc23cef8c 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -808,7 +808,8 @@ static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, static int tcf_tunnel_key_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, - bool bind) + bool bind, + struct netlink_ext_ack *extack) { int err; @@ -823,6 +824,7 @@ static int tcf_tunnel_key_offload_act_setup(struct tc_action *act, } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel key mode offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 883454c4f921..68b5e772386a 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -369,7 +369,8 @@ static size_t tcf_vlan_get_fill_size(const struct tc_action *act) } static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -398,6 +399,7 @@ static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, tcf_vlan_push_eth(entry->vlan_push_eth.src, entry->vlan_push_eth.dst, act); break; default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported vlan action mode offload"); return -EOPNOTSUPP; } *index_inc = 1; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f0699f39afdb..9bb4d3dcc994 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3513,20 +3513,25 @@ EXPORT_SYMBOL(tc_cleanup_offload_action); static int tc_setup_offload_act(struct tc_action *act, struct flow_action_entry *entry, - u32 *index_inc) + u32 *index_inc, + struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT - if (act->ops->offload_act_setup) - return act->ops->offload_act_setup(act, entry, index_inc, true); - else + if (act->ops->offload_act_setup) { + return act->ops->offload_act_setup(act, entry, index_inc, true, + extack); + } else { + NL_SET_ERR_MSG(extack, "Action does not support offload"); return -EOPNOTSUPP; + } #else return 0; #endif } int tc_setup_action(struct flow_action *flow_action, - struct tc_action *actions[]) + struct tc_action *actions[], + struct netlink_ext_ack *extack) { int i, j, index, err = 0; struct tc_action *act; @@ -3551,7 +3556,7 @@ int tc_setup_action(struct flow_action *flow_action, entry->hw_stats = tc_act_hw_stats(act->hw_stats); entry->hw_index = act->tcfa_index; index = 0; - err = tc_setup_offload_act(act, entry, &index); + err = tc_setup_offload_act(act, entry, &index, extack); if (!err) j += index; else @@ -3570,13 +3575,14 @@ err_out_locked: } int tc_setup_offload_action(struct flow_action *flow_action, - const struct tcf_exts *exts) + const struct tcf_exts *exts, + struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT if (!exts) return 0; - return tc_setup_action(flow_action, exts->actions); + return tc_setup_action(flow_action, exts->actions, extack); #else return 0; #endif diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index ed5e6f08e74a..222b0b8fac7a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -464,14 +464,12 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts, + cls_flower.common.extack); if (err) { kfree(cls_flower.rule); - if (skip_sw) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - return 0; + + return skip_sw ? err : 0; } err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, @@ -2362,11 +2360,11 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts, + cls_flower.common.extack); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); __fl_put(f); return err; } diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index ca5670fd5228..06cf22adbab7 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -97,16 +97,13 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts, + cls_mall.common.extack); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); - if (skip_sw) - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - else - err = 0; - return err; + return skip_sw ? err : 0; } err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, @@ -302,14 +299,12 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = (unsigned long)head; - err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts, + cls_mall.common.extack); if (err) { kfree(cls_mall.rule); - if (add && tc_skip_sw(head->flags)) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - return 0; + + return add && tc_skip_sw(head->flags) ? err : 0; } err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7b0427658056..3e3fe923bed5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2084,7 +2084,7 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) * 5 for complete description of the flags. */ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); @@ -2093,9 +2093,8 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int err = 0; int skb_len; - pr_debug("%s: sk:%p, msghdr:%p, len:%zd, noblock:%d, flags:0x%x, " - "addr_len:%p)\n", __func__, sk, msg, len, noblock, flags, - addr_len); + pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p)\n", + __func__, sk, msg, len, flags, addr_len); lock_sock(sk); @@ -2105,7 +2104,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; } - skb = sctp_skb_recv_datagram(sk, flags, noblock, &err); + skb = sctp_skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -8978,14 +8977,13 @@ out: * Note: This is pretty much the same routine as in core/datagram.c * with a few changes to make lksctp work. */ -struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, - int noblock, int *err) +struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, int *err) { int error; struct sk_buff *skb; long timeo; - timeo = sock_rcvtimeo(sk, noblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); pr_debug("%s: timeo:%ld, max:%ld\n", __func__, timeo, MAX_SCHEDULE_TIMEOUT); @@ -9018,7 +9016,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, break; if (sk_can_busy_loop(sk)) { - sk_busy_loop(sk, noblock); + sk_busy_loop(sk, flags & MSG_DONTWAIT); if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) continue; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 0c3d2b4d7321..8920ca92a011 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -1063,7 +1063,7 @@ void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event, struct sk_buff *skb; int err; - skb = sctp_skb_recv_datagram(sk, MSG_PEEK, 1, &err); + skb = sctp_skb_recv_datagram(sk, MSG_PEEK | MSG_DONTWAIT, &err); if (skb != NULL) { __sctp_ulpevent_read_nxtinfo(sctp_skb2event(skb), msghdr, skb); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cc35ec433400..45336e68bf79 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -464,7 +464,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) 0, 0, MSG_PEEK | MSG_DONTWAIT); if (err < 0) goto out_recv_err; - skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); + skb = skb_recv_udp(svsk->sk_sk, MSG_DONTWAIT, &err); if (!skb) goto out_recv_err; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8ab64ea46870..5c91c5457197 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1355,7 +1355,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) if (sk == NULL) goto out; for (;;) { - skb = skb_recv_udp(sk, 0, 1, &err); + skb = skb_recv_udp(sk, MSG_DONTWAIT, &err); if (skb == NULL) break; xs_udp_data_read_skb(&transport->xprt, sk, skb); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 12f7b56771d9..5e4bd8ba48d3 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -962,11 +962,9 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, tls_ctx->rx.rec_seq, rxm->full_len, is_encrypted, is_decrypted); - ctx->sw.decrypted |= is_decrypted; - if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) { if (likely(is_encrypted || is_decrypted)) - return 0; + return is_decrypted; /* After tls_device_down disables the offload, the next SKB will * likely have initial fragments decrypted, and final ones not @@ -981,7 +979,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, */ if (is_decrypted) { ctx->resync_nh_reset = 1; - return 0; + return is_decrypted; } if (is_encrypted) { tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index a8976ef95528..ddbe05ec5489 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -44,6 +44,11 @@ #include <net/strparser.h> #include <net/tls.h> +struct tls_decrypt_arg { + bool zc; + bool async; +}; + noinline void tls_err_abort(struct sock *sk, int err) { WARN_ON_ONCE(err >= 0); @@ -128,32 +133,31 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len) return __skb_nsg(skb, offset, len, 0); } -static int padding_length(struct tls_sw_context_rx *ctx, - struct tls_prot_info *prot, struct sk_buff *skb) +static int padding_length(struct tls_prot_info *prot, struct sk_buff *skb) { struct strp_msg *rxm = strp_msg(skb); + struct tls_msg *tlm = tls_msg(skb); int sub = 0; /* Determine zero-padding length */ if (prot->version == TLS_1_3_VERSION) { + int offset = rxm->full_len - TLS_TAG_SIZE - 1; char content_type = 0; int err; - int back = 17; while (content_type == 0) { - if (back > rxm->full_len - prot->prepend_size) + if (offset < prot->prepend_size) return -EBADMSG; - err = skb_copy_bits(skb, - rxm->offset + rxm->full_len - back, + err = skb_copy_bits(skb, rxm->offset + offset, &content_type, 1); if (err) return err; if (content_type) break; sub++; - back++; + offset--; } - ctx->control = content_type; + tlm->control = content_type; } return sub; } @@ -169,7 +173,6 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) struct scatterlist *sg; struct sk_buff *skb; unsigned int pages; - int pending; skb = (struct sk_buff *)req->data; tls_ctx = tls_get_ctx(skb->sk); @@ -185,17 +188,12 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) tls_err_abort(skb->sk, err); } else { struct strp_msg *rxm = strp_msg(skb); - int pad; - pad = padding_length(ctx, prot, skb); - if (pad < 0) { - ctx->async_wait.err = pad; - tls_err_abort(skb->sk, pad); - } else { - rxm->full_len -= pad; - rxm->offset += prot->prepend_size; - rxm->full_len -= prot->overhead_size; - } + /* No TLS 1.3 support with async crypto */ + WARN_ON(prot->tail_size); + + rxm->offset += prot->prepend_size; + rxm->full_len -= prot->overhead_size; } /* After using skb->sk to propagate sk through crypto async callback @@ -217,9 +215,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) kfree(aead_req); spin_lock_bh(&ctx->decrypt_compl_lock); - pending = atomic_dec_return(&ctx->decrypt_pending); - - if (!pending && ctx->async_notify) + if (!atomic_dec_return(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); spin_unlock_bh(&ctx->decrypt_compl_lock); } @@ -231,7 +227,7 @@ static int tls_do_decryption(struct sock *sk, char *iv_recv, size_t data_len, struct aead_request *aead_req, - bool async) + struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; @@ -244,7 +240,7 @@ static int tls_do_decryption(struct sock *sk, data_len + prot->tag_size, (u8 *)iv_recv); - if (async) { + if (darg->async) { /* Using skb->sk to push sk through to crypto async callback * handler. This allows propagating errors up to the socket * if needed. It _must_ be cleared in the async handler @@ -264,14 +260,15 @@ static int tls_do_decryption(struct sock *sk, ret = crypto_aead_decrypt(aead_req); if (ret == -EINPROGRESS) { - if (async) - return ret; + if (darg->async) + return 0; ret = crypto_wait_req(ret, &ctx->async_wait); } + darg->async = false; - if (async) - atomic_dec(&ctx->decrypt_pending); + if (ret == -EBADMSG) + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); return ret; } @@ -1346,15 +1343,14 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, return skb; } -static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from, +static int tls_setup_from_iter(struct iov_iter *from, int length, int *pages_used, - unsigned int *size_used, struct scatterlist *to, int to_max_pages) { int rc = 0, i = 0, num_elem = *pages_used, maxpages; struct page *pages[MAX_SKB_FRAGS]; - unsigned int size = *size_used; + unsigned int size = 0; ssize_t copied, use; size_t offset; @@ -1397,8 +1393,7 @@ static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from, sg_mark_end(&to[num_elem - 1]); out: if (rc) - iov_iter_revert(from, size - *size_used); - *size_used = size; + iov_iter_revert(from, size); *pages_used = num_elem; return rc; @@ -1415,12 +1410,13 @@ out: static int decrypt_internal(struct sock *sk, struct sk_buff *skb, struct iov_iter *out_iov, struct scatterlist *out_sg, - int *chunk, bool *zc, bool async) + struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; struct strp_msg *rxm = strp_msg(skb); + struct tls_msg *tlm = tls_msg(skb); int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0; struct aead_request *aead_req; struct sk_buff *unused; @@ -1431,7 +1427,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, prot->tail_size; int iv_offset = 0; - if (*zc && (out_iov || out_sg)) { + if (darg->zc && (out_iov || out_sg)) { if (out_iov) n_sgout = 1 + iov_iter_npages_cap(out_iov, INT_MAX, data_len); @@ -1441,7 +1437,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, rxm->full_len - prot->prepend_size); } else { n_sgout = 0; - *zc = false; + darg->zc = false; n_sgin = skb_cow_data(skb, 0, &unused); } @@ -1456,7 +1452,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); mem_size = aead_size + (nsg * sizeof(struct scatterlist)); mem_size = mem_size + prot->aad_size; - mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv); + mem_size = mem_size + MAX_IV_SIZE; /* Allocate a single block of memory which contains * aead_req || sgin[] || sgout[] || aad || iv. @@ -1486,26 +1482,26 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, } /* Prepare IV */ - err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, - iv + iv_offset + prot->salt_size, - prot->iv_size); - if (err < 0) { - kfree(mem); - return err; - } if (prot->version == TLS_1_3_VERSION || - prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) + prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) { memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->iv_size + prot->salt_size); - else + } else { + err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, + iv + iv_offset + prot->salt_size, + prot->iv_size); + if (err < 0) { + kfree(mem); + return err; + } memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); - + } xor_iv_with_seq(prot, iv + iv_offset, tls_ctx->rx.rec_seq); /* Prepare AAD */ tls_make_aad(aad, rxm->full_len - prot->overhead_size + prot->tail_size, - tls_ctx->rx.rec_seq, ctx->control, prot); + tls_ctx->rx.rec_seq, tlm->control, prot); /* Prepare sgin */ sg_init_table(sgin, n_sgin); @@ -1523,9 +1519,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, sg_init_table(sgout, n_sgout); sg_set_buf(&sgout[0], aad, prot->aad_size); - *chunk = 0; - err = tls_setup_from_iter(sk, out_iov, data_len, - &pages, chunk, &sgout[1], + err = tls_setup_from_iter(out_iov, data_len, + &pages, &sgout[1], (n_sgout - 1)); if (err < 0) goto fallback_to_reg_recv; @@ -1538,15 +1533,14 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, fallback_to_reg_recv: sgout = sgin; pages = 0; - *chunk = data_len; - *zc = false; + darg->zc = false; } /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, skb, sgin, sgout, iv, - data_len, aead_req, async); - if (err == -EINPROGRESS) - return err; + data_len, aead_req, darg); + if (darg->async) + return 0; /* Release the pages in case iov was mapped to pages */ for (; pages > 0; pages--) @@ -1557,87 +1551,81 @@ fallback_to_reg_recv: } static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, - struct iov_iter *dest, int *chunk, bool *zc, - bool async) + struct iov_iter *dest, + struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; struct strp_msg *rxm = strp_msg(skb); - int pad, err = 0; + struct tls_msg *tlm = tls_msg(skb); + int pad, err; - if (!ctx->decrypted) { - if (tls_ctx->rx_conf == TLS_HW) { - err = tls_device_decrypted(sk, tls_ctx, skb, rxm); - if (err < 0) - return err; - } + if (tlm->decrypted) { + darg->zc = false; + return 0; + } - /* Still not decrypted after tls_device */ - if (!ctx->decrypted) { - err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, - async); - if (err < 0) { - if (err == -EINPROGRESS) - tls_advance_record_sn(sk, prot, - &tls_ctx->rx); - else if (err == -EBADMSG) - TLS_INC_STATS(sock_net(sk), - LINUX_MIB_TLSDECRYPTERROR); - return err; - } - } else { - *zc = false; + if (tls_ctx->rx_conf == TLS_HW) { + err = tls_device_decrypted(sk, tls_ctx, skb, rxm); + if (err < 0) + return err; + if (err > 0) { + tlm->decrypted = 1; + darg->zc = false; + goto decrypt_done; } + } - pad = padding_length(ctx, prot, skb); - if (pad < 0) - return pad; + err = decrypt_internal(sk, skb, dest, NULL, darg); + if (err < 0) + return err; + if (darg->async) + goto decrypt_next; - rxm->full_len -= pad; - rxm->offset += prot->prepend_size; - rxm->full_len -= prot->overhead_size; - tls_advance_record_sn(sk, prot, &tls_ctx->rx); - ctx->decrypted = 1; - ctx->saved_data_ready(sk); - } else { - *zc = false; - } +decrypt_done: + pad = padding_length(prot, skb); + if (pad < 0) + return pad; - return err; + rxm->full_len -= pad; + rxm->offset += prot->prepend_size; + rxm->full_len -= prot->overhead_size; + tlm->decrypted = 1; +decrypt_next: + tls_advance_record_sn(sk, prot, &tls_ctx->rx); + + return 0; } int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout) { - bool zc = true; - int chunk; + struct tls_decrypt_arg darg = { .zc = true, }; - return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false); + return decrypt_internal(sk, skb, NULL, sgout, &darg); } -static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, - unsigned int len) +static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, + u8 *control) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - - if (skb) { - struct strp_msg *rxm = strp_msg(skb); - - if (len < rxm->full_len) { - rxm->offset += len; - rxm->full_len -= len; - return false; + int err; + + if (!*control) { + *control = tlm->control; + if (!*control) + return -EBADMSG; + + err = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, + sizeof(*control), control); + if (*control != TLS_RECORD_TYPE_DATA) { + if (err || msg->msg_flags & MSG_CTRUNC) + return -EIO; } - consume_skb(skb); + } else if (*control != tlm->control) { + return 0; } - /* Finished with message */ - ctx->recv_pkt = NULL; - __strp_unpause(&ctx->strp); - - return true; + return 1; } /* This function traverses the rx_list in tls receive context to copies the @@ -1648,31 +1636,23 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, static int process_rx_list(struct tls_sw_context_rx *ctx, struct msghdr *msg, u8 *control, - bool *cmsg, size_t skip, size_t len, bool zc, bool is_peek) { struct sk_buff *skb = skb_peek(&ctx->rx_list); - u8 ctrl = *control; - u8 msgc = *cmsg; struct tls_msg *tlm; ssize_t copied = 0; - - /* Set the record type in 'control' if caller didn't pass it */ - if (!ctrl && skb) { - tlm = tls_msg(skb); - ctrl = tlm->control; - } + int err; while (skip && skb) { struct strp_msg *rxm = strp_msg(skb); tlm = tls_msg(skb); - /* Cannot process a record of different type */ - if (ctrl != tlm->control) - return 0; + err = tls_record_content_type(msg, tlm, control); + if (err <= 0) + goto out; if (skip < rxm->full_len) break; @@ -1688,30 +1668,15 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, tlm = tls_msg(skb); - /* Cannot process a record of different type */ - if (ctrl != tlm->control) - return 0; - - /* Set record type if not already done. For a non-data record, - * do not proceed if record type could not be copied. - */ - if (!msgc) { - int cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, - sizeof(ctrl), &ctrl); - msgc = true; - if (ctrl != TLS_RECORD_TYPE_DATA) { - if (cerr || msg->msg_flags & MSG_CTRUNC) - return -EIO; - - *cmsg = msgc; - } - } + err = tls_record_content_type(msg, tlm, control); + if (err <= 0) + goto out; if (!zc || (rxm->full_len - skip) > len) { - int err = skb_copy_datagram_msg(skb, rxm->offset + skip, + err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - return err; + goto out; } len = len - chunk; @@ -1738,21 +1703,21 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, next_skb = skb_peek_next(skb, &ctx->rx_list); if (!is_peek) { - skb_unlink(skb, &ctx->rx_list); + __skb_unlink(skb, &ctx->rx_list); consume_skb(skb); } skb = next_skb; } + err = 0; - *control = ctrl; - return copied; +out: + return copied ? : err; } int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) { @@ -1766,16 +1731,13 @@ int tls_sw_recvmsg(struct sock *sk, struct tls_msg *tlm; struct sk_buff *skb; ssize_t copied = 0; - bool cmsg = false; + bool async = false; int target, err = 0; long timeo; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; bool bpf_strp_enabled; - int num_async = 0; - int pending; - - flags |= nonblock; + bool zc_capable; if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); @@ -1784,81 +1746,64 @@ int tls_sw_recvmsg(struct sock *sk, lock_sock(sk); bpf_strp_enabled = sk_psock_strp_enabled(psock); + /* If crypto failed the connection is broken */ + err = ctx->async_wait.err; + if (err) + goto end; + /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false, - is_peek); - if (err < 0) { - tls_err_abort(sk, err); + err = process_rx_list(ctx, msg, &control, 0, len, false, is_peek); + if (err < 0) goto end; - } else { - copied = err; - } + copied = err; if (len <= copied) - goto recv_end; + goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); len = len - copied; timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek && + prot->version != TLS_1_3_VERSION; + decrypted = 0; while (len && (decrypted + copied < target || ctx->recv_pkt)) { - bool retain_skb = false; - bool zc = false; - int to_decrypt; - int chunk = 0; - bool async_capable; - bool async = false; + struct tls_decrypt_arg darg = {}; + int to_decrypt, chunk; skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err); if (!skb) { if (psock) { - int ret = sk_msg_recvmsg(sk, psock, msg, len, - flags); - - if (ret > 0) { - decrypted += ret; - len -= ret; - continue; - } + chunk = sk_msg_recvmsg(sk, psock, msg, len, + flags); + if (chunk > 0) + goto leave_on_list; } goto recv_end; - } else { - tlm = tls_msg(skb); - if (prot->version == TLS_1_3_VERSION) - tlm->control = 0; - else - tlm->control = ctx->control; } rxm = strp_msg(skb); + tlm = tls_msg(skb); to_decrypt = rxm->full_len - prot->overhead_size; - if (to_decrypt <= len && !is_kvec && !is_peek && - ctx->control == TLS_RECORD_TYPE_DATA && - prot->version != TLS_1_3_VERSION && - !bpf_strp_enabled) - zc = true; + if (zc_capable && to_decrypt <= len && + tlm->control == TLS_RECORD_TYPE_DATA) + darg.zc = true; /* Do not use async mode if record is non-data */ - if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) - async_capable = ctx->async_capable; + if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) + darg.async = ctx->async_capable; else - async_capable = false; + darg.async = false; - err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &chunk, &zc, async_capable); - if (err < 0 && err != -EINPROGRESS) { + err = decrypt_skb_update(sk, skb, &msg->msg_iter, &darg); + if (err < 0) { tls_err_abort(sk, -EBADMSG); goto recv_end; } - if (err == -EINPROGRESS) { - async = true; - num_async++; - } else if (prot->version == TLS_1_3_VERSION) { - tlm->control = ctx->control; - } + async |= darg.async; /* If the type of records being processed is not known yet, * set it to record type just dequeued. If it is already known, @@ -1867,124 +1812,99 @@ int tls_sw_recvmsg(struct sock *sk, * is known just after record is dequeued from stream parser. * For tls1.3, we disable async. */ - - if (!control) - control = tlm->control; - else if (control != tlm->control) + err = tls_record_content_type(msg, tlm, &control); + if (err <= 0) goto recv_end; - if (!cmsg) { - int cerr; - - cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, - sizeof(control), &control); - cmsg = true; - if (control != TLS_RECORD_TYPE_DATA) { - if (cerr || msg->msg_flags & MSG_CTRUNC) { - err = -EIO; - goto recv_end; - } - } + ctx->recv_pkt = NULL; + __strp_unpause(&ctx->strp); + __skb_queue_tail(&ctx->rx_list, skb); + + if (async) { + /* TLS 1.2-only, to_decrypt must be text length */ + chunk = min_t(int, to_decrypt, len); +leave_on_list: + decrypted += chunk; + len -= chunk; + continue; } + /* TLS 1.3 may have updated the length by more than overhead */ + chunk = rxm->full_len; - if (async) - goto pick_next_record; + if (!darg.zc) { + bool partially_consumed = chunk > len; - if (!zc) { if (bpf_strp_enabled) { err = sk_psock_tls_strp_read(psock, skb); if (err != __SK_PASS) { rxm->offset = rxm->offset + rxm->full_len; rxm->full_len = 0; + __skb_unlink(skb, &ctx->rx_list); if (err == __SK_DROP) consume_skb(skb); - ctx->recv_pkt = NULL; - __strp_unpause(&ctx->strp); continue; } } - if (rxm->full_len > len) { - retain_skb = true; + if (partially_consumed) chunk = len; - } else { - chunk = rxm->full_len; - } err = skb_copy_datagram_msg(skb, rxm->offset, msg, chunk); if (err < 0) goto recv_end; - if (!is_peek) { - rxm->offset = rxm->offset + chunk; - rxm->full_len = rxm->full_len - chunk; + if (is_peek) + goto leave_on_list; + + if (partially_consumed) { + rxm->offset += chunk; + rxm->full_len -= chunk; + goto leave_on_list; } } -pick_next_record: - if (chunk > len) - chunk = len; - decrypted += chunk; len -= chunk; - /* For async or peek case, queue the current skb */ - if (async || is_peek || retain_skb) { - skb_queue_tail(&ctx->rx_list, skb); - skb = NULL; - } + __skb_unlink(skb, &ctx->rx_list); + consume_skb(skb); - if (tls_sw_advance_skb(sk, skb, chunk)) { - /* Return full control message to - * userspace before trying to parse - * another message type - */ - msg->msg_flags |= MSG_EOR; - if (control != TLS_RECORD_TYPE_DATA) - goto recv_end; - } else { + /* Return full control message to userspace before trying + * to parse another message type + */ + msg->msg_flags |= MSG_EOR; + if (control != TLS_RECORD_TYPE_DATA) break; - } } recv_end: - if (num_async) { + if (async) { + int ret, pending; + /* Wait for all previously submitted records to be decrypted */ spin_lock_bh(&ctx->decrypt_compl_lock); - ctx->async_notify = true; + reinit_completion(&ctx->async_wait.completion); pending = atomic_read(&ctx->decrypt_pending); spin_unlock_bh(&ctx->decrypt_compl_lock); if (pending) { - err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - if (err) { - /* one of async decrypt failed */ - tls_err_abort(sk, err); - copied = 0; + ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + if (ret) { + if (err >= 0 || err == -EINPROGRESS) + err = ret; decrypted = 0; goto end; } - } else { - reinit_completion(&ctx->async_wait.completion); } - /* There can be no concurrent accesses, since we have no - * pending decrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, &cmsg, copied, + err = process_rx_list(ctx, msg, &control, copied, decrypted, false, is_peek); else - err = process_rx_list(ctx, msg, &control, &cmsg, 0, + err = process_rx_list(ctx, msg, &control, 0, decrypted, true, is_peek); - if (err < 0) { - tls_err_abort(sk, err); - copied = 0; - goto end; - } + decrypted = max(err, 0); } copied += decrypted; @@ -2005,13 +1925,13 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct strp_msg *rxm = NULL; struct sock *sk = sock->sk; + struct tls_msg *tlm; struct sk_buff *skb; ssize_t copied = 0; bool from_queue; int err = 0; long timeo; int chunk; - bool zc = false; lock_sock(sk); @@ -2021,26 +1941,29 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (from_queue) { skb = __skb_dequeue(&ctx->rx_list); } else { + struct tls_decrypt_arg darg = {}; + skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err); if (!skb) goto splice_read_end; - err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false); + err = decrypt_skb_update(sk, skb, NULL, &darg); if (err < 0) { tls_err_abort(sk, -EBADMSG); goto splice_read_end; } } + rxm = strp_msg(skb); + tlm = tls_msg(skb); + /* splice does not support reading control messages */ - if (ctx->control != TLS_RECORD_TYPE_DATA) { + if (tlm->control != TLS_RECORD_TYPE_DATA) { err = -EINVAL; goto splice_read_end; } - rxm = strp_msg(skb); - chunk = min_t(unsigned int, rxm->full_len, len); copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags); if (copied < 0) @@ -2084,10 +2007,10 @@ bool tls_sw_sock_is_readable(struct sock *sk) static int tls_read_size(struct strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); - struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; struct strp_msg *rxm = strp_msg(skb); + struct tls_msg *tlm = tls_msg(skb); size_t cipher_overhead; size_t data_len = 0; int ret; @@ -2104,11 +2027,11 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) /* Linearize header to local buffer */ ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size); - if (ret < 0) goto read_failure; - ctx->control = header[0]; + tlm->decrypted = 0; + tlm->control = header[0]; data_len = ((header[4] & 0xFF) | (header[3] << 8)); @@ -2149,8 +2072,6 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb) struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - ctx->decrypted = 0; - ctx->recv_pkt = skb; strp_pause(strp); @@ -2241,7 +2162,7 @@ void tls_sw_release_resources_rx(struct sock *sk) if (ctx->aead_recv) { kfree_skb(ctx->recv_pkt); ctx->recv_pkt = NULL; - skb_queue_purge(&ctx->rx_list); + __skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); strp_stop(&ctx->strp); /* If tls_sw_strparser_arm() was not called (cleanup paths) @@ -2501,7 +2422,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) /* Sanity-check the sizes for stack allocations. */ if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE || - rec_seq_size > TLS_MAX_REC_SEQ_SIZE) { + rec_seq_size > TLS_MAX_REC_SEQ_SIZE || tag_size != TLS_TAG_SIZE) { rc = -EINVAL; goto free_priv; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e71a312faa1e..e1dd9e9c8452 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1643,7 +1643,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, * so that no locks are necessary. */ - skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err); + skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + &err); if (!skb) { /* This means receive shutdown. */ if (err == 0) @@ -2483,8 +2484,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_dgram_proto) - return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, NULL); + return prot->recvmsg(sk, msg, size, flags, NULL); #endif return __unix_dgram_recvmsg(sk, msg, size, flags); } @@ -2500,7 +2500,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, int used, err; mutex_lock(&u->iolock); - skb = skb_recv_datagram(sk, 0, 1, &err); + skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); mutex_unlock(&u->iolock); if (!skb) return err; @@ -2916,8 +2916,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_stream_proto) - return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, - flags & ~MSG_DONTWAIT, NULL); + return prot->recvmsg(sk, msg, size, flags, NULL); #endif return unix_stream_read_generic(&state, true); } diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 452376c6f419..7cf14c6b1725 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -48,8 +48,7 @@ static int __unix_recvmsg(struct sock *sk, struct msghdr *msg, } static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, - int *addr_len) + size_t len, int flags, int *addr_len) { struct unix_sock *u = unix_sk(sk); struct sk_psock *psock; @@ -73,7 +72,7 @@ msg_bytes_ready: long timeo; int data; - timeo = sock_rcvtimeo(sk, nonblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); data = unix_msg_wait_data(sk, psock, timeo); if (data) { if (!sk_psock_queue_empty(psock)) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index b17dc9745188..b14f0ed7427b 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1732,19 +1732,16 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, int flags) { int err; - int noblock; struct vmci_datagram *dg; size_t payload_len; struct sk_buff *skb; - noblock = flags & MSG_DONTWAIT; - if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; - skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); + skb = skb_recv_datagram(&vsk->sk, flags, &err); if (!skb) return err; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 3a171828638b..6bc2ac8d8146 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1315,8 +1315,7 @@ static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } else { /* Now we can treat all alike */ release_sock(sk); - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &rc); + skb = skb_recv_datagram(sk, flags, &rc); lock_sock(sk); if (!skb) goto out; diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 1f08ebf7d80c..82d14eea1b5a 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -131,7 +131,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb) } static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) + int flags, int *addr_len) { struct espintcp_ctx *ctx = espintcp_getctx(sk); struct sk_buff *skb; @@ -139,8 +139,6 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int copied; int off = 0; - flags |= nonblock ? MSG_DONTWAIT : 0; - skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err); if (!skb) { if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 38638845db9d..8fff5ad3444b 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -96,7 +96,6 @@ test_cgrp2_sock2-objs := test_cgrp2_sock2.o xdp1-objs := xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := xdp1_user.o -xdp_router_ipv4-objs := xdp_router_ipv4_user.o test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := trace_event_user.o $(TRACE_HELPERS) @@ -124,6 +123,7 @@ xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE) xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE) xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE) xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE) +xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE) # Tell kbuild to always build the programs always-y := $(tprogs-y) @@ -153,7 +153,6 @@ always-y += parse_varlen.o parse_simple.o parse_ldabs.o always-y += test_cgrp2_tc_kern.o always-y += xdp1_kern.o always-y += xdp2_kern.o -always-y += xdp_router_ipv4_kern.o always-y += test_current_task_under_cgroup_kern.o always-y += trace_event_kern.o always-y += sampleip_kern.o @@ -220,6 +219,7 @@ TPROGLDLIBS_xdp_redirect += -lm TPROGLDLIBS_xdp_redirect_cpu += -lm TPROGLDLIBS_xdp_redirect_map += -lm TPROGLDLIBS_xdp_redirect_map_multi += -lm +TPROGLDLIBS_xdp_router_ipv4 += -lm -pthread TPROGLDLIBS_tracex4 += -lrt TPROGLDLIBS_trace_output += -lrt TPROGLDLIBS_map_perf_test += -lrt @@ -342,6 +342,7 @@ $(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h $(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h $(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h $(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h +$(obj)/xdp_router_ipv4_user.o: $(obj)/xdp_router_ipv4.skel.h $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h $(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h @@ -399,6 +400,7 @@ $(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o +$(obj)/xdp_router_ipv4.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h @echo " CLANG-BPF " $@ @@ -409,7 +411,8 @@ $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/x -c $(filter %.bpf.c,$^) -o $@ LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \ - xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h + xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h \ + xdp_router_ipv4.skel.h clean-files += $(LINKED_SKELS) xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o @@ -417,6 +420,7 @@ xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bp xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o +xdp_router_ipv4.skel.h-deps := xdp_router_ipv4.bpf.o xdp_sample.bpf.o LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index a0ebf1833ed3..c55383068384 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -36,6 +36,9 @@ static void verify_map(int map_id) fprintf(stderr, "failed: map #%d returns value 0\n", map_id); return; } + + printf("verify map:%d val: %d\n", map_id, val); + val = 0; if (bpf_map_update_elem(map_id, &key, &val, BPF_ANY) != 0) { fprintf(stderr, "map_update failed: %s\n", strerror(errno)); diff --git a/samples/bpf/xdp_router_ipv4.bpf.c b/samples/bpf/xdp_router_ipv4.bpf.c new file mode 100644 index 000000000000..248119ca7938 --- /dev/null +++ b/samples/bpf/xdp_router_ipv4.bpf.c @@ -0,0 +1,180 @@ +/* Copyright (C) 2017 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include "vmlinux.h" +#include "xdp_sample.bpf.h" +#include "xdp_sample_shared.h" + +#define ETH_ALEN 6 +#define ETH_P_8021Q 0x8100 +#define ETH_P_8021AD 0x88A8 + +struct trie_value { + __u8 prefix[4]; + __be64 value; + int ifindex; + int metric; + __be32 gw; +}; + +/* Key for lpm_trie */ +union key_4 { + u32 b32[2]; + u8 b8[8]; +}; + +struct arp_entry { + __be64 mac; + __be32 dst; +}; + +struct direct_map { + struct arp_entry arp; + int ifindex; + __be64 mac; +}; + +/* Map for trie implementation */ +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(key_size, 8); + __uint(value_size, sizeof(struct trie_value)); + __uint(max_entries, 50); + __uint(map_flags, BPF_F_NO_PREALLOC); +} lpm_map SEC(".maps"); + +/* Map for ARP table */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __be32); + __type(value, __be64); + __uint(max_entries, 50); +} arp_table SEC(".maps"); + +/* Map to keep the exact match entries in the route table */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __be32); + __type(value, struct direct_map); + __uint(max_entries, 50); +} exact_match SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, 100); +} tx_port SEC(".maps"); + +SEC("xdp") +int xdp_router_ipv4_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + u64 nh_off = sizeof(*eth); + struct datarec *rec; + __be16 h_proto; + u32 key = 0; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (rec) + NO_TEAR_INC(rec->processed); + + if (data + nh_off > data_end) + goto drop; + + h_proto = eth->h_proto; + if (h_proto == bpf_htons(ETH_P_8021Q) || + h_proto == bpf_htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + goto drop; + + h_proto = vhdr->h_vlan_encapsulated_proto; + } + + switch (bpf_ntohs(h_proto)) { + case ETH_P_ARP: + if (rec) + NO_TEAR_INC(rec->xdp_pass); + return XDP_PASS; + case ETH_P_IP: { + struct iphdr *iph = data + nh_off; + struct direct_map *direct_entry; + __be64 *dest_mac, *src_mac; + int forward_to; + + if (iph + 1 > data_end) + goto drop; + + direct_entry = bpf_map_lookup_elem(&exact_match, &iph->daddr); + + /* Check for exact match, this would give a faster lookup */ + if (direct_entry && direct_entry->mac && + direct_entry->arp.mac) { + src_mac = &direct_entry->mac; + dest_mac = &direct_entry->arp.mac; + forward_to = direct_entry->ifindex; + } else { + struct trie_value *prefix_value; + union key_4 key4; + + /* Look up in the trie for lpm */ + key4.b32[0] = 32; + key4.b8[4] = iph->daddr & 0xff; + key4.b8[5] = (iph->daddr >> 8) & 0xff; + key4.b8[6] = (iph->daddr >> 16) & 0xff; + key4.b8[7] = (iph->daddr >> 24) & 0xff; + + prefix_value = bpf_map_lookup_elem(&lpm_map, &key4); + if (!prefix_value) + goto drop; + + forward_to = prefix_value->ifindex; + src_mac = &prefix_value->value; + if (!src_mac) + goto drop; + + dest_mac = bpf_map_lookup_elem(&arp_table, &iph->daddr); + if (!dest_mac) { + if (!prefix_value->gw) + goto drop; + + dest_mac = bpf_map_lookup_elem(&arp_table, + &prefix_value->gw); + } + } + + if (src_mac && dest_mac) { + int ret; + + __builtin_memcpy(eth->h_dest, dest_mac, ETH_ALEN); + __builtin_memcpy(eth->h_source, src_mac, ETH_ALEN); + + ret = bpf_redirect_map(&tx_port, forward_to, 0); + if (ret == XDP_REDIRECT) { + if (rec) + NO_TEAR_INC(rec->xdp_redirect); + return ret; + } + } + } + default: + break; + } +drop: + if (rec) + NO_TEAR_INC(rec->xdp_drop); + + return XDP_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_router_ipv4_kern.c b/samples/bpf/xdp_router_ipv4_kern.c deleted file mode 100644 index b37ca2b13063..000000000000 --- a/samples/bpf/xdp_router_ipv4_kern.c +++ /dev/null @@ -1,186 +0,0 @@ -/* Copyright (C) 2017 Cavium, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - */ -#define KBUILD_MODNAME "foo" -#include <uapi/linux/bpf.h> -#include <linux/in.h> -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <linux/if_vlan.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <bpf/bpf_helpers.h> -#include <linux/slab.h> -#include <net/ip_fib.h> - -struct trie_value { - __u8 prefix[4]; - __be64 value; - int ifindex; - int metric; - __be32 gw; -}; - -/* Key for lpm_trie*/ -union key_4 { - u32 b32[2]; - u8 b8[8]; -}; - -struct arp_entry { - __be64 mac; - __be32 dst; -}; - -struct direct_map { - struct arp_entry arp; - int ifindex; - __be64 mac; -}; - -/* Map for trie implementation*/ -struct { - __uint(type, BPF_MAP_TYPE_LPM_TRIE); - __uint(key_size, 8); - __uint(value_size, sizeof(struct trie_value)); - __uint(max_entries, 50); - __uint(map_flags, BPF_F_NO_PREALLOC); -} lpm_map SEC(".maps"); - -/* Map for counter*/ -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, u64); - __uint(max_entries, 256); -} rxcnt SEC(".maps"); - -/* Map for ARP table*/ -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __type(key, __be32); - __type(value, __be64); - __uint(max_entries, 50); -} arp_table SEC(".maps"); - -/* Map to keep the exact match entries in the route table*/ -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __type(key, __be32); - __type(value, struct direct_map); - __uint(max_entries, 50); -} exact_match SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_DEVMAP); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); - __uint(max_entries, 100); -} tx_port SEC(".maps"); - -/* Function to set source and destination mac of the packet */ -static inline void set_src_dst_mac(void *data, void *src, void *dst) -{ - unsigned short *source = src; - unsigned short *dest = dst; - unsigned short *p = data; - - __builtin_memcpy(p, dest, 6); - __builtin_memcpy(p + 3, source, 6); -} - -/* Parse IPV4 packet to get SRC, DST IP and protocol */ -static inline int parse_ipv4(void *data, u64 nh_off, void *data_end, - __be32 *src, __be32 *dest) -{ - struct iphdr *iph = data + nh_off; - - if (iph + 1 > data_end) - return 0; - *src = iph->saddr; - *dest = iph->daddr; - return iph->protocol; -} - -SEC("xdp_router_ipv4") -int xdp_router_ipv4_prog(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - __be64 *dest_mac = NULL, *src_mac = NULL; - void *data = (void *)(long)ctx->data; - struct trie_value *prefix_value; - int rc = XDP_DROP, forward_to; - struct ethhdr *eth = data; - union key_4 key4; - long *value; - u16 h_proto; - u32 ipproto; - u64 nh_off; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return rc; - - h_proto = eth->h_proto; - - if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { - struct vlan_hdr *vhdr; - - vhdr = data + nh_off; - nh_off += sizeof(struct vlan_hdr); - if (data + nh_off > data_end) - return rc; - h_proto = vhdr->h_vlan_encapsulated_proto; - } - if (h_proto == htons(ETH_P_ARP)) { - return XDP_PASS; - } else if (h_proto == htons(ETH_P_IP)) { - struct direct_map *direct_entry; - __be32 src_ip = 0, dest_ip = 0; - - ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip); - direct_entry = bpf_map_lookup_elem(&exact_match, &dest_ip); - /* Check for exact match, this would give a faster lookup*/ - if (direct_entry && direct_entry->mac && direct_entry->arp.mac) { - src_mac = &direct_entry->mac; - dest_mac = &direct_entry->arp.mac; - forward_to = direct_entry->ifindex; - } else { - /* Look up in the trie for lpm*/ - key4.b32[0] = 32; - key4.b8[4] = dest_ip & 0xff; - key4.b8[5] = (dest_ip >> 8) & 0xff; - key4.b8[6] = (dest_ip >> 16) & 0xff; - key4.b8[7] = (dest_ip >> 24) & 0xff; - prefix_value = bpf_map_lookup_elem(&lpm_map, &key4); - if (!prefix_value) - return XDP_DROP; - src_mac = &prefix_value->value; - if (!src_mac) - return XDP_DROP; - dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip); - if (!dest_mac) { - if (!prefix_value->gw) - return XDP_DROP; - dest_ip = prefix_value->gw; - dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip); - } - forward_to = prefix_value->ifindex; - } - } else { - ipproto = 0; - } - if (src_mac && dest_mac) { - set_src_dst_mac(data, src_mac, dest_mac); - value = bpf_map_lookup_elem(&rxcnt, &ipproto); - if (value) - *value += 1; - return bpf_redirect_map(&tx_port, forward_to, 0); - } - return rc; -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 6dae87d83e1c..f32bbd5c32bf 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -24,70 +24,40 @@ #include <bpf/libbpf.h> #include <sys/resource.h> #include <libgen.h> +#include <getopt.h> +#include <pthread.h> +#include "xdp_sample_user.h" +#include "xdp_router_ipv4.skel.h" -int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST; -static int total_ifindex; -static int *ifindex_list; -static __u32 *prog_id_list; -char buf[8192]; +static const char *__doc__ = +"XDP IPv4 router implementation\n" +"Usage: xdp_router_ipv4 <IFNAME-0> ... <IFNAME-N>\n"; + +static char buf[8192]; static int lpm_map_fd; -static int rxcnt_map_fd; static int arp_table_map_fd; static int exact_match_map_fd; static int tx_port_map_fd; -static int get_route_table(int rtm_family); -static void int_exit(int sig) -{ - __u32 prog_id = 0; - int i = 0; +static bool routes_thread_exit; +static int interval = 5; - for (i = 0; i < total_ifindex; i++) { - if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) { - printf("bpf_xdp_query_id on iface %d failed\n", - ifindex_list[i]); - exit(1); - } - if (prog_id_list[i] == prog_id) - bpf_xdp_detach(ifindex_list[i], flags, NULL); - else if (!prog_id) - printf("couldn't find a prog id on iface %d\n", - ifindex_list[i]); - else - printf("program on iface %d changed, not removing\n", - ifindex_list[i]); - prog_id = 0; - } - exit(0); -} +static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | + SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_EXCEPTION_CNT; -static void close_and_exit(int sig) -{ - close(sock); - close(sock_arp); +DEFINE_SAMPLE_INIT(xdp_router_ipv4); - int_exit(0); -} +static const struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "skb-mode", no_argument, NULL, 'S' }, + { "force", no_argument, NULL, 'F' }, + { "interval", required_argument, NULL, 'i' }, + { "verbose", no_argument, NULL, 'v' }, + { "stats", no_argument, NULL, 's' }, + {} +}; -/* Get the mac address of the interface given interface name */ -static __be64 getmac(char *iface) -{ - struct ifreq ifr; - __be64 mac = 0; - int fd, i; - - fd = socket(AF_INET, SOCK_DGRAM, 0); - ifr.ifr_addr.sa_family = AF_INET; - strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1); - if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) { - printf("ioctl failed leaving....\n"); - return -1; - } - for (i = 0; i < 6 ; i++) - *((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i]; - close(fd); - return mac; -} +static int get_route_table(int rtm_family); static int recv_msg(struct sockaddr_nl sock_addr, int sock) { @@ -130,7 +100,6 @@ static void read_route(struct nlmsghdr *nh, int nll) int i; struct route_table { int dst_len, iface, metric; - char *iface_name; __be32 dst, gw; __be64 mac; } route; @@ -145,17 +114,7 @@ static void read_route(struct nlmsghdr *nh, int nll) __be64 mac; } direct_entry; - if (nh->nlmsg_type == RTM_DELROUTE) - printf("DELETING Route entry\n"); - else if (nh->nlmsg_type == RTM_GETROUTE) - printf("READING Route entry\n"); - else if (nh->nlmsg_type == RTM_NEWROUTE) - printf("NEW Route entry\n"); - else - printf("%d\n", nh->nlmsg_type); - memset(&route, 0, sizeof(route)); - printf("Destination Gateway Genmask Metric Iface\n"); for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { rt_msg = (struct rtmsg *)NLMSG_DATA(nh); rtm_family = rt_msg->rtm_family; @@ -192,11 +151,7 @@ static void read_route(struct nlmsghdr *nh, int nll) route.gw = atoi(gws); route.iface = atoi(ifs); route.metric = atoi(metrics); - route.iface_name = alloca(sizeof(char *) * IFNAMSIZ); - route.iface_name = if_indextoname(route.iface, route.iface_name); - route.mac = getmac(route.iface_name); - if (route.mac == -1) - int_exit(0); + assert(get_mac_addr(route.iface, &route.mac) == 0); assert(bpf_map_update_elem(tx_port_map_fd, &route.iface, &route.iface, 0) == 0); if (rtm_family == AF_INET) { @@ -207,7 +162,6 @@ static void read_route(struct nlmsghdr *nh, int nll) int metric; __be32 gw; } *prefix_value; - struct in_addr dst_addr, gw_addr, mask_addr; prefix_key = alloca(sizeof(*prefix_key) + 3); prefix_value = alloca(sizeof(*prefix_value)); @@ -235,17 +189,6 @@ static void read_route(struct nlmsghdr *nh, int nll) for (i = 0; i < 4; i++) prefix_key->data[i] = (route.dst >> i * 8) & 0xff; - dst_addr.s_addr = route.dst; - printf("%-16s", inet_ntoa(dst_addr)); - - gw_addr.s_addr = route.gw; - printf("%-16s", inet_ntoa(gw_addr)); - - mask_addr.s_addr = htonl(~(0xffffffffU >> route.dst_len)); - printf("%-16s%-7d%s\n", inet_ntoa(mask_addr), - route.metric, - route.iface_name); - if (bpf_map_lookup_elem(lpm_map_fd, prefix_key, prefix_value) < 0) { for (i = 0; i < 4; i++) @@ -261,13 +204,6 @@ static void read_route(struct nlmsghdr *nh, int nll) ) == 0); } else { if (nh->nlmsg_type == RTM_DELROUTE) { - printf("deleting entry\n"); - printf("prefix key=%d.%d.%d.%d/%d", - prefix_key->data[0], - prefix_key->data[1], - prefix_key->data[2], - prefix_key->data[3], - prefix_key->prefixlen); assert(bpf_map_delete_elem(lpm_map_fd, prefix_key ) == 0); @@ -331,14 +267,14 @@ static int get_route_table(int rtm_family) sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock < 0) { - printf("open netlink socket: %s\n", strerror(errno)); - return -1; + fprintf(stderr, "open netlink socket: %s\n", strerror(errno)); + return -errno; } memset(&sa, 0, sizeof(sa)); sa.nl_family = AF_NETLINK; if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { - printf("bind to netlink: %s\n", strerror(errno)); - ret = -1; + fprintf(stderr, "bind netlink socket: %s\n", strerror(errno)); + ret = -errno; goto cleanup; } memset(&req, 0, sizeof(req)); @@ -357,15 +293,15 @@ static int get_route_table(int rtm_family) msg.msg_iovlen = 1; ret = sendmsg(sock, &msg, 0); if (ret < 0) { - printf("send to netlink: %s\n", strerror(errno)); - ret = -1; + fprintf(stderr, "send to netlink: %s\n", strerror(errno)); + ret = -errno; goto cleanup; } memset(buf, 0, sizeof(buf)); nll = recv_msg(sa, sock); if (nll < 0) { - printf("recv from netlink: %s\n", strerror(nll)); - ret = -1; + fprintf(stderr, "recv from netlink: %s\n", strerror(nll)); + ret = nll; goto cleanup; } nh = (struct nlmsghdr *)buf; @@ -395,14 +331,7 @@ static void read_arp(struct nlmsghdr *nh, int nll) __be64 mac; } direct_entry; - if (nh->nlmsg_type == RTM_GETNEIGH) - printf("READING arp entry\n"); - printf("Address HwAddress\n"); for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) { - struct in_addr dst_addr; - char mac_str[18]; - int len = 0, i; - rt_msg = (struct ndmsg *)NLMSG_DATA(nh); rt_attr = (struct rtattr *)RTM_RTA(rt_msg); ndm_family = rt_msg->ndm_family; @@ -424,13 +353,6 @@ static void read_arp(struct nlmsghdr *nh, int nll) arp_entry.dst = atoi(dsts); arp_entry.mac = atol(mac); - dst_addr.s_addr = arp_entry.dst; - for (i = 0; i < 6; i++) - len += snprintf(mac_str + len, 18 - len, "%02llx%s", - ((arp_entry.mac >> i * 8) & 0xff), - i < 5 ? ":" : ""); - printf("%-16s%s\n", inet_ntoa(dst_addr), mac_str); - if (ndm_family == AF_INET) { if (bpf_map_lookup_elem(exact_match_map_fd, &arp_entry.dst, @@ -481,14 +403,14 @@ static int get_arp_table(int rtm_family) sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock < 0) { - printf("open netlink socket: %s\n", strerror(errno)); - return -1; + fprintf(stderr, "open netlink socket: %s\n", strerror(errno)); + return -errno; } memset(&sa, 0, sizeof(sa)); sa.nl_family = AF_NETLINK; if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { - printf("bind to netlink: %s\n", strerror(errno)); - ret = -1; + fprintf(stderr, "bind netlink socket: %s\n", strerror(errno)); + ret = -errno; goto cleanup; } memset(&req, 0, sizeof(req)); @@ -506,15 +428,15 @@ static int get_arp_table(int rtm_family) msg.msg_iovlen = 1; ret = sendmsg(sock, &msg, 0); if (ret < 0) { - printf("send to netlink: %s\n", strerror(errno)); - ret = -1; + fprintf(stderr, "send to netlink: %s\n", strerror(errno)); + ret = -errno; goto cleanup; } memset(buf, 0, sizeof(buf)); nll = recv_msg(sa, sock); if (nll < 0) { - printf("recv from netlink: %s\n", strerror(nll)); - ret = -1; + fprintf(stderr, "recv from netlink: %s\n", strerror(nll)); + ret = nll; goto cleanup; } nh = (struct nlmsghdr *)buf; @@ -527,24 +449,17 @@ cleanup: /* Function to keep track and update changes in route and arp table * Give regular statistics of packets forwarded */ -static int monitor_route(void) +static void *monitor_routes_thread(void *arg) { - unsigned int nr_cpus = bpf_num_possible_cpus(); - const unsigned int nr_keys = 256; struct pollfd fds_route, fds_arp; - __u64 prev[nr_keys][nr_cpus]; struct sockaddr_nl la, lr; - __u64 values[nr_cpus]; + int sock, sock_arp, nll; struct nlmsghdr *nh; - int nll, ret = 0; - int interval = 5; - __u32 key; - int i; sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock < 0) { - printf("open netlink socket: %s\n", strerror(errno)); - return -1; + fprintf(stderr, "open netlink socket: %s\n", strerror(errno)); + return NULL; } fcntl(sock, F_SETFL, O_NONBLOCK); @@ -552,17 +467,19 @@ static int monitor_route(void) lr.nl_family = AF_NETLINK; lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY; if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) { - printf("bind to netlink: %s\n", strerror(errno)); - ret = -1; - goto cleanup; + fprintf(stderr, "bind netlink socket: %s\n", strerror(errno)); + close(sock); + return NULL; } + fds_route.fd = sock; fds_route.events = POLL_IN; sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); if (sock_arp < 0) { - printf("open netlink socket: %s\n", strerror(errno)); - return -1; + fprintf(stderr, "open netlink socket: %s\n", strerror(errno)); + close(sock); + return NULL; } fcntl(sock_arp, F_SETFL, O_NONBLOCK); @@ -570,51 +487,44 @@ static int monitor_route(void) la.nl_family = AF_NETLINK; la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY; if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) { - printf("bind to netlink: %s\n", strerror(errno)); - ret = -1; + fprintf(stderr, "bind netlink socket: %s\n", strerror(errno)); goto cleanup; } + fds_arp.fd = sock_arp; fds_arp.events = POLL_IN; - memset(prev, 0, sizeof(prev)); - do { - signal(SIGINT, close_and_exit); - signal(SIGTERM, close_and_exit); + /* dump route and arp tables */ + if (get_arp_table(AF_INET) < 0) { + fprintf(stderr, "Failed reading arp table\n"); + goto cleanup; + } - sleep(interval); - for (key = 0; key < nr_keys; key++) { - __u64 sum = 0; - - assert(bpf_map_lookup_elem(rxcnt_map_fd, - &key, values) == 0); - for (i = 0; i < nr_cpus; i++) - sum += (values[i] - prev[key][i]); - if (sum) - printf("proto %u: %10llu pkt/s\n", - key, sum / interval); - memcpy(prev[key], values, sizeof(values)); - } + if (get_route_table(AF_INET) < 0) { + fprintf(stderr, "Failed reading route table\n"); + goto cleanup; + } + while (!routes_thread_exit) { memset(buf, 0, sizeof(buf)); if (poll(&fds_route, 1, 3) == POLL_IN) { nll = recv_msg(lr, sock); if (nll < 0) { - printf("recv from netlink: %s\n", strerror(nll)); - ret = -1; + fprintf(stderr, "recv from netlink: %s\n", + strerror(nll)); goto cleanup; } nh = (struct nlmsghdr *)buf; - printf("Routing table updated.\n"); read_route(nh, nll); } + memset(buf, 0, sizeof(buf)); if (poll(&fds_arp, 1, 3) == POLL_IN) { nll = recv_msg(la, sock_arp); if (nll < 0) { - printf("recv from netlink: %s\n", strerror(nll)); - ret = -1; + fprintf(stderr, "recv from netlink: %s\n", + strerror(nll)); goto cleanup; } @@ -622,132 +532,169 @@ static int monitor_route(void) read_arp(nh, nll); } - } while (1); + sleep(interval); + } + cleanup: + close(sock_arp); close(sock); - return ret; + return NULL; } -static void usage(const char *prog) +static void usage(char *argv[], const struct option *long_options, + const char *doc, int mask, bool error, + struct bpf_object *obj) { - fprintf(stderr, - "%s: %s [OPTS] interface name list\n\n" - "OPTS:\n" - " -S use skb-mode\n" - " -F force loading prog\n", - __func__, prog); + sample_usage(argv, long_options, doc, mask, error); } -int main(int ac, char **argv) +int main(int argc, char **argv) { - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - const char *optstr = "SF"; - struct bpf_program *prog; - struct bpf_object *obj; - char filename[256]; - char **ifname_list; - int prog_fd, opt; - int err, i = 1; - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - total_ifindex = ac - 1; - ifname_list = (argv + 1); - - while ((opt = getopt(ac, argv, optstr)) != -1) { + bool error = true, generic = false, force = false; + int opt, ret = EXIT_FAIL_BPF; + struct xdp_router_ipv4 *skel; + int i, total_ifindex = argc - 1; + char **ifname_list = argv + 1; + pthread_t routes_thread; + int longindex = 0; + + if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) { + fprintf(stderr, "Failed to set libbpf strict mode: %s\n", + strerror(errno)); + goto end; + } + + skel = xdp_router_ipv4__open(); + if (!skel) { + fprintf(stderr, "Failed to xdp_router_ipv4__open: %s\n", + strerror(errno)); + goto end; + } + + ret = sample_init_pre_load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to sample_init_pre_load: %s\n", + strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + ret = xdp_router_ipv4__load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to xdp_router_ipv4__load: %s\n", + strerror(errno)); + goto end_destroy; + } + + ret = sample_init(skel, mask); + if (ret < 0) { + fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; + } + + while ((opt = getopt_long(argc, argv, "si:SFvh", + long_options, &longindex)) != -1) { switch (opt) { + case 's': + mask |= SAMPLE_REDIRECT_MAP_CNT; + total_ifindex--; + ifname_list++; + break; + case 'i': + interval = strtoul(optarg, NULL, 0); + total_ifindex -= 2; + ifname_list += 2; + break; case 'S': - flags |= XDP_FLAGS_SKB_MODE; + generic = true; total_ifindex--; ifname_list++; break; case 'F': - flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + force = true; total_ifindex--; ifname_list++; break; + case 'v': + sample_switch_mode(); + total_ifindex--; + ifname_list++; + break; + case 'h': + error = false; default: - usage(basename(argv[0])); - return 1; + usage(argv, long_options, __doc__, mask, error, skel->obj); + goto end_destroy; } } - if (!(flags & XDP_FLAGS_SKB_MODE)) - flags |= XDP_FLAGS_DRV_MODE; - - if (optind == ac) { - usage(basename(argv[0])); - return 1; + ret = EXIT_FAIL_OPTION; + if (optind == argc) { + usage(argv, long_options, __doc__, mask, true, skel->obj); + goto end_destroy; } - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) - return 1; - - prog = bpf_object__next_program(obj, NULL); - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); - - printf("\n******************loading bpf file*********************\n"); - err = bpf_object__load(obj); - if (err) { - printf("bpf_object__load(): %s\n", strerror(errno)); - return 1; + lpm_map_fd = bpf_map__fd(skel->maps.lpm_map); + if (lpm_map_fd < 0) { + fprintf(stderr, "Failed loading lpm_map %s\n", + strerror(-lpm_map_fd)); + goto end_destroy; } - prog_fd = bpf_program__fd(prog); - - lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map"); - rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); - arp_table_map_fd = bpf_object__find_map_fd_by_name(obj, "arp_table"); - exact_match_map_fd = bpf_object__find_map_fd_by_name(obj, - "exact_match"); - tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); - if (lpm_map_fd < 0 || rxcnt_map_fd < 0 || arp_table_map_fd < 0 || - exact_match_map_fd < 0 || tx_port_map_fd < 0) { - printf("bpf_object__find_map_fd_by_name failed\n"); - return 1; + arp_table_map_fd = bpf_map__fd(skel->maps.arp_table); + if (arp_table_map_fd < 0) { + fprintf(stderr, "Failed loading arp_table_map_fd %s\n", + strerror(-arp_table_map_fd)); + goto end_destroy; } - - ifindex_list = (int *)calloc(total_ifindex, sizeof(int *)); - for (i = 0; i < total_ifindex; i++) { - ifindex_list[i] = if_nametoindex(ifname_list[i]); - if (!ifindex_list[i]) { - printf("Couldn't translate interface name: %s", - strerror(errno)); - return 1; - } + exact_match_map_fd = bpf_map__fd(skel->maps.exact_match); + if (exact_match_map_fd < 0) { + fprintf(stderr, "Failed loading exact_match_map_fd %s\n", + strerror(-exact_match_map_fd)); + goto end_destroy; + } + tx_port_map_fd = bpf_map__fd(skel->maps.tx_port); + if (tx_port_map_fd < 0) { + fprintf(stderr, "Failed loading tx_port_map_fd %s\n", + strerror(-tx_port_map_fd)); + goto end_destroy; } - prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *)); - for (i = 0; i < total_ifindex; i++) { - if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) { - printf("link set xdp fd failed\n"); - int recovery_index = i; - for (i = 0; i < recovery_index; i++) - bpf_xdp_detach(ifindex_list[i], flags, NULL); + ret = EXIT_FAIL_XDP; + for (i = 0; i < total_ifindex; i++) { + int index = if_nametoindex(ifname_list[i]); - return 1; + if (!index) { + fprintf(stderr, "Interface %s not found %s\n", + ifname_list[i], strerror(-tx_port_map_fd)); + goto end_destroy; } - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (err) { - printf("can't get prog info - %s\n", strerror(errno)); - return err; - } - prog_id_list[i] = info.id; - memset(&info, 0, sizeof(info)); - printf("Attached to %d\n", ifindex_list[i]); + if (sample_install_xdp(skel->progs.xdp_router_ipv4_prog, + index, generic, force) < 0) + goto end_destroy; } - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); - printf("\n*******************ROUTE TABLE*************************\n"); - get_route_table(AF_INET); - printf("\n*******************ARP TABLE***************************\n"); - get_arp_table(AF_INET); - if (monitor_route() < 0) { - printf("Error in receiving route update"); - return 1; + ret = pthread_create(&routes_thread, NULL, monitor_routes_thread, NULL); + if (ret) { + fprintf(stderr, "Failed creating routes_thread: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; } - return 0; + ret = sample_run(interval, NULL, NULL); + routes_thread_exit = true; + + if (ret < 0) { + fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_thread_wait; + } + ret = EXIT_OK; + +end_thread_wait: + pthread_join(routes_thread, NULL); +end_destroy: + xdp_router_ipv4__destroy(skel); +end: + sample_exit(ret); } diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 290998c82de1..f041c4a6a1f2 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -567,7 +567,7 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, res = probe_prog_type_ifindex(prog_type, ifindex); } else { - res = libbpf_probe_bpf_prog_type(prog_type, NULL); + res = libbpf_probe_bpf_prog_type(prog_type, NULL) > 0; } #ifdef USE_LIBCAP diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 97dec81950e5..8fb0116f9136 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -20,6 +20,9 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_CGROUP] = "cgroup", [BPF_LINK_TYPE_ITER] = "iter", [BPF_LINK_TYPE_NETNS] = "netns", + [BPF_LINK_TYPE_XDP] = "xdp", + [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", + [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", }; static struct hashmap *link_table; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index bc4e05542c2b..8643b37d4e43 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -68,6 +68,7 @@ const char * const prog_type_name[] = { [BPF_PROG_TYPE_EXT] = "ext", [BPF_PROG_TYPE_LSM] = "lsm", [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", + [BPF_PROG_TYPE_SYSCALL] = "syscall", }; const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index b0d8fea1951d..a9162a6c0284 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -33,8 +33,8 @@ struct btf_type { /* "info" bits arrangement * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused - * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-30: unused + * bits 24-28: kind (e.g. int, ptr, array...etc) + * bits 29-30: unused * bit 31: kind_flag, currently used by * struct, union and fwd */ diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 94f0a146bb7b..31a1a9015902 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,3 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ - btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o + btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ + usdt.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 064c89e31560..64741c55b8e3 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -239,7 +239,7 @@ install_lib: all_cmd SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \ - skel_internal.h libbpf_version.h + skel_internal.h libbpf_version.h usdt.bpf.h GEN_HDRS := $(BPF_GENERATED) INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1383e26c5d1f..d124e9e533f0 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2826,10 +2826,8 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) if (err) goto done; - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) { - err = -EINVAL; - goto done; - } + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + goto done; /* skip core relos parsing */ err = btf_ext_setup_core_relos(btf_ext); if (err) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 809fe209cdcc..465b7c0996f1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -483,6 +483,8 @@ struct elf_state { int st_ops_shndx; }; +struct usdt_manager; + struct bpf_object { char name[BPF_OBJ_NAME_LEN]; char license[64]; @@ -545,6 +547,8 @@ struct bpf_object { size_t fd_array_cap; size_t fd_array_cnt; + struct usdt_manager *usdt_man; + char path[]; }; @@ -1397,8 +1401,11 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { Elf64_Sym *sym = elf_sym_by_idx(obj, si); - if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL || - ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + continue; + + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && + ELF64_ST_BIND(sym->st_info) != STB_WEAK) continue; sname = elf_sym_str(obj, sym->st_name); @@ -4678,6 +4685,18 @@ static int probe_perf_link(void) return link_fd < 0 && err == -EBADF; } +static int probe_kern_bpf_cookie(void) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); +} + enum kern_feature_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, @@ -4740,6 +4759,9 @@ static struct kern_feature_desc { [FEAT_MEMCG_ACCOUNT] = { "memcg-based memory accounting", probe_memcg_account, }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, }; bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) @@ -5665,10 +5687,17 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) insn_idx = rec->insn_off / BPF_INSN_SZ; prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); if (!prog) { - pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n", - sec_name, insn_idx, i); - err = -EINVAL; - goto out; + /* When __weak subprog is "overridden" by another instance + * of the subprog from a different object file, linker still + * appends all the .BTF.ext info that used to belong to that + * eliminated subprogram. + * This is similar to what x86-64 linker does for relocations. + * So just ignore such relocations just like we ignore + * subprog instructions when discovering subprograms. + */ + pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", + sec_name, i, insn_idx); + continue; } /* no need to apply CO-RE relocation if the program is * not going to be loaded @@ -8200,6 +8229,9 @@ void bpf_object__close(struct bpf_object *obj) if (obj->clear_priv) obj->clear_priv(obj, obj->priv); + usdt_manager_free(obj->usdt_man); + obj->usdt_man = NULL; + bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); bpf_object_unload(obj); @@ -8630,6 +8662,8 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log } static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); @@ -8642,11 +8676,12 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), SEC_DEF("kprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("kretprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED), SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), @@ -9692,14 +9727,6 @@ int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, return bpf_prog_load_xattr2(&attr, pobj, prog_fd); } -struct bpf_link { - int (*detach)(struct bpf_link *link); - void (*dealloc)(struct bpf_link *link); - char *pin_path; /* NULL, if not pinned */ - int fd; /* hook FD, -1 if not applicable */ - bool disconnected; -}; - /* Replace link's underlying BPF program with the new one */ int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { @@ -10517,6 +10544,273 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, return pfd; } +/* uprobes deal in relative offsets; subtract the base address associated with + * the mapped binary. See Documentation/trace/uprobetracer.rst for more + * details. + */ +static long elf_find_relative_offset(const char *filename, Elf *elf, long addr) +{ + size_t n; + int i; + + if (elf_getphdrnum(elf, &n)) { + pr_warn("elf: failed to find program headers for '%s': %s\n", filename, + elf_errmsg(-1)); + return -ENOENT; + } + + for (i = 0; i < n; i++) { + int seg_start, seg_end, seg_offset; + GElf_Phdr phdr; + + if (!gelf_getphdr(elf, i, &phdr)) { + pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename, + elf_errmsg(-1)); + return -ENOENT; + } + if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X)) + continue; + + seg_start = phdr.p_vaddr; + seg_end = seg_start + phdr.p_memsz; + seg_offset = phdr.p_offset; + if (addr >= seg_start && addr < seg_end) + return addr - seg_start + seg_offset; + } + pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename); + return -ENOENT; +} + +/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ +static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) +{ + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + + if (!gelf_getshdr(scn, &sh)) + continue; + if (sh.sh_type == sh_type) + return scn; + } + return NULL; +} + +/* Find offset of function name in object specified by path. "name" matches + * symbol name or name@@LIB for library functions. + */ +static long elf_find_func_offset(const char *binary_path, const char *name) +{ + int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; + bool is_shared_lib, is_name_qualified; + char errmsg[STRERR_BUFSIZE]; + long ret = -ENOENT; + size_t name_len; + GElf_Ehdr ehdr; + Elf *elf; + + fd = open(binary_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = -errno; + pr_warn("failed to open %s: %s\n", binary_path, + libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); + return ret; + } + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); + close(fd); + return -LIBBPF_ERRNO__FORMAT; + } + if (!gelf_getehdr(elf, &ehdr)) { + pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + /* for shared lib case, we do not need to calculate relative offset */ + is_shared_lib = ehdr.e_type == ET_DYN; + + name_len = strlen(name); + /* Does name specify "@@LIB"? */ + is_name_qualified = strstr(name, "@@") != NULL; + + /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if + * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically + * linked binary may not have SHT_DYMSYM, so absence of a section should not be + * reported as a warning/error. + */ + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + size_t nr_syms, strtabidx, idx; + Elf_Data *symbols = NULL; + Elf_Scn *scn = NULL; + int last_bind = -1; + const char *sname; + GElf_Shdr sh; + + scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL); + if (!scn) { + pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", + binary_path); + continue; + } + if (!gelf_getshdr(scn, &sh)) + continue; + strtabidx = sh.sh_link; + symbols = elf_getdata(scn, 0); + if (!symbols) { + pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", + binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + nr_syms = symbols->d_size / sh.sh_entsize; + + for (idx = 0; idx < nr_syms; idx++) { + int curr_bind; + GElf_Sym sym; + + if (!gelf_getsym(symbols, idx, &sym)) + continue; + + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) + continue; + + sname = elf_strptr(elf, strtabidx, sym.st_name); + if (!sname) + continue; + + curr_bind = GELF_ST_BIND(sym.st_info); + + /* User can specify func, func@@LIB or func@@LIB_VERSION. */ + if (strncmp(sname, name, name_len) != 0) + continue; + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@') + continue; + + if (ret >= 0) { + /* handle multiple matches */ + if (last_bind != STB_WEAK && curr_bind != STB_WEAK) { + /* Only accept one non-weak bind. */ + pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", + sname, name, binary_path); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } else if (curr_bind == STB_WEAK) { + /* already have a non-weak bind, and + * this is a weak bind, so ignore. + */ + continue; + } + } + ret = sym.st_value; + last_bind = curr_bind; + } + /* For binaries that are not shared libraries, we need relative offset */ + if (ret > 0 && !is_shared_lib) + ret = elf_find_relative_offset(binary_path, elf, ret); + if (ret > 0) + break; + } + + if (ret > 0) { + pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, + ret); + } else { + if (ret == 0) { + pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, + is_shared_lib ? "should not be 0 in a shared library" : + "try using shared library path instead"); + ret = -ENOENT; + } else { + pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); + } + } +out: + elf_end(elf); + close(fd); + return ret; +} + +static const char *arch_specific_lib_paths(void) +{ + /* + * Based on https://packages.debian.org/sid/libc6. + * + * Assume that the traced program is built for the same architecture + * as libbpf, which should cover the vast majority of cases. + */ +#if defined(__x86_64__) + return "/lib/x86_64-linux-gnu"; +#elif defined(__i386__) + return "/lib/i386-linux-gnu"; +#elif defined(__s390x__) + return "/lib/s390x-linux-gnu"; +#elif defined(__s390__) + return "/lib/s390-linux-gnu"; +#elif defined(__arm__) && defined(__SOFTFP__) + return "/lib/arm-linux-gnueabi"; +#elif defined(__arm__) && !defined(__SOFTFP__) + return "/lib/arm-linux-gnueabihf"; +#elif defined(__aarch64__) + return "/lib/aarch64-linux-gnu"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 + return "/lib/mips64el-linux-gnuabi64"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 + return "/lib/mipsel-linux-gnu"; +#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return "/lib/powerpc64le-linux-gnu"; +#elif defined(__sparc__) && defined(__arch64__) + return "/lib/sparc64-linux-gnu"; +#elif defined(__riscv) && __riscv_xlen == 64 + return "/lib/riscv64-linux-gnu"; +#else + return NULL; +#endif +} + +/* Get full path to program/shared library. */ +static int resolve_full_path(const char *file, char *result, size_t result_sz) +{ + const char *search_paths[3] = {}; + int i; + + if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { + search_paths[0] = getenv("LD_LIBRARY_PATH"); + search_paths[1] = "/usr/lib64:/usr/lib"; + search_paths[2] = arch_specific_lib_paths(); + } else { + search_paths[0] = getenv("PATH"); + search_paths[1] = "/usr/bin:/usr/sbin"; + } + + for (i = 0; i < ARRAY_SIZE(search_paths); i++) { + const char *s; + + if (!search_paths[i]) + continue; + for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { + char *next_path; + int seg_len; + + if (s[0] == ':') + s++; + next_path = strchr(s, ':'); + seg_len = next_path ? next_path - s : strlen(s); + if (!seg_len) + continue; + snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); + /* ensure it is an executable file/link */ + if (access(result, R_OK | X_OK) < 0) + continue; + pr_debug("resolved '%s' to '%s'\n", file, result); + return 0; + } + } + return -ENOENT; +} + LIBBPF_API struct bpf_link * bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, @@ -10524,10 +10818,12 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + char full_binary_path[PATH_MAX]; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; bool retprobe, legacy; + const char *func_name; if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); @@ -10536,12 +10832,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + if (binary_path && !strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, full_binary_path, + sizeof(full_binary_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", + prog->name, binary_path, err); + return libbpf_err_ptr(err); + } + binary_path = full_binary_path; + } + func_name = OPTS_GET(opts, func_name, NULL); + if (func_name) { + long sym_off; + + if (!binary_path) { + pr_warn("prog '%s': name-based attach requires binary_path\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + sym_off = elf_find_func_offset(binary_path, func_name); + if (sym_off < 0) + return libbpf_err_ptr(sym_off); + func_offset += sym_off; + } + legacy = determine_uprobe_perf_type() < 0; if (!legacy) { pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[512]; + char probe_name[PATH_MAX + 64]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); @@ -10589,6 +10910,60 @@ err_out: } +/* Format of u[ret]probe section definition supporting auto-attach: + * u[ret]probe/binary:function[+offset] + * + * binary can be an absolute/relative path or a filename; the latter is resolved to a + * full binary path via bpf_program__attach_uprobe_opts. + * + * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be + * specified (and auto-attach is not possible) or the above format is specified for + * auto-attach. + */ +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; + int n, ret = -EINVAL; + long offset = 0; + + *link = NULL; + + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", + &probe_type, &binary_path, &func_name, &offset); + switch (n) { + case 1: + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ + ret = 0; + break; + case 2: + pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", + prog->name, prog->sec_name); + break; + case 3: + case 4: + opts.retprobe = strcmp(probe_type, "uretprobe") == 0; + if (opts.retprobe && offset != 0) { + pr_warn("prog '%s': uretprobes do not support offset specification\n", + prog->name); + break; + } + opts.func_name = func_name; + *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); + ret = libbpf_get_error(*link); + break; + default: + pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, + prog->sec_name); + break; + } + free(probe_type); + free(binary_path); + free(func_name); + + return ret; +} + struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, @@ -10599,6 +10974,85 @@ struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); } +struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, + pid_t pid, const char *binary_path, + const char *usdt_provider, const char *usdt_name, + const struct bpf_usdt_opts *opts) +{ + char resolved_path[512]; + struct bpf_object *obj = prog->obj; + struct bpf_link *link; + long usdt_cookie; + int err; + + if (!OPTS_VALID(opts, bpf_uprobe_opts)) + return libbpf_err_ptr(-EINVAL); + + if (bpf_program__fd(prog) < 0) { + pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + if (!strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", + prog->name, binary_path, err); + return libbpf_err_ptr(err); + } + binary_path = resolved_path; + } + + /* USDT manager is instantiated lazily on first USDT attach. It will + * be destroyed together with BPF object in bpf_object__close(). + */ + if (IS_ERR(obj->usdt_man)) + return libbpf_ptr(obj->usdt_man); + if (!obj->usdt_man) { + obj->usdt_man = usdt_manager_new(obj); + if (IS_ERR(obj->usdt_man)) + return libbpf_ptr(obj->usdt_man); + } + + usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); + link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, + usdt_provider, usdt_name, usdt_cookie); + err = libbpf_get_error(link); + if (err) + return libbpf_err_ptr(err); + return link; +} + +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + char *path = NULL, *provider = NULL, *name = NULL; + const char *sec_name; + int n, err; + + sec_name = bpf_program__section_name(prog); + if (strcmp(sec_name, "usdt") == 0) { + /* no auto-attach for just SEC("usdt") */ + *link = NULL; + return 0; + } + + n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); + if (n != 3) { + pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n", + sec_name); + err = -EINVAL; + } else { + *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, + provider, name, NULL); + err = libbpf_get_error(*link); + } + free(path); + free(provider); + free(name); + return err; +} + static int determine_tracepoint_id(const char *tp_category, const char *tp_name) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 05dde85e19a6..63d66f1adf1a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -459,9 +459,17 @@ struct bpf_uprobe_opts { __u64 bpf_cookie; /* uprobe is return probe, invoked at function return time */ bool retprobe; + /* Function name to attach to. Could be an unqualified ("abc") or library-qualified + * "abc@LIBXYZ" name. To specify function entry, func_name should be set while + * func_offset argument to bpf_prog__attach_uprobe_opts() should be 0. To trace an + * offset within a function, specify func_name and use func_offset argument to specify + * offset within the function. Shared library functions must specify the shared library + * binary_path. + */ + const char *func_name; size_t :0; }; -#define bpf_uprobe_opts__last_field retprobe +#define bpf_uprobe_opts__last_field func_name /** * @brief **bpf_program__attach_uprobe()** attaches a BPF program @@ -503,6 +511,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts); +struct bpf_usdt_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* custom user-provided value accessible through usdt_cookie() */ + __u64 usdt_cookie; + size_t :0; +}; +#define bpf_usdt_opts__last_field usdt_cookie + +/** + * @brief **bpf_program__attach_usdt()** is just like + * bpf_program__attach_uprobe_opts() except it covers USDT (User-space + * Statically Defined Tracepoint) attachment, instead of attaching to + * user-space function entry or exit. + * + * @param prog BPF program to attach + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary that contains provided USDT probe + * @param usdt_provider USDT provider name + * @param usdt_name USDT probe name + * @param opts Options for altering program attachment + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + */ +LIBBPF_API struct bpf_link * +bpf_program__attach_usdt(const struct bpf_program *prog, + pid_t pid, const char *binary_path, + const char *usdt_provider, const char *usdt_name, + const struct bpf_usdt_opts *opts); + struct bpf_tracepoint_opts { /* size of this struct, for forward/backward compatiblity */ size_t sz; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index dd35ee58bfaa..82f6d62176dd 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -444,6 +444,7 @@ LIBBPF_0.8.0 { global: bpf_object__destroy_subskeleton; bpf_object__open_subskeleton; + bpf_program__attach_usdt; libbpf_register_prog_handler; libbpf_unregister_prog_handler; bpf_program__attach_kprobe_multi_opts; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b6247dc7f8eb..080272421f6c 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -103,6 +103,17 @@ #define str_has_pfx(str, pfx) \ (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) +/* suffix check */ +static inline bool str_has_sfx(const char *str, const char *sfx) +{ + size_t str_len = strlen(str); + size_t sfx_len = strlen(sfx); + + if (sfx_len <= str_len) + return strcmp(str + str_len - sfx_len, sfx); + return false; +} + /* Symbol versioning is different between static and shared library. * Properly versioned symbols are needed for shared library, but * only the symbol of the new version is needed for static library. @@ -148,6 +159,15 @@ do { \ #ifndef __has_builtin #define __has_builtin(x) 0 #endif + +struct bpf_link { + int (*detach)(struct bpf_link *link); + void (*dealloc)(struct bpf_link *link); + char *pin_path; /* NULL, if not pinned */ + int fd; /* hook FD, -1 if not applicable */ + bool disconnected; +}; + /* * Re-implement glibc's reallocarray() for libbpf internal-only use. * reallocarray(), unfortunately, is not available in all versions of glibc, @@ -329,6 +349,8 @@ enum kern_feature_id { FEAT_BTF_TYPE_TAG, /* memcg-based accounting for BPF maps and progs */ FEAT_MEMCG_ACCOUNT, + /* BPF cookie (bpf_get_attach_cookie() BPF helper) support */ + FEAT_BPF_COOKIE, __FEAT_CNT, }; @@ -543,4 +565,12 @@ int bpf_core_add_cands(struct bpf_core_cand *local_cand, struct bpf_core_cand_list *cands); void bpf_core_free_cands(struct bpf_core_cand_list *cands); +struct usdt_manager *usdt_manager_new(struct bpf_object *obj); +void usdt_manager_free(struct usdt_manager *man); +struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man, + const struct bpf_program *prog, + pid_t pid, const char *path, + const char *usdt_provider, const char *usdt_name, + long usdt_cookie); + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h new file mode 100644 index 000000000000..4181fddb3687 --- /dev/null +++ b/tools/lib/bpf/usdt.bpf.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#ifndef __USDT_BPF_H__ +#define __USDT_BPF_H__ + +#include <linux/errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* Below types and maps are internal implementation details of libbpf's USDT + * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should + * be considered an unstable API as well and might be adjusted based on user + * feedback from using libbpf's USDT support in production. + */ + +/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal + * map that keeps track of USDT argument specifications. This might be + * necessary if there are a lot of USDT attachments. + */ +#ifndef BPF_USDT_MAX_SPEC_CNT +#define BPF_USDT_MAX_SPEC_CNT 256 +#endif +/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal + * map that keeps track of IP (memory address) mapping to USDT argument + * specification. + * Note, if kernel supports BPF cookies, this map is not used and could be + * resized all the way to 1 to save a bit of memory. + */ +#ifndef BPF_USDT_MAX_IP_CNT +#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT) +#endif +/* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is + * the only dependency on CO-RE, so if it's undesirable, user can override + * BPF_USDT_HAS_BPF_COOKIE to specify whether to BPF cookie is supported or not. + */ +#ifndef BPF_USDT_HAS_BPF_COOKIE +#define BPF_USDT_HAS_BPF_COOKIE \ + bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt) +#endif + +enum __bpf_usdt_arg_type { + BPF_USDT_ARG_CONST, + BPF_USDT_ARG_REG, + BPF_USDT_ARG_REG_DEREF, +}; + +struct __bpf_usdt_arg_spec { + /* u64 scalar interpreted depending on arg_type, see below */ + __u64 val_off; + /* arg location case, see bpf_udst_arg() for details */ + enum __bpf_usdt_arg_type arg_type; + /* offset of referenced register within struct pt_regs */ + short reg_off; + /* whether arg should be interpreted as signed value */ + bool arg_signed; + /* number of bits that need to be cleared and, optionally, + * sign-extended to cast arguments that are 1, 2, or 4 bytes + * long into final 8-byte u64/s64 value returned to user + */ + char arg_bitshift; +}; + +/* should match USDT_MAX_ARG_CNT in usdt.c exactly */ +#define BPF_USDT_MAX_ARG_CNT 12 +struct __bpf_usdt_spec { + struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT]; + __u64 usdt_cookie; + short arg_cnt; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, BPF_USDT_MAX_SPEC_CNT); + __type(key, int); + __type(value, struct __bpf_usdt_spec); +} __bpf_usdt_specs SEC(".maps") __weak; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, BPF_USDT_MAX_IP_CNT); + __type(key, long); + __type(value, __u32); +} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak; + +/* don't rely on user's BPF code to have latest definition of bpf_func_id */ +enum bpf_func_id___usdt { + BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */ +}; + +static __always_inline +int __bpf_usdt_spec_id(struct pt_regs *ctx) +{ + if (!BPF_USDT_HAS_BPF_COOKIE) { + long ip = PT_REGS_IP(ctx); + int *spec_id_ptr; + + spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip); + return spec_id_ptr ? *spec_id_ptr : -ESRCH; + } + + return bpf_get_attach_cookie(ctx); +} + +/* Return number of USDT arguments defined for currently traced USDT. */ +__weak __hidden +int bpf_usdt_arg_cnt(struct pt_regs *ctx) +{ + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + return spec->arg_cnt; +} + +/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res. + * Returns 0 on success; negative error, otherwise. + * On error *res is guaranteed to be set to zero. + */ +__weak __hidden +int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) +{ + struct __bpf_usdt_spec *spec; + struct __bpf_usdt_arg_spec *arg_spec; + unsigned long val; + int err, spec_id; + + *res = 0; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + if (arg_num >= BPF_USDT_MAX_ARG_CNT || arg_num >= spec->arg_cnt) + return -ENOENT; + + arg_spec = &spec->args[arg_num]; + switch (arg_spec->arg_type) { + case BPF_USDT_ARG_CONST: + /* Arg is just a constant ("-4@$-9" in USDT arg spec). + * value is recorded in arg_spec->val_off directly. + */ + val = arg_spec->val_off; + break; + case BPF_USDT_ARG_REG: + /* Arg is in a register (e.g, "8@%rax" in USDT arg spec), + * so we read the contents of that register directly from + * struct pt_regs. To keep things simple user-space parts + * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + break; + case BPF_USDT_ARG_REG_DEREF: + /* Arg is in memory addressed by register, plus some offset + * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is + * identified like with BPF_USDT_ARG_REG case, and the offset + * is in arg_spec->val_off. We first fetch register contents + * from pt_regs, then do another user-space probe read to + * fetch argument value itself. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off); + if (err) + return err; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + val >>= arg_spec->arg_bitshift; +#endif + break; + default: + return -EINVAL; + } + + /* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing + * necessary upper arg_bitshift bits, with sign extension if argument + * is signed + */ + val <<= arg_spec->arg_bitshift; + if (arg_spec->arg_signed) + val = ((long)val) >> arg_spec->arg_bitshift; + else + val = val >> arg_spec->arg_bitshift; + *res = val; + return 0; +} + +/* Retrieve user-specified cookie value provided during attach as + * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie + * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself + * utilizing BPF cookies internally, so user can't use BPF cookie directly + * for USDT programs and has to use bpf_usdt_cookie() API instead. + */ +__weak __hidden +long bpf_usdt_cookie(struct pt_regs *ctx) +{ + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return 0; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return 0; + + return spec->usdt_cookie; +} + +/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */ +#define ___bpf_usdt_args0() ctx +#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; }) +#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; }) +#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; }) +#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; }) +#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; }) +#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; }) +#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; }) +#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; }) +#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; }) +#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; }) +#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; }) +#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; }) +#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args) + +/* + * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for + * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes. + * Original struct pt_regs * context is preserved as 'ctx' argument. + */ +#define BPF_USDT(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_usdt_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args) + +#endif /* __USDT_BPF_H__ */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c new file mode 100644 index 000000000000..acf2d99a9e77 --- /dev/null +++ b/tools/lib/bpf/usdt.c @@ -0,0 +1,1335 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <libelf.h> +#include <gelf.h> +#include <unistd.h> +#include <linux/ptrace.h> +#include <linux/kernel.h> + +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "hashmap.h" + +/* libbpf's USDT support consists of BPF-side state/code and user-space + * state/code working together in concert. BPF-side parts are defined in + * usdt.bpf.h header library. User-space state is encapsulated by struct + * usdt_manager and all the supporting code centered around usdt_manager. + * + * usdt.bpf.h defines two BPF maps that usdt_manager expects: USDT spec map + * and IP-to-spec-ID map, which is auxiliary map necessary for kernels that + * don't support BPF cookie (see below). These two maps are implicitly + * embedded into user's end BPF object file when user's code included + * usdt.bpf.h. This means that libbpf doesn't do anything special to create + * these USDT support maps. They are created by normal libbpf logic of + * instantiating BPF maps when opening and loading BPF object. + * + * As such, libbpf is basically unaware of the need to do anything + * USDT-related until the very first call to bpf_program__attach_usdt(), which + * can be called by user explicitly or happen automatically during skeleton + * attach (or, equivalently, through generic bpf_program__attach() call). At + * this point, libbpf will instantiate and initialize struct usdt_manager and + * store it in bpf_object. USDT manager is per-BPF object construct, as each + * independent BPF object might or might not have USDT programs, and thus all + * the expected USDT-related state. There is no coordination between two + * bpf_object in parts of USDT attachment, they are oblivious of each other's + * existence and libbpf is just oblivious, dealing with bpf_object-specific + * USDT state. + * + * Quick crash course on USDTs. + * + * From user-space application's point of view, USDT is essentially just + * a slightly special function call that normally has zero overhead, unless it + * is being traced by some external entity (e.g, BPF-based tool). Here's how + * a typical application can trigger USDT probe: + * + * #include <sys/sdt.h> // provided by systemtap-sdt-devel package + * // folly also provide similar functionality in folly/tracing/StaticTracepoint.h + * + * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y); + * + * USDT is identified by it's <provider-name>:<probe-name> pair of names. Each + * individual USDT has a fixed number of arguments (3 in the above example) + * and specifies values of each argument as if it was a function call. + * + * USDT call is actually not a function call, but is instead replaced by + * a single NOP instruction (thus zero overhead, effectively). But in addition + * to that, those USDT macros generate special SHT_NOTE ELF records in + * .note.stapsdt ELF section. Here's an example USDT definition as emitted by + * `readelf -n <binary>`: + * + * stapsdt 0x00000089 NT_STAPSDT (SystemTap probe descriptors) + * Provider: test + * Name: usdt12 + * Location: 0x0000000000549df3, Base: 0x00000000008effa4, Semaphore: 0x0000000000a4606e + * Arguments: -4@-1204(%rbp) -4@%edi -8@-1216(%rbp) -8@%r8 -4@$5 -8@%r9 8@%rdx 8@%r10 -4@$-9 -2@%cx -2@%ax -1@%sil + * + * In this case we have USDT test:usdt12 with 12 arguments. + * + * Location and base are offsets used to calculate absolute IP address of that + * NOP instruction that kernel can replace with an interrupt instruction to + * trigger instrumentation code (BPF program for all that we care about). + * + * Semaphore above is and optional feature. It records an address of a 2-byte + * refcount variable (normally in '.probes' ELF section) used for signaling if + * there is anything that is attached to USDT. This is useful for user + * applications if, for example, they need to prepare some arguments that are + * passed only to USDTs and preparation is expensive. By checking if USDT is + * "activated", an application can avoid paying those costs unnecessarily. + * Recent enough kernel has built-in support for automatically managing this + * refcount, which libbpf expects and relies on. If USDT is defined without + * associated semaphore, this value will be zero. See selftests for semaphore + * examples. + * + * Arguments is the most interesting part. This USDT specification string is + * providing information about all the USDT arguments and their locations. The + * part before @ sign defined byte size of the argument (1, 2, 4, or 8) and + * whether the argument is signed or unsigned (negative size means signed). + * The part after @ sign is assembly-like definition of argument location + * (see [0] for more details). Technically, assembler can provide some pretty + * advanced definitions, but libbpf is currently supporting three most common + * cases: + * 1) immediate constant, see 5th and 9th args above (-4@$5 and -4@-9); + * 2) register value, e.g., 8@%rdx, which means "unsigned 8-byte integer + * whose value is in register %rdx"; + * 3) memory dereference addressed by register, e.g., -4@-1204(%rbp), which + * specifies signed 32-bit integer stored at offset -1204 bytes from + * memory address stored in %rbp. + * + * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + * + * During attachment, libbpf parses all the relevant USDT specifications and + * prepares `struct usdt_spec` (USDT spec), which is then provided to BPF-side + * code through spec map. This allows BPF applications to quickly fetch the + * actual value at runtime using a simple BPF-side code. + * + * With basics out of the way, let's go over less immediately obvious aspects + * of supporting USDTs. + * + * First, there is no special USDT BPF program type. It is actually just + * a uprobe BPF program (which for kernel, at least currently, is just a kprobe + * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference + * that uprobe is usually attached at the function entry, while USDT will + * normally will be somewhere inside the function. But it should always be + * pointing to NOP instruction, which makes such uprobes the fastest uprobe + * kind. + * + * Second, it's important to realize that such STAP_PROBEn(provider, name, ...) + * macro invocations can end up being inlined many-many times, depending on + * specifics of each individual user application. So single conceptual USDT + * (identified by provider:name pair of identifiers) is, generally speaking, + * multiple uprobe locations (USDT call sites) in different places in user + * application. Further, again due to inlining, each USDT call site might end + * up having the same argument #N be located in a different place. In one call + * site it could be a constant, in another will end up in a register, and in + * yet another could be some other register or even somewhere on the stack. + * + * As such, "attaching to USDT" means (in general case) attaching the same + * uprobe BPF program to multiple target locations in user application, each + * potentially having a completely different USDT spec associated with it. + * To wire all this up together libbpf allocates a unique integer spec ID for + * each unique USDT spec. Spec IDs are allocated as sequential small integers + * so that they can be used as keys in array BPF map (for performance reasons). + * Spec ID allocation and accounting is big part of what usdt_manager is + * about. This state has to be maintained per-BPF object and coordinate + * between different USDT attachments within the same BPF object. + * + * Spec ID is the key in spec BPF map, value is the actual USDT spec layed out + * as struct usdt_spec. Each invocation of BPF program at runtime needs to + * know its associated spec ID. It gets it either through BPF cookie, which + * libbpf sets to spec ID during attach time, or, if kernel is too old to + * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such + * case. The latter means that some modes of operation can't be supported + * without BPF cookie. Such mode is attaching to shared library "generically", + * without specifying target process. In such case, it's impossible to + * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode + * is not supported without BPF cookie support. + * + * Note that libbpf is using BPF cookie functionality for its own internal + * needs, so user itself can't rely on BPF cookie feature. To that end, libbpf + * provides conceptually equivalent USDT cookie support. It's still u64 + * user-provided value that can be associated with USDT attachment. Note that + * this will be the same value for all USDT call sites within the same single + * *logical* USDT attachment. This makes sense because to user attaching to + * USDT is a single BPF program triggered for singular USDT probe. The fact + * that this is done at multiple actual locations is a mostly hidden + * implementation details. This USDT cookie value can be fetched with + * bpf_usdt_cookie(ctx) API provided by usdt.bpf.h + * + * Lastly, while single USDT can have tons of USDT call sites, it doesn't + * necessarily have that many different USDT specs. It very well might be + * that 1000 USDT call sites only need 5 different USDT specs, because all the + * arguments are typically contained in a small set of registers or stack + * locations. As such, it's wasteful to allocate as many USDT spec IDs as + * there are USDT call sites. So libbpf tries to be frugal and performs + * on-the-fly deduplication during a single USDT attachment to only allocate + * the minimal required amount of unique USDT specs (and thus spec IDs). This + * is trivially achieved by using USDT spec string (Arguments string from USDT + * note) as a lookup key in a hashmap. USDT spec string uniquely defines + * everything about how to fetch USDT arguments, so two USDT call sites + * sharing USDT spec string can safely share the same USDT spec and spec ID. + * Note, this spec string deduplication is happening only during the same USDT + * attachment, so each USDT spec shares the same USDT cookie value. This is + * not generally true for other USDT attachments within the same BPF object, + * as even if USDT spec string is the same, USDT cookie value can be + * different. It was deemed excessive to try to deduplicate across independent + * USDT attachments by taking into account USDT spec string *and* USDT cookie + * value, which would complicated spec ID accounting significantly for little + * gain. + */ + +#define USDT_BASE_SEC ".stapsdt.base" +#define USDT_SEMA_SEC ".probes" +#define USDT_NOTE_SEC ".note.stapsdt" +#define USDT_NOTE_TYPE 3 +#define USDT_NOTE_NAME "stapsdt" + +/* should match exactly enum __bpf_usdt_arg_type from usdt.bpf.h */ +enum usdt_arg_type { + USDT_ARG_CONST, + USDT_ARG_REG, + USDT_ARG_REG_DEREF, +}; + +/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */ +struct usdt_arg_spec { + __u64 val_off; + enum usdt_arg_type arg_type; + short reg_off; + bool arg_signed; + char arg_bitshift; +}; + +/* should match BPF_USDT_MAX_ARG_CNT in usdt.bpf.h */ +#define USDT_MAX_ARG_CNT 12 + +/* should match struct __bpf_usdt_spec from usdt.bpf.h */ +struct usdt_spec { + struct usdt_arg_spec args[USDT_MAX_ARG_CNT]; + __u64 usdt_cookie; + short arg_cnt; +}; + +struct usdt_note { + const char *provider; + const char *name; + /* USDT args specification string, e.g.: + * "-4@%esi -4@-24(%rbp) -4@%ecx 2@%ax 8@%rdx" + */ + const char *args; + long loc_addr; + long base_addr; + long sema_addr; +}; + +struct usdt_target { + long abs_ip; + long rel_ip; + long sema_off; + struct usdt_spec spec; + const char *spec_str; +}; + +struct usdt_manager { + struct bpf_map *specs_map; + struct bpf_map *ip_to_spec_id_map; + + int *free_spec_ids; + size_t free_spec_cnt; + size_t next_free_spec_id; + + bool has_bpf_cookie; + bool has_sema_refcnt; +}; + +struct usdt_manager *usdt_manager_new(struct bpf_object *obj) +{ + static const char *ref_ctr_sysfs_path = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset"; + struct usdt_manager *man; + struct bpf_map *specs_map, *ip_to_spec_id_map; + + specs_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_specs"); + ip_to_spec_id_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_ip_to_spec_id"); + if (!specs_map || !ip_to_spec_id_map) { + pr_warn("usdt: failed to find USDT support BPF maps, did you forget to include bpf/usdt.bpf.h?\n"); + return ERR_PTR(-ESRCH); + } + + man = calloc(1, sizeof(*man)); + if (!man) + return ERR_PTR(-ENOMEM); + + man->specs_map = specs_map; + man->ip_to_spec_id_map = ip_to_spec_id_map; + + /* Detect if BPF cookie is supported for kprobes. + * We don't need IP-to-ID mapping if we can use BPF cookies. + * Added in: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value") + */ + man->has_bpf_cookie = kernel_supports(obj, FEAT_BPF_COOKIE); + + /* Detect kernel support for automatic refcounting of USDT semaphore. + * If this is not supported, USDTs with semaphores will not be supported. + * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") + */ + man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0; + + return man; +} + +void usdt_manager_free(struct usdt_manager *man) +{ + if (IS_ERR_OR_NULL(man)) + return; + + free(man->free_spec_ids); + free(man); +} + +static int sanity_check_usdt_elf(Elf *elf, const char *path) +{ + GElf_Ehdr ehdr; + int endianness; + + if (elf_kind(elf) != ELF_K_ELF) { + pr_warn("usdt: unrecognized ELF kind %d for '%s'\n", elf_kind(elf), path); + return -EBADF; + } + + switch (gelf_getclass(elf)) { + case ELFCLASS64: + if (sizeof(void *) != 8) { + pr_warn("usdt: attaching to 64-bit ELF binary '%s' is not supported\n", path); + return -EBADF; + } + break; + case ELFCLASS32: + if (sizeof(void *) != 4) { + pr_warn("usdt: attaching to 32-bit ELF binary '%s' is not supported\n", path); + return -EBADF; + } + break; + default: + pr_warn("usdt: unsupported ELF class for '%s'\n", path); + return -EBADF; + } + + if (!gelf_getehdr(elf, &ehdr)) + return -EINVAL; + + if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) { + pr_warn("usdt: unsupported type of ELF binary '%s' (%d), only ET_EXEC and ET_DYN are supported\n", + path, ehdr.e_type); + return -EBADF; + } + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + endianness = ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + endianness = ELFDATA2MSB; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + if (endianness != ehdr.e_ident[EI_DATA]) { + pr_warn("usdt: ELF endianness mismatch for '%s'\n", path); + return -EBADF; + } + + return 0; +} + +static int find_elf_sec_by_name(Elf *elf, const char *sec_name, GElf_Shdr *shdr, Elf_Scn **scn) +{ + Elf_Scn *sec = NULL; + size_t shstrndx; + + if (elf_getshdrstrndx(elf, &shstrndx)) + return -EINVAL; + + /* check if ELF is corrupted and avoid calling elf_strptr if yes */ + if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) + return -EINVAL; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *name; + + if (!gelf_getshdr(sec, shdr)) + return -EINVAL; + + name = elf_strptr(elf, shstrndx, shdr->sh_name); + if (name && strcmp(sec_name, name) == 0) { + *scn = sec; + return 0; + } + } + + return -ENOENT; +} + +struct elf_seg { + long start; + long end; + long offset; + bool is_exec; +}; + +static int cmp_elf_segs(const void *_a, const void *_b) +{ + const struct elf_seg *a = _a; + const struct elf_seg *b = _b; + + return a->start < b->start ? -1 : 1; +} + +static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, size_t *seg_cnt) +{ + GElf_Phdr phdr; + size_t n; + int i, err; + struct elf_seg *seg; + void *tmp; + + *seg_cnt = 0; + + if (elf_getphdrnum(elf, &n)) { + err = -errno; + return err; + } + + for (i = 0; i < n; i++) { + if (!gelf_getphdr(elf, i, &phdr)) { + err = -errno; + return err; + } + + pr_debug("usdt: discovered PHDR #%d in '%s': vaddr 0x%lx memsz 0x%lx offset 0x%lx type 0x%lx flags 0x%lx\n", + i, path, (long)phdr.p_vaddr, (long)phdr.p_memsz, (long)phdr.p_offset, + (long)phdr.p_type, (long)phdr.p_flags); + if (phdr.p_type != PT_LOAD) + continue; + + tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs)); + if (!tmp) + return -ENOMEM; + + *segs = tmp; + seg = *segs + *seg_cnt; + (*seg_cnt)++; + + seg->start = phdr.p_vaddr; + seg->end = phdr.p_vaddr + phdr.p_memsz; + seg->offset = phdr.p_offset; + seg->is_exec = phdr.p_flags & PF_X; + } + + if (*seg_cnt == 0) { + pr_warn("usdt: failed to find PT_LOAD program headers in '%s'\n", path); + return -ESRCH; + } + + qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs); + return 0; +} + +static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) +{ + char path[PATH_MAX], line[PATH_MAX], mode[16]; + size_t seg_start, seg_end, seg_off; + struct elf_seg *seg; + int tmp_pid, i, err; + FILE *f; + + *seg_cnt = 0; + + /* Handle containerized binaries only accessible from + * /proc/<pid>/root/<path>. They will be reported as just /<path> in + * /proc/<pid>/maps. + */ + if (sscanf(lib_path, "/proc/%d/root%s", &tmp_pid, path) == 2 && pid == tmp_pid) + goto proceed; + + if (!realpath(lib_path, path)) { + pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n", + lib_path, -errno); + libbpf_strlcpy(path, lib_path, sizeof(path)); + } + +proceed: + sprintf(line, "/proc/%d/maps", pid); + f = fopen(line, "r"); + if (!f) { + err = -errno; + pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", + line, lib_path, err); + return err; + } + + /* We need to handle lines with no path at the end: + * + * 7f5c6f5d1000-7f5c6f5d3000 rw-p 001c7000 08:04 21238613 /usr/lib64/libc-2.17.so + * 7f5c6f5d3000-7f5c6f5d8000 rw-p 00000000 00:00 0 + * 7f5c6f5d8000-7f5c6f5d9000 r-xp 00000000 103:01 362990598 /data/users/andriin/linux/tools/bpf/usdt/libhello_usdt.so + */ + while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n", + &seg_start, &seg_end, mode, &seg_off, line) == 5) { + void *tmp; + + /* to handle no path case (see above) we need to capture line + * without skipping any whitespaces. So we need to strip + * leading whitespaces manually here + */ + i = 0; + while (isblank(line[i])) + i++; + if (strcmp(line + i, path) != 0) + continue; + + pr_debug("usdt: discovered segment for lib '%s': addrs %zx-%zx mode %s offset %zx\n", + path, seg_start, seg_end, mode, seg_off); + + /* ignore non-executable sections for shared libs */ + if (mode[2] != 'x') + continue; + + tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs)); + if (!tmp) { + err = -ENOMEM; + goto err_out; + } + + *segs = tmp; + seg = *segs + *seg_cnt; + *seg_cnt += 1; + + seg->start = seg_start; + seg->end = seg_end; + seg->offset = seg_off; + seg->is_exec = true; + } + + if (*seg_cnt == 0) { + pr_warn("usdt: failed to find '%s' (resolved to '%s') within PID %d memory mappings\n", + lib_path, path, pid); + err = -ESRCH; + goto err_out; + } + + qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs); + err = 0; +err_out: + fclose(f); + return err; +} + +static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative) +{ + struct elf_seg *seg; + int i; + + if (relative) { + /* for shared libraries, address is relative offset and thus + * should be fall within logical offset-based range of + * [offset_start, offset_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start)) + return seg; + } + } else { + /* for binaries, address is absolute and thus should be within + * absolute address range of [seg_start, seg_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->start <= addr && addr < seg->end) + return seg; + } + } + + return NULL; +} + +static int parse_usdt_note(Elf *elf, const char *path, long base_addr, + GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, + struct usdt_note *usdt_note); + +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie); + +static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid, + const char *usdt_provider, const char *usdt_name, long usdt_cookie, + struct usdt_target **out_targets, size_t *out_target_cnt) +{ + size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0; + struct elf_seg *segs = NULL, *lib_segs = NULL; + struct usdt_target *targets = NULL, *target; + long base_addr = 0; + Elf_Scn *notes_scn, *base_scn; + GElf_Shdr base_shdr, notes_shdr; + GElf_Ehdr ehdr; + GElf_Nhdr nhdr; + Elf_Data *data; + int err; + + *out_targets = NULL; + *out_target_cnt = 0; + + err = find_elf_sec_by_name(elf, USDT_NOTE_SEC, ¬es_shdr, ¬es_scn); + if (err) { + pr_warn("usdt: no USDT notes section (%s) found in '%s'\n", USDT_NOTE_SEC, path); + return err; + } + + if (notes_shdr.sh_type != SHT_NOTE || !gelf_getehdr(elf, &ehdr)) { + pr_warn("usdt: invalid USDT notes section (%s) in '%s'\n", USDT_NOTE_SEC, path); + return -EINVAL; + } + + err = parse_elf_segs(elf, path, &segs, &seg_cnt); + if (err) { + pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err); + goto err_out; + } + + /* .stapsdt.base ELF section is optional, but is used for prelink + * offset compensation (see a big comment further below) + */ + if (find_elf_sec_by_name(elf, USDT_BASE_SEC, &base_shdr, &base_scn) == 0) + base_addr = base_shdr.sh_addr; + + data = elf_getdata(notes_scn, 0); + off = 0; + while ((off = gelf_getnote(data, off, &nhdr, &name_off, &desc_off)) > 0) { + long usdt_abs_ip, usdt_rel_ip, usdt_sema_off = 0; + struct usdt_note note; + struct elf_seg *seg = NULL; + void *tmp; + + err = parse_usdt_note(elf, path, base_addr, &nhdr, + data->d_buf, name_off, desc_off, ¬e); + if (err) + goto err_out; + + if (strcmp(note.provider, usdt_provider) != 0 || strcmp(note.name, usdt_name) != 0) + continue; + + /* We need to compensate "prelink effect". See [0] for details, + * relevant parts quoted here: + * + * Each SDT probe also expands into a non-allocated ELF note. You can + * find this by looking at SHT_NOTE sections and decoding the format; + * see below for details. Because the note is non-allocated, it means + * there is no runtime cost, and also preserved in both stripped files + * and .debug files. + * + * However, this means that prelink won't adjust the note's contents + * for address offsets. Instead, this is done via the .stapsdt.base + * section. This is a special section that is added to the text. We + * will only ever have one of these sections in a final link and it + * will only ever be one byte long. Nothing about this section itself + * matters, we just use it as a marker to detect prelink address + * adjustments. + * + * Each probe note records the link-time address of the .stapsdt.base + * section alongside the probe PC address. The decoder compares the + * base address stored in the note with the .stapsdt.base section's + * sh_addr. Initially these are the same, but the section header will + * be adjusted by prelink. So the decoder applies the difference to + * the probe PC address to get the correct prelinked PC address; the + * same adjustment is applied to the semaphore address, if any. + * + * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + */ + usdt_rel_ip = usdt_abs_ip = note.loc_addr; + if (base_addr) { + usdt_abs_ip += base_addr - note.base_addr; + usdt_rel_ip += base_addr - note.base_addr; + } + + if (ehdr.e_type == ET_EXEC) { + /* When attaching uprobes (which what USDTs basically + * are) kernel expects a relative IP to be specified, + * so if we are attaching to an executable ELF binary + * (i.e., not a shared library), we need to calculate + * proper relative IP based on ELF's load address + */ + seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_abs_ip); + goto err_out; + } + if (!seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + usdt_abs_ip); + goto err_out; + } + + usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset); + } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */ + /* If we don't have BPF cookie support but need to + * attach to a shared library, we'll need to know and + * record absolute addresses of attach points due to + * the need to lookup USDT spec by absolute IP of + * triggered uprobe. Doing this resolution is only + * possible when we have a specific PID of the process + * that's using specified shared library. BPF cookie + * removes the absolute address limitation as we don't + * need to do this lookup (we just use BPF cookie as + * an index of USDT spec), so for newer kernels with + * BPF cookie support libbpf supports USDT attachment + * to shared libraries with no PID filter. + */ + if (pid < 0) { + pr_warn("usdt: attaching to shared libraries without specific PID is not supported on current kernel\n"); + err = -ENOTSUP; + goto err_out; + } + + /* lib_segs are lazily initialized only if necessary */ + if (lib_seg_cnt == 0) { + err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt); + if (err) { + pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", + pid, path, err); + goto err_out; + } + } + + seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_rel_ip); + goto err_out; + } + + usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset); + } + + pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n", + usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", path, + note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args, + seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0); + + /* Adjust semaphore address to be a relative offset */ + if (note.sema_addr) { + if (!man->has_sema_refcnt) { + pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n", + usdt_provider, usdt_name, path); + err = -ENOTSUP; + goto err_out; + } + + seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n", + usdt_provider, usdt_name, path, note.sema_addr); + goto err_out; + } + if (seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx] for semaphore of '%s:%s' at 0x%lx is executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + note.sema_addr); + goto err_out; + } + + usdt_sema_off = note.sema_addr - (seg->start - seg->offset); + + pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n", + usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", + path, note.sema_addr, note.base_addr, usdt_sema_off, + seg->start, seg->end, seg->offset); + } + + /* Record adjusted addresses and offsets and parse USDT spec */ + tmp = libbpf_reallocarray(targets, target_cnt + 1, sizeof(*targets)); + if (!tmp) { + err = -ENOMEM; + goto err_out; + } + targets = tmp; + + target = &targets[target_cnt]; + memset(target, 0, sizeof(*target)); + + target->abs_ip = usdt_abs_ip; + target->rel_ip = usdt_rel_ip; + target->sema_off = usdt_sema_off; + + /* notes->args references strings from Elf itself, so they can + * be referenced safely until elf_end() call + */ + target->spec_str = note.args; + + err = parse_usdt_spec(&target->spec, ¬e, usdt_cookie); + if (err) + goto err_out; + + target_cnt++; + } + + *out_targets = targets; + *out_target_cnt = target_cnt; + err = target_cnt; + +err_out: + free(segs); + free(lib_segs); + if (err < 0) + free(targets); + return err; +} + +struct bpf_link_usdt { + struct bpf_link link; + + struct usdt_manager *usdt_man; + + size_t spec_cnt; + int *spec_ids; + + size_t uprobe_cnt; + struct { + long abs_ip; + struct bpf_link *link; + } *uprobes; +}; + +static int bpf_link_usdt_detach(struct bpf_link *link) +{ + struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + struct usdt_manager *man = usdt_link->usdt_man; + int i; + + for (i = 0; i < usdt_link->uprobe_cnt; i++) { + /* detach underlying uprobe link */ + bpf_link__destroy(usdt_link->uprobes[i].link); + /* there is no need to update specs map because it will be + * unconditionally overwritten on subsequent USDT attaches, + * but if BPF cookies are not used we need to remove entry + * from ip_to_spec_id map, otherwise we'll run into false + * conflicting IP errors + */ + if (!man->has_bpf_cookie) { + /* not much we can do about errors here */ + (void)bpf_map_delete_elem(bpf_map__fd(man->ip_to_spec_id_map), + &usdt_link->uprobes[i].abs_ip); + } + } + + /* try to return the list of previously used spec IDs to usdt_manager + * for future reuse for subsequent USDT attaches + */ + if (!man->free_spec_ids) { + /* if there were no free spec IDs yet, just transfer our IDs */ + man->free_spec_ids = usdt_link->spec_ids; + man->free_spec_cnt = usdt_link->spec_cnt; + usdt_link->spec_ids = NULL; + } else { + /* otherwise concat IDs */ + size_t new_cnt = man->free_spec_cnt + usdt_link->spec_cnt; + int *new_free_ids; + + new_free_ids = libbpf_reallocarray(man->free_spec_ids, new_cnt, + sizeof(*new_free_ids)); + /* If we couldn't resize free_spec_ids, we'll just leak + * a bunch of free IDs; this is very unlikely to happen and if + * system is so exhausted on memory, it's the least of user's + * concerns, probably. + * So just do our best here to return those IDs to usdt_manager. + */ + if (new_free_ids) { + memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids, + usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids)); + man->free_spec_ids = new_free_ids; + man->free_spec_cnt = new_cnt; + } + } + + return 0; +} + +static void bpf_link_usdt_dealloc(struct bpf_link *link) +{ + struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + + free(usdt_link->spec_ids); + free(usdt_link->uprobes); + free(usdt_link); +} + +static size_t specs_hash_fn(const void *key, void *ctx) +{ + const char *s = key; + + return str_hash(s); +} + +static bool specs_equal_fn(const void *key1, const void *key2, void *ctx) +{ + const char *s1 = key1; + const char *s2 = key2; + + return strcmp(s1, s2) == 0; +} + +static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash, + struct bpf_link_usdt *link, struct usdt_target *target, + int *spec_id, bool *is_new) +{ + void *tmp; + int err; + + /* check if we already allocated spec ID for this spec string */ + if (hashmap__find(specs_hash, target->spec_str, &tmp)) { + *spec_id = (long)tmp; + *is_new = false; + return 0; + } + + /* otherwise it's a new ID that needs to be set up in specs map and + * returned back to usdt_manager when USDT link is detached + */ + tmp = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids)); + if (!tmp) + return -ENOMEM; + link->spec_ids = tmp; + + /* get next free spec ID, giving preference to free list, if not empty */ + if (man->free_spec_cnt) { + *spec_id = man->free_spec_ids[man->free_spec_cnt - 1]; + + /* cache spec ID for current spec string for future lookups */ + err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + if (err) + return err; + + man->free_spec_cnt--; + } else { + /* don't allocate spec ID bigger than what fits in specs map */ + if (man->next_free_spec_id >= bpf_map__max_entries(man->specs_map)) + return -E2BIG; + + *spec_id = man->next_free_spec_id; + + /* cache spec ID for current spec string for future lookups */ + err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + if (err) + return err; + + man->next_free_spec_id++; + } + + /* remember new spec ID in the link for later return back to free list on detach */ + link->spec_ids[link->spec_cnt] = *spec_id; + link->spec_cnt++; + *is_new = true; + return 0; +} + +struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, + pid_t pid, const char *path, + const char *usdt_provider, const char *usdt_name, + long usdt_cookie) +{ + int i, fd, err, spec_map_fd, ip_map_fd; + LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct hashmap *specs_hash = NULL; + struct bpf_link_usdt *link = NULL; + struct usdt_target *targets = NULL; + size_t target_cnt; + Elf *elf; + + spec_map_fd = bpf_map__fd(man->specs_map); + ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); + + /* TODO: perform path resolution similar to uprobe's */ + fd = open(path, O_RDONLY); + if (fd < 0) { + err = -errno; + pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); + return libbpf_err_ptr(err); + } + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + err = -EBADF; + pr_warn("usdt: failed to parse ELF binary '%s': %s\n", path, elf_errmsg(-1)); + goto err_out; + } + + err = sanity_check_usdt_elf(elf, path); + if (err) + goto err_out; + + /* normalize PID filter */ + if (pid < 0) + pid = -1; + else if (pid == 0) + pid = getpid(); + + /* discover USDT in given binary, optionally limiting + * activations to a given PID, if pid > 0 + */ + err = collect_usdt_targets(man, elf, path, pid, usdt_provider, usdt_name, + usdt_cookie, &targets, &target_cnt); + if (err <= 0) { + err = (err == 0) ? -ENOENT : err; + goto err_out; + } + + specs_hash = hashmap__new(specs_hash_fn, specs_equal_fn, NULL); + if (IS_ERR(specs_hash)) { + err = PTR_ERR(specs_hash); + goto err_out; + } + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto err_out; + } + + link->usdt_man = man; + link->link.detach = &bpf_link_usdt_detach; + link->link.dealloc = &bpf_link_usdt_dealloc; + + link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); + if (!link->uprobes) { + err = -ENOMEM; + goto err_out; + } + + for (i = 0; i < target_cnt; i++) { + struct usdt_target *target = &targets[i]; + struct bpf_link *uprobe_link; + bool is_new; + int spec_id; + + /* Spec ID can be either reused or newly allocated. If it is + * newly allocated, we'll need to fill out spec map, otherwise + * entire spec should be valid and can be just used by a new + * uprobe. We reuse spec when USDT arg spec is identical. We + * also never share specs between two different USDT + * attachments ("links"), so all the reused specs already + * share USDT cookie value implicitly. + */ + err = allocate_spec_id(man, specs_hash, link, target, &spec_id, &is_new); + if (err) + goto err_out; + + if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) { + err = -errno; + pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n", + spec_id, usdt_provider, usdt_name, path, err); + goto err_out; + } + if (!man->has_bpf_cookie && + bpf_map_update_elem(ip_map_fd, &target->abs_ip, &spec_id, BPF_NOEXIST)) { + err = -errno; + if (err == -EEXIST) { + pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n", + spec_id, usdt_provider, usdt_name, path); + } else { + pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n", + target->abs_ip, spec_id, usdt_provider, usdt_name, + path, err); + } + goto err_out; + } + + opts.ref_ctr_offset = target->sema_off; + opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0; + uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, + target->rel_ip, &opts); + err = libbpf_get_error(uprobe_link); + if (err) { + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", + i, usdt_provider, usdt_name, path, err); + goto err_out; + } + + link->uprobes[i].link = uprobe_link; + link->uprobes[i].abs_ip = target->abs_ip; + link->uprobe_cnt++; + } + + free(targets); + hashmap__free(specs_hash); + elf_end(elf); + close(fd); + + return &link->link; + +err_out: + if (link) + bpf_link__destroy(&link->link); + free(targets); + hashmap__free(specs_hash); + if (elf) + elf_end(elf); + close(fd); + return libbpf_err_ptr(err); +} + +/* Parse out USDT ELF note from '.note.stapsdt' section. + * Logic inspired by perf's code. + */ +static int parse_usdt_note(Elf *elf, const char *path, long base_addr, + GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, + struct usdt_note *note) +{ + const char *provider, *name, *args; + long addrs[3]; + size_t len; + + /* sanity check USDT note name and type first */ + if (strncmp(data + name_off, USDT_NOTE_NAME, nhdr->n_namesz) != 0) + return -EINVAL; + if (nhdr->n_type != USDT_NOTE_TYPE) + return -EINVAL; + + /* sanity check USDT note contents ("description" in ELF terminology) */ + len = nhdr->n_descsz; + data = data + desc_off; + + /* +3 is the very minimum required to store three empty strings */ + if (len < sizeof(addrs) + 3) + return -EINVAL; + + /* get location, base, and semaphore addrs */ + memcpy(&addrs, data, sizeof(addrs)); + + /* parse string fields: provider, name, args */ + provider = data + sizeof(addrs); + + name = (const char *)memchr(provider, '\0', data + len - provider); + if (!name) /* non-zero-terminated provider */ + return -EINVAL; + name++; + if (name >= data + len || *name == '\0') /* missing or empty name */ + return -EINVAL; + + args = memchr(name, '\0', data + len - name); + if (!args) /* non-zero-terminated name */ + return -EINVAL; + ++args; + if (args >= data + len) /* missing arguments spec */ + return -EINVAL; + + note->provider = provider; + note->name = name; + if (*args == '\0' || *args == ':') + note->args = ""; + else + note->args = args; + note->loc_addr = addrs[0]; + note->base_addr = addrs[1]; + note->sema_addr = addrs[2]; + + return 0; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg); + +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie) +{ + const char *s; + int len; + + spec->usdt_cookie = usdt_cookie; + spec->arg_cnt = 0; + + s = note->args; + while (s[0]) { + if (spec->arg_cnt >= USDT_MAX_ARG_CNT) { + pr_warn("usdt: too many USDT arguments (> %d) for '%s:%s' with args spec '%s'\n", + USDT_MAX_ARG_CNT, note->provider, note->name, note->args); + return -E2BIG; + } + + len = parse_usdt_arg(s, spec->arg_cnt, &spec->args[spec->arg_cnt]); + if (len < 0) + return len; + + s += len; + spec->arg_cnt++; + } + + return 0; +} + +/* Architecture-specific logic for parsing USDT argument location specs */ + +#if defined(__x86_64__) || defined(__i386__) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *names[4]; + size_t pt_regs_off; + } reg_map[] = { +#ifdef __x86_64__ +#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg64) +#else +#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg32) +#endif + { {"rip", "eip", "", ""}, reg_off(rip, eip) }, + { {"rax", "eax", "ax", "al"}, reg_off(rax, eax) }, + { {"rbx", "ebx", "bx", "bl"}, reg_off(rbx, ebx) }, + { {"rcx", "ecx", "cx", "cl"}, reg_off(rcx, ecx) }, + { {"rdx", "edx", "dx", "dl"}, reg_off(rdx, edx) }, + { {"rsi", "esi", "si", "sil"}, reg_off(rsi, esi) }, + { {"rdi", "edi", "di", "dil"}, reg_off(rdi, edi) }, + { {"rbp", "ebp", "bp", "bpl"}, reg_off(rbp, ebp) }, + { {"rsp", "esp", "sp", "spl"}, reg_off(rsp, esp) }, +#undef reg_off +#ifdef __x86_64__ + { {"r8", "r8d", "r8w", "r8b"}, offsetof(struct pt_regs, r8) }, + { {"r9", "r9d", "r9w", "r9b"}, offsetof(struct pt_regs, r9) }, + { {"r10", "r10d", "r10w", "r10b"}, offsetof(struct pt_regs, r10) }, + { {"r11", "r11d", "r11w", "r11b"}, offsetof(struct pt_regs, r11) }, + { {"r12", "r12d", "r12w", "r12b"}, offsetof(struct pt_regs, r12) }, + { {"r13", "r13d", "r13w", "r13b"}, offsetof(struct pt_regs, r13) }, + { {"r14", "r14d", "r14w", "r14b"}, offsetof(struct pt_regs, r14) }, + { {"r15", "r15d", "r15w", "r15b"}, offsetof(struct pt_regs, r15) }, +#endif + }; + int i, j; + + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { + for (j = 0; j < ARRAY_SIZE(reg_map[i].names); j++) { + if (strcmp(reg_name, reg_map[i].names[j]) == 0) + return reg_map[i].pt_regs_off; + } + } + + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + char *reg_name = NULL; + int arg_sz, len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, ®_name, &len) == 3) { + /* Memory dereference case, e.g., -4@-20(%rbp) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, ®_name, &len) == 2) { + /* Register read case, e.g., -4@%eax */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ $%ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@$71 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + +#elif defined(__s390x__) + +/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */ + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + unsigned int reg; + int arg_sz, len; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", &arg_sz, &off, ®, &len) == 3) { + /* Memory dereference case, e.g., -2@-28(%r15) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + if (reg > 15) { + pr_warn("usdt: unrecognized register '%%r%u'\n", reg); + return -EINVAL; + } + arg->reg_off = offsetof(user_pt_regs, gprs[reg]); + } else if (sscanf(arg_str, " %d @ %%r%u %n", &arg_sz, ®, &len) == 2) { + /* Register read case, e.g., -8@%r0 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + if (reg > 15) { + pr_warn("usdt: unrecognized register '%%r%u'\n", reg); + return -EINVAL; + } + arg->reg_off = offsetof(user_pt_regs, gprs[reg]); + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@71 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + +#else + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n"); + return -ENOTSUP; +} + +#endif diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 3820608faf57..bafdc5373a13 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -168,9 +168,15 @@ $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ -$(OUTPUT)/urandom_read: urandom_read.c +$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c + $(call msg,LIB,,$@) + $(Q)$(CC) $(CFLAGS) -fPIC $(LDFLAGS) $^ $(LDLIBS) --shared -o $@ + +$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@ + $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.c,$^) \ + liburandom_read.so $(LDLIBS) \ + -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) @@ -328,12 +334,8 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h \ - test_subskeleton.skel.h test_subskeleton_lib.skel.h - -# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file -# but that's created as a side-effect of the skel.h generation. -test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o -test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o + test_subskeleton.skel.h test_subskeleton_lib.skel.h \ + test_usdt.skel.h LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ @@ -346,6 +348,11 @@ test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o +# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file +# but that's created as a side-effect of the skel.h generation. +test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o +test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o +test_usdt.skel.h-deps := test_usdt.o test_usdt_multispec.o LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) @@ -400,6 +407,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ $$(INCLUDE_DIR)/vmlinux.h \ $(wildcard $(BPFDIR)/bpf_*.h) \ + $(wildcard $(BPFDIR)/*.bpf.h) \ | $(TRUNNER_OUTPUT) $$(BPFOBJ) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS)) @@ -491,6 +499,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ btf_helpers.c flow_dissector_load.h \ cap_helpers.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ + $(OUTPUT)/liburandom_read.so \ ima_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index d48f6e533e1e..c0c6d410751d 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -11,15 +11,22 @@ static void trigger_func(void) asm volatile (""); } +/* attach point for byname uprobe */ +static void trigger_func2(void) +{ + asm volatile (""); +} + void test_attach_probe(void) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); - int duration = 0; struct bpf_link *kprobe_link, *kretprobe_link; struct bpf_link *uprobe_link, *uretprobe_link; struct test_attach_probe* skel; ssize_t uprobe_offset, ref_ctr_offset; + struct bpf_link *uprobe_err_link; bool legacy; + char *mem; /* Check if new-style kprobe/uprobe API is supported. * Kernels that support new FD-based kprobe and uprobe BPF attachment @@ -43,9 +50,9 @@ void test_attach_probe(void) return; skel = test_attach_probe__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "skel_open")) return; - if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n")) + if (!ASSERT_OK_PTR(skel->bss, "check_bss")) goto cleanup; kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, @@ -90,25 +97,73 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_uretprobe = uretprobe_link; - /* trigger & validate kprobe && kretprobe */ - usleep(1); + /* verify auto-attach fails for old-style uprobe definition */ + uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname); + if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP, + "auto-attach should fail for old-style name")) + goto cleanup; + + uprobe_opts.func_name = "trigger_func2"; + uprobe_opts.retprobe = false; + uprobe_opts.ref_ctr_offset = 0; + skel->links.handle_uprobe_byname = + bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname, + 0 /* this pid */, + "/proc/self/exe", + 0, &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname")) + goto cleanup; + + /* verify auto-attach works */ + skel->links.handle_uretprobe_byname = + bpf_program__attach(skel->progs.handle_uretprobe_byname); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname")) + goto cleanup; - if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res", - "wrong kprobe res: %d\n", skel->bss->kprobe_res)) + /* test attach by name for a library function, using the library + * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo(). + */ + uprobe_opts.func_name = "malloc"; + uprobe_opts.retprobe = false; + skel->links.handle_uprobe_byname2 = + bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname2, + 0 /* this pid */, + "libc.so.6", + 0, &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2")) goto cleanup; - if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res", - "wrong kretprobe res: %d\n", skel->bss->kretprobe_res)) + + uprobe_opts.func_name = "free"; + uprobe_opts.retprobe = true; + skel->links.handle_uretprobe_byname2 = + bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_byname2, + -1 /* any pid */, + "libc.so.6", + 0, &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2")) goto cleanup; + /* trigger & validate kprobe && kretprobe */ + usleep(1); + + /* trigger & validate shared library u[ret]probes attached by name */ + mem = malloc(1); + free(mem); + /* trigger & validate uprobe & uretprobe */ trigger_func(); - if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res", - "wrong uprobe res: %d\n", skel->bss->uprobe_res)) - goto cleanup; - if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res", - "wrong uretprobe res: %d\n", skel->bss->uretprobe_res)) - goto cleanup; + /* trigger & validate uprobe attached by name */ + trigger_func2(); + + ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); + ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); + ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res"); + ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res"); + ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); + ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); + ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); + ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 044df13ee069..754e80937e5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -4,6 +4,7 @@ #include <network_helpers.h> #include "for_each_hash_map_elem.skel.h" #include "for_each_array_map_elem.skel.h" +#include "for_each_map_elem_write_key.skel.h" static unsigned int duration; @@ -129,10 +130,21 @@ out: for_each_array_map_elem__destroy(skel); } +static void test_write_map_key(void) +{ + struct for_each_map_elem_write_key *skel; + + skel = for_each_map_elem_write_key__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "for_each_map_elem_write_key__open_and_load")) + for_each_map_elem_write_key__destroy(skel); +} + void test_for_each(void) { if (test__start_subtest("hash_map")) test_hash_map(); if (test__start_subtest("array_map")) test_array_map(); + if (test__start_subtest("write_map_key")) + test_write_map_key(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index f6933b06daf8..1d7a2f1e0731 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -138,12 +138,16 @@ cleanup: test_ksyms_weak_lskel__destroy(skel); } -static void test_write_check(void) +static void test_write_check(bool test_handler1) { struct test_ksyms_btf_write_check *skel; - skel = test_ksyms_btf_write_check__open_and_load(); - ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n"); + skel = test_ksyms_btf_write_check__open(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_btf_write_check__open")) + return; + bpf_program__set_autoload(test_handler1 ? skel->progs.handler2 : skel->progs.handler1, false); + ASSERT_ERR(test_ksyms_btf_write_check__load(skel), + "unexpected load of a prog writing to ksym memory\n"); test_ksyms_btf_write_check__destroy(skel); } @@ -179,6 +183,9 @@ void test_ksyms_btf(void) if (test__start_subtest("weak_ksyms_lskel")) test_weak_syms_lskel(); - if (test__start_subtest("write_check")) - test_write_check(); + if (test__start_subtest("write_check1")) + test_write_check(true); + + if (test__start_subtest("write_check2")) + test_write_check(false); } diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c index 954964f0ac3d..d3915c58d0e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/netcnt.c +++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c @@ -25,7 +25,7 @@ void serial_test_netcnt(void) if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load")) return; - nproc = get_nprocs_conf(); + nproc = bpf_num_possible_cpus(); percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)")) goto err; diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 509e21d5cb9d..b90ee47d3111 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -81,6 +81,7 @@ void test_test_global_funcs(void) { "test_global_func14.o", "reference type('FWD S') size cannot be determined" }, { "test_global_func15.o", "At program exit the register R0 has value" }, { "test_global_func16.o", "invalid indirect read from stack" }, + { "test_global_func17.o", "Caller passes invalid args into func#1" }, }; libbpf_print_fn_t old_print_fn = NULL; int err, i, duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c new file mode 100644 index 000000000000..d6003dc8cc99 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ + +#include <test_progs.h> +#include "test_uprobe_autoattach.skel.h" + +/* uprobe attach point */ +static noinline int autoattach_trigger_func(int arg) +{ + asm volatile (""); + return arg + 1; +} + +void test_uprobe_autoattach(void) +{ + struct test_uprobe_autoattach *skel; + int trigger_val = 100, trigger_ret; + size_t malloc_sz = 1; + char *mem; + + skel = test_uprobe_autoattach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + if (!ASSERT_OK(test_uprobe_autoattach__attach(skel), "skel_attach")) + goto cleanup; + + skel->bss->test_pid = getpid(); + + /* trigger & validate uprobe & uretprobe */ + trigger_ret = autoattach_trigger_func(trigger_val); + + skel->bss->test_pid = getpid(); + + /* trigger & validate shared library u[ret]probes attached by name */ + mem = malloc(malloc_sz); + free(mem); + + ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1"); + ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran"); + ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc"); + ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran"); + ASSERT_EQ(skel->bss->uprobe_byname2_parm1, malloc_sz, "check_uprobe_byname2_parm1"); + ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran"); + ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc"); + ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran"); +cleanup: + test_uprobe_autoattach__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c new file mode 100644 index 000000000000..a71f51bdc08d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> + +#define _SDT_HAS_SEMAPHORES 1 +#include "../sdt.h" + +#include "test_usdt.skel.h" +#include "test_urandom_usdt.skel.h" + +int lets_test_this(int); + +static volatile int idx = 2; +static volatile __u64 bla = 0xFEDCBA9876543210ULL; +static volatile short nums[] = {-1, -2, -3, }; + +static volatile struct { + int x; + signed char y; +} t1 = { 1, -127 }; + +#define SEC(name) __attribute__((section(name), used)) + +unsigned short test_usdt0_semaphore SEC(".probes"); +unsigned short test_usdt3_semaphore SEC(".probes"); +unsigned short test_usdt12_semaphore SEC(".probes"); + +static void __always_inline trigger_func(int x) { + long y = 42; + + if (test_usdt0_semaphore) + STAP_PROBE(test, usdt0); + if (test_usdt3_semaphore) + STAP_PROBE3(test, usdt3, x, y, &bla); + if (test_usdt12_semaphore) { + STAP_PROBE12(test, usdt12, + x, x + 1, y, x + y, 5, + y / 7, bla, &bla, -9, nums[x], + nums[idx], t1.y); + } +} + +static void subtest_basic_usdt(void) +{ + LIBBPF_OPTS(bpf_usdt_opts, opts); + struct test_usdt *skel; + struct test_usdt__bss *bss; + int err; + + skel = test_usdt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bss = skel->bss; + bss->my_pid = getpid(); + + err = test_usdt__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* usdt0 won't be auto-attached */ + opts.usdt_cookie = 0xcafedeadbeeffeed; + skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, + 0 /*self*/, "/proc/self/exe", + "test", "usdt0", &opts); + if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link")) + goto cleanup; + + trigger_func(1); + + ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called"); + ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called"); + ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called"); + + ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie"); + ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt"); + ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret"); + + /* auto-attached usdt3 gets default zero cookie value */ + ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie"); + ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt"); + + ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret"); + ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret"); + ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret"); + ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1"); + ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); + ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3"); + + /* auto-attached usdt12 gets default zero cookie value */ + ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie"); + ASSERT_EQ(bss->usdt12_arg_cnt, 12, "usdt12_arg_cnt"); + + ASSERT_EQ(bss->usdt12_args[0], 1, "usdt12_arg1"); + ASSERT_EQ(bss->usdt12_args[1], 1 + 1, "usdt12_arg2"); + ASSERT_EQ(bss->usdt12_args[2], 42, "usdt12_arg3"); + ASSERT_EQ(bss->usdt12_args[3], 42 + 1, "usdt12_arg4"); + ASSERT_EQ(bss->usdt12_args[4], 5, "usdt12_arg5"); + ASSERT_EQ(bss->usdt12_args[5], 42 / 7, "usdt12_arg6"); + ASSERT_EQ(bss->usdt12_args[6], bla, "usdt12_arg7"); + ASSERT_EQ(bss->usdt12_args[7], (uintptr_t)&bla, "usdt12_arg8"); + ASSERT_EQ(bss->usdt12_args[8], -9, "usdt12_arg9"); + ASSERT_EQ(bss->usdt12_args[9], nums[1], "usdt12_arg10"); + ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11"); + ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12"); + + /* trigger_func() is marked __always_inline, so USDT invocations will be + * inlined in two different places, meaning that each USDT will have + * at least 2 different places to be attached to. This verifies that + * bpf_program__attach_usdt() handles this properly and attaches to + * all possible places of USDT invocation. + */ + trigger_func(2); + + ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called"); + ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called"); + ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called"); + + /* only check values that depend on trigger_func()'s input value */ + ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1"); + + ASSERT_EQ(bss->usdt12_args[0], 2, "usdt12_arg1"); + ASSERT_EQ(bss->usdt12_args[1], 2 + 1, "usdt12_arg2"); + ASSERT_EQ(bss->usdt12_args[3], 42 + 2, "usdt12_arg4"); + ASSERT_EQ(bss->usdt12_args[9], nums[2], "usdt12_arg10"); + + /* detach and re-attach usdt3 */ + bpf_link__destroy(skel->links.usdt3); + + opts.usdt_cookie = 0xBADC00C51E; + skel->links.usdt3 = bpf_program__attach_usdt(skel->progs.usdt3, -1 /* any pid */, + "/proc/self/exe", "test", "usdt3", &opts); + if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach")) + goto cleanup; + + trigger_func(3); + + ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called"); + /* this time usdt3 has custom cookie */ + ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie"); + ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt"); + + ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret"); + ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret"); + ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret"); + ASSERT_EQ(bss->usdt3_args[0], 3, "usdt3_arg1"); + ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); + ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3"); + +cleanup: + test_usdt__destroy(skel); +} + +unsigned short test_usdt_100_semaphore SEC(".probes"); +unsigned short test_usdt_300_semaphore SEC(".probes"); +unsigned short test_usdt_400_semaphore SEC(".probes"); + +#define R10(F, X) F(X+0); F(X+1);F(X+2); F(X+3); F(X+4); \ + F(X+5); F(X+6); F(X+7); F(X+8); F(X+9); +#define R100(F, X) R10(F,X+ 0);R10(F,X+10);R10(F,X+20);R10(F,X+30);R10(F,X+40); \ + R10(F,X+50);R10(F,X+60);R10(F,X+70);R10(F,X+80);R10(F,X+90); + +/* carefully control that we get exactly 100 inlines by preventing inlining */ +static void __always_inline f100(int x) +{ + STAP_PROBE1(test, usdt_100, x); +} + +__weak void trigger_100_usdts(void) +{ + R100(f100, 0); +} + +/* we shouldn't be able to attach to test:usdt2_300 USDT as we don't have as + * many slots for specs. It's important that each STAP_PROBE2() invocation + * (after untolling) gets different arg spec due to compiler inlining i as + * a constant + */ +static void __always_inline f300(int x) +{ + STAP_PROBE1(test, usdt_300, x); +} + +__weak void trigger_300_usdts(void) +{ + R100(f300, 0); + R100(f300, 100); + R100(f300, 200); +} + +static void __always_inline f400(int x __attribute__((unused))) +{ + static int y; + + STAP_PROBE1(test, usdt_400, y++); +} + +/* this time we have 400 different USDT call sites, but they have uniform + * argument location, so libbpf's spec string deduplication logic should keep + * spec count use very small and so we should be able to attach to all 400 + * call sites + */ +__weak void trigger_400_usdts(void) +{ + R100(f400, 0); + R100(f400, 100); + R100(f400, 200); + R100(f400, 300); +} + +static void subtest_multispec_usdt(void) +{ + LIBBPF_OPTS(bpf_usdt_opts, opts); + struct test_usdt *skel; + struct test_usdt__bss *bss; + int err, i; + + skel = test_usdt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bss = skel->bss; + bss->my_pid = getpid(); + + err = test_usdt__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* usdt_100 is auto-attached and there are 100 inlined call sites, + * let's validate that all of them are properly attached to and + * handled from BPF side + */ + trigger_100_usdts(); + + ASSERT_EQ(bss->usdt_100_called, 100, "usdt_100_called"); + ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum"); + + /* Stress test free spec ID tracking. By default libbpf allows up to + * 256 specs to be used, so if we don't return free spec IDs back + * after few detachments and re-attachments we should run out of + * available spec IDs. + */ + for (i = 0; i < 2; i++) { + bpf_link__destroy(skel->links.usdt_100); + + skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, + "/proc/self/exe", + "test", "usdt_100", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_100_reattach")) + goto cleanup; + + bss->usdt_100_sum = 0; + trigger_100_usdts(); + + ASSERT_EQ(bss->usdt_100_called, (i + 1) * 100 + 100, "usdt_100_called"); + ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum"); + } + + /* Now let's step it up and try to attach USDT that requires more than + * 256 attach points with different specs for each. + * Note that we need trigger_300_usdts() only to actually have 300 + * USDT call sites, we are not going to actually trace them. + */ + trigger_300_usdts(); + + /* we'll reuse usdt_100 BPF program for usdt_300 test */ + bpf_link__destroy(skel->links.usdt_100); + skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe", + "test", "usdt_300", NULL); + err = -errno; + if (!ASSERT_ERR_PTR(skel->links.usdt_100, "usdt_300_bad_attach")) + goto cleanup; + ASSERT_EQ(err, -E2BIG, "usdt_300_attach_err"); + + /* let's check that there are no "dangling" BPF programs attached due + * to partial success of the above test:usdt_300 attachment + */ + bss->usdt_100_called = 0; + bss->usdt_100_sum = 0; + + f300(777); /* this is 301st instance of usdt_300 */ + + ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called"); + ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum"); + + /* This time we have USDT with 400 inlined invocations, but arg specs + * should be the same across all sites, so libbpf will only need to + * use one spec and thus we'll be able to attach 400 uprobes + * successfully. + * + * Again, we are reusing usdt_100 BPF program. + */ + skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, + "/proc/self/exe", + "test", "usdt_400", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_400_attach")) + goto cleanup; + + trigger_400_usdts(); + + ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called"); + ASSERT_EQ(bss->usdt_100_sum, 399 * 400 / 2, "usdt_400_sum"); + +cleanup: + test_usdt__destroy(skel); +} + +static FILE *urand_spawn(int *pid) +{ + FILE *f; + + /* urandom_read's stdout is wired into f */ + f = popen("./urandom_read 1 report-pid", "r"); + if (!f) + return NULL; + + if (fscanf(f, "%d", pid) != 1) { + pclose(f); + return NULL; + } + + return f; +} + +static int urand_trigger(FILE **urand_pipe) +{ + int exit_code; + + /* pclose() waits for child process to exit and returns their exit code */ + exit_code = pclose(*urand_pipe); + *urand_pipe = NULL; + + return exit_code; +} + +static void subtest_urandom_usdt(bool auto_attach) +{ + struct test_urandom_usdt *skel; + struct test_urandom_usdt__bss *bss; + struct bpf_link *l; + FILE *urand_pipe = NULL; + int err, urand_pid = 0; + + skel = test_urandom_usdt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + urand_pipe = urand_spawn(&urand_pid); + if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn")) + goto cleanup; + + bss = skel->bss; + bss->urand_pid = urand_pid; + + if (auto_attach) { + err = test_urandom_usdt__attach(skel); + if (!ASSERT_OK(err, "skel_auto_attach")) + goto cleanup; + } else { + l = bpf_program__attach_usdt(skel->progs.urand_read_without_sema, + urand_pid, "./urandom_read", + "urand", "read_without_sema", NULL); + if (!ASSERT_OK_PTR(l, "urand_without_sema_attach")) + goto cleanup; + skel->links.urand_read_without_sema = l; + + l = bpf_program__attach_usdt(skel->progs.urand_read_with_sema, + urand_pid, "./urandom_read", + "urand", "read_with_sema", NULL); + if (!ASSERT_OK_PTR(l, "urand_with_sema_attach")) + goto cleanup; + skel->links.urand_read_with_sema = l; + + l = bpf_program__attach_usdt(skel->progs.urandlib_read_without_sema, + urand_pid, "./liburandom_read.so", + "urandlib", "read_without_sema", NULL); + if (!ASSERT_OK_PTR(l, "urandlib_without_sema_attach")) + goto cleanup; + skel->links.urandlib_read_without_sema = l; + + l = bpf_program__attach_usdt(skel->progs.urandlib_read_with_sema, + urand_pid, "./liburandom_read.so", + "urandlib", "read_with_sema", NULL); + if (!ASSERT_OK_PTR(l, "urandlib_with_sema_attach")) + goto cleanup; + skel->links.urandlib_read_with_sema = l; + + } + + /* trigger urandom_read USDTs */ + ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code"); + + ASSERT_EQ(bss->urand_read_without_sema_call_cnt, 1, "urand_wo_sema_cnt"); + ASSERT_EQ(bss->urand_read_without_sema_buf_sz_sum, 256, "urand_wo_sema_sum"); + + ASSERT_EQ(bss->urand_read_with_sema_call_cnt, 1, "urand_w_sema_cnt"); + ASSERT_EQ(bss->urand_read_with_sema_buf_sz_sum, 256, "urand_w_sema_sum"); + + ASSERT_EQ(bss->urandlib_read_without_sema_call_cnt, 1, "urandlib_wo_sema_cnt"); + ASSERT_EQ(bss->urandlib_read_without_sema_buf_sz_sum, 256, "urandlib_wo_sema_sum"); + + ASSERT_EQ(bss->urandlib_read_with_sema_call_cnt, 1, "urandlib_w_sema_cnt"); + ASSERT_EQ(bss->urandlib_read_with_sema_buf_sz_sum, 256, "urandlib_w_sema_sum"); + +cleanup: + if (urand_pipe) + pclose(urand_pipe); + test_urandom_usdt__destroy(skel); +} + +void test_usdt(void) +{ + if (test__start_subtest("basic")) + subtest_basic_usdt(); + if (test__start_subtest("multispec")) + subtest_multispec_usdt(); + if (test__start_subtest("urand_auto_attach")) + subtest_urandom_usdt(true /* auto_attach */); + if (test__start_subtest("urand_pid_attach")) + subtest_urandom_usdt(false /* auto_attach */); +} diff --git a/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c new file mode 100644 index 000000000000..8e545865ea33 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} array_map SEC(".maps"); + +static __u64 +check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val, + void *data) +{ + bpf_get_current_comm(key, sizeof(*key)); + return 0; +} + +SEC("raw_tp/sys_enter") +int test_map_key_write(const void *ctx) +{ + bpf_for_each_map_elem(&array_map, check_array_elem, NULL, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index b964ec1390c2..963b393c37e8 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> /* weak and shared between two files */ const volatile int my_tid __weak; @@ -44,6 +45,13 @@ void set_output_ctx1(__u64 *ctx) /* this weak instance should win because it's the first one */ __weak int set_output_weak(int x) { + static volatile int whatever; + + /* make sure we use CO-RE relocations in a weak function, this used to + * cause problems for BPF static linker + */ + whatever = bpf_core_type_size(struct task_struct); + output_weak1 = x; return x; } diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index 575e958e60b7..db195872f4eb 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> /* weak and shared between both files */ const volatile int my_tid __weak; @@ -44,6 +45,13 @@ void set_output_ctx2(__u64 *ctx) /* this weak instance should lose, because it will be processed second */ __weak int set_output_weak(int x) { + static volatile int whatever; + + /* make sure we use CO-RE relocations in a weak function, this used to + * cause problems for BPF static linker + */ + whatever = 2 * bpf_core_type_size(struct task_struct); + output_weak2 = x; return 2 * x; } diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c index b3fcb5274ee0..f793280a3238 100644 --- a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c +++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c @@ -35,10 +35,10 @@ int oncpu(void *ctx) long val; val = bpf_get_stackid(ctx, &stackmap, 0); - if (val > 0) + if (val >= 0) stackid_kernel = 2; val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK); - if (val > 0) + if (val >= 0) stackid_user = 2; trace = bpf_map_lookup_elem(&stackdata_map, &key); diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 4896fdf816f7..92331053dba3 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -826,8 +826,9 @@ out: SEC("kprobe/vfs_link") int BPF_KPROBE(kprobe__vfs_link, - struct dentry* old_dentry, struct inode* dir, - struct dentry* new_dentry, struct inode** delegated_inode) + struct dentry* old_dentry, struct user_namespace *mnt_userns, + struct inode* dir, struct dentry* new_dentry, + struct inode** delegated_inode) { struct bpf_func_stats_ctx stats_ctx; bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link); diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 8056a4c6d918..af994d16bb10 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -10,6 +10,10 @@ int kprobe_res = 0; int kretprobe_res = 0; int uprobe_res = 0; int uretprobe_res = 0; +int uprobe_byname_res = 0; +int uretprobe_byname_res = 0; +int uprobe_byname2_res = 0; +int uretprobe_byname2_res = 0; SEC("kprobe/sys_nanosleep") int handle_kprobe(struct pt_regs *ctx) @@ -25,18 +29,51 @@ int BPF_KRETPROBE(handle_kretprobe) return 0; } -SEC("uprobe/trigger_func") +SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { uprobe_res = 3; return 0; } -SEC("uretprobe/trigger_func") +SEC("uretprobe") int handle_uretprobe(struct pt_regs *ctx) { uretprobe_res = 4; return 0; } +SEC("uprobe") +int handle_uprobe_byname(struct pt_regs *ctx) +{ + uprobe_byname_res = 5; + return 0; +} + +/* use auto-attach format for section definition. */ +SEC("uretprobe//proc/self/exe:trigger_func2") +int handle_uretprobe_byname(struct pt_regs *ctx) +{ + uretprobe_byname_res = 6; + return 0; +} + +SEC("uprobe") +int handle_uprobe_byname2(struct pt_regs *ctx) +{ + unsigned int size = PT_REGS_PARM1(ctx); + + /* verify malloc size */ + if (size == 1) + uprobe_byname2_res = 7; + return 0; +} + +SEC("uretprobe") +int handle_uretprobe_byname2(struct pt_regs *ctx) +{ + uretprobe_byname2_res = 8; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c index 2d3a7710e2ce..0e2222968918 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c @@ -37,14 +37,14 @@ int handle_kretprobe(struct pt_regs *ctx) return 0; } -SEC("uprobe/trigger_func") +SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { update(ctx, &uprobe_res); return 0; } -SEC("uretprobe/trigger_func") +SEC("uretprobe") int handle_uretprobe(struct pt_regs *ctx) { update(ctx, &uretprobe_res); diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c new file mode 100644 index 000000000000..2b8b9b8ba018 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func17.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +__noinline int foo(int *p) +{ + return p ? (*p = 42) : 0; +} + +const volatile int i; + +SEC("tc") +int test_cls(struct __sk_buff *skb) +{ + return foo((int *)&i); +} diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c index 2180c41cd890..a72a5bf3812a 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c @@ -8,7 +8,7 @@ extern const int bpf_prog_active __ksym; /* int type global var. */ SEC("raw_tp/sys_enter") -int handler(const void *ctx) +int handler1(const void *ctx) { int *active; __u32 cpu; @@ -26,4 +26,20 @@ int handler(const void *ctx) return 0; } +__noinline int write_active(int *p) +{ + return p ? (*p = 42) : 0; +} + +SEC("raw_tp/sys_enter") +int handler2(const void *ctx) +{ + int *active; + __u32 cpu; + + active = bpf_this_cpu_ptr(&bpf_prog_active); + write_active(active); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index 19e4d2071c60..c8bc0c6947aa 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -218,7 +218,7 @@ static __noinline bool get_packet_dst(struct real_definition **real, if (hash != 0x358459b7 /* jhash of ipv4 packet */ && hash != 0x2f4bc6bb /* jhash of ipv6 packet */) - return 0; + return false; real_pos = bpf_map_lookup_elem(&ch_rings, &key); if (!real_pos) diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c index 02f79356d5eb..98c6493d9b91 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_assign.c +++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c @@ -89,7 +89,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp) static inline int handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) { - struct bpf_sock_tuple ln = {0}; struct bpf_sock *sk; const int zero = 0; size_t tuple_len; @@ -121,7 +120,6 @@ assign: static inline int handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4) { - struct bpf_sock_tuple ln = {0}; struct bpf_sock *sk; const int zero = 0; size_t tuple_len; @@ -161,7 +159,7 @@ assign: SEC("tc") int bpf_sk_assign_test(struct __sk_buff *skb) { - struct bpf_sock_tuple *tuple, ln = {0}; + struct bpf_sock_tuple *tuple; bool ipv4 = false; bool tcp = false; int tuple_len; diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c index e6cb09259408..1926facba122 100644 --- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c +++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c @@ -14,7 +14,7 @@ char current_regs[PT_REGS_SIZE] = {}; char ctx_regs[PT_REGS_SIZE] = {}; int uprobe_res = 0; -SEC("uprobe/trigger_func") +SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { struct task_struct *current; diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c new file mode 100644 index 000000000000..ab75522e2eeb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ + +#include "vmlinux.h" + +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +int uprobe_byname_parm1 = 0; +int uprobe_byname_ran = 0; +int uretprobe_byname_rc = 0; +int uretprobe_byname_ran = 0; +size_t uprobe_byname2_parm1 = 0; +int uprobe_byname2_ran = 0; +char *uretprobe_byname2_rc = NULL; +int uretprobe_byname2_ran = 0; + +int test_pid; + +/* This program cannot auto-attach, but that should not stop other + * programs from attaching. + */ +SEC("uprobe") +int handle_uprobe_noautoattach(struct pt_regs *ctx) +{ + return 0; +} + +SEC("uprobe//proc/self/exe:autoattach_trigger_func") +int handle_uprobe_byname(struct pt_regs *ctx) +{ + uprobe_byname_parm1 = PT_REGS_PARM1_CORE(ctx); + uprobe_byname_ran = 1; + return 0; +} + +SEC("uretprobe//proc/self/exe:autoattach_trigger_func") +int handle_uretprobe_byname(struct pt_regs *ctx) +{ + uretprobe_byname_rc = PT_REGS_RC_CORE(ctx); + uretprobe_byname_ran = 2; + return 0; +} + + +SEC("uprobe/libc.so.6:malloc") +int handle_uprobe_byname2(struct pt_regs *ctx) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + + /* ignore irrelevant invocations */ + if (test_pid != pid) + return 0; + uprobe_byname2_parm1 = PT_REGS_PARM1_CORE(ctx); + uprobe_byname2_ran = 3; + return 0; +} + +SEC("uretprobe/libc.so.6:malloc") +int handle_uretprobe_byname2(struct pt_regs *ctx) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + + /* ignore irrelevant invocations */ + if (test_pid != pid) + return 0; + uretprobe_byname2_rc = (char *)PT_REGS_RC_CORE(ctx); + uretprobe_byname2_ran = 4; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_urandom_usdt.c b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c new file mode 100644 index 000000000000..3539b02bd5f7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/usdt.bpf.h> + +int urand_pid; + +int urand_read_without_sema_call_cnt; +int urand_read_without_sema_buf_sz_sum; + +SEC("usdt/./urandom_read:urand:read_without_sema") +int BPF_USDT(urand_read_without_sema, int iter_num, int iter_cnt, int buf_sz) +{ + if (urand_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&urand_read_without_sema_call_cnt, 1); + __sync_fetch_and_add(&urand_read_without_sema_buf_sz_sum, buf_sz); + + return 0; +} + +int urand_read_with_sema_call_cnt; +int urand_read_with_sema_buf_sz_sum; + +SEC("usdt/./urandom_read:urand:read_with_sema") +int BPF_USDT(urand_read_with_sema, int iter_num, int iter_cnt, int buf_sz) +{ + if (urand_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&urand_read_with_sema_call_cnt, 1); + __sync_fetch_and_add(&urand_read_with_sema_buf_sz_sum, buf_sz); + + return 0; +} + +int urandlib_read_without_sema_call_cnt; +int urandlib_read_without_sema_buf_sz_sum; + +SEC("usdt/./liburandom_read.so:urandlib:read_without_sema") +int BPF_USDT(urandlib_read_without_sema, int iter_num, int iter_cnt, int buf_sz) +{ + if (urand_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&urandlib_read_without_sema_call_cnt, 1); + __sync_fetch_and_add(&urandlib_read_without_sema_buf_sz_sum, buf_sz); + + return 0; +} + +int urandlib_read_with_sema_call_cnt; +int urandlib_read_with_sema_buf_sz_sum; + +SEC("usdt/./liburandom_read.so:urandlib:read_with_sema") +int BPF_USDT(urandlib_read_with_sema, int iter_num, int iter_cnt, int buf_sz) +{ + if (urand_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&urandlib_read_with_sema_call_cnt, 1); + __sync_fetch_and_add(&urandlib_read_with_sema_buf_sz_sum, buf_sz); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c new file mode 100644 index 000000000000..505aab9a5234 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_usdt.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/usdt.bpf.h> + +int my_pid; + +int usdt0_called; +u64 usdt0_cookie; +int usdt0_arg_cnt; +int usdt0_arg_ret; + +SEC("usdt") +int usdt0(struct pt_regs *ctx) +{ + long tmp; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt0_called, 1); + + usdt0_cookie = bpf_usdt_cookie(ctx); + usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx); + /* should return -ENOENT for any arg_num */ + usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp); + return 0; +} + +int usdt3_called; +u64 usdt3_cookie; +int usdt3_arg_cnt; +int usdt3_arg_rets[3]; +u64 usdt3_args[3]; + +SEC("usdt//proc/self/exe:test:usdt3") +int usdt3(struct pt_regs *ctx) +{ + long tmp; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt3_called, 1); + + usdt3_cookie = bpf_usdt_cookie(ctx); + usdt3_arg_cnt = bpf_usdt_arg_cnt(ctx); + + usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp); + usdt3_args[0] = (int)tmp; + + usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp); + usdt3_args[1] = (long)tmp; + + usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp); + usdt3_args[2] = (uintptr_t)tmp; + + return 0; +} + +int usdt12_called; +u64 usdt12_cookie; +int usdt12_arg_cnt; +u64 usdt12_args[12]; + +SEC("usdt//proc/self/exe:test:usdt12") +int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5, + long a6, __u64 a7, uintptr_t a8, int a9, short a10, + short a11, signed char a12) +{ + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt12_called, 1); + + usdt12_cookie = bpf_usdt_cookie(ctx); + usdt12_arg_cnt = bpf_usdt_arg_cnt(ctx); + + usdt12_args[0] = a1; + usdt12_args[1] = a2; + usdt12_args[2] = a3; + usdt12_args[3] = a4; + usdt12_args[4] = a5; + usdt12_args[5] = a6; + usdt12_args[6] = a7; + usdt12_args[7] = a8; + usdt12_args[8] = a9; + usdt12_args[9] = a10; + usdt12_args[10] = a11; + usdt12_args[11] = a12; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c new file mode 100644 index 000000000000..aa6de32b50d1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/usdt.bpf.h> + +/* this file is linked together with test_usdt.c to validate that usdt.bpf.h + * can be included in multiple .bpf.c files forming single final BPF object + * file + */ + +extern int my_pid; + +int usdt_100_called; +int usdt_100_sum; + +SEC("usdt//proc/self/exe:test:usdt_100") +int BPF_USDT(usdt_100, int x) +{ + long tmp; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt_100_called, 1); + __sync_fetch_and_add(&usdt_100_sum, x); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 596c4e71bf3a..125d872d7981 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -564,22 +564,22 @@ static bool get_packet_dst(struct real_definition **real, hash = get_packet_hash(pckt, hash_16bytes); if (hash != 0x358459b7 /* jhash of ipv4 packet */ && hash != 0x2f4bc6bb /* jhash of ipv6 packet */) - return 0; + return false; key = 2 * vip_info->vip_num + hash % 2; real_pos = bpf_map_lookup_elem(&ch_rings, &key); if (!real_pos) - return 0; + return false; key = *real_pos; *real = bpf_map_lookup_elem(&reals, &key); if (!(*real)) - return 0; + return false; if (!(vip_info->flags & (1 << 1))) { __u32 conn_rate_key = 512 + 2; struct lb_stats *conn_rate_stats = bpf_map_lookup_elem(&stats, &conn_rate_key); if (!conn_rate_stats) - return 1; + return true; cur_time = bpf_ktime_get_ns(); if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { conn_rate_stats->v1 = 1; @@ -587,14 +587,14 @@ static bool get_packet_dst(struct real_definition **real, } else { conn_rate_stats->v1 += 1; if (conn_rate_stats->v1 >= 1) - return 1; + return true; } if (pckt->flow.proto == IPPROTO_UDP) new_dst_lru.atime = cur_time; new_dst_lru.pos = key; bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); } - return 1; + return true; } __attribute__ ((noinline)) diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 2ab049b54d6c..694e7cec1823 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -54,7 +54,7 @@ int bench_trigger_fmodret(void *ctx) return -22; } -SEC("uprobe/self/uprobe_target") +SEC("uprobe") int bench_trigger_uprobe(void *ctx) { __sync_add_and_fetch(&hits, 1); diff --git a/tools/testing/selftests/bpf/sdt-config.h b/tools/testing/selftests/bpf/sdt-config.h new file mode 100644 index 000000000000..733045a52771 --- /dev/null +++ b/tools/testing/selftests/bpf/sdt-config.h @@ -0,0 +1,6 @@ +/* includes/sys/sdt-config.h. Generated from sdt-config.h.in by configure. + + This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to + indicate whether the assembler supports "?" in .pushsection directives. */ + +#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1 diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h new file mode 100644 index 000000000000..ca0162b4dc57 --- /dev/null +++ b/tools/testing/selftests/bpf/sdt.h @@ -0,0 +1,513 @@ +/* <sys/sdt.h> - Systemtap static probe definition macros. + + This file is dedicated to the public domain, pursuant to CC0 + (https://creativecommons.org/publicdomain/zero/1.0/) +*/ + +#ifndef _SYS_SDT_H +#define _SYS_SDT_H 1 + +/* + This file defines a family of macros + + STAP_PROBEn(op1, ..., opn) + + that emit a nop into the instruction stream, and some data into an auxiliary + note section. The data in the note section describes the operands, in terms + of size and location. Each location is encoded as assembler operand string. + Consumer tools such as gdb or systemtap insert breakpoints on top of + the nop, and decode the location operand-strings, like an assembler, + to find the values being passed. + + The operand strings are selected by the compiler for each operand. + They are constrained by gcc inline-assembler codes. The default is: + + #define STAP_SDT_ARG_CONSTRAINT nor + + This is a good default if the operands tend to be integral and + moderate in number (smaller than number of registers). In other + cases, the compiler may report "'asm' requires impossible reload" or + similar. In this case, consider simplifying the macro call (fewer + and simpler operands), reduce optimization, or override the default + constraints string via: + + #define STAP_SDT_ARG_CONSTRAINT g + #include <sys/sdt.h> + + See also: + https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + https://gcc.gnu.org/onlinedocs/gcc/Constraints.html + */ + + + +#ifdef __ASSEMBLER__ +# define _SDT_PROBE(provider, name, n, arglist) \ + _SDT_ASM_BODY(provider, name, _SDT_ASM_SUBSTR_1, (_SDT_DEPAREN_##n arglist)) \ + _SDT_ASM_BASE +# define _SDT_ASM_1(x) x; +# define _SDT_ASM_2(a, b) a,b; +# define _SDT_ASM_3(a, b, c) a,b,c; +# define _SDT_ASM_5(a, b, c, d, e) a,b,c,d,e; +# define _SDT_ASM_STRING_1(x) .asciz #x; +# define _SDT_ASM_SUBSTR_1(x) .ascii #x; +# define _SDT_DEPAREN_0() /* empty */ +# define _SDT_DEPAREN_1(a) a +# define _SDT_DEPAREN_2(a,b) a b +# define _SDT_DEPAREN_3(a,b,c) a b c +# define _SDT_DEPAREN_4(a,b,c,d) a b c d +# define _SDT_DEPAREN_5(a,b,c,d,e) a b c d e +# define _SDT_DEPAREN_6(a,b,c,d,e,f) a b c d e f +# define _SDT_DEPAREN_7(a,b,c,d,e,f,g) a b c d e f g +# define _SDT_DEPAREN_8(a,b,c,d,e,f,g,h) a b c d e f g h +# define _SDT_DEPAREN_9(a,b,c,d,e,f,g,h,i) a b c d e f g h i +# define _SDT_DEPAREN_10(a,b,c,d,e,f,g,h,i,j) a b c d e f g h i j +# define _SDT_DEPAREN_11(a,b,c,d,e,f,g,h,i,j,k) a b c d e f g h i j k +# define _SDT_DEPAREN_12(a,b,c,d,e,f,g,h,i,j,k,l) a b c d e f g h i j k l +#else +#if defined _SDT_HAS_SEMAPHORES +#define _SDT_NOTE_SEMAPHORE_USE(provider, name) \ + __asm__ __volatile__ ("" :: "m" (provider##_##name##_semaphore)); +#else +#define _SDT_NOTE_SEMAPHORE_USE(provider, name) +#endif + +# define _SDT_PROBE(provider, name, n, arglist) \ + do { \ + _SDT_NOTE_SEMAPHORE_USE(provider, name); \ + __asm__ __volatile__ (_SDT_ASM_BODY(provider, name, _SDT_ASM_ARGS, (n)) \ + :: _SDT_ASM_OPERANDS_##n arglist); \ + __asm__ __volatile__ (_SDT_ASM_BASE); \ + } while (0) +# define _SDT_S(x) #x +# define _SDT_ASM_1(x) _SDT_S(x) "\n" +# define _SDT_ASM_2(a, b) _SDT_S(a) "," _SDT_S(b) "\n" +# define _SDT_ASM_3(a, b, c) _SDT_S(a) "," _SDT_S(b) "," \ + _SDT_S(c) "\n" +# define _SDT_ASM_5(a, b, c, d, e) _SDT_S(a) "," _SDT_S(b) "," \ + _SDT_S(c) "," _SDT_S(d) "," \ + _SDT_S(e) "\n" +# define _SDT_ASM_ARGS(n) _SDT_ASM_TEMPLATE_##n +# define _SDT_ASM_STRING_1(x) _SDT_ASM_1(.asciz #x) +# define _SDT_ASM_SUBSTR_1(x) _SDT_ASM_1(.ascii #x) + +# define _SDT_ARGFMT(no) _SDT_ASM_1(_SDT_SIGN %n[_SDT_S##no]) \ + _SDT_ASM_1(_SDT_SIZE %n[_SDT_S##no]) \ + _SDT_ASM_1(_SDT_TYPE %n[_SDT_S##no]) \ + _SDT_ASM_SUBSTR(_SDT_ARGTMPL(_SDT_A##no)) + + +# ifndef STAP_SDT_ARG_CONSTRAINT +# if defined __powerpc__ +# define STAP_SDT_ARG_CONSTRAINT nZr +# elif defined __arm__ +# define STAP_SDT_ARG_CONSTRAINT g +# else +# define STAP_SDT_ARG_CONSTRAINT nor +# endif +# endif + +# define _SDT_STRINGIFY(x) #x +# define _SDT_ARG_CONSTRAINT_STRING(x) _SDT_STRINGIFY(x) +/* _SDT_S encodes the size and type as 0xSSTT which is decoded by the assembler + macros _SDT_SIZE and _SDT_TYPE */ +# define _SDT_ARG(n, x) \ + [_SDT_S##n] "n" ((_SDT_ARGSIGNED (x) ? (int)-1 : 1) * (-(((int) _SDT_ARGSIZE (x)) << 8) + (-(0x7f & __builtin_classify_type (x))))), \ + [_SDT_A##n] _SDT_ARG_CONSTRAINT_STRING (STAP_SDT_ARG_CONSTRAINT) (_SDT_ARGVAL (x)) +#endif +#define _SDT_ASM_STRING(x) _SDT_ASM_STRING_1(x) +#define _SDT_ASM_SUBSTR(x) _SDT_ASM_SUBSTR_1(x) + +#define _SDT_ARGARRAY(x) (__builtin_classify_type (x) == 14 \ + || __builtin_classify_type (x) == 5) + +#ifdef __cplusplus +# define _SDT_ARGSIGNED(x) (!_SDT_ARGARRAY (x) \ + && __sdt_type<__typeof (x)>::__sdt_signed) +# define _SDT_ARGSIZE(x) (_SDT_ARGARRAY (x) \ + ? sizeof (void *) : sizeof (x)) +# define _SDT_ARGVAL(x) (x) + +# include <cstddef> + +template<typename __sdt_T> +struct __sdt_type +{ + static const bool __sdt_signed = false; +}; + +#define __SDT_ALWAYS_SIGNED(T) \ +template<> struct __sdt_type<T> { static const bool __sdt_signed = true; }; +#define __SDT_COND_SIGNED(T,CT) \ +template<> struct __sdt_type<T> { static const bool __sdt_signed = ((CT)(-1) < 1); }; +__SDT_ALWAYS_SIGNED(signed char) +__SDT_ALWAYS_SIGNED(short) +__SDT_ALWAYS_SIGNED(int) +__SDT_ALWAYS_SIGNED(long) +__SDT_ALWAYS_SIGNED(long long) +__SDT_ALWAYS_SIGNED(volatile signed char) +__SDT_ALWAYS_SIGNED(volatile short) +__SDT_ALWAYS_SIGNED(volatile int) +__SDT_ALWAYS_SIGNED(volatile long) +__SDT_ALWAYS_SIGNED(volatile long long) +__SDT_ALWAYS_SIGNED(const signed char) +__SDT_ALWAYS_SIGNED(const short) +__SDT_ALWAYS_SIGNED(const int) +__SDT_ALWAYS_SIGNED(const long) +__SDT_ALWAYS_SIGNED(const long long) +__SDT_ALWAYS_SIGNED(const volatile signed char) +__SDT_ALWAYS_SIGNED(const volatile short) +__SDT_ALWAYS_SIGNED(const volatile int) +__SDT_ALWAYS_SIGNED(const volatile long) +__SDT_ALWAYS_SIGNED(const volatile long long) +__SDT_COND_SIGNED(char, char) +__SDT_COND_SIGNED(wchar_t, wchar_t) +__SDT_COND_SIGNED(volatile char, char) +__SDT_COND_SIGNED(volatile wchar_t, wchar_t) +__SDT_COND_SIGNED(const char, char) +__SDT_COND_SIGNED(const wchar_t, wchar_t) +__SDT_COND_SIGNED(const volatile char, char) +__SDT_COND_SIGNED(const volatile wchar_t, wchar_t) +#if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) +/* __SDT_COND_SIGNED(char16_t) */ +/* __SDT_COND_SIGNED(char32_t) */ +#endif + +template<typename __sdt_E> +struct __sdt_type<__sdt_E[]> : public __sdt_type<__sdt_E *> {}; + +template<typename __sdt_E, size_t __sdt_N> +struct __sdt_type<__sdt_E[__sdt_N]> : public __sdt_type<__sdt_E *> {}; + +#elif !defined(__ASSEMBLER__) +__extension__ extern unsigned long long __sdt_unsp; +# define _SDT_ARGINTTYPE(x) \ + __typeof (__builtin_choose_expr (((__builtin_classify_type (x) \ + + 3) & -4) == 4, (x), 0U)) +# define _SDT_ARGSIGNED(x) \ + (!__extension__ \ + (__builtin_constant_p ((((unsigned long long) \ + (_SDT_ARGINTTYPE (x)) __sdt_unsp) \ + & ((unsigned long long)1 << (sizeof (unsigned long long) \ + * __CHAR_BIT__ - 1))) == 0) \ + || (_SDT_ARGINTTYPE (x)) -1 > (_SDT_ARGINTTYPE (x)) 0)) +# define _SDT_ARGSIZE(x) \ + (_SDT_ARGARRAY (x) ? sizeof (void *) : sizeof (x)) +# define _SDT_ARGVAL(x) (x) +#endif + +#if defined __powerpc__ || defined __powerpc64__ +# define _SDT_ARGTMPL(id) %I[id]%[id] +#elif defined __i386__ +# define _SDT_ARGTMPL(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */ +#else +# define _SDT_ARGTMPL(id) %[id] +#endif + +/* NB: gdb PR24541 highlighted an unspecified corner of the sdt.h + operand note format. + + The named register may be a longer or shorter (!) alias for the + storage where the value in question is found. For example, on + i386, 64-bit value may be put in register pairs, and the register + name stored would identify just one of them. Previously, gcc was + asked to emit the %w[id] (16-bit alias of some registers holding + operands), even when a wider 32-bit value was used. + + Bottom line: the byte-width given before the @ sign governs. If + there is a mismatch between that width and that of the named + register, then a sys/sdt.h note consumer may need to employ + architecture-specific heuristics to figure out where the compiler + has actually put the complete value. +*/ + +#ifdef __LP64__ +# define _SDT_ASM_ADDR .8byte +#else +# define _SDT_ASM_ADDR .4byte +#endif + +/* The ia64 and s390 nop instructions take an argument. */ +#if defined(__ia64__) || defined(__s390__) || defined(__s390x__) +#define _SDT_NOP nop 0 +#else +#define _SDT_NOP nop +#endif + +#define _SDT_NOTE_NAME "stapsdt" +#define _SDT_NOTE_TYPE 3 + +/* If the assembler supports the necessary feature, then we can play + nice with code in COMDAT sections, which comes up in C++ code. + Without that assembler support, some combinations of probe placements + in certain kinds of C++ code may produce link-time errors. */ +#include "sdt-config.h" +#if _SDT_ASM_SECTION_AUTOGROUP_SUPPORT +# define _SDT_ASM_AUTOGROUP "?" +#else +# define _SDT_ASM_AUTOGROUP "" +#endif + +#define _SDT_DEF_MACROS \ + _SDT_ASM_1(.altmacro) \ + _SDT_ASM_1(.macro _SDT_SIGN x) \ + _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \ + _SDT_ASM_1(.iflt \\x) \ + _SDT_ASM_1(.ascii "-") \ + _SDT_ASM_1(.endif) \ + _SDT_ASM_1(.popsection) \ + _SDT_ASM_1(.endm) \ + _SDT_ASM_1(.macro _SDT_SIZE_ x) \ + _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \ + _SDT_ASM_1(.ascii "\x") \ + _SDT_ASM_1(.popsection) \ + _SDT_ASM_1(.endm) \ + _SDT_ASM_1(.macro _SDT_SIZE x) \ + _SDT_ASM_1(_SDT_SIZE_ %%((-(-\\x*((-\\x>0)-(-\\x<0))))>>8)) \ + _SDT_ASM_1(.endm) \ + _SDT_ASM_1(.macro _SDT_TYPE_ x) \ + _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \ + _SDT_ASM_2(.ifc 8,\\x) \ + _SDT_ASM_1(.ascii "f") \ + _SDT_ASM_1(.endif) \ + _SDT_ASM_1(.ascii "@") \ + _SDT_ASM_1(.popsection) \ + _SDT_ASM_1(.endm) \ + _SDT_ASM_1(.macro _SDT_TYPE x) \ + _SDT_ASM_1(_SDT_TYPE_ %%((\\x)&(0xff))) \ + _SDT_ASM_1(.endm) + +#define _SDT_UNDEF_MACROS \ + _SDT_ASM_1(.purgem _SDT_SIGN) \ + _SDT_ASM_1(.purgem _SDT_SIZE_) \ + _SDT_ASM_1(.purgem _SDT_SIZE) \ + _SDT_ASM_1(.purgem _SDT_TYPE_) \ + _SDT_ASM_1(.purgem _SDT_TYPE) + +#define _SDT_ASM_BODY(provider, name, pack_args, args, ...) \ + _SDT_DEF_MACROS \ + _SDT_ASM_1(990: _SDT_NOP) \ + _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \ + _SDT_ASM_1( .balign 4) \ + _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \ + _SDT_ASM_1(991: .asciz _SDT_NOTE_NAME) \ + _SDT_ASM_1(992: .balign 4) \ + _SDT_ASM_1(993: _SDT_ASM_ADDR 990b) \ + _SDT_ASM_1( _SDT_ASM_ADDR _.stapsdt.base) \ + _SDT_SEMAPHORE(provider,name) \ + _SDT_ASM_STRING(provider) \ + _SDT_ASM_STRING(name) \ + pack_args args \ + _SDT_ASM_SUBSTR(\x00) \ + _SDT_UNDEF_MACROS \ + _SDT_ASM_1(994: .balign 4) \ + _SDT_ASM_1( .popsection) + +#define _SDT_ASM_BASE \ + _SDT_ASM_1(.ifndef _.stapsdt.base) \ + _SDT_ASM_5( .pushsection .stapsdt.base,"aG","progbits", \ + .stapsdt.base,comdat) \ + _SDT_ASM_1( .weak _.stapsdt.base) \ + _SDT_ASM_1( .hidden _.stapsdt.base) \ + _SDT_ASM_1( _.stapsdt.base: .space 1) \ + _SDT_ASM_2( .size _.stapsdt.base, 1) \ + _SDT_ASM_1( .popsection) \ + _SDT_ASM_1(.endif) + +#if defined _SDT_HAS_SEMAPHORES +#define _SDT_SEMAPHORE(p,n) \ + _SDT_ASM_1( _SDT_ASM_ADDR p##_##n##_semaphore) +#else +#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1( _SDT_ASM_ADDR 0) +#endif + +#define _SDT_ASM_BLANK _SDT_ASM_SUBSTR(\x20) +#define _SDT_ASM_TEMPLATE_0 /* no arguments */ +#define _SDT_ASM_TEMPLATE_1 _SDT_ARGFMT(1) +#define _SDT_ASM_TEMPLATE_2 _SDT_ASM_TEMPLATE_1 _SDT_ASM_BLANK _SDT_ARGFMT(2) +#define _SDT_ASM_TEMPLATE_3 _SDT_ASM_TEMPLATE_2 _SDT_ASM_BLANK _SDT_ARGFMT(3) +#define _SDT_ASM_TEMPLATE_4 _SDT_ASM_TEMPLATE_3 _SDT_ASM_BLANK _SDT_ARGFMT(4) +#define _SDT_ASM_TEMPLATE_5 _SDT_ASM_TEMPLATE_4 _SDT_ASM_BLANK _SDT_ARGFMT(5) +#define _SDT_ASM_TEMPLATE_6 _SDT_ASM_TEMPLATE_5 _SDT_ASM_BLANK _SDT_ARGFMT(6) +#define _SDT_ASM_TEMPLATE_7 _SDT_ASM_TEMPLATE_6 _SDT_ASM_BLANK _SDT_ARGFMT(7) +#define _SDT_ASM_TEMPLATE_8 _SDT_ASM_TEMPLATE_7 _SDT_ASM_BLANK _SDT_ARGFMT(8) +#define _SDT_ASM_TEMPLATE_9 _SDT_ASM_TEMPLATE_8 _SDT_ASM_BLANK _SDT_ARGFMT(9) +#define _SDT_ASM_TEMPLATE_10 _SDT_ASM_TEMPLATE_9 _SDT_ASM_BLANK _SDT_ARGFMT(10) +#define _SDT_ASM_TEMPLATE_11 _SDT_ASM_TEMPLATE_10 _SDT_ASM_BLANK _SDT_ARGFMT(11) +#define _SDT_ASM_TEMPLATE_12 _SDT_ASM_TEMPLATE_11 _SDT_ASM_BLANK _SDT_ARGFMT(12) +#define _SDT_ASM_OPERANDS_0() [__sdt_dummy] "g" (0) +#define _SDT_ASM_OPERANDS_1(arg1) _SDT_ARG(1, arg1) +#define _SDT_ASM_OPERANDS_2(arg1, arg2) \ + _SDT_ASM_OPERANDS_1(arg1), _SDT_ARG(2, arg2) +#define _SDT_ASM_OPERANDS_3(arg1, arg2, arg3) \ + _SDT_ASM_OPERANDS_2(arg1, arg2), _SDT_ARG(3, arg3) +#define _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4) \ + _SDT_ASM_OPERANDS_3(arg1, arg2, arg3), _SDT_ARG(4, arg4) +#define _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5) \ + _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4), _SDT_ARG(5, arg5) +#define _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6) \ + _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5), _SDT_ARG(6, arg6) +#define _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6), _SDT_ARG(7, arg7) +#define _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ + _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7), \ + _SDT_ARG(8, arg8) +#define _SDT_ASM_OPERANDS_9(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) \ + _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8), \ + _SDT_ARG(9, arg9) +#define _SDT_ASM_OPERANDS_10(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \ + _SDT_ASM_OPERANDS_9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9), \ + _SDT_ARG(10, arg10) +#define _SDT_ASM_OPERANDS_11(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \ + _SDT_ASM_OPERANDS_10(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10), \ + _SDT_ARG(11, arg11) +#define _SDT_ASM_OPERANDS_12(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \ + _SDT_ASM_OPERANDS_11(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11), \ + _SDT_ARG(12, arg12) + +/* These macros can be used in C, C++, or assembly code. + In assembly code the arguments should use normal assembly operand syntax. */ + +#define STAP_PROBE(provider, name) \ + _SDT_PROBE(provider, name, 0, ()) +#define STAP_PROBE1(provider, name, arg1) \ + _SDT_PROBE(provider, name, 1, (arg1)) +#define STAP_PROBE2(provider, name, arg1, arg2) \ + _SDT_PROBE(provider, name, 2, (arg1, arg2)) +#define STAP_PROBE3(provider, name, arg1, arg2, arg3) \ + _SDT_PROBE(provider, name, 3, (arg1, arg2, arg3)) +#define STAP_PROBE4(provider, name, arg1, arg2, arg3, arg4) \ + _SDT_PROBE(provider, name, 4, (arg1, arg2, arg3, arg4)) +#define STAP_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) \ + _SDT_PROBE(provider, name, 5, (arg1, arg2, arg3, arg4, arg5)) +#define STAP_PROBE6(provider, name, arg1, arg2, arg3, arg4, arg5, arg6) \ + _SDT_PROBE(provider, name, 6, (arg1, arg2, arg3, arg4, arg5, arg6)) +#define STAP_PROBE7(provider, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + _SDT_PROBE(provider, name, 7, (arg1, arg2, arg3, arg4, arg5, arg6, arg7)) +#define STAP_PROBE8(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) \ + _SDT_PROBE(provider, name, 8, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8)) +#define STAP_PROBE9(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\ + _SDT_PROBE(provider, name, 9, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)) +#define STAP_PROBE10(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \ + _SDT_PROBE(provider, name, 10, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10)) +#define STAP_PROBE11(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \ + _SDT_PROBE(provider, name, 11, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11)) +#define STAP_PROBE12(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \ + _SDT_PROBE(provider, name, 12, \ + (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12)) + +/* This STAP_PROBEV macro can be used in variadic scenarios, where the + number of probe arguments is not known until compile time. Since + variadic macro support may vary with compiler options, you must + pre-#define SDT_USE_VARIADIC to enable this type of probe. + + The trick to count __VA_ARGS__ was inspired by this post by + Laurent Deniau <laurent.deniau@cern.ch>: + http://groups.google.com/group/comp.std.c/msg/346fc464319b1ee5 + + Note that our _SDT_NARG is called with an extra 0 arg that's not + counted, so we don't have to worry about the behavior of macros + called without any arguments. */ + +#define _SDT_NARG(...) __SDT_NARG(__VA_ARGS__, 12,11,10,9,8,7,6,5,4,3,2,1,0) +#define __SDT_NARG(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12, N, ...) N +#ifdef SDT_USE_VARIADIC +#define _SDT_PROBE_N(provider, name, N, ...) \ + _SDT_PROBE(provider, name, N, (__VA_ARGS__)) +#define STAP_PROBEV(provider, name, ...) \ + _SDT_PROBE_N(provider, name, _SDT_NARG(0, ##__VA_ARGS__), ##__VA_ARGS__) +#endif + +/* These macros are for use in asm statements. You must compile + with -std=gnu99 or -std=c99 to use the STAP_PROBE_ASM macro. + + The STAP_PROBE_ASM macro generates a quoted string to be used in the + template portion of the asm statement, concatenated with strings that + contain the actual assembly code around the probe site. + + For example: + + asm ("before\n" + STAP_PROBE_ASM(provider, fooprobe, %eax 4(%esi)) + "after"); + + emits the assembly code for "before\nafter", with a probe in between. + The probe arguments are the %eax register, and the value of the memory + word located 4 bytes past the address in the %esi register. Note that + because this is a simple asm, not a GNU C extended asm statement, these + % characters do not need to be doubled to generate literal %reg names. + + In a GNU C extended asm statement, the probe arguments can be specified + using the macro STAP_PROBE_ASM_TEMPLATE(n) for n arguments. The paired + macro STAP_PROBE_ASM_OPERANDS gives the C values of these probe arguments, + and appears in the input operand list of the asm statement. For example: + + asm ("someinsn %0,%1\n" // %0 is output operand, %1 is input operand + STAP_PROBE_ASM(provider, fooprobe, STAP_PROBE_ASM_TEMPLATE(3)) + "otherinsn %[namedarg]" + : "r" (outvar) + : "g" (some_value), [namedarg] "i" (1234), + STAP_PROBE_ASM_OPERANDS(3, some_value, some_ptr->field, 1234)); + + This is just like writing: + + STAP_PROBE3(provider, fooprobe, some_value, some_ptr->field, 1234)); + + but the probe site is right between "someinsn" and "otherinsn". + + The probe arguments in STAP_PROBE_ASM can be given as assembly + operands instead, even inside a GNU C extended asm statement. + Note that these can use operand templates like %0 or %[name], + and likewise they must write %%reg for a literal operand of %reg. */ + +#define _SDT_ASM_BODY_1(p,n,...) _SDT_ASM_BODY(p,n,_SDT_ASM_SUBSTR,(__VA_ARGS__)) +#define _SDT_ASM_BODY_2(p,n,...) _SDT_ASM_BODY(p,n,/*_SDT_ASM_STRING */,__VA_ARGS__) +#define _SDT_ASM_BODY_N2(p,n,no,...) _SDT_ASM_BODY_ ## no(p,n,__VA_ARGS__) +#define _SDT_ASM_BODY_N1(p,n,no,...) _SDT_ASM_BODY_N2(p,n,no,__VA_ARGS__) +#define _SDT_ASM_BODY_N(p,n,...) _SDT_ASM_BODY_N1(p,n,_SDT_NARG(0, __VA_ARGS__),__VA_ARGS__) + +#if __STDC_VERSION__ >= 199901L +# define STAP_PROBE_ASM(provider, name, ...) \ + _SDT_ASM_BODY_N(provider, name, __VA_ARGS__) \ + _SDT_ASM_BASE +# define STAP_PROBE_ASM_OPERANDS(n, ...) _SDT_ASM_OPERANDS_##n(__VA_ARGS__) +#else +# define STAP_PROBE_ASM(provider, name, args) \ + _SDT_ASM_BODY(provider, name, /* _SDT_ASM_STRING */, (args)) \ + _SDT_ASM_BASE +#endif +#define STAP_PROBE_ASM_TEMPLATE(n) _SDT_ASM_TEMPLATE_##n,"use _SDT_ASM_TEMPLATE_" + + +/* DTrace compatible macro names. */ +#define DTRACE_PROBE(provider,probe) \ + STAP_PROBE(provider,probe) +#define DTRACE_PROBE1(provider,probe,parm1) \ + STAP_PROBE1(provider,probe,parm1) +#define DTRACE_PROBE2(provider,probe,parm1,parm2) \ + STAP_PROBE2(provider,probe,parm1,parm2) +#define DTRACE_PROBE3(provider,probe,parm1,parm2,parm3) \ + STAP_PROBE3(provider,probe,parm1,parm2,parm3) +#define DTRACE_PROBE4(provider,probe,parm1,parm2,parm3,parm4) \ + STAP_PROBE4(provider,probe,parm1,parm2,parm3,parm4) +#define DTRACE_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) \ + STAP_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) +#define DTRACE_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) \ + STAP_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) +#define DTRACE_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) \ + STAP_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) +#define DTRACE_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) \ + STAP_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) +#define DTRACE_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) \ + STAP_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) +#define DTRACE_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) \ + STAP_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) +#define DTRACE_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) \ + STAP_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) +#define DTRACE_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) \ + STAP_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) + + +#endif /* sys/sdt.h */ diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index 6bf21e47882a..c0e7acd698ed 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -180,7 +180,7 @@ class FileExtractor(object): @enum_name: name of the enum to parse """ start_marker = re.compile(f'enum {enum_name} {{\n') - pattern = re.compile('^\s*(BPF_\w+),?$') + pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$') end_marker = re.compile('^};') parser = BlockParser(self.reader) parser.search_block(start_marker) diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index d6a1be4d8020..2ffa08198d1c 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -7,6 +7,7 @@ #include <sys/sysinfo.h> #include "bpf_rlimit.h" +#include "bpf_util.h" #include "cgroup_helpers.h" #include "testing_helpers.h" @@ -44,7 +45,7 @@ int main(int argc, char **argv) unsigned long long *percpu_value; int cpu, nproc; - nproc = get_nprocs_conf(); + nproc = bpf_num_possible_cpus(); percpu_value = malloc(sizeof(*percpu_value) * nproc); if (!percpu_value) { printf("Not enough memory for per-cpu area (%d cpus)\n", nproc); diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index edaffd43da83..6cd6ef9fc20b 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -184,7 +184,7 @@ def bpftool_prog_list(expected=None, ns=""): def bpftool_map_list(expected=None, ns=""): _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) # Remove the base maps - maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names] + maps = [m for m in maps if m not in base_maps and m.get('name') and m.get('name') not in base_map_names] if expected is not None: if len(maps) != expected: fail(True, "%d BPF maps loaded, expected %d" % diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 2ecb73a65206..0a4b45d7b515 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -761,8 +761,10 @@ int cd_flavor_subdir(const char *exec_name) const char *flavor = strrchr(exec_name, '/'); if (!flavor) - return 0; - flavor++; + flavor = exec_name; + else + flavor++; + flavor = strrchr(flavor, '-'); if (!flavor) return 0; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 93c1ff705533..eec4c7385b14 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -332,6 +332,8 @@ int trigger_module_test_write(int write_sz); #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" #elif defined(__s390x__) #define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep" +#elif defined(__aarch64__) +#define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep" #else #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" #endif diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 795b6798ccee..87867f7a78c3 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -60,7 +60,7 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len) set[i] = true; } - if (!set) + if (!set || parsing_end) return -EINVAL; *num_set = set; diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 3d6217e3aff7..9c4be2cdb21a 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -25,15 +25,12 @@ static int ksym_cmp(const void *p1, const void *p2) int load_kallsyms(void) { - FILE *f = fopen("/proc/kallsyms", "r"); + FILE *f; char func[256], buf[256]; char symbol; void *addr; int i = 0; - if (!f) - return -ENOENT; - /* * This is called/used from multiplace places, * load symbols just once. @@ -41,6 +38,10 @@ int load_kallsyms(void) if (sym_cnt) return 0; + f = fopen("/proc/kallsyms", "r"); + if (!f) + return -ENOENT; + while (fgets(buf, sizeof(buf), f)) { if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) break; diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c index db781052758d..e92644d0fa75 100644 --- a/tools/testing/selftests/bpf/urandom_read.c +++ b/tools/testing/selftests/bpf/urandom_read.c @@ -1,32 +1,85 @@ +#include <stdbool.h> #include <stdio.h> #include <unistd.h> +#include <errno.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <stdlib.h> +#include <signal.h> + +#define _SDT_HAS_SEMAPHORES 1 +#include "sdt.h" + +#define SEC(name) __attribute__((section(name), used)) #define BUF_SIZE 256 +/* defined in urandom_read_aux.c */ +void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz); +/* these are coming from urandom_read_lib{1,2}.c */ +void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz); +void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz); + +unsigned short urand_read_with_sema_semaphore SEC(".probes"); + static __attribute__((noinline)) void urandom_read(int fd, int count) { - char buf[BUF_SIZE]; - int i; + char buf[BUF_SIZE]; + int i; + + for (i = 0; i < count; ++i) { + read(fd, buf, BUF_SIZE); + + /* trigger USDTs defined in executable itself */ + urand_read_without_sema(i, count, BUF_SIZE); + STAP_PROBE3(urand, read_with_sema, i, count, BUF_SIZE); - for (i = 0; i < count; ++i) - read(fd, buf, BUF_SIZE); + /* trigger USDTs defined in shared lib */ + urandlib_read_without_sema(i, count, BUF_SIZE); + urandlib_read_with_sema(i, count, BUF_SIZE); + } +} + +static volatile bool parent_ready; + +static void handle_sigpipe(int sig) +{ + parent_ready = true; } int main(int argc, char *argv[]) { int fd = open("/dev/urandom", O_RDONLY); int count = 4; + bool report_pid = false; if (fd < 0) return 1; - if (argc == 2) + if (argc >= 2) count = atoi(argv[1]); + if (argc >= 3) { + report_pid = true; + /* install SIGPIPE handler to catch when parent closes their + * end of the pipe (on the other side of our stdout) + */ + signal(SIGPIPE, handle_sigpipe); + } + + /* report PID and wait for parent process to send us "signal" by + * closing stdout + */ + if (report_pid) { + while (!parent_ready) { + fprintf(stdout, "%d\n", getpid()); + fflush(stdout); + } + /* at this point stdout is closed, parent process knows our + * PID and is ready to trace us + */ + } urandom_read(fd, count); diff --git a/tools/testing/selftests/bpf/urandom_read_aux.c b/tools/testing/selftests/bpf/urandom_read_aux.c new file mode 100644 index 000000000000..6132edcfea74 --- /dev/null +++ b/tools/testing/selftests/bpf/urandom_read_aux.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include "sdt.h" + +void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz) +{ + /* semaphore-less USDT */ + STAP_PROBE3(urand, read_without_sema, iter_num, iter_cnt, read_sz); +} diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c new file mode 100644 index 000000000000..86186e24b740 --- /dev/null +++ b/tools/testing/selftests/bpf/urandom_read_lib1.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#define _SDT_HAS_SEMAPHORES 1 +#include "sdt.h" + +#define SEC(name) __attribute__((section(name), used)) + +unsigned short urandlib_read_with_sema_semaphore SEC(".probes"); + +void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz) +{ + STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz); +} diff --git a/tools/testing/selftests/bpf/urandom_read_lib2.c b/tools/testing/selftests/bpf/urandom_read_lib2.c new file mode 100644 index 000000000000..9d401ad9838f --- /dev/null +++ b/tools/testing/selftests/bpf/urandom_read_lib2.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include "sdt.h" + +void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz) +{ + STAP_PROBE3(urandlib, read_without_sema, iter_num, iter_cnt, read_sz); +} diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 4f70baad867d..bbe3b379927a 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -20,6 +20,7 @@ SRC_IP6=2001:db8:1::3 DEV_ADDR=192.51.100.1 DEV_ADDR6=2001:db8:1::1 DEV=dummy0 +TESTS="fib_rule6 fib_rule4" log_test() { @@ -316,7 +317,16 @@ fi # start clean cleanup &> /dev/null setup -run_fibrule_tests +for t in $TESTS +do + case $t in + fib_rule6_test|fib_rule6) fib_rule6_test;; + fib_rule4_test|fib_rule4) fib_rule4_test;; + + help) echo "Test names: $TESTS"; exit 0;; + + esac +done cleanup if [ "$TESTS" != "none" ]; then diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 8fa97ae9af9e..ae80c2aef577 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -2,6 +2,7 @@ TEST_PROGS = bridge_igmp.sh \ bridge_locked_port.sh \ + bridge_mdb.sh \ bridge_port_isolation.sh \ bridge_sticky_fdb.sh \ bridge_vlan_aware.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh new file mode 100755 index 000000000000..b1ba6876dd86 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that adding host mdb entries work as intended for all types of +# multicast filters: ipv4, ipv6, and mac + +ALL_TESTS="mdb_add_del_test" +NUM_NETIFS=2 + +TEST_GROUP_IP4="225.1.2.3" +TEST_GROUP_IP6="ff02::42" +TEST_GROUP_MAC="01:00:01:c0:ff:ee" + +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +switch_create() +{ + # Enable multicast filtering + ip link add dev br0 type bridge mcast_snooping 1 + + ip link set dev $swp1 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up +} + +switch_destroy() +{ + ip link set dev $swp1 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +do_mdb_add_del() +{ + local group=$1 + local flag=$2 + + RET=0 + bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null + check_err $? "Failed adding $group to br0, port br0" + + if [ -z "$flag" ]; then + flag="temp" + fi + + bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null + check_err $? "$group not added with $flag flag" + + bridge mdb del dev br0 port br0 grp $group 2>/dev/null + check_err $? "Failed deleting $group from br0, port br0" + + bridge mdb show dev br0 | grep -q $group >/dev/null + check_err_fail 1 $? "$group still in mdb after delete" + + log_test "MDB add/del group $group to bridge port br0" +} + +mdb_add_del_test() +{ + do_mdb_add_del $TEST_GROUP_MAC permanent + do_mdb_add_del $TEST_GROUP_IP4 + do_mdb_add_del $TEST_GROUP_IP6 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index ff821025d309..9dd43d7d957b 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -71,6 +71,43 @@ chk_msk_remote_key_nr() __chk_nr "grep -c remote_key" $* } +__chk_listen() +{ + local filter="$1" + local expected=$2 + + shift 2 + msg=$* + + nr=$(ss -N $ns -Ml "$filter" | grep -c LISTEN) + printf "%-50s" "$msg" + + if [ $nr != $expected ]; then + echo "[ fail ] expected $expected found $nr" + ret=$test_cnt + else + echo "[ ok ]" + fi +} + +chk_msk_listen() +{ + lport=$1 + local msg="check for listen socket" + + # destination port search should always return empty list + __chk_listen "dport $lport" 0 "listen match for dport $lport" + + # should return 'our' mptcp listen socket + __chk_listen "sport $lport" 1 "listen match for sport $lport" + + __chk_listen "src inet:0.0.0.0:$lport" 1 "listen match for saddr and sport" + + __chk_listen "" 1 "all listen sockets" + + nr=$(ss -Ml $filter | wc -l) +} + # $1: ns, $2: port wait_local_port_listen() { @@ -113,6 +150,7 @@ echo "a" | \ 0.0.0.0 >/dev/null & wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" +chk_msk_listen 10000 echo "b" | \ timeout ${timeout_test} \ diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh index 695a1958723f..fd76b69635a4 100755 --- a/tools/testing/selftests/netfilter/nft_fib.sh +++ b/tools/testing/selftests/netfilter/nft_fib.sh @@ -66,6 +66,20 @@ table inet filter { EOF } +load_pbr_ruleset() { + local netns=$1 + +ip netns exec ${netns} nft -f /dev/stdin <<EOF +table inet filter { + chain forward { + type filter hook forward priority raw; + fib saddr . iif oif gt 0 accept + log drop + } +} +EOF +} + load_ruleset_count() { local netns=$1 @@ -219,4 +233,40 @@ sleep 2 ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1 +# delete all rules +ip netns exec ${ns1} nft flush ruleset +ip netns exec ${ns2} nft flush ruleset +ip netns exec ${nsrouter} nft flush ruleset + +ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr add dead:1::99/64 dev eth0 + +ip -net ${ns1} addr del 10.0.2.99/24 dev eth0 +ip -net ${ns1} addr del dead:2::99/64 dev eth0 + +ip -net ${nsrouter} addr del dead:2::1/64 dev veth0 + +# ... pbr ruleset for the router, check iif+oif. +load_pbr_ruleset ${nsrouter} +if [ $? -ne 0 ] ; then + echo "SKIP: Could not load fib forward ruleset" + exit $ksft_skip +fi + +ip -net ${nsrouter} rule add from all table 128 +ip -net ${nsrouter} rule add from all iif veth0 table 129 +ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0 +ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1 + +# drop main ipv4 table +ip -net ${nsrouter} -4 rule delete table main + +test_ping 10.0.2.99 dead:2::99 +if [ $? -ne 0 ] ; then + ip -net ${nsrouter} nft list ruleset + echo "FAIL: fib mismatch in pbr setup" + exit 1 +fi + +echo "PASS: fib expression forward check with policy based routing" exit 0 |
