290 files changed, 11772 insertions, 5417 deletions
diff --git a/Documentation/devicetree/bindings/net/can/xilinx,can.yaml b/Documentation/devicetree/bindings/net/can/xilinx,can.yaml new file mode 100644 index 000000000000..65af8183cb9c --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/xilinx,can.yaml @@ -0,0 +1,161 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/can/xilinx,can.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: + Xilinx Axi CAN/Zynq CANPS controller + +maintainers: + - Appana Durga Kedareswara rao <appana.durga.rao@xilinx.com> + +properties: + compatible: + enum: + - xlnx,zynq-can-1.0 + - xlnx,axi-can-1.00.a + - xlnx,canfd-1.0 + - xlnx,canfd-2.0 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + minItems: 1 + maxItems: 2 + + clock-names: + maxItems: 2 + + power-domains: + maxItems: 1 + + tx-fifo-depth: + $ref: "/schemas/types.yaml#/definitions/uint32" + description: CAN Tx fifo depth (Zynq, Axi CAN). + + rx-fifo-depth: + $ref: "/schemas/types.yaml#/definitions/uint32" + description: CAN Rx fifo depth (Zynq, Axi CAN, CAN FD in sequential Rx mode) + + tx-mailbox-count: + $ref: "/schemas/types.yaml#/definitions/uint32" + description: CAN Tx mailbox buffer count (CAN FD) + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + +unevaluatedProperties: false + +allOf: + - $ref: can-controller.yaml# + - if: + properties: + compatible: + contains: + enum: + - xlnx,zynq-can-1.0 + + then: + properties: + clock-names: + items: + - const: can_clk + - const: pclk + required: + - tx-fifo-depth + - rx-fifo-depth + + - if: + properties: + compatible: + contains: + enum: + - xlnx,axi-can-1.00.a + + then: + properties: + clock-names: + items: + - const: can_clk + - const: s_axi_aclk + required: + - tx-fifo-depth + - rx-fifo-depth + + - if: + properties: + compatible: + contains: + enum: + - xlnx,canfd-1.0 + - xlnx,canfd-2.0 + + then: + properties: + clock-names: + items: + - const: can_clk + - const: s_axi_aclk + required: + - tx-mailbox-count + - rx-fifo-depth + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + + can@e0008000 { + compatible = "xlnx,zynq-can-1.0"; + reg = <0xe0008000 0x1000>; + clocks = <&clkc 19>, <&clkc 36>; + clock-names = "can_clk", "pclk"; + interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&intc>; + tx-fifo-depth = <0x40>; + rx-fifo-depth = <0x40>; + }; + + - | + can@40000000 { + compatible = "xlnx,axi-can-1.00.a"; + reg = <0x40000000 0x10000>; + clocks = <&clkc 0>, <&clkc 1>; + clock-names = "can_clk", "s_axi_aclk"; + interrupt-parent = <&intc>; + interrupts = <GIC_SPI 59 IRQ_TYPE_EDGE_RISING>; + tx-fifo-depth = <0x40>; + rx-fifo-depth = <0x40>; + }; + + - | + can@40000000 { + compatible = "xlnx,canfd-1.0"; + reg = <0x40000000 0x2000>; + clocks = <&clkc 0>, <&clkc 1>; + clock-names = "can_clk", "s_axi_aclk"; + interrupt-parent = <&intc>; + interrupts = <GIC_SPI 59 IRQ_TYPE_EDGE_RISING>; + tx-mailbox-count = <0x20>; + rx-fifo-depth = <0x20>; + }; + + - | + can@ff060000 { + compatible = "xlnx,canfd-2.0"; + reg = <0xff060000 0x6000>; + clocks = <&clkc 0>, <&clkc 1>; + clock-names = "can_clk", "s_axi_aclk"; + interrupt-parent = <&intc>; + interrupts = <GIC_SPI 59 IRQ_TYPE_EDGE_RISING>; + tx-mailbox-count = <0x20>; + rx-fifo-depth = <0x40>; + }; diff --git a/Documentation/devicetree/bindings/net/can/xilinx_can.txt b/Documentation/devicetree/bindings/net/can/xilinx_can.txt deleted file mode 100644 index 100cc40b8510..000000000000 --- 
a/Documentation/devicetree/bindings/net/can/xilinx_can.txt +++ /dev/null @@ -1,61 +0,0 @@ -Xilinx Axi CAN/Zynq CANPS controller Device Tree Bindings ---------------------------------------------------------- - -Required properties: -- compatible : Should be: - - "xlnx,zynq-can-1.0" for Zynq CAN controllers - - "xlnx,axi-can-1.00.a" for Axi CAN controllers - - "xlnx,canfd-1.0" for CAN FD controllers - - "xlnx,canfd-2.0" for CAN FD 2.0 controllers -- reg : Physical base address and size of the controller - registers map. -- interrupts : Property with a value describing the interrupt - number. -- clock-names : List of input clock names - - "can_clk", "pclk" (For CANPS), - - "can_clk", "s_axi_aclk" (For AXI CAN and CAN FD). - (See clock bindings for details). -- clocks : Clock phandles (see clock bindings for details). -- tx-fifo-depth : Can Tx fifo depth (Zynq, Axi CAN). -- rx-fifo-depth : Can Rx fifo depth (Zynq, Axi CAN, CAN FD in - sequential Rx mode). -- tx-mailbox-count : Can Tx mailbox buffer count (CAN FD). -- rx-mailbox-count : Can Rx mailbox buffer count (CAN FD in mailbox Rx - mode). - - -Example: - -For Zynq CANPS Dts file: - zynq_can_0: can@e0008000 { - compatible = "xlnx,zynq-can-1.0"; - clocks = <&clkc 19>, <&clkc 36>; - clock-names = "can_clk", "pclk"; - reg = <0xe0008000 0x1000>; - interrupts = <0 28 4>; - interrupt-parent = <&intc>; - tx-fifo-depth = <0x40>; - rx-fifo-depth = <0x40>; - }; -For Axi CAN Dts file: - axi_can_0: axi-can@40000000 { - compatible = "xlnx,axi-can-1.00.a"; - clocks = <&clkc 0>, <&clkc 1>; - clock-names = "can_clk","s_axi_aclk" ; - reg = <0x40000000 0x10000>; - interrupt-parent = <&intc>; - interrupts = <0 59 1>; - tx-fifo-depth = <0x40>; - rx-fifo-depth = <0x40>; - }; -For CAN FD Dts file: - canfd_0: canfd@40000000 { - compatible = "xlnx,canfd-1.0"; - clocks = <&clkc 0>, <&clkc 1>; - clock-names = "can_clk", "s_axi_aclk"; - reg = <0x40000000 0x2000>; - interrupt-parent = <&intc>; - interrupts = <0 59 1>; - tx-mailbox-count = <0x20>; - rx-fifo-depth = <0x20>; - }; diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt index 691f886cfc4a..2bf31572b08d 100644 --- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt +++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt @@ -5,6 +5,7 @@ Required properties: "marvell,armada-370-neta" "marvell,armada-xp-neta" "marvell,armada-3700-neta" + "marvell,armada-ac5-neta" - reg: address and length of the register set for the device. - interrupts: interrupt for the device - phy: See ethernet.txt file in the same directory. diff --git a/Documentation/devicetree/bindings/net/mediatek-dwmac.txt b/Documentation/devicetree/bindings/net/mediatek-dwmac.txt deleted file mode 100644 index afbcaebf062e..000000000000 --- a/Documentation/devicetree/bindings/net/mediatek-dwmac.txt +++ /dev/null @@ -1,91 +0,0 @@ -MediaTek DWMAC glue layer controller - -This file documents platform glue layer for stmmac. -Please see stmmac.txt for the other unchanged properties. - -The device node has following properties. - -Required properties: -- compatible: Should be "mediatek,mt2712-gmac" for MT2712 SoC -- reg: Address and length of the register set for the device -- interrupts: Should contain the MAC interrupts -- interrupt-names: Should contain a list of interrupt names corresponding to - the interrupts in the interrupts property, if available. 
- Should be "macirq" for the main MAC IRQ -- clocks: Must contain a phandle for each entry in clock-names. -- clock-names: The name of the clock listed in the clocks property. These are - "axi", "apb", "mac_main", "ptp_ref", "rmii_internal" for MT2712 SoC. -- mac-address: See ethernet.txt in the same directory -- phy-mode: See ethernet.txt in the same directory -- mediatek,pericfg: A phandle to the syscon node that control ethernet - interface and timing delay. - -Optional properties: -- mediatek,tx-delay-ps: TX clock delay macro value. Default is 0. - It should be defined for RGMII/MII interface. - It should be defined for RMII interface when the reference clock is from MT2712 SoC. -- mediatek,rx-delay-ps: RX clock delay macro value. Default is 0. - It should be defined for RGMII/MII interface. - It should be defined for RMII interface. -Both delay properties need to be a multiple of 170 for RGMII interface, -or will round down. Range 0~31*170. -Both delay properties need to be a multiple of 550 for MII/RMII interface, -or will round down. Range 0~31*550. - -- mediatek,rmii-rxc: boolean property, if present indicates that the RMII - reference clock, which is from external PHYs, is connected to RXC pin - on MT2712 SoC. - Otherwise, is connected to TXC pin. -- mediatek,rmii-clk-from-mac: boolean property, if present indicates that - MT2712 SoC provides the RMII reference clock, which outputs to TXC pin only. -- mediatek,txc-inverse: boolean property, if present indicates that - 1. tx clock will be inversed in MII/RGMII case, - 2. tx clock inside MAC will be inversed relative to reference clock - which is from external PHYs in RMII case, and it rarely happen. - 3. the reference clock, which outputs to TXC pin will be inversed in RMII case - when the reference clock is from MT2712 SoC. -- mediatek,rxc-inverse: boolean property, if present indicates that - 1. rx clock will be inversed in MII/RGMII case. - 2. reference clock will be inversed when arrived at MAC in RMII case, when - the reference clock is from external PHYs. - 3. the inside clock, which be sent to MAC, will be inversed in RMII case when - the reference clock is from MT2712 SoC. 
-- assigned-clocks: mac_main and ptp_ref clocks -- assigned-clock-parents: parent clocks of the assigned clocks - -Example: - eth: ethernet@1101c000 { - compatible = "mediatek,mt2712-gmac"; - reg = <0 0x1101c000 0 0x1300>; - interrupts = <GIC_SPI 237 IRQ_TYPE_LEVEL_LOW>; - interrupt-names = "macirq"; - phy-mode ="rgmii-rxid"; - mac-address = [00 55 7b b5 7d f7]; - clock-names = "axi", - "apb", - "mac_main", - "ptp_ref", - "rmii_internal"; - clocks = <&pericfg CLK_PERI_GMAC>, - <&pericfg CLK_PERI_GMAC_PCLK>, - <&topckgen CLK_TOP_ETHER_125M_SEL>, - <&topckgen CLK_TOP_ETHER_50M_SEL>, - <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>; - assigned-clocks = <&topckgen CLK_TOP_ETHER_125M_SEL>, - <&topckgen CLK_TOP_ETHER_50M_SEL>, - <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>; - assigned-clock-parents = <&topckgen CLK_TOP_ETHERPLL_125M>, - <&topckgen CLK_TOP_APLL1_D3>, - <&topckgen CLK_TOP_ETHERPLL_50M>; - power-domains = <&scpsys MT2712_POWER_DOMAIN_AUDIO>; - mediatek,pericfg = <&pericfg>; - mediatek,tx-delay-ps = <1530>; - mediatek,rx-delay-ps = <1530>; - mediatek,rmii-rxc; - mediatek,txc-inverse; - mediatek,rxc-inverse; - snps,txpbl = <1>; - snps,rxpbl = <1>; - snps,reset-gpio = <&pio 87 GPIO_ACTIVE_LOW>; - snps,reset-active-low; - }; diff --git a/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml b/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml new file mode 100644 index 000000000000..901944683322 --- /dev/null +++ b/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml @@ -0,0 +1,175 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/mediatek-dwmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek DWMAC glue layer controller + +maintainers: + - Biao Huang <biao.huang@mediatek.com> + +description: + This file documents the platform glue layer for stmmac. + +# We need a select here so we don't match all nodes with 'snps,dwmac' +select: + properties: + compatible: + contains: + enum: + - mediatek,mt2712-gmac + - mediatek,mt8195-gmac + required: + - compatible + +allOf: + - $ref: "snps,dwmac.yaml#" + +properties: + compatible: + oneOf: + - items: + - enum: + - mediatek,mt2712-gmac + - const: snps,dwmac-4.20a + - items: + - enum: + - mediatek,mt8195-gmac + - const: snps,dwmac-5.10a + + clocks: + minItems: 5 + items: + - description: AXI clock + - description: APB clock + - description: MAC Main clock + - description: PTP clock + - description: RMII reference clock provided by MAC + - description: MAC clock gate + + clock-names: + minItems: 5 + items: + - const: axi + - const: apb + - const: mac_main + - const: ptp_ref + - const: rmii_internal + - const: mac_cg + + mediatek,pericfg: + $ref: /schemas/types.yaml#/definitions/phandle + description: + The phandle to the syscon node that controls the ethernet + interface and timing delay. + + mediatek,tx-delay-ps: + description: + The internal TX clock delay (provided by this driver) in picoseconds. + For the MT2712 RGMII interface, the allowed value must be a multiple of 170, + or it will be rounded down. Range 0~31*170. + For the MT2712 RMII/MII interface, the allowed value must be a multiple of 550, + or it will be rounded down. Range 0~31*550. + For the MT8195 RGMII/RMII/MII interface, the allowed value must be a multiple of 290, + or it will be rounded down. Range 0~31*290. + + mediatek,rx-delay-ps: + description: + The internal RX clock delay (provided by this driver) in picoseconds. + For the MT2712 RGMII interface, the allowed value must be a multiple of 170, + or it will be rounded down.
Range 0~31*170. + For the MT2712 RMII/MII interface, the allowed value must be a multiple of 550, + or it will be rounded down. Range 0~31*550. + For the MT8195 RGMII/RMII/MII interface, the allowed value must be a multiple + of 290, or it will be rounded down. Range 0~31*290. + + mediatek,rmii-rxc: + type: boolean + description: + If present, indicates that the RMII reference clock, which is from external + PHYs, is connected to the RXC pin. Otherwise, it is connected to the TXC pin. + + mediatek,rmii-clk-from-mac: + type: boolean + description: + If present, indicates that the MAC provides the RMII reference clock, which + is output on the TXC pin only. + + mediatek,txc-inverse: + type: boolean + description: + If present, indicates that + 1. the tx clock will be inverted in the MII/RGMII case, + 2. the tx clock inside the MAC will be inverted relative to the reference clock + supplied by external PHYs in the RMII case (this rarely happens), + 3. the reference clock output on the TXC pin will be inverted in the RMII case + when the reference clock is from the MAC. + + mediatek,rxc-inverse: + type: boolean + description: + If present, indicates that + 1. the rx clock will be inverted in the MII/RGMII case, + 2. the reference clock will be inverted when it arrives at the MAC in the RMII + case, when the reference clock is from external PHYs, + 3. the internal clock sent to the MAC will be inverted in the RMII case when + the reference clock is from the MAC. + + mediatek,mac-wol: + type: boolean + description: + If present, indicates that the MAC supports WOL (Wake-on-LAN) and MAC WOL will be enabled. + Otherwise, PHY WOL is preferred. + +required: + - compatible + - reg + - interrupts + - interrupt-names + - clocks + - clock-names + - phy-mode + - mediatek,pericfg + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/mt2712-clk.h> + #include <dt-bindings/gpio/gpio.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/power/mt2712-power.h> + + eth: ethernet@1101c000 { + compatible = "mediatek,mt2712-gmac", "snps,dwmac-4.20a"; + reg = <0x1101c000 0x1300>; + interrupts = <GIC_SPI 237 IRQ_TYPE_LEVEL_LOW>; + interrupt-names = "macirq"; + phy-mode = "rgmii-rxid"; + mac-address = [00 55 7b b5 7d f7]; + clock-names = "axi", + "apb", + "mac_main", + "ptp_ref", + "rmii_internal"; + clocks = <&pericfg CLK_PERI_GMAC>, + <&pericfg CLK_PERI_GMAC_PCLK>, + <&topckgen CLK_TOP_ETHER_125M_SEL>, + <&topckgen CLK_TOP_ETHER_50M_SEL>, + <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>; + assigned-clocks = <&topckgen CLK_TOP_ETHER_125M_SEL>, + <&topckgen CLK_TOP_ETHER_50M_SEL>, + <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>; + assigned-clock-parents = <&topckgen CLK_TOP_ETHERPLL_125M>, + <&topckgen CLK_TOP_APLL1_D3>, + <&topckgen CLK_TOP_ETHERPLL_50M>; + power-domains = <&scpsys MT2712_POWER_DOMAIN_AUDIO>; + mediatek,pericfg = <&pericfg>; + mediatek,tx-delay-ps = <1530>; + snps,txpbl = <1>; + snps,rxpbl = <1>; + snps,reset-gpio = <&pio 87 GPIO_ACTIVE_LOW>; + snps,reset-delays-us = <0 10000 10000>; + }; diff --git a/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml b/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml new file mode 100644 index 000000000000..4d91e2f4f247 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/phy/fsl,lynx-28g.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale Lynx 28G SerDes PHY binding + +maintainers: + -
Ioana Ciornei <ioana.ciornei@nxp.com> + +properties: + compatible: + enum: + - fsl,lynx-28g + + reg: + maxItems: 1 + + "#phy-cells": + const: 1 + +required: + - compatible + - reg + - "#phy-cells" + +additionalProperties: false + +examples: + - | + soc { + #address-cells = <2>; + #size-cells = <2>; + serdes_1: phy@1ea0000 { + compatible = "fsl,lynx-28g"; + reg = <0x0 0x1ea0000 0x0 0x1e30>; + #phy-cells = <1>; + }; + }; diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index 443123772f44..c17cdb079611 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -4,6 +4,22 @@ Linux Devlink Documentation devlink is an API to expose device information and resources not directly related to any device class, such as chip-wide/switch-ASIC-wide configuration. +Locking +------- + +Driver-facing APIs are currently transitioning to allow more explicit +locking. Drivers can use the existing ``devlink_*`` set of APIs, or +new APIs prefixed by ``devl_*``. The older APIs handle all the locking +in devlink core, but don't allow registration of most sub-objects once +the main devlink object is itself registered. The newer ``devl_*`` APIs assume +the devlink instance lock is already held. Drivers can take the instance +lock by calling ``devl_lock()``. It is also held in most of the callbacks. +Eventually all callbacks will be invoked under the devlink instance lock; +refer to the use of the ``DEVLINK_NL_FLAG_NO_LOCK`` flag in devlink core +to find out which callbacks are not yet converted. + +Drivers are encouraged to use the devlink instance lock for their own needs +(a short usage sketch appears at the end of this diff). + Interface documentation ----------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 033af08c8239..e5ce4f60786e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11333,6 +11333,13 @@ S: Maintained W: http://linux-test-project.github.io/ T: git git://github.com/linux-test-project/ltp.git +LYNX 28G SERDES PHY DRIVER +M: Ioana Ciornei <ioana.ciornei@nxp.com> +L: netdev@vger.kernel.org +S: Supported +F: Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml +F: drivers/phy/freescale/phy-fsl-lynx-28g.c + LYNX PCS MODULE M: Ioana Ciornei <ioana.ciornei@nxp.com> L: netdev@vger.kernel.org S: Supported @@ -13382,6 +13389,7 @@ F: net/core/drop_monitor.c NETWORKING DRIVERS M: "David S. Miller" <davem@davemloft.net> M: Jakub Kicinski <kuba@kernel.org> +M: Paolo Abeni <pabeni@redhat.com> L: netdev@vger.kernel.org S: Maintained Q: https://patchwork.kernel.org/project/netdevbpf/list/ @@ -13428,6 +13436,7 @@ F: tools/testing/selftests/drivers/net/dsa/ NETWORKING [GENERAL] M: "David S.
Miller" <davem@davemloft.net> M: Jakub Kicinski <kuba@kernel.org> +M: Paolo Abeni <pabeni@redhat.com> L: netdev@vger.kernel.org S: Maintained Q: https://patchwork.kernel.org/project/netdevbpf/list/ @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Superb Owl # *DOCUMENTATION* diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h index d1fa5607d3aa..85f9e538fb32 100644 --- a/arch/arm/include/asm/spectre.h +++ b/arch/arm/include/asm/spectre.h @@ -25,7 +25,13 @@ enum { SPECTRE_V2_METHOD_LOOP8 = BIT(__SPECTRE_V2_METHOD_LOOP8), }; +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES void spectre_v2_update_state(unsigned int state, unsigned int methods); +#else +static inline void spectre_v2_update_state(unsigned int state, + unsigned int methods) +{} +#endif int spectre_bhb_update_vectors(unsigned int method); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 676703cbfe4b..ee3f7a599181 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1040,9 +1040,9 @@ vector_bhb_loop8_\name: @ bhb workaround mov r0, #8 -1: b . + 4 +3: b . + 4 subs r0, r0, #1 - bne 1b + bne 3b dsb isb b 2b diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a-clearfog-itx.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a-clearfog-itx.dtsi index 17f8e733972a..41702e7386e3 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a-clearfog-itx.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a-clearfog-itx.dtsi @@ -63,21 +63,25 @@ &dpmac7 { sfp = <&sfp0>; managed = "in-band-status"; + phys = <&serdes_1 3>; }; &dpmac8 { sfp = <&sfp1>; managed = "in-band-status"; + phys = <&serdes_1 2>; }; &dpmac9 { sfp = <&sfp2>; managed = "in-band-status"; + phys = <&serdes_1 1>; }; &dpmac10 { sfp = <&sfp3>; managed = "in-band-status"; + phys = <&serdes_1 0>; }; &emdio2 { diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index 7032505f5ef3..92a881302708 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -612,6 +612,12 @@ ranges; dma-ranges = <0x0 0x0 0x0 0x0 0x10000 0x00000000>; + serdes_1: phy@1ea0000 { + compatible = "fsl,lynx-28g"; + reg = <0x0 0x1ea0000 0x0 0x1e30>; + #phy-cells = <1>; + }; + crypto: crypto@8000000 { compatible = "fsl,sec-v5.0", "fsl,sec-v4.0"; fsl,sec-era = <10>; diff --git a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts index 7d369fdd3117..11aa135aa0f3 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts @@ -110,6 +110,7 @@ phy-handle = <ðernet_phy0>; mediatek,tx-delay-ps = <1530>; snps,reset-gpio = <&pio 87 GPIO_ACTIVE_LOW>; + snps,reset-delays-us = <0 10000 10000>; pinctrl-names = "default", "sleep"; pinctrl-0 = <ð_default>; pinctrl-1 = <ð_sleep>; diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi index de16c0d80c30..a27b7628c5f7 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi @@ -726,7 +726,7 @@ }; eth: ethernet@1101c000 { - compatible = "mediatek,mt2712-gmac"; + compatible = "mediatek,mt2712-gmac", "snps,dwmac-4.20a"; reg = <0 0x1101c000 0 0x1300>; interrupts = <GIC_SPI 237 IRQ_TYPE_LEVEL_LOW>; interrupt-names = "macirq"; @@ -734,15 +734,19 @@ clock-names = "axi", "apb", "mac_main", - "ptp_ref"; + "ptp_ref", + "rmii_internal"; clocks = <&pericfg CLK_PERI_GMAC>, <&pericfg 
CLK_PERI_GMAC_PCLK>, <&topckgen CLK_TOP_ETHER_125M_SEL>, - <&topckgen CLK_TOP_ETHER_50M_SEL>; + <&topckgen CLK_TOP_ETHER_50M_SEL>, + <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>; assigned-clocks = <&topckgen CLK_TOP_ETHER_125M_SEL>, - <&topckgen CLK_TOP_ETHER_50M_SEL>; + <&topckgen CLK_TOP_ETHER_50M_SEL>, + <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>; assigned-clock-parents = <&topckgen CLK_TOP_ETHERPLL_125M>, - <&topckgen CLK_TOP_APLL1_D3>; + <&topckgen CLK_TOP_APLL1_D3>, + <&topckgen CLK_TOP_ETHERPLL_50M>; power-domains = <&scpsys MT2712_POWER_DOMAIN_AUDIO>; mediatek,pericfg = <&pericfg>; snps,axi-config = <&stmmac_axi_setup>; diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 160abcb8e9fa..ea0e487f87b1 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -9,7 +9,7 @@ long soft_nmi_interrupt(struct pt_regs *regs); static inline void arch_touch_nmi_watchdog(void) {} #endif -#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE) +#ifdef CONFIG_NMI_IPI extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index b44d6ecdb46e..0aacd7052585 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -2,6 +2,7 @@ menu "CPU errata selection" config RISCV_ERRATA_ALTERNATIVE bool "RISC-V alternative scheme" + depends on !XIP_KERNEL default y help This Kconfig allows the kernel to automatically patch the diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 6ec44a22278a..c112ab2a9052 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -14,8 +14,8 @@ config SOC_SIFIVE select CLK_SIFIVE select CLK_SIFIVE_PRCI select SIFIVE_PLIC - select RISCV_ERRATA_ALTERNATIVE - select ERRATA_SIFIVE + select RISCV_ERRATA_ALTERNATIVE if !XIP_KERNEL + select ERRATA_SIFIVE if !XIP_KERNEL help This enables support for SiFive SoC platform hardware. 
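The next hunk, in arch/riscv/kernel/module.c, replaces the plain s32 truncation test with riscv_insn_valid_32bit_offset(). The asymmetric range [-2^31 - 2^11, 2^31 - 2^11) falls out of the instruction encoding: jalr's lo12 immediate is sign-extended into [-2048, 2047], so auipc's hi20 immediate must absorb (offset + 0x800) >> 12, and hi20 is itself a signed 20-bit value. A minimal standalone sketch (userspace C, not kernel code) of the split and the round trip:

/* Sketch: split a PC-relative offset into the auipc hi20 / jalr lo12
 * immediates. hi20 overflows its signed 20 bits exactly when the offset
 * leaves [-2^31 - 2^11, 2^31 - 2^11), the range the new helper checks.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t offs[] = { 0, 2047, 2048, -2048, (1LL << 31) - (1 << 11) - 1 };

	for (unsigned int i = 0; i < sizeof(offs) / sizeof(offs[0]); i++) {
		int64_t off = offs[i];
		int32_t hi20 = (int32_t)((off + 0x800) >> 12);         /* auipc imm */
		int32_t lo12 = (int32_t)(off - ((int64_t)hi20 << 12)); /* jalr imm */

		assert(lo12 >= -2048 && lo12 <= 2047);
		assert(((int64_t)hi20 << 12) + lo12 == off);
		printf("off=%11lld hi20=%7d lo12=%5d\n", (long long)off, hi20, lo12);
	}
	return 0;
}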
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 68a9e3d1fe16..4a48287513c3 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -13,6 +13,19 @@ #include <linux/pgtable.h> #include <asm/sections.h> +/* + * The auipc+jalr instruction pair can reach any PC-relative offset + * in the range [-2^31 - 2^11, 2^31 - 2^11) + */ +static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) +{ +#ifdef CONFIG_32BIT + return true; +#else + return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11)); +#endif +} + static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { @@ -95,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; - if (offset != (s32)offset) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); @@ -197,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { offset = module_emit_plt_entry(me, v); @@ -224,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 48afe96ae0f0..7c63a1911fae 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -13,6 +13,30 @@ #include "sgx.h" /* + * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's + * follow right after the EPC data in the backing storage. In addition to the + * visible enclave pages, there's one extra page slot for SECS, before PCMD + * structs. + */ +static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl, + unsigned long page_index) +{ + pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs); + + return epc_end_off + page_index * sizeof(struct sgx_pcmd); +} + +/* + * Free a page from the backing storage in the given page index. + */ +static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index) +{ + struct inode *inode = file_inode(encl->backing); + + shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1); +} + +/* * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC * Pages" in the SDM. 
*/ @@ -22,9 +46,11 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, { unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK; struct sgx_encl *encl = encl_page->encl; + pgoff_t page_index, page_pcmd_off; struct sgx_pageinfo pginfo; struct sgx_backing b; - pgoff_t page_index; + bool pcmd_page_empty; + u8 *pcmd_page; int ret; if (secs_page) @@ -32,14 +58,16 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, else page_index = PFN_DOWN(encl->size); + page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); + ret = sgx_encl_get_backing(encl, page_index, &b); if (ret) return ret; pginfo.addr = encl_page->desc & PAGE_MASK; pginfo.contents = (unsigned long)kmap_atomic(b.contents); - pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) + - b.pcmd_offset; + pcmd_page = kmap_atomic(b.pcmd); + pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset; if (secs_page) pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page); @@ -55,11 +83,24 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, ret = -EFAULT; } - kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset)); + memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd)); + + /* + * The area for the PCMD in the page was zeroed above. Check if the + * whole page is now empty meaning that all PCMD's have been zeroed: + */ + pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE); + + kunmap_atomic(pcmd_page); kunmap_atomic((void *)(unsigned long)pginfo.contents); sgx_encl_put_backing(&b, false); + sgx_encl_truncate_backing_page(encl, page_index); + + if (pcmd_page_empty) + sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off)); + return ret; } @@ -579,7 +620,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, struct sgx_backing *backing) { - pgoff_t pcmd_index = PFN_DOWN(encl->size) + 1 + (page_index >> 5); + pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); struct page *contents; struct page *pcmd; @@ -587,7 +628,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, if (IS_ERR(contents)) return PTR_ERR(contents); - pcmd = sgx_encl_get_backing_page(encl, pcmd_index); + pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off)); if (IS_ERR(pcmd)) { put_page(contents); return PTR_ERR(pcmd); @@ -596,9 +637,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, backing->page_index = page_index; backing->contents = contents; backing->pcmd = pcmd; - backing->pcmd_offset = - (page_index & (PAGE_SIZE / sizeof(struct sgx_pcmd) - 1)) * - sizeof(struct sgx_pcmd); + backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1); return 0; } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index bc0657f0deed..f267205f2d5a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -995,8 +995,10 @@ early_param("memmap", parse_memmap_opt); */ void __init e820__reserve_setup_data(void) { + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; + u32 len; pa_data = boot_params.hdr.setup_data; if (!pa_data) @@ -1004,6 +1006,14 @@ void __init e820__reserve_setup_data(void) while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); + if (!data) { + pr_warn("e820: failed to memremap setup_data entry\n"); + return; + } + + len = sizeof(*data); + pa_next = data->next; + e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, 
E820_TYPE_RESERVED_KERN); /* @@ -1015,18 +1025,27 @@ void __init e820__reserve_setup_data(void) sizeof(*data) + data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - e820__range_update(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - e820__range_update_kexec(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + if (data->type == SETUP_INDIRECT) { + len += data->len; + early_memunmap(data, sizeof(*data)); + data = early_memremap(pa_data, len); + if (!data) { + pr_warn("e820: failed to memremap indirect setup_data\n"); + return; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + e820__range_update(indirect->addr, indirect->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + e820__range_update_kexec(indirect->addr, indirect->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + } } - pa_data = data->next; - early_memunmap(data, sizeof(*data)); + pa_data = pa_next; + early_memunmap(data, len); } e820__update_table(e820_table); diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 64b6da95af98..e2e89bebcbc3 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -88,11 +88,13 @@ create_setup_data_node(struct dentry *parent, int no, static int __init create_setup_data_nodes(struct dentry *parent) { + struct setup_indirect *indirect; struct setup_data_node *node; struct setup_data *data; - int error; + u64 pa_data, pa_next; struct dentry *d; - u64 pa_data; + int error; + u32 len; int no = 0; d = debugfs_create_dir("setup_data", parent); @@ -112,12 +114,29 @@ static int __init create_setup_data_nodes(struct dentry *parent) error = -ENOMEM; goto err_dir; } - - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - node->paddr = ((struct setup_indirect *)data->data)->addr; - node->type = ((struct setup_indirect *)data->data)->type; - node->len = ((struct setup_indirect *)data->data)->len; + pa_next = data->next; + + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(pa_data, len, MEMREMAP_WB); + if (!data) { + kfree(node); + error = -ENOMEM; + goto err_dir; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + node->paddr = indirect->addr; + node->type = indirect->type; + node->len = indirect->len; + } else { + node->paddr = pa_data; + node->type = data->type; + node->len = data->len; + } } else { node->paddr = pa_data; node->type = data->type; @@ -125,7 +144,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) } create_setup_data_node(d, no, node); - pa_data = data->next; + pa_data = pa_next; memunmap(data); no++; diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index d0a19121c6a4..257892fcefa7 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -91,26 +91,41 @@ static int get_setup_data_paddr(int nr, u64 *paddr) static int __init get_setup_data_size(int nr, size_t *size) { - int i = 0; + u64 pa_data = boot_params.hdr.setup_data, pa_next; + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data = boot_params.hdr.setup_data; + int i = 0; + u32 len; while (pa_data) { data = memremap(pa_data, sizeof(*data), MEMREMAP_WB); if 
(!data) return -ENOMEM; + pa_next = data->next; + if (nr == i) { - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) - *size = ((struct setup_indirect *)data->data)->len; - else + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(pa_data, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) + *size = indirect->len; + else + *size = data->len; + } else { *size = data->len; + } memunmap(data); return 0; } - pa_data = data->next; + pa_data = pa_next; memunmap(data); i++; } @@ -120,9 +135,11 @@ static int __init get_setup_data_size(int nr, size_t *size) static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { + struct setup_indirect *indirect; + struct setup_data *data; int nr, ret; u64 paddr; - struct setup_data *data; + u32 len; ret = kobj_to_setup_data_nr(kobj, &nr); if (ret) @@ -135,10 +152,20 @@ static ssize_t type_show(struct kobject *kobj, if (!data) return -ENOMEM; - if (data->type == SETUP_INDIRECT) - ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type); - else + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(paddr, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + ret = sprintf(buf, "0x%x\n", indirect->type); + } else { ret = sprintf(buf, "0x%x\n", data->type); + } + memunmap(data); return ret; } @@ -149,9 +176,10 @@ static ssize_t setup_data_data_read(struct file *fp, char *buf, loff_t off, size_t count) { + struct setup_indirect *indirect; + struct setup_data *data; int nr, ret = 0; u64 paddr, len; - struct setup_data *data; void *p; ret = kobj_to_setup_data_nr(kobj, &nr); @@ -165,10 +193,27 @@ static ssize_t setup_data_data_read(struct file *fp, if (!data) return -ENOMEM; - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - paddr = ((struct setup_indirect *)data->data)->addr; - len = ((struct setup_indirect *)data->data)->len; + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(paddr, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } else { + /* + * Even though this is technically undefined, return + * the data as though it is a normal setup_data struct. + * This will at least allow it to be inspected. + */ + paddr += sizeof(*data); + len = data->len; + } } else { paddr += sizeof(*data); len = data->len; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 95fa745e310a..96d7c27b7093 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -273,6 +273,14 @@ int module_finalize(const Elf_Ehdr *hdr, retpolines = s; } + /* + * See alternative_instructions() for the ordering rules between the + * various patching types. 
+ */ + if (para) { + void *pseg = (void *)para->sh_addr; + apply_paravirt(pseg, pseg + para->sh_size); + } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); @@ -290,11 +298,6 @@ int module_finalize(const Elf_Ehdr *hdr, tseg, tseg + text->sh_size); } - if (para) { - void *pseg = (void *)para->sh_addr; - apply_paravirt(pseg, pseg + para->sh_size); - } - /* make jump label nops */ jump_label_apply_nops(me); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f7a132eb794d..90d7e1788c91 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -369,21 +369,41 @@ static void __init parse_setup_data(void) static void __init memblock_x86_reserve_range_setup_data(void) { + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; + u32 len; pa_data = boot_params.hdr.setup_data; while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); + if (!data) { + pr_warn("setup: failed to memremap setup_data entry\n"); + return; + } + + len = sizeof(*data); + pa_next = data->next; + memblock_reserve(pa_data, sizeof(*data) + data->len); - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) - memblock_reserve(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len); + if (data->type == SETUP_INDIRECT) { + len += data->len; + early_memunmap(data, sizeof(*data)); + data = early_memremap(pa_data, len); + if (!data) { + pr_warn("setup: failed to memremap indirect setup_data\n"); + return; + } - pa_data = data->next; - early_memunmap(data, sizeof(*data)); + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) + memblock_reserve(indirect->addr, indirect->len); + } + + pa_data = pa_next; + early_memunmap(data, len); } } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c9d566dcf89a..8143693a7ea6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -659,6 +659,7 @@ static bool do_int3(struct pt_regs *regs) return res == NOTIFY_STOP; } +NOKPROBE_SYMBOL(do_int3); static void do_int3_user(struct pt_regs *regs) { diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 026031b3b782..17a492c27306 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -615,6 +615,7 @@ static bool memremap_is_efi_data(resource_size_t phys_addr, static bool memremap_is_setup_data(resource_size_t phys_addr, unsigned long size) { + struct setup_indirect *indirect; struct setup_data *data; u64 paddr, paddr_next; @@ -627,6 +628,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, data = memremap(paddr, sizeof(*data), MEMREMAP_WB | MEMREMAP_DEC); + if (!data) { + pr_warn("failed to memremap setup_data entry\n"); + return false; + } paddr_next = data->next; len = data->len; @@ -636,10 +641,21 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, return true; } - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - paddr = ((struct setup_indirect *)data->data)->addr; - len = ((struct setup_indirect *)data->data)->len; + if (data->type == SETUP_INDIRECT) { + memunmap(data); + data = memremap(paddr, sizeof(*data) + len, + MEMREMAP_WB | MEMREMAP_DEC); + if (!data) { + pr_warn("failed to memremap indirect setup_data\n"); + return false; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = 
indirect->len; + } } memunmap(data); @@ -660,22 +676,51 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, unsigned long size) { + struct setup_indirect *indirect; struct setup_data *data; u64 paddr, paddr_next; paddr = boot_params.hdr.setup_data; while (paddr) { - unsigned int len; + unsigned int len, size; if (phys_addr == paddr) return true; data = early_memremap_decrypted(paddr, sizeof(*data)); + if (!data) { + pr_warn("failed to early memremap setup_data entry\n"); + return false; + } + + size = sizeof(*data); paddr_next = data->next; len = data->len; - early_memunmap(data, sizeof(*data)); + if ((phys_addr > paddr) && (phys_addr < (paddr + len))) { + early_memunmap(data, sizeof(*data)); + return true; + } + + if (data->type == SETUP_INDIRECT) { + size += len; + early_memunmap(data, sizeof(*data)); + data = early_memremap_decrypted(paddr, size); + if (!data) { + pr_warn("failed to early memremap indirect setup_data\n"); + return false; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } + } + + early_memunmap(data, size); if ((phys_addr > paddr) && (phys_addr < (paddr + len))) return true; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 1331756d4cfc..8b2e5ef15559 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1377,11 +1377,11 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, if (info->valid & ACPI_VALID_HID) { acpi_add_id(pnp, info->hardware_id.string); pnp->type.platform_id = 1; - if (info->valid & ACPI_VALID_CID) { - cid_list = &info->compatible_id_list; - for (i = 0; i < cid_list->count; i++) - acpi_add_id(pnp, cid_list->ids[i].string); - } + } + if (info->valid & ACPI_VALID_CID) { + cid_list = &info->compatible_id_list; + for (i = 0; i < cid_list->count; i++) + acpi_add_id(pnp, cid_list->ids[i].string); } if (info->valid & ACPI_VALID_ADR) { pnp->bus_address = info->address; diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 422753d52244..a31ffe16e626 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1112,6 +1112,8 @@ DPRINTK("iovcnt = %d\n",skb_shinfo(skb)->nr_frags); skb_data3 = skb->data[3]; paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&eni_dev->pci_dev->dev, paddr)) + return enq_next; ENI_PRV_PADDR(skb) = paddr; /* prepare DMA queue entries */ j = 0; diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c index 99ba8d51d102..11f30fd48c14 100644 --- a/drivers/crypto/qcom-rng.c +++ b/drivers/crypto/qcom-rng.c @@ -8,6 +8,7 @@ #include <linux/clk.h> #include <linux/crypto.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> @@ -43,16 +44,19 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max) { unsigned int currsize = 0; u32 val; + int ret; /* read random data from hardware */ do { - val = readl_relaxed(rng->base + PRNG_STATUS); - if (!(val & PRNG_STATUS_DATA_AVAIL)) - break; + ret = readl_poll_timeout(rng->base + PRNG_STATUS, val, + val & PRNG_STATUS_DATA_AVAIL, + 200, 10000); + if (ret) + return ret; val = readl_relaxed(rng->base + PRNG_DATA_OUT); if (!val) - break; + return -EINVAL; if ((max - currsize) >= WORD_SZ) { memcpy(data, &val, WORD_SZ); @@ -61,11 +65,10 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max) } 
else { /* copy only remaining bytes */ memcpy(data, &val, max - currsize); - break; } } while (currsize < max); - return currsize; + return 0; } static int qcom_rng_generate(struct crypto_rng *tfm, @@ -87,7 +90,7 @@ static int qcom_rng_generate(struct crypto_rng *tfm, mutex_unlock(&rng->lock); clk_disable_unprepare(rng->clk); - return 0; + return ret; } static int qcom_rng_seed(struct crypto_rng *tfm, const u8 *seed, diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c index 4c3201e290e2..ea84108035eb 100644 --- a/drivers/firmware/efi/apple-properties.c +++ b/drivers/firmware/efi/apple-properties.c @@ -24,7 +24,7 @@ static bool dump_properties __initdata; static int __init dump_properties_enable(char *arg) { dump_properties = true; - return 0; + return 1; } __setup("dump_apple_properties", dump_properties_enable); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 7de3f5b6e8d0..5502e176d51b 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -212,7 +212,7 @@ static int __init efivar_ssdt_setup(char *str) memcpy(efivar_ssdt, str, strlen(str)); else pr_warn("efivar_ssdt: name too long: %s\n", str); - return 0; + return 1; } __setup("efivar_ssdt=", efivar_ssdt_setup); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index defb7c464b87..6630d92e30ad 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1701,6 +1701,11 @@ static inline void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc) */ int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset) { +#ifdef CONFIG_PINCTRL + if (list_empty(&gc->gpiodev->pin_ranges)) + return 0; +#endif + return pinctrl_gpio_request(gc->gpiodev->base + offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_request); @@ -1712,6 +1717,11 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_request); */ void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset) { +#ifdef CONFIG_PINCTRL + if (list_empty(&gc->gpiodev->pin_ranges)) + return; +#endif + pinctrl_gpio_free(gc->gpiodev->base + offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_free); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index a1a663f362e7..00279e8c2775 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1406,6 +1406,13 @@ static inline u32 man_trk_ctl_single_full_frame_bit_get(struct drm_i915_private PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME; } +static inline u32 man_trk_ctl_partial_frame_bit_get(struct drm_i915_private *dev_priv) +{ + return IS_ALDERLAKE_P(dev_priv) ? 
+ ADLP_PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE : + PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE; +} + static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -1510,7 +1517,13 @@ static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 val = PSR2_MAN_TRK_CTL_ENABLE; + u32 val = 0; + + if (!IS_ALDERLAKE_P(dev_priv)) + val = PSR2_MAN_TRK_CTL_ENABLE; + + /* SF partial frame enable has to be set even on full update */ + val |= man_trk_ctl_partial_frame_bit_get(dev_priv); if (full_update) { /* @@ -1530,7 +1543,6 @@ static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, } else { drm_WARN_ON(crtc_state->uapi.crtc->dev, clip->y1 % 4 || clip->y2 % 4); - val |= PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE; val |= PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(clip->y1 / 4 + 1); val |= PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(clip->y2 / 4 + 1); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c2bb33febb68..902e4c802a12 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4829,6 +4829,7 @@ enum { #define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val) REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val) #define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK REG_GENMASK(12, 0) #define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(val) REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK, val) +#define ADLP_PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE REG_BIT(31) #define ADLP_PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME REG_BIT(14) #define ADLP_PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME REG_BIT(13) diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 434c2861bb40..0aec5a10b064 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -106,6 +106,7 @@ config DRM_PANEL_EDP depends on PM select VIDEOMODE_HELPERS select DRM_DP_AUX_BUS + select DRM_DP_HELPER help DRM panel driver for dumb eDP panels that need at most a regulator and a GPIO to be powered up. 
Optionally a backlight can be attached so diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h index 145833a9d82d..5b3fbee18671 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.h +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h @@ -111,10 +111,10 @@ /* format 13 is semi-planar YUV411 VUVU */ #define SUN8I_MIXER_FBFMT_YUV411 14 /* format 15 doesn't exist */ -/* format 16 is P010 YVU */ -#define SUN8I_MIXER_FBFMT_P010_YUV 17 -/* format 18 is P210 YVU */ -#define SUN8I_MIXER_FBFMT_P210_YUV 19 +#define SUN8I_MIXER_FBFMT_P010_YUV 16 +/* format 17 is P010 YVU */ +#define SUN8I_MIXER_FBFMT_P210_YUV 18 +/* format 19 is P210 YVU */ /* format 20 is packed YVU444 10-bit */ /* format 21 is packed YUV444 10-bit */ diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 8d718aa56d33..4e67c1403cc9 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1908,7 +1908,7 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) cb_data.card = card; cb_data.status = 0; - err = __mmc_poll_for_busy(card->host, MMC_BLK_TIMEOUT_MS, + err = __mmc_poll_for_busy(card->host, 0, MMC_BLK_TIMEOUT_MS, &mmc_blk_busy_cb, &cb_data); /* diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index bbbbcaf70a59..8421519c2a98 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1962,7 +1962,7 @@ static int mmc_sleep(struct mmc_host *host) goto out_release; } - err = __mmc_poll_for_busy(host, timeout_ms, &mmc_sleep_busy_cb, host); + err = __mmc_poll_for_busy(host, 0, timeout_ms, &mmc_sleep_busy_cb, host); out_release: mmc_retune_release(host); diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index d63d1c735335..180d7e9d3400 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -21,6 +21,8 @@ #define MMC_BKOPS_TIMEOUT_MS (120 * 1000) /* 120s */ #define MMC_SANITIZE_TIMEOUT_MS (240 * 1000) /* 240s */ +#define MMC_OP_COND_PERIOD_US (1 * 1000) /* 1ms */ +#define MMC_OP_COND_TIMEOUT_MS 1000 /* 1s */ static const u8 tuning_blk_pattern_4bit[] = { 0xff, 0x0f, 0xff, 0x00, 0xff, 0xcc, 0xc3, 0xcc, @@ -232,7 +234,9 @@ int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr) cmd.arg = mmc_host_is_spi(host) ? 0 : ocr; cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R3 | MMC_CMD_BCR; - err = __mmc_poll_for_busy(host, 1000, &__mmc_send_op_cond_cb, &cb_data); + err = __mmc_poll_for_busy(host, MMC_OP_COND_PERIOD_US, + MMC_OP_COND_TIMEOUT_MS, + &__mmc_send_op_cond_cb, &cb_data); if (err) return err; @@ -495,13 +499,14 @@ static int mmc_busy_cb(void *cb_data, bool *busy) return 0; } -int __mmc_poll_for_busy(struct mmc_host *host, unsigned int timeout_ms, +int __mmc_poll_for_busy(struct mmc_host *host, unsigned int period_us, + unsigned int timeout_ms, int (*busy_cb)(void *cb_data, bool *busy), void *cb_data) { int err; unsigned long timeout; - unsigned int udelay = 32, udelay_max = 32768; + unsigned int udelay = period_us ? 
period_us : 32, udelay_max = 32768; bool expired = false; bool busy = false; @@ -546,7 +551,7 @@ int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms, cb_data.retry_crc_err = retry_crc_err; cb_data.busy_cmd = busy_cmd; - return __mmc_poll_for_busy(host, timeout_ms, &mmc_busy_cb, &cb_data); + return __mmc_poll_for_busy(host, 0, timeout_ms, &mmc_busy_cb, &cb_data); } EXPORT_SYMBOL_GPL(mmc_poll_for_busy); diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h index 9c813b851d0b..09ffbc00908b 100644 --- a/drivers/mmc/core/mmc_ops.h +++ b/drivers/mmc/core/mmc_ops.h @@ -41,7 +41,8 @@ int mmc_can_ext_csd(struct mmc_card *card); int mmc_switch_status(struct mmc_card *card, bool crc_err_fatal); bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd, unsigned int timeout_ms); -int __mmc_poll_for_busy(struct mmc_host *host, unsigned int timeout_ms, +int __mmc_poll_for_busy(struct mmc_host *host, unsigned int period_us, + unsigned int timeout_ms, int (*busy_cb)(void *cb_data, bool *busy), void *cb_data); int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms, diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index bd87012c220c..bfbfed30dc4d 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -1672,7 +1672,7 @@ static int sd_poweroff_notify(struct mmc_card *card) cb_data.card = card; cb_data.reg_buf = reg_buf; - err = __mmc_poll_for_busy(card->host, SD_POWEROFF_NOTIFY_TIMEOUT_MS, + err = __mmc_poll_for_busy(card->host, 0, SD_POWEROFF_NOTIFY_TIMEOUT_MS, &sd_busy_poweroff_notify_cb, &cb_data); out: diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 8f36536cb1b6..58ab9d90bc8b 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -173,6 +173,8 @@ struct meson_host { int irq; bool vqmmc_enabled; + bool needs_pre_post_req; + }; #define CMD_CFG_LENGTH_MASK GENMASK(8, 0) @@ -663,6 +665,8 @@ static void meson_mmc_request_done(struct mmc_host *mmc, struct meson_host *host = mmc_priv(mmc); host->cmd = NULL; + if (host->needs_pre_post_req) + meson_mmc_post_req(mmc, mrq, 0); mmc_request_done(host->mmc, mrq); } @@ -880,7 +884,7 @@ static int meson_mmc_validate_dram_access(struct mmc_host *mmc, struct mmc_data static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) { struct meson_host *host = mmc_priv(mmc); - bool needs_pre_post_req = mrq->data && + host->needs_pre_post_req = mrq->data && !(mrq->data->host_cookie & SD_EMMC_PRE_REQ_DONE); /* @@ -896,22 +900,19 @@ static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) } } - if (needs_pre_post_req) { + if (host->needs_pre_post_req) { meson_mmc_get_transfer_mode(mmc, mrq); if (!meson_mmc_desc_chain_mode(mrq->data)) - needs_pre_post_req = false; + host->needs_pre_post_req = false; } - if (needs_pre_post_req) + if (host->needs_pre_post_req) meson_mmc_pre_req(mmc, mrq); /* Stop execution */ writel(0, host->regs + SD_EMMC_START); meson_mmc_start_cmd(mmc, mrq->sbc ?: mrq->cmd); - - if (needs_pre_post_req) - meson_mmc_post_req(mmc, mrq, 0); } static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index ba587e5fc24f..683203f87ae2 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -148,14 +148,14 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_mac_header(skb); - if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET) + if 
(!ipv6_mod_enabled() || family == AF_INET) err = IP_ECN_decapsulate(oiph, skb); else err = IP6_ECN_decapsulate(oiph, skb); if (unlikely(err)) { if (log_ecn_error) { - if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET) + if (!ipv6_mod_enabled() || family == AF_INET) net_info_ratelimited("non-ECT from %pI4 " "with TOS=%#x\n", &((struct iphdr *)oiph)->saddr, @@ -221,11 +221,12 @@ static struct socket *bareudp_create_sock(struct net *net, __be16 port) int err; memset(&udp_conf, 0, sizeof(udp_conf)); -#if IS_ENABLED(CONFIG_IPV6) - udp_conf.family = AF_INET6; -#else - udp_conf.family = AF_INET; -#endif + + if (ipv6_mod_enabled()) + udp_conf.family = AF_INET6; + else + udp_conf.family = AF_INET; + udp_conf.local_udp_port = port; /* Open UDP socket */ err = udp_sock_create(net, &udp_conf, &sock); @@ -448,7 +449,7 @@ static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) } rcu_read_lock(); - if (IS_ENABLED(CONFIG_IPV6) && info->mode & IP_TUNNEL_INFO_IPV6) + if (ipv6_mod_enabled() && info->mode & IP_TUNNEL_INFO_IPV6) err = bareudp6_xmit_skb(skb, dev, bareudp, info); else err = bareudp_xmit_skb(skb, dev, bareudp, info); @@ -478,7 +479,7 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, use_cache = ip_tunnel_dst_cache_usable(skb, info); - if (!IS_ENABLED(CONFIG_IPV6) || ip_tunnel_info_af(info) == AF_INET) { + if (!ipv6_mod_enabled() || ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; __be32 saddr; diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index c7c41d1fd038..5ae0d7c017cc 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -1392,7 +1392,7 @@ static int ucan_probe(struct usb_interface *intf, * Stage 3 for the final driver initialisation. */ - /* Prepare Memory for control transferes */ + /* Prepare Memory for control transfers */ ctl_msg_buffer = devm_kzalloc(&udev->dev, sizeof(union ucan_ctl_payload), GFP_KERNEL); @@ -1526,7 +1526,7 @@ static int ucan_probe(struct usb_interface *intf, ret = ucan_device_request_in(up, UCAN_DEVICE_GET_FW_STRING, 0, sizeof(union ucan_ctl_payload)); if (ret > 0) { - /* copy string while ensuring zero terminiation */ + /* copy string while ensuring zero termination */ strncpy(firmware_str, up->ctl_msg_buffer->raw, sizeof(union ucan_ctl_payload)); firmware_str[sizeof(union ucan_ctl_payload)] = '\0'; diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 122e63762979..77501f9c5915 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2110,7 +2110,8 @@ out: EXPORT_SYMBOL(b53_get_tag_protocol); int b53_mirror_add(struct dsa_switch *ds, int port, - struct dsa_mall_mirror_tc_entry *mirror, bool ingress) + struct dsa_mall_mirror_tc_entry *mirror, bool ingress, + struct netlink_ext_ack *extack) { struct b53_device *dev = ds->priv; u16 reg, loc; diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 86e7eb7924e7..3085b6cc7d40 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -373,7 +373,8 @@ int b53_mdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); int b53_mirror_add(struct dsa_switch *ds, int port, - struct dsa_mall_mirror_tc_entry *mirror, bool ingress); + struct dsa_mall_mirror_tc_entry *mirror, bool ingress, + struct netlink_ext_ack *extack); enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); void b53_mirror_del(struct dsa_switch *ds, int port, diff 
--git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h index 9d611895d3cf..03da369675c6 100644 --- a/drivers/net/dsa/microchip/ksz8.h +++ b/drivers/net/dsa/microchip/ksz8.h @@ -16,6 +16,7 @@ enum ksz_regs { REG_IND_DATA_HI, REG_IND_DATA_LO, REG_IND_MIB_CHECK, + REG_IND_BYTE, P_FORCE_CTRL, P_LINK_STATUS, P_LOCAL_CTRL, diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 5dc9899bc0a6..b2752978cb09 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -33,6 +33,7 @@ static const u8 ksz8795_regs[] = { [REG_IND_DATA_HI] = 0x71, [REG_IND_DATA_LO] = 0x75, [REG_IND_MIB_CHECK] = 0x74, + [REG_IND_BYTE] = 0xA0, [P_FORCE_CTRL] = 0x0C, [P_LINK_STATUS] = 0x0E, [P_LOCAL_CTRL] = 0x07, @@ -222,6 +223,25 @@ static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits, bits, set ? bits : 0); } +static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data) +{ + struct ksz8 *ksz8 = dev->priv; + const u8 *regs = ksz8->regs; + u16 ctrl_addr; + int ret = 0; + + mutex_lock(&dev->alu_mutex); + + ctrl_addr = IND_ACC_TABLE(table) | addr; + ret = ksz_write8(dev, regs[REG_IND_BYTE], data); + if (!ret) + ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); + + mutex_unlock(&dev->alu_mutex); + + return ret; +} + static int ksz8_reset_switch(struct ksz_device *dev) { if (ksz_is_ksz88x3(dev)) { @@ -1213,7 +1233,7 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, static int ksz8_port_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, - bool ingress) + bool ingress, struct netlink_ext_ack *extack) { struct ksz_device *dev = ds->priv; @@ -1391,6 +1411,23 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds) } } +static int ksz8_handle_global_errata(struct dsa_switch *ds) +{ + struct ksz_device *dev = ds->priv; + int ret = 0; + + /* KSZ87xx Errata DS80000687C. + * Module 2: Link drops with some EEE link partners. + * An issue with the EEE next page exchange between the + * KSZ879x/KSZ877x/KSZ876x and some EEE link partners may result in + * the link dropping. 
+ */ + if (dev->ksz87xx_eee_link_erratum) + ret = ksz8_ind_write8(dev, TABLE_EEE, REG_IND_EEE_GLOB2_HI, 0); + + return ret; +} + static int ksz8_setup(struct dsa_switch *ds) { struct ksz_device *dev = ds->priv; @@ -1458,7 +1495,7 @@ static int ksz8_setup(struct dsa_switch *ds) ds->configure_vlan_while_not_filtering = false; - return 0; + return ksz8_handle_global_errata(ds); } static void ksz8_get_caps(struct dsa_switch *ds, int port, @@ -1575,6 +1612,7 @@ struct ksz_chip_data { int num_statics; int cpu_ports; int port_cnt; + bool ksz87xx_eee_link_erratum; }; static const struct ksz_chip_data ksz8_switch_chips[] = { @@ -1586,6 +1624,7 @@ static const struct ksz_chip_data ksz8_switch_chips[] = { .num_statics = 8, .cpu_ports = 0x10, /* can be configured as cpu port */ .port_cnt = 5, /* total cpu and user ports */ + .ksz87xx_eee_link_erratum = true, }, { /* @@ -1609,6 +1648,7 @@ static const struct ksz_chip_data ksz8_switch_chips[] = { .num_statics = 8, .cpu_ports = 0x10, /* can be configured as cpu port */ .port_cnt = 4, /* total cpu and user ports */ + .ksz87xx_eee_link_erratum = true, }, { .chip_id = 0x8765, @@ -1618,6 +1658,7 @@ static const struct ksz_chip_data ksz8_switch_chips[] = { .num_statics = 8, .cpu_ports = 0x10, /* can be configured as cpu port */ .port_cnt = 5, /* total cpu and user ports */ + .ksz87xx_eee_link_erratum = true, }, { .chip_id = 0x8830, @@ -1652,6 +1693,8 @@ static int ksz8_switch_init(struct ksz_device *dev) dev->host_mask = chip->cpu_ports; dev->port_mask = (BIT(dev->phy_port_cnt) - 1) | chip->cpu_ports; + dev->ksz87xx_eee_link_erratum = + chip->ksz87xx_eee_link_erratum; break; } } diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h index 6b40bc25f7ff..d74defcd86b4 100644 --- a/drivers/net/dsa/microchip/ksz8795_reg.h +++ b/drivers/net/dsa/microchip/ksz8795_reg.h @@ -812,6 +812,10 @@ #define IND_ACC_TABLE(table) ((table) << 8) +/* */ +#define REG_IND_EEE_GLOB2_LO 0x34 +#define REG_IND_EEE_GLOB2_HI 0x35 + /* Driver set switch broadcast storm protection at 10% rate. 
*/ #define BROADCAST_STORM_PROT_RATE 10 diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c index 673589dc88ab..5f8d94aee774 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz8795_spi.c @@ -122,12 +122,23 @@ static const struct of_device_id ksz8795_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ksz8795_dt_ids); +static const struct spi_device_id ksz8795_spi_ids[] = { + { "ksz8765" }, + { "ksz8794" }, + { "ksz8795" }, + { "ksz8863" }, + { "ksz8873" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, ksz8795_spi_ids); + static struct spi_driver ksz8795_spi_driver = { .driver = { .name = "ksz8795-switch", .owner = THIS_MODULE, .of_match_table = of_match_ptr(ksz8795_dt_ids), }, + .id_table = ksz8795_spi_ids, .probe = ksz8795_spi_probe, .remove = ksz8795_spi_remove, .shutdown = ksz8795_spi_shutdown, diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index a4699481c746..8222c8a6c5ec 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1018,7 +1018,7 @@ exit: static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, - bool ingress) + bool ingress, struct netlink_ext_ack *extack) { struct ksz_device *dev = ds->priv; diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index 940bb9665f15..87ca464dad32 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -96,12 +96,24 @@ static const struct of_device_id ksz9477_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); +static const struct spi_device_id ksz9477_spi_ids[] = { + { "ksz9477" }, + { "ksz9897" }, + { "ksz9893" }, + { "ksz9563" }, + { "ksz8563" }, + { "ksz9567" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, ksz9477_spi_ids); + static struct spi_driver ksz9477_spi_driver = { .driver = { .name = "ksz9477-switch", .owner = THIS_MODULE, .of_match_table = of_match_ptr(ksz9477_dt_ids), }, + .id_table = ksz9477_spi_ids, .probe = ksz9477_spi_probe, .remove = ksz9477_spi_remove, .shutdown = ksz9477_spi_shutdown, diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index fa39ee73cbd2..485d4a948c38 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -77,6 +77,7 @@ struct ksz_device { phy_interface_t compat_interface; u32 regs_size; bool phy_errata_9477; + bool ksz87xx_eee_link_erratum; bool synclko_125; bool synclko_disable; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 669f008528ec..19f0035d4410 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1714,7 +1714,7 @@ static int mt753x_mirror_port_set(unsigned int id, u32 val) static int mt753x_port_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, - bool ingress) + bool ingress, struct netlink_ext_ack *extack) { struct mt7530_priv *priv = ds->priv; int monitor_port; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 84b90fc36c58..b36393ba6d49 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1667,24 +1667,31 @@ static int mv88e6xxx_pvt_setup(struct mv88e6xxx_chip *chip) return 0; } -static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) +static int mv88e6xxx_port_fast_age_fid(struct mv88e6xxx_chip *chip, int port, + u16 fid) { - struct mv88e6xxx_chip *chip = ds->priv; - int err; - 
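/* Editor's note - why the ksz SPI drivers gain an id_table: of_match_table
 * only matches devices instantiated from devicetree, while the
 * MODULE_DEVICE_TABLE(spi, ...) aliases generated from the spi_device_id
 * list are what let udev autoload the module and let the SPI core match
 * devices created without an OF node. Minimal sketch of the pattern
 * (probe/remove omitted for brevity):
 */
#include <linux/module.h>
#include <linux/spi/spi.h>

static const struct spi_device_id sketch_spi_ids[] = {
	{ "ksz8795" },
	{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(spi, sketch_spi_ids);

static struct spi_driver sketch_spi_driver = {
	.driver = {
		.name = "ksz8795-switch",
	},
	.id_table = sketch_spi_ids,	/* fallback match + modalias source */
};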
- if (dsa_to_port(ds, port)->lag) + if (dsa_to_port(chip->ds, port)->lag) /* Hardware is incapable of fast-aging a LAG through a * regular ATU move operation. Until we have something * more fancy in place this is a no-op. */ - return; + return -EOPNOTSUPP; + + return mv88e6xxx_g1_atu_remove(chip, fid, port, false); +} + +static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; mv88e6xxx_reg_lock(chip); - err = mv88e6xxx_g1_atu_remove(chip, 0, port, false); + err = mv88e6xxx_port_fast_age_fid(chip, port, 0); mv88e6xxx_reg_unlock(chip); if (err) - dev_err(ds->dev, "p%d: failed to flush ATU\n", port); + dev_err(chip->ds->dev, "p%d: failed to flush ATU: %d\n", + port, err); } static int mv88e6xxx_vtu_setup(struct mv88e6xxx_chip *chip) @@ -1791,6 +1798,187 @@ static int mv88e6xxx_atu_new(struct mv88e6xxx_chip *chip, u16 *fid) return mv88e6xxx_g1_atu_flush(chip, *fid, true); } +static int mv88e6xxx_stu_loadpurge(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry) +{ + if (!chip->info->ops->stu_loadpurge) + return -EOPNOTSUPP; + + return chip->info->ops->stu_loadpurge(chip, entry); +} + +static int mv88e6xxx_stu_setup(struct mv88e6xxx_chip *chip) +{ + struct mv88e6xxx_stu_entry stu = { + .valid = true, + .sid = 0 + }; + + if (!mv88e6xxx_has_stu(chip)) + return 0; + + /* Make sure that SID 0 is always valid. This is used by VTU + * entries that do not make use of the STU, e.g. when creating + * a VLAN upper on a port that is also part of a VLAN + * filtering bridge. + */ + return mv88e6xxx_stu_loadpurge(chip, &stu); +} + +static int mv88e6xxx_sid_get(struct mv88e6xxx_chip *chip, u8 *sid) +{ + DECLARE_BITMAP(busy, MV88E6XXX_N_SID) = { 0 }; + struct mv88e6xxx_mst *mst; + + __set_bit(0, busy); + + list_for_each_entry(mst, &chip->msts, node) + __set_bit(mst->stu.sid, busy); + + *sid = find_first_zero_bit(busy, MV88E6XXX_N_SID); + + return (*sid >= mv88e6xxx_max_sid(chip)) ? -ENOSPC : 0; +} + +static int mv88e6xxx_mst_put(struct mv88e6xxx_chip *chip, u8 sid) +{ + struct mv88e6xxx_mst *mst, *tmp; + int err; + + if (!sid) + return 0; + + list_for_each_entry_safe(mst, tmp, &chip->msts, node) { + if (mst->stu.sid != sid) + continue; + + if (!refcount_dec_and_test(&mst->refcnt)) + return 0; + + mst->stu.valid = false; + err = mv88e6xxx_stu_loadpurge(chip, &mst->stu); + if (err) { + refcount_set(&mst->refcnt, 1); + return err; + } + + list_del(&mst->node); + kfree(mst); + return 0; + } + + return -ENOENT; +} + +static int mv88e6xxx_mst_get(struct mv88e6xxx_chip *chip, struct net_device *br, + u16 msti, u8 *sid) +{ + struct mv88e6xxx_mst *mst; + int err, i; + + if (!mv88e6xxx_has_stu(chip)) { + err = -EOPNOTSUPP; + goto err; + } + + if (!msti) { + *sid = 0; + return 0; + } + + list_for_each_entry(mst, &chip->msts, node) { + if (mst->br == br && mst->msti == msti) { + refcount_inc(&mst->refcnt); + *sid = mst->stu.sid; + return 0; + } + } + + err = mv88e6xxx_sid_get(chip, sid); + if (err) + goto err; + + mst = kzalloc(sizeof(*mst), GFP_KERNEL); + if (!mst) { + err = -ENOMEM; + goto err; + } + + INIT_LIST_HEAD(&mst->node); + refcount_set(&mst->refcnt, 1); + mst->br = br; + mst->msti = msti; + mst->stu.valid = true; + mst->stu.sid = *sid; + + /* The bridge starts out all ports in the disabled state. But + * a STU state of disabled means to go by the port-global + * state. So we set all user port's initial state to blocking, + * to match the bridge's behavior. 
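/* Editor's note - SID allocation in mv88e6xxx_sid_get() above is first-fit
 * over a busy bitmap: SID 0 is reserved (it means "follow the port-global
 * STP state"), every SID already referenced by a struct mv88e6xxx_mst is
 * marked busy, and the lowest clear bit wins. Self-contained model of the
 * same idea, with the in-use SIDs passed as an array instead of walked
 * from the chip's MST list:
 */
#include <linux/bitmap.h>
#include <linux/errno.h>

#define SKETCH_N_SID 64

static int sid_alloc_sketch(const u8 *used_sids, int n_used, u8 max_sid,
			    u8 *sid)
{
	DECLARE_BITMAP(busy, SKETCH_N_SID) = { 0 };
	int i;

	__set_bit(0, busy);		/* SID 0: not available for MSTIs */
	for (i = 0; i < n_used; i++)
		__set_bit(used_sids[i], busy);

	*sid = find_first_zero_bit(busy, SKETCH_N_SID);
	return (*sid >= max_sid) ? -ENOSPC : 0;	/* mirrors the hunk's check */
}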
+ */ + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) + mst->stu.state[i] = dsa_is_user_port(chip->ds, i) ? + MV88E6XXX_PORT_CTL0_STATE_BLOCKING : + MV88E6XXX_PORT_CTL0_STATE_DISABLED; + + err = mv88e6xxx_stu_loadpurge(chip, &mst->stu); + if (err) + goto err_free; + + list_add_tail(&mst->node, &chip->msts); + return 0; + +err_free: + kfree(mst); +err: + return err; +} + +static int mv88e6xxx_port_mst_state_set(struct dsa_switch *ds, int port, + const struct switchdev_mst_state *st) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_mst *mst; + u8 state; + int err; + + if (!mv88e6xxx_has_stu(chip)) + return -EOPNOTSUPP; + + switch (st->state) { + case BR_STATE_DISABLED: + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + state = MV88E6XXX_PORT_CTL0_STATE_BLOCKING; + break; + case BR_STATE_LEARNING: + state = MV88E6XXX_PORT_CTL0_STATE_LEARNING; + break; + case BR_STATE_FORWARDING: + state = MV88E6XXX_PORT_CTL0_STATE_FORWARDING; + break; + default: + return -EINVAL; + } + + list_for_each_entry(mst, &chip->msts, node) { + if (mst->br == dsa_port_bridge_dev_get(dp) && + mst->msti == st->msti) { + if (mst->stu.state[port] == state) + return 0; + + mst->stu.state[port] = state; + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_stu_loadpurge(chip, &mst->stu); + mv88e6xxx_reg_unlock(chip); + return err; + } + } + + return -ENOENT; +} + static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, u16 vid) { @@ -2410,6 +2598,12 @@ static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip, if (err) return err; + if (!vlan.valid) { + err = mv88e6xxx_mst_put(chip, vlan.sid); + if (err) + return err; + } + return mv88e6xxx_g1_atu_remove(chip, vlan.fid, port, false); } @@ -2455,6 +2649,69 @@ unlock: return err; } +static int mv88e6xxx_port_vlan_fast_age(struct dsa_switch *ds, int port, u16 vid) +{ + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_vtu_entry vlan; + int err; + + mv88e6xxx_reg_lock(chip); + + err = mv88e6xxx_vtu_get(chip, vid, &vlan); + if (err) + goto unlock; + + err = mv88e6xxx_port_fast_age_fid(chip, port, vlan.fid); + +unlock: + mv88e6xxx_reg_unlock(chip); + + return err; +} + +static int mv88e6xxx_vlan_msti_set(struct dsa_switch *ds, + struct dsa_bridge bridge, + const struct switchdev_vlan_msti *msti) +{ + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_vtu_entry vlan; + u8 old_sid, new_sid; + int err; + + mv88e6xxx_reg_lock(chip); + + err = mv88e6xxx_vtu_get(chip, msti->vid, &vlan); + if (err) + goto unlock; + + if (!vlan.valid) { + err = -EINVAL; + goto unlock; + } + + old_sid = vlan.sid; + + err = mv88e6xxx_mst_get(chip, bridge.dev, msti->msti, &new_sid); + if (err) + goto unlock; + + if (new_sid != old_sid) { + vlan.sid = new_sid; + + err = mv88e6xxx_vtu_loadpurge(chip, &vlan); + if (err) { + mv88e6xxx_mst_put(chip, new_sid); + goto unlock; + } + } + + err = mv88e6xxx_mst_put(chip, old_sid); + +unlock: + mv88e6xxx_reg_unlock(chip); + return err; +} + static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db) @@ -3427,6 +3684,13 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; + /* Must be called after mv88e6xxx_vtu_setup (which flushes the + * VTU, thereby also flushing the STU). 
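/* Editor's note on mv88e6xxx_vlan_msti_set() above - the update follows the
 * usual "acquire new, repoint, release old" ordering so the VTU entry never
 * references a freed SID. Outline only (helper names shortened from the
 * mv88e6xxx_* calls in the hunk; not compilable as-is):
 *
 *	err = mst_get(chip, bridge.dev, msti->msti, &new_sid);  // ref new
 *	if (err)
 *		return err;
 *	if (new_sid != old_sid) {
 *		vlan.sid = new_sid;
 *		err = vtu_loadpurge(chip, &vlan);   // repoint the VLAN
 *		if (err) {
 *			mst_put(chip, new_sid);     // roll back on failure
 *			return err;
 *		}
 *	}
 *	return mst_put(chip, old_sid);  // old entry may now be purged
 */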
+ */ + err = mv88e6xxx_stu_setup(chip); + if (err) + goto unlock; + /* Setup Switch Port Registers */ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { if (dsa_is_unused_port(ds, i)) @@ -3882,6 +4146,8 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .phylink_get_caps = mv88e6095_phylink_get_caps, + .stu_getnext = mv88e6352_g1_stu_getnext, + .stu_loadpurge = mv88e6352_g1_stu_loadpurge, .set_max_frame_size = mv88e6185_g1_set_max_frame_size, }; @@ -4968,6 +5234,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .atu_set_hash = mv88e6165_g1_atu_set_hash, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, + .stu_getnext = mv88e6352_g1_stu_getnext, + .stu_loadpurge = mv88e6352_g1_stu_loadpurge, .serdes_get_lane = mv88e6352_serdes_get_lane, .serdes_pcs_get_state = mv88e6352_serdes_pcs_get_state, .serdes_pcs_config = mv88e6352_serdes_pcs_config, @@ -5033,6 +5301,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .atu_set_hash = mv88e6165_g1_atu_set_hash, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, + .stu_getnext = mv88e6390_g1_stu_getnext, + .stu_loadpurge = mv88e6390_g1_stu_loadpurge, .serdes_power = mv88e6390_serdes_power, .serdes_get_lane = mv88e6390_serdes_get_lane, /* Check status register pause & lpa register */ @@ -5098,6 +5368,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .atu_set_hash = mv88e6165_g1_atu_set_hash, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, + .stu_getnext = mv88e6390_g1_stu_getnext, + .stu_loadpurge = mv88e6390_g1_stu_loadpurge, .serdes_power = mv88e6390_serdes_power, .serdes_get_lane = mv88e6390x_serdes_get_lane, .serdes_pcs_get_state = mv88e6390_serdes_pcs_get_state, @@ -5166,6 +5438,8 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = { .atu_set_hash = mv88e6165_g1_atu_set_hash, .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, + .stu_getnext = mv88e6390_g1_stu_getnext, + .stu_loadpurge = mv88e6390_g1_stu_loadpurge, .serdes_power = mv88e6393x_serdes_power, .serdes_get_lane = mv88e6393x_serdes_get_lane, .serdes_pcs_get_state = mv88e6393x_serdes_pcs_get_state, @@ -5234,6 +5508,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_ports = 11, .num_internal_phys = 8, .max_vid = 4095, + .max_sid = 63, .port_base_addr = 0x10, .phy_base_addr = 0x0, .global1_addr = 0x1b, @@ -5487,6 +5762,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 9, .num_gpio = 16, .max_vid = 8191, + .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, .global1_addr = 0x1b, @@ -5510,6 +5786,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 9, .num_gpio = 16, .max_vid = 8191, + .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, .global1_addr = 0x1b, @@ -5532,6 +5809,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_ports = 11, /* 10 + Z80 */ .num_internal_phys = 9, .max_vid = 8191, + .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, .global1_addr = 0x1b, @@ -5554,6 +5832,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_ports = 11, /* 10 + Z80 */ .num_internal_phys = 9, .max_vid = 8191, + .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, .global1_addr = 0x1b, @@ -5576,6 +5855,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_ports = 11, /* 10 + Z80 */ .num_internal_phys = 
9, .max_vid = 8191, + .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, .global1_addr = 0x1b, @@ -5815,6 +6095,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .num_gpio = 15, .max_vid = 4095, + .max_sid = 63, .port_base_addr = 0x10, .phy_base_addr = 0x0, .global1_addr = 0x1b, @@ -5839,6 +6120,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 9, .num_gpio = 16, .max_vid = 8191, + .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, .global1_addr = 0x1b, @@ -5863,6 +6145,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 9, .num_gpio = 16, .max_vid = 8191, + .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, .global1_addr = 0x1b, @@ -5886,6 +6169,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_ports = 11, /* 10 + Z80 */ .num_internal_phys = 9, .max_vid = 8191, + .max_sid = 63, .port_base_addr = 0x0, .phy_base_addr = 0x0, .global1_addr = 0x1b, @@ -5954,6 +6238,7 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev) mutex_init(&chip->reg_lock); INIT_LIST_HEAD(&chip->mdios); idr_init(&chip->policies); + INIT_LIST_HEAD(&chip->msts); return chip; } @@ -6036,7 +6321,8 @@ static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port, static int mv88e6xxx_port_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, - bool ingress) + bool ingress, + struct netlink_ext_ack *extack) { enum mv88e6xxx_egress_direction direction = ingress ? MV88E6XXX_EGRESS_DIR_INGRESS : @@ -6486,10 +6772,13 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_pre_bridge_flags = mv88e6xxx_port_pre_bridge_flags, .port_bridge_flags = mv88e6xxx_port_bridge_flags, .port_stp_state_set = mv88e6xxx_port_stp_state_set, + .port_mst_state_set = mv88e6xxx_port_mst_state_set, .port_fast_age = mv88e6xxx_port_fast_age, + .port_vlan_fast_age = mv88e6xxx_port_vlan_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, .port_vlan_add = mv88e6xxx_port_vlan_add, .port_vlan_del = mv88e6xxx_port_vlan_del, + .vlan_msti_set = mv88e6xxx_vlan_msti_set, .port_fdb_add = mv88e6xxx_port_fdb_add, .port_fdb_del = mv88e6xxx_port_fdb_del, .port_fdb_dump = mv88e6xxx_port_fdb_dump, diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 30b92a265613..6a0b66354e1d 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -20,6 +20,7 @@ #define EDSA_HLEN 8 #define MV88E6XXX_N_FID 4096 +#define MV88E6XXX_N_SID 64 #define MV88E6XXX_FID_STANDALONE 0 #define MV88E6XXX_FID_BRIDGED 1 @@ -130,6 +131,7 @@ struct mv88e6xxx_info { unsigned int num_internal_phys; unsigned int num_gpio; unsigned int max_vid; + unsigned int max_sid; unsigned int port_base_addr; unsigned int phy_base_addr; unsigned int global1_addr; @@ -181,6 +183,12 @@ struct mv88e6xxx_vtu_entry { bool valid; bool policy; u8 member[DSA_MAX_PORTS]; + u8 state[DSA_MAX_PORTS]; /* Older silicon has no STU */ +}; + +struct mv88e6xxx_stu_entry { + u8 sid; + bool valid; u8 state[DSA_MAX_PORTS]; }; @@ -279,6 +287,7 @@ enum mv88e6xxx_region_id { MV88E6XXX_REGION_GLOBAL2, MV88E6XXX_REGION_ATU, MV88E6XXX_REGION_VTU, + MV88E6XXX_REGION_STU, MV88E6XXX_REGION_PVT, _MV88E6XXX_REGION_MAX, @@ -288,6 +297,16 @@ struct mv88e6xxx_region_priv { enum mv88e6xxx_region_id id; }; +struct mv88e6xxx_mst { + struct list_head node; + + refcount_t refcnt; + struct net_device *br; + u16 msti; + + struct mv88e6xxx_stu_entry stu; +}; + struct 
mv88e6xxx_chip { const struct mv88e6xxx_info *info; @@ -388,6 +407,9 @@ struct mv88e6xxx_chip { /* devlink regions */ struct devlink_region *regions[_MV88E6XXX_REGION_MAX]; + + /* Bridge MST to SID mappings */ + struct list_head msts; }; struct mv88e6xxx_bus_ops { @@ -602,6 +624,12 @@ struct mv88e6xxx_ops { int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); + /* Spanning Tree Unit operations */ + int (*stu_getnext)(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry); + int (*stu_loadpurge)(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry); + /* GPIO operations */ const struct mv88e6xxx_gpio_ops *gpio_ops; @@ -700,6 +728,11 @@ struct mv88e6xxx_hw_stat { int type; }; +static inline bool mv88e6xxx_has_stu(struct mv88e6xxx_chip *chip) +{ + return chip->info->max_sid > 0; +} + static inline bool mv88e6xxx_has_pvt(struct mv88e6xxx_chip *chip) { return chip->info->pvt; @@ -730,6 +763,11 @@ static inline unsigned int mv88e6xxx_max_vid(struct mv88e6xxx_chip *chip) return chip->info->max_vid; } +static inline unsigned int mv88e6xxx_max_sid(struct mv88e6xxx_chip *chip) +{ + return chip->info->max_sid; +} + static inline u16 mv88e6xxx_port_mask(struct mv88e6xxx_chip *chip) { return GENMASK((s32)mv88e6xxx_num_ports(chip) - 1, 0); diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 381068395c63..1266eabee086 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -503,6 +503,85 @@ static int mv88e6xxx_region_vtu_snapshot(struct devlink *dl, return 0; } +/** + * struct mv88e6xxx_devlink_stu_entry - Devlink STU entry + * @sid: Global1/3: SID, unknown filters and learning. + * @vid: Global1/6: Valid bit. + * @data: Global1/7-9: Membership data and priority override. + * @resvd: Reserved. In case we forgot something. + * + * The STU entry format varies between chipset generations. Peridot + * and Amethyst packs the STU data into Global1/7-8. Older silicon + * spreads the information across all three VTU data registers - + * inheriting the layout of even older hardware that had no STU at + * all. Since this is a low-level debug interface, copy all data + * verbatim and defer parsing to the consumer. + */ +struct mv88e6xxx_devlink_stu_entry { + u16 sid; + u16 vid; + u16 data[3]; + u16 resvd; +}; + +static int mv88e6xxx_region_stu_snapshot(struct devlink *dl, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u8 **data) +{ + struct mv88e6xxx_devlink_stu_entry *table, *entry; + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_stu_entry stu; + int err; + + table = kcalloc(mv88e6xxx_max_sid(chip) + 1, + sizeof(struct mv88e6xxx_devlink_stu_entry), + GFP_KERNEL); + if (!table) + return -ENOMEM; + + entry = table; + stu.sid = mv88e6xxx_max_sid(chip); + stu.valid = false; + + mv88e6xxx_reg_lock(chip); + + do { + err = mv88e6xxx_g1_stu_getnext(chip, &stu); + if (err) + break; + + if (!stu.valid) + break; + + err = err ? : mv88e6xxx_g1_read(chip, MV88E6352_G1_VTU_SID, + &entry->sid); + err = err ? : mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_VID, + &entry->vid); + err = err ? : mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_DATA1, + &entry->data[0]); + err = err ? : mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_DATA2, + &entry->data[1]); + err = err ? 
: mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_DATA3, + &entry->data[2]); + if (err) + break; + + entry++; + } while (stu.sid < mv88e6xxx_max_sid(chip)); + + mv88e6xxx_reg_unlock(chip); + + if (err) { + kfree(table); + return err; + } + + *data = (u8 *)table; + return 0; +} + static int mv88e6xxx_region_pvt_snapshot(struct devlink *dl, const struct devlink_region_ops *ops, struct netlink_ext_ack *extack, @@ -605,6 +684,12 @@ static struct devlink_region_ops mv88e6xxx_region_vtu_ops = { .destructor = kfree, }; +static struct devlink_region_ops mv88e6xxx_region_stu_ops = { + .name = "stu", + .snapshot = mv88e6xxx_region_stu_snapshot, + .destructor = kfree, +}; + static struct devlink_region_ops mv88e6xxx_region_pvt_ops = { .name = "pvt", .snapshot = mv88e6xxx_region_pvt_snapshot, @@ -640,6 +725,11 @@ static struct mv88e6xxx_region mv88e6xxx_regions[] = { .ops = &mv88e6xxx_region_vtu_ops /* calculated at runtime */ }, + [MV88E6XXX_REGION_STU] = { + .ops = &mv88e6xxx_region_stu_ops, + .cond = mv88e6xxx_has_stu, + /* calculated at runtime */ + }, [MV88E6XXX_REGION_PVT] = { .ops = &mv88e6xxx_region_pvt_ops, .size = MV88E6XXX_MAX_PVT_ENTRIES * sizeof(u16), @@ -706,6 +796,10 @@ int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds) size = (mv88e6xxx_max_vid(chip) + 1) * sizeof(struct mv88e6xxx_devlink_vtu_entry); break; + case MV88E6XXX_REGION_STU: + size = (mv88e6xxx_max_sid(chip) + 1) * + sizeof(struct mv88e6xxx_devlink_stu_entry); + break; } region = dsa_devlink_region_create(ds, ops, 1, size); diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 2c1607c858a1..65958b2a0d3a 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -348,6 +348,16 @@ int mv88e6390_g1_vtu_getnext(struct mv88e6xxx_chip *chip, int mv88e6390_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); int mv88e6xxx_g1_vtu_flush(struct mv88e6xxx_chip *chip); +int mv88e6xxx_g1_stu_getnext(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry); +int mv88e6352_g1_stu_getnext(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry); +int mv88e6352_g1_stu_loadpurge(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry); +int mv88e6390_g1_stu_getnext(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry); +int mv88e6390_g1_stu_loadpurge(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry); int mv88e6xxx_g1_vtu_prob_irq_setup(struct mv88e6xxx_chip *chip); void mv88e6xxx_g1_vtu_prob_irq_free(struct mv88e6xxx_chip *chip); int mv88e6xxx_g1_atu_get_next(struct mv88e6xxx_chip *chip, u16 fid); diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index b1bd9274a562..38e18f5811bf 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -44,8 +44,7 @@ static int mv88e6xxx_g1_vtu_fid_write(struct mv88e6xxx_chip *chip, /* Offset 0x03: VTU SID Register */ -static int mv88e6xxx_g1_vtu_sid_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) +static int mv88e6xxx_g1_vtu_sid_read(struct mv88e6xxx_chip *chip, u8 *sid) { u16 val; int err; @@ -54,15 +53,14 @@ static int mv88e6xxx_g1_vtu_sid_read(struct mv88e6xxx_chip *chip, if (err) return err; - entry->sid = val & MV88E6352_G1_VTU_SID_MASK; + *sid = val & MV88E6352_G1_VTU_SID_MASK; return 0; } -static int mv88e6xxx_g1_vtu_sid_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) +static int 
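/* Editor's note - the STU snapshot above uses the mv88e6xxx "GetNext from
 * the top" idiom: seed the iterator with the highest SID and valid=false
 * (so the helper programs the SID registers exactly once), then call
 * GetNext until an invalid entry or a wrap past max_sid ends the scan.
 * Skeleton, with stu_getnext() standing in for mv88e6xxx_g1_stu_getnext():
 *
 *	entry.sid = max_sid;
 *	entry.valid = false;
 *	do {
 *		err = stu_getnext(chip, &entry);
 *		if (err || !entry.valid)
 *			break;
 *		record(&entry);           // copy raw registers out verbatim
 *	} while (entry.sid < max_sid);    // reached the top: table exhausted
 */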
mv88e6xxx_g1_vtu_sid_write(struct mv88e6xxx_chip *chip, u8 sid) { - u16 val = entry->sid & MV88E6352_G1_VTU_SID_MASK; + u16 val = sid & MV88E6352_G1_VTU_SID_MASK; return mv88e6xxx_g1_write(chip, MV88E6352_G1_VTU_SID, val); } @@ -91,7 +89,7 @@ static int mv88e6xxx_g1_vtu_op(struct mv88e6xxx_chip *chip, u16 op) /* Offset 0x06: VTU VID Register */ static int mv88e6xxx_g1_vtu_vid_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) + bool *valid, u16 *vid) { u16 val; int err; @@ -100,25 +98,28 @@ static int mv88e6xxx_g1_vtu_vid_read(struct mv88e6xxx_chip *chip, if (err) return err; - entry->vid = val & 0xfff; + if (vid) { + *vid = val & 0xfff; - if (val & MV88E6390_G1_VTU_VID_PAGE) - entry->vid |= 0x1000; + if (val & MV88E6390_G1_VTU_VID_PAGE) + *vid |= 0x1000; + } - entry->valid = !!(val & MV88E6XXX_G1_VTU_VID_VALID); + if (valid) + *valid = !!(val & MV88E6XXX_G1_VTU_VID_VALID); return 0; } static int mv88e6xxx_g1_vtu_vid_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) + bool valid, u16 vid) { - u16 val = entry->vid & 0xfff; + u16 val = vid & 0xfff; - if (entry->vid & 0x1000) + if (vid & 0x1000) val |= MV88E6390_G1_VTU_VID_PAGE; - if (entry->valid) + if (valid) val |= MV88E6XXX_G1_VTU_VID_VALID; return mv88e6xxx_g1_write(chip, MV88E6XXX_G1_VTU_VID, val); @@ -147,7 +148,7 @@ static int mv88e6185_g1_vtu_stu_data_read(struct mv88e6xxx_chip *chip, } static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) + u8 *member, u8 *state) { u16 regs[3]; int err; @@ -160,36 +161,20 @@ static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, /* Extract MemberTag data */ for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { unsigned int member_offset = (i % 4) * 4; + unsigned int state_offset = member_offset + 2; - entry->member[i] = (regs[i / 4] >> member_offset) & 0x3; - } - - return 0; -} - -static int mv88e6185_g1_stu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) -{ - u16 regs[3]; - int err; - int i; - - err = mv88e6185_g1_vtu_stu_data_read(chip, regs); - if (err) - return err; + if (member) + member[i] = (regs[i / 4] >> member_offset) & 0x3; - /* Extract PortState data */ - for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { - unsigned int state_offset = (i % 4) * 4 + 2; - - entry->state[i] = (regs[i / 4] >> state_offset) & 0x3; + if (state) + state[i] = (regs[i / 4] >> state_offset) & 0x3; } return 0; } static int mv88e6185_g1_vtu_data_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) + u8 *member, u8 *state) { u16 regs[3] = { 0 }; int i; @@ -199,8 +184,11 @@ static int mv88e6185_g1_vtu_data_write(struct mv88e6xxx_chip *chip, unsigned int member_offset = (i % 4) * 4; unsigned int state_offset = member_offset + 2; - regs[i / 4] |= (entry->member[i] & 0x3) << member_offset; - regs[i / 4] |= (entry->state[i] & 0x3) << state_offset; + if (member) + regs[i / 4] |= (member[i] & 0x3) << member_offset; + + if (state) + regs[i / 4] |= (state[i] & 0x3) << state_offset; } /* Write all 3 VTU/STU Data registers */ @@ -268,48 +256,6 @@ static int mv88e6390_g1_vtu_data_write(struct mv88e6xxx_chip *chip, u8 *data) /* VLAN Translation Unit Operations */ -static int mv88e6xxx_g1_vtu_stu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) -{ - int err; - - err = mv88e6xxx_g1_vtu_sid_write(chip, entry); - if (err) - return err; - - err = mv88e6xxx_g1_vtu_op(chip, MV88E6XXX_G1_VTU_OP_STU_GET_NEXT); - if (err) - return err; - - err = 
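/* Editor's note - the mv88e6185_g1_vtu_data_read() rework above exploits
 * the register layout on older silicon: three 16-bit data registers, four
 * ports each, with MemberTag in bits [1:0] and PortState in bits [3:2] of
 * every per-port nibble. Passing NULL for the field a caller does not need
 * lets one accessor serve both the VTU (member) and STU (state) paths.
 * Self-contained extraction sketch:
 */
#include <linux/types.h>

static void unpack_vtu_stu_nibble(const u16 regs[3], int port,
				  u8 *member, u8 *state)
{
	unsigned int shift = (port % 4) * 4;	/* one nibble per port */
	u16 nibble = regs[port / 4] >> shift;

	if (member)
		*member = nibble & 0x3;		/* bits [1:0]: MemberTag */
	if (state)
		*state = (nibble >> 2) & 0x3;	/* bits [3:2]: PortState */
}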
mv88e6xxx_g1_vtu_sid_read(chip, entry); - if (err) - return err; - - return mv88e6xxx_g1_vtu_vid_read(chip, entry); -} - -static int mv88e6xxx_g1_vtu_stu_get(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *vtu) -{ - struct mv88e6xxx_vtu_entry stu; - int err; - - err = mv88e6xxx_g1_vtu_sid_read(chip, vtu); - if (err) - return err; - - stu.sid = vtu->sid - 1; - - err = mv88e6xxx_g1_vtu_stu_getnext(chip, &stu); - if (err) - return err; - - if (stu.sid != vtu->sid || !stu.valid) - return -EINVAL; - - return 0; -} - int mv88e6xxx_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry) { @@ -327,7 +273,7 @@ int mv88e6xxx_g1_vtu_getnext(struct mv88e6xxx_chip *chip, * write the VID only once, when the entry is given as invalid. */ if (!entry->valid) { - err = mv88e6xxx_g1_vtu_vid_write(chip, entry); + err = mv88e6xxx_g1_vtu_vid_write(chip, false, entry->vid); if (err) return err; } @@ -336,7 +282,7 @@ int mv88e6xxx_g1_vtu_getnext(struct mv88e6xxx_chip *chip, if (err) return err; - return mv88e6xxx_g1_vtu_vid_read(chip, entry); + return mv88e6xxx_g1_vtu_vid_read(chip, &entry->valid, &entry->vid); } int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, @@ -350,11 +296,7 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return err; if (entry->valid) { - err = mv88e6185_g1_vtu_data_read(chip, entry); - if (err) - return err; - - err = mv88e6185_g1_stu_data_read(chip, entry); + err = mv88e6185_g1_vtu_data_read(chip, entry->member, entry->state); if (err) return err; @@ -384,7 +326,7 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return err; if (entry->valid) { - err = mv88e6185_g1_vtu_data_read(chip, entry); + err = mv88e6185_g1_vtu_data_read(chip, entry->member, NULL); if (err) return err; @@ -392,12 +334,7 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, if (err) return err; - /* Fetch VLAN PortState data from the STU */ - err = mv88e6xxx_g1_vtu_stu_get(chip, entry); - if (err) - return err; - - err = mv88e6185_g1_stu_data_read(chip, entry); + err = mv88e6xxx_g1_vtu_sid_read(chip, &entry->sid); if (err) return err; } @@ -420,16 +357,11 @@ int mv88e6390_g1_vtu_getnext(struct mv88e6xxx_chip *chip, if (err) return err; - /* Fetch VLAN PortState data from the STU */ - err = mv88e6xxx_g1_vtu_stu_get(chip, entry); - if (err) - return err; - - err = mv88e6390_g1_vtu_data_read(chip, entry->state); + err = mv88e6xxx_g1_vtu_fid_read(chip, entry); if (err) return err; - err = mv88e6xxx_g1_vtu_fid_read(chip, entry); + err = mv88e6xxx_g1_vtu_sid_read(chip, &entry->sid); if (err) return err; } @@ -447,12 +379,12 @@ int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, if (err) return err; - err = mv88e6xxx_g1_vtu_vid_write(chip, entry); + err = mv88e6xxx_g1_vtu_vid_write(chip, entry->valid, entry->vid); if (err) return err; if (entry->valid) { - err = mv88e6185_g1_vtu_data_write(chip, entry); + err = mv88e6185_g1_vtu_data_write(chip, entry->member, entry->state); if (err) return err; @@ -479,27 +411,21 @@ int mv88e6352_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, if (err) return err; - err = mv88e6xxx_g1_vtu_vid_write(chip, entry); + err = mv88e6xxx_g1_vtu_vid_write(chip, entry->valid, entry->vid); if (err) return err; if (entry->valid) { - /* Write MemberTag and PortState data */ - err = mv88e6185_g1_vtu_data_write(chip, entry); - if (err) - return err; - - err = mv88e6xxx_g1_vtu_sid_write(chip, entry); + /* Write MemberTag data */ + err = mv88e6185_g1_vtu_data_write(chip, entry->member, NULL); if (err) return err; - /* Load STU 
entry */ - err = mv88e6xxx_g1_vtu_op(chip, - MV88E6XXX_G1_VTU_OP_STU_LOAD_PURGE); + err = mv88e6xxx_g1_vtu_fid_write(chip, entry); if (err) return err; - err = mv88e6xxx_g1_vtu_fid_write(chip, entry); + err = mv88e6xxx_g1_vtu_sid_write(chip, entry->sid); if (err) return err; } @@ -517,41 +443,113 @@ int mv88e6390_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, if (err) return err; - err = mv88e6xxx_g1_vtu_vid_write(chip, entry); + err = mv88e6xxx_g1_vtu_vid_write(chip, entry->valid, entry->vid); if (err) return err; if (entry->valid) { - /* Write PortState data */ - err = mv88e6390_g1_vtu_data_write(chip, entry->state); + /* Write MemberTag data */ + err = mv88e6390_g1_vtu_data_write(chip, entry->member); if (err) return err; - err = mv88e6xxx_g1_vtu_sid_write(chip, entry); + err = mv88e6xxx_g1_vtu_fid_write(chip, entry); if (err) return err; - /* Load STU entry */ - err = mv88e6xxx_g1_vtu_op(chip, - MV88E6XXX_G1_VTU_OP_STU_LOAD_PURGE); + err = mv88e6xxx_g1_vtu_sid_write(chip, entry->sid); if (err) return err; + } - /* Write MemberTag data */ - err = mv88e6390_g1_vtu_data_write(chip, entry->member); + /* Load/Purge VTU entry */ + return mv88e6xxx_g1_vtu_op(chip, MV88E6XXX_G1_VTU_OP_VTU_LOAD_PURGE); +} + +int mv88e6xxx_g1_vtu_flush(struct mv88e6xxx_chip *chip) +{ + int err; + + err = mv88e6xxx_g1_vtu_op_wait(chip); + if (err) + return err; + + return mv88e6xxx_g1_vtu_op(chip, MV88E6XXX_G1_VTU_OP_FLUSH_ALL); +} + +/* Spanning Tree Unit Operations */ + +int mv88e6xxx_g1_stu_getnext(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry) +{ + int err; + + err = mv88e6xxx_g1_vtu_op_wait(chip); + if (err) + return err; + + /* To get the next higher active SID, the STU GetNext operation can be + * started again without setting the SID registers since it already + * contains the last SID. + * + * To save a few hardware accesses and abstract this to the caller, + * write the SID only once, when the entry is given as invalid. 
+ */ + if (!entry->valid) { + err = mv88e6xxx_g1_vtu_sid_write(chip, entry->sid); if (err) return err; + } - err = mv88e6xxx_g1_vtu_fid_write(chip, entry); + err = mv88e6xxx_g1_vtu_op(chip, MV88E6XXX_G1_VTU_OP_STU_GET_NEXT); + if (err) + return err; + + err = mv88e6xxx_g1_vtu_vid_read(chip, &entry->valid, NULL); + if (err) + return err; + + if (entry->valid) { + err = mv88e6xxx_g1_vtu_sid_read(chip, &entry->sid); if (err) return err; } - /* Load/Purge VTU entry */ - return mv88e6xxx_g1_vtu_op(chip, MV88E6XXX_G1_VTU_OP_VTU_LOAD_PURGE); + return 0; } -int mv88e6xxx_g1_vtu_flush(struct mv88e6xxx_chip *chip) +int mv88e6352_g1_stu_getnext(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry) +{ + int err; + + err = mv88e6xxx_g1_stu_getnext(chip, entry); + if (err) + return err; + + if (!entry->valid) + return 0; + + return mv88e6185_g1_vtu_data_read(chip, NULL, entry->state); +} + +int mv88e6390_g1_stu_getnext(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry) +{ + int err; + + err = mv88e6xxx_g1_stu_getnext(chip, entry); + if (err) + return err; + + if (!entry->valid) + return 0; + + return mv88e6390_g1_vtu_data_read(chip, entry->state); +} + +int mv88e6352_g1_stu_loadpurge(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry) { int err; @@ -559,16 +557,59 @@ int mv88e6xxx_g1_vtu_flush(struct mv88e6xxx_chip *chip) if (err) return err; - return mv88e6xxx_g1_vtu_op(chip, MV88E6XXX_G1_VTU_OP_FLUSH_ALL); + err = mv88e6xxx_g1_vtu_vid_write(chip, entry->valid, 0); + if (err) + return err; + + err = mv88e6xxx_g1_vtu_sid_write(chip, entry->sid); + if (err) + return err; + + if (entry->valid) { + err = mv88e6185_g1_vtu_data_write(chip, NULL, entry->state); + if (err) + return err; + } + + /* Load/Purge STU entry */ + return mv88e6xxx_g1_vtu_op(chip, MV88E6XXX_G1_VTU_OP_STU_LOAD_PURGE); +} + +int mv88e6390_g1_stu_loadpurge(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_stu_entry *entry) +{ + int err; + + err = mv88e6xxx_g1_vtu_op_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g1_vtu_vid_write(chip, entry->valid, 0); + if (err) + return err; + + err = mv88e6xxx_g1_vtu_sid_write(chip, entry->sid); + if (err) + return err; + + if (entry->valid) { + err = mv88e6390_g1_vtu_data_write(chip, entry->state); + if (err) + return err; + } + + /* Load/Purge STU entry */ + return mv88e6xxx_g1_vtu_op(chip, MV88E6XXX_G1_VTU_OP_STU_LOAD_PURGE); } +/* VTU Violation Management */ + static irqreturn_t mv88e6xxx_g1_vtu_prob_irq_thread_fn(int irq, void *dev_id) { struct mv88e6xxx_chip *chip = dev_id; - struct mv88e6xxx_vtu_entry entry; + u16 val, vid; int spid; int err; - u16 val; mv88e6xxx_reg_lock(chip); @@ -580,7 +621,7 @@ static irqreturn_t mv88e6xxx_g1_vtu_prob_irq_thread_fn(int irq, void *dev_id) if (err) goto out; - err = mv88e6xxx_g1_vtu_vid_read(chip, &entry); + err = mv88e6xxx_g1_vtu_vid_read(chip, NULL, &vid); if (err) goto out; @@ -588,13 +629,13 @@ static irqreturn_t mv88e6xxx_g1_vtu_prob_irq_thread_fn(int irq, void *dev_id) if (val & MV88E6XXX_G1_VTU_OP_MEMBER_VIOLATION) { dev_err_ratelimited(chip->dev, "VTU member violation for vid %d, source port %d\n", - entry.vid, spid); + vid, spid); chip->ports[spid].vtu_member_violation++; } if (val & MV88E6XXX_G1_VTU_OP_MISS_VIOLATION) { dev_dbg_ratelimited(chip->dev, "VTU miss violation for vid %d, source port %d\n", - entry.vid, spid); + vid, spid); chip->ports[spid].vtu_miss_violation++; } diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 35b436a491e1..413b0006e9a2 100644 --- 
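/* Editor's note - both STU loadpurge variants above share one hardware
 * sequence: wait for the VTU/STU unit to go idle, write the valid bit (the
 * VID value itself is meaningless for the STU, hence vid_write(..., 0)),
 * write the SID, write per-port states only when loading, then kick the
 * STU_LOAD_PURGE op. Condensed (helper names shortened from the hunk):
 *
 *	err = vtu_op_wait(chip);                         // unit idle
 *	err = err ?: vid_write(chip, entry->valid, 0);   // valid bit only
 *	err = err ?: sid_write(chip, entry->sid);
 *	if (!err && entry->valid)
 *		err = data_write(chip, entry->state);    // states on load
 *	return err ?: vtu_op(chip, MV88E6XXX_G1_VTU_OP_STU_LOAD_PURGE);
 */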
a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1650,6 +1650,24 @@ static void felix_port_policer_del(struct dsa_switch *ds, int port) ocelot_port_policer_del(ocelot, port); } +static int felix_port_mirror_add(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror, + bool ingress, struct netlink_ext_ack *extack) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_port_mirror_add(ocelot, port, mirror->to_local_port, + ingress, extack); +} + +static void felix_port_mirror_del(struct dsa_switch *ds, int port, + struct dsa_mall_mirror_tc_entry *mirror) +{ + struct ocelot *ocelot = ds->priv; + + ocelot_port_mirror_del(ocelot, port, mirror->ingress); +} + static int felix_port_setup_tc(struct dsa_switch *ds, int port, enum tc_setup_type type, void *type_data) @@ -1799,6 +1817,44 @@ felix_mrp_del_ring_role(struct dsa_switch *ds, int port, return ocelot_mrp_del_ring_role(ocelot, port, mrp); } +static int felix_port_get_default_prio(struct dsa_switch *ds, int port) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_port_get_default_prio(ocelot, port); +} + +static int felix_port_set_default_prio(struct dsa_switch *ds, int port, + u8 prio) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_port_set_default_prio(ocelot, port, prio); +} + +static int felix_port_get_dscp_prio(struct dsa_switch *ds, int port, u8 dscp) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_port_get_dscp_prio(ocelot, port, dscp); +} + +static int felix_port_add_dscp_prio(struct dsa_switch *ds, int port, u8 dscp, + u8 prio) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_port_add_dscp_prio(ocelot, port, dscp, prio); +} + +static int felix_port_del_dscp_prio(struct dsa_switch *ds, int port, u8 dscp, + u8 prio) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_port_del_dscp_prio(ocelot, port, dscp, prio); +} + const struct dsa_switch_ops felix_switch_ops = { .get_tag_protocol = felix_get_tag_protocol, .change_tag_protocol = felix_change_tag_protocol, @@ -1842,6 +1898,8 @@ const struct dsa_switch_ops felix_switch_ops = { .port_max_mtu = felix_get_max_mtu, .port_policer_add = felix_port_policer_add, .port_policer_del = felix_port_policer_del, + .port_mirror_add = felix_port_mirror_add, + .port_mirror_del = felix_port_mirror_del, .cls_flower_add = felix_cls_flower_add, .cls_flower_del = felix_cls_flower_del, .cls_flower_stats = felix_cls_flower_stats, @@ -1862,6 +1920,11 @@ const struct dsa_switch_ops felix_switch_ops = { .port_mrp_del_ring_role = felix_mrp_del_ring_role, .tag_8021q_vlan_add = felix_tag_8021q_vlan_add, .tag_8021q_vlan_del = felix_tag_8021q_vlan_del, + .port_get_default_prio = felix_port_get_default_prio, + .port_set_default_prio = felix_port_set_default_prio, + .port_get_dscp_prio = felix_port_get_dscp_prio, + .port_add_dscp_prio = felix_port_add_dscp_prio, + .port_del_dscp_prio = felix_port_del_dscp_prio, }; struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index ee0dbf324268..d3ed0a7f8077 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -2473,7 +2473,7 @@ qca8k_port_mdb_del(struct dsa_switch *ds, int port, static int qca8k_port_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, - bool ingress) + bool ingress, struct netlink_ext_ack *extack) { struct qca8k_priv *priv = ds->priv; int monitor_port, ret; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c 
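/* Editor's note - the extra struct netlink_ext_ack * parameter threaded
 * through every port_mirror_add() implementation in this series (b53, ksz,
 * mt7530, mv88e6xxx, felix, qca8k, sja1105) lets a driver return a
 * human-readable reason instead of a bare errno. Typical use inside such a
 * callback (sketch; the condition and message text are illustrative):
 *
 *	if (other_mirror_port_in_use) {
 *		NL_SET_ERR_MSG_MOD(extack,
 *				   "Sniffer port is already configured");
 *		return -EBUSY;
 *	}
 */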
index 3358e979342c..b33841c6507a 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2847,7 +2847,7 @@ static int sja1105_mirror_apply(struct sja1105_private *priv, int from, int to, static int sja1105_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, - bool ingress) + bool ingress, struct netlink_ext_ack *extack) { return sja1105_mirror_apply(ds->priv, port, mirror->to_local_port, ingress, true); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 4ad3fc72e74e..a89b93cb4e26 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1181,8 +1181,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) alx->hw.mtu = mtu; alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE); netdev_update_features(netdev); - if (netif_running(netdev)) + if (netif_running(netdev)) { + mutex_lock(&alx->mtx); alx_reinit(alx); + mutex_unlock(&alx->mtx); + } return 0; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 447a75ea0cc1..dd5945c4bfec 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2533,6 +2533,4 @@ void bnx2x_register_phc(struct bnx2x *bp); * Meant for implicit re-load flows. */ int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp); -int bnx2x_init_firmware(struct bnx2x *bp); -void bnx2x_release_firmware(struct bnx2x *bp); #endif /* bnx2x.h */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 8d36ebbf08e1..5729a5ab059d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2364,24 +2364,30 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err) /* is another pf loaded on this engine? */ if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP && load_code != FW_MSG_CODE_DRV_LOAD_COMMON) { - /* build my FW version dword */ - u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) + - (bp->fw_rev << 16) + (bp->fw_eng << 24); + u8 loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng; + u32 loaded_fw; /* read loaded FW from chip */ - u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM); + loaded_fw = REG_RD(bp, XSEM_REG_PRAM); - DP(BNX2X_MSG_SP, "loaded fw %x, my fw %x\n", - loaded_fw, my_fw); + loaded_fw_major = loaded_fw & 0xff; + loaded_fw_minor = (loaded_fw >> 8) & 0xff; + loaded_fw_rev = (loaded_fw >> 16) & 0xff; + loaded_fw_eng = (loaded_fw >> 24) & 0xff; + + DP(BNX2X_MSG_SP, "loaded fw 0x%x major 0x%x minor 0x%x rev 0x%x eng 0x%x\n", + loaded_fw, loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng); /* abort nic load if version mismatch */ - if (my_fw != loaded_fw) { + if (loaded_fw_major != BCM_5710_FW_MAJOR_VERSION || + loaded_fw_minor != BCM_5710_FW_MINOR_VERSION || + loaded_fw_eng != BCM_5710_FW_ENGINEERING_VERSION || + loaded_fw_rev < BCM_5710_FW_REVISION_VERSION_V15) { if (print_err) - BNX2X_ERR("bnx2x with FW %x was already loaded which mismatches my %x FW. Aborting\n", - loaded_fw, my_fw); + BNX2X_ERR("loaded FW incompatible. 
Aborting\n"); else - BNX2X_DEV_INFO("bnx2x with FW %x was already loaded which mismatches my %x FW, possibly due to MF UNDI\n", - loaded_fw, my_fw); + BNX2X_DEV_INFO("loaded FW incompatible, possibly due to MF UNDI\n"); + return -EBUSY; } } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 4e85e7dbc2be..bede16760388 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -6178,7 +6178,7 @@ static int bnx2x_format_ver(u32 num, u8 *str, u16 *len) return -EINVAL; } - ret = scnprintf(str, *len, "%hx.%hx", num >> 16, num); + ret = scnprintf(str, *len, "%x.%x", num >> 16, num); *len -= ret; return 0; } @@ -6193,7 +6193,7 @@ static int bnx2x_3_seq_format_ver(u32 num, u8 *str, u16 *len) return -EINVAL; } - ret = scnprintf(str, *len, "%hhx.%hhx.%hhx", num >> 16, num >> 8, num); + ret = scnprintf(str, *len, "%x.%x.%x", num >> 16, num >> 8, num); *len -= ret; return 0; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index eedb48d945ed..c19b072f3a23 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12319,15 +12319,6 @@ static int bnx2x_init_bp(struct bnx2x *bp) bnx2x_read_fwinfo(bp); - if (IS_PF(bp)) { - rc = bnx2x_init_firmware(bp); - - if (rc) { - bnx2x_free_mem_bp(bp); - return rc; - } - } - func = BP_FUNC(bp); /* need to reset chip if undi was active */ @@ -12340,7 +12331,6 @@ static int bnx2x_init_bp(struct bnx2x *bp) rc = bnx2x_prev_unload(bp); if (rc) { - bnx2x_release_firmware(bp); bnx2x_free_mem_bp(bp); return rc; } @@ -13409,7 +13399,7 @@ do { \ (u8 *)bp->arr, len); \ } while (0) -int bnx2x_init_firmware(struct bnx2x *bp) +static int bnx2x_init_firmware(struct bnx2x *bp) { const char *fw_file_name, *fw_file_name_v15; struct bnx2x_fw_file_hdr *fw_hdr; @@ -13509,7 +13499,7 @@ request_firmware_exit: return rc; } -void bnx2x_release_firmware(struct bnx2x *bp) +static void bnx2x_release_firmware(struct bnx2x *bp) { kfree(bp->init_ops_offsets); kfree(bp->init_ops); @@ -14026,7 +14016,6 @@ static int bnx2x_init_one(struct pci_dev *pdev, return 0; init_one_freemem: - bnx2x_release_firmware(bp); bnx2x_free_mem_bp(bp); init_one_exit: diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index cfe09117fe6c..9a41145dadfc 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2287,8 +2287,10 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, dma_length_status = status->length_status; if (dev->features & NETIF_F_RXCSUM) { rx_csum = (__force __be16)(status->rx_csum & 0xffff); - skb->csum = (__force __wsum)ntohs(rx_csum); - skb->ip_summed = CHECKSUM_COMPLETE; + if (rx_csum) { + skb->csum = (__force __wsum)ntohs(rx_csum); + skb->ip_summed = CHECKSUM_COMPLETE; + } } /* DMA flags and length are still valid no matter how diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 939fa9db6a2e..4b047255d928 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -2077,8 +2077,10 @@ static int dpaa2_eth_open(struct net_device *net_dev) goto enable_err; } - if (dpaa2_eth_is_type_phy(priv)) + if (dpaa2_eth_is_type_phy(priv)) { + dpaa2_mac_start(priv->mac); phylink_start(priv->mac->phylink); + } 
return 0; @@ -2153,6 +2155,7 @@ static int dpaa2_eth_stop(struct net_device *net_dev) if (dpaa2_eth_is_type_phy(priv)) { phylink_stop(priv->mac->phylink); + dpaa2_mac_stop(priv->mac); } else { netif_tx_stop_all_queues(net_dev); netif_carrier_off(net_dev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 521f036d1c00..c48811d3bcd5 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -3,6 +3,7 @@ #include <linux/acpi.h> #include <linux/pcs-lynx.h> +#include <linux/phy/phy.h> #include <linux/property.h> #include "dpaa2-eth.h" @@ -11,6 +12,28 @@ #define phylink_to_dpaa2_mac(config) \ container_of((config), struct dpaa2_mac, phylink_config) +#define DPMAC_PROTOCOL_CHANGE_VER_MAJOR 4 +#define DPMAC_PROTOCOL_CHANGE_VER_MINOR 8 + +#define DPAA2_MAC_FEATURE_PROTOCOL_CHANGE BIT(0) + +static int dpaa2_mac_cmp_ver(struct dpaa2_mac *mac, + u16 ver_major, u16 ver_minor) +{ + if (mac->ver_major == ver_major) + return mac->ver_minor - ver_minor; + return mac->ver_major - ver_major; +} + +static void dpaa2_mac_detect_features(struct dpaa2_mac *mac) +{ + mac->features = 0; + + if (dpaa2_mac_cmp_ver(mac, DPMAC_PROTOCOL_CHANGE_VER_MAJOR, + DPMAC_PROTOCOL_CHANGE_VER_MINOR) >= 0) + mac->features |= DPAA2_MAC_FEATURE_PROTOCOL_CHANGE; +} + static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode) { *if_mode = PHY_INTERFACE_MODE_NA; @@ -38,6 +61,29 @@ static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode) return 0; } +static enum dpmac_eth_if dpmac_eth_if_mode(phy_interface_t if_mode) +{ + switch (if_mode) { + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + return DPMAC_ETH_IF_RGMII; + case PHY_INTERFACE_MODE_USXGMII: + return DPMAC_ETH_IF_USXGMII; + case PHY_INTERFACE_MODE_QSGMII: + return DPMAC_ETH_IF_QSGMII; + case PHY_INTERFACE_MODE_SGMII: + return DPMAC_ETH_IF_SGMII; + case PHY_INTERFACE_MODE_10GBASER: + return DPMAC_ETH_IF_XFI; + case PHY_INTERFACE_MODE_1000BASEX: + return DPMAC_ETH_IF_1000BASEX; + default: + return DPMAC_ETH_IF_MII; + } +} + static struct fwnode_handle *dpaa2_mac_get_node(struct device *dev, u16 dpmac_id) { @@ -125,6 +171,19 @@ static void dpaa2_mac_config(struct phylink_config *config, unsigned int mode, if (err) netdev_err(mac->net_dev, "%s: dpmac_set_link_state() = %d\n", __func__, err); + + if (!mac->serdes_phy) + return; + + /* This happens only if we support changing of protocol at runtime */ + err = dpmac_set_protocol(mac->mc_io, 0, mac->mc_dev->mc_handle, + dpmac_eth_if_mode(state->interface)); + if (err) + netdev_err(mac->net_dev, "dpmac_set_protocol() = %d\n", err); + + err = phy_set_mode_ext(mac->serdes_phy, PHY_MODE_ETHERNET, state->interface); + if (err) + netdev_err(mac->net_dev, "phy_set_mode_ext() = %d\n", err); } static void dpaa2_mac_link_up(struct phylink_config *config, @@ -235,10 +294,66 @@ static void dpaa2_pcs_destroy(struct dpaa2_mac *mac) } } +static void dpaa2_mac_set_supported_interfaces(struct dpaa2_mac *mac) +{ + int intf, err; + + /* We support the current interface mode, and if we have a PCS + * similar interface modes that do not require the SerDes lane to be + * reconfigured. 
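/* Editor's note - dpaa2-mac now gates runtime protocol switching on the MC
 * firmware's DPMAC API version, fetched once at open time: the feature bit
 * is set only when (major, minor) >= (4, 8). The comparison is plain
 * lexicographic ordering on the two-part version, as sketched here:
 */
static int api_ver_cmp_sketch(u16 major, u16 minor,
			      u16 want_major, u16 want_minor)
{
	if (major == want_major)
		return minor - want_minor;	/* same major: minor decides */
	return major - want_major;		/* otherwise major decides */
}

/* feature is enabled iff api_ver_cmp_sketch(maj, min, 4, 8) >= 0 */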
+ */ + __set_bit(mac->if_mode, mac->phylink_config.supported_interfaces); + if (mac->pcs) { + switch (mac->if_mode) { + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_SGMII: + __set_bit(PHY_INTERFACE_MODE_1000BASEX, + mac->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_SGMII, + mac->phylink_config.supported_interfaces); + break; + + default: + break; + } + } + + if (!mac->serdes_phy) + return; + + /* In case we have access to the SerDes phy/lane, then ask the SerDes + * driver what interfaces are supported based on the current PLL + * configuration. + */ + for (intf = 0; intf < PHY_INTERFACE_MODE_MAX; intf++) { + if (intf == PHY_INTERFACE_MODE_NA) + continue; + + err = phy_validate(mac->serdes_phy, PHY_MODE_ETHERNET, intf, NULL); + if (err) + continue; + + __set_bit(intf, mac->phylink_config.supported_interfaces); + } +} + +void dpaa2_mac_start(struct dpaa2_mac *mac) +{ + if (mac->serdes_phy) + phy_power_on(mac->serdes_phy); +} + +void dpaa2_mac_stop(struct dpaa2_mac *mac) +{ + if (mac->serdes_phy) + phy_power_off(mac->serdes_phy); +} + int dpaa2_mac_connect(struct dpaa2_mac *mac) { struct net_device *net_dev = mac->net_dev; struct fwnode_handle *dpmac_node; + struct phy *serdes_phy = NULL; struct phylink *phylink; int err; @@ -255,6 +370,20 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) return -EINVAL; mac->if_mode = err; + if (mac->features & DPAA2_MAC_FEATURE_PROTOCOL_CHANGE && + !phy_interface_mode_is_rgmii(mac->if_mode) && + is_of_node(dpmac_node)) { + serdes_phy = of_phy_get(to_of_node(dpmac_node), NULL); + + if (serdes_phy == ERR_PTR(-ENODEV)) + serdes_phy = NULL; + else if (IS_ERR(serdes_phy)) + return PTR_ERR(serdes_phy); + else + phy_init(serdes_phy); + } + mac->serdes_phy = serdes_phy; + /* The MAC does not have the capability to add RGMII delays so * error out if the interface mode requests them and there is no PHY * to act upon them @@ -283,25 +412,7 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) MAC_10FD | MAC_100FD | MAC_1000FD | MAC_2500FD | MAC_5000FD | MAC_10000FD; - /* We support the current interface mode, and if we have a PCS - * similar interface modes that do not require the PLLs to be - * reconfigured. - */ - __set_bit(mac->if_mode, mac->phylink_config.supported_interfaces); - if (mac->pcs) { - switch (mac->if_mode) { - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_SGMII: - __set_bit(PHY_INTERFACE_MODE_1000BASEX, - mac->phylink_config.supported_interfaces); - __set_bit(PHY_INTERFACE_MODE_SGMII, - mac->phylink_config.supported_interfaces); - break; - - default: - break; - } - } + dpaa2_mac_set_supported_interfaces(mac); phylink = phylink_create(&mac->phylink_config, dpmac_node, mac->if_mode, @@ -336,6 +447,8 @@ void dpaa2_mac_disconnect(struct dpaa2_mac *mac) phylink_disconnect_phy(mac->phylink); phylink_destroy(mac->phylink); dpaa2_pcs_destroy(mac); + of_phy_put(mac->serdes_phy); + mac->serdes_phy = NULL; } int dpaa2_mac_open(struct dpaa2_mac *mac) @@ -359,6 +472,14 @@ int dpaa2_mac_open(struct dpaa2_mac *mac) goto err_close_dpmac; } + err = dpmac_get_api_version(mac->mc_io, 0, &mac->ver_major, &mac->ver_minor); + if (err) { + netdev_err(net_dev, "dpmac_get_api_version() = %d\n", err); + goto err_close_dpmac; + } + + dpaa2_mac_detect_features(mac); + /* Find the device node representing the MAC device and link the device * behind the associated netdev to it. 
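/* Editor's note - lifecycle of the optional SerDes lane phy introduced in
 * these dpaa2 hunks, using the generic PHY API: acquired and phy_init()ed
 * in dpaa2_mac_connect() (a missing lane, -ENODEV, is not an error - the
 * feature is simply off), powered up and down from the netdev open/stop
 * paths via dpaa2_mac_start()/dpaa2_mac_stop(), and released in
 * dpaa2_mac_disconnect(). Condensed outline:
 *
 *	phy = of_phy_get(node, NULL);
 *	if (phy == ERR_PTR(-ENODEV))
 *		phy = NULL;              // lane not described in DT
 *	else if (IS_ERR(phy))
 *		return PTR_ERR(phy);
 *	else
 *		phy_init(phy);           // initialised, still powered off
 *	...
 *	phy_power_on(phy);               // dpaa2_mac_start(), .ndo_open
 *	phy_power_off(phy);              // dpaa2_mac_stop(), .ndo_stop
 *	of_phy_put(phy);                 // dpaa2_mac_disconnect()
 */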
*/ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h index 1331a8477fe4..a58cab188a99 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h @@ -17,6 +17,8 @@ struct dpaa2_mac { struct net_device *net_dev; struct fsl_mc_io *mc_io; struct dpmac_attr attr; + u16 ver_major, ver_minor; + unsigned long features; struct phylink_config phylink_config; struct phylink *phylink; @@ -24,6 +26,8 @@ struct dpaa2_mac { enum dpmac_link_type if_link_type; struct phylink_pcs *pcs; struct fwnode_handle *fw_node; + + struct phy *serdes_phy; }; bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev, @@ -43,4 +47,8 @@ void dpaa2_mac_get_strings(u8 *data); void dpaa2_mac_get_ethtool_stats(struct dpaa2_mac *mac, u64 *data); +void dpaa2_mac_start(struct dpaa2_mac *mac); + +void dpaa2_mac_stop(struct dpaa2_mac *mac); + #endif /* DPAA2_MAC_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 9a561072aa4a..e507e9065214 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -703,8 +703,10 @@ static int dpaa2_switch_port_open(struct net_device *netdev) dpaa2_switch_enable_ctrl_if_napi(ethsw); - if (dpaa2_switch_port_is_type_phy(port_priv)) + if (dpaa2_switch_port_is_type_phy(port_priv)) { + dpaa2_mac_start(port_priv->mac); phylink_start(port_priv->mac->phylink); + } return 0; } @@ -717,6 +719,7 @@ static int dpaa2_switch_port_stop(struct net_device *netdev) if (dpaa2_switch_port_is_type_phy(port_priv)) { phylink_stop(port_priv->mac->phylink); + dpaa2_mac_stop(port_priv->mac); } else { netif_tx_stop_all_queues(netdev); netif_carrier_off(netdev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h index a24b20f76938..e9ac2ecef3be 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h @@ -19,11 +19,15 @@ #define DPMAC_CMDID_CLOSE DPMAC_CMD(0x800) #define DPMAC_CMDID_OPEN DPMAC_CMD(0x80c) +#define DPMAC_CMDID_GET_API_VERSION DPMAC_CMD(0xa0c) + #define DPMAC_CMDID_GET_ATTR DPMAC_CMD(0x004) #define DPMAC_CMDID_SET_LINK_STATE DPMAC_CMD_V2(0x0c3) #define DPMAC_CMDID_GET_COUNTER DPMAC_CMD(0x0c4) +#define DPMAC_CMDID_SET_PROTOCOL DPMAC_CMD(0x0c7) + /* Macros for accessing command fields smaller than 1byte */ #define DPMAC_MASK(field) \ GENMASK(DPMAC_##field##_SHIFT + DPMAC_##field##_SIZE - 1, \ @@ -70,4 +74,12 @@ struct dpmac_rsp_get_counter { __le64 counter; }; +struct dpmac_rsp_get_api_version { + __le16 major; + __le16 minor; +}; + +struct dpmac_cmd_set_protocol { + u8 eth_if; +}; #endif /* _FSL_DPMAC_CMD_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.c b/drivers/net/ethernet/freescale/dpaa2/dpmac.c index d5997b654562..f440a4c3b70c 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpmac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpmac.c @@ -181,3 +181,57 @@ int dpmac_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, return 0; } + +/** + * dpmac_get_api_version() - Get Data Path MAC version + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @major_ver: Major version of data path mac API + * @minor_ver: Minor version of data path mac API + * + * Return: '0' on Success; Error code otherwise. 
+ */ +int dpmac_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags, + u16 *major_ver, u16 *minor_ver) +{ + struct dpmac_rsp_get_api_version *rsp_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + cmd.header = mc_encode_cmd_header(DPMAC_CMDID_GET_API_VERSION, + cmd_flags, + 0); + + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dpmac_rsp_get_api_version *)cmd.params; + *major_ver = le16_to_cpu(rsp_params->major); + *minor_ver = le16_to_cpu(rsp_params->minor); + + return 0; +} + +/** + * dpmac_set_protocol() - Reconfigure the DPMAC protocol + * @mc_io: Pointer to opaque I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPMAC object + * @protocol: New protocol for the DPMAC to be reconfigured in. + * + * Return: '0' on Success; Error code otherwise. + */ +int dpmac_set_protocol(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + enum dpmac_eth_if protocol) +{ + struct dpmac_cmd_set_protocol *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPMAC_CMDID_SET_PROTOCOL, + cmd_flags, token); + cmd_params = (struct dpmac_cmd_set_protocol *)cmd.params; + cmd_params->eth_if = protocol; + + return mc_send_command(mc_io, &cmd); +} diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.h b/drivers/net/ethernet/freescale/dpaa2/dpmac.h index 8f7ceb731282..17488819ef68 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpmac.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpmac.h @@ -205,4 +205,9 @@ enum dpmac_counter_id { int dpmac_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, enum dpmac_counter_id id, u64 *value); +int dpmac_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags, + u16 *major_ver, u16 *minor_ver); + +int dpmac_set_protocol(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + enum dpmac_eth_if protocol); #endif /* __FSL_DPMAC_H */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c index 70e6d97b380f..1c8f5cc6dec4 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c @@ -147,7 +147,7 @@ int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum) /* return all Fs if nothing was there */ if (enetc_mdio_rd(mdio_priv, ENETC_MDIO_CFG) & MDIO_CFG_RD_ER) { dev_dbg(&bus->dev, - "Error while reading PHY%d reg at %d.%hhu\n", + "Error while reading PHY%d reg at %d.%d\n", phy_id, dev_addr, regnum); return 0xffff; } diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index ef8058a17188..ec90da1de030 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -243,7 +243,7 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && !priv->has_a011043) { dev_dbg(&bus->dev, - "Error while reading PHY%d reg at %d.%hhu\n", + "Error while reading PHY%d reg at %d.%d\n", phy_id, dev_addr, regnum); ret = 0xffff; } else { diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index e4e98aa7745f..021bbf308d68 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -439,7 +439,7 @@ static bool gve_rx_ctx_init(struct gve_rx_ctx *ctx, struct gve_rx_ring *rx) if (frag_size > rx->packet_buffer_size) { packet_size_error = true; netdev_warn(priv->dev, - "RX fragment error: 
packet_buffer_size=%d, frag_size=%d, droping packet.", + "RX fragment error: packet_buffer_size=%d, frag_size=%d, dropping packet.", rx->packet_buffer_size, be16_to_cpu(desc->len)); } page_info = &rx->data.page_info[idx]; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 089f4444b7e3..1f87a8a3fe32 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1225,7 +1225,7 @@ static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev) ret = hclge_tm_ets_tc_dwrr_cfg(hdev); if (ret == -EOPNOTSUPP) { dev_warn(&hdev->pdev->dev, - "fw %08x does't support ets tc weight cmd\n", + "fw %08x doesn't support ets tc weight cmd\n", hdev->fw_version); ret = 0; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 7c4b75a5e1b5..190590d32faf 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2705,6 +2705,13 @@ restart_watchdog: queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); } +/** + * iavf_disable_vf - disable VF + * @adapter: board private structure + * + * Set communication failed flag and free all resources. + * NOTE: This function is expected to be called with the crit_lock held. + **/ static void iavf_disable_vf(struct iavf_adapter *adapter) { struct iavf_mac_filter *f, *ftmp; @@ -2759,7 +2766,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); adapter->netdev->flags &= ~IFF_UP; - mutex_unlock(&adapter->crit_lock); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); @@ -4778,6 +4784,13 @@ static void iavf_remove(struct pci_dev *pdev) struct iavf_hw *hw = &adapter->hw; int err; + /* When reboot/shutdown is in progress, there is no need to do anything + * as the adapter is already in the __IAVF_REMOVE state, which was set + * during the iavf_shutdown() callback. + */ + if (adapter->state == __IAVF_REMOVE) + return; + set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); /* Wait until port initialization is complete. * There are flows where register/unregister netdev may race.
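A note on the firmware gating used in the dpaa2 changes above: dpaa2_mac_detect_features() advertises DPAA2_MAC_FEATURE_PROTOCOL_CHANGE only when dpmac_get_api_version() reports DPMAC API 4.8 or newer, and dpaa2_mac_connect() in turn only looks up a SerDes PHY (and dpaa2_mac_config() only calls dpmac_set_protocol() and phy_set_mode_ext()) when that feature bit is set. The two-level major/minor comparison behind the check can be sketched as a standalone user-space program; dpmac_ver_cmp() below is an illustrative stand-in for dpaa2_mac_cmp_ver(), not a kernel symbol:

#include <stdio.h>

/* Compare a reported (major, minor) API version against a reference
 * pair; the result is negative, zero or positive like strcmp(), which
 * is the same convention dpaa2_mac_cmp_ver() uses.
 */
static int dpmac_ver_cmp(unsigned int major, unsigned int minor,
			 unsigned int ref_major, unsigned int ref_minor)
{
	if (major == ref_major)
		return (int)minor - (int)ref_minor;
	return (int)major - (int)ref_major;
}

int main(void)
{
	/* Runtime protocol change requires DPMAC API >= 4.8 */
	static const unsigned int tests[][2] = { { 4, 7 }, { 4, 8 }, { 5, 0 } };
	unsigned int i;

	for (i = 0; i < 3; i++)
		printf("DPMAC API %u.%u: protocol change %ssupported\n",
		       tests[i][0], tests[i][1],
		       dpmac_ver_cmp(tests[i][0], tests[i][1], 4, 8) >= 0 ?
		       "" : "not ");
	return 0;
}

Only the comparison logic is taken from the patch; the reference version 4.8 corresponds to DPMAC_PROTOCOL_CHANGE_VER_MAJOR/MINOR defined in dpaa2-mac.c.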
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 44b8464b7663..9183d480b70b 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -34,11 +34,13 @@ ice-y := ice_main.o \ ice_repr.o \ ice_tc_lib.o ice-$(CONFIG_PCI_IOV) += \ + ice_sriov.o \ + ice_virtchnl.o \ ice_virtchnl_allowlist.o \ ice_virtchnl_fdir.o \ - ice_sriov.o \ + ice_vf_mbx.o \ ice_vf_vsi_vlan_ops.o \ - ice_virtchnl_pf.o + ice_vf_lib.o ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o ice-$(CONFIG_TTY) += ice_gnss.o ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 1a130ff562af..e9aa1fb43c3a 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -52,9 +52,6 @@ #include <net/udp_tunnel.h> #include <net/vxlan.h> #include <net/gtp.h> -#if IS_ENABLED(CONFIG_DCB) -#include <scsi/iscsi_proto.h> -#endif /* CONFIG_DCB */ #include "ice_devids.h" #include "ice_type.h" #include "ice_txrx.h" @@ -64,8 +61,8 @@ #include "ice_flow.h" #include "ice_sched.h" #include "ice_idc_int.h" -#include "ice_virtchnl_pf.h" #include "ice_sriov.h" +#include "ice_vf_mbx.h" #include "ice_ptp.h" #include "ice_fdir.h" #include "ice_xsk.h" diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.h b/drivers/net/ethernet/intel/ice/ice_arfs.h index 80ed76f0cace..9669ad9bf7b5 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.h +++ b/drivers/net/ethernet/intel/ice/ice_arfs.h @@ -3,6 +3,9 @@ #ifndef _ICE_ARFS_H_ #define _ICE_ARFS_H_ + +#include "ice_fdir.h" + enum ice_arfs_fltr_state { ICE_ARFS_INACTIVE, ICE_ARFS_ACTIVE, diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index a3094470d31d..136d7911adb4 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -5,7 +5,7 @@ #include "ice_base.h" #include "ice_lib.h" #include "ice_dcb_lib.h" -#include "ice_virtchnl_pf.h" +#include "ice_sriov.h" static bool ice_alloc_rx_buf_zc(struct ice_rx_ring *rx_ring) { diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 1efe6b2c32f0..872ea7d2332d 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -6,12 +6,12 @@ #include <linux/bitfield.h> -#include "ice.h" #include "ice_type.h" #include "ice_nvm.h" #include "ice_flex_pipe.h" -#include "ice_switch.h" #include <linux/avf/virtchnl.h> +#include "ice_switch.h" +#include "ice_fdir.h" #define ICE_SQ_SEND_DELAY_TIME_MS 10 #define ICE_SQ_SEND_MAX_EXECUTE 3 diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h index d73348f279f7..6abf28a14291 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb.h @@ -5,6 +5,7 @@ #define _ICE_DCB_H_ #include "ice_type.h" +#include <scsi/iscsi_proto.h> #define ICE_DCBX_STATUS_NOT_STARTED 0 #define ICE_DCBX_STATUS_IN_PROGRESS 1 diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 6a336e8d4e4d..c73cdab44f70 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -4,6 +4,7 @@ #include "ice_common.h" #include "ice_flex_pipe.h" #include "ice_flow.h" +#include "ice.h" /* For supporting double VLAN mode, it is necessary to enable or disable certain * boost tcam 
entries. The metadata labels names that match the following diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index beed4838dcbe..ef103e47a8dc 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -3,6 +3,7 @@ #include "ice_common.h" #include "ice_flow.h" +#include <net/gre.h> /* Describe properties of a protocol header field */ struct ice_flow_field_info { diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h index 84b6e4464a21..b465d27d9b80 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.h +++ b/drivers/net/ethernet/intel/ice/ice_flow.h @@ -4,6 +4,8 @@ #ifndef _ICE_FLOW_H_ #define _ICE_FLOW_H_ +#include "ice_flex_type.h" + #define ICE_FLOW_ENTRY_HANDLE_INVAL 0 #define ICE_FLOW_FLD_OFF_INVAL 0xffff diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index 755e1580f368..35579cf4283f 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -125,7 +125,7 @@ static struct gnss_serial *ice_gnss_struct_init(struct ice_pf *pf) * writes. */ kworker = kthread_create_worker(0, "ice-gnss-%s", dev_name(dev)); - if (!kworker) { + if (IS_ERR(kworker)) { kfree(gnss); return NULL; } @@ -253,7 +253,7 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) int err; tty_driver = tty_alloc_driver(1, TTY_DRIVER_REAL_RAW); - if (!tty_driver) { + if (IS_ERR(tty_driver)) { dev_err(ice_pf_to_dev(pf), "Failed to allocate memory for GNSS TTY\n"); return NULL; } diff --git a/drivers/net/ethernet/intel/ice/ice_idc_int.h b/drivers/net/ethernet/intel/ice/ice_idc_int.h index b7796b8aecbd..4b0c86757df9 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc_int.h +++ b/drivers/net/ethernet/intel/ice/ice_idc_int.h @@ -5,7 +5,6 @@ #define _ICE_IDC_INT_H_ #include <linux/net/intel/iidc.h> -#include "ice.h" struct ice_pf; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2694acb1aa01..0a2e695f3bb9 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -48,6 +48,21 @@ static DEFINE_IDA(ice_aux_ida); DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key); EXPORT_SYMBOL(ice_xdp_locking_key); +/** + * ice_hw_to_dev - Get device pointer from the hardware structure + * @hw: pointer to the device HW structure + * + * Used to access the device pointer from compilation units which can't easily + * include the definition of struct ice_pf without leading to circular header + * dependencies. 
+ */ +struct device *ice_hw_to_dev(struct ice_hw *hw) +{ + struct ice_pf *pf = container_of(hw, struct ice_pf, hw); + + return &pf->pdev->dev; +} + static struct workqueue_struct *ice_wq; static const struct net_device_ops ice_netdev_safe_mode_ops; static const struct net_device_ops ice_netdev_ops; @@ -619,7 +634,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) clear_bit(ICE_PREPARED_FOR_RESET, pf->state); clear_bit(ICE_PFR_REQ, pf->state); wake_up(&pf->reset_wait_queue); - ice_reset_all_vfs(pf, true); + ice_reset_all_vfs(pf); } } @@ -670,7 +685,7 @@ static void ice_reset_subtask(struct ice_pf *pf) clear_bit(ICE_CORER_REQ, pf->state); clear_bit(ICE_GLOBR_REQ, pf->state); wake_up(&pf->reset_wait_queue); - ice_reset_all_vfs(pf, true); + ice_reset_all_vfs(pf); } return; @@ -1808,9 +1823,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) * reset, so print the event prior to reset. */ ice_print_vf_rx_mdd_event(vf); - mutex_lock(&vf->cfg_lock); - ice_reset_vf(vf, false); - mutex_unlock(&vf->cfg_lock); + ice_reset_vf(vf, ICE_VF_RESET_LOCK); } } } @@ -3739,7 +3752,7 @@ static void ice_set_pf_caps(struct ice_pf *pf) if (func_caps->common_cap.sr_iov_1_1) { set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs, - ICE_MAX_VF_COUNT); + ICE_MAX_SRIOV_VFS); } clear_bit(ICE_FLAG_RSS_ENA, pf->flags); if (func_caps->common_cap.rss_table_size) @@ -4908,7 +4921,6 @@ static void ice_remove(struct pci_dev *pdev) ice_devlink_unregister_params(pf); set_bit(ICE_DOWN, pf->state); - mutex_destroy(&(&pf->hw)->fdir_fltr_lock); ice_deinit_lag(pf); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_release(pf); @@ -4918,6 +4930,7 @@ static void ice_remove(struct pci_dev *pdev) ice_remove_arfs(pf); ice_setup_mc_magic_wake(pf); ice_vsi_release_all(pf); + mutex_destroy(&(&pf->hw)->fdir_fltr_lock); ice_set_wake(pf); ice_free_irq_msix_misc(pf); ice_for_each_vsi(pf, i) { @@ -6166,8 +6179,9 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, u64 pkts = 0, bytes = 0; ring = READ_ONCE(rings[i]); - if (ring) - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + if (!ring) + continue; + ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; vsi->tx_restart += ring->tx_stats.restart_q; diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h index 380e8ae94fc9..82bc54fec7f3 100644 --- a/drivers/net/ethernet/intel/ice/ice_osdep.h +++ b/drivers/net/ethernet/intel/ice/ice_osdep.h @@ -5,7 +5,14 @@ #define _ICE_OSDEP_H_ #include <linux/types.h> +#include <linux/ctype.h> +#include <linux/delay.h> #include <linux/io.h> +#include <linux/bitops.h> +#include <linux/ethtool.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/pci_ids.h> #ifndef CONFIG_64BIT #include <linux/io-64-nonatomic-lo-hi.h> #endif @@ -25,8 +32,8 @@ struct ice_dma_mem { size_t size; }; -#define ice_hw_to_dev(ptr) \ - (&(container_of((ptr), struct ice_pf, hw))->pdev->dev) +struct ice_hw; +struct device *ice_hw_to_dev(struct ice_hw *hw); #ifdef CONFIG_DYNAMIC_DEBUG #define ice_debug(hw, type, fmt, args...) 
\ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 000c39d163a2..a1cd33273ca4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -3,6 +3,7 @@ #include "ice.h" #include "ice_lib.h" +#include "ice_trace.h" #define E810_OUT_PROP_DELAY_NS 1 @@ -2063,11 +2064,15 @@ static void ice_ptp_tx_tstamp_work(struct kthread_work *work) struct sk_buff *skb; int err; + ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx); + err = ice_read_phy_tstamp(hw, tx->quad, phy_idx, &raw_tstamp); if (err) continue; + ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx); + /* Check if the timestamp is invalid or stale */ if (!(raw_tstamp & ICE_PTP_TS_VALID) || raw_tstamp == tx->tstamps[idx].cached_tstamp) @@ -2093,6 +2098,8 @@ static void ice_ptp_tx_tstamp_work(struct kthread_work *work) tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp); shhwtstamps.hwtstamp = ns_to_ktime(tstamp); + ice_trace(tx_tstamp_complete, skb, idx); + skb_tstamp_tx(skb, &shhwtstamps); dev_kfree_skb_any(skb); } @@ -2131,6 +2138,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) tx->tstamps[idx].start = jiffies; tx->tstamps[idx].skb = skb_get(skb); skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + ice_trace(tx_tstamp_request, skb, idx); } spin_unlock(&tx->lock); diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index f8db3ca521da..848f2adea563 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -4,7 +4,7 @@ #include "ice.h" #include "ice_eswitch.h" #include "ice_devlink.h" -#include "ice_virtchnl_pf.h" +#include "ice_sriov.h" #include "ice_tc_lib.h" /** @@ -339,7 +339,7 @@ static int ice_repr_add(struct ice_vf *vf) devlink_port_type_eth_set(&vf->devlink_port, repr->netdev); - ice_vc_change_ops_to_repr(&vf->vc_ops); + ice_virtchnl_set_repr_ops(vf); return 0; @@ -384,7 +384,7 @@ static void ice_repr_rem(struct ice_vf *vf) kfree(vf->repr); vf->repr = NULL; - ice_vc_set_dflt_vf_ops(&vf->vc_ops); + ice_virtchnl_set_dflt_ops(vf); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index 0c77ff050d15..378a45bfa256 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -5,7 +5,6 @@ #define _ICE_REPR_H_ #include <net/dst_metadata.h> -#include "ice.h" struct ice_repr { struct ice_vsi *src_vsi; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 52c6bac41bf7..8915a9d39e36 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1,532 +1,1919 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018, Intel Corporation. 
*/ -#include "ice_common.h" -#include "ice_sriov.h" +#include "ice.h" +#include "ice_vf_lib_private.h" +#include "ice_base.h" +#include "ice_lib.h" +#include "ice_fltr.h" +#include "ice_dcb_lib.h" +#include "ice_flow.h" +#include "ice_eswitch.h" +#include "ice_virtchnl_allowlist.h" +#include "ice_flex_pipe.h" +#include "ice_vf_vsi_vlan_ops.h" +#include "ice_vlan.h" /** - * ice_aq_send_msg_to_vf - * @hw: pointer to the hardware structure - * @vfid: VF ID to send msg - * @v_opcode: opcodes for VF-PF communication - * @v_retval: return error code - * @msg: pointer to the msg buffer - * @msglen: msg length - * @cd: pointer to command details + * ice_free_vf_entries - Free all VF entries from the hash table + * @pf: pointer to the PF structure * - * Send message to VF driver (0x0802) using mailbox - * queue and asynchronously sending message via - * ice_sq_send_cmd() function + * Iterate over the VF hash table, removing and releasing all VF entries. + * Called during VF teardown or as cleanup during failed VF initialization. */ -int -ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, - u8 *msg, u16 msglen, struct ice_sq_cd *cd) +static void ice_free_vf_entries(struct ice_pf *pf) { - struct ice_aqc_pf_vf_msg *cmd; - struct ice_aq_desc desc; + struct ice_vfs *vfs = &pf->vfs; + struct hlist_node *tmp; + struct ice_vf *vf; + unsigned int bkt; - ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_vf); + /* Remove all VFs from the hash table and release their main + * reference. Once all references to the VF are dropped, ice_put_vf() + * will call ice_release_vf which will remove the VF memory. + */ + lockdep_assert_held(&vfs->table_lock); - cmd = &desc.params.virt; - cmd->id = cpu_to_le32(vfid); + hash_for_each_safe(vfs->table, bkt, tmp, vf, entry) { + hash_del_rcu(&vf->entry); + ice_put_vf(vf); + } +} - desc.cookie_high = cpu_to_le32(v_opcode); - desc.cookie_low = cpu_to_le32(v_retval); +/** + * ice_vf_vsi_release - invalidate the VF's VSI after freeing it + * @vf: invalidate this VF's VSI after freeing it + */ +static void ice_vf_vsi_release(struct ice_vf *vf) +{ + ice_vsi_release(ice_get_vf_vsi(vf)); + ice_vf_invalidate_vsi(vf); +} - if (msglen) - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); +/** + * ice_free_vf_res - Free a VF's resources + * @vf: pointer to the VF info + */ +static void ice_free_vf_res(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + int i, last_vector_idx; + + /* First, disable VF's configuration API to prevent OS from + * accessing the VF's VSI after it's freed or invalidated. 
+ */ + clear_bit(ICE_VF_STATE_INIT, vf->vf_states); + ice_vf_fdir_exit(vf); + /* free VF control VSI */ + if (vf->ctrl_vsi_idx != ICE_NO_VSI) + ice_vf_ctrl_vsi_release(vf); + + /* free VSI and disconnect it from the parent uplink */ + if (vf->lan_vsi_idx != ICE_NO_VSI) { + ice_vf_vsi_release(vf); + vf->num_mac = 0; + } - return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd); + last_vector_idx = vf->first_vector_idx + pf->vfs.num_msix_per - 1; + + /* clear VF MDD event information */ + memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events)); + memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events)); + + /* Disable interrupts so that VF starts in a known state */ + for (i = vf->first_vector_idx; i <= last_vector_idx; i++) { + wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M); + ice_flush(&pf->hw); + } + /* reset some of the state variables keeping track of the resources */ + clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states); + clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states); } /** - * ice_conv_link_speed_to_virtchnl - * @adv_link_support: determines the format of the returned link speed - * @link_speed: variable containing the link_speed to be converted + * ice_dis_vf_mappings + * @vf: pointer to the VF structure + */ +static void ice_dis_vf_mappings(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + struct device *dev; + int first, last, v; + struct ice_hw *hw; + + hw = &pf->hw; + vsi = ice_get_vf_vsi(vf); + + dev = ice_pf_to_dev(pf); + wr32(hw, VPINT_ALLOC(vf->vf_id), 0); + wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0); + + first = vf->first_vector_idx; + last = first + pf->vfs.num_msix_per - 1; + for (v = first; v <= last; v++) { + u32 reg; + + reg = (((1 << GLINT_VECT2FUNC_IS_PF_S) & + GLINT_VECT2FUNC_IS_PF_M) | + ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) & + GLINT_VECT2FUNC_PF_NUM_M)); + wr32(hw, GLINT_VECT2FUNC(v), reg); + } + + if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) + wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0); + else + dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n"); + + if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) + wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0); + else + dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n"); +} + +/** + * ice_sriov_free_msix_res - Reset/free any used MSIX resources + * @pf: pointer to the PF structure + * + * Since no MSIX entries are taken from the pf->irq_tracker then just clear + * the pf->sriov_base_vector. * - * Convert link speed supported by HW to link speed supported by virtchnl. - * If adv_link_support is true, then return link speed in Mbps. Else return - * link speed as a VIRTCHNL_LINK_SPEED_* casted to a u32. Note that the caller - * needs to cast back to an enum virtchnl_link_speed in the case where - * adv_link_support is false, but when adv_link_support is true the caller can - * expect the speed in Mbps. + * Returns 0 on success, and -EINVAL on error. 
*/ -u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) +static int ice_sriov_free_msix_res(struct ice_pf *pf) { - u32 speed; + struct ice_res_tracker *res; - if (adv_link_support) - switch (link_speed) { - case ICE_AQ_LINK_SPEED_10MB: - speed = ICE_LINK_SPEED_10MBPS; - break; - case ICE_AQ_LINK_SPEED_100MB: - speed = ICE_LINK_SPEED_100MBPS; - break; - case ICE_AQ_LINK_SPEED_1000MB: - speed = ICE_LINK_SPEED_1000MBPS; - break; - case ICE_AQ_LINK_SPEED_2500MB: - speed = ICE_LINK_SPEED_2500MBPS; - break; - case ICE_AQ_LINK_SPEED_5GB: - speed = ICE_LINK_SPEED_5000MBPS; - break; - case ICE_AQ_LINK_SPEED_10GB: - speed = ICE_LINK_SPEED_10000MBPS; - break; - case ICE_AQ_LINK_SPEED_20GB: - speed = ICE_LINK_SPEED_20000MBPS; - break; - case ICE_AQ_LINK_SPEED_25GB: - speed = ICE_LINK_SPEED_25000MBPS; - break; - case ICE_AQ_LINK_SPEED_40GB: - speed = ICE_LINK_SPEED_40000MBPS; - break; - case ICE_AQ_LINK_SPEED_50GB: - speed = ICE_LINK_SPEED_50000MBPS; - break; - case ICE_AQ_LINK_SPEED_100GB: - speed = ICE_LINK_SPEED_100000MBPS; - break; - default: - speed = ICE_LINK_SPEED_UNKNOWN; - break; - } + if (!pf) + return -EINVAL; + + res = pf->irq_tracker; + if (!res) + return -EINVAL; + + /* give back irq_tracker resources used */ + WARN_ON(pf->sriov_base_vector < res->num_entries); + + pf->sriov_base_vector = 0; + + return 0; +} + +/** + * ice_free_vfs - Free all VFs + * @pf: pointer to the PF structure + */ +void ice_free_vfs(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_vfs *vfs = &pf->vfs; + struct ice_hw *hw = &pf->hw; + struct ice_vf *vf; + unsigned int bkt; + + if (!ice_has_vfs(pf)) + return; + + while (test_and_set_bit(ICE_VF_DIS, pf->state)) + usleep_range(1000, 2000); + + /* Disable IOV before freeing resources. This lets any VF drivers + * running in the host get themselves cleaned up before we yank + * the carpet out from underneath their feet. + */ + if (!pci_vfs_assigned(pf->pdev)) + pci_disable_sriov(pf->pdev); else - /* Virtchnl speeds are not defined for every speed supported in - * the hardware. 
To maintain compatibility with older AVF - * drivers, while reporting the speed the new speed values are - * resolved to the closest known virtchnl speeds - */ - switch (link_speed) { - case ICE_AQ_LINK_SPEED_10MB: - case ICE_AQ_LINK_SPEED_100MB: - speed = (u32)VIRTCHNL_LINK_SPEED_100MB; - break; - case ICE_AQ_LINK_SPEED_1000MB: - case ICE_AQ_LINK_SPEED_2500MB: - case ICE_AQ_LINK_SPEED_5GB: - speed = (u32)VIRTCHNL_LINK_SPEED_1GB; - break; - case ICE_AQ_LINK_SPEED_10GB: - speed = (u32)VIRTCHNL_LINK_SPEED_10GB; - break; - case ICE_AQ_LINK_SPEED_20GB: - speed = (u32)VIRTCHNL_LINK_SPEED_20GB; - break; - case ICE_AQ_LINK_SPEED_25GB: - speed = (u32)VIRTCHNL_LINK_SPEED_25GB; - break; - case ICE_AQ_LINK_SPEED_40GB: - case ICE_AQ_LINK_SPEED_50GB: - case ICE_AQ_LINK_SPEED_100GB: - speed = (u32)VIRTCHNL_LINK_SPEED_40GB; - break; - default: - speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN; - break; + dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); + + mutex_lock(&vfs->table_lock); + + ice_eswitch_release(pf); + + ice_for_each_vf(pf, bkt, vf) { + mutex_lock(&vf->cfg_lock); + + ice_dis_vf_qs(vf); + + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { + /* disable VF qp mappings and set VF disable state */ + ice_dis_vf_mappings(vf); + set_bit(ICE_VF_STATE_DIS, vf->vf_states); + ice_free_vf_res(vf); } - return speed; + if (!pci_vfs_assigned(pf->pdev)) { + u32 reg_idx, bit_idx; + + reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32; + bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32; + wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); + } + + /* clear malicious info since the VF is getting released */ + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_SRIOV_VFS, vf->vf_id)) + dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", + vf->vf_id); + + mutex_unlock(&vf->cfg_lock); + } + + if (ice_sriov_free_msix_res(pf)) + dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); + + vfs->num_qps_per = 0; + ice_free_vf_entries(pf); + + mutex_unlock(&vfs->table_lock); + + clear_bit(ICE_VF_DIS, pf->state); + clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); } -/* The mailbox overflow detection algorithm helps to check if there - * is a possibility of a malicious VF transmitting too many MBX messages to the - * PF. - * 1. The mailbox snapshot structure, ice_mbx_snapshot, is initialized during - * driver initialization in ice_init_hw() using ice_mbx_init_snapshot(). - * The struct ice_mbx_snapshot helps to track and traverse a static window of - * messages within the mailbox queue while looking for a malicious VF. +/** + * ice_vf_vsi_setup - Set up a VF VSI + * @vf: VF to setup VSI for * - * 2. When the caller starts processing its mailbox queue in response to an - * interrupt, the structure ice_mbx_snapshot is expected to be cleared before - * the algorithm can be run for the first time for that interrupt. This can be - * done via ice_mbx_reset_snapshot(). + * Returns pointer to the successfully allocated VSI struct on success, + * otherwise returns NULL on failure. 
+ */ +static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) +{ + struct ice_port_info *pi = ice_vf_get_port_info(vf); + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + + vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf, NULL); + + if (!vsi) { + dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n"); + ice_vf_invalidate_vsi(vf); + return NULL; + } + + vf->lan_vsi_idx = vsi->idx; + vf->lan_vsi_num = vsi->vsi_num; + + return vsi; +} + +/** + * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space + * @pf: pointer to PF structure + * @vf: pointer to VF that the first MSIX vector index is being calculated for * - * 3. For every message read by the caller from the MBX Queue, the caller must - * call the detection algorithm's entry function ice_mbx_vf_state_handler(). - * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is - * filled as it is required to be passed to the algorithm. + * This returns the first MSIX vector index in PF space that is used by this VF. + * This index is used when accessing PF relative registers such as + * GLINT_VECT2FUNC and GLINT_DYN_CTL. + * This will always be the OICR index in the AVF driver so any functionality + * using vf->first_vector_idx for queue configuration will have to increment by + * 1 to avoid meddling with the OICR index. + */ +static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) +{ + return pf->sriov_base_vector + vf->vf_id * pf->vfs.num_msix_per; +} + +/** + * ice_ena_vf_msix_mappings - enable VF MSIX mappings in hardware + * @vf: VF to enable MSIX mappings for * - * 4. Every time a message is read from the MBX queue, a VFId is received which - * is passed to the state handler. The boolean output is_malvf of the state - * handler ice_mbx_vf_state_handler() serves as an indicator to the caller - * whether this VF is malicious or not. + * Some of the registers need to be indexed/configured using hardware global + * device values and other registers need 0-based values, which represent PF + * based values. 
+ */ +static void ice_ena_vf_msix_mappings(struct ice_vf *vf) +{ + int device_based_first_msix, device_based_last_msix; + int pf_based_first_msix, pf_based_last_msix, v; + struct ice_pf *pf = vf->pf; + int device_based_vf_id; + struct ice_hw *hw; + u32 reg; + + hw = &pf->hw; + pf_based_first_msix = vf->first_vector_idx; + pf_based_last_msix = (pf_based_first_msix + pf->vfs.num_msix_per) - 1; + + device_based_first_msix = pf_based_first_msix + + pf->hw.func_caps.common_cap.msix_vector_first_id; + device_based_last_msix = + (device_based_first_msix + pf->vfs.num_msix_per) - 1; + device_based_vf_id = vf->vf_id + hw->func_caps.vf_base_id; + + reg = (((device_based_first_msix << VPINT_ALLOC_FIRST_S) & + VPINT_ALLOC_FIRST_M) | + ((device_based_last_msix << VPINT_ALLOC_LAST_S) & + VPINT_ALLOC_LAST_M) | VPINT_ALLOC_VALID_M); + wr32(hw, VPINT_ALLOC(vf->vf_id), reg); + + reg = (((device_based_first_msix << VPINT_ALLOC_PCI_FIRST_S) + & VPINT_ALLOC_PCI_FIRST_M) | + ((device_based_last_msix << VPINT_ALLOC_PCI_LAST_S) & + VPINT_ALLOC_PCI_LAST_M) | VPINT_ALLOC_PCI_VALID_M); + wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg); + + /* map the interrupts to its functions */ + for (v = pf_based_first_msix; v <= pf_based_last_msix; v++) { + reg = (((device_based_vf_id << GLINT_VECT2FUNC_VF_NUM_S) & + GLINT_VECT2FUNC_VF_NUM_M) | + ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) & + GLINT_VECT2FUNC_PF_NUM_M)); + wr32(hw, GLINT_VECT2FUNC(v), reg); + } + + /* Map mailbox interrupt to VF MSI-X vector 0 */ + wr32(hw, VPINT_MBX_CTL(device_based_vf_id), VPINT_MBX_CTL_CAUSE_ENA_M); +} + +/** + * ice_ena_vf_q_mappings - enable Rx/Tx queue mappings for a VF + * @vf: VF to enable the mappings for + * @max_txq: max Tx queues allowed on the VF's VSI + * @max_rxq: max Rx queues allowed on the VF's VSI + */ +static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_hw *hw = &vf->pf->hw; + u32 reg; + + /* set regardless of mapping mode */ + wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M); + + /* VF Tx queues allocation */ + if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) { + /* set the VF PF Tx queue range + * VFNUMQ value should be set to (number of queues - 1). A value + * of 0 means 1 queue and a value of 255 means 256 queues + */ + reg = (((vsi->txq_map[0] << VPLAN_TX_QBASE_VFFIRSTQ_S) & + VPLAN_TX_QBASE_VFFIRSTQ_M) | + (((max_txq - 1) << VPLAN_TX_QBASE_VFNUMQ_S) & + VPLAN_TX_QBASE_VFNUMQ_M)); + wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg); + } else { + dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n"); + } + + /* set regardless of mapping mode */ + wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), VPLAN_RXQ_MAPENA_RX_ENA_M); + + /* VF Rx queues allocation */ + if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) { + /* set the VF PF Rx queue range + * VFNUMQ value should be set to (number of queues - 1). 
A value + * of 0 means 1 queue and a value of 255 means 256 queues + */ + reg = (((vsi->rxq_map[0] << VPLAN_RX_QBASE_VFFIRSTQ_S) & + VPLAN_RX_QBASE_VFFIRSTQ_M) | + (((max_rxq - 1) << VPLAN_RX_QBASE_VFNUMQ_S) & + VPLAN_RX_QBASE_VFNUMQ_M)); + wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg); + } else { + dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n"); + } +} + +/** + * ice_ena_vf_mappings - enable VF MSIX and queue mapping + * @vf: pointer to the VF structure + */ +static void ice_ena_vf_mappings(struct ice_vf *vf) +{ + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + + ice_ena_vf_msix_mappings(vf); + ice_ena_vf_q_mappings(vf, vsi->alloc_txq, vsi->alloc_rxq); +} + +/** + * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space + * @vf: VF to calculate the register index for + * @q_vector: a q_vector associated to the VF + */ +int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) +{ + struct ice_pf *pf; + + if (!vf || !q_vector) + return -EINVAL; + + pf = vf->pf; + + /* always add one to account for the OICR being the first MSIX */ + return pf->sriov_base_vector + pf->vfs.num_msix_per * vf->vf_id + + q_vector->v_idx + 1; +} + +/** + * ice_get_max_valid_res_idx - Get the max valid resource index + * @res: pointer to the resource to find the max valid index for * - * 5. When a VF is identified to be malicious, the caller can send a message - * to the system administrator. The caller can invoke ice_mbx_report_malvf() - * to help determine if a malicious VF is to be reported or not. This function - * requires the caller to maintain a global bitmap to track all malicious VFs - * and pass that to ice_mbx_report_malvf() along with the VFID which was identified - * to be malicious by ice_mbx_vf_state_handler(). + * Start from the end of the ice_res_tracker and return right when we find the + * first res->list entry with the ICE_RES_VALID_BIT set. This function is only + * valid for SR-IOV because it is the only consumer that manipulates the + * res->end and this is always called when res->end is set to res->num_entries. + */ +static int ice_get_max_valid_res_idx(struct ice_res_tracker *res) +{ + int i; + + if (!res) + return -EINVAL; + + for (i = res->num_entries - 1; i >= 0; i--) + if (res->list[i] & ICE_RES_VALID_BIT) + return i; + + return 0; +} + +/** + * ice_sriov_set_msix_res - Set any used MSIX resources + * @pf: pointer to PF structure + * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs * - * 6. The global bitmap maintained by PF can be cleared completely if PF is in - * reset or the bit corresponding to a VF can be cleared if that VF is in reset. - * When a VF is shut down and brought back up, we assume that the new VF - * brought up is not malicious and hence report it if found malicious. + * This function allows SR-IOV resources to be taken from the end of the PF's + * allowed HW MSIX vectors so that the irq_tracker will not be affected. We + * just set the pf->sriov_base_vector and return success. * - * 7. The function ice_mbx_reset_snapshot() is called to reset the information - * in ice_mbx_snapshot for every new mailbox interrupt handled. + * If there are not enough resources available, return an error. This should + * always be caught by ice_set_per_vf_res(). * - * 8. The memory allocated for variables in ice_mbx_snapshot is de-allocated - * when driver is unloaded. + * Return 0 on success, and -EINVAL when there are not enough MSIX vectors + * in the PF's space available for SR-IOV. 
*/ -#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M) -/* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that - * the max messages check must be ignored in the algorithm +static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) +{ + u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; + int vectors_used = pf->irq_tracker->num_entries; + int sriov_base_vector; + + sriov_base_vector = total_vectors - num_msix_needed; + + /* make sure we only grab irq_tracker entries from the list end and + * that we have enough available MSIX vectors + */ + if (sriov_base_vector < vectors_used) + return -EINVAL; + + pf->sriov_base_vector = sriov_base_vector; + + return 0; +} + +/** + * ice_set_per_vf_res - check if vectors and queues are available + * @pf: pointer to the PF structure + * @num_vfs: the number of SR-IOV VFs being configured + * + * First, determine HW interrupts from common pool. If we allocate fewer VFs, we + * get more vectors and can enable more queues per VF. Note that this does not + * grab any vectors from the SW pool already allocated. Also note, that all + * vector counts include one for each VF's miscellaneous interrupt vector + * (i.e. OICR). + * + * Minimum VFs - 2 vectors, 1 queue pair + * Small VFs - 5 vectors, 4 queue pairs + * Medium VFs - 17 vectors, 16 queue pairs + * + * Second, determine number of queue pairs per VF by starting with a pre-defined + * maximum each VF supports. If this is not possible, then we adjust based on + * queue pairs available on the device. + * + * Lastly, set queue and MSI-X VF variables tracked by the PF so it can be used + * by each VF during VF initialization and reset. */ -#define ICE_IGNORE_MAX_MSG_CNT 0xFFFF +static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) +{ + int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); + u16 num_msix_per_vf, num_txq, num_rxq, avail_qs; + int msix_avail_per_vf, msix_avail_for_sriov; + struct device *dev = ice_pf_to_dev(pf); + int err; + + lockdep_assert_held(&pf->vfs.table_lock); + + if (!num_vfs) + return -EINVAL; + + if (max_valid_res_idx < 0) + return -ENOSPC; + + /* determine MSI-X resources per VF */ + msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors - + pf->irq_tracker->num_entries; + msix_avail_per_vf = msix_avail_for_sriov / num_vfs; + if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) { + num_msix_per_vf = ICE_NUM_VF_MSIX_MED; + } else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_SMALL) { + num_msix_per_vf = ICE_NUM_VF_MSIX_SMALL; + } else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MULTIQ_MIN) { + num_msix_per_vf = ICE_NUM_VF_MSIX_MULTIQ_MIN; + } else if (msix_avail_per_vf >= ICE_MIN_INTR_PER_VF) { + num_msix_per_vf = ICE_MIN_INTR_PER_VF; + } else { + dev_err(dev, "Only %d MSI-X interrupts available for SR-IOV. 
Not enough to support minimum of %d MSI-X interrupts per VF for %d VFs\n", + msix_avail_for_sriov, ICE_MIN_INTR_PER_VF, + num_vfs); + return -ENOSPC; + } + + num_txq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF, + ICE_MAX_RSS_QS_PER_VF); + avail_qs = ice_get_avail_txq_count(pf) / num_vfs; + if (!avail_qs) + num_txq = 0; + else if (num_txq > avail_qs) + num_txq = rounddown_pow_of_two(avail_qs); + + num_rxq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF, + ICE_MAX_RSS_QS_PER_VF); + avail_qs = ice_get_avail_rxq_count(pf) / num_vfs; + if (!avail_qs) + num_rxq = 0; + else if (num_rxq > avail_qs) + num_rxq = rounddown_pow_of_two(avail_qs); + + if (num_txq < ICE_MIN_QS_PER_VF || num_rxq < ICE_MIN_QS_PER_VF) { + dev_err(dev, "Not enough queues to support minimum of %d queue pairs per VF for %d VFs\n", + ICE_MIN_QS_PER_VF, num_vfs); + return -ENOSPC; + } + + err = ice_sriov_set_msix_res(pf, num_msix_per_vf * num_vfs); + if (err) { + dev_err(dev, "Unable to set MSI-X resources for %d VFs, err %d\n", + num_vfs, err); + return err; + } + + /* only allow equal Tx/Rx queue count (i.e. queue pairs) */ + pf->vfs.num_qps_per = min_t(int, num_txq, num_rxq); + pf->vfs.num_msix_per = num_msix_per_vf; + dev_info(dev, "Enabling %d VFs with %d vectors and %d queues per VF\n", + num_vfs, pf->vfs.num_msix_per, pf->vfs.num_qps_per); + + return 0; +} /** - * ice_mbx_traverse - Pass through mailbox snapshot - * @hw: pointer to the HW struct - * @new_state: new algorithm state + * ice_init_vf_vsi_res - initialize/setup VF VSI resources + * @vf: VF to initialize/setup the VSI for * - * Traversing the mailbox static snapshot without checking - * for malicious VFs. + * This function creates a VSI for the VF, adds a VLAN 0 filter, and sets up the + * VF VSI's broadcast filter and is only used during initial VF creation. */ -static void -ice_mbx_traverse(struct ice_hw *hw, - enum ice_mbx_snapshot_state *new_state) +static int ice_init_vf_vsi_res(struct ice_vf *vf) { - struct ice_mbx_snap_buffer_data *snap_buf; - u32 num_iterations; + struct ice_vsi_vlan_ops *vlan_ops; + struct ice_pf *pf = vf->pf; + u8 broadcast[ETH_ALEN]; + struct ice_vsi *vsi; + struct device *dev; + int err; - snap_buf = &hw->mbx_snapshot.mbx_buf; + vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf); - /* As mailbox buffer is circular, applying a mask - * on the incremented iteration count. - */ - num_iterations = ICE_RQ_DATA_MASK(++snap_buf->num_iterations); - - /* Checking either of the below conditions to exit snapshot traversal: - * Condition-1: If the number of iterations in the mailbox is equal to - * the mailbox head which would indicate that we have reached the end - * of the static snapshot. - * Condition-2: If the maximum messages serviced in the mailbox for a - * given interrupt is the highest possible value then there is no need - * to check if the number of messages processed is equal to it. If not - * check if the number of messages processed is greater than or equal - * to the maximum number of mailbox entries serviced in current work item. 
+ dev = ice_pf_to_dev(pf); + vsi = ice_vf_vsi_setup(vf); + if (!vsi) + return -ENOMEM; + + err = ice_vsi_add_vlan_zero(vsi); + if (err) { + dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n", + vf->vf_id); + goto release_vsi; + } + + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + err = vlan_ops->ena_rx_filtering(vsi); + if (err) { + dev_warn(dev, "Failed to enable Rx VLAN filtering for VF %d\n", + vf->vf_id); + goto release_vsi; + } + + eth_broadcast_addr(broadcast); + err = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI); + if (err) { + dev_err(dev, "Failed to add broadcast MAC filter for VF %d, error %d\n", + vf->vf_id, err); + goto release_vsi; + } + + err = ice_vsi_apply_spoofchk(vsi, vf->spoofchk); + if (err) { + dev_warn(dev, "Failed to initialize spoofchk setting for VF %d\n", + vf->vf_id); + goto release_vsi; + } + + vf->num_mac = 1; + + return 0; + +release_vsi: + ice_vf_vsi_release(vf); + return err; +} + +/** + * ice_start_vfs - start VFs so they are ready to be used by SR-IOV + * @pf: PF the VFs are associated with + */ +static int ice_start_vfs(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + unsigned int bkt, it_cnt; + struct ice_vf *vf; + int retval; + + lockdep_assert_held(&pf->vfs.table_lock); + + it_cnt = 0; + ice_for_each_vf(pf, bkt, vf) { + vf->vf_ops->clear_reset_trigger(vf); + + retval = ice_init_vf_vsi_res(vf); + if (retval) { + dev_err(ice_pf_to_dev(pf), "Failed to initialize VSI resources for VF %d, error %d\n", + vf->vf_id, retval); + goto teardown; + } + + set_bit(ICE_VF_STATE_INIT, vf->vf_states); + ice_ena_vf_mappings(vf); + wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE); + it_cnt++; + } + + ice_flush(hw); + return 0; + +teardown: + ice_for_each_vf(pf, bkt, vf) { + if (it_cnt == 0) + break; + + ice_dis_vf_mappings(vf); + ice_vf_vsi_release(vf); + it_cnt--; + } + + return retval; +} + +/** + * ice_sriov_free_vf - Free VF memory after all references are dropped + * @vf: pointer to VF to free + * + * Called by ice_put_vf through ice_release_vf once the last reference to a VF + * structure has been dropped. + */ +static void ice_sriov_free_vf(struct ice_vf *vf) +{ + mutex_destroy(&vf->cfg_lock); + + kfree_rcu(vf, rcu); +} + +/** + * ice_sriov_clear_mbx_register - clears SRIOV VF's mailbox registers + * @vf: the vf to configure + */ +static void ice_sriov_clear_mbx_register(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + + wr32(&pf->hw, VF_MBX_ARQLEN(vf->vf_id), 0); + wr32(&pf->hw, VF_MBX_ATQLEN(vf->vf_id), 0); +} + +/** + * ice_sriov_trigger_reset_register - trigger VF reset for SRIOV VF + * @vf: pointer to VF structure + * @is_vflr: true if reset occurred due to VFLR + * + * Trigger and cleanup after a VF reset for a SR-IOV VF. + */ +static void ice_sriov_trigger_reset_register(struct ice_vf *vf, bool is_vflr) +{ + struct ice_pf *pf = vf->pf; + u32 reg, reg_idx, bit_idx; + unsigned int vf_abs_id, i; + struct device *dev; + struct ice_hw *hw; + + dev = ice_pf_to_dev(pf); + hw = &pf->hw; + vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id; + + /* In the case of a VFLR, HW has already reset the VF and we just need + * to clean up. Otherwise we must first trigger the reset using the + * VFRTRIG register. 
*/ - if (num_iterations == snap_buf->head || - (snap_buf->max_num_msgs_mbx < ICE_IGNORE_MAX_MSG_CNT && - ++snap_buf->num_msg_proc >= snap_buf->max_num_msgs_mbx)) - *new_state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; + if (!is_vflr) { + reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id)); + reg |= VPGEN_VFRTRIG_VFSWR_M; + wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg); + } + + /* clear the VFLR bit in GLGEN_VFLRSTAT */ + reg_idx = (vf_abs_id) / 32; + bit_idx = (vf_abs_id) % 32; + wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); + ice_flush(hw); + + wr32(hw, PF_PCI_CIAA, + VF_DEVICE_STATUS | (vf_abs_id << PF_PCI_CIAA_VF_NUM_S)); + for (i = 0; i < ICE_PCI_CIAD_WAIT_COUNT; i++) { + reg = rd32(hw, PF_PCI_CIAD); + /* no transactions pending so stop polling */ + if ((reg & VF_TRANS_PENDING_M) == 0) + break; + + dev_err(dev, "VF %u PCI transactions stuck\n", vf->vf_id); + udelay(ICE_PCI_CIAD_WAIT_DELAY_US); + } +} + +/** + * ice_sriov_poll_reset_status - poll SRIOV VF reset status + * @vf: pointer to VF structure + * + * Returns true when reset is successful, else returns false + */ +static bool ice_sriov_poll_reset_status(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + unsigned int i; + u32 reg; + + for (i = 0; i < 10; i++) { + /* VF reset requires driver to first reset the VF and then + * poll the status register to make sure that the reset + * completed successfully. + */ + reg = rd32(&pf->hw, VPGEN_VFRSTAT(vf->vf_id)); + if (reg & VPGEN_VFRSTAT_VFRD_M) + return true; + + /* only sleep if the reset is not done */ + usleep_range(10, 20); + } + return false; +} + +/** + * ice_sriov_clear_reset_trigger - enable VF to access hardware + * @vf: VF to enable hardware access for + */ +static void ice_sriov_clear_reset_trigger(struct ice_vf *vf) +{ + struct ice_hw *hw = &vf->pf->hw; + u32 reg; + + reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id)); + reg &= ~VPGEN_VFRTRIG_VFSWR_M; + wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg); + ice_flush(hw); +} + +/** + * ice_sriov_vsi_rebuild - release and rebuild VF's VSI + * @vf: VF to release and setup the VSI for + * + * This is only called when a single VF is being reset (i.e. VFR, VFLR, host VF + * configuration change, etc.). + */ +static int ice_sriov_vsi_rebuild(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + + ice_vf_vsi_release(vf); + if (!ice_vf_vsi_setup(vf)) { + dev_err(ice_pf_to_dev(pf), + "Failed to release and setup the VF%u's VSI\n", + vf->vf_id); + return -ENOMEM; + } + + return 0; +} + +/** + * ice_sriov_post_vsi_rebuild - tasks to do after the VF's VSI has been rebuilt + * @vf: VF to perform tasks on + */ +static void ice_sriov_post_vsi_rebuild(struct ice_vf *vf) +{ + ice_vf_rebuild_host_cfg(vf); + ice_vf_set_initialized(vf); + ice_ena_vf_mappings(vf); + wr32(&vf->pf->hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE); +} + +static const struct ice_vf_ops ice_sriov_vf_ops = { + .reset_type = ICE_VF_RESET, + .free = ice_sriov_free_vf, + .clear_mbx_register = ice_sriov_clear_mbx_register, + .trigger_reset_register = ice_sriov_trigger_reset_register, + .poll_reset_status = ice_sriov_poll_reset_status, + .clear_reset_trigger = ice_sriov_clear_reset_trigger, + .vsi_rebuild = ice_sriov_vsi_rebuild, + .post_vsi_rebuild = ice_sriov_post_vsi_rebuild, +}; + +/** + * ice_create_vf_entries - Allocate and insert VF entries + * @pf: pointer to the PF structure + * @num_vfs: the number of VFs to allocate + * + * Allocate new VF entries and insert them into the hash table. Set some + * basic default fields for initializing the new VFs.
+ * + * After this function exits, the hash table will have num_vfs entries + * inserted. + * + * Returns 0 on success or an integer error code on failure. + */ +static int ice_create_vf_entries(struct ice_pf *pf, u16 num_vfs) +{ + struct ice_vfs *vfs = &pf->vfs; + struct ice_vf *vf; + u16 vf_id; + int err; + + lockdep_assert_held(&vfs->table_lock); + + for (vf_id = 0; vf_id < num_vfs; vf_id++) { + vf = kzalloc(sizeof(*vf), GFP_KERNEL); + if (!vf) { + err = -ENOMEM; + goto err_free_entries; + } + kref_init(&vf->refcnt); + + vf->pf = pf; + vf->vf_id = vf_id; + + /* set sriov vf ops for VFs created during SRIOV flow */ + vf->vf_ops = &ice_sriov_vf_ops; + + vf->vf_sw_id = pf->first_sw; + /* assign default capabilities */ + vf->spoofchk = true; + vf->num_vf_qs = pf->vfs.num_qps_per; + ice_vc_set_default_allowlist(vf); + + /* ctrl_vsi_idx will be set to a valid value only when VF + * creates its first fdir rule. + */ + ice_vf_ctrl_invalidate_vsi(vf); + ice_vf_fdir_init(vf); + + ice_virtchnl_set_dflt_ops(vf); + + mutex_init(&vf->cfg_lock); + + hash_add_rcu(vfs->table, &vf->entry, vf_id); + } + + return 0; + +err_free_entries: + ice_free_vf_entries(pf); + return err; } /** - * ice_mbx_detect_malvf - Detect malicious VF in snapshot - * @hw: pointer to the HW struct - * @vf_id: relative virtual function ID - * @new_state: new algorithm state - * @is_malvf: boolean output to indicate if VF is malicious + * ice_ena_vfs - enable VFs so they are ready to be used + * @pf: pointer to the PF structure + * @num_vfs: number of VFs to enable + */ +static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + int ret; + + /* Disable global interrupt 0 so we don't try to handle the VFLR. */ + wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), + ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); + set_bit(ICE_OICR_INTR_DIS, pf->state); + ice_flush(hw); + + ret = pci_enable_sriov(pf->pdev, num_vfs); + if (ret) + goto err_unroll_intr; + + mutex_lock(&pf->vfs.table_lock); + + ret = ice_set_per_vf_res(pf, num_vfs); + if (ret) { + dev_err(dev, "Not enough resources for %d VFs, err %d. Try with a smaller number of VFs\n", + num_vfs, ret); + goto err_unroll_sriov; + } + + ret = ice_create_vf_entries(pf, num_vfs); + if (ret) { + dev_err(dev, "Failed to allocate VF entries for %d VFs\n", + num_vfs); + goto err_unroll_sriov; + } + + ret = ice_start_vfs(pf); + if (ret) { + dev_err(dev, "Failed to start %d VFs, err %d\n", num_vfs, ret); + ret = -EAGAIN; + goto err_unroll_vf_entries; + } + + clear_bit(ICE_VF_DIS, pf->state); + + ret = ice_eswitch_configure(pf); + if (ret) { + dev_err(dev, "Failed to configure eswitch, err %d\n", ret); + goto err_unroll_sriov; + } + + /* rearm global interrupts */ + if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state)) + ice_irq_dynamic_ena(hw, NULL, NULL); + + mutex_unlock(&pf->vfs.table_lock); + + return 0; + +err_unroll_vf_entries: + ice_free_vf_entries(pf); +err_unroll_sriov: + mutex_unlock(&pf->vfs.table_lock); + pci_disable_sriov(pf->pdev); +err_unroll_intr: + /* rearm interrupts here */ + ice_irq_dynamic_ena(hw, NULL, NULL); + clear_bit(ICE_OICR_INTR_DIS, pf->state); + return ret; +} + +/** + * ice_pci_sriov_ena - Enable or change number of VFs + * @pf: pointer to the PF structure + * @num_vfs: number of VFs to allocate * - * This function tracks the number of asynchronous messages - * sent per VF and marks the VF as malicious if it exceeds - * the permissible number of messages to send.
+ * Returns 0 on success and negative on failure */ -static int -ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id, - enum ice_mbx_snapshot_state *new_state, - bool *is_malvf) +static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) { - struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + int pre_existing_vfs = pci_num_vf(pf->pdev); + struct device *dev = ice_pf_to_dev(pf); + int err; - if (vf_id >= snap->mbx_vf.vfcntr_len) - return -EIO; + if (pre_existing_vfs && pre_existing_vfs != num_vfs) + ice_free_vfs(pf); + else if (pre_existing_vfs && pre_existing_vfs == num_vfs) + return 0; - /* increment the message count in the VF array */ - snap->mbx_vf.vf_cntr[vf_id]++; + if (num_vfs > pf->vfs.num_supported) { + dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n", + num_vfs, pf->vfs.num_supported); + return -EOPNOTSUPP; + } - if (snap->mbx_vf.vf_cntr[vf_id] >= ICE_ASYNC_VF_MSG_THRESHOLD) - *is_malvf = true; + dev_info(dev, "Enabling %d VFs\n", num_vfs); + err = ice_ena_vfs(pf, num_vfs); + if (err) { + dev_err(dev, "Failed to enable SR-IOV: %d\n", err); + return err; + } - /* continue to iterate through the mailbox snapshot */ - ice_mbx_traverse(hw, new_state); + set_bit(ICE_FLAG_SRIOV_ENA, pf->flags); + return 0; +} + +/** + * ice_check_sriov_allowed - check if SR-IOV is allowed based on various checks + * @pf: PF to enable SR-IOV on + */ +static int ice_check_sriov_allowed(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + + if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) { + dev_err(dev, "This device is not capable of SR-IOV\n"); + return -EOPNOTSUPP; + } + + if (ice_is_safe_mode(pf)) { + dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n"); + return -EOPNOTSUPP; + } + + if (!ice_pf_state_is_nominal(pf)) { + dev_err(dev, "Cannot enable SR-IOV, device not ready\n"); + return -EBUSY; + } return 0; } /** - * ice_mbx_reset_snapshot - Reset mailbox snapshot structure - * @snap: pointer to mailbox snapshot structure in the ice_hw struct + * ice_sriov_configure - Enable or change number of VFs via sysfs + * @pdev: pointer to a pci_dev structure + * @num_vfs: number of VFs to allocate or 0 to free VFs + * + * This function is called when the user updates the number of VFs in sysfs. On + * success return whatever num_vfs was set to by the caller. Return negative on + * failure. + */ +int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct ice_pf *pf = pci_get_drvdata(pdev); + struct device *dev = ice_pf_to_dev(pf); + int err; + + err = ice_check_sriov_allowed(pf); + if (err) + return err; + + if (!num_vfs) { + if (!pci_vfs_assigned(pdev)) { + ice_mbx_deinit_snapshot(&pf->hw); + ice_free_vfs(pf); + if (pf->lag) + ice_enable_lag(pf->lag); + return 0; + } + + dev_err(dev, "can't free VFs because some are assigned to VMs.\n"); + return -EBUSY; + } + + err = ice_mbx_init_snapshot(&pf->hw, num_vfs); + if (err) + return err; + + err = ice_pci_sriov_ena(pf, num_vfs); + if (err) { + ice_mbx_deinit_snapshot(&pf->hw); + return err; + } + + if (pf->lag) + ice_disable_lag(pf->lag); + return num_vfs; +} + +/** + * ice_process_vflr_event - Free VF resources via IRQ calls + * @pf: pointer to the PF structure * - * Reset the mailbox snapshot structure and clear VF counter array.
+ * called from the VFLR IRQ handler to + * free up VF resources and state variables */ -static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap) +void ice_process_vflr_event(struct ice_pf *pf) { - u32 vfcntr_len; + struct ice_hw *hw = &pf->hw; + struct ice_vf *vf; + unsigned int bkt; + u32 reg; - if (!snap || !snap->mbx_vf.vf_cntr) + if (!test_and_clear_bit(ICE_VFLR_EVENT_PENDING, pf->state) || + !ice_has_vfs(pf)) return; - /* Clear VF counters. */ - vfcntr_len = snap->mbx_vf.vfcntr_len; - if (vfcntr_len) - memset(snap->mbx_vf.vf_cntr, 0, - (vfcntr_len * sizeof(*snap->mbx_vf.vf_cntr))); - - /* Reset mailbox snapshot for a new capture. */ - memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); - snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; -} - -/** - * ice_mbx_vf_state_handler - Handle states of the overflow algorithm - * @hw: pointer to the HW struct - * @mbx_data: pointer to structure containing mailbox data - * @vf_id: relative virtual function (VF) ID - * @is_malvf: boolean output to indicate if VF is malicious - * - * The function serves as an entry point for the malicious VF - * detection algorithm by handling the different states and state - * transitions of the algorithm: - * New snapshot: This state is entered when creating a new static - * snapshot. The data from any previous mailbox snapshot is - * cleared and a new capture of the mailbox head and tail is - * logged. This will be the new static snapshot to detect - * asynchronous messages sent by VFs. On capturing the snapshot - * and depending on whether the number of pending messages in that - * snapshot exceed the watermark value, the state machine enters - * traverse or detect states. - * Traverse: If pending message count is below watermark then iterate - * through the snapshot without any action on VF. - * Detect: If pending message count exceeds watermark traverse - * the static snapshot and look for a malicious VF. + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { + u32 reg_idx, bit_idx; + + reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32; + bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32; + /* read GLGEN_VFLRSTAT register to find out the flr VFs */ + reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx)); + if (reg & BIT(bit_idx)) + /* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */ + ice_reset_vf(vf, ICE_VF_RESET_VFLR | ICE_VF_RESET_LOCK); + } + mutex_unlock(&pf->vfs.table_lock); +} + +/** + * ice_get_vf_from_pfq - get the VF who owns the PF space queue passed in + * @pf: PF used to index all VFs + * @pfq: queue index relative to the PF's function space + * + * If no VF is found who owns the pfq then return NULL, otherwise return a + * pointer to the VF who owns the pfq + * + * If this function returns non-NULL, it acquires a reference count of the VF + * structure. The caller is responsible for calling ice_put_vf() to drop this + * reference. 
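+ *
+ * A minimal sketch of the intended lookup/release pairing (hypothetical
+ * caller; the real user is ice_vf_lan_overflow_event() below):
+ *
+ *	vf = ice_get_vf_from_pfq(pf, pfq);
+ *	if (!vf)
+ *		return;
+ *	// ... act on the offending VF ...
+ *	ice_put_vf(vf);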
+ */ +static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq) +{ + struct ice_vf *vf; + unsigned int bkt; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { + struct ice_vsi *vsi; + u16 rxq_idx; + + vsi = ice_get_vf_vsi(vf); + + ice_for_each_rxq(vsi, rxq_idx) + if (vsi->rxq_map[rxq_idx] == pfq) { + struct ice_vf *found; + + if (kref_get_unless_zero(&vf->refcnt)) + found = vf; + else + found = NULL; + rcu_read_unlock(); + return found; + } + } + rcu_read_unlock(); + + return NULL; +} + +/** + * ice_globalq_to_pfq - convert from global queue index to PF space queue index + * @pf: PF used for conversion + * @globalq: global queue index used to convert to PF space queue index + */ +static u32 ice_globalq_to_pfq(struct ice_pf *pf, u32 globalq) +{ + return globalq - pf->hw.func_caps.common_cap.rxq_first_id; +} + +/** + * ice_vf_lan_overflow_event - handle LAN overflow event for a VF + * @pf: PF that the LAN overflow event happened on + * @event: structure holding the event information for the LAN overflow event + * + * Determine if the LAN overflow event was caused by a VF queue. If it was not + * caused by a VF, do nothing. If a VF caused this LAN overflow event trigger a + * reset on the offending VF. + */ +void +ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) +{ + u32 gldcb_rtctq, queue; + struct ice_vf *vf; + + gldcb_rtctq = le32_to_cpu(event->desc.params.lan_overflow.prtdcb_ruptq); + dev_dbg(ice_pf_to_dev(pf), "GLDCB_RTCTQ: 0x%08x\n", gldcb_rtctq); + + /* event returns device global Rx queue number */ + queue = (gldcb_rtctq & GLDCB_RTCTQ_RXQNUM_M) >> + GLDCB_RTCTQ_RXQNUM_S; + + vf = ice_get_vf_from_pfq(pf, ice_globalq_to_pfq(pf, queue)); + if (!vf) + return; + + ice_reset_vf(vf, ICE_VF_RESET_NOTIFY | ICE_VF_RESET_LOCK); + ice_put_vf(vf); +} + +/** + * ice_set_vf_spoofchk + * @netdev: network interface device structure + * @vf_id: VF identifier + * @ena: flag to enable or disable feature + * + * Enable or disable VF spoof checking + */ +int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + struct ice_vsi *vf_vsi; + struct device *dev; + struct ice_vf *vf; + int ret; + + dev = ice_pf_to_dev(pf); + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return -EINVAL; + + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; + + vf_vsi = ice_get_vf_vsi(vf); + if (!vf_vsi) { + netdev_err(netdev, "VSI %d for VF %d is null\n", + vf->lan_vsi_idx, vf->vf_id); + ret = -EINVAL; + goto out_put_vf; + } + + if (vf_vsi->type != ICE_VSI_VF) { + netdev_err(netdev, "Type %d of VSI %d for VF %d is not ICE_VSI_VF\n", + vf_vsi->type, vf_vsi->vsi_num, vf->vf_id); + ret = -ENODEV; + goto out_put_vf; + } + + if (ena == vf->spoofchk) { + dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF"); + ret = 0; + goto out_put_vf; + } + + ret = ice_vsi_apply_spoofchk(vf_vsi, ena); + if (ret) + dev_err(dev, "Failed to set spoofchk %s for VF %d VSI %d, error %d\n", + ena ? 
"ON" : "OFF", vf->vf_id, vf_vsi->vsi_num, ret); + else + vf->spoofchk = ena; + +out_put_vf: + ice_put_vf(vf); + return ret; +} + +/** + * ice_get_vf_cfg + * @netdev: network interface device structure + * @vf_id: VF identifier + * @ivi: VF configuration structure + * + * return VF configuration */ int -ice_mbx_vf_state_handler(struct ice_hw *hw, - struct ice_mbx_data *mbx_data, u16 vf_id, - bool *is_malvf) +ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) { - struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; - struct ice_mbx_snap_buffer_data *snap_buf; - struct ice_ctl_q_info *cq = &hw->mailboxq; - enum ice_mbx_snapshot_state new_state; - int status = 0; + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vf *vf; + int ret; - if (!is_malvf || !mbx_data) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - /* When entering the mailbox state machine assume that the VF - * is not malicious until detected. - */ - *is_malvf = false; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; + + ivi->vf = vf_id; + ether_addr_copy(ivi->mac, vf->hw_lan_addr.addr); + + /* VF configuration for VLAN and applicable QoS */ + ivi->vlan = ice_vf_get_port_vlan_id(vf); + ivi->qos = ice_vf_get_port_vlan_prio(vf); + if (ice_vf_is_port_vlan_ena(vf)) + ivi->vlan_proto = cpu_to_be16(ice_vf_get_port_vlan_tpid(vf)); + + ivi->trusted = vf->trusted; + ivi->spoofchk = vf->spoofchk; + if (!vf->link_forced) + ivi->linkstate = IFLA_VF_LINK_STATE_AUTO; + else if (vf->link_up) + ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE; + else + ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; + ivi->max_tx_rate = vf->max_tx_rate; + ivi->min_tx_rate = vf->min_tx_rate; + +out_put_vf: + ice_put_vf(vf); + return ret; +} + +/** + * ice_unicast_mac_exists - check if the unicast MAC exists on the PF's switch + * @pf: PF used to reference the switch's rules + * @umac: unicast MAC to compare against existing switch rules + * + * Return true on the first/any match, else return false + */ +static bool ice_unicast_mac_exists(struct ice_pf *pf, u8 *umac) +{ + struct ice_sw_recipe *mac_recipe_list = + &pf->hw.switch_info->recp_list[ICE_SW_LKUP_MAC]; + struct ice_fltr_mgmt_list_entry *list_itr; + struct list_head *rule_head; + struct mutex *rule_lock; /* protect MAC filter list access */ + + rule_head = &mac_recipe_list->filt_rules; + rule_lock = &mac_recipe_list->filt_rule_lock; - /* Checking if max messages allowed to be processed while servicing current - * interrupt is not less than the defined AVF message threshold. 
- */ - if (mbx_data->max_num_msgs_mbx <= ICE_ASYNC_VF_MSG_THRESHOLD) + mutex_lock(rule_lock); + list_for_each_entry(list_itr, rule_head, list_entry) { + u8 *existing_mac = &list_itr->fltr_info.l_data.mac.mac_addr[0]; + + if (ether_addr_equal(existing_mac, umac)) { + mutex_unlock(rule_lock); + return true; + } + } + + mutex_unlock(rule_lock); + + return false; +} + +/** + * ice_set_vf_mac + * @netdev: network interface device structure + * @vf_id: VF identifier + * @mac: MAC address + * + * program VF MAC address + */ +int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vf *vf; + int ret; + + if (is_multicast_ether_addr(mac)) { + netdev_err(netdev, "%pM not a valid unicast address\n", mac); return -EINVAL; + } + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return -EINVAL; + + /* nothing left to do, unicast MAC already set */ + if (ether_addr_equal(vf->dev_lan_addr.addr, mac) && + ether_addr_equal(vf->hw_lan_addr.addr, mac)) { + ret = 0; + goto out_put_vf; + } + + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; - /* The watermark value should not be lesser than the threshold limit - * set for the number of asynchronous messages a VF can send to mailbox - * nor should it be greater than the maximum number of messages in the - * mailbox serviced in current interrupt. + if (ice_unicast_mac_exists(pf, mac)) { + netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n", + mac, vf_id, mac); + ret = -EINVAL; + goto out_put_vf; + } + + mutex_lock(&vf->cfg_lock); + + /* VF is notified of its new MAC via the PF's response to the + * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset */ - if (mbx_data->async_watermark_val < ICE_ASYNC_VF_MSG_THRESHOLD || - mbx_data->async_watermark_val > mbx_data->max_num_msgs_mbx) + ether_addr_copy(vf->dev_lan_addr.addr, mac); + ether_addr_copy(vf->hw_lan_addr.addr, mac); + if (is_zero_ether_addr(mac)) { + /* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */ + vf->pf_set_mac = false; + netdev_info(netdev, "Removing MAC on VF %d. VF driver will be reinitialized\n", + vf->vf_id); + } else { + /* PF will add MAC rule for the VF */ + vf->pf_set_mac = true; + netdev_info(netdev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n", + mac, vf_id); + } + + ice_reset_vf(vf, ICE_VF_RESET_NOTIFY); + mutex_unlock(&vf->cfg_lock); + +out_put_vf: + ice_put_vf(vf); + return ret; +} + +/** + * ice_set_vf_trust + * @netdev: network interface device structure + * @vf_id: VF identifier + * @trusted: Boolean value to enable/disable trusted VF + * + * Enable or disable a given VF as trusted + */ +int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vf *vf; + int ret; + + if (ice_is_eswitch_mode_switchdev(pf)) { + dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); + return -EOPNOTSUPP; + } + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - new_state = ICE_MAL_VF_DETECT_STATE_INVALID; - snap_buf = &snap->mbx_buf; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; - switch (snap_buf->state) { - case ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT: - /* Clear any previously held data in mailbox snapshot structure. 
*/ - ice_mbx_reset_snapshot(snap); + /* Check if already trusted */ + if (trusted == vf->trusted) { + ret = 0; + goto out_put_vf; + } - /* Collect the pending ARQ count, number of messages processed and - * the maximum number of messages allowed to be processed from the - * Mailbox for current interrupt. - */ - snap_buf->num_pending_arq = mbx_data->num_pending_arq; - snap_buf->num_msg_proc = mbx_data->num_msg_proc; - snap_buf->max_num_msgs_mbx = mbx_data->max_num_msgs_mbx; + mutex_lock(&vf->cfg_lock); - /* Capture a new static snapshot of the mailbox by logging the - * head and tail of snapshot and set num_iterations to the tail - * value to mark the start of the iteration through the snapshot. - */ - snap_buf->head = ICE_RQ_DATA_MASK(cq->rq.next_to_clean + - mbx_data->num_pending_arq); - snap_buf->tail = ICE_RQ_DATA_MASK(cq->rq.next_to_clean - 1); - snap_buf->num_iterations = snap_buf->tail; - - /* Pending ARQ messages returned by ice_clean_rq_elem - * is the difference between the head and tail of the - * mailbox queue. Comparing this value against the watermark - * helps to check if we potentially have malicious VFs. - */ - if (snap_buf->num_pending_arq >= - mbx_data->async_watermark_val) { - new_state = ICE_MAL_VF_DETECT_STATE_DETECT; - status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf); - } else { - new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE; - ice_mbx_traverse(hw, &new_state); - } - break; + vf->trusted = trusted; + ice_reset_vf(vf, ICE_VF_RESET_NOTIFY); + dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n", + vf_id, trusted ? "" : "un"); - case ICE_MAL_VF_DETECT_STATE_TRAVERSE: - new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE; - ice_mbx_traverse(hw, &new_state); - break; + mutex_unlock(&vf->cfg_lock); - case ICE_MAL_VF_DETECT_STATE_DETECT: - new_state = ICE_MAL_VF_DETECT_STATE_DETECT; - status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf); - break; +out_put_vf: + ice_put_vf(vf); + return ret; +} + +/** + * ice_set_vf_link_state + * @netdev: network interface device structure + * @vf_id: VF identifier + * @link_state: required link state + * + * Set VF's link state, irrespective of physical link state status + */ +int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vf *vf; + int ret; + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return -EINVAL; + + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; + switch (link_state) { + case IFLA_VF_LINK_STATE_AUTO: + vf->link_forced = false; + break; + case IFLA_VF_LINK_STATE_ENABLE: + vf->link_forced = true; + vf->link_up = true; + break; + case IFLA_VF_LINK_STATE_DISABLE: + vf->link_forced = true; + vf->link_up = false; + break; default: - new_state = ICE_MAL_VF_DETECT_STATE_INVALID; - status = -EIO; + ret = -EINVAL; + goto out_put_vf; } - snap_buf->state = new_state; + ice_vc_notify_vf_link_state(vf); + +out_put_vf: + ice_put_vf(vf); + return ret; +} + +/** + * ice_calc_all_vfs_min_tx_rate - calculate cumulative min Tx rate on all VFs + * @pf: PF associated with VFs + */ +static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf) +{ + struct ice_vf *vf; + unsigned int bkt; + int rate = 0; - return status; + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) + rate += vf->min_tx_rate; + rcu_read_unlock(); + + return rate; } /** - * ice_mbx_report_malvf - Track and note malicious VF - * @hw: pointer to the HW struct - * @all_malvfs: all malicious VFs tracked by PF - * @bitmap_len: length of bitmap in bits - 
* @vf_id: relative virtual function ID of the malicious VF - * @report_malvf: boolean to indicate if malicious VF must be reported + * ice_min_tx_rate_oversubscribed - check if min Tx rate causes oversubscription + * @vf: VF trying to configure min_tx_rate + * @min_tx_rate: min Tx rate in Mbps + * + * Check if the min_tx_rate being passed in will cause oversubscription of total + * min_tx_rate based on the current link speed and all other VFs' configured + * min_tx_rate. * - * This function will update a bitmap that keeps track of the malicious - * VFs attached to the PF. A malicious VF must be reported only once if - * discovered between VF resets or loading so the function checks - * the input vf_id against the bitmap to verify if the VF has been - * detected in any previous mailbox iterations. + * Return true if the passed min_tx_rate would cause oversubscription, else + * return false + */ +static bool +ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate) +{ + int link_speed_mbps = ice_get_link_speed_mbps(ice_get_vf_vsi(vf)); + int all_vfs_min_tx_rate = ice_calc_all_vfs_min_tx_rate(vf->pf); + + /* this VF's previous rate is being overwritten */ + all_vfs_min_tx_rate -= vf->min_tx_rate; + + if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) { + dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n", + min_tx_rate, vf->vf_id, + all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps, + link_speed_mbps); + return true; + } + + return false; +} + +/** + * ice_set_vf_bw - set min/max VF bandwidth + * @netdev: network interface device structure + * @vf_id: VF identifier + * @min_tx_rate: Minimum Tx rate in Mbps + * @max_tx_rate: Maximum Tx rate in Mbps */ int -ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs, - u16 bitmap_len, u16 vf_id, bool *report_malvf) +ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate) { - if (!all_malvfs || !report_malvf) + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vsi *vsi; + struct device *dev; + struct ice_vf *vf; + int ret; + + dev = ice_pf_to_dev(pf); + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - *report_malvf = false; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; + + vsi = ice_get_vf_vsi(vf); + + /* when max_tx_rate is zero that means no max Tx rate limiting, so only + * check if max_tx_rate is non-zero + */ + if (max_tx_rate && min_tx_rate > max_tx_rate) { + dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n", + min_tx_rate, max_tx_rate); + ret = -EINVAL; + goto out_put_vf; + } + + if (min_tx_rate && ice_is_dcb_active(pf)) { + dev_err(dev, "DCB on PF is currently enabled. 
VF min Tx rate limiting not allowed on this PF.\n"); + ret = -EOPNOTSUPP; + goto out_put_vf; + } + + if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) { + ret = -EINVAL; + goto out_put_vf; + } + + if (vf->min_tx_rate != (unsigned int)min_tx_rate) { + ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000); + if (ret) { + dev_err(dev, "Unable to set min-tx-rate for VF %d\n", + vf->vf_id); + goto out_put_vf; + } + + vf->min_tx_rate = min_tx_rate; + } + + if (vf->max_tx_rate != (unsigned int)max_tx_rate) { + ret = ice_set_max_bw_limit(vsi, (u64)max_tx_rate * 1000); + if (ret) { + dev_err(dev, "Unable to set max-tx-rate for VF %d\n", + vf->vf_id); + goto out_put_vf; + } + + vf->max_tx_rate = max_tx_rate; + } + +out_put_vf: + ice_put_vf(vf); + return ret; +} + +/** + * ice_get_vf_stats - populate some stats for the VF + * @netdev: the netdev of the PF + * @vf_id: the host OS identifier (0-255) + * @vf_stats: pointer to the OS memory to be initialized + */ +int ice_get_vf_stats(struct net_device *netdev, int vf_id, + struct ifla_vf_stats *vf_stats) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_eth_stats *stats; + struct ice_vsi *vsi; + struct ice_vf *vf; + int ret; - if (bitmap_len < hw->mbx_snapshot.mbx_vf.vfcntr_len) + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - if (vf_id >= bitmap_len) - return -EIO; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; - /* If the vf_id is found in the bitmap set bit and boolean to true */ - if (!test_and_set_bit(vf_id, all_malvfs)) - *report_malvf = true; + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + ret = -EINVAL; + goto out_put_vf; + } - return 0; + ice_update_eth_stats(vsi); + stats = &vsi->eth_stats; + + memset(vf_stats, 0, sizeof(*vf_stats)); + + vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast + + stats->rx_multicast; + vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast + + stats->tx_multicast; + vf_stats->rx_bytes = stats->rx_bytes; + vf_stats->tx_bytes = stats->tx_bytes; + vf_stats->broadcast = stats->rx_broadcast; + vf_stats->multicast = stats->rx_multicast; + vf_stats->rx_dropped = stats->rx_discards; + vf_stats->tx_dropped = stats->tx_discards; + +out_put_vf: + ice_put_vf(vf); + return ret; +} + +/** + * ice_is_supported_port_vlan_proto - make sure the vlan_proto is supported + * @hw: hardware structure used to check the VLAN mode + * @vlan_proto: VLAN TPID being checked + * + * If the device is configured in Double VLAN Mode (DVM), then both ETH_P_8021Q + * and ETH_P_8021AD are supported. If the device is configured in Single VLAN + * Mode (SVM), then only ETH_P_8021Q is supported. 
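+ *
+ * For example, a hypothetical caller validating an 802.1ad request (the
+ * pattern ice_set_vf_port_vlan() below uses):
+ *
+ *	if (!ice_is_supported_port_vlan_proto(&pf->hw, ETH_P_8021AD))
+ *		return -EPROTONOSUPPORT;	// SVM: only ETH_P_8021Q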
+ */ +static bool +ice_is_supported_port_vlan_proto(struct ice_hw *hw, u16 vlan_proto) +{ + bool is_supported = false; + + switch (vlan_proto) { + case ETH_P_8021Q: + is_supported = true; + break; + case ETH_P_8021AD: + if (ice_is_dvm_ena(hw)) + is_supported = true; + break; + } + + return is_supported; } /** - * ice_mbx_clear_malvf - Clear VF bitmap and counter for VF ID - * @snap: pointer to the mailbox snapshot structure - * @all_malvfs: all malicious VFs tracked by PF - * @bitmap_len: length of bitmap in bits - * @vf_id: relative virtual function ID of the malicious VF + * ice_set_vf_port_vlan + * @netdev: network interface device structure + * @vf_id: VF identifier + * @vlan_id: VLAN ID being set + * @qos: priority setting + * @vlan_proto: VLAN protocol * - * In case of a VF reset, this function can be called to clear - * the bit corresponding to the VF ID in the bitmap tracking all - * malicious VFs attached to the PF. The function also clears the - * VF counter array at the index of the VF ID. This is to ensure - * that the new VF loaded is not considered malicious before going - * through the overflow detection algorithm. + * program VF Port VLAN ID and/or QoS */ int -ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, - u16 bitmap_len, u16 vf_id) +ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, + __be16 vlan_proto) { - if (!snap || !all_malvfs) + struct ice_pf *pf = ice_netdev_to_pf(netdev); + u16 local_vlan_proto = ntohs(vlan_proto); + struct device *dev; + struct ice_vf *vf; + int ret; + + dev = ice_pf_to_dev(pf); + + if (vlan_id >= VLAN_N_VID || qos > 7) { + dev_err(dev, "Invalid Port VLAN parameters for VF %d, ID %d, QoS %d\n", + vf_id, vlan_id, qos); return -EINVAL; + } - if (bitmap_len < snap->mbx_vf.vfcntr_len) + if (!ice_is_supported_port_vlan_proto(&pf->hw, local_vlan_proto)) { + dev_err(dev, "VF VLAN protocol 0x%04x is not supported\n", + local_vlan_proto); + return -EPROTONOSUPPORT; + } + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) return -EINVAL; - /* Ensure VF ID value is not larger than bitmap or VF counter length */ - if (vf_id >= bitmap_len || vf_id >= snap->mbx_vf.vfcntr_len) - return -EIO; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; - /* Clear VF ID bit in the bitmap tracking malicious VFs attached to PF */ - clear_bit(vf_id, all_malvfs); + if (ice_vf_get_port_vlan_prio(vf) == qos && + ice_vf_get_port_vlan_tpid(vf) == local_vlan_proto && + ice_vf_get_port_vlan_id(vf) == vlan_id) { + /* duplicate request, so just return success */ + dev_dbg(dev, "Duplicate port VLAN %u, QoS %u, TPID 0x%04x request\n", + vlan_id, qos, local_vlan_proto); + ret = 0; + goto out_put_vf; + } - /* Clear the VF counter in the mailbox snapshot structure for that VF ID. - * This is to ensure that if a VF is unloaded and a new one brought back - * up with the same VF ID for a snapshot currently in traversal or detect - * state the counter for that VF ID does not increment on top of existing - * values in the mailbox overflow detection algorithm. 
- */ - snap->mbx_vf.vf_cntr[vf_id] = 0; + mutex_lock(&vf->cfg_lock); - return 0; + vf->port_vlan_info = ICE_VLAN(local_vlan_proto, vlan_id, qos); + if (ice_vf_is_port_vlan_ena(vf)) + dev_info(dev, "Setting VLAN %u, QoS %u, TPID 0x%04x on VF %d\n", + vlan_id, qos, local_vlan_proto, vf_id); + else + dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id); + + ice_reset_vf(vf, ICE_VF_RESET_NOTIFY); + mutex_unlock(&vf->cfg_lock); + +out_put_vf: + ice_put_vf(vf); + return ret; } /** - * ice_mbx_init_snapshot - Initialize mailbox snapshot structure - * @hw: pointer to the hardware structure - * @vf_count: number of VFs allocated on a PF - * - * Clear the mailbox snapshot structure and allocate memory - * for the VF counter array based on the number of VFs allocated - * on that PF. + * ice_print_vf_rx_mdd_event - print VF Rx malicious driver detect event + * @vf: pointer to the VF structure + */ +void ice_print_vf_rx_mdd_event(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + struct device *dev; + + dev = ice_pf_to_dev(pf); + + dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n", + vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id, + vf->dev_lan_addr.addr, + test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags) + ? "on" : "off"); +} + +/** + * ice_print_vfs_mdd_events - print VFs malicious driver detect event + * @pf: pointer to the PF structure * - * Assumption: This function will assume ice_get_caps() has already been - * called to ensure that the vf_count can be compared against the number - * of VFs supported as defined in the functional capabilities of the device. + * Called from ice_handle_mdd_event to rate limit and print VFs MDD events. */ -int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count) +void ice_print_vfs_mdd_events(struct ice_pf *pf) { - struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + struct ice_vf *vf; + unsigned int bkt; - /* Ensure that the number of VFs allocated is non-zero and - * is not greater than the number of supported VFs defined in - * the functional capabilities of the PF. - */ - if (!vf_count || vf_count > hw->func_caps.num_allocd_vfs) - return -EINVAL; + /* check that there are pending MDD events to print */ + if (!test_and_clear_bit(ICE_MDD_VF_PRINT_PENDING, pf->state)) + return; - snap->mbx_vf.vf_cntr = devm_kcalloc(ice_hw_to_dev(hw), vf_count, - sizeof(*snap->mbx_vf.vf_cntr), - GFP_KERNEL); - if (!snap->mbx_vf.vf_cntr) - return -ENOMEM; + /* VF MDD event logs are rate limited to one second intervals */ + if (time_is_after_jiffies(pf->vfs.last_printed_mdd_jiffies + HZ * 1)) + return; - /* Setting the VF counter length to the number of allocated - * VFs for given PF's functional capabilities. - */ - snap->mbx_vf.vfcntr_len = vf_count; + pf->vfs.last_printed_mdd_jiffies = jiffies; - /* Clear mbx_buf in the mailbox snaphot structure and setting the - * mailbox snapshot state to a new capture. 
- */ - memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); - snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { + /* only print Rx MDD event message if there are new events */ + if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) { + vf->mdd_rx_events.last_printed = + vf->mdd_rx_events.count; + ice_print_vf_rx_mdd_event(vf); + } - return 0; + /* only print Tx MDD event message if there are new events */ + if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) { + vf->mdd_tx_events.last_printed = + vf->mdd_tx_events.count; + + dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n", + vf->mdd_tx_events.count, hw->pf_id, vf->vf_id, + vf->dev_lan_addr.addr); + } + } + mutex_unlock(&pf->vfs.table_lock); } /** - * ice_mbx_deinit_snapshot - Free mailbox snapshot structure - * @hw: pointer to the hardware structure + * ice_restore_all_vfs_msi_state - restore VF MSI state after PF FLR + * @pdev: pointer to a pci_dev structure * - * Clear the mailbox snapshot structure and free the VF counter array. + * Called when recovering from a PF FLR to restore interrupt capability to + * the VFs. + */ +void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) +{ + u16 vf_id; + int pos; + + if (!pci_num_vf(pdev)) + return; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (pos) { + struct pci_dev *vfdev; + + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, + &vf_id); + vfdev = pci_get_device(pdev->vendor, vf_id, NULL); + while (vfdev) { + if (vfdev->is_virtfn && vfdev->physfn == pdev) + pci_restore_msi_state(vfdev); + vfdev = pci_get_device(pdev->vendor, vf_id, + vfdev); + } + } +} + +/** + * ice_is_malicious_vf - helper function to detect a malicious VF + * @pf: ptr to struct ice_pf + * @event: pointer to the AQ event + * @num_msg_proc: the number of messages processed so far + * @num_msg_pending: the number of messages peinding in admin queue */ -void ice_mbx_deinit_snapshot(struct ice_hw *hw) +bool +ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, + u16 num_msg_proc, u16 num_msg_pending) { - struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + s16 vf_id = le16_to_cpu(event->desc.retval); + struct device *dev = ice_pf_to_dev(pf); + struct ice_mbx_data mbxdata; + bool malvf = false; + struct ice_vf *vf; + int status; + + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return false; - /* Free VF counter array and reset VF counter length */ - devm_kfree(ice_hw_to_dev(hw), snap->mbx_vf.vf_cntr); - snap->mbx_vf.vfcntr_len = 0; + if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) + goto out_put_vf; + + mbxdata.num_msg_proc = num_msg_proc; + mbxdata.num_pending_arq = num_msg_pending; + mbxdata.max_num_msgs_mbx = pf->hw.mailboxq.num_rq_entries; +#define ICE_MBX_OVERFLOW_WATERMARK 64 + mbxdata.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK; + + /* check to see if we have a malicious VF */ + status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, vf_id, &malvf); + if (status) + goto out_put_vf; + + if (malvf) { + bool report_vf = false; + + /* if the VF is malicious and we haven't let the user + * know about it, then let them know now + */ + status = ice_mbx_report_malvf(&pf->hw, pf->vfs.malvfs, + ICE_MAX_SRIOV_VFS, vf_id, + &report_vf); + if (status) + dev_dbg(dev, "Error reporting malicious VF\n"); + + if (report_vf) { + struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); + + if (pf_vsi) + dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous 
messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n", + &vf->dev_lan_addr.addr[0], + pf_vsi->netdev->dev_addr); + } + } - /* Clear mbx_buf in the mailbox snaphot structure */ - memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); +out_put_vf: + ice_put_vf(vf); + return malvf; } diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h index 68686a3fd7e8..955ab810a198 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.h +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h @@ -3,50 +3,159 @@ #ifndef _ICE_SRIOV_H_ #define _ICE_SRIOV_H_ +#include "ice_virtchnl_fdir.h" +#include "ice_vf_lib.h" +#include "ice_virtchnl.h" -#include "ice_type.h" -#include "ice_controlq.h" +/* Static VF transaction/status register def */ +#define VF_DEVICE_STATUS 0xAA +#define VF_TRANS_PENDING_M 0x20 -/* Defining the mailbox message threshold as 63 asynchronous - * pending messages. Normal VF functionality does not require - * sending more than 63 asynchronous pending message. - */ -#define ICE_ASYNC_VF_MSG_THRESHOLD 63 +/* wait defines for polling PF_PCI_CIAD register status */ +#define ICE_PCI_CIAD_WAIT_COUNT 100 +#define ICE_PCI_CIAD_WAIT_DELAY_US 1 + +/* VF resource constraints */ +#define ICE_MIN_QS_PER_VF 1 +#define ICE_NONQ_VECS_VF 1 +#define ICE_NUM_VF_MSIX_MED 17 +#define ICE_NUM_VF_MSIX_SMALL 5 +#define ICE_NUM_VF_MSIX_MULTIQ_MIN 3 +#define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) +#define ICE_MAX_VF_RESET_TRIES 40 +#define ICE_MAX_VF_RESET_SLEEP_MS 20 #ifdef CONFIG_PCI_IOV +void ice_process_vflr_event(struct ice_pf *pf); +int ice_sriov_configure(struct pci_dev *pdev, int num_vfs); +int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); int -ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, - u8 *msg, u16 msglen, struct ice_sq_cd *cd); +ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi); + +void ice_free_vfs(struct ice_pf *pf); +void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event); +void ice_restore_all_vfs_msi_state(struct pci_dev *pdev); +bool +ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, + u16 num_msg_proc, u16 num_msg_pending); -u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed); int -ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data, - u16 vf_id, bool *is_mal_vf); +ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, + __be16 vlan_proto); + int -ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, - u16 bitmap_len, u16 vf_id); -int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count); -void ice_mbx_deinit_snapshot(struct ice_hw *hw); +ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate); + +int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted); + +int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state); + +int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena); + +int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector); + int -ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs, - u16 bitmap_len, u16 vf_id, bool *report_malvf); +ice_get_vf_stats(struct net_device *netdev, int vf_id, + struct ifla_vf_stats *vf_stats); +void +ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event); +void ice_print_vfs_mdd_events(struct ice_pf *pf); +void 
ice_print_vf_rx_mdd_event(struct ice_vf *vf); +bool +ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto); #else /* CONFIG_PCI_IOV */ +static inline void ice_process_vflr_event(struct ice_pf *pf) { } +static inline void ice_free_vfs(struct ice_pf *pf) { } +static inline +void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) { } +static inline +void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) { } +static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { } +static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { } +static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { } + +static inline bool +ice_is_malicious_vf(struct ice_pf __always_unused *pf, + struct ice_rq_event_info __always_unused *event, + u16 __always_unused num_msg_proc, + u16 __always_unused num_msg_pending) +{ + return false; +} + static inline int -ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw, - u16 __always_unused vfid, u32 __always_unused v_opcode, - u32 __always_unused v_retval, u8 __always_unused *msg, - u16 __always_unused msglen, - struct ice_sq_cd __always_unused *cd) +ice_sriov_configure(struct pci_dev __always_unused *pdev, + int __always_unused num_vfs) { - return 0; + return -EOPNOTSUPP; +} + +static inline int +ice_set_vf_mac(struct net_device __always_unused *netdev, + int __always_unused vf_id, u8 __always_unused *mac) +{ + return -EOPNOTSUPP; } -static inline u32 -ice_conv_link_speed_to_virtchnl(bool __always_unused adv_link_support, - u16 __always_unused link_speed) +static inline int +ice_get_vf_cfg(struct net_device __always_unused *netdev, + int __always_unused vf_id, + struct ifla_vf_info __always_unused *ivi) +{ + return -EOPNOTSUPP; +} + +static inline int +ice_set_vf_trust(struct net_device __always_unused *netdev, + int __always_unused vf_id, bool __always_unused trusted) +{ + return -EOPNOTSUPP; +} + +static inline int +ice_set_vf_port_vlan(struct net_device __always_unused *netdev, + int __always_unused vf_id, u16 __always_unused vid, + u8 __always_unused qos, __be16 __always_unused v_proto) +{ + return -EOPNOTSUPP; +} + +static inline int +ice_set_vf_spoofchk(struct net_device __always_unused *netdev, + int __always_unused vf_id, bool __always_unused ena) +{ + return -EOPNOTSUPP; +} + +static inline int +ice_set_vf_link_state(struct net_device __always_unused *netdev, + int __always_unused vf_id, int __always_unused link_state) +{ + return -EOPNOTSUPP; +} + +static inline int +ice_set_vf_bw(struct net_device __always_unused *netdev, + int __always_unused vf_id, int __always_unused min_tx_rate, + int __always_unused max_tx_rate) +{ + return -EOPNOTSUPP; +} + +static inline int +ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, + struct ice_q_vector __always_unused *q_vector) { return 0; } +static inline int +ice_get_vf_stats(struct net_device __always_unused *netdev, + int __always_unused vf_id, + struct ifla_vf_stats __always_unused *vf_stats) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_PCI_IOV */ #endif /* _ICE_SRIOV_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 7f3d97595890..25b8f6f726eb 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -5565,7 +5565,7 @@ ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, *offsets = dummy_ipv6_gtpu_ipv4_udp_packet_offsets; } else { *pkt = dummy_ipv6_gtpu_ipv4_tcp_packet; - *pkt_len = 
sizeof(dummy_ipv6_gtpu_ipv4_tcp_packet); + *pkt_len = sizeof(dummy_ipv6_gtpu_ipv4_tcp_packet); *offsets = dummy_ipv6_gtpu_ipv4_tcp_packet_offsets; } } diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h index cf685247c07a..ae98d5a8ff60 100644 --- a/drivers/net/ethernet/intel/ice/ice_trace.h +++ b/drivers/net/ethernet/intel/ice/ice_trace.h @@ -216,6 +216,30 @@ DEFINE_EVENT(ice_xmit_template, name, \ DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring); DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring_drop); +DECLARE_EVENT_CLASS(ice_tx_tstamp_template, + TP_PROTO(struct sk_buff *skb, int idx), + + TP_ARGS(skb, idx), + + TP_STRUCT__entry(__field(void *, skb) + __field(int, idx)), + + TP_fast_assign(__entry->skb = skb; + __entry->idx = idx;), + + TP_printk("skb %pK idx %d", + __entry->skb, __entry->idx) +); +#define DEFINE_TX_TSTAMP_OP_EVENT(name) \ +DEFINE_EVENT(ice_tx_tstamp_template, name, \ + TP_PROTO(struct sk_buff *skb, int idx), \ + TP_ARGS(skb, idx)) + +DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_request); +DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_fw_req); +DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_fw_done); +DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_complete); + /* End tracepoints */ #endif /* _ICE_TRACE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 28fcab26b868..f2a518a1fd94 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -9,6 +9,7 @@ #define ICE_CHNL_MAX_TC 16 #include "ice_hw_autogen.h" +#include "ice_devids.h" #include "ice_osdep.h" #include "ice_controlq.h" #include "ice_lan_tx_rx.h" diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c new file mode 100644 index 000000000000..6578059d9479 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -0,0 +1,1029 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022, Intel Corporation. */ + +#include "ice_vf_lib_private.h" +#include "ice.h" +#include "ice_lib.h" +#include "ice_fltr.h" +#include "ice_virtchnl_allowlist.h" + +/* Public functions which may be accessed by all driver files */ + +/** + * ice_get_vf_by_id - Get pointer to VF by ID + * @pf: the PF private structure + * @vf_id: the VF ID to locate + * + * Locate and return a pointer to the VF structure associated with a given ID. + * Returns NULL if the ID does not have a valid VF structure associated with + * it. + * + * This function takes a reference to the VF, which must be released by + * calling ice_put_vf() once the caller is finished accessing the VF structure + * returned. + */ +struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) +{ + struct ice_vf *vf; + + rcu_read_lock(); + hash_for_each_possible_rcu(pf->vfs.table, vf, entry, vf_id) { + if (vf->vf_id == vf_id) { + struct ice_vf *found; + + if (kref_get_unless_zero(&vf->refcnt)) + found = vf; + else + found = NULL; + + rcu_read_unlock(); + return found; + } + } + rcu_read_unlock(); + + return NULL; +} + +/** + * ice_release_vf - Release VF associated with a refcount + * @ref: the kref decremented to zero + * + * Callback function for kref_put to release a VF once its reference count has + * hit zero. 
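+ *
+ * This is the standard kref pattern; a condensed sketch of how the pieces
+ * in this patch pair up:
+ *
+ *	kref_init(&vf->refcnt);			// table holds the initial reference
+ *	kref_get_unless_zero(&vf->refcnt);	// each lookup takes another
+ *	kref_put(&vf->refcnt, ice_release_vf);	// last put calls vf_ops->free()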
+ */ +static void ice_release_vf(struct kref *ref) +{ + struct ice_vf *vf = container_of(ref, struct ice_vf, refcnt); + + vf->vf_ops->free(vf); +} + +/** + * ice_put_vf - Release a reference to a VF + * @vf: the VF structure to decrease reference count on + * + * Decrease the reference count for a VF, and free the entry if it is no + * longer in use. + * + * This must be called after ice_get_vf_by_id() once the reference to the VF + * structure is no longer used. Otherwise, the VF structure will never be + * freed. + */ +void ice_put_vf(struct ice_vf *vf) +{ + kref_put(&vf->refcnt, ice_release_vf); +} + +/** + * ice_has_vfs - Return true if the PF has any associated VFs + * @pf: the PF private structure + * + * Return whether or not the PF has any allocated VFs. + * + * Note that this function only guarantees that there are no VFs at the point + * of calling it. It does not guarantee that no more VFs will be added. + */ +bool ice_has_vfs(struct ice_pf *pf) +{ + /* A simple check that the hash table is not empty does not require + * the mutex or rcu_read_lock. + */ + return !hash_empty(pf->vfs.table); +} + +/** + * ice_get_num_vfs - Get number of allocated VFs + * @pf: the PF private structure + * + * Return the total number of allocated VFs. NOTE: VF IDs are not guaranteed + * to be contiguous. Do not assume that a VF ID is guaranteed to be less than + * the output of this function. + */ +u16 ice_get_num_vfs(struct ice_pf *pf) +{ + struct ice_vf *vf; + unsigned int bkt; + u16 num_vfs = 0; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) + num_vfs++; + rcu_read_unlock(); + + return num_vfs; +} + +/** + * ice_get_vf_vsi - get VF's VSI based on the stored index + * @vf: VF used to get VSI + */ +struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf) +{ + if (vf->lan_vsi_idx == ICE_NO_VSI) + return NULL; + + return vf->pf->vsi[vf->lan_vsi_idx]; +} + +/** + * ice_is_vf_disabled + * @vf: pointer to the VF info + * + * If the PF has been disabled, there is no need to reset the VF until the PF + * is active again. Similarly, if the VF has been disabled, this means something + * else is resetting the VF, so we shouldn't continue. + * + * Returns true if the caller should consider the VF as disabled whether + * because that single VF is explicitly disabled or because the PF is + * currently disabled. + */ +bool ice_is_vf_disabled(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + + return (test_bit(ICE_VF_DIS, pf->state) || + test_bit(ICE_VF_STATE_DIS, vf->vf_states)); +} + +/** + * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset + * @vf: The VF being reset + * + * The max poll time is about 800ms, which is about the maximum time it takes + * for a VF to be reset and/or a VF driver to be removed. + */ +static void ice_wait_on_vf_reset(struct ice_vf *vf) +{ + int i; + + for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) { + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) + break; + msleep(ICE_MAX_VF_RESET_SLEEP_MS); + } +} + +/** + * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried + * @vf: VF to check if it's ready to be configured/queried + * + * The purpose of this function is to make sure the VF is not in reset, not + * disabled, and initialized so it can be configured and/or queried by a host + * administrator.
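+ *
+ * Callers in ice_sriov.c follow the same shape; a condensed sketch:
+ *
+ *	vf = ice_get_vf_by_id(pf, vf_id);
+ *	if (!vf)
+ *		return -EINVAL;
+ *	ret = ice_check_vf_ready_for_cfg(vf);
+ *	if (ret)
+ *		goto out_put_vf;
+ *	// ... configure or query the VF ...
+ * out_put_vf:
+ *	ice_put_vf(vf);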
+ */ +int ice_check_vf_ready_for_cfg(struct ice_vf *vf) +{ + ice_wait_on_vf_reset(vf); + + if (ice_is_vf_disabled(vf)) + return -EINVAL; + + if (ice_check_vf_init(vf)) + return -EBUSY; + + return 0; +} + +/** + * ice_trigger_vf_reset - Reset a VF on HW + * @vf: pointer to the VF structure + * @is_vflr: true if VFLR was issued, false if not + * @is_pfr: true if the reset was triggered due to a previous PFR + * + * Trigger hardware to start a reset for a particular VF. Expects the caller + * to wait the proper amount of time to allow hardware to reset the VF before + * it cleans up and restores VF functionality. + */ +static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) +{ + /* Inform VF that it is no longer active, as a warning */ + clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); + + /* Disable VF's configuration API during reset. The flag is re-enabled + * when it's safe again to access VF's VSI. + */ + clear_bit(ICE_VF_STATE_INIT, vf->vf_states); + + /* VF_MBX_ARQLEN and VF_MBX_ATQLEN are cleared by PFR, so the driver + * needs to clear them in the case of VFR/VFLR. If this is done for + * PFR, it can mess up VF resets because the VF driver may already + * have started cleanup by the time we get here. + */ + if (!is_pfr) + vf->vf_ops->clear_mbx_register(vf); + + vf->vf_ops->trigger_reset_register(vf, is_vflr); +} + +static void ice_vf_clear_counters(struct ice_vf *vf) +{ + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + + vf->num_mac = 0; + vsi->num_vlan = 0; + memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events)); + memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events)); +} + +/** + * ice_vf_pre_vsi_rebuild - tasks to be done prior to VSI rebuild + * @vf: VF to perform pre VSI rebuild tasks + * + * These tasks are items that don't need to be amortized since they are most + * likely called in a for loop with all VF(s) in the reset_all_vfs() case. + */ +static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf) +{ + ice_vf_clear_counters(vf); + vf->vf_ops->clear_reset_trigger(vf); +} + +/** + * ice_vf_rebuild_vsi - rebuild the VF's VSI + * @vf: VF to rebuild the VSI for + * + * This is only called when all VF(s) are being reset (i.e. PCIe Reset on the + * host, PFR, CORER, etc.). 
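+ *
+ * The single-VF reset path goes through the per-implementation hook
+ * instead, e.g.:
+ *
+ *	vf->vf_ops->vsi_rebuild(vf);	// ice_sriov_vsi_rebuild() for SR-IOV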
+ */ +static int ice_vf_rebuild_vsi(struct ice_vf *vf) +{ + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + struct ice_pf *pf = vf->pf; + + if (ice_vsi_rebuild(vsi, true)) { + dev_err(ice_pf_to_dev(pf), "failed to rebuild VF %d VSI\n", + vf->vf_id); + return -EIO; + } + /* vsi->idx will remain the same in this case so don't update + * vf->lan_vsi_idx + */ + vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx); + vf->lan_vsi_num = vsi->vsi_num; + + return 0; +} + +/** + * ice_is_any_vf_in_promisc - check if any VF(s) are in promiscuous mode + * @pf: PF structure for accessing VF(s) + * + * Return false if no VF(s) are in unicast and/or multicast promiscuous mode, + * else return true + */ +bool ice_is_any_vf_in_promisc(struct ice_pf *pf) +{ + bool is_vf_promisc = false; + struct ice_vf *vf; + unsigned int bkt; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { + /* found a VF that has promiscuous mode configured */ + if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || + test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { + is_vf_promisc = true; + break; + } + } + rcu_read_unlock(); + + return is_vf_promisc; +} + +/** + * ice_vf_set_vsi_promisc - Enable promiscuous mode for a VF VSI + * @vf: the VF to configure + * @vsi: the VF's VSI + * @promisc_m: the promiscuous mode to enable + */ +int +ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m) +{ + struct ice_hw *hw = &vsi->back->hw; + int status; + + if (ice_vf_is_port_vlan_ena(vf)) + status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m, + ice_vf_get_port_vlan_id(vf)); + else if (ice_vsi_has_non_zero_vlans(vsi)) + status = ice_fltr_set_vlan_vsi_promisc(hw, vsi, promisc_m); + else + status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m, 0); + + if (status && status != -EEXIST) { + dev_err(ice_pf_to_dev(vsi->back), "enable Tx/Rx filter promiscuous mode on VF-%u failed, error: %d\n", + vf->vf_id, status); + return status; + } + + return 0; +} + +/** + * ice_vf_clear_vsi_promisc - Disable promiscuous mode for a VF VSI + * @vf: the VF to configure + * @vsi: the VF's VSI + * @promisc_m: the promiscuous mode to disable + */ +int +ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m) +{ + struct ice_hw *hw = &vsi->back->hw; + int status; + + if (ice_vf_is_port_vlan_ena(vf)) + status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m, + ice_vf_get_port_vlan_id(vf)); + else if (ice_vsi_has_non_zero_vlans(vsi)) + status = ice_fltr_clear_vlan_vsi_promisc(hw, vsi, promisc_m); + else + status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m, 0); + + if (status && status != -ENOENT) { + dev_err(ice_pf_to_dev(vsi->back), "disable Tx/Rx filter promiscuous mode on VF-%u failed, error: %d\n", + vf->vf_id, status); + return status; + } + + return 0; +} + +/** + * ice_reset_all_vfs - reset all allocated VFs in one go + * @pf: pointer to the PF structure + * + * First, tell the hardware to reset each VF, then do all the waiting in one + * chunk, and finally finish restoring each VF after the wait. This is useful + * during PF routines which need to reset all VFs, as otherwise it must perform + * these resets in a serialized fashion.
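+ *
+ * In sketch form, the three passes over the VF table are:
+ *
+ *	ice_for_each_vf(pf, bkt, vf)	// 1) trigger every reset
+ *		ice_trigger_vf_reset(vf, true, true);
+ *	ice_for_each_vf(pf, bkt, vf)	// 2) wait once for the whole set
+ *		vf->vf_ops->poll_reset_status(vf);
+ *	ice_for_each_vf(pf, bkt, vf)	// 3) rebuild VSIs, restore config
+ *		vf->vf_ops->post_vsi_rebuild(vf);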
+ */ +void ice_reset_all_vfs(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; + struct ice_vf *vf; + unsigned int bkt; + + /* If we don't have any VFs, then there is nothing to reset */ + if (!ice_has_vfs(pf)) + return; + + mutex_lock(&pf->vfs.table_lock); + + /* clear all malicious info if the VFs are getting reset */ + ice_for_each_vf(pf, bkt, vf) + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_SRIOV_VFS, vf->vf_id)) + dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", + vf->vf_id); + + /* If VFs have been disabled, there is no need to reset */ + if (test_and_set_bit(ICE_VF_DIS, pf->state)) { + mutex_unlock(&pf->vfs.table_lock); + return; + } + + /* Begin reset on all VFs at once */ + ice_for_each_vf(pf, bkt, vf) + ice_trigger_vf_reset(vf, true, true); + + /* HW requires some time to make sure it can flush the FIFO for a VF + * when it resets it. Now that we've triggered all of the VFs, iterate + * the table again and wait for each VF to complete. + */ + ice_for_each_vf(pf, bkt, vf) { + if (!vf->vf_ops->poll_reset_status(vf)) { + /* Display a warning if at least one VF didn't manage + * to reset in time, but continue on with the + * operation. + */ + dev_warn(dev, "VF %u reset check timeout\n", vf->vf_id); + break; + } + } + + /* free VF resources to begin resetting the VSI state */ + ice_for_each_vf(pf, bkt, vf) { + mutex_lock(&vf->cfg_lock); + + vf->driver_caps = 0; + ice_vc_set_default_allowlist(vf); + + ice_vf_fdir_exit(vf); + ice_vf_fdir_init(vf); + /* clean VF control VSI when resetting VFs since it should be + * setup only when VF creates its first FDIR rule. + */ + if (vf->ctrl_vsi_idx != ICE_NO_VSI) + ice_vf_ctrl_invalidate_vsi(vf); + + ice_vf_pre_vsi_rebuild(vf); + ice_vf_rebuild_vsi(vf); + vf->vf_ops->post_vsi_rebuild(vf); + + mutex_unlock(&vf->cfg_lock); + } + + if (ice_is_eswitch_mode_switchdev(pf)) + if (ice_eswitch_rebuild(pf)) + dev_warn(dev, "eswitch rebuild failed\n"); + + ice_flush(hw); + clear_bit(ICE_VF_DIS, pf->state); + + mutex_unlock(&pf->vfs.table_lock); +} + +/** + * ice_notify_vf_reset - Notify VF of a reset event + * @vf: pointer to the VF structure + */ +static void ice_notify_vf_reset(struct ice_vf *vf) +{ + struct ice_hw *hw = &vf->pf->hw; + struct virtchnl_pf_event pfe; + + /* Bail out if VF is in disabled state, neither initialized, nor active + * state - otherwise proceed with notifications + */ + if ((!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && + !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) || + test_bit(ICE_VF_STATE_DIS, vf->vf_states)) + return; + + pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING; + pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM; + ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, + VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe), + NULL); +} + +/** + * ice_reset_vf - Reset a particular VF + * @vf: pointer to the VF structure + * @flags: flags controlling behavior of the reset + * + * Flags: + * ICE_VF_RESET_VFLR - Indicates a reset is due to VFLR event + * ICE_VF_RESET_NOTIFY - Send VF a notification prior to reset + * ICE_VF_RESET_LOCK - Acquire VF cfg_lock before resetting + * + * Returns 0 if the VF is currently in reset, if the resets are disabled, or + * if the VF resets successfully. Returns an error code if the VF fails to + * rebuild. 
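+ *
+ * Flags may be combined; for example, the VFLR handler in this file issues:
+ *
+ *	ice_reset_vf(vf, ICE_VF_RESET_VFLR | ICE_VF_RESET_LOCK);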
+ */ +int ice_reset_vf(struct ice_vf *vf, u32 flags) +{ + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + struct device *dev; + struct ice_hw *hw; + u8 promisc_m; + int err = 0; + bool rsd; + + dev = ice_pf_to_dev(pf); + hw = &pf->hw; + + if (flags & ICE_VF_RESET_NOTIFY) + ice_notify_vf_reset(vf); + + if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { + dev_dbg(dev, "Trying to reset VF %d, but all VF resets are disabled\n", + vf->vf_id); + return 0; + } + + if (ice_is_vf_disabled(vf)) { + dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", + vf->vf_id); + return 0; + } + + if (flags & ICE_VF_RESET_LOCK) + mutex_lock(&vf->cfg_lock); + else + lockdep_assert_held(&vf->cfg_lock); + + /* Set VF disable bit state here, before triggering reset */ + set_bit(ICE_VF_STATE_DIS, vf->vf_states); + ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false); + + vsi = ice_get_vf_vsi(vf); + + ice_dis_vf_qs(vf); + + /* Call Disable LAN Tx queue AQ whether or not queues are + * enabled. This is needed for successful completion of VFR. + */ + ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL, + NULL, vf->vf_ops->reset_type, vf->vf_id, NULL); + + /* poll VPGEN_VFRSTAT reg to make sure + * that reset is complete + */ + rsd = vf->vf_ops->poll_reset_status(vf); + + /* Display a warning if VF didn't manage to reset in time, but need to + * continue on with the operation. + */ + if (!rsd) + dev_warn(dev, "VF reset check timeout on VF %d\n", vf->vf_id); + + vf->driver_caps = 0; + ice_vc_set_default_allowlist(vf); + + /* disable promiscuous modes in case they were enabled + * ignore any error if disabling process failed + */ + if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || + test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { + if (ice_vf_is_port_vlan_ena(vf) || vsi->num_vlan) + promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; + else + promisc_m = ICE_UCAST_PROMISC_BITS; + + if (ice_vf_clear_vsi_promisc(vf, vsi, promisc_m)) + dev_err(dev, "disabling promiscuous mode failed\n"); + } + + ice_eswitch_del_vf_mac_rule(vf); + + ice_vf_fdir_exit(vf); + ice_vf_fdir_init(vf); + /* clean VF control VSI when resetting VF since it should be setup + * only when VF creates its first FDIR rule. 
+ */ + if (vf->ctrl_vsi_idx != ICE_NO_VSI) + ice_vf_ctrl_vsi_release(vf); + + ice_vf_pre_vsi_rebuild(vf); + + if (vf->vf_ops->vsi_rebuild(vf)) { + dev_err(dev, "Failed to release and setup the VF%u's VSI\n", + vf->vf_id); + err = -EFAULT; + goto out_unlock; + } + + vf->vf_ops->post_vsi_rebuild(vf); + vsi = ice_get_vf_vsi(vf); + ice_eswitch_update_repr(vsi); + ice_eswitch_replay_vf_mac_rule(vf); + + /* if the VF has been reset allow it to come up again */ + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, + ICE_MAX_SRIOV_VFS, vf->vf_id)) + dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", + vf->vf_id); + +out_unlock: + if (flags & ICE_VF_RESET_LOCK) + mutex_unlock(&vf->cfg_lock); + + return err; +} + +/** + * ice_set_vf_state_qs_dis - Set VF queues state to disabled + * @vf: pointer to the VF structure + */ +void ice_set_vf_state_qs_dis(struct ice_vf *vf) +{ + /* Clear Rx/Tx enabled queues flag */ + bitmap_zero(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF); + bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF); + clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); +} + +/* Private functions only accessed from other virtualization files */ + +/** + * ice_dis_vf_qs - Disable the VF queues + * @vf: pointer to the VF structure + */ +void ice_dis_vf_qs(struct ice_vf *vf) +{ + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + + ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); + ice_vsi_stop_all_rx_rings(vsi); + ice_set_vf_state_qs_dis(vf); +} + +/** + * ice_check_vf_init - helper to check if VF init complete + * @vf: the pointer to the VF to check + */ +int ice_check_vf_init(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + + if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { + dev_err(ice_pf_to_dev(pf), "VF ID: %u in reset. Try again.\n", + vf->vf_id); + return -EBUSY; + } + return 0; +} + +/** + * ice_vf_get_port_info - Get the VF's port info structure + * @vf: VF used to get the port info structure for + */ +struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf) +{ + return vf->pf->hw.port_info; +} + +static int ice_cfg_mac_antispoof(struct ice_vsi *vsi, bool enable) +{ + struct ice_vsi_ctx *ctx; + int err; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->info.sec_flags = vsi->info.sec_flags; + ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); + + if (enable) + ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; + else + ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; + + err = ice_update_vsi(&vsi->back->hw, vsi->idx, ctx, NULL); + if (err) + dev_err(ice_pf_to_dev(vsi->back), "Failed to configure Tx MAC anti-spoof %s for VSI %d, error %d\n", + enable ? 
"ON" : "OFF", vsi->vsi_num, err); + else + vsi->info.sec_flags = ctx->info.sec_flags; + + kfree(ctx); + + return err; +} + +/** + * ice_vsi_ena_spoofchk - enable Tx spoof checking for this VSI + * @vsi: VSI to enable Tx spoof checking for + */ +static int ice_vsi_ena_spoofchk(struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops; + int err; + + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + + err = vlan_ops->ena_tx_filtering(vsi); + if (err) + return err; + + return ice_cfg_mac_antispoof(vsi, true); +} + +/** + * ice_vsi_dis_spoofchk - disable Tx spoof checking for this VSI + * @vsi: VSI to disable Tx spoof checking for + */ +static int ice_vsi_dis_spoofchk(struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops; + int err; + + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + + err = vlan_ops->dis_tx_filtering(vsi); + if (err) + return err; + + return ice_cfg_mac_antispoof(vsi, false); +} + +/** + * ice_vsi_apply_spoofchk - Apply Tx spoof checking setting to a VSI + * @vsi: VSI associated to the VF + * @enable: whether to enable or disable the spoof checking + */ +int ice_vsi_apply_spoofchk(struct ice_vsi *vsi, bool enable) +{ + int err; + + if (enable) + err = ice_vsi_ena_spoofchk(vsi); + else + err = ice_vsi_dis_spoofchk(vsi); + + return err; +} + +/** + * ice_is_vf_trusted + * @vf: pointer to the VF info + */ +bool ice_is_vf_trusted(struct ice_vf *vf) +{ + return test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); +} + +/** + * ice_vf_has_no_qs_ena - check if the VF has any Rx or Tx queues enabled + * @vf: the VF to check + * + * Returns true if the VF has no Rx and no Tx queues enabled and returns false + * otherwise + */ +bool ice_vf_has_no_qs_ena(struct ice_vf *vf) +{ + return (!bitmap_weight(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF) && + !bitmap_weight(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF)); +} + +/** + * ice_is_vf_link_up - check if the VF's link is up + * @vf: VF to check if link is up + */ +bool ice_is_vf_link_up(struct ice_vf *vf) +{ + struct ice_port_info *pi = ice_vf_get_port_info(vf); + + if (ice_check_vf_init(vf)) + return false; + + if (ice_vf_has_no_qs_ena(vf)) + return false; + else if (vf->link_forced) + return vf->link_up; + else + return pi->phy.link_info.link_info & + ICE_AQ_LINK_UP; +} + +/** + * ice_vf_set_host_trust_cfg - set trust setting based on pre-reset value + * @vf: VF to configure trust setting for + */ +static void ice_vf_set_host_trust_cfg(struct ice_vf *vf) +{ + if (vf->trusted) + set_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); + else + clear_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); +} + +/** + * ice_vf_rebuild_host_mac_cfg - add broadcast and the VF's perm_addr/LAA + * @vf: VF to add MAC filters for + * + * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver + * always re-adds a broadcast filter and the VF's perm_addr/LAA after reset. 
+ */ +static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + u8 broadcast[ETH_ALEN]; + int status; + + if (ice_is_eswitch_mode_switchdev(vf->pf)) + return 0; + + eth_broadcast_addr(broadcast); + status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI); + if (status) { + dev_err(dev, "failed to add broadcast MAC filter for VF %u, error %d\n", + vf->vf_id, status); + return status; + } + + vf->num_mac++; + + if (is_valid_ether_addr(vf->hw_lan_addr.addr)) { + status = ice_fltr_add_mac(vsi, vf->hw_lan_addr.addr, + ICE_FWD_TO_VSI); + if (status) { + dev_err(dev, "failed to add default unicast MAC filter %pM for VF %u, error %d\n", + &vf->hw_lan_addr.addr[0], vf->vf_id, + status); + return status; + } + vf->num_mac++; + + ether_addr_copy(vf->dev_lan_addr.addr, vf->hw_lan_addr.addr); + } + + return 0; +} + +/** + * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN + * @vf: VF to add VLAN filters for + * @vsi: Pointer to VSI + * + * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver + * always re-adds either a VLAN 0 or port VLAN based filter after reset. + */ +static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf, struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + struct device *dev = ice_pf_to_dev(vf->pf); + int err; + + if (ice_vf_is_port_vlan_ena(vf)) { + err = vlan_ops->set_port_vlan(vsi, &vf->port_vlan_info); + if (err) { + dev_err(dev, "failed to configure port VLAN via VSI parameters for VF %u, error %d\n", + vf->vf_id, err); + return err; + } + + err = vlan_ops->add_vlan(vsi, &vf->port_vlan_info); + } else { + err = ice_vsi_add_vlan_zero(vsi); + } + + if (err) { + dev_err(dev, "failed to add VLAN %u filter for VF %u during VF rebuild, error %d\n", + ice_vf_is_port_vlan_ena(vf) ? + ice_vf_get_port_vlan_id(vf) : 0, vf->vf_id, err); + return err; + } + + err = vlan_ops->ena_rx_filtering(vsi); + if (err) + dev_warn(dev, "failed to enable Rx VLAN filtering for VF %d VSI %d during VF rebuild, error %d\n", + vf->vf_id, vsi->idx, err); + + return 0; +} + +/** + * ice_vf_rebuild_host_tx_rate_cfg - re-apply the Tx rate limiting configuration + * @vf: VF to re-apply the configuration for + * + * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver + * needs to re-apply the host-configured Tx rate limiting configuration.
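The rebuild path below pairs with a host-side setter that caches the rates on the VF structure; a hedged sketch of such a setter follows (example_set_vf_rate is not in this patch; the min <= max validation is an assumption). Rates are cached in Mbps, and the rebuild below multiplies by 1000, i.e. converts Mbps to Kbps, when programming the scheduler:

	static int example_set_vf_rate(struct ice_vf *vf, int min_tx_rate,
				       int max_tx_rate)
	{
		if (max_tx_rate && min_tx_rate > max_tx_rate)
			return -EINVAL;

		/* cached in Mbps; re-applied after every VF reset by
		 * ice_vf_rebuild_host_tx_rate_cfg() below
		 */
		vf->min_tx_rate = min_tx_rate;
		vf->max_tx_rate = max_tx_rate;
		return 0;
	}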
+ */ +static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + int err; + + if (vf->min_tx_rate) { + err = ice_set_min_bw_limit(vsi, (u64)vf->min_tx_rate * 1000); + if (err) { + dev_err(dev, "failed to set min Tx rate to %d Mbps for VF %u, error %d\n", + vf->min_tx_rate, vf->vf_id, err); + return err; + } + } + + if (vf->max_tx_rate) { + err = ice_set_max_bw_limit(vsi, (u64)vf->max_tx_rate * 1000); + if (err) { + dev_err(dev, "failed to set max Tx rate to %d Mbps for VF %u, error %d\n", + vf->max_tx_rate, vf->vf_id, err); + return err; + } + } + + return 0; +} + +/** + * ice_vf_rebuild_aggregator_node_cfg - rebuild aggregator node config + * @vsi: Pointer to VSI + * + * This function moves VSI into corresponding scheduler aggregator node + * based on cached value of "aggregator node info" per VSI + */ +static void ice_vf_rebuild_aggregator_node_cfg(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + int status; + + if (!vsi->agg_node) + return; + + dev = ice_pf_to_dev(pf); + if (vsi->agg_node->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) { + dev_dbg(dev, + "agg_id %u already has reached max_num_vsis %u\n", + vsi->agg_node->agg_id, vsi->agg_node->num_vsis); + return; + } + + status = ice_move_vsi_to_agg(pf->hw.port_info, vsi->agg_node->agg_id, + vsi->idx, vsi->tc_cfg.ena_tc); + if (status) + dev_dbg(dev, "unable to move VSI idx %u into aggregator %u node", + vsi->idx, vsi->agg_node->agg_id); + else + vsi->agg_node->num_vsis++; +} + +/** + * ice_vf_rebuild_host_cfg - host admin configuration is persistent across reset + * @vf: VF to rebuild host configuration on + */ +void ice_vf_rebuild_host_cfg(struct ice_vf *vf) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + + ice_vf_set_host_trust_cfg(vf); + + if (ice_vf_rebuild_host_mac_cfg(vf)) + dev_err(dev, "failed to rebuild default MAC configuration for VF %d\n", + vf->vf_id); + + if (ice_vf_rebuild_host_vlan_cfg(vf, vsi)) + dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n", + vf->vf_id); + + if (ice_vf_rebuild_host_tx_rate_cfg(vf)) + dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n", + vf->vf_id); + + if (ice_vsi_apply_spoofchk(vsi, vf->spoofchk)) + dev_err(dev, "failed to rebuild spoofchk configuration for VF %d\n", + vf->vf_id); + + /* rebuild aggregator node config for main VF VSI */ + ice_vf_rebuild_aggregator_node_cfg(vsi); +} + +/** + * ice_vf_ctrl_invalidate_vsi - invalidate ctrl_vsi_idx to remove VSI access + * @vf: VF that control VSI is being invalidated on + */ +void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf) +{ + vf->ctrl_vsi_idx = ICE_NO_VSI; +} + +/** + * ice_vf_ctrl_vsi_release - invalidate the VF's control VSI after freeing it + * @vf: VF that control VSI is being released on + */ +void ice_vf_ctrl_vsi_release(struct ice_vf *vf) +{ + ice_vsi_release(vf->pf->vsi[vf->ctrl_vsi_idx]); + ice_vf_ctrl_invalidate_vsi(vf); +} + +/** + * ice_vf_ctrl_vsi_setup - Set up a VF control VSI + * @vf: VF to setup control VSI for + * + * Returns pointer to the successfully allocated VSI struct on success, + * otherwise returns NULL on failure. 
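A sketch of the lazy-creation pattern this comment describes (example_get_or_create_ctrl_vsi is hypothetical, and this assumes, as the release helper above implies, that the VSI allocation records the new index in vf->ctrl_vsi_idx):

	static struct ice_vsi *example_get_or_create_ctrl_vsi(struct ice_vf *vf)
	{
		/* the control VSI exists only after the VF's first FDIR rule */
		if (vf->ctrl_vsi_idx != ICE_NO_VSI)
			return vf->pf->vsi[vf->ctrl_vsi_idx];

		return ice_vf_ctrl_vsi_setup(vf);
	}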
+ */ +struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) +{ + struct ice_port_info *pi = ice_vf_get_port_info(vf); + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + + vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf, NULL); + if (!vsi) { + dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n"); + ice_vf_ctrl_invalidate_vsi(vf); + } + + return vsi; +} + +/** + * ice_vf_invalidate_vsi - invalidate vsi_idx/vsi_num to remove VSI access + * @vf: VF to remove access to VSI for + */ +void ice_vf_invalidate_vsi(struct ice_vf *vf) +{ + vf->lan_vsi_idx = ICE_NO_VSI; + vf->lan_vsi_num = ICE_NO_VSI; +} + +/** + * ice_vf_set_initialized - VF is ready for VIRTCHNL communication + * @vf: VF to set in initialized state + * + * After this function the VF will be ready to receive/handle the + * VIRTCHNL_OP_GET_VF_RESOURCES message + */ +void ice_vf_set_initialized(struct ice_vf *vf) +{ + ice_set_vf_state_qs_dis(vf); + clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states); + clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states); + clear_bit(ICE_VF_STATE_DIS, vf->vf_states); + set_bit(ICE_VF_STATE_INIT, vf->vf_states); + memset(&vf->vlan_v2_caps, 0, sizeof(vf->vlan_v2_caps)); +} diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h new file mode 100644 index 000000000000..831b667dc5b2 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -0,0 +1,290 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2021, Intel Corporation. */ + +#ifndef _ICE_VF_LIB_H_ +#define _ICE_VF_LIB_H_ + +#include <linux/types.h> +#include <linux/hashtable.h> +#include <linux/bitmap.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <net/devlink.h> +#include <linux/avf/virtchnl.h> +#include "ice_type.h" +#include "ice_virtchnl_fdir.h" +#include "ice_vsi_vlan_ops.h" + +#define ICE_MAX_SRIOV_VFS 256 + +/* VF resource constraints */ +#define ICE_MAX_RSS_QS_PER_VF 16 + +struct ice_pf; +struct ice_vf; +struct ice_virtchnl_ops; + +/* VF capabilities */ +enum ice_virtchnl_cap { + ICE_VIRTCHNL_VF_CAP_PRIVILEGE = 0, +}; + +/* Specific VF states */ +enum ice_vf_states { + ICE_VF_STATE_INIT = 0, /* PF is initializing VF */ + ICE_VF_STATE_ACTIVE, /* VF resources are allocated for use */ + ICE_VF_STATE_QS_ENA, /* VF queue(s) enabled */ + ICE_VF_STATE_DIS, + ICE_VF_STATE_MC_PROMISC, + ICE_VF_STATE_UC_PROMISC, + ICE_VF_STATES_NBITS +}; + +struct ice_time_mac { + unsigned long time_modified; + u8 addr[ETH_ALEN]; +}; + +/* VF MDD events print structure */ +struct ice_mdd_vf_events { + u16 count; /* total count of Rx|Tx events */ + /* count number of the last printed event */ + u16 last_printed; +}; + +/* VF operations */ +struct ice_vf_ops { + enum ice_disq_rst_src reset_type; + void (*free)(struct ice_vf *vf); + void (*clear_mbx_register)(struct ice_vf *vf); + void (*trigger_reset_register)(struct ice_vf *vf, bool is_vflr); + bool (*poll_reset_status)(struct ice_vf *vf); + void (*clear_reset_trigger)(struct ice_vf *vf); + int (*vsi_rebuild)(struct ice_vf *vf); + void (*post_vsi_rebuild)(struct ice_vf *vf); +}; + +/* Virtchnl/SR-IOV config info */ +struct ice_vfs { + DECLARE_HASHTABLE(table, 8); /* table of VF entries */ + struct mutex table_lock; /* Lock for protecting the hash table */ + u16 num_supported; /* max supported VFs on this PF */ + u16 num_qps_per; /* number of queue pairs per VF */ + u16 num_msix_per; /* number of MSI-X vectors per VF */ + unsigned long last_printed_mdd_jiffies; /* MDD message rate limit */ + DECLARE_BITMAP(malvfs, 
ICE_MAX_SRIOV_VFS); /* malicious VF indicator */ +}; + +/* VF information structure */ +struct ice_vf { + struct hlist_node entry; + struct rcu_head rcu; + struct kref refcnt; + struct ice_pf *pf; + + /* Used during virtchnl message handling and NDO ops against the VF + * that will trigger a VFR + */ + struct mutex cfg_lock; + + u16 vf_id; /* VF ID in the PF space */ + u16 lan_vsi_idx; /* index into PF struct */ + u16 ctrl_vsi_idx; + struct ice_vf_fdir fdir; + /* first vector index of this VF in the PF space */ + int first_vector_idx; + struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ + struct virtchnl_version_info vf_ver; + u32 driver_caps; /* reported by VF driver */ + struct virtchnl_ether_addr dev_lan_addr; + struct virtchnl_ether_addr hw_lan_addr; + struct ice_time_mac legacy_last_added_umac; + DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF); + DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF); + struct ice_vlan port_vlan_info; /* Port VLAN ID, QoS, and TPID */ + struct virtchnl_vlan_caps vlan_v2_caps; + u8 pf_set_mac:1; /* VF MAC address set by VMM admin */ + u8 trusted:1; + u8 spoofchk:1; + u8 link_forced:1; + u8 link_up:1; /* only valid if VF link is forced */ + /* VSI indices - actual VSI pointers are maintained in the PF structure + * When assigned, these will be non-zero, because VSI 0 is always + * the main LAN VSI for the PF. + */ + u16 lan_vsi_num; /* ID as used by firmware */ + unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */ + unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */ + DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ + + unsigned long vf_caps; /* VF's adv. capabilities */ + u8 num_req_qs; /* num of queue pairs requested by VF */ + u16 num_mac; + u16 num_vf_qs; /* num of queue configured per VF */ + struct ice_mdd_vf_events mdd_rx_events; + struct ice_mdd_vf_events mdd_tx_events; + DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX); + + struct ice_repr *repr; + const struct ice_virtchnl_ops *virtchnl_ops; + const struct ice_vf_ops *vf_ops; + + /* devlink port data */ + struct devlink_port devlink_port; +}; + +/* Flags for controlling behavior of ice_reset_vf */ +enum ice_vf_reset_flags { + ICE_VF_RESET_VFLR = BIT(0), /* Indicate a VFLR reset */ + ICE_VF_RESET_NOTIFY = BIT(1), /* Notify VF prior to reset */ + ICE_VF_RESET_LOCK = BIT(2), /* Acquire the VF cfg_lock */ +}; + +static inline u16 ice_vf_get_port_vlan_id(struct ice_vf *vf) +{ + return vf->port_vlan_info.vid; +} + +static inline u8 ice_vf_get_port_vlan_prio(struct ice_vf *vf) +{ + return vf->port_vlan_info.prio; +} + +static inline bool ice_vf_is_port_vlan_ena(struct ice_vf *vf) +{ + return (ice_vf_get_port_vlan_id(vf) || ice_vf_get_port_vlan_prio(vf)); +} + +static inline u16 ice_vf_get_port_vlan_tpid(struct ice_vf *vf) +{ + return vf->port_vlan_info.tpid; +} + +/* VF Hash Table access functions + * + * These functions provide abstraction for interacting with the VF hash table. + * In general, direct access to the hash table should be avoided outside of + * these functions where possible. + * + * The VF entries in the hash table are protected by reference counting to + * track lifetime of accesses from the table. The ice_get_vf_by_id() function + * obtains a reference to the VF structure which must be dropped by using + * ice_put_vf(). 
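A typical lookup/use/release sequence under this reference-counting contract might look as follows (example_cfg_vf is hypothetical; the helpers are declared just below):

	static int example_cfg_vf(struct ice_pf *pf, u16 vf_id)
	{
		struct ice_vf *vf = ice_get_vf_by_id(pf, vf_id);
		int err;

		if (!vf)
			return -ENOENT;	/* no VF with this ID, or it is going away */

		mutex_lock(&vf->cfg_lock);
		err = ice_check_vf_ready_for_cfg(vf);
		/* ... on success, reconfigure the VF here ... */
		mutex_unlock(&vf->cfg_lock);

		ice_put_vf(vf);	/* drop the reference taken by the lookup */
		return err;
	}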
+ */ + +/** + * ice_for_each_vf - Iterate over each VF entry + * @pf: pointer to the PF private structure + * @bkt: bucket index used for iteration + * @vf: pointer to the VF entry currently being processed in the loop. + * + * The bkt variable is an unsigned integer iterator used to traverse the VF + * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is. + * Use vf->vf_id to get the id number if needed. + * + * The caller is expected to be under the table_lock mutex for the entire + * loop. Use this iterator if your loop is long or if it might sleep. + */ +#define ice_for_each_vf(pf, bkt, vf) \ + hash_for_each((pf)->vfs.table, (bkt), (vf), entry) + +/** + * ice_for_each_vf_rcu - Iterate over each VF entry protected by RCU + * @pf: pointer to the PF private structure + * @bkt: bucket index used for iteration + * @vf: pointer to the VF entry currently being processed in the loop. + * + * The bkt variable is an unsigned integer iterator used to traverse the VF + * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is. + * Use vf->vf_id to get the id number if needed. + * + * The caller is expected to be under rcu_read_lock() for the entire loop. + * Only use this iterator if your loop is short and you can guarantee it does + * not sleep. + */ +#define ice_for_each_vf_rcu(pf, bkt, vf) \ + hash_for_each_rcu((pf)->vfs.table, (bkt), (vf), entry) + +#ifdef CONFIG_PCI_IOV +struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id); +void ice_put_vf(struct ice_vf *vf); +bool ice_has_vfs(struct ice_pf *pf); +u16 ice_get_num_vfs(struct ice_pf *pf); +struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf); +bool ice_is_vf_disabled(struct ice_vf *vf); +int ice_check_vf_ready_for_cfg(struct ice_vf *vf); +void ice_set_vf_state_qs_dis(struct ice_vf *vf); +bool ice_is_any_vf_in_promisc(struct ice_pf *pf); +int +ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m); +int +ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m); +int ice_reset_vf(struct ice_vf *vf, u32 flags); +void ice_reset_all_vfs(struct ice_pf *pf); +#else /* CONFIG_PCI_IOV */ +static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) +{ + return NULL; +} + +static inline void ice_put_vf(struct ice_vf *vf) +{ +} + +static inline bool ice_has_vfs(struct ice_pf *pf) +{ + return false; +} + +static inline u16 ice_get_num_vfs(struct ice_pf *pf) +{ + return 0; +} + +static inline struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf) +{ + return NULL; +} + +static inline bool ice_is_vf_disabled(struct ice_vf *vf) +{ + return true; +} + +static inline int ice_check_vf_ready_for_cfg(struct ice_vf *vf) +{ + return -EOPNOTSUPP; +} + +static inline void ice_set_vf_state_qs_dis(struct ice_vf *vf) +{ +} + +static inline bool ice_is_any_vf_in_promisc(struct ice_pf *pf) +{ + return false; +} + +static inline int +ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m) +{ + return -EOPNOTSUPP; +} + +static inline int +ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m) +{ + return -EOPNOTSUPP; +} + +static inline int ice_reset_vf(struct ice_vf *vf, u32 flags) +{ + return 0; +} + +static inline void ice_reset_all_vfs(struct ice_pf *pf) +{ +} +#endif /* !CONFIG_PCI_IOV */ + +#endif /* _ICE_VF_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h new file mode 100644 index 000000000000..15887e772c76 --- /dev/null +++ 
b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2021, Intel Corporation. */ + +#ifndef _ICE_VF_LIB_PRIVATE_H_ +#define _ICE_VF_LIB_PRIVATE_H_ + +#include "ice_vf_lib.h" + +/* This header file is for exposing functions in ice_vf_lib.c to other files + * which are also conditionally compiled depending on CONFIG_PCI_IOV. + * Functions which may be used by other files should be exposed as part of + * ice_vf_lib.h + * + * Functions in this file are exposed only when CONFIG_PCI_IOV is enabled, and + * thus this header must not be included by .c files which may be compiled + * with CONFIG_PCI_IOV disabled. + * + * To avoid this, only include this header file directly within .c files that + * are conditionally enabled in the "ice-$(CONFIG_PCI_IOV)" block. + */ + +#ifndef CONFIG_PCI_IOV +#warning "Only include ice_vf_lib_private.h in CONFIG_PCI_IOV virtualization files" +#endif + +void ice_dis_vf_qs(struct ice_vf *vf); +int ice_check_vf_init(struct ice_vf *vf); +struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf); +int ice_vsi_apply_spoofchk(struct ice_vsi *vsi, bool enable); +bool ice_is_vf_trusted(struct ice_vf *vf); +bool ice_vf_has_no_qs_ena(struct ice_vf *vf); +bool ice_is_vf_link_up(struct ice_vf *vf); +void ice_vf_rebuild_host_cfg(struct ice_vf *vf); +void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf); +void ice_vf_ctrl_vsi_release(struct ice_vf *vf); +struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf); +void ice_vf_invalidate_vsi(struct ice_vf *vf); +void ice_vf_set_initialized(struct ice_vf *vf); + +#endif /* _ICE_VF_LIB_PRIVATE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c new file mode 100644 index 000000000000..fc8c93fa4455 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018, Intel Corporation. */ + +#include "ice_common.h" +#include "ice_vf_mbx.h" + +/** + * ice_aq_send_msg_to_vf + * @hw: pointer to the hardware structure + * @vfid: VF ID to send msg + * @v_opcode: opcodes for VF-PF communication + * @v_retval: return error code + * @msg: pointer to the msg buffer + * @msglen: msg length + * @cd: pointer to command details + * + * Send message to VF driver (0x0802) using mailbox + * queue and asynchronously sending message via + * ice_sq_send_cmd() function + */ +int +ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, + u8 *msg, u16 msglen, struct ice_sq_cd *cd) +{ + struct ice_aqc_pf_vf_msg *cmd; + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_vf); + + cmd = &desc.params.virt; + cmd->id = cpu_to_le32(vfid); + + desc.cookie_high = cpu_to_le32(v_opcode); + desc.cookie_low = cpu_to_le32(v_retval); + + if (msglen) + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd); +} + +/** + * ice_conv_link_speed_to_virtchnl + * @adv_link_support: determines the format of the returned link speed + * @link_speed: variable containing the link_speed to be converted + * + * Convert link speed supported by HW to link speed supported by virtchnl. + * If adv_link_support is true, then return link speed in Mbps. Else return + * link speed as a VIRTCHNL_LINK_SPEED_* casted to a u32. 
Note that the caller + * needs to cast back to an enum virtchnl_link_speed in the case where + * adv_link_support is false, but when adv_link_support is true the caller can + * expect the speed in Mbps. + */ +u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) +{ + u32 speed; + + if (adv_link_support) + switch (link_speed) { + case ICE_AQ_LINK_SPEED_10MB: + speed = ICE_LINK_SPEED_10MBPS; + break; + case ICE_AQ_LINK_SPEED_100MB: + speed = ICE_LINK_SPEED_100MBPS; + break; + case ICE_AQ_LINK_SPEED_1000MB: + speed = ICE_LINK_SPEED_1000MBPS; + break; + case ICE_AQ_LINK_SPEED_2500MB: + speed = ICE_LINK_SPEED_2500MBPS; + break; + case ICE_AQ_LINK_SPEED_5GB: + speed = ICE_LINK_SPEED_5000MBPS; + break; + case ICE_AQ_LINK_SPEED_10GB: + speed = ICE_LINK_SPEED_10000MBPS; + break; + case ICE_AQ_LINK_SPEED_20GB: + speed = ICE_LINK_SPEED_20000MBPS; + break; + case ICE_AQ_LINK_SPEED_25GB: + speed = ICE_LINK_SPEED_25000MBPS; + break; + case ICE_AQ_LINK_SPEED_40GB: + speed = ICE_LINK_SPEED_40000MBPS; + break; + case ICE_AQ_LINK_SPEED_50GB: + speed = ICE_LINK_SPEED_50000MBPS; + break; + case ICE_AQ_LINK_SPEED_100GB: + speed = ICE_LINK_SPEED_100000MBPS; + break; + default: + speed = ICE_LINK_SPEED_UNKNOWN; + break; + } + else + /* Virtchnl speeds are not defined for every speed supported in + * the hardware. To maintain compatibility with older AVF + * drivers, while reporting the speed the new speed values are + * resolved to the closest known virtchnl speeds + */ + switch (link_speed) { + case ICE_AQ_LINK_SPEED_10MB: + case ICE_AQ_LINK_SPEED_100MB: + speed = (u32)VIRTCHNL_LINK_SPEED_100MB; + break; + case ICE_AQ_LINK_SPEED_1000MB: + case ICE_AQ_LINK_SPEED_2500MB: + case ICE_AQ_LINK_SPEED_5GB: + speed = (u32)VIRTCHNL_LINK_SPEED_1GB; + break; + case ICE_AQ_LINK_SPEED_10GB: + speed = (u32)VIRTCHNL_LINK_SPEED_10GB; + break; + case ICE_AQ_LINK_SPEED_20GB: + speed = (u32)VIRTCHNL_LINK_SPEED_20GB; + break; + case ICE_AQ_LINK_SPEED_25GB: + speed = (u32)VIRTCHNL_LINK_SPEED_25GB; + break; + case ICE_AQ_LINK_SPEED_40GB: + case ICE_AQ_LINK_SPEED_50GB: + case ICE_AQ_LINK_SPEED_100GB: + speed = (u32)VIRTCHNL_LINK_SPEED_40GB; + break; + default: + speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN; + break; + } + + return speed; +} + +/* The mailbox overflow detection algorithm helps to check if there + * is a possibility of a malicious VF transmitting too many MBX messages to the + * PF. + * 1. The mailbox snapshot structure, ice_mbx_snapshot, is initialized during + * driver initialization in ice_init_hw() using ice_mbx_init_snapshot(). + * The struct ice_mbx_snapshot helps to track and traverse a static window of + * messages within the mailbox queue while looking for a malicious VF. + * + * 2. When the caller starts processing its mailbox queue in response to an + * interrupt, the structure ice_mbx_snapshot is expected to be cleared before + * the algorithm can be run for the first time for that interrupt. This can be + * done via ice_mbx_reset_snapshot(). + * + * 3. For every message read by the caller from the MBX Queue, the caller must + * call the detection algorithm's entry function ice_mbx_vf_state_handler(). + * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is + * filled as it is required to be passed to the algorithm. + * + * 4. Every time a message is read from the MBX queue, a VFId is received which + * is passed to the state handler. 
The boolean output is_malvf of the state + * handler ice_mbx_vf_state_handler() indicates to the caller whether the + * VF is malicious. + * + * 5. When a VF is identified as malicious, the caller can send a message + * to the system administrator. The caller can invoke ice_mbx_report_malvf() + * to help determine whether a malicious VF is to be reported. This function + * requires the caller to maintain a global bitmap to track all malicious VFs + * and pass that to ice_mbx_report_malvf() along with the VF ID that was + * identified as malicious by ice_mbx_vf_state_handler(). + * + * 6. The global bitmap maintained by the PF can be cleared completely if the + * PF is in reset, or the bit corresponding to a VF can be cleared if that + * VF is in reset. When a VF is shut down and brought back up, we assume that + * the newly brought-up VF is not malicious, and hence report it again if it + * is later found to be malicious. + * + * 7. The function ice_mbx_reset_snapshot() is called to reset the information + * in ice_mbx_snapshot for every new mailbox interrupt handled. + * + * 8. The memory allocated for variables in ice_mbx_snapshot is de-allocated + * when the driver is unloaded. + */ +#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M) +/* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that + * the max messages check must be ignored in the algorithm + */ +#define ICE_IGNORE_MAX_MSG_CNT 0xFFFF + +/** + * ice_mbx_traverse - Pass through mailbox snapshot + * @hw: pointer to the HW struct + * @new_state: new algorithm state + * + * Traverse the mailbox static snapshot without checking + * for malicious VFs. + */ +static void +ice_mbx_traverse(struct ice_hw *hw, + enum ice_mbx_snapshot_state *new_state) +{ + struct ice_mbx_snap_buffer_data *snap_buf; + u32 num_iterations; + + snap_buf = &hw->mbx_snapshot.mbx_buf; + + /* As the mailbox buffer is circular, apply a mask + * to the incremented iteration count. + */ + num_iterations = ICE_RQ_DATA_MASK(++snap_buf->num_iterations); + + /* Check either of the conditions below to exit snapshot traversal: + * Condition-1: If the number of iterations in the mailbox is equal to + * the mailbox head, which would indicate that we have reached the end + * of the static snapshot. + * Condition-2: If the maximum messages serviced in the mailbox for a + * given interrupt is the highest possible value, then there is no need + * to check if the number of messages processed is equal to it. If not, + * check if the number of messages processed is greater than or equal + * to the maximum number of mailbox entries serviced in the current work item. + */ + if (num_iterations == snap_buf->head || + (snap_buf->max_num_msgs_mbx < ICE_IGNORE_MAX_MSG_CNT && + ++snap_buf->num_msg_proc >= snap_buf->max_num_msgs_mbx)) + *new_state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; +} + +/** + * ice_mbx_detect_malvf - Detect malicious VF in snapshot + * @hw: pointer to the HW struct + * @vf_id: relative virtual function ID + * @new_state: new algorithm state + * @is_malvf: boolean output to indicate if VF is malicious + * + * This function tracks the number of asynchronous messages + * sent per VF and marks the VF as malicious if it exceeds + * the permissible number of messages to send.
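Tying steps 3 through 5 of the overview together, a per-message service routine in the PF's mailbox interrupt path could plausibly look like this (example_service_mbx_msg is hypothetical; the helpers and the malvfs bitmap are from this patch):

	static void example_service_mbx_msg(struct ice_pf *pf,
					    struct ice_mbx_data *mbx_data, u16 vf_id)
	{
		struct ice_hw *hw = &pf->hw;
		bool malvf = false, report = false;

		if (ice_mbx_vf_state_handler(hw, mbx_data, vf_id, &malvf) || !malvf)
			return;

		/* report each malicious VF only once between resets */
		if (!ice_mbx_report_malvf(hw, pf->vfs.malvfs, ICE_MAX_SRIOV_VFS,
					  vf_id, &report) && report)
			dev_warn(ice_pf_to_dev(pf),
				 "Malicious VF %u detected\n", vf_id);
	}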
+ */ +static int +ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id, + enum ice_mbx_snapshot_state *new_state, + bool *is_malvf) +{ + struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + + if (vf_id >= snap->mbx_vf.vfcntr_len) + return -EIO; + + /* increment the message count in the VF array */ + snap->mbx_vf.vf_cntr[vf_id]++; + + if (snap->mbx_vf.vf_cntr[vf_id] >= ICE_ASYNC_VF_MSG_THRESHOLD) + *is_malvf = true; + + /* continue to iterate through the mailbox snapshot */ + ice_mbx_traverse(hw, new_state); + + return 0; +} + +/** + * ice_mbx_reset_snapshot - Reset mailbox snapshot structure + * @snap: pointer to mailbox snapshot structure in the ice_hw struct + * + * Reset the mailbox snapshot structure and clear the VF counter array. + */ +static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap) +{ + u32 vfcntr_len; + + if (!snap || !snap->mbx_vf.vf_cntr) + return; + + /* Clear VF counters. */ + vfcntr_len = snap->mbx_vf.vfcntr_len; + if (vfcntr_len) + memset(snap->mbx_vf.vf_cntr, 0, + (vfcntr_len * sizeof(*snap->mbx_vf.vf_cntr))); + + /* Reset mailbox snapshot for a new capture. */ + memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); + snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; +} + +/** + * ice_mbx_vf_state_handler - Handle states of the overflow algorithm + * @hw: pointer to the HW struct + * @mbx_data: pointer to structure containing mailbox data + * @vf_id: relative virtual function (VF) ID + * @is_malvf: boolean output to indicate if VF is malicious + * + * The function serves as an entry point for the malicious VF + * detection algorithm by handling the different states and state + * transitions of the algorithm: + * New snapshot: This state is entered when creating a new static + * snapshot. The data from any previous mailbox snapshot is + * cleared and a new capture of the mailbox head and tail is + * logged. This will be the new static snapshot to detect + * asynchronous messages sent by VFs. On capturing the snapshot, + * and depending on whether the number of pending messages in that + * snapshot exceeds the watermark value, the state machine enters + * the traverse or detect state. + * Traverse: If the pending message count is below the watermark, iterate + * through the snapshot without taking any action on the VF. + * Detect: If the pending message count exceeds the watermark, traverse + * the static snapshot and look for a malicious VF. + */ +int +ice_mbx_vf_state_handler(struct ice_hw *hw, + struct ice_mbx_data *mbx_data, u16 vf_id, + bool *is_malvf) +{ + struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + struct ice_mbx_snap_buffer_data *snap_buf; + struct ice_ctl_q_info *cq = &hw->mailboxq; + enum ice_mbx_snapshot_state new_state; + int status = 0; + + if (!is_malvf || !mbx_data) + return -EINVAL; + + /* When entering the mailbox state machine assume that the VF + * is not malicious until detected. + */ + *is_malvf = false; + + /* Check that the max number of messages allowed to be processed while + * servicing the current interrupt is greater than the defined AVF + * message threshold. + */ + if (mbx_data->max_num_msgs_mbx <= ICE_ASYNC_VF_MSG_THRESHOLD) + return -EINVAL; + + /* The watermark value must not be less than the threshold limit + * set for the number of asynchronous messages a VF can send to the + * mailbox, nor greater than the maximum number of messages in the + * mailbox serviced in the current interrupt.
+ */ + if (mbx_data->async_watermark_val < ICE_ASYNC_VF_MSG_THRESHOLD || + mbx_data->async_watermark_val > mbx_data->max_num_msgs_mbx) + return -EINVAL; + + new_state = ICE_MAL_VF_DETECT_STATE_INVALID; + snap_buf = &snap->mbx_buf; + + switch (snap_buf->state) { + case ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT: + /* Clear any previously held data in mailbox snapshot structure. */ + ice_mbx_reset_snapshot(snap); + + /* Collect the pending ARQ count, number of messages processed and + * the maximum number of messages allowed to be processed from the + * Mailbox for current interrupt. + */ + snap_buf->num_pending_arq = mbx_data->num_pending_arq; + snap_buf->num_msg_proc = mbx_data->num_msg_proc; + snap_buf->max_num_msgs_mbx = mbx_data->max_num_msgs_mbx; + + /* Capture a new static snapshot of the mailbox by logging the + * head and tail of snapshot and set num_iterations to the tail + * value to mark the start of the iteration through the snapshot. + */ + snap_buf->head = ICE_RQ_DATA_MASK(cq->rq.next_to_clean + + mbx_data->num_pending_arq); + snap_buf->tail = ICE_RQ_DATA_MASK(cq->rq.next_to_clean - 1); + snap_buf->num_iterations = snap_buf->tail; + + /* Pending ARQ messages returned by ice_clean_rq_elem + * is the difference between the head and tail of the + * mailbox queue. Comparing this value against the watermark + * helps to check if we potentially have malicious VFs. + */ + if (snap_buf->num_pending_arq >= + mbx_data->async_watermark_val) { + new_state = ICE_MAL_VF_DETECT_STATE_DETECT; + status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf); + } else { + new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE; + ice_mbx_traverse(hw, &new_state); + } + break; + + case ICE_MAL_VF_DETECT_STATE_TRAVERSE: + new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE; + ice_mbx_traverse(hw, &new_state); + break; + + case ICE_MAL_VF_DETECT_STATE_DETECT: + new_state = ICE_MAL_VF_DETECT_STATE_DETECT; + status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf); + break; + + default: + new_state = ICE_MAL_VF_DETECT_STATE_INVALID; + status = -EIO; + } + + snap_buf->state = new_state; + + return status; +} + +/** + * ice_mbx_report_malvf - Track and note malicious VF + * @hw: pointer to the HW struct + * @all_malvfs: all malicious VFs tracked by PF + * @bitmap_len: length of bitmap in bits + * @vf_id: relative virtual function ID of the malicious VF + * @report_malvf: boolean to indicate if malicious VF must be reported + * + * This function will update a bitmap that keeps track of the malicious + * VFs attached to the PF. A malicious VF must be reported only once if + * discovered between VF resets or loading so the function checks + * the input vf_id against the bitmap to verify if the VF has been + * detected in any previous mailbox iterations. 
+ */ +int +ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs, + u16 bitmap_len, u16 vf_id, bool *report_malvf) +{ + if (!all_malvfs || !report_malvf) + return -EINVAL; + + *report_malvf = false; + + if (bitmap_len < hw->mbx_snapshot.mbx_vf.vfcntr_len) + return -EINVAL; + + if (vf_id >= bitmap_len) + return -EIO; + + /* If this VF is not already marked in the bitmap, mark it and flag it + * for reporting + */ + if (!test_and_set_bit(vf_id, all_malvfs)) + *report_malvf = true; + + return 0; +} + +/** + * ice_mbx_clear_malvf - Clear VF bitmap and counter for VF ID + * @snap: pointer to the mailbox snapshot structure + * @all_malvfs: all malicious VFs tracked by PF + * @bitmap_len: length of bitmap in bits + * @vf_id: relative virtual function ID of the malicious VF + * + * In case of a VF reset, this function can be called to clear + * the bit corresponding to the VF ID in the bitmap tracking all + * malicious VFs attached to the PF. The function also clears the + * VF counter array at the index of the VF ID. This is to ensure + * that the new VF loaded is not considered malicious before going + * through the overflow detection algorithm. + */ +int +ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, + u16 bitmap_len, u16 vf_id) +{ + if (!snap || !all_malvfs) + return -EINVAL; + + if (bitmap_len < snap->mbx_vf.vfcntr_len) + return -EINVAL; + + /* Ensure VF ID value is not larger than bitmap or VF counter length */ + if (vf_id >= bitmap_len || vf_id >= snap->mbx_vf.vfcntr_len) + return -EIO; + + /* Clear VF ID bit in the bitmap tracking malicious VFs attached to PF */ + clear_bit(vf_id, all_malvfs); + + /* Clear the VF counter in the mailbox snapshot structure for that VF ID. + * This is to ensure that if a VF is unloaded and a new one brought back + * up with the same VF ID for a snapshot currently in traversal or detect + * state, the counter for that VF ID does not increment on top of existing + * values in the mailbox overflow detection algorithm. + */ + snap->mbx_vf.vf_cntr[vf_id] = 0; + + return 0; +} + +/** + * ice_mbx_init_snapshot - Initialize mailbox snapshot structure + * @hw: pointer to the hardware structure + * @vf_count: number of VFs allocated on a PF + * + * Clear the mailbox snapshot structure and allocate memory + * for the VF counter array based on the number of VFs allocated + * on that PF. + * + * Assumption: this function assumes ice_get_caps() has already been + * called to ensure that the vf_count can be compared against the number + * of VFs supported as defined in the functional capabilities of the device. + */ +int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count) +{ + struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + + /* Ensure that the number of VFs allocated is non-zero and + * is not greater than the number of supported VFs defined in + * the functional capabilities of the PF. + */ + if (!vf_count || vf_count > hw->func_caps.num_allocd_vfs) + return -EINVAL; + + snap->mbx_vf.vf_cntr = devm_kcalloc(ice_hw_to_dev(hw), vf_count, + sizeof(*snap->mbx_vf.vf_cntr), + GFP_KERNEL); + if (!snap->mbx_vf.vf_cntr) + return -ENOMEM; + + /* Set the VF counter length to the number of VFs allocated + * per the given PF's functional capabilities. + */ + snap->mbx_vf.vfcntr_len = vf_count; + + /* Clear mbx_buf in the mailbox snapshot structure and set the + * mailbox snapshot state to a new capture.
+ */ + memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); + snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; + + return 0; +} + +/** + * ice_mbx_deinit_snapshot - Free mailbox snapshot structure + * @hw: pointer to the hardware structure + * + * Clear the mailbox snapshot structure and free the VF counter array. + */ +void ice_mbx_deinit_snapshot(struct ice_hw *hw) +{ + struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + + /* Free VF counter array and reset VF counter length */ + devm_kfree(ice_hw_to_dev(hw), snap->mbx_vf.vf_cntr); + snap->mbx_vf.vfcntr_len = 0; + + /* Clear mbx_buf in the mailbox snapshot structure */ + memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); +} diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h new file mode 100644 index 000000000000..582716e6d5f9 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _ICE_VF_MBX_H_ +#define _ICE_VF_MBX_H_ + +#include "ice_type.h" +#include "ice_controlq.h" + +/* Defining the mailbox message threshold as 63 asynchronous + * pending messages. Normal VF functionality does not require + * sending more than 63 asynchronous pending messages. + */ +#define ICE_ASYNC_VF_MSG_THRESHOLD 63 + +#ifdef CONFIG_PCI_IOV +int +ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, + u8 *msg, u16 msglen, struct ice_sq_cd *cd); + +u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed); +int +ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data, + u16 vf_id, bool *is_mal_vf); +int +ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, + u16 bitmap_len, u16 vf_id); +int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count); +void ice_mbx_deinit_snapshot(struct ice_hw *hw); +int +ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs, + u16 bitmap_len, u16 vf_id, bool *report_malvf); +#else /* CONFIG_PCI_IOV */ +static inline int +ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw, + u16 __always_unused vfid, u32 __always_unused v_opcode, + u32 __always_unused v_retval, u8 __always_unused *msg, + u16 __always_unused msglen, + struct ice_sq_cd __always_unused *cd) +{ + return 0; +} + +static inline u32 +ice_conv_link_speed_to_virtchnl(bool __always_unused adv_link_support, + u16 __always_unused link_speed) +{ + return 0; +} + +#endif /* CONFIG_PCI_IOV */ +#endif /* _ICE_VF_MBX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c index b16f946185f2..5ecc0ee9a78e 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c @@ -6,7 +6,7 @@ #include "ice_vlan_mode.h" #include "ice.h" #include "ice_vf_vsi_vlan_ops.h" -#include "ice_virtchnl_pf.h" +#include "ice_sriov.h" static int noop_vlan_arg(struct ice_vsi __always_unused *vsi, diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 45fe36db076a..3f1a63815bac 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -1,17 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2018, Intel Corporation. */ +/* Copyright (C) 2022, Intel Corporation.
*/ +#include "ice_virtchnl.h" +#include "ice_vf_lib_private.h" #include "ice.h" #include "ice_base.h" #include "ice_lib.h" #include "ice_fltr.h" -#include "ice_dcb_lib.h" -#include "ice_flow.h" -#include "ice_eswitch.h" #include "ice_virtchnl_allowlist.h" -#include "ice_flex_pipe.h" #include "ice_vf_vsi_vlan_ops.h" #include "ice_vlan.h" +#include "ice_flex_pipe.h" +#include "ice_dcb_lib.h" #define FIELD_SELECTOR(proto_hdr_field) \ BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK) @@ -166,160 +166,6 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { }; /** - * ice_get_vf_vsi - get VF's VSI based on the stored index - * @vf: VF used to get VSI - */ -struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf) -{ - return vf->pf->vsi[vf->lan_vsi_idx]; -} - -/** - * ice_get_vf_by_id - Get pointer to VF by ID - * @pf: the PF private structure - * @vf_id: the VF ID to locate - * - * Locate and return a pointer to the VF structure associated with a given ID. - * Returns NULL if the ID does not have a valid VF structure associated with - * it. - * - * This function takes a reference to the VF, which must be released by - * calling ice_put_vf() once the caller is finished accessing the VF structure - * returned. - */ -struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) -{ - struct ice_vf *vf; - - rcu_read_lock(); - hash_for_each_possible_rcu(pf->vfs.table, vf, entry, vf_id) { - if (vf->vf_id == vf_id) { - struct ice_vf *found; - - if (kref_get_unless_zero(&vf->refcnt)) - found = vf; - else - found = NULL; - - rcu_read_unlock(); - return found; - } - } - rcu_read_unlock(); - - return NULL; -} - -/** - * ice_release_vf - Release VF associated with a refcount - * @ref: the kref decremented to zero - * - * Callback function for kref_put to release a VF once its reference count has - * hit zero. - */ -static void ice_release_vf(struct kref *ref) -{ - struct ice_vf *vf = container_of(ref, struct ice_vf, refcnt); - - mutex_destroy(&vf->cfg_lock); - - kfree_rcu(vf, rcu); -} - -/** - * ice_put_vf - Release a reference to a VF - * @vf: the VF structure to decrease reference count on - * - * This must be called after ice_get_vf_by_id() once the reference to the VF - * structure is no longer used. Otherwise, the VF structure will never be - * freed. - */ -void ice_put_vf(struct ice_vf *vf) -{ - kref_put(&vf->refcnt, ice_release_vf); -} - -/** - * ice_has_vfs - Return true if the PF has any associated VFs - * @pf: the PF private structure - * - * Return whether or not the PF has any allocated VFs. - * - * Note that this function only guarantees that there are no VFs at the point - * of calling it. It does not guarantee that no more VFs will be added. - */ -bool ice_has_vfs(struct ice_pf *pf) -{ - /* A simple check that the hash table is not empty does not require - * the mutex or rcu_read_lock. - */ - return !hash_empty(pf->vfs.table); -} - -/** - * ice_get_num_vfs - Get number of allocated VFs - * @pf: the PF private structure - * - * Return the total number of allocated VFs. NOTE: VF IDs are not guaranteed - * to be contiguous. Do not assume that a VF ID is guaranteed to be less than - * the output of this function. 
- */ -u16 ice_get_num_vfs(struct ice_pf *pf) -{ - struct ice_vf *vf; - unsigned int bkt; - u16 num_vfs = 0; - - rcu_read_lock(); - ice_for_each_vf_rcu(pf, bkt, vf) - num_vfs++; - rcu_read_unlock(); - - return num_vfs; -} - -/** - * ice_check_vf_init - helper to check if VF init complete - * @pf: pointer to the PF structure - * @vf: the pointer to the VF to check - */ -static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf) -{ - if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - dev_err(ice_pf_to_dev(pf), "VF ID: %u in reset. Try again.\n", - vf->vf_id); - return -EBUSY; - } - return 0; -} - -/** - * ice_free_vf_entries - Free all VF entries from the hash table - * @pf: pointer to the PF structure - * - * Iterate over the VF hash table, removing and releasing all VF entries. - * Called during VF teardown or as cleanup during failed VF initialization. - */ -static void ice_free_vf_entries(struct ice_pf *pf) -{ - struct ice_vfs *vfs = &pf->vfs; - struct hlist_node *tmp; - struct ice_vf *vf; - unsigned int bkt; - - /* Remove all VFs from the hash table and release their main - * reference. Once all references to the VF are dropped, ice_put_vf() - * will call ice_release_vf which will remove the VF memory. - */ - lockdep_assert_held(&vfs->table_lock); - - hash_for_each_safe(vfs->table, bkt, tmp, vf, entry) { - hash_del_rcu(&vf->entry); - ice_put_vf(vf); - } -} - -/** * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF * @pf: pointer to the PF structure * @v_opcode: operation code @@ -377,39 +223,6 @@ ice_set_pfe_link(struct ice_vf *vf, struct virtchnl_pf_event *pfe, } /** - * ice_vf_has_no_qs_ena - check if the VF has any Rx or Tx queues enabled - * @vf: the VF to check - * - * Returns true if the VF has no Rx and no Tx queues enabled and returns false - * otherwise - */ -static bool ice_vf_has_no_qs_ena(struct ice_vf *vf) -{ - return (!bitmap_weight(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF) && - !bitmap_weight(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF)); -} - -/** - * ice_is_vf_link_up - check if the VF's link is up - * @vf: VF to check if link is up - */ -static bool ice_is_vf_link_up(struct ice_vf *vf) -{ - struct ice_pf *pf = vf->pf; - - if (ice_check_vf_init(pf, vf)) - return false; - - if (ice_vf_has_no_qs_ena(vf)) - return false; - else if (vf->link_forced) - return vf->link_up; - else - return pf->hw.port_info->phy.link_info.link_info & - ICE_AQ_LINK_UP; -} - -/** * ice_vc_notify_vf_link_state - Inform a VF of link status * @vf: pointer to the VF structure * @@ -435,1383 +248,6 @@ void ice_vc_notify_vf_link_state(struct ice_vf *vf) } /** - * ice_vf_invalidate_vsi - invalidate vsi_idx/vsi_num to remove VSI access - * @vf: VF to remove access to VSI for - */ -static void ice_vf_invalidate_vsi(struct ice_vf *vf) -{ - vf->lan_vsi_idx = ICE_NO_VSI; - vf->lan_vsi_num = ICE_NO_VSI; -} - -/** - * ice_vf_vsi_release - invalidate the VF's VSI after freeing it - * @vf: invalidate this VF's VSI after freeing it - */ -static void ice_vf_vsi_release(struct ice_vf *vf) -{ - ice_vsi_release(ice_get_vf_vsi(vf)); - ice_vf_invalidate_vsi(vf); -} - -/** - * ice_vf_ctrl_invalidate_vsi - invalidate ctrl_vsi_idx to remove VSI access - * @vf: VF that control VSI is being invalidated on - */ -static void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf) -{ - vf->ctrl_vsi_idx = ICE_NO_VSI; -} - -/** - * ice_vf_ctrl_vsi_release - invalidate the VF's control VSI after freeing it - * @vf: VF that control VSI is being released on - */ -static void ice_vf_ctrl_vsi_release(struct ice_vf *vf) -{ - 
ice_vsi_release(vf->pf->vsi[vf->ctrl_vsi_idx]); - ice_vf_ctrl_invalidate_vsi(vf); -} - -/** - * ice_free_vf_res - Free a VF's resources - * @vf: pointer to the VF info - */ -static void ice_free_vf_res(struct ice_vf *vf) -{ - struct ice_pf *pf = vf->pf; - int i, last_vector_idx; - - /* First, disable VF's configuration API to prevent OS from - * accessing the VF's VSI after it's freed or invalidated. - */ - clear_bit(ICE_VF_STATE_INIT, vf->vf_states); - ice_vf_fdir_exit(vf); - /* free VF control VSI */ - if (vf->ctrl_vsi_idx != ICE_NO_VSI) - ice_vf_ctrl_vsi_release(vf); - - /* free VSI and disconnect it from the parent uplink */ - if (vf->lan_vsi_idx != ICE_NO_VSI) { - ice_vf_vsi_release(vf); - vf->num_mac = 0; - } - - last_vector_idx = vf->first_vector_idx + pf->vfs.num_msix_per - 1; - - /* clear VF MDD event information */ - memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events)); - memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events)); - - /* Disable interrupts so that VF starts in a known state */ - for (i = vf->first_vector_idx; i <= last_vector_idx; i++) { - wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M); - ice_flush(&pf->hw); - } - /* reset some of the state variables keeping track of the resources */ - clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states); - clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states); -} - -/** - * ice_dis_vf_mappings - * @vf: pointer to the VF structure - */ -static void ice_dis_vf_mappings(struct ice_vf *vf) -{ - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - struct device *dev; - int first, last, v; - struct ice_hw *hw; - - hw = &pf->hw; - vsi = ice_get_vf_vsi(vf); - - dev = ice_pf_to_dev(pf); - wr32(hw, VPINT_ALLOC(vf->vf_id), 0); - wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0); - - first = vf->first_vector_idx; - last = first + pf->vfs.num_msix_per - 1; - for (v = first; v <= last; v++) { - u32 reg; - - reg = (((1 << GLINT_VECT2FUNC_IS_PF_S) & - GLINT_VECT2FUNC_IS_PF_M) | - ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) & - GLINT_VECT2FUNC_PF_NUM_M)); - wr32(hw, GLINT_VECT2FUNC(v), reg); - } - - if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) - wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0); - else - dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n"); - - if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) - wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0); - else - dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n"); -} - -/** - * ice_sriov_free_msix_res - Reset/free any used MSIX resources - * @pf: pointer to the PF structure - * - * Since no MSIX entries are taken from the pf->irq_tracker then just clear - * the pf->sriov_base_vector. - * - * Returns 0 on success, and -EINVAL on error. 
- */ -static int ice_sriov_free_msix_res(struct ice_pf *pf) -{ - struct ice_res_tracker *res; - - if (!pf) - return -EINVAL; - - res = pf->irq_tracker; - if (!res) - return -EINVAL; - - /* give back irq_tracker resources used */ - WARN_ON(pf->sriov_base_vector < res->num_entries); - - pf->sriov_base_vector = 0; - - return 0; -} - -/** - * ice_set_vf_state_qs_dis - Set VF queues state to disabled - * @vf: pointer to the VF structure - */ -void ice_set_vf_state_qs_dis(struct ice_vf *vf) -{ - /* Clear Rx/Tx enabled queues flag */ - bitmap_zero(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF); - bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF); - clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); -} - -/** - * ice_dis_vf_qs - Disable the VF queues - * @vf: pointer to the VF structure - */ -static void ice_dis_vf_qs(struct ice_vf *vf) -{ - struct ice_vsi *vsi = ice_get_vf_vsi(vf); - - ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); - ice_vsi_stop_all_rx_rings(vsi); - ice_set_vf_state_qs_dis(vf); -} - -/** - * ice_free_vfs - Free all VFs - * @pf: pointer to the PF structure - */ -void ice_free_vfs(struct ice_pf *pf) -{ - struct device *dev = ice_pf_to_dev(pf); - struct ice_vfs *vfs = &pf->vfs; - struct ice_hw *hw = &pf->hw; - struct ice_vf *vf; - unsigned int bkt; - - if (!ice_has_vfs(pf)) - return; - - while (test_and_set_bit(ICE_VF_DIS, pf->state)) - usleep_range(1000, 2000); - - /* Disable IOV before freeing resources. This lets any VF drivers - * running in the host get themselves cleaned up before we yank - * the carpet out from underneath their feet. - */ - if (!pci_vfs_assigned(pf->pdev)) - pci_disable_sriov(pf->pdev); - else - dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); - - mutex_lock(&vfs->table_lock); - - ice_eswitch_release(pf); - - ice_for_each_vf(pf, bkt, vf) { - mutex_lock(&vf->cfg_lock); - - ice_dis_vf_qs(vf); - - if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - /* disable VF qp mappings and set VF disable state */ - ice_dis_vf_mappings(vf); - set_bit(ICE_VF_STATE_DIS, vf->vf_states); - ice_free_vf_res(vf); - } - - if (!pci_vfs_assigned(pf->pdev)) { - u32 reg_idx, bit_idx; - - reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32; - bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32; - wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); - } - - /* clear malicious info since the VF is getting released */ - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, - ICE_MAX_VF_COUNT, vf->vf_id)) - dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", - vf->vf_id); - - mutex_unlock(&vf->cfg_lock); - } - - if (ice_sriov_free_msix_res(pf)) - dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n"); - - vfs->num_qps_per = 0; - ice_free_vf_entries(pf); - - mutex_unlock(&vfs->table_lock); - - clear_bit(ICE_VF_DIS, pf->state); - clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); -} - -/** - * ice_trigger_vf_reset - Reset a VF on HW - * @vf: pointer to the VF structure - * @is_vflr: true if VFLR was issued, false if not - * @is_pfr: true if the reset was triggered due to a previous PFR - * - * Trigger hardware to start a reset for a particular VF. Expects the caller - * to wait the proper amount of time to allow hardware to reset the VF before - * it cleans up and restores VF functionality. 
- */ -static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) -{ - struct ice_pf *pf = vf->pf; - u32 reg, reg_idx, bit_idx; - unsigned int vf_abs_id, i; - struct device *dev; - struct ice_hw *hw; - - dev = ice_pf_to_dev(pf); - hw = &pf->hw; - vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id; - - /* Inform VF that it is no longer active, as a warning */ - clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); - - /* Disable VF's configuration API during reset. The flag is re-enabled - * when it's safe again to access VF's VSI. - */ - clear_bit(ICE_VF_STATE_INIT, vf->vf_states); - - /* VF_MBX_ARQLEN and VF_MBX_ATQLEN are cleared by PFR, so the driver - * needs to clear them in the case of VFR/VFLR. If this is done for - * PFR, it can mess up VF resets because the VF driver may already - * have started cleanup by the time we get here. - */ - if (!is_pfr) { - wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0); - wr32(hw, VF_MBX_ATQLEN(vf->vf_id), 0); - } - - /* In the case of a VFLR, the HW has already reset the VF and we - * just need to clean up, so don't hit the VFRTRIG register. - */ - if (!is_vflr) { - /* reset VF using VPGEN_VFRTRIG reg */ - reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id)); - reg |= VPGEN_VFRTRIG_VFSWR_M; - wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg); - } - /* clear the VFLR bit in GLGEN_VFLRSTAT */ - reg_idx = (vf_abs_id) / 32; - bit_idx = (vf_abs_id) % 32; - wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); - ice_flush(hw); - - wr32(hw, PF_PCI_CIAA, - VF_DEVICE_STATUS | (vf_abs_id << PF_PCI_CIAA_VF_NUM_S)); - for (i = 0; i < ICE_PCI_CIAD_WAIT_COUNT; i++) { - reg = rd32(hw, PF_PCI_CIAD); - /* no transactions pending so stop polling */ - if ((reg & VF_TRANS_PENDING_M) == 0) - break; - - dev_err(dev, "VF %u PCI transactions stuck\n", vf->vf_id); - udelay(ICE_PCI_CIAD_WAIT_DELAY_US); - } -} - -/** - * ice_vf_get_port_info - Get the VF's port info structure - * @vf: VF used to get the port info structure for - */ -static struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf) -{ - return vf->pf->hw.port_info; -} - -/** - * ice_vf_vsi_setup - Set up a VF VSI - * @vf: VF to setup VSI for - * - * Returns pointer to the successfully allocated VSI struct on success, - * otherwise returns NULL on failure. - */ -static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) -{ - struct ice_port_info *pi = ice_vf_get_port_info(vf); - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - - vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf, NULL); - - if (!vsi) { - dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n"); - ice_vf_invalidate_vsi(vf); - return NULL; - } - - vf->lan_vsi_idx = vsi->idx; - vf->lan_vsi_num = vsi->vsi_num; - - return vsi; -} - -/** - * ice_vf_ctrl_vsi_setup - Set up a VF control VSI - * @vf: VF to setup control VSI for - * - * Returns pointer to the successfully allocated VSI struct on success, - * otherwise returns NULL on failure. - */ -struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) -{ - struct ice_port_info *pi = ice_vf_get_port_info(vf); - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - - vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf, NULL); - if (!vsi) { - dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n"); - ice_vf_ctrl_invalidate_vsi(vf); - } - - return vsi; -} - -/** - * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space - * @pf: pointer to PF structure - * @vf: pointer to VF that the first MSIX vector index is being calculated for - * - * This returns the first MSIX vector index in PF space that is used by this VF. 
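ice_trigger_vf_reset() waits for pending PCI transactions with a bounded poll: re-read a status register a fixed number of times, delay between reads, and stop early once the pending bit clears. A sketch of that shape with stand-in names; read_status(), PENDING_MASK and the iteration budget are illustrative, not driver symbols:

#include <stdbool.h>
#include <stdio.h>

#define PENDING_MASK    0x1u
#define POLL_BUDGET     100

static unsigned int fake_status = PENDING_MASK;

/* Stand-in for a register read; pretends hardware settles after 3 reads. */
static unsigned int read_status(void)
{
        static int reads;

        if (++reads >= 3)
                fake_status = 0;
        return fake_status;
}

static bool wait_no_pending(void)
{
        int i;

        for (i = 0; i < POLL_BUDGET; i++) {
                if (!(read_status() & PENDING_MASK))
                        return true;
                /* a real loop would udelay() here before retrying */
        }
        return false;   /* budget exhausted, transactions still pending */
}

int main(void)
{
        printf("pending cleared: %s\n", wait_no_pending() ? "yes" : "no");
        return 0;
}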
- * This index is used when accessing PF relative registers such as - * GLINT_VECT2FUNC and GLINT_DYN_CTL. - * This will always be the OICR index in the AVF driver so any functionality - * using vf->first_vector_idx for queue configuration will have to increment by - * 1 to avoid meddling with the OICR index. - */ -static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) -{ - return pf->sriov_base_vector + vf->vf_id * pf->vfs.num_msix_per; -} - -/** - * ice_vf_rebuild_host_tx_rate_cfg - re-apply the Tx rate limiting configuration - * @vf: VF to re-apply the configuration for - * - * Called after a VF VSI has been re-added/rebuild during reset. The PF driver - * needs to re-apply the host configured Tx rate limiting configuration. - */ -static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf) -{ - struct device *dev = ice_pf_to_dev(vf->pf); - struct ice_vsi *vsi = ice_get_vf_vsi(vf); - int err; - - if (vf->min_tx_rate) { - err = ice_set_min_bw_limit(vsi, (u64)vf->min_tx_rate * 1000); - if (err) { - dev_err(dev, "failed to set min Tx rate to %d Mbps for VF %u, error %d\n", - vf->min_tx_rate, vf->vf_id, err); - return err; - } - } - - if (vf->max_tx_rate) { - err = ice_set_max_bw_limit(vsi, (u64)vf->max_tx_rate * 1000); - if (err) { - dev_err(dev, "failed to set max Tx rate to %d Mbps for VF %u, error %d\n", - vf->max_tx_rate, vf->vf_id, err); - return err; - } - } - - return 0; -} - -static u16 ice_vf_get_port_vlan_id(struct ice_vf *vf) -{ - return vf->port_vlan_info.vid; -} - -static u8 ice_vf_get_port_vlan_prio(struct ice_vf *vf) -{ - return vf->port_vlan_info.prio; -} - -bool ice_vf_is_port_vlan_ena(struct ice_vf *vf) -{ - return (ice_vf_get_port_vlan_id(vf) || ice_vf_get_port_vlan_prio(vf)); -} - -static u16 ice_vf_get_port_vlan_tpid(struct ice_vf *vf) -{ - return vf->port_vlan_info.tpid; -} - -/** - * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN - * @vf: VF to add MAC filters for - * @vsi: Pointer to VSI - * - * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver - * always re-adds either a VLAN 0 or port VLAN based filter after reset. - */ -static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf, struct ice_vsi *vsi) -{ - struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); - struct device *dev = ice_pf_to_dev(vf->pf); - int err; - - if (ice_vf_is_port_vlan_ena(vf)) { - err = vlan_ops->set_port_vlan(vsi, &vf->port_vlan_info); - if (err) { - dev_err(dev, "failed to configure port VLAN via VSI parameters for VF %u, error %d\n", - vf->vf_id, err); - return err; - } - - err = vlan_ops->add_vlan(vsi, &vf->port_vlan_info); - } else { - err = ice_vsi_add_vlan_zero(vsi); - } - - if (err) { - dev_err(dev, "failed to add VLAN %u filter for VF %u during VF rebuild, error %d\n", - ice_vf_is_port_vlan_ena(vf) ? 
- ice_vf_get_port_vlan_id(vf) : 0, vf->vf_id, err); - return err; - } - - err = vlan_ops->ena_rx_filtering(vsi); - if (err) - dev_warn(dev, "failed to enable Rx VLAN filtering for VF %d VSI %d during VF rebuild, error %d\n", - vf->vf_id, vsi->idx, err); - - return 0; -} - -static int ice_cfg_mac_antispoof(struct ice_vsi *vsi, bool enable) -{ - struct ice_vsi_ctx *ctx; - int err; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->info.sec_flags = vsi->info.sec_flags; - ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); - - if (enable) - ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; - else - ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; - - err = ice_update_vsi(&vsi->back->hw, vsi->idx, ctx, NULL); - if (err) - dev_err(ice_pf_to_dev(vsi->back), "Failed to configure Tx MAC anti-spoof %s for VSI %d, error %d\n", - enable ? "ON" : "OFF", vsi->vsi_num, err); - else - vsi->info.sec_flags = ctx->info.sec_flags; - - kfree(ctx); - - return err; -} - -/** - * ice_vsi_ena_spoofchk - enable Tx spoof checking for this VSI - * @vsi: VSI to enable Tx spoof checking for - */ -static int ice_vsi_ena_spoofchk(struct ice_vsi *vsi) -{ - struct ice_vsi_vlan_ops *vlan_ops; - int err; - - vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); - - err = vlan_ops->ena_tx_filtering(vsi); - if (err) - return err; - - return ice_cfg_mac_antispoof(vsi, true); -} - -/** - * ice_vsi_dis_spoofchk - disable Tx spoof checking for this VSI - * @vsi: VSI to disable Tx spoof checking for - */ -static int ice_vsi_dis_spoofchk(struct ice_vsi *vsi) -{ - struct ice_vsi_vlan_ops *vlan_ops; - int err; - - vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); - - err = vlan_ops->dis_tx_filtering(vsi); - if (err) - return err; - - return ice_cfg_mac_antispoof(vsi, false); -} - -/** - * ice_vf_set_spoofchk_cfg - apply Tx spoof checking setting - * @vf: VF set spoofchk for - * @vsi: VSI associated to the VF - */ -static int -ice_vf_set_spoofchk_cfg(struct ice_vf *vf, struct ice_vsi *vsi) -{ - int err; - - if (vf->spoofchk) - err = ice_vsi_ena_spoofchk(vsi); - else - err = ice_vsi_dis_spoofchk(vsi); - - return err; -} - -/** - * ice_vf_rebuild_host_mac_cfg - add broadcast and the VF's perm_addr/LAA - * @vf: VF to add MAC filters for - * - * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver - * always re-adds a broadcast filter and the VF's perm_addr/LAA after reset. 
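The rebuild below always re-adds the broadcast filter and then, only if the VF's stored address is a valid unicast one, its perm_addr/LAA, bumping num_mac for each. A self-contained sketch of the two address checks involved (a valid unicast address is non-zero with the multicast bit of the first octet clear); the locally administered address used here is an arbitrary example:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

/* Illustrative restatements of eth_broadcast_addr() and
 * is_valid_ether_addr(), not the kernel implementations.
 */
static void eth_broadcast(unsigned char addr[ETH_ALEN])
{
        memset(addr, 0xff, ETH_ALEN);
}

static bool is_zero_ether(const unsigned char addr[ETH_ALEN])
{
        static const unsigned char zero[ETH_ALEN];

        return memcmp(addr, zero, ETH_ALEN) == 0;
}

static bool is_valid_unicast(const unsigned char addr[ETH_ALEN])
{
        return !(addr[0] & 0x01) && !is_zero_ether(addr);
}

int main(void)
{
        unsigned char bcast[ETH_ALEN];
        unsigned char laa[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
        int num_mac = 0;

        eth_broadcast(bcast);
        num_mac++;                      /* broadcast filter always added */
        if (is_valid_unicast(laa))
                num_mac++;              /* then the VF's own address */
        printf("num_mac = %d\n", num_mac);
        return 0;
}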
- */ -static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf) -{ - struct device *dev = ice_pf_to_dev(vf->pf); - struct ice_vsi *vsi = ice_get_vf_vsi(vf); - u8 broadcast[ETH_ALEN]; - int status; - - if (ice_is_eswitch_mode_switchdev(vf->pf)) - return 0; - - eth_broadcast_addr(broadcast); - status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI); - if (status) { - dev_err(dev, "failed to add broadcast MAC filter for VF %u, error %d\n", - vf->vf_id, status); - return status; - } - - vf->num_mac++; - - if (is_valid_ether_addr(vf->hw_lan_addr.addr)) { - status = ice_fltr_add_mac(vsi, vf->hw_lan_addr.addr, - ICE_FWD_TO_VSI); - if (status) { - dev_err(dev, "failed to add default unicast MAC filter %pM for VF %u, error %d\n", - &vf->hw_lan_addr.addr[0], vf->vf_id, - status); - return status; - } - vf->num_mac++; - - ether_addr_copy(vf->dev_lan_addr.addr, vf->hw_lan_addr.addr); - } - - return 0; -} - -/** - * ice_vf_set_host_trust_cfg - set trust setting based on pre-reset value - * @vf: VF to configure trust setting for - */ -static void ice_vf_set_host_trust_cfg(struct ice_vf *vf) -{ - if (vf->trusted) - set_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); - else - clear_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); -} - -/** - * ice_ena_vf_msix_mappings - enable VF MSIX mappings in hardware - * @vf: VF to enable MSIX mappings for - * - * Some of the registers need to be indexed/configured using hardware global - * device values and other registers need 0-based values, which represent PF - * based values. - */ -static void ice_ena_vf_msix_mappings(struct ice_vf *vf) -{ - int device_based_first_msix, device_based_last_msix; - int pf_based_first_msix, pf_based_last_msix, v; - struct ice_pf *pf = vf->pf; - int device_based_vf_id; - struct ice_hw *hw; - u32 reg; - - hw = &pf->hw; - pf_based_first_msix = vf->first_vector_idx; - pf_based_last_msix = (pf_based_first_msix + pf->vfs.num_msix_per) - 1; - - device_based_first_msix = pf_based_first_msix + - pf->hw.func_caps.common_cap.msix_vector_first_id; - device_based_last_msix = - (device_based_first_msix + pf->vfs.num_msix_per) - 1; - device_based_vf_id = vf->vf_id + hw->func_caps.vf_base_id; - - reg = (((device_based_first_msix << VPINT_ALLOC_FIRST_S) & - VPINT_ALLOC_FIRST_M) | - ((device_based_last_msix << VPINT_ALLOC_LAST_S) & - VPINT_ALLOC_LAST_M) | VPINT_ALLOC_VALID_M); - wr32(hw, VPINT_ALLOC(vf->vf_id), reg); - - reg = (((device_based_first_msix << VPINT_ALLOC_PCI_FIRST_S) - & VPINT_ALLOC_PCI_FIRST_M) | - ((device_based_last_msix << VPINT_ALLOC_PCI_LAST_S) & - VPINT_ALLOC_PCI_LAST_M) | VPINT_ALLOC_PCI_VALID_M); - wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg); - - /* map the interrupts to its functions */ - for (v = pf_based_first_msix; v <= pf_based_last_msix; v++) { - reg = (((device_based_vf_id << GLINT_VECT2FUNC_VF_NUM_S) & - GLINT_VECT2FUNC_VF_NUM_M) | - ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) & - GLINT_VECT2FUNC_PF_NUM_M)); - wr32(hw, GLINT_VECT2FUNC(v), reg); - } - - /* Map mailbox interrupt to VF MSI-X vector 0 */ - wr32(hw, VPINT_MBX_CTL(device_based_vf_id), VPINT_MBX_CTL_CAUSE_ENA_M); -} - -/** - * ice_ena_vf_q_mappings - enable Rx/Tx queue mappings for a VF - * @vf: VF to enable the mappings for - * @max_txq: max Tx queues allowed on the VF's VSI - * @max_rxq: max Rx queues allowed on the VF's VSI - */ -static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq) -{ - struct device *dev = ice_pf_to_dev(vf->pf); - struct ice_vsi *vsi = ice_get_vf_vsi(vf); - struct ice_hw *hw = &vf->pf->hw; - u32 reg; - - /* set 
regardless of mapping mode */ - wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M); - - /* VF Tx queues allocation */ - if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) { - /* set the VF PF Tx queue range - * VFNUMQ value should be set to (number of queues - 1). A value - * of 0 means 1 queue and a value of 255 means 256 queues - */ - reg = (((vsi->txq_map[0] << VPLAN_TX_QBASE_VFFIRSTQ_S) & - VPLAN_TX_QBASE_VFFIRSTQ_M) | - (((max_txq - 1) << VPLAN_TX_QBASE_VFNUMQ_S) & - VPLAN_TX_QBASE_VFNUMQ_M)); - wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg); - } else { - dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n"); - } - - /* set regardless of mapping mode */ - wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), VPLAN_RXQ_MAPENA_RX_ENA_M); - - /* VF Rx queues allocation */ - if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) { - /* set the VF PF Rx queue range - * VFNUMQ value should be set to (number of queues - 1). A value - * of 0 means 1 queue and a value of 255 means 256 queues - */ - reg = (((vsi->rxq_map[0] << VPLAN_RX_QBASE_VFFIRSTQ_S) & - VPLAN_RX_QBASE_VFFIRSTQ_M) | - (((max_rxq - 1) << VPLAN_RX_QBASE_VFNUMQ_S) & - VPLAN_RX_QBASE_VFNUMQ_M)); - wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg); - } else { - dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n"); - } -} - -/** - * ice_ena_vf_mappings - enable VF MSIX and queue mapping - * @vf: pointer to the VF structure - */ -static void ice_ena_vf_mappings(struct ice_vf *vf) -{ - struct ice_vsi *vsi = ice_get_vf_vsi(vf); - - ice_ena_vf_msix_mappings(vf); - ice_ena_vf_q_mappings(vf, vsi->alloc_txq, vsi->alloc_rxq); -} - -/** - * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space - * @vf: VF to calculate the register index for - * @q_vector: a q_vector associated to the VF - */ -int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) -{ - struct ice_pf *pf; - - if (!vf || !q_vector) - return -EINVAL; - - pf = vf->pf; - - /* always add one to account for the OICR being the first MSIX */ - return pf->sriov_base_vector + pf->vfs.num_msix_per * vf->vf_id + - q_vector->v_idx + 1; -} - -/** - * ice_get_max_valid_res_idx - Get the max valid resource index - * @res: pointer to the resource to find the max valid index for - * - * Start from the end of the ice_res_tracker and return right when we find the - * first res->list entry with the ICE_RES_VALID_BIT set. This function is only - * valid for SR-IOV because it is the only consumer that manipulates the - * res->end and this is always called when res->end is set to res->num_entries. - */ -static int ice_get_max_valid_res_idx(struct ice_res_tracker *res) -{ - int i; - - if (!res) - return -EINVAL; - - for (i = res->num_entries - 1; i >= 0; i--) - if (res->list[i] & ICE_RES_VALID_BIT) - return i; - - return 0; -} - -/** - * ice_sriov_set_msix_res - Set any used MSIX resources - * @pf: pointer to PF structure - * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs - * - * This function allows SR-IOV resources to be taken from the end of the PF's - * allowed HW MSIX vectors so that the irq_tracker will not be affected. We - * just set the pf->sriov_base_vector and return success. - * - * If there are not enough resources available, return an error. This should - * always be caught by ice_set_per_vf_res(). - * - * Return 0 on success, and -EINVAL when there are not enough MSIX vectors - * in the PF's space available for SR-IOV. 
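The helper below carves the SR-IOV vectors from the top of the PF's MSI-X range so that the low entries handed out by the irq_tracker stay untouched. A minimal restatement of the check, with made-up pool sizes:

#include <stdio.h>

/* Take the needed vectors from the top of the range and refuse if that
 * would overlap entries the irq_tracker already hands out; -1 stands in
 * for -EINVAL. The sample numbers below are not driver values.
 */
static int set_sriov_base(int total_vectors, int vectors_used,
                          int num_needed, int *base_out)
{
        int base = total_vectors - num_needed;

        if (base < vectors_used)
                return -1;
        *base_out = base;
        return 0;
}

int main(void)
{
        int base;

        if (!set_sriov_base(1024, 96, 17 * 8, &base))
                printf("sriov_base_vector = %d\n", base);
        return 0;
}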
- */ -static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) -{ - u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; - int vectors_used = pf->irq_tracker->num_entries; - int sriov_base_vector; - - sriov_base_vector = total_vectors - num_msix_needed; - - /* make sure we only grab irq_tracker entries from the list end and - * that we have enough available MSIX vectors - */ - if (sriov_base_vector < vectors_used) - return -EINVAL; - - pf->sriov_base_vector = sriov_base_vector; - - return 0; -} - -/** - * ice_set_per_vf_res - check if vectors and queues are available - * @pf: pointer to the PF structure - * @num_vfs: the number of SR-IOV VFs being configured - * - * First, determine HW interrupts from common pool. If we allocate fewer VFs, we - * get more vectors and can enable more queues per VF. Note that this does not - * grab any vectors from the SW pool already allocated. Also note, that all - * vector counts include one for each VF's miscellaneous interrupt vector - * (i.e. OICR). - * - * Minimum VFs - 2 vectors, 1 queue pair - * Small VFs - 5 vectors, 4 queue pairs - * Medium VFs - 17 vectors, 16 queue pairs - * - * Second, determine number of queue pairs per VF by starting with a pre-defined - * maximum each VF supports. If this is not possible, then we adjust based on - * queue pairs available on the device. - * - * Lastly, set queue and MSI-X VF variables tracked by the PF so it can be used - * by each VF during VF initialization and reset. - */ -static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) -{ - int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); - u16 num_msix_per_vf, num_txq, num_rxq, avail_qs; - int msix_avail_per_vf, msix_avail_for_sriov; - struct device *dev = ice_pf_to_dev(pf); - - lockdep_assert_held(&pf->vfs.table_lock); - - if (!num_vfs || max_valid_res_idx < 0) - return -EINVAL; - - /* determine MSI-X resources per VF */ - msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors - - pf->irq_tracker->num_entries; - msix_avail_per_vf = msix_avail_for_sriov / num_vfs; - if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) { - num_msix_per_vf = ICE_NUM_VF_MSIX_MED; - } else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_SMALL) { - num_msix_per_vf = ICE_NUM_VF_MSIX_SMALL; - } else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MULTIQ_MIN) { - num_msix_per_vf = ICE_NUM_VF_MSIX_MULTIQ_MIN; - } else if (msix_avail_per_vf >= ICE_MIN_INTR_PER_VF) { - num_msix_per_vf = ICE_MIN_INTR_PER_VF; - } else { - dev_err(dev, "Only %d MSI-X interrupts available for SR-IOV. 
Not enough to support minimum of %d MSI-X interrupts per VF for %d VFs\n", - msix_avail_for_sriov, ICE_MIN_INTR_PER_VF, - num_vfs); - return -EIO; - } - - num_txq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF, - ICE_MAX_RSS_QS_PER_VF); - avail_qs = ice_get_avail_txq_count(pf) / num_vfs; - if (!avail_qs) - num_txq = 0; - else if (num_txq > avail_qs) - num_txq = rounddown_pow_of_two(avail_qs); - - num_rxq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF, - ICE_MAX_RSS_QS_PER_VF); - avail_qs = ice_get_avail_rxq_count(pf) / num_vfs; - if (!avail_qs) - num_rxq = 0; - else if (num_rxq > avail_qs) - num_rxq = rounddown_pow_of_two(avail_qs); - - if (num_txq < ICE_MIN_QS_PER_VF || num_rxq < ICE_MIN_QS_PER_VF) { - dev_err(dev, "Not enough queues to support minimum of %d queue pairs per VF for %d VFs\n", - ICE_MIN_QS_PER_VF, num_vfs); - return -EIO; - } - - if (ice_sriov_set_msix_res(pf, num_msix_per_vf * num_vfs)) { - dev_err(dev, "Unable to set MSI-X resources for %d VFs\n", - num_vfs); - return -EINVAL; - } - - /* only allow equal Tx/Rx queue count (i.e. queue pairs) */ - pf->vfs.num_qps_per = min_t(int, num_txq, num_rxq); - pf->vfs.num_msix_per = num_msix_per_vf; - dev_info(dev, "Enabling %d VFs with %d vectors and %d queues per VF\n", - num_vfs, pf->vfs.num_msix_per, pf->vfs.num_qps_per); - - return 0; -} - -/** - * ice_clear_vf_reset_trigger - enable VF to access hardware - * @vf: VF to enabled hardware access for - */ -static void ice_clear_vf_reset_trigger(struct ice_vf *vf) -{ - struct ice_hw *hw = &vf->pf->hw; - u32 reg; - - reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id)); - reg &= ~VPGEN_VFRTRIG_VFSWR_M; - wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg); - ice_flush(hw); -} - -static int -ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m) -{ - struct ice_hw *hw = &vsi->back->hw; - int status; - - if (ice_vf_is_port_vlan_ena(vf)) - status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m, - ice_vf_get_port_vlan_id(vf)); - else if (ice_vsi_has_non_zero_vlans(vsi)) - status = ice_fltr_set_vlan_vsi_promisc(hw, vsi, promisc_m); - else - status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m, 0); - - if (status && status != -EEXIST) { - dev_err(ice_pf_to_dev(vsi->back), "enable Tx/Rx filter promiscuous mode on VF-%u failed, error: %d\n", - vf->vf_id, status); - return status; - } - - return 0; -} - -static int -ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m) -{ - struct ice_hw *hw = &vsi->back->hw; - int status; - - if (ice_vf_is_port_vlan_ena(vf)) - status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m, - ice_vf_get_port_vlan_id(vf)); - else if (ice_vsi_has_non_zero_vlans(vsi)) - status = ice_fltr_clear_vlan_vsi_promisc(hw, vsi, promisc_m); - else - status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m, 0); - - if (status && status != -ENOENT) { - dev_err(ice_pf_to_dev(vsi->back), "disable Tx/Rx filter promiscuous mode on VF-%u failed, error: %d\n", - vf->vf_id, status); - return status; - } - - return 0; -} - -static void ice_vf_clear_counters(struct ice_vf *vf) -{ - struct ice_vsi *vsi = ice_get_vf_vsi(vf); - - vf->num_mac = 0; - vsi->num_vlan = 0; - memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events)); - memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events)); -} - -/** - * ice_vf_pre_vsi_rebuild - tasks to be done prior to VSI rebuild - * @vf: VF to perform pre VSI rebuild tasks - * - * These tasks are items that don't need to be amortized since they are most - * likely called in a for loop with all VF(s) in the reset_all_vfs() 
case. - */ -static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf) -{ - ice_vf_clear_counters(vf); - ice_clear_vf_reset_trigger(vf); -} - -/** - * ice_vf_rebuild_aggregator_node_cfg - rebuild aggregator node config - * @vsi: Pointer to VSI - * - * This function moves VSI into corresponding scheduler aggregator node - * based on cached value of "aggregator node info" per VSI - */ -static void ice_vf_rebuild_aggregator_node_cfg(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - struct device *dev; - int status; - - if (!vsi->agg_node) - return; - - dev = ice_pf_to_dev(pf); - if (vsi->agg_node->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) { - dev_dbg(dev, - "agg_id %u already has reached max_num_vsis %u\n", - vsi->agg_node->agg_id, vsi->agg_node->num_vsis); - return; - } - - status = ice_move_vsi_to_agg(pf->hw.port_info, vsi->agg_node->agg_id, - vsi->idx, vsi->tc_cfg.ena_tc); - if (status) - dev_dbg(dev, "unable to move VSI idx %u into aggregator %u node", - vsi->idx, vsi->agg_node->agg_id); - else - vsi->agg_node->num_vsis++; -} - -/** - * ice_vf_rebuild_host_cfg - host admin configuration is persistent across reset - * @vf: VF to rebuild host configuration on - */ -static void ice_vf_rebuild_host_cfg(struct ice_vf *vf) -{ - struct device *dev = ice_pf_to_dev(vf->pf); - struct ice_vsi *vsi = ice_get_vf_vsi(vf); - - ice_vf_set_host_trust_cfg(vf); - - if (ice_vf_rebuild_host_mac_cfg(vf)) - dev_err(dev, "failed to rebuild default MAC configuration for VF %d\n", - vf->vf_id); - - if (ice_vf_rebuild_host_vlan_cfg(vf, vsi)) - dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n", - vf->vf_id); - - if (ice_vf_rebuild_host_tx_rate_cfg(vf)) - dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n", - vf->vf_id); - - if (ice_vf_set_spoofchk_cfg(vf, vsi)) - dev_err(dev, "failed to rebuild spoofchk configuration for VF %d\n", - vf->vf_id); - - /* rebuild aggregator node config for main VF VSI */ - ice_vf_rebuild_aggregator_node_cfg(vsi); -} - -/** - * ice_vf_rebuild_vsi_with_release - release and setup the VF's VSI - * @vf: VF to release and setup the VSI for - * - * This is only called when a single VF is being reset (i.e. VFR, VFLR, host VF - * configuration change, etc.). - */ -static int ice_vf_rebuild_vsi_with_release(struct ice_vf *vf) -{ - ice_vf_vsi_release(vf); - if (!ice_vf_vsi_setup(vf)) - return -ENOMEM; - - return 0; -} - -/** - * ice_vf_rebuild_vsi - rebuild the VF's VSI - * @vf: VF to rebuild the VSI for - * - * This is only called when all VF(s) are being reset (i.e. PCIe Reset on the - * host, PFR, CORER, etc.). 
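The per-VF sizing in ice_set_per_vf_res() a little earlier walks the documented tiers from largest to smallest (17, 5, or 2 vectors per VF), then caps queue counts by availability rounded down to a power of two. A sketch of the tier selection; the pool of 200 vectors for 16 VFs is a made-up example:

#include <stdio.h>

/* The tier constants match the documented 17/5/2 split; the driver also
 * has an intermediate multi-queue minimum tier, omitted here for brevity.
 */
static int pick_msix_per_vf(int msix_avail_for_sriov, int num_vfs)
{
        int per_vf = msix_avail_for_sriov / num_vfs;

        if (per_vf >= 17)
                return 17;      /* medium: 16 queue pairs + OICR */
        if (per_vf >= 5)
                return 5;       /* small: 4 queue pairs + OICR */
        if (per_vf >= 2)
                return 2;       /* minimum: 1 queue pair + OICR */
        return -1;              /* not enough for even the minimum */
}

int main(void)
{
        printf("per-VF vectors: %d\n", pick_msix_per_vf(200, 16));
        return 0;
}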
- */ -static int ice_vf_rebuild_vsi(struct ice_vf *vf) -{ - struct ice_vsi *vsi = ice_get_vf_vsi(vf); - struct ice_pf *pf = vf->pf; - - if (ice_vsi_rebuild(vsi, true)) { - dev_err(ice_pf_to_dev(pf), "failed to rebuild VF %d VSI\n", - vf->vf_id); - return -EIO; - } - /* vsi->idx will remain the same in this case so don't update - * vf->lan_vsi_idx - */ - vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx); - vf->lan_vsi_num = vsi->vsi_num; - - return 0; -} - -/** - * ice_vf_set_initialized - VF is ready for VIRTCHNL communication - * @vf: VF to set in initialized state - * - * After this function the VF will be ready to receive/handle the - * VIRTCHNL_OP_GET_VF_RESOURCES message - */ -static void ice_vf_set_initialized(struct ice_vf *vf) -{ - ice_set_vf_state_qs_dis(vf); - clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states); - clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states); - clear_bit(ICE_VF_STATE_DIS, vf->vf_states); - set_bit(ICE_VF_STATE_INIT, vf->vf_states); - memset(&vf->vlan_v2_caps, 0, sizeof(vf->vlan_v2_caps)); -} - -/** - * ice_vf_post_vsi_rebuild - tasks to do after the VF's VSI have been rebuilt - * @vf: VF to perform tasks on - */ -static void ice_vf_post_vsi_rebuild(struct ice_vf *vf) -{ - struct ice_pf *pf = vf->pf; - struct ice_hw *hw; - - hw = &pf->hw; - - ice_vf_rebuild_host_cfg(vf); - - ice_vf_set_initialized(vf); - ice_ena_vf_mappings(vf); - wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE); -} - -/** - * ice_reset_all_vfs - reset all allocated VFs in one go - * @pf: pointer to the PF structure - * @is_vflr: true if VFLR was issued, false if not - * - * First, tell the hardware to reset each VF, then do all the waiting in one - * chunk, and finally finish restoring each VF after the wait. This is useful - * during PF routines which need to reset all VFs, as otherwise it must perform - * these resets in a serialized fashion. - * - * Returns true if any VFs were reset, and false otherwise. - */ -bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) -{ - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - struct ice_vf *vf; - unsigned int bkt; - - /* If we don't have any VFs, then there is nothing to reset */ - if (!ice_has_vfs(pf)) - return false; - - mutex_lock(&pf->vfs.table_lock); - - /* clear all malicious info if the VFs are getting reset */ - ice_for_each_vf(pf, bkt, vf) - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, - ICE_MAX_VF_COUNT, vf->vf_id)) - dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", - vf->vf_id); - - /* If VFs have been disabled, there is no need to reset */ - if (test_and_set_bit(ICE_VF_DIS, pf->state)) { - mutex_unlock(&pf->vfs.table_lock); - return false; - } - - /* Begin reset on all VFs at once */ - ice_for_each_vf(pf, bkt, vf) - ice_trigger_vf_reset(vf, is_vflr, true); - - /* HW requires some time to make sure it can flush the FIFO for a VF - * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in - * sequence to make sure that it has completed. We'll keep track of - * the VFs using a simple iterator that increments once that VF has - * finished resetting. 
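The comment above describes the batching used by ice_reset_all_vfs(): trigger every VF reset first, then do all the waiting in one pass, then restore each VF, instead of serializing trigger/wait/rebuild per VF. The shape of that flow, with stub phases standing in for the real work:

#include <stdio.h>

#define NUM_VFS 4

static void trigger_reset(int vf)  { printf("trigger VF %d\n", vf); }
static void wait_reset(int vf)     { printf("wait on VF %d\n", vf); }
static void rebuild(int vf)        { printf("rebuild VF %d\n", vf); }

int main(void)
{
        int vf;

        for (vf = 0; vf < NUM_VFS; vf++)        /* phase 1: start everything */
                trigger_reset(vf);
        for (vf = 0; vf < NUM_VFS; vf++)        /* phase 2: one wait pass */
                wait_reset(vf);
        for (vf = 0; vf < NUM_VFS; vf++)        /* phase 3: restore state */
                rebuild(vf);
        return 0;
}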
- */ - ice_for_each_vf(pf, bkt, vf) { - bool done = false; - unsigned int i; - u32 reg; - - for (i = 0; i < 10; i++) { - reg = rd32(&pf->hw, VPGEN_VFRSTAT(vf->vf_id)); - if (reg & VPGEN_VFRSTAT_VFRD_M) { - done = true; - break; - } - - /* only delay if check failed */ - usleep_range(10, 20); - } - - if (!done) { - /* Display a warning if at least one VF didn't manage - * to reset in time, but continue on with the - * operation. - */ - dev_warn(dev, "VF %u reset check timeout\n", vf->vf_id); - break; - } - } - - /* free VF resources to begin resetting the VSI state */ - ice_for_each_vf(pf, bkt, vf) { - mutex_lock(&vf->cfg_lock); - - vf->driver_caps = 0; - ice_vc_set_default_allowlist(vf); - - ice_vf_fdir_exit(vf); - ice_vf_fdir_init(vf); - /* clean VF control VSI when resetting VFs since it should be - * setup only when VF creates its first FDIR rule. - */ - if (vf->ctrl_vsi_idx != ICE_NO_VSI) - ice_vf_ctrl_invalidate_vsi(vf); - - ice_vf_pre_vsi_rebuild(vf); - ice_vf_rebuild_vsi(vf); - ice_vf_post_vsi_rebuild(vf); - - mutex_unlock(&vf->cfg_lock); - } - - if (ice_is_eswitch_mode_switchdev(pf)) - if (ice_eswitch_rebuild(pf)) - dev_warn(dev, "eswitch rebuild failed\n"); - - ice_flush(hw); - clear_bit(ICE_VF_DIS, pf->state); - - mutex_unlock(&pf->vfs.table_lock); - - return true; -} - -/** - * ice_is_vf_disabled - * @vf: pointer to the VF info - * - * Returns true if the PF or VF is disabled, false otherwise. - */ -bool ice_is_vf_disabled(struct ice_vf *vf) -{ - struct ice_pf *pf = vf->pf; - - /* If the PF has been disabled, there is no need resetting VF until - * PF is active again. Similarly, if the VF has been disabled, this - * means something else is resetting the VF, so we shouldn't continue. - * Otherwise, set disable VF state bit for actual reset, and continue. - */ - return (test_bit(ICE_VF_DIS, pf->state) || - test_bit(ICE_VF_STATE_DIS, vf->vf_states)); -} - -/** - * ice_reset_vf - Reset a particular VF - * @vf: pointer to the VF structure - * @is_vflr: true if VFLR was issued, false if not - * - * Returns true if the VF is currently in reset, resets successfully, or resets - * are disabled and false otherwise. - */ -bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) -{ - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - struct device *dev; - struct ice_hw *hw; - bool rsd = false; - u8 promisc_m; - u32 reg; - int i; - - lockdep_assert_held(&vf->cfg_lock); - - dev = ice_pf_to_dev(pf); - - if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { - dev_dbg(dev, "Trying to reset VF %d, but all VF resets are disabled\n", - vf->vf_id); - return true; - } - - if (ice_is_vf_disabled(vf)) { - dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", - vf->vf_id); - return true; - } - - /* Set VF disable bit state here, before triggering reset */ - set_bit(ICE_VF_STATE_DIS, vf->vf_states); - ice_trigger_vf_reset(vf, is_vflr, false); - - vsi = ice_get_vf_vsi(vf); - - ice_dis_vf_qs(vf); - - /* Call Disable LAN Tx queue AQ whether or not queues are - * enabled. This is needed for successful completion of VFR. - */ - ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL, - NULL, ICE_VF_RESET, vf->vf_id, NULL); - - hw = &pf->hw; - /* poll VPGEN_VFRSTAT reg to make sure - * that reset is complete - */ - for (i = 0; i < 10; i++) { - /* VF reset requires driver to first reset the VF and then - * poll the status register to make sure that the reset - * completed successfully. 
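The promiscuous-mode teardown in the reset path further below picks which mask to clear based on VLAN state: a VLAN-aware mask when a port VLAN or any other VLAN is configured, a plain unicast mask otherwise. A sketch of that selection; the mask values here are hypothetical, not the driver's ICE_*_PROMISC_BITS:

#include <stdbool.h>
#include <stdio.h>

#define UCAST_PROMISC           0x01u
#define UCAST_VLAN_PROMISC      0x03u

static unsigned int promisc_mask(bool port_vlan_ena, int num_vlan)
{
        return (port_vlan_ena || num_vlan) ? UCAST_VLAN_PROMISC
                                           : UCAST_PROMISC;
}

int main(void)
{
        printf("mask: 0x%x\n", promisc_mask(false, 2));
        return 0;
}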
- */ - reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id)); - if (reg & VPGEN_VFRSTAT_VFRD_M) { - rsd = true; - break; - } - - /* only sleep if the reset is not done */ - usleep_range(10, 20); - } - - vf->driver_caps = 0; - ice_vc_set_default_allowlist(vf); - - /* Display a warning if VF didn't manage to reset in time, but need to - * continue on with the operation. - */ - if (!rsd) - dev_warn(dev, "VF reset check timeout on VF %d\n", vf->vf_id); - - /* disable promiscuous modes in case they were enabled - * ignore any error if disabling process failed - */ - if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || - test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { - if (ice_vf_is_port_vlan_ena(vf) || vsi->num_vlan) - promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; - else - promisc_m = ICE_UCAST_PROMISC_BITS; - - if (ice_vf_clear_vsi_promisc(vf, vsi, promisc_m)) - dev_err(dev, "disabling promiscuous mode failed\n"); - } - - ice_eswitch_del_vf_mac_rule(vf); - - ice_vf_fdir_exit(vf); - ice_vf_fdir_init(vf); - /* clean VF control VSI when resetting VF since it should be setup - * only when VF creates its first FDIR rule. - */ - if (vf->ctrl_vsi_idx != ICE_NO_VSI) - ice_vf_ctrl_vsi_release(vf); - - ice_vf_pre_vsi_rebuild(vf); - - if (ice_vf_rebuild_vsi_with_release(vf)) { - dev_err(dev, "Failed to release and setup the VF%u's VSI\n", vf->vf_id); - return false; - } - - ice_vf_post_vsi_rebuild(vf); - vsi = ice_get_vf_vsi(vf); - ice_eswitch_update_repr(vsi); - ice_eswitch_replay_vf_mac_rule(vf); - - /* if the VF has been reset allow it to come up again */ - if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs, - ICE_MAX_VF_COUNT, vf->vf_id)) - dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", i); - - return true; -} - -/** * ice_vc_notify_link_state - Inform all VFs on a PF of link status * @pf: pointer to the PF structure */ @@ -1846,505 +282,6 @@ void ice_vc_notify_reset(struct ice_pf *pf) } /** - * ice_vc_notify_vf_reset - Notify VF of a reset event - * @vf: pointer to the VF structure - */ -static void ice_vc_notify_vf_reset(struct ice_vf *vf) -{ - struct virtchnl_pf_event pfe; - struct ice_pf *pf = vf->pf; - - /* Bail out if VF is in disabled state, neither initialized, nor active - * state - otherwise proceed with notifications - */ - if ((!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && - !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) || - test_bit(ICE_VF_STATE_DIS, vf->vf_states)) - return; - - pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING; - pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM; - ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT, - VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe), - NULL); -} - -/** - * ice_init_vf_vsi_res - initialize/setup VF VSI resources - * @vf: VF to initialize/setup the VSI for - * - * This function creates a VSI for the VF, adds a VLAN 0 filter, and sets up the - * VF VSI's broadcast filter and is only used during initial VF creation. 
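ice_init_vf_vsi_res() just below acquires its resources step by step and unwinds through a single release label on any failure, the usual kernel goto-cleanup pattern. A compact sketch of that control flow with stub steps, one of which is made to fail:

#include <stdio.h>

static int setup_vsi(void)     { return 0; }
static int add_vlan_zero(void) { return 0; }
static int add_broadcast(void) { return -1; /* pretend this fails */ }
static void release_vsi(void)  { printf("released VSI\n"); }

static int init_res(void)
{
        int err;

        err = setup_vsi();
        if (err)
                return err;     /* nothing acquired yet, no unwind needed */

        err = add_vlan_zero();
        if (err)
                goto release;

        err = add_broadcast();
        if (err)
                goto release;

        return 0;

release:
        release_vsi();
        return err;
}

int main(void)
{
        printf("init_res() = %d\n", init_res());
        return 0;
}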
- */ -static int ice_init_vf_vsi_res(struct ice_vf *vf) -{ - struct ice_vsi_vlan_ops *vlan_ops; - struct ice_pf *pf = vf->pf; - u8 broadcast[ETH_ALEN]; - struct ice_vsi *vsi; - struct device *dev; - int err; - - vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf); - - dev = ice_pf_to_dev(pf); - vsi = ice_vf_vsi_setup(vf); - if (!vsi) - return -ENOMEM; - - err = ice_vsi_add_vlan_zero(vsi); - if (err) { - dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n", - vf->vf_id); - goto release_vsi; - } - - vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); - err = vlan_ops->ena_rx_filtering(vsi); - if (err) { - dev_warn(dev, "Failed to enable Rx VLAN filtering for VF %d\n", - vf->vf_id); - goto release_vsi; - } - - eth_broadcast_addr(broadcast); - err = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI); - if (err) { - dev_err(dev, "Failed to add broadcast MAC filter for VF %d, error %d\n", - vf->vf_id, err); - goto release_vsi; - } - - err = ice_vf_set_spoofchk_cfg(vf, vsi); - if (err) { - dev_warn(dev, "Failed to initialize spoofchk setting for VF %d\n", - vf->vf_id); - goto release_vsi; - } - - vf->num_mac = 1; - - return 0; - -release_vsi: - ice_vf_vsi_release(vf); - return err; -} - -/** - * ice_start_vfs - start VFs so they are ready to be used by SR-IOV - * @pf: PF the VFs are associated with - */ -static int ice_start_vfs(struct ice_pf *pf) -{ - struct ice_hw *hw = &pf->hw; - unsigned int bkt, it_cnt; - struct ice_vf *vf; - int retval; - - lockdep_assert_held(&pf->vfs.table_lock); - - it_cnt = 0; - ice_for_each_vf(pf, bkt, vf) { - ice_clear_vf_reset_trigger(vf); - - retval = ice_init_vf_vsi_res(vf); - if (retval) { - dev_err(ice_pf_to_dev(pf), "Failed to initialize VSI resources for VF %d, error %d\n", - vf->vf_id, retval); - goto teardown; - } - - set_bit(ICE_VF_STATE_INIT, vf->vf_states); - ice_ena_vf_mappings(vf); - wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE); - it_cnt++; - } - - ice_flush(hw); - return 0; - -teardown: - ice_for_each_vf(pf, bkt, vf) { - if (it_cnt == 0) - break; - - ice_dis_vf_mappings(vf); - ice_vf_vsi_release(vf); - it_cnt--; - } - - return retval; -} - -/** - * ice_create_vf_entries - Allocate and insert VF entries - * @pf: pointer to the PF structure - * @num_vfs: the number of VFs to allocate - * - * Allocate new VF entries and insert them into the hash table. Set some - * basic default fields for initializing the new VFs. - * - * After this function exits, the hash table will have num_vfs entries - * inserted. - * - * Returns 0 on success or an integer error code on failure. - */ -static int ice_create_vf_entries(struct ice_pf *pf, u16 num_vfs) -{ - struct ice_vfs *vfs = &pf->vfs; - struct ice_vf *vf; - u16 vf_id; - int err; - - lockdep_assert_held(&vfs->table_lock); - - for (vf_id = 0; vf_id < num_vfs; vf_id++) { - vf = kzalloc(sizeof(*vf), GFP_KERNEL); - if (!vf) { - err = -ENOMEM; - goto err_free_entries; - } - kref_init(&vf->refcnt); - - vf->pf = pf; - vf->vf_id = vf_id; - - vf->vf_sw_id = pf->first_sw; - /* assign default capabilities */ - set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps); - vf->spoofchk = true; - vf->num_vf_qs = pf->vfs.num_qps_per; - ice_vc_set_default_allowlist(vf); - - /* ctrl_vsi_idx will be set to a valid value only when VF - * creates its first fdir rule. 
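Each VF entry above is created holding one reference (kref_init()) and is later looked up with kref_get_unless_zero(), so an entry whose count already dropped to zero, and which may be mid-free, cannot be revived. A C11-atomics sketch of those semantics; this is an illustration, not the kernel's kref implementation:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct entry {
        atomic_int refcnt;
};

static bool get_unless_zero(struct entry *e)
{
        int old = atomic_load(&e->refcnt);

        while (old != 0) {
                /* on failure, old is reloaded with the current count */
                if (atomic_compare_exchange_weak(&e->refcnt, &old, old + 1))
                        return true;    /* took a reference */
        }
        return false;                   /* already dead, do not revive */
}

static void put_entry(struct entry *e)
{
        if (atomic_fetch_sub(&e->refcnt, 1) == 1)
                printf("last reference dropped, free the entry\n");
}

int main(void)
{
        struct entry e = { .refcnt = 1 };       /* like kref_init() */

        if (get_unless_zero(&e))
                put_entry(&e);
        put_entry(&e);                          /* drops to zero */
        return 0;
}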
- */ - ice_vf_ctrl_invalidate_vsi(vf); - ice_vf_fdir_init(vf); - - ice_vc_set_dflt_vf_ops(&vf->vc_ops); - - mutex_init(&vf->cfg_lock); - - hash_add_rcu(vfs->table, &vf->entry, vf_id); - } - - return 0; - -err_free_entries: - ice_free_vf_entries(pf); - return err; -} - -/** - * ice_ena_vfs - enable VFs so they are ready to be used - * @pf: pointer to the PF structure - * @num_vfs: number of VFs to enable - */ -static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) -{ - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - int ret; - - /* Disable global interrupt 0 so we don't try to handle the VFLR. */ - wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), - ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); - set_bit(ICE_OICR_INTR_DIS, pf->state); - ice_flush(hw); - - ret = pci_enable_sriov(pf->pdev, num_vfs); - if (ret) - goto err_unroll_intr; - - mutex_lock(&pf->vfs.table_lock); - - if (ice_set_per_vf_res(pf, num_vfs)) { - dev_err(dev, "Not enough resources for %d VFs, try with fewer number of VFs\n", - num_vfs); - ret = -ENOSPC; - goto err_unroll_sriov; - } - - ret = ice_create_vf_entries(pf, num_vfs); - if (ret) { - dev_err(dev, "Failed to allocate VF entries for %d VFs\n", - num_vfs); - goto err_unroll_sriov; - } - - if (ice_start_vfs(pf)) { - dev_err(dev, "Failed to start VF(s)\n"); - ret = -EAGAIN; - goto err_unroll_vf_entries; - } - - clear_bit(ICE_VF_DIS, pf->state); - - ret = ice_eswitch_configure(pf); - if (ret) - goto err_unroll_sriov; - - /* rearm global interrupts */ - if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state)) - ice_irq_dynamic_ena(hw, NULL, NULL); - - mutex_unlock(&pf->vfs.table_lock); - - return 0; - -err_unroll_vf_entries: - ice_free_vf_entries(pf); -err_unroll_sriov: - mutex_unlock(&pf->vfs.table_lock); - pci_disable_sriov(pf->pdev); -err_unroll_intr: - /* rearm interrupts here */ - ice_irq_dynamic_ena(hw, NULL, NULL); - clear_bit(ICE_OICR_INTR_DIS, pf->state); - return ret; -} - -/** - * ice_pci_sriov_ena - Enable or change number of VFs - * @pf: pointer to the PF structure - * @num_vfs: number of VFs to allocate - * - * Returns 0 on success and negative on failure - */ -static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) -{ - int pre_existing_vfs = pci_num_vf(pf->pdev); - struct device *dev = ice_pf_to_dev(pf); - int err; - - if (pre_existing_vfs && pre_existing_vfs != num_vfs) - ice_free_vfs(pf); - else if (pre_existing_vfs && pre_existing_vfs == num_vfs) - return 0; - - if (num_vfs > pf->vfs.num_supported) { - dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n", - num_vfs, pf->vfs.num_supported); - return -EOPNOTSUPP; - } - - dev_info(dev, "Enabling %d VFs\n", num_vfs); - err = ice_ena_vfs(pf, num_vfs); - if (err) { - dev_err(dev, "Failed to enable SR-IOV: %d\n", err); - return err; - } - - set_bit(ICE_FLAG_SRIOV_ENA, pf->flags); - return 0; -} - -/** - * ice_check_sriov_allowed - check if SR-IOV is allowed based on various checks - * @pf: PF to enabled SR-IOV on - */ -static int ice_check_sriov_allowed(struct ice_pf *pf) -{ - struct device *dev = ice_pf_to_dev(pf); - - if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) { - dev_err(dev, "This device is not capable of SR-IOV\n"); - return -EOPNOTSUPP; - } - - if (ice_is_safe_mode(pf)) { - dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n"); - return -EOPNOTSUPP; - } - - if (!ice_pf_state_is_nominal(pf)) { - dev_err(dev, "Cannot enable SR-IOV, device not ready\n"); - return -EBUSY; - } - - return 0; -} - -/** - * ice_sriov_configure - Enable or change number of VFs via sysfs 
- * @pdev: pointer to a pci_dev structure - * @num_vfs: number of VFs to allocate or 0 to free VFs - * - * This function is called when the user updates the number of VFs in sysfs. On - * success return whatever num_vfs was set to by the caller. Return negative on - * failure. - */ -int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) -{ - struct ice_pf *pf = pci_get_drvdata(pdev); - struct device *dev = ice_pf_to_dev(pf); - int err; - - err = ice_check_sriov_allowed(pf); - if (err) - return err; - - if (!num_vfs) { - if (!pci_vfs_assigned(pdev)) { - ice_mbx_deinit_snapshot(&pf->hw); - ice_free_vfs(pf); - if (pf->lag) - ice_enable_lag(pf->lag); - return 0; - } - - dev_err(dev, "can't free VFs because some are assigned to VMs.\n"); - return -EBUSY; - } - - err = ice_mbx_init_snapshot(&pf->hw, num_vfs); - if (err) - return err; - - err = ice_pci_sriov_ena(pf, num_vfs); - if (err) { - ice_mbx_deinit_snapshot(&pf->hw); - return err; - } - - if (pf->lag) - ice_disable_lag(pf->lag); - return num_vfs; -} - -/** - * ice_process_vflr_event - Free VF resources via IRQ calls - * @pf: pointer to the PF structure - * - * called from the VFLR IRQ handler to - * free up VF resources and state variables - */ -void ice_process_vflr_event(struct ice_pf *pf) -{ - struct ice_hw *hw = &pf->hw; - struct ice_vf *vf; - unsigned int bkt; - u32 reg; - - if (!test_and_clear_bit(ICE_VFLR_EVENT_PENDING, pf->state) || - !ice_has_vfs(pf)) - return; - - mutex_lock(&pf->vfs.table_lock); - ice_for_each_vf(pf, bkt, vf) { - u32 reg_idx, bit_idx; - - reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32; - bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32; - /* read GLGEN_VFLRSTAT register to find out the flr VFs */ - reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx)); - if (reg & BIT(bit_idx)) { - /* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */ - mutex_lock(&vf->cfg_lock); - ice_reset_vf(vf, true); - mutex_unlock(&vf->cfg_lock); - } - } - mutex_unlock(&pf->vfs.table_lock); -} - -/** - * ice_vc_reset_vf - Perform software reset on the VF after informing the AVF - * @vf: pointer to the VF info - */ -static void ice_vc_reset_vf(struct ice_vf *vf) -{ - ice_vc_notify_vf_reset(vf); - ice_reset_vf(vf, false); -} - -/** - * ice_get_vf_from_pfq - get the VF who owns the PF space queue passed in - * @pf: PF used to index all VFs - * @pfq: queue index relative to the PF's function space - * - * If no VF is found who owns the pfq then return NULL, otherwise return a - * pointer to the VF who owns the pfq - * - * If this function returns non-NULL, it acquires a reference count of the VF - * structure. The caller is responsible for calling ice_put_vf() to drop this - * reference. 
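The lookup below first converts a device-global Rx queue to the PF's queue space (by subtracting rxq_first_id) and then scans each VF's rxq_map for a match. A table-driven sketch of that translation; the table sizes, offset and mappings are made up for the example, and a real caller would also take a reference on the match:

#include <stdio.h>

#define NUM_VFS         2
#define QS_PER_VF       4

static const unsigned int rxq_first_id = 64;    /* sample offset */
static const unsigned int rxq_map[NUM_VFS][QS_PER_VF] = {
        { 0, 1, 2, 3 },
        { 4, 5, 6, 7 },
};

static int vf_from_globalq(unsigned int globalq)
{
        unsigned int pfq = globalq - rxq_first_id;
        int vf, q;

        for (vf = 0; vf < NUM_VFS; vf++)
                for (q = 0; q < QS_PER_VF; q++)
                        if (rxq_map[vf][q] == pfq)
                                return vf;
        return -1;      /* no VF owns this queue */
}

int main(void)
{
        printf("global queue 69 belongs to VF %d\n", vf_from_globalq(69));
        return 0;
}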
- */ -static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq) -{ - struct ice_vf *vf; - unsigned int bkt; - - rcu_read_lock(); - ice_for_each_vf_rcu(pf, bkt, vf) { - struct ice_vsi *vsi; - u16 rxq_idx; - - vsi = ice_get_vf_vsi(vf); - - ice_for_each_rxq(vsi, rxq_idx) - if (vsi->rxq_map[rxq_idx] == pfq) { - struct ice_vf *found; - - if (kref_get_unless_zero(&vf->refcnt)) - found = vf; - else - found = NULL; - rcu_read_unlock(); - return found; - } - } - rcu_read_unlock(); - - return NULL; -} - -/** - * ice_globalq_to_pfq - convert from global queue index to PF space queue index - * @pf: PF used for conversion - * @globalq: global queue index used to convert to PF space queue index - */ -static u32 ice_globalq_to_pfq(struct ice_pf *pf, u32 globalq) -{ - return globalq - pf->hw.func_caps.common_cap.rxq_first_id; -} - -/** - * ice_vf_lan_overflow_event - handle LAN overflow event for a VF - * @pf: PF that the LAN overflow event happened on - * @event: structure holding the event information for the LAN overflow event - * - * Determine if the LAN overflow event was caused by a VF queue. If it was not - * caused by a VF, do nothing. If a VF caused this LAN overflow event trigger a - * reset on the offending VF. - */ -void -ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) -{ - u32 gldcb_rtctq, queue; - struct ice_vf *vf; - - gldcb_rtctq = le32_to_cpu(event->desc.params.lan_overflow.prtdcb_ruptq); - dev_dbg(ice_pf_to_dev(pf), "GLDCB_RTCTQ: 0x%08x\n", gldcb_rtctq); - - /* event returns device global Rx queue number */ - queue = (gldcb_rtctq & GLDCB_RTCTQ_RXQNUM_M) >> - GLDCB_RTCTQ_RXQNUM_S; - - vf = ice_get_vf_from_pfq(pf, ice_globalq_to_pfq(pf, queue)); - if (!vf) - return; - - mutex_lock(&vf->cfg_lock); - ice_vc_reset_vf(vf); - mutex_unlock(&vf->cfg_lock); - - ice_put_vf(vf); -} - -/** * ice_vc_send_msg_to_vf - Send message to VF * @vf: pointer to the VF info * @v_opcode: virtual channel opcode @@ -2433,12 +370,12 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) { enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_vf_resource *vfres = NULL; - struct ice_pf *pf = vf->pf; + struct ice_hw *hw = &vf->pf->hw; struct ice_vsi *vsi; int len = 0; int ret; - if (ice_check_vf_init(pf, vf)) { + if (ice_check_vf_init(vf)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto err; } @@ -2475,9 +412,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) * inner/single VLAN respectively and don't allow VF to * negotiate VIRTCHNL_VF_OFFLOAD in any other cases */ - if (ice_is_dvm_ena(&pf->hw) && ice_vf_is_port_vlan_ena(vf)) { + if (ice_is_dvm_ena(hw) && ice_vf_is_port_vlan_ena(vf)) { vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN; - } else if (!ice_is_dvm_ena(&pf->hw) && + } else if (!ice_is_dvm_ena(hw) && !ice_vf_is_port_vlan_ena(vf)) { vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN; /* configure backward compatible support for VFs that @@ -2485,7 +422,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) * configured in SVM, and no port VLAN is configured */ ice_vf_vsi_cfg_svm_legacy_vlan_mode(vsi); - } else if (ice_is_dvm_ena(&pf->hw)) { + } else if (ice_is_dvm_ena(hw)) { /* configure software offloaded VLAN support when DVM * is enabled, but no port VLAN is enabled */ @@ -2535,7 +472,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->num_vsis = 1; /* Tx and Rx queue are equal for VF */ vfres->num_queue_pairs = vsi->num_txq; - vfres->max_vectors = pf->vfs.num_msix_per; + vfres->max_vectors = 
vf->pf->vfs.num_msix_per; vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE; vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE; vfres->max_mtu = ice_vc_get_max_frame_size(vf); @@ -2574,7 +511,7 @@ err: static void ice_vc_reset_vf_msg(struct ice_vf *vf) { if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) - ice_reset_vf(vf, false); + ice_reset_vf(vf, 0); } /** @@ -3041,138 +978,6 @@ error_param: } /** - * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset - * @vf: The VF being reset - * - * The max poll time is about ~800ms, which is about the maximum time it takes - * for a VF to be reset and/or a VF driver to be removed. - */ -static void ice_wait_on_vf_reset(struct ice_vf *vf) -{ - int i; - - for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) { - if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) - break; - msleep(ICE_MAX_VF_RESET_SLEEP_MS); - } -} - -/** - * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried - * @vf: VF to check if it's ready to be configured/queried - * - * The purpose of this function is to make sure the VF is not in reset, not - * disabled, and initialized so it can be configured and/or queried by a host - * administrator. - */ -int ice_check_vf_ready_for_cfg(struct ice_vf *vf) -{ - struct ice_pf *pf; - - ice_wait_on_vf_reset(vf); - - if (ice_is_vf_disabled(vf)) - return -EINVAL; - - pf = vf->pf; - if (ice_check_vf_init(pf, vf)) - return -EBUSY; - - return 0; -} - -/** - * ice_set_vf_spoofchk - * @netdev: network interface device structure - * @vf_id: VF identifier - * @ena: flag to enable or disable feature - * - * Enable or disable VF spoof checking - */ -int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) -{ - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_pf *pf = np->vsi->back; - struct ice_vsi *vf_vsi; - struct device *dev; - struct ice_vf *vf; - int ret; - - dev = ice_pf_to_dev(pf); - - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return -EINVAL; - - ret = ice_check_vf_ready_for_cfg(vf); - if (ret) - goto out_put_vf; - - vf_vsi = ice_get_vf_vsi(vf); - if (!vf_vsi) { - netdev_err(netdev, "VSI %d for VF %d is null\n", - vf->lan_vsi_idx, vf->vf_id); - ret = -EINVAL; - goto out_put_vf; - } - - if (vf_vsi->type != ICE_VSI_VF) { - netdev_err(netdev, "Type %d of VSI %d for VF %d is not ICE_VSI_VF\n", - vf_vsi->type, vf_vsi->vsi_num, vf->vf_id); - ret = -ENODEV; - goto out_put_vf; - } - - if (ena == vf->spoofchk) { - dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF"); - ret = 0; - goto out_put_vf; - } - - if (ena) - ret = ice_vsi_ena_spoofchk(vf_vsi); - else - ret = ice_vsi_dis_spoofchk(vf_vsi); - if (ret) - dev_err(dev, "Failed to set spoofchk %s for VF %d VSI %d, error %d\n", - ena ?
"ON" : "OFF", vf->vf_id, vf_vsi->vsi_num, ret); - else - vf->spoofchk = ena; - -out_put_vf: - ice_put_vf(vf); - return ret; -} - -/** - * ice_is_any_vf_in_promisc - check if any VF(s) are in promiscuous mode - * @pf: PF structure for accessing VF(s) - * - * Return false if no VF(s) are in unicast and/or multicast promiscuous mode, - * else return true - */ -bool ice_is_any_vf_in_promisc(struct ice_pf *pf) -{ - bool is_vf_promisc = false; - struct ice_vf *vf; - unsigned int bkt; - - rcu_read_lock(); - ice_for_each_vf_rcu(pf, bkt, vf) { - /* found a VF that has promiscuous mode configured */ - if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || - test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { - is_vf_promisc = true; - break; - } - } - rcu_read_unlock(); - - return is_vf_promisc; -} - -/** * ice_vc_cfg_promiscuous_mode_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -3209,7 +1014,7 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) } dev = ice_pf_to_dev(pf); - if (!test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { + if (!ice_is_vf_trusted(vf)) { dev_err(dev, "Unprivileged VF %d is attempting to configure promiscuous mode\n", vf->vf_id); /* Leave v_ret alone, lie to the VF on purpose. */ @@ -3287,16 +1092,21 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) !test_and_set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) dev_info(dev, "VF %u successfully set multicast promiscuous mode\n", vf->vf_id); - else if (!allmulti && test_and_clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) + else if (!allmulti && + test_and_clear_bit(ICE_VF_STATE_MC_PROMISC, + vf->vf_states)) dev_info(dev, "VF %u successfully unset multicast promiscuous mode\n", vf->vf_id); } if (!ucast_err) { - if (alluni && !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) + if (alluni && + !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) dev_info(dev, "VF %u successfully set unicast promiscuous mode\n", vf->vf_id); - else if (!alluni && test_and_clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) + else if (!alluni && + test_and_clear_bit(ICE_VF_STATE_UC_PROMISC, + vf->vf_states)) dev_info(dev, "VF %u successfully unset unicast promiscuous mode\n", vf->vf_id); } @@ -3840,8 +1650,9 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } vsi->max_frame = qpi->rxq.max_pkt_size; - /* add space for the port VLAN since the VF driver is not - * expected to account for it in the MTU calculation + /* add space for the port VLAN since the VF driver is + * not expected to account for it in the MTU + * calculation */ if (ice_vf_is_port_vlan_ena(vf)) vsi->max_frame += VLAN_HLEN; @@ -3860,15 +1671,6 @@ error_param: } /** - * ice_is_vf_trusted - * @vf: pointer to the VF info - */ -static bool ice_is_vf_trusted(struct ice_vf *vf) -{ - return test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); -} - -/** * ice_can_vf_change_mac * @vf: pointer to the VF info * @@ -4242,7 +2044,7 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) } else { /* request is successful, then reset VF */ vf->num_req_qs = req_queues; - ice_vc_reset_vf(vf); + ice_reset_vf(vf, ICE_VF_RESET_NOTIFY); dev_info(dev, "VF %d granted request of %u queues.\n", vf->vf_id, req_queues); return 0; @@ -4255,102 +2057,6 @@ error_param: } /** - * ice_is_supported_port_vlan_proto - make sure the vlan_proto is supported - * @hw: hardware structure used to check the VLAN mode - * @vlan_proto: VLAN TPID being checked - * - * If the device is configured in Double VLAN Mode 
(DVM), then both ETH_P_8021Q - * and ETH_P_8021AD are supported. If the device is configured in Single VLAN - * Mode (SVM), then only ETH_P_8021Q is supported. - */ -static bool -ice_is_supported_port_vlan_proto(struct ice_hw *hw, u16 vlan_proto) -{ - bool is_supported = false; - - switch (vlan_proto) { - case ETH_P_8021Q: - is_supported = true; - break; - case ETH_P_8021AD: - if (ice_is_dvm_ena(hw)) - is_supported = true; - break; - } - - return is_supported; -} - -/** - * ice_set_vf_port_vlan - * @netdev: network interface device structure - * @vf_id: VF identifier - * @vlan_id: VLAN ID being set - * @qos: priority setting - * @vlan_proto: VLAN protocol - * - * program VF Port VLAN ID and/or QoS - */ -int -ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, - __be16 vlan_proto) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - u16 local_vlan_proto = ntohs(vlan_proto); - struct device *dev; - struct ice_vf *vf; - int ret; - - dev = ice_pf_to_dev(pf); - - if (vlan_id >= VLAN_N_VID || qos > 7) { - dev_err(dev, "Invalid Port VLAN parameters for VF %d, ID %d, QoS %d\n", - vf_id, vlan_id, qos); - return -EINVAL; - } - - if (!ice_is_supported_port_vlan_proto(&pf->hw, local_vlan_proto)) { - dev_err(dev, "VF VLAN protocol 0x%04x is not supported\n", - local_vlan_proto); - return -EPROTONOSUPPORT; - } - - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return -EINVAL; - - ret = ice_check_vf_ready_for_cfg(vf); - if (ret) - goto out_put_vf; - - if (ice_vf_get_port_vlan_prio(vf) == qos && - ice_vf_get_port_vlan_tpid(vf) == local_vlan_proto && - ice_vf_get_port_vlan_id(vf) == vlan_id) { - /* duplicate request, so just return success */ - dev_dbg(dev, "Duplicate port VLAN %u, QoS %u, TPID 0x%04x request\n", - vlan_id, qos, local_vlan_proto); - ret = 0; - goto out_put_vf; - } - - mutex_lock(&vf->cfg_lock); - - vf->port_vlan_info = ICE_VLAN(local_vlan_proto, vlan_id, qos); - if (ice_vf_is_port_vlan_ena(vf)) - dev_info(dev, "Setting VLAN %u, QoS %u, TPID 0x%04x on VF %d\n", - vlan_id, qos, local_vlan_proto, vf_id); - else - dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id); - - ice_vc_reset_vf(vf); - mutex_unlock(&vf->cfg_lock); - -out_put_vf: - ice_put_vf(vf); - return ret; -} - -/** * ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads * @caps: VF driver negotiated capabilities * @@ -4732,8 +2438,11 @@ static u16 ice_vc_get_max_vlan_fltrs(struct ice_vf *vf) } /** - * ice_vf_outer_vlan_not_allowed - check outer VLAN can be used when the device is in DVM + * ice_vf_outer_vlan_not_allowed - check if outer VLAN can be used * @vf: VF that is being checked for + * + * When the device is in double VLAN mode, check whether or not the outer VLAN + * is allowed.
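For reference, the TPID rule encoded by the ice_is_supported_port_vlan_proto() removal above: 0x8100 (802.1Q) is always acceptable for a port VLAN, while 0x88A8 (802.1ad) is acceptable only when the device runs in double VLAN mode. A standalone restatement:

#include <stdbool.h>
#include <stdio.h>

#define ETH_P_8021Q     0x8100
#define ETH_P_8021AD    0x88A8

static bool port_vlan_proto_ok(unsigned short tpid, bool dvm_ena)
{
        switch (tpid) {
        case ETH_P_8021Q:
                return true;            /* always supported */
        case ETH_P_8021AD:
                return dvm_ena;         /* only in double VLAN mode */
        default:
                return false;
        }
}

int main(void)
{
        printf("0x88A8 in SVM: %s\n",
               port_vlan_proto_ok(ETH_P_8021AD, false) ? "ok" : "rejected");
        return 0;
}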
*/ static bool ice_vf_outer_vlan_not_allowed(struct ice_vf *vf) { @@ -5011,9 +2720,11 @@ ice_vc_validate_vlan_filter_list(struct virtchnl_vlan_filtering_caps *vfc, return false; if ((ice_vc_is_valid_vlan(outer) && - !ice_vc_validate_vlan_tpid(filtering_support->outer, outer->tpid)) || + !ice_vc_validate_vlan_tpid(filtering_support->outer, + outer->tpid)) || (ice_vc_is_valid_vlan(inner) && - !ice_vc_validate_vlan_tpid(filtering_support->inner, inner->tpid))) + !ice_vc_validate_vlan_tpid(filtering_support->inner, + inner->tpid))) return false; } @@ -5488,7 +3199,8 @@ static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg) } out: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2, v_ret, NULL, 0); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2, + v_ret, NULL, 0); } /** @@ -5557,7 +3269,8 @@ static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg) } out: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2, v_ret, NULL, 0); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2, + v_ret, NULL, 0); } /** @@ -5615,7 +3328,8 @@ static int ice_vc_ena_vlan_insertion_v2_msg(struct ice_vf *vf, u8 *msg) } out: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2, v_ret, NULL, 0); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2, + v_ret, NULL, 0); } /** @@ -5669,10 +3383,11 @@ static int ice_vc_dis_vlan_insertion_v2_msg(struct ice_vf *vf, u8 *msg) } out: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2, v_ret, NULL, 0); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2, + v_ret, NULL, 0); } -static struct ice_vc_vf_ops ice_vc_vf_dflt_ops = { +static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = { .get_ver_msg = ice_vc_get_ver_msg, .get_vf_res_msg = ice_vc_get_vf_res_msg, .reset_vf = ice_vc_reset_vf_msg, @@ -5703,9 +3418,13 @@ static struct ice_vc_vf_ops ice_vc_vf_dflt_ops = { .dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg, }; -void ice_vc_set_dflt_vf_ops(struct ice_vc_vf_ops *ops) +/** + * ice_virtchnl_set_dflt_ops - Switch to default virtchnl ops + * @vf: the VF to switch ops + */ +void ice_virtchnl_set_dflt_ops(struct ice_vf *vf) { - *ops = ice_vc_vf_dflt_ops; + vf->virtchnl_ops = &ice_virtchnl_dflt_ops; } /** @@ -5838,15 +3557,44 @@ ice_vc_repr_cfg_promiscuous_mode(struct ice_vf *vf, u8 __always_unused *msg) NULL, 0); } -void ice_vc_change_ops_to_repr(struct ice_vc_vf_ops *ops) +static const struct ice_virtchnl_ops ice_virtchnl_repr_ops = { + .get_ver_msg = ice_vc_get_ver_msg, + .get_vf_res_msg = ice_vc_get_vf_res_msg, + .reset_vf = ice_vc_reset_vf_msg, + .add_mac_addr_msg = ice_vc_repr_add_mac, + .del_mac_addr_msg = ice_vc_repr_del_mac, + .cfg_qs_msg = ice_vc_cfg_qs_msg, + .ena_qs_msg = ice_vc_ena_qs_msg, + .dis_qs_msg = ice_vc_dis_qs_msg, + .request_qs_msg = ice_vc_request_qs_msg, + .cfg_irq_map_msg = ice_vc_cfg_irq_map_msg, + .config_rss_key = ice_vc_config_rss_key, + .config_rss_lut = ice_vc_config_rss_lut, + .get_stats_msg = ice_vc_get_stats_msg, + .cfg_promiscuous_mode_msg = ice_vc_repr_cfg_promiscuous_mode, + .add_vlan_msg = ice_vc_repr_add_vlan, + .remove_vlan_msg = ice_vc_repr_del_vlan, + .ena_vlan_stripping = ice_vc_repr_ena_vlan_stripping, + .dis_vlan_stripping = ice_vc_repr_dis_vlan_stripping, + .handle_rss_cfg_msg = ice_vc_handle_rss_cfg, + .add_fdir_fltr_msg = ice_vc_add_fdir_fltr, + .del_fdir_fltr_msg = ice_vc_del_fdir_fltr, + .get_offload_vlan_v2_caps = 
ice_vc_get_offload_vlan_v2_caps, + .add_vlan_v2_msg = ice_vc_add_vlan_v2_msg, + .remove_vlan_v2_msg = ice_vc_remove_vlan_v2_msg, + .ena_vlan_stripping_v2_msg = ice_vc_ena_vlan_stripping_v2_msg, + .dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg, + .ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg, + .dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg, +}; + +/** + * ice_virtchnl_set_repr_ops - Switch to representor virtchnl ops + * @vf: the VF to switch ops + */ +void ice_virtchnl_set_repr_ops(struct ice_vf *vf) { - ops->add_mac_addr_msg = ice_vc_repr_add_mac; - ops->del_mac_addr_msg = ice_vc_repr_del_mac; - ops->add_vlan_msg = ice_vc_repr_add_vlan; - ops->remove_vlan_msg = ice_vc_repr_del_vlan; - ops->ena_vlan_stripping = ice_vc_repr_ena_vlan_stripping; - ops->dis_vlan_stripping = ice_vc_repr_dis_vlan_stripping; - ops->cfg_promiscuous_mode_msg = ice_vc_repr_cfg_promiscuous_mode; + vf->virtchnl_ops = &ice_virtchnl_repr_ops; } /** @@ -5861,8 +3609,8 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) { u32 v_opcode = le32_to_cpu(event->desc.cookie_high); s16 vf_id = le16_to_cpu(event->desc.retval); + const struct ice_virtchnl_ops *ops; u16 msglen = event->msg_len; - struct ice_vc_vf_ops *ops; u8 *msg = event->msg_buf; struct ice_vf *vf = NULL; struct device *dev; @@ -5883,7 +3631,7 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) goto error_handler; } - ops = &vf->vc_ops; + ops = vf->virtchnl_ops; /* Perform basic checks on the msg */ err = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen); @@ -6035,579 +3783,3 @@ error_handler: mutex_unlock(&vf->cfg_lock); ice_put_vf(vf); } - -/** - * ice_get_vf_cfg - * @netdev: network interface device structure - * @vf_id: VF identifier - * @ivi: VF configuration structure - * - * return VF configuration - */ -int -ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct ice_vf *vf; - int ret; - - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return -EINVAL; - - ret = ice_check_vf_ready_for_cfg(vf); - if (ret) - goto out_put_vf; - - ivi->vf = vf_id; - ether_addr_copy(ivi->mac, vf->hw_lan_addr.addr); - - /* VF configuration for VLAN and applicable QoS */ - ivi->vlan = ice_vf_get_port_vlan_id(vf); - ivi->qos = ice_vf_get_port_vlan_prio(vf); - if (ice_vf_is_port_vlan_ena(vf)) - ivi->vlan_proto = cpu_to_be16(ice_vf_get_port_vlan_tpid(vf)); - - ivi->trusted = vf->trusted; - ivi->spoofchk = vf->spoofchk; - if (!vf->link_forced) - ivi->linkstate = IFLA_VF_LINK_STATE_AUTO; - else if (vf->link_up) - ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE; - else - ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; - ivi->max_tx_rate = vf->max_tx_rate; - ivi->min_tx_rate = vf->min_tx_rate; - -out_put_vf: - ice_put_vf(vf); - return ret; -} - -/** - * ice_unicast_mac_exists - check if the unicast MAC exists on the PF's switch - * @pf: PF used to reference the switch's rules - * @umac: unicast MAC to compare against existing switch rules - * - * Return true on the first/any match, else return false - */ -static bool ice_unicast_mac_exists(struct ice_pf *pf, u8 *umac) -{ - struct ice_sw_recipe *mac_recipe_list = - &pf->hw.switch_info->recp_list[ICE_SW_LKUP_MAC]; - struct ice_fltr_mgmt_list_entry *list_itr; - struct list_head *rule_head; - struct mutex *rule_lock; /* protect MAC filter list access */ - - rule_head = &mac_recipe_list->filt_rules; - rule_lock = &mac_recipe_list->filt_rule_lock; - - 
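
Editor's note: the ops rework in these hunks replaces the old pattern, where a mutable struct ice_vc_vf_ops was copied into every VF and individual members were patched for switchdev representors, with two complete const tables and a single per-VF pointer (vf->virtchnl_ops). A rough sketch of the pattern with placeholder names and a trimmed-down ops struct:

struct vf;

struct vf_ops {
	int (*add_mac)(struct vf *vf, const unsigned char *mac);
	int (*del_mac)(struct vf *vf, const unsigned char *mac);
};

static int dflt_add_mac(struct vf *vf, const unsigned char *mac) { return 0; }
static int dflt_del_mac(struct vf *vf, const unsigned char *mac) { return 0; }
static int repr_add_mac(struct vf *vf, const unsigned char *mac) { return 0; }
static int repr_del_mac(struct vf *vf, const unsigned char *mac) { return 0; }

static const struct vf_ops dflt_ops = {
	.add_mac = dflt_add_mac,
	.del_mac = dflt_del_mac,
};

static const struct vf_ops repr_ops = {
	.add_mac = repr_add_mac,
	.del_mac = repr_del_mac,
};

struct vf {
	const struct vf_ops *ops;	/* one pointer, not an embedded copy */
};

static void vf_set_repr_ops(struct vf *vf)
{
	vf->ops = &repr_ops;		/* whole-table swap, no member patching */
}

The const tables live in read-only data and cannot be corrupted at runtime, and each VF shrinks by the size of the ops struct minus one pointer.
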
mutex_lock(rule_lock); - list_for_each_entry(list_itr, rule_head, list_entry) { - u8 *existing_mac = &list_itr->fltr_info.l_data.mac.mac_addr[0]; - - if (ether_addr_equal(existing_mac, umac)) { - mutex_unlock(rule_lock); - return true; - } - } - - mutex_unlock(rule_lock); - - return false; -} - -/** - * ice_set_vf_mac - * @netdev: network interface device structure - * @vf_id: VF identifier - * @mac: MAC address - * - * program VF MAC address - */ -int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct ice_vf *vf; - int ret; - - if (is_multicast_ether_addr(mac)) { - netdev_err(netdev, "%pM not a valid unicast address\n", mac); - return -EINVAL; - } - - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return -EINVAL; - - /* nothing left to do, unicast MAC already set */ - if (ether_addr_equal(vf->dev_lan_addr.addr, mac) && - ether_addr_equal(vf->hw_lan_addr.addr, mac)) { - ret = 0; - goto out_put_vf; - } - - ret = ice_check_vf_ready_for_cfg(vf); - if (ret) - goto out_put_vf; - - if (ice_unicast_mac_exists(pf, mac)) { - netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n", - mac, vf_id, mac); - ret = -EINVAL; - goto out_put_vf; - } - - mutex_lock(&vf->cfg_lock); - - /* VF is notified of its new MAC via the PF's response to the - * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset - */ - ether_addr_copy(vf->dev_lan_addr.addr, mac); - ether_addr_copy(vf->hw_lan_addr.addr, mac); - if (is_zero_ether_addr(mac)) { - /* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */ - vf->pf_set_mac = false; - netdev_info(netdev, "Removing MAC on VF %d. VF driver will be reinitialized\n", - vf->vf_id); - } else { - /* PF will add MAC rule for the VF */ - vf->pf_set_mac = true; - netdev_info(netdev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n", - mac, vf_id); - } - - ice_vc_reset_vf(vf); - mutex_unlock(&vf->cfg_lock); - -out_put_vf: - ice_put_vf(vf); - return ret; -} - -/** - * ice_set_vf_trust - * @netdev: network interface device structure - * @vf_id: VF identifier - * @trusted: Boolean value to enable/disable trusted VF - * - * Enable or disable a given VF as trusted - */ -int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct ice_vf *vf; - int ret; - - if (ice_is_eswitch_mode_switchdev(pf)) { - dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); - return -EOPNOTSUPP; - } - - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return -EINVAL; - - ret = ice_check_vf_ready_for_cfg(vf); - if (ret) - goto out_put_vf; - - /* Check if already trusted */ - if (trusted == vf->trusted) { - ret = 0; - goto out_put_vf; - } - - mutex_lock(&vf->cfg_lock); - - vf->trusted = trusted; - ice_vc_reset_vf(vf); - dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n", - vf_id, trusted ? 
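
Editor's note: ice_unicast_mac_exists(), deleted just above, is a first-match scan of the switch's MAC filter rules under the list's lock; ice_set_vf_mac() uses it to refuse a unicast address another rule already claims. A simplified userspace sketch, with a plain singly linked list and a pthread mutex standing in for the driver's list_head and rule_lock:

#include <pthread.h>
#include <stdbool.h>
#include <string.h>

struct mac_rule {
	unsigned char addr[6];
	struct mac_rule *next;
};

static pthread_mutex_t rule_lock = PTHREAD_MUTEX_INITIALIZER;
static struct mac_rule *rule_head;

static bool unicast_mac_exists(const unsigned char *umac)
{
	struct mac_rule *r;
	bool found = false;

	pthread_mutex_lock(&rule_lock);
	for (r = rule_head; r; r = r->next) {
		if (memcmp(r->addr, umac, 6) == 0) {
			found = true;	/* first/any match is enough */
			break;
		}
	}
	pthread_mutex_unlock(&rule_lock);

	return found;
}
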
"" : "un"); - - mutex_unlock(&vf->cfg_lock); - -out_put_vf: - ice_put_vf(vf); - return ret; -} - -/** - * ice_set_vf_link_state - * @netdev: network interface device structure - * @vf_id: VF identifier - * @link_state: required link state - * - * Set VF's link state, irrespective of physical link state status - */ -int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct ice_vf *vf; - int ret; - - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return -EINVAL; - - ret = ice_check_vf_ready_for_cfg(vf); - if (ret) - goto out_put_vf; - - switch (link_state) { - case IFLA_VF_LINK_STATE_AUTO: - vf->link_forced = false; - break; - case IFLA_VF_LINK_STATE_ENABLE: - vf->link_forced = true; - vf->link_up = true; - break; - case IFLA_VF_LINK_STATE_DISABLE: - vf->link_forced = true; - vf->link_up = false; - break; - default: - ret = -EINVAL; - goto out_put_vf; - } - - ice_vc_notify_vf_link_state(vf); - -out_put_vf: - ice_put_vf(vf); - return ret; -} - -/** - * ice_calc_all_vfs_min_tx_rate - calculate cumulative min Tx rate on all VFs - * @pf: PF associated with VFs - */ -static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf) -{ - struct ice_vf *vf; - unsigned int bkt; - int rate = 0; - - rcu_read_lock(); - ice_for_each_vf_rcu(pf, bkt, vf) - rate += vf->min_tx_rate; - rcu_read_unlock(); - - return rate; -} - -/** - * ice_min_tx_rate_oversubscribed - check if min Tx rate causes oversubscription - * @vf: VF trying to configure min_tx_rate - * @min_tx_rate: min Tx rate in Mbps - * - * Check if the min_tx_rate being passed in will cause oversubscription of total - * min_tx_rate based on the current link speed and all other VFs configured - * min_tx_rate - * - * Return true if the passed min_tx_rate would cause oversubscription, else - * return false - */ -static bool -ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate) -{ - int link_speed_mbps = ice_get_link_speed_mbps(ice_get_vf_vsi(vf)); - int all_vfs_min_tx_rate = ice_calc_all_vfs_min_tx_rate(vf->pf); - - /* this VF's previous rate is being overwritten */ - all_vfs_min_tx_rate -= vf->min_tx_rate; - - if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) { - dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n", - min_tx_rate, vf->vf_id, - all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps, - link_speed_mbps); - return true; - } - - return false; -} - -/** - * ice_set_vf_bw - set min/max VF bandwidth - * @netdev: network interface device structure - * @vf_id: VF identifier - * @min_tx_rate: Minimum Tx rate in Mbps - * @max_tx_rate: Maximum Tx rate in Mbps - */ -int -ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, - int max_tx_rate) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct ice_vsi *vsi; - struct device *dev; - struct ice_vf *vf; - int ret; - - dev = ice_pf_to_dev(pf); - - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return -EINVAL; - - ret = ice_check_vf_ready_for_cfg(vf); - if (ret) - goto out_put_vf; - - vsi = ice_get_vf_vsi(vf); - - /* when max_tx_rate is zero that means no max Tx rate limiting, so only - * check if max_tx_rate is non-zero - */ - if (max_tx_rate && min_tx_rate > max_tx_rate) { - dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n", - min_tx_rate, max_tx_rate); - ret = -EINVAL; - goto out_put_vf; - } - - if (min_tx_rate && ice_is_dcb_active(pf)) { - dev_err(dev, "DCB on PF 
is currently enabled. VF min Tx rate limiting not allowed on this PF.\n"); - ret = -EOPNOTSUPP; - goto out_put_vf; - } - - if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) { - ret = -EINVAL; - goto out_put_vf; - } - - if (vf->min_tx_rate != (unsigned int)min_tx_rate) { - ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000); - if (ret) { - dev_err(dev, "Unable to set min-tx-rate for VF %d\n", - vf->vf_id); - goto out_put_vf; - } - - vf->min_tx_rate = min_tx_rate; - } - - if (vf->max_tx_rate != (unsigned int)max_tx_rate) { - ret = ice_set_max_bw_limit(vsi, (u64)max_tx_rate * 1000); - if (ret) { - dev_err(dev, "Unable to set max-tx-rate for VF %d\n", - vf->vf_id); - goto out_put_vf; - } - - vf->max_tx_rate = max_tx_rate; - } - -out_put_vf: - ice_put_vf(vf); - return ret; -} - -/** - * ice_get_vf_stats - populate some stats for the VF - * @netdev: the netdev of the PF - * @vf_id: the host OS identifier (0-255) - * @vf_stats: pointer to the OS memory to be initialized - */ -int ice_get_vf_stats(struct net_device *netdev, int vf_id, - struct ifla_vf_stats *vf_stats) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct ice_eth_stats *stats; - struct ice_vsi *vsi; - struct ice_vf *vf; - int ret; - - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return -EINVAL; - - ret = ice_check_vf_ready_for_cfg(vf); - if (ret) - goto out_put_vf; - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - ret = -EINVAL; - goto out_put_vf; - } - - ice_update_eth_stats(vsi); - stats = &vsi->eth_stats; - - memset(vf_stats, 0, sizeof(*vf_stats)); - - vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast + - stats->rx_multicast; - vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast + - stats->tx_multicast; - vf_stats->rx_bytes = stats->rx_bytes; - vf_stats->tx_bytes = stats->tx_bytes; - vf_stats->broadcast = stats->rx_broadcast; - vf_stats->multicast = stats->rx_multicast; - vf_stats->rx_dropped = stats->rx_discards; - vf_stats->tx_dropped = stats->tx_discards; - -out_put_vf: - ice_put_vf(vf); - return ret; -} - -/** - * ice_print_vf_rx_mdd_event - print VF Rx malicious driver detect event - * @vf: pointer to the VF structure - */ -void ice_print_vf_rx_mdd_event(struct ice_vf *vf) -{ - struct ice_pf *pf = vf->pf; - struct device *dev; - - dev = ice_pf_to_dev(pf); - - dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n", - vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id, - vf->dev_lan_addr.addr, - test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags) - ? "on" : "off"); -} - -/** - * ice_print_vfs_mdd_events - print VFs malicious driver detect event - * @pf: pointer to the PF structure - * - * Called from ice_handle_mdd_event to rate limit and print VFs MDD events. 
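
Editor's note: the min-rate helpers deleted above (ice_calc_all_vfs_min_tx_rate() and ice_min_tx_rate_oversubscribed()) implement a simple accounting rule: the sum of every VF's guaranteed minimum Tx rate, with this VF's previous value swapped for the requested one, must not exceed the current link speed. A runnable sketch of that arithmetic, rates in Mbps, with vf_rates[] standing in for the ice_for_each_vf_rcu() walk:

#include <stdbool.h>
#include <stdio.h>

static bool min_rate_oversubscribed(const int *vf_rates, int num_vfs,
				    int vf_idx, int new_min_rate,
				    int link_speed_mbps)
{
	int total = 0;

	for (int i = 0; i < num_vfs; i++)
		total += vf_rates[i];

	total -= vf_rates[vf_idx];	/* previous rate is being overwritten */
	total += new_min_rate;

	return total > link_speed_mbps;
}

int main(void)
{
	int rates[4] = { 2500, 2500, 2500, 0 };

	/* 2500 * 3 + 3000 > 10000: oversubscribed on a 10G link */
	printf("%d\n", min_rate_oversubscribed(rates, 4, 3, 3000, 10000));
	return 0;
}
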
- */ -void ice_print_vfs_mdd_events(struct ice_pf *pf) -{ - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - struct ice_vf *vf; - unsigned int bkt; - - /* check that there are pending MDD events to print */ - if (!test_and_clear_bit(ICE_MDD_VF_PRINT_PENDING, pf->state)) - return; - - /* VF MDD event logs are rate limited to one second intervals */ - if (time_is_after_jiffies(pf->vfs.last_printed_mdd_jiffies + HZ * 1)) - return; - - pf->vfs.last_printed_mdd_jiffies = jiffies; - - mutex_lock(&pf->vfs.table_lock); - ice_for_each_vf(pf, bkt, vf) { - /* only print Rx MDD event message if there are new events */ - if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) { - vf->mdd_rx_events.last_printed = - vf->mdd_rx_events.count; - ice_print_vf_rx_mdd_event(vf); - } - - /* only print Tx MDD event message if there are new events */ - if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) { - vf->mdd_tx_events.last_printed = - vf->mdd_tx_events.count; - - dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n", - vf->mdd_tx_events.count, hw->pf_id, vf->vf_id, - vf->dev_lan_addr.addr); - } - } - mutex_unlock(&pf->vfs.table_lock); -} - -/** - * ice_restore_all_vfs_msi_state - restore VF MSI state after PF FLR - * @pdev: pointer to a pci_dev structure - * - * Called when recovering from a PF FLR to restore interrupt capability to - * the VFs. - */ -void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) -{ - u16 vf_id; - int pos; - - if (!pci_num_vf(pdev)) - return; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); - if (pos) { - struct pci_dev *vfdev; - - pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, - &vf_id); - vfdev = pci_get_device(pdev->vendor, vf_id, NULL); - while (vfdev) { - if (vfdev->is_virtfn && vfdev->physfn == pdev) - pci_restore_msi_state(vfdev); - vfdev = pci_get_device(pdev->vendor, vf_id, - vfdev); - } - } -} - -/** - * ice_is_malicious_vf - helper function to detect a malicious VF - * @pf: ptr to struct ice_pf - * @event: pointer to the AQ event - * @num_msg_proc: the number of messages processed so far - * @num_msg_pending: the number of messages peinding in admin queue - */ -bool -ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, - u16 num_msg_proc, u16 num_msg_pending) -{ - s16 vf_id = le16_to_cpu(event->desc.retval); - struct device *dev = ice_pf_to_dev(pf); - struct ice_mbx_data mbxdata; - bool malvf = false; - struct ice_vf *vf; - int status; - - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return false; - - if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) - goto out_put_vf; - - mbxdata.num_msg_proc = num_msg_proc; - mbxdata.num_pending_arq = num_msg_pending; - mbxdata.max_num_msgs_mbx = pf->hw.mailboxq.num_rq_entries; -#define ICE_MBX_OVERFLOW_WATERMARK 64 - mbxdata.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK; - - /* check to see if we have a malicious VF */ - status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, vf_id, &malvf); - if (status) - goto out_put_vf; - - if (malvf) { - bool report_vf = false; - - /* if the VF is malicious and we haven't let the user - * know about it, then let them know now - */ - status = ice_mbx_report_malvf(&pf->hw, pf->vfs.malvfs, - ICE_MAX_VF_COUNT, vf_id, - &report_vf); - if (status) - dev_dbg(dev, "Error reporting malicious VF\n"); - - if (report_vf) { - struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); - - if (pf_vsi) - dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF 
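
Editor's note: ice_restore_all_vfs_msi_state(), also removed in this hunk, restores interrupt capability after a PF FLR by reading the VF device ID out of the PF's SR-IOV extended capability and walking every PCI function with that vendor/device pair whose physfn is this PF. A condensed kernel-style sketch of that walk, error handling trimmed:

#include <linux/pci.h>

static void restore_vfs_msi_state(struct pci_dev *pdev)
{
	struct pci_dev *vfdev = NULL;
	u16 vf_did;
	int pos;

	if (!pci_num_vf(pdev))
		return;

	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
	if (!pos)
		return;

	pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_did);

	/* iterate every device matching the PF's vendor and the VF DID */
	while ((vfdev = pci_get_device(pdev->vendor, vf_did, vfdev))) {
		if (vfdev->is_virtfn && vfdev->physfn == pdev)
			pci_restore_msi_state(vfdev);
	}
}
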
message queue. Please see the Adapter User Guide for more information\n", - &vf->dev_lan_addr.addr[0], - pf_vsi->netdev->dev_addr); - } - } - -out_put_vf: - ice_put_vf(vf); - return malvf; -} diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h new file mode 100644 index 000000000000..b5a3fd8adbb4 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2022, Intel Corporation. */ + +#ifndef _ICE_VIRTCHNL_H_ +#define _ICE_VIRTCHNL_H_ + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/if_ether.h> +#include <linux/avf/virtchnl.h> +#include "ice_vf_lib.h" + +/* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */ +#define ICE_MAX_VLAN_PER_VF 8 + +/* MAC filters: 1 is reserved for the VF's default/perm_addr/LAA MAC, 1 for + * broadcast, and 16 for additional unicast/multicast filters + */ +#define ICE_MAX_MACADDR_PER_VF 18 + +struct ice_virtchnl_ops { + int (*get_ver_msg)(struct ice_vf *vf, u8 *msg); + int (*get_vf_res_msg)(struct ice_vf *vf, u8 *msg); + void (*reset_vf)(struct ice_vf *vf); + int (*add_mac_addr_msg)(struct ice_vf *vf, u8 *msg); + int (*del_mac_addr_msg)(struct ice_vf *vf, u8 *msg); + int (*cfg_qs_msg)(struct ice_vf *vf, u8 *msg); + int (*ena_qs_msg)(struct ice_vf *vf, u8 *msg); + int (*dis_qs_msg)(struct ice_vf *vf, u8 *msg); + int (*request_qs_msg)(struct ice_vf *vf, u8 *msg); + int (*cfg_irq_map_msg)(struct ice_vf *vf, u8 *msg); + int (*config_rss_key)(struct ice_vf *vf, u8 *msg); + int (*config_rss_lut)(struct ice_vf *vf, u8 *msg); + int (*get_stats_msg)(struct ice_vf *vf, u8 *msg); + int (*cfg_promiscuous_mode_msg)(struct ice_vf *vf, u8 *msg); + int (*add_vlan_msg)(struct ice_vf *vf, u8 *msg); + int (*remove_vlan_msg)(struct ice_vf *vf, u8 *msg); + int (*ena_vlan_stripping)(struct ice_vf *vf); + int (*dis_vlan_stripping)(struct ice_vf *vf); + int (*handle_rss_cfg_msg)(struct ice_vf *vf, u8 *msg, bool add); + int (*add_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg); + int (*del_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg); + int (*get_offload_vlan_v2_caps)(struct ice_vf *vf); + int (*add_vlan_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*remove_vlan_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*ena_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*dis_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*ena_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); + int (*dis_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); +}; + +#ifdef CONFIG_PCI_IOV +void ice_virtchnl_set_dflt_ops(struct ice_vf *vf); +void ice_virtchnl_set_repr_ops(struct ice_vf *vf); +void ice_vc_notify_vf_link_state(struct ice_vf *vf); +void ice_vc_notify_link_state(struct ice_pf *pf); +void ice_vc_notify_reset(struct ice_pf *pf); +int +ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, + enum virtchnl_status_code v_retval, u8 *msg, u16 msglen); +bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id); +#else /* CONFIG_PCI_IOV */ +static inline void ice_virtchnl_set_dflt_ops(struct ice_vf *vf) { } +static inline void ice_virtchnl_set_repr_ops(struct ice_vf *vf) { } +static inline void ice_vc_notify_vf_link_state(struct ice_vf *vf) { } +static inline void ice_vc_notify_link_state(struct ice_pf *pf) { } +static inline void ice_vc_notify_reset(struct ice_pf *pf) { } + +static inline int +ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, + enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) +{ + return 
-EOPNOTSUPP; +} + +static inline bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) +{ + return false; +} +#endif /* !CONFIG_PCI_IOV */ + +#endif /* _ICE_VIRTCHNL_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 07989f1d08ef..8e38ee2faf58 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -5,6 +5,7 @@ #include "ice_base.h" #include "ice_lib.h" #include "ice_flow.h" +#include "ice_vf_lib_private.h" #define to_fltr_conf_from_desc(p) \ container_of(p, struct virtchnl_fdir_fltr_conf, input) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h index f4e629f4c09b..c5bcc8d7481c 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h @@ -6,6 +6,7 @@ struct ice_vf; struct ice_pf; +struct ice_vsi; enum ice_fdir_ctx_stat { ICE_FDIR_CTX_READY, diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h deleted file mode 100644 index 7f16ed9c70d6..000000000000 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ /dev/null @@ -1,437 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2018, Intel Corporation. */ - -#ifndef _ICE_VIRTCHNL_PF_H_ -#define _ICE_VIRTCHNL_PF_H_ -#include "ice.h" -#include "ice_virtchnl_fdir.h" -#include "ice_vsi_vlan_ops.h" - -/* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */ -#define ICE_MAX_VLAN_PER_VF 8 -/* MAC filters: 1 is reserved for the VF's default/perm_addr/LAA MAC, 1 for - * broadcast, and 16 for additional unicast/multicast filters - */ -#define ICE_MAX_MACADDR_PER_VF 18 - -/* Malicious Driver Detection */ -#define ICE_MDD_EVENTS_THRESHOLD 30 - -/* Static VF transaction/status register def */ -#define VF_DEVICE_STATUS 0xAA -#define VF_TRANS_PENDING_M 0x20 - -/* wait defines for polling PF_PCI_CIAD register status */ -#define ICE_PCI_CIAD_WAIT_COUNT 100 -#define ICE_PCI_CIAD_WAIT_DELAY_US 1 - -/* VF resource constraints */ -#define ICE_MAX_VF_COUNT 256 -#define ICE_MIN_QS_PER_VF 1 -#define ICE_NONQ_VECS_VF 1 -#define ICE_MAX_SCATTER_QS_PER_VF 16 -#define ICE_MAX_RSS_QS_PER_VF 16 -#define ICE_NUM_VF_MSIX_MED 17 -#define ICE_NUM_VF_MSIX_SMALL 5 -#define ICE_NUM_VF_MSIX_MULTIQ_MIN 3 -#define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) -#define ICE_MAX_VF_RESET_TRIES 40 -#define ICE_MAX_VF_RESET_SLEEP_MS 20 - -/* VF Hash Table access functions - * - * These functions provide abstraction for interacting with the VF hash table. - * In general, direct access to the hash table should be avoided outside of - * these functions where possible. - * - * The VF entries in the hash table are protected by reference counting to - * track lifetime of accesses from the table. The ice_get_vf_by_id() function - * obtains a reference to the VF structure which must be dropped by using - * ice_put_vf(). - */ - -/** - * ice_for_each_vf - Iterate over each VF entry - * @pf: pointer to the PF private structure - * @bkt: bucket index used for iteration - * @vf: pointer to the VF entry currently being processed in the loop. - * - * The bkt variable is an unsigned integer iterator used to traverse the VF - * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is. - * Use vf->vf_id to get the id number if needed. 
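
Editor's note: the new ice_virtchnl.h above follows the usual kernel pattern for optional subsystems: real prototypes under CONFIG_PCI_IOV, static inline stubs otherwise, so callers compile unchanged when SR-IOV support is configured out. A generic sketch of the layout; foo_notify() and foo_send() are illustrative names, not driver APIs:

#include <linux/errno.h>
#include <linux/types.h>

struct foo_dev;

#ifdef CONFIG_PCI_IOV
void foo_notify(struct foo_dev *dev);
int foo_send(struct foo_dev *dev, const void *msg, u16 len);
#else /* !CONFIG_PCI_IOV */
static inline void foo_notify(struct foo_dev *dev) { }
static inline int foo_send(struct foo_dev *dev, const void *msg, u16 len)
{
	return -EOPNOTSUPP;	/* mirrors the stub return used above */
}
#endif /* CONFIG_PCI_IOV */
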
- * - * The caller is expected to be under the table_lock mutex for the entire - * loop. Use this iterator if your loop is long or if it might sleep. - */ -#define ice_for_each_vf(pf, bkt, vf) \ - hash_for_each((pf)->vfs.table, (bkt), (vf), entry) - -/** - * ice_for_each_vf_rcu - Iterate over each VF entry protected by RCU - * @pf: pointer to the PF private structure - * @bkt: bucket index used for iteration - * @vf: pointer to the VF entry currently being processed in the loop. - * - * The bkt variable is an unsigned integer iterator used to traverse the VF - * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is. - * Use vf->vf_id to get the id number if needed. - * - * The caller is expected to be under rcu_read_lock() for the entire loop. - * Only use this iterator if your loop is short and you can guarantee it does - * not sleep. - */ -#define ice_for_each_vf_rcu(pf, bkt, vf) \ - hash_for_each_rcu((pf)->vfs.table, (bkt), (vf), entry) - -/* Specific VF states */ -enum ice_vf_states { - ICE_VF_STATE_INIT = 0, /* PF is initializing VF */ - ICE_VF_STATE_ACTIVE, /* VF resources are allocated for use */ - ICE_VF_STATE_QS_ENA, /* VF queue(s) enabled */ - ICE_VF_STATE_DIS, - ICE_VF_STATE_MC_PROMISC, - ICE_VF_STATE_UC_PROMISC, - ICE_VF_STATES_NBITS -}; - -/* VF capabilities */ -enum ice_virtchnl_cap { - ICE_VIRTCHNL_VF_CAP_L2 = 0, - ICE_VIRTCHNL_VF_CAP_PRIVILEGE, -}; - -struct ice_time_mac { - unsigned long time_modified; - u8 addr[ETH_ALEN]; -}; - -/* VF MDD events print structure */ -struct ice_mdd_vf_events { - u16 count; /* total count of Rx|Tx events */ - /* count number of the last printed event */ - u16 last_printed; -}; - -struct ice_vf; - -struct ice_vc_vf_ops { - int (*get_ver_msg)(struct ice_vf *vf, u8 *msg); - int (*get_vf_res_msg)(struct ice_vf *vf, u8 *msg); - void (*reset_vf)(struct ice_vf *vf); - int (*add_mac_addr_msg)(struct ice_vf *vf, u8 *msg); - int (*del_mac_addr_msg)(struct ice_vf *vf, u8 *msg); - int (*cfg_qs_msg)(struct ice_vf *vf, u8 *msg); - int (*ena_qs_msg)(struct ice_vf *vf, u8 *msg); - int (*dis_qs_msg)(struct ice_vf *vf, u8 *msg); - int (*request_qs_msg)(struct ice_vf *vf, u8 *msg); - int (*cfg_irq_map_msg)(struct ice_vf *vf, u8 *msg); - int (*config_rss_key)(struct ice_vf *vf, u8 *msg); - int (*config_rss_lut)(struct ice_vf *vf, u8 *msg); - int (*get_stats_msg)(struct ice_vf *vf, u8 *msg); - int (*cfg_promiscuous_mode_msg)(struct ice_vf *vf, u8 *msg); - int (*add_vlan_msg)(struct ice_vf *vf, u8 *msg); - int (*remove_vlan_msg)(struct ice_vf *vf, u8 *msg); - int (*ena_vlan_stripping)(struct ice_vf *vf); - int (*dis_vlan_stripping)(struct ice_vf *vf); - int (*handle_rss_cfg_msg)(struct ice_vf *vf, u8 *msg, bool add); - int (*add_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg); - int (*del_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg); - int (*get_offload_vlan_v2_caps)(struct ice_vf *vf); - int (*add_vlan_v2_msg)(struct ice_vf *vf, u8 *msg); - int (*remove_vlan_v2_msg)(struct ice_vf *vf, u8 *msg); - int (*ena_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg); - int (*dis_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg); - int (*ena_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); - int (*dis_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg); -}; - -/* Virtchnl/SR-IOV config info */ -struct ice_vfs { - DECLARE_HASHTABLE(table, 8); /* table of VF entries */ - struct mutex table_lock; /* Lock for protecting the hash table */ - u16 num_supported; /* max supported VFs on this PF */ - u16 num_qps_per; /* number of queue pairs per VF */ - u16 
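
Editor's note: the two iterator macros above encode the VF table's locking rules: hold table_lock for walks that may sleep, or use the RCU variant for short non-sleeping walks. A sketch of both disciplines against a simplified hash table, using the same kernel APIs the macros wrap:

#include <linux/hashtable.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct vf_entry {
	struct hlist_node node;
	u16 vf_id;
};

static DEFINE_MUTEX(table_lock);
static DEFINE_HASHTABLE(vf_table, 8);

static void walk_may_sleep(void)
{
	struct vf_entry *vf;
	unsigned int bkt;

	mutex_lock(&table_lock);
	hash_for_each(vf_table, bkt, vf, node) {
		/* bkt is a bucket index, not vf->vf_id; may sleep here */
	}
	mutex_unlock(&table_lock);
}

static void walk_no_sleep(void)
{
	struct vf_entry *vf;
	unsigned int bkt;

	rcu_read_lock();
	hash_for_each_rcu(vf_table, bkt, vf, node) {
		/* short, non-sleeping work only */
	}
	rcu_read_unlock();
}
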
num_msix_per; /* number of MSI-X vectors per VF */ - unsigned long last_printed_mdd_jiffies; /* MDD message rate limit */ - DECLARE_BITMAP(malvfs, ICE_MAX_VF_COUNT); /* malicious VF indicator */ -}; - -/* VF information structure */ -struct ice_vf { - struct hlist_node entry; - struct rcu_head rcu; - struct kref refcnt; - struct ice_pf *pf; - - /* Used during virtchnl message handling and NDO ops against the VF - * that will trigger a VFR - */ - struct mutex cfg_lock; - - u16 vf_id; /* VF ID in the PF space */ - u16 lan_vsi_idx; /* index into PF struct */ - u16 ctrl_vsi_idx; - struct ice_vf_fdir fdir; - /* first vector index of this VF in the PF space */ - int first_vector_idx; - struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ - struct virtchnl_version_info vf_ver; - u32 driver_caps; /* reported by VF driver */ - struct virtchnl_ether_addr dev_lan_addr; - struct virtchnl_ether_addr hw_lan_addr; - struct ice_time_mac legacy_last_added_umac; - DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF); - DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF); - struct ice_vlan port_vlan_info; /* Port VLAN ID, QoS, and TPID */ - struct virtchnl_vlan_caps vlan_v2_caps; - u8 pf_set_mac:1; /* VF MAC address set by VMM admin */ - u8 trusted:1; - u8 spoofchk:1; - u8 link_forced:1; - u8 link_up:1; /* only valid if VF link is forced */ - /* VSI indices - actual VSI pointers are maintained in the PF structure - * When assigned, these will be non-zero, because VSI 0 is always - * the main LAN VSI for the PF. - */ - u16 lan_vsi_num; /* ID as used by firmware */ - unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */ - unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */ - DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ - - unsigned long vf_caps; /* VF's adv. 
capabilities */ - u8 num_req_qs; /* num of queue pairs requested by VF */ - u16 num_mac; - u16 num_vf_qs; /* num of queue configured per VF */ - struct ice_mdd_vf_events mdd_rx_events; - struct ice_mdd_vf_events mdd_tx_events; - DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX); - - struct ice_repr *repr; - - struct ice_vc_vf_ops vc_ops; - - /* devlink port data */ - struct devlink_port devlink_port; -}; - -#ifdef CONFIG_PCI_IOV -struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id); -void ice_put_vf(struct ice_vf *vf); -bool ice_has_vfs(struct ice_pf *pf); -u16 ice_get_num_vfs(struct ice_pf *pf); -struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf); -void ice_process_vflr_event(struct ice_pf *pf); -int ice_sriov_configure(struct pci_dev *pdev, int num_vfs); -int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); -int -ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi); - -void ice_free_vfs(struct ice_pf *pf); -void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event); -void ice_vc_notify_link_state(struct ice_pf *pf); -void ice_vc_notify_reset(struct ice_pf *pf); -void ice_vc_notify_vf_link_state(struct ice_vf *vf); -void ice_vc_change_ops_to_repr(struct ice_vc_vf_ops *ops); -void ice_vc_set_dflt_vf_ops(struct ice_vc_vf_ops *ops); -bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr); -bool ice_reset_vf(struct ice_vf *vf, bool is_vflr); -void ice_restore_all_vfs_msi_state(struct pci_dev *pdev); -bool -ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, - u16 num_msg_proc, u16 num_msg_pending); - -int -ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, - __be16 vlan_proto); - -int -ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, - int max_tx_rate); - -int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted); - -int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state); - -int ice_check_vf_ready_for_cfg(struct ice_vf *vf); - -bool ice_is_vf_disabled(struct ice_vf *vf); - -int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena); - -int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector); - -void ice_set_vf_state_qs_dis(struct ice_vf *vf); -int -ice_get_vf_stats(struct net_device *netdev, int vf_id, - struct ifla_vf_stats *vf_stats); -bool ice_is_any_vf_in_promisc(struct ice_pf *pf); -void -ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event); -void ice_print_vfs_mdd_events(struct ice_pf *pf); -void ice_print_vf_rx_mdd_event(struct ice_vf *vf); -bool -ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto); -struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf); -int -ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, - enum virtchnl_status_code v_retval, u8 *msg, u16 msglen); -bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id); -bool ice_vf_is_port_vlan_ena(struct ice_vf *vf); -#else /* CONFIG_PCI_IOV */ -static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) -{ - return NULL; -} - -static inline void ice_put_vf(struct ice_vf *vf) -{ -} - -static inline bool ice_has_vfs(struct ice_pf *pf) -{ - return false; -} - -static inline u16 ice_get_num_vfs(struct ice_pf *pf) -{ - return 0; -} - -static inline void ice_process_vflr_event(struct ice_pf *pf) { } -static inline void ice_free_vfs(struct ice_pf *pf) { } -static inline -void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) { } 
-static inline void ice_vc_notify_link_state(struct ice_pf *pf) { } -static inline void ice_vc_notify_reset(struct ice_pf *pf) { } -static inline void ice_vc_notify_vf_link_state(struct ice_vf *vf) { } -static inline void ice_vc_change_ops_to_repr(struct ice_vc_vf_ops *ops) { } -static inline void ice_vc_set_dflt_vf_ops(struct ice_vc_vf_ops *ops) { } -static inline void ice_set_vf_state_qs_dis(struct ice_vf *vf) { } -static inline -void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) { } -static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { } -static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { } -static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { } - -static inline int ice_check_vf_ready_for_cfg(struct ice_vf *vf) -{ - return -EOPNOTSUPP; -} - -static inline bool ice_is_vf_disabled(struct ice_vf *vf) -{ - return true; -} - -static inline struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf) -{ - return NULL; -} - -static inline bool -ice_is_malicious_vf(struct ice_pf __always_unused *pf, - struct ice_rq_event_info __always_unused *event, - u16 __always_unused num_msg_proc, - u16 __always_unused num_msg_pending) -{ - return false; -} - -static inline bool -ice_reset_all_vfs(struct ice_pf __always_unused *pf, - bool __always_unused is_vflr) -{ - return true; -} - -static inline bool -ice_reset_vf(struct ice_vf __always_unused *vf, bool __always_unused is_vflr) -{ - return true; -} - -static inline int -ice_sriov_configure(struct pci_dev __always_unused *pdev, - int __always_unused num_vfs) -{ - return -EOPNOTSUPP; -} - -static inline int -ice_set_vf_mac(struct net_device __always_unused *netdev, - int __always_unused vf_id, u8 __always_unused *mac) -{ - return -EOPNOTSUPP; -} - -static inline int -ice_get_vf_cfg(struct net_device __always_unused *netdev, - int __always_unused vf_id, - struct ifla_vf_info __always_unused *ivi) -{ - return -EOPNOTSUPP; -} - -static inline int -ice_set_vf_trust(struct net_device __always_unused *netdev, - int __always_unused vf_id, bool __always_unused trusted) -{ - return -EOPNOTSUPP; -} - -static inline int -ice_set_vf_port_vlan(struct net_device __always_unused *netdev, - int __always_unused vf_id, u16 __always_unused vid, - u8 __always_unused qos, __be16 __always_unused v_proto) -{ - return -EOPNOTSUPP; -} - -static inline int -ice_set_vf_spoofchk(struct net_device __always_unused *netdev, - int __always_unused vf_id, bool __always_unused ena) -{ - return -EOPNOTSUPP; -} - -static inline int -ice_set_vf_link_state(struct net_device __always_unused *netdev, - int __always_unused vf_id, int __always_unused link_state) -{ - return -EOPNOTSUPP; -} - -static inline int -ice_set_vf_bw(struct net_device __always_unused *netdev, - int __always_unused vf_id, int __always_unused min_tx_rate, - int __always_unused max_tx_rate) -{ - return -EOPNOTSUPP; -} - -static inline int -ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, - struct ice_q_vector __always_unused *q_vector) -{ - return 0; -} - -static inline int -ice_get_vf_stats(struct net_device __always_unused *netdev, - int __always_unused vf_id, - struct ifla_vf_stats __always_unused *vf_stats) -{ - return -EOPNOTSUPP; -} - -static inline bool ice_is_any_vf_in_promisc(struct ice_pf __always_unused *pf) -{ - return false; -} - -static inline bool ice_vf_is_port_vlan_ena(struct ice_vf __always_unused *vf) -{ - return false; -} -#endif /* CONFIG_PCI_IOV */ -#endif /* _ICE_VIRTCHNL_PF_H_ */ diff --git 
a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 123bb98ebfbe..21faec8e97db 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -4,7 +4,6 @@ #ifndef _ICE_XSK_H_ #define _ICE_XSK_H_ #include "ice_txrx.h" -#include "ice.h" #define PKTS_PER_BATCH 8 diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index e6cd4e214d79..5f9ab1842d49 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -3189,8 +3189,10 @@ static int mv643xx_eth_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); - if (WARN_ON(irq < 0)) - return irq; + if (WARN_ON(irq < 0)) { + err = irq; + goto out; + } dev->irq = irq; dev->netdev_ops = &mv643xx_eth_netdev_ops; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index f1335a1ed695..934f6dd90992 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -76,6 +76,8 @@ #define MVNETA_WIN_SIZE(w) (0x2204 + ((w) << 3)) #define MVNETA_WIN_REMAP(w) (0x2280 + ((w) << 2)) #define MVNETA_BASE_ADDR_ENABLE 0x2290 +#define MVNETA_AC5_CNM_DDR_TARGET 0x2 +#define MVNETA_AC5_CNM_DDR_ATTR 0xb #define MVNETA_ACCESS_PROTECT_ENABLE 0x2294 #define MVNETA_PORT_CONFIG 0x2400 #define MVNETA_UNI_PROMISC_MODE BIT(0) @@ -544,6 +546,7 @@ struct mvneta_port { /* Flags for special SoC configurations */ bool neta_armada3700; + bool neta_ac5; u16 rx_offset_correction; const struct mbus_dram_target_info *dram_target_info; }; @@ -5324,6 +5327,10 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp, win_protect |= 3 << (2 * i); } } else { + if (pp->neta_ac5) + mvreg_write(pp, MVNETA_WIN_BASE(0), + (MVNETA_AC5_CNM_DDR_ATTR << 8) | + MVNETA_AC5_CNM_DDR_TARGET); /* For Armada3700 open default 4GB Mbus window, leaving * arbitration of target/attribute to a different layer * of configuration. 
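
Editor's note: for the mvneta AC5 support added here, MBus window 0's base register is loaded with the CnM DDR target in the low nibble and its attribute in bits 15:8, leaving the base address at zero. A one-line illustration of the value being written; the register layout is inferred from the existing MVNETA_WIN_BASE usage, not from AC5 documentation:

#include <stdint.h>

#define AC5_CNM_DDR_TARGET 0x2
#define AC5_CNM_DDR_ATTR   0xb

static uint32_t ac5_win0_base_value(void)
{
	/* attribute << 8 | target, base address bits left as 0 */
	return (AC5_CNM_DDR_ATTR << 8) | AC5_CNM_DDR_TARGET;	/* 0x00000b02 */
}
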
@@ -5409,6 +5416,10 @@ static int mvneta_probe(struct platform_device *pdev) /* Get special SoC configurations */ if (of_device_is_compatible(dn, "marvell,armada-3700-neta")) pp->neta_armada3700 = true; + if (of_device_is_compatible(dn, "marvell,armada-ac5-neta")) { + pp->neta_armada3700 = true; + pp->neta_ac5 = true; + } dev->irq = irq_of_parse_and_map(dn, 0); if (dev->irq == 0) @@ -5769,6 +5780,7 @@ static const struct of_device_id mvneta_match[] = { { .compatible = "marvell,armada-370-neta" }, { .compatible = "marvell,armada-xp-neta" }, { .compatible = "marvell,armada-3700-neta" }, + { .compatible = "marvell,armada-ac5-neta" }, { } }; MODULE_DEVICE_TABLE(of, mvneta_match); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 8cfc649f226b..8f762fc170b3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1067,7 +1067,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, struct mlx4_qp_context *context; int err = 0; - context = kmalloc(sizeof(*context), GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; @@ -1078,7 +1078,6 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, } qp->event = mlx4_en_sqp_event; - memset(context, 0, sizeof(*context)); mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, qpn, ring->cqn, -1, context); context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index d5408f6ce5a7..e52b0bac09da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -71,53 +71,6 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, return cpu_handle; } -static int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, - struct mlx5_frag_buf *buf, int node) -{ - dma_addr_t t; - - buf->size = size; - buf->npages = 1; - buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; - - buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL); - if (!buf->frags) - return -ENOMEM; - - buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size, - &t, node); - if (!buf->frags->buf) - goto err_out; - - buf->frags->map = t; - - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } - - return 0; -err_out: - kfree(buf->frags); - return -ENOMEM; -} - -int mlx5_buf_alloc(struct mlx5_core_dev *dev, - int size, struct mlx5_frag_buf *buf) -{ - return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node); -} -EXPORT_SYMBOL(mlx5_buf_alloc); - -void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf) -{ - dma_free_coherent(mlx5_core_dma_dev(dev), buf->size, buf->frags->buf, - buf->frags->map); - - kfree(buf->frags); -} -EXPORT_SYMBOL_GPL(mlx5_buf_free); - int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node) { @@ -286,19 +239,6 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) } EXPORT_SYMBOL_GPL(mlx5_db_free); -void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas) -{ - u64 addr; - int i; - - for (i = 0; i < buf->npages; i++) { - addr = buf->frags->map + (i << buf->page_shift); - - pas[i] = cpu_to_be64(addr); - } -} -EXPORT_SYMBOL_GPL(mlx5_fill_page_array); - void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm) { int i; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 0bd8698f7226..5c4711be6fae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -188,12 +188,18 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? - mlx5e_rx_is_linear_skb(params, xsk) : - mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk); - return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ? - mlx5e_get_linear_rq_headroom(params, xsk) : 0; + if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) + return linear_headroom; + + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return linear_headroom; + + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) + return linear_headroom; + + return 0; } u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -392,16 +398,25 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e }; } +static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size) +{ + /* Optimization for small packets: the last fragment is bigger than the others. */ + return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE; +} + #define DEFAULT_FRAG_SIZE (2048) -static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_frags_info *info) +static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_frags_info *info) { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); int frag_size_max = DEFAULT_FRAG_SIZE; + int first_frag_size_max; u32 buf_size = 0; + u16 headroom; + int max_mtu; int i; if (mlx5_fpga_is_ipsec_device(mdev)) @@ -420,21 +435,42 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, goto out; } - if (byte_count > PAGE_SIZE + - (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max) + headroom = mlx5e_get_linear_rq_headroom(params, xsk); + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max); + if (byte_count > max_mtu) { frag_size_max = PAGE_SIZE; + first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom); + + max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max); + if (byte_count > max_mtu) { + mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n", + params->sw_mtu, max_mtu); + return -EINVAL; + } + } i = 0; while (buf_size < byte_count) { int frag_size = byte_count - buf_size; - if (i < MLX5E_MAX_RX_FRAGS - 1) + if (i == 0) + frag_size = min(frag_size, first_frag_size_max); + else if (i < MLX5E_MAX_RX_FRAGS - 1) frag_size = min(frag_size, frag_size_max); info->arr[i].frag_size = frag_size; + buf_size += frag_size; + + if (i == 0) { + /* Ensure that headroom and tailroom are included. 
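
Editor's note: the new mlx5e_max_nonlinear_mtu() bound above accounts for the first fragment losing headroom plus skb_shared_info overhead, while the last fragment may span a whole page. A runnable sketch of the arithmetic with illustrative constants; SKB_OVERHEAD approximates what SKB_WITH_OVERHEAD() subtracts and the real values depend on the architecture:

#include <stdio.h>

#define PAGE_SZ      4096
#define MAX_RX_FRAGS 4
#define SKB_OVERHEAD 320	/* approx aligned sizeof(struct skb_shared_info) */

static int max_nonlinear_mtu(int first_frag, int frag)
{
	/* first fragment + full middle fragments + one whole page at the end */
	return first_frag + (MAX_RX_FRAGS - 2) * frag + PAGE_SZ;
}

int main(void)
{
	int frag_size = 2048, headroom = 256;
	int first = frag_size - headroom - SKB_OVERHEAD;

	printf("max byte_count: %d\n", max_nonlinear_mtu(first, frag_size));
	return 0;
}

If the MTU exceeds this bound even after bumping frag_size to a full page, the rebuilt mlx5e_build_rq_frags_info() now fails with -EINVAL instead of silently misconfiguring the queue.
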
*/ + frag_size += headroom; + frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + } + info->arr[i].frag_stride = roundup_pow_of_two(frag_size); - buf_size += frag_size; i++; } info->num_frags = i; @@ -444,6 +480,8 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, out: info->wqe_bulk = max_t(u8, info->wqe_bulk, 8); info->log_num_frags = order_base_2(info->num_frags); + + return 0; } static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) @@ -540,6 +578,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); int ndsegs = 1; + int err; switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: { @@ -579,7 +618,9 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); + err = mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); + if (err) + return err; ndsegs = param->frags_info.num_frags; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index cb8f7593a00c..af37a8d247a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -35,6 +35,13 @@ static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = { NULL, /* FLOW_ACTION_CT_METADATA, */ &mlx5e_tc_act_mpls_push, &mlx5e_tc_act_mpls_pop, + NULL, /* FLOW_ACTION_MPLS_MANGLE, */ + NULL, /* FLOW_ACTION_GATE, */ + NULL, /* FLOW_ACTION_PPPOE_PUSH, */ + NULL, /* FLOW_ACTION_JUMP, */ + NULL, /* FLOW_ACTION_PIPE, */ + &mlx5e_tc_act_vlan, + &mlx5e_tc_act_vlan, }; /* Must be aligned with enum flow_action_id. 
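
Editor's note: the tc_acts_fdb hunk above has to pad the array with positional NULLs because the table is index-aligned with enum flow_action_id. A common defensive alternative, shown here as a generic sketch and not as a proposed driver change, is designated initializers, which make the alignment explicit and let unlisted ids default to NULL:

enum flow_act_id {
	ACT_ACCEPT,
	ACT_DROP,
	ACT_VLAN_PUSH_ETH,
	ACT_VLAN_POP_ETH,
	NUM_FLOW_ACTS,
};

struct tc_act {
	int (*parse)(void *state);
};

static int parse_vlan_eth(void *state) { return 0; }

static const struct tc_act *tc_acts[NUM_FLOW_ACTS] = {
	[ACT_VLAN_PUSH_ETH] = &(const struct tc_act){ .parse = parse_vlan_eth },
	[ACT_VLAN_POP_ETH]  = &(const struct tc_act){ .parse = parse_vlan_eth },
	/* every unlisted id is NULL without explicit padding */
};
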
*/ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index 94a7cf38d6b1..f34714c5ddd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -22,6 +22,8 @@ struct mlx5e_tc_act_parse_state { bool encap; bool decap; bool mpls_push; + bool eth_push; + bool eth_pop; bool ptype_host; const struct ip_tunnel_info *tun_info; struct mlx5e_mpls_info mpls_info; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index 05a42fb4ba97..2b002c6a2e73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -125,6 +125,16 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, return false; } + if (parse_state->eth_pop && !parse_state->mpls_push) { + NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push"); + return false; + } + + if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push"); + return false; + } + if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) { /* Ignore forward to self rules generated * by adding both mlx5 devs to the flow table diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c index 96a80e03d129..f106190bf37c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c @@ -57,12 +57,13 @@ tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, filter_dev = attr->parse_attr->filter_dev; /* we only support mpls pop if it is the first action + * or it is second action after tunnel key unset * and the filter net device is bareudp. Subsequent * actions can be pedit and the last can be mirred * egress redirect. */ - if (act_index) { - NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action"); + if ((act_index == 1 && !parse_state->decap) || act_index > 1) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action or with decap"); return false; } @@ -80,7 +81,7 @@ tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { - attr->parse_attr->eth.h_proto = act->mpls_pop.proto; + attr->esw_attr->eth.h_proto = act->mpls_pop.proto; attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; flow_flag_set(parse_state->flow, L3_TO_L2_DECAP); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c index 39f8f71bed9e..47597c524e59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -42,13 +42,12 @@ out_err: return -EOPNOTSUPP; } -static int -parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, - const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct netlink_ext_ack *extack) +int +mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) { - struct pedit_headers_action *hdrs = parse_attr->hdrs; u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 
0 : 1; u8 htype = act->mangle.htype; int err = -EOPNOTSUPP; @@ -79,46 +78,6 @@ out_err: return err; } -static int -parse_pedit_to_reformat(const struct flow_action_entry *act, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct netlink_ext_ack *extack) -{ - u32 mask, val, offset; - u32 *p; - - if (act->id != FLOW_ACTION_MANGLE) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported action id"); - return -EOPNOTSUPP; - } - - if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) { - NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported"); - return -EOPNOTSUPP; - } - - mask = ~act->mangle.mask; - val = act->mangle.val; - offset = act->mangle.offset; - p = (u32 *)&parse_attr->eth; - *(p + (offset >> 2)) |= (val & mask); - - return 0; -} - -int -mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, - const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) -{ - if (flow && flow_flag_test(flow, L3_TO_L2_DECAP)) - return parse_pedit_to_reformat(act, parse_attr, extack); - - return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, extack); -} - static bool tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -141,20 +100,16 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, ns_type = mlx5e_get_flow_namespace(flow); - err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr, - flow, parse_state->extack); + err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr->hdrs, + parse_state->extack); if (err) return err; - if (flow_flag_test(flow, L3_TO_L2_DECAP)) - goto out; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; if (ns_type == MLX5_FLOW_NAMESPACE_FDB) esw_attr->split_count = esw_attr->out_count; -out: return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h index 258f030a2dc6..434c8bd710a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h @@ -24,8 +24,7 @@ struct pedit_headers_action { int mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow, + struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack); #endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index 6378b7558ba2..b86ac604d0c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -34,7 +34,8 @@ parse_tc_vlan_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, u32 *action, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + struct mlx5e_tc_act_parse_state *parse_state) { u8 vlan_idx = attr->total_vlan; @@ -84,6 +85,16 @@ parse_tc_vlan_action(struct mlx5e_priv *priv, *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; } break; + case FLOW_ACTION_VLAN_POP_ETH: + parse_state->eth_pop = true; + break; + case FLOW_ACTION_VLAN_PUSH_ETH: + if (!flow_flag_test(parse_state->flow, L3_TO_L2_DECAP)) + return -EOPNOTSUPP; + parse_state->eth_push = true; + memcpy(attr->eth.h_dest, act->vlan_push_eth.dst, ETH_ALEN); + 
memcpy(attr->eth.h_source, act->vlan_push_eth.src, ETH_ALEN); + break; default: NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN"); return -EINVAL; @@ -109,7 +120,7 @@ mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, }; int err; - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack); + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL); if (err) return err; @@ -139,7 +150,7 @@ mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, priv->netdev->lower_level; while (nest_level--) { err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, - extack); + extack, NULL); if (err) return err; } @@ -174,7 +185,7 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, parse_state->extack); } else { err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action, - parse_state->extack); + parse_state->extack, parse_state); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c index 28444d4ffd73..9a8a1a6bd99e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -43,8 +43,8 @@ mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, return -EOPNOTSUPP; } - err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr, - NULL, extack); + err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr->hdrs, + extack); *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index ca1510399d1e..e49f51124c74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -260,7 +260,8 @@ mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) return -EOPNOTSUPP; } } else { - return -EOPNOTSUPP; + if (tuple->ip_proto != IPPROTO_GRE) + return -EOPNOTSUPP; } return 0; @@ -809,7 +810,11 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->dest_chain = 0; attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act); attr->ft = nat ? 
ct_priv->ct_nat : ct_priv->ct; - attr->outer_match_level = MLX5_MATCH_L4; + if (entry->tuple.ip_proto == IPPROTO_TCP || + entry->tuple.ip_proto == IPPROTO_UDP) + attr->outer_match_level = MLX5_MATCH_L4; + else + attr->outer_match_level = MLX5_MATCH_L3; attr->counter = entry->counter->counter; attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) @@ -1156,7 +1161,6 @@ mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, } rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); - mlx5_tc_ct_entry_remove_from_tuples(entry); spin_unlock_bh(&ct_priv->ht_lock); mlx5_tc_ct_entry_put(entry); @@ -1226,16 +1230,20 @@ mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, struct flow_keys flow_keys; skb_reset_network_header(skb); - skb_flow_dissect_flow_keys(skb, &flow_keys, 0); + skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); tuple->zone = zone; if (flow_keys.basic.ip_proto != IPPROTO_TCP && - flow_keys.basic.ip_proto != IPPROTO_UDP) + flow_keys.basic.ip_proto != IPPROTO_UDP && + flow_keys.basic.ip_proto != IPPROTO_GRE) return false; - tuple->port.src = flow_keys.ports.src; - tuple->port.dst = flow_keys.ports.dst; + if (flow_keys.basic.ip_proto == IPPROTO_TCP || + flow_keys.basic.ip_proto == IPPROTO_UDP) { + tuple->port.src = flow_keys.ports.src; + tuple->port.dst = flow_keys.ports.dst; + } tuple->n_proto = flow_keys.basic.n_proto; tuple->ip_proto = flow_keys.basic.ip_proto; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index 03c953dacb09..3b74a6fd5c43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -41,7 +41,6 @@ struct mlx5e_tc_flow_parse_attr { struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; - struct ethhdr eth; struct mlx5e_tc_act_parse_state parse_state; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 5105c8018d37..5aff97914367 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -906,20 +906,18 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5_pkt_reformat_params reformat_params; - struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_decap_entry *d; struct mlx5e_decap_key key; uintptr_t hash_key; int err = 0; - parse_attr = flow->attr->parse_attr; - if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { + if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { NL_SET_ERR_MSG_MOD(extack, "encap header larger than max supported"); return -EOPNOTSUPP; } - key.key = parse_attr->eth; + key.key = attr->eth; hash_key = hash_decap_info(&key); mutex_lock(&esw->offloads.decap_tbl_lock); d = mlx5e_decap_get(priv, &key, hash_key); @@ -949,8 +947,8 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv, memset(&reformat_params, 0, sizeof(reformat_params)); reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; - reformat_params.size = sizeof(parse_attr->eth); - reformat_params.data = &parse_attr->eth; + reformat_params.size = sizeof(attr->eth); + reformat_params.data = &attr->eth; d->pkt_reformat = 
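
Editor's note: the conntrack changes in this hunk extend hardware offload to GRE. Since only TCP and UDP tuples carry ports, GRE entries are matched at L3 rather than L4, and the software dissector is stopped before encapsulation so the outer GRE header itself is what gets dissected. A sketch of the match-level choice; the enum values are stand-ins for the driver's MLX5_MATCH_* levels:

#include <netinet/in.h>

enum match_level { MATCH_L3, MATCH_L4 };

static enum match_level ct_match_level(unsigned char ip_proto)
{
	if (ip_proto == IPPROTO_TCP || ip_proto == IPPROTO_UDP)
		return MATCH_L4;	/* full 5-tuple including ports */
	return MATCH_L3;		/* e.g. IPPROTO_GRE: no ports to match */
}
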
mlx5_packet_reformat_alloc(priv->mdev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index a7f020399370..6aa77f0e094e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -120,19 +120,14 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, /* returns true if packet was consumed by xdp */ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - u32 *len, struct xdp_buff *xdp) + struct bpf_prog *prog, struct xdp_buff *xdp) { - struct bpf_prog *prog = rcu_dereference(rq->xdp_prog); u32 act; int err; - if (!prog) - return false; - act = bpf_prog_run_xdp(prog, xdp); switch (act) { case XDP_PASS: - *len = xdp->data_end - xdp->data; return false; case XDP_TX: if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, xdp))) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index c62f11d7ef6a..20d8af66c072 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -48,7 +48,7 @@ struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - u32 *len, struct xdp_buff *xdp); + struct bpf_prog *prog, struct xdp_buff *xdp); void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 8e7b877d8a12..021da085e603 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -4,6 +4,7 @@ #include "rx.h" #include "en/xdp.h" #include <net/xdp_sock_drv.h> +#include <linux/filter.h> /* RX data path */ @@ -30,7 +31,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, u32 page_idx) { struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk; - u32 cqe_bcnt32 = cqe_bcnt; + struct bpf_prog *prog; /* Check packet size. Note LRO doesn't use linear SKB */ if (unlikely(cqe_bcnt > rq->hw_mtu)) { @@ -45,7 +46,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, */ WARN_ON_ONCE(head_offset); - xdp->data_end = xdp->data + cqe_bcnt32; + xdp->data_end = xdp->data + cqe_bcnt; xdp_set_data_meta_invalid(xdp); xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); net_prefetch(xdp->data); @@ -65,7 +66,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, * allocated first from the Reuse Ring, so it has enough space. */ - if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) { + prog = rcu_dereference(rq->xdp_prog); + if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) { if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ @@ -74,7 +76,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the * frame. On SKB allocation failure, NULL is returned. 
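Both mlx5e_xdp_handle() call sites above show the new division of labour: the caller does the rcu_dereference(rq->xdp_prog) once and can skip XDP work entirely when no program is attached, and the post-XDP length is no longer reported through a *len out-parameter but read back from the xdp_buff, which the program may have grown or shrunk. A kernel-context sketch of the call-site shape; mlx5e_rq_like, run_xdp() and construct_skb() are placeholders, not driver API:

#include <linux/filter.h>	/* the include this diff adds for bpf_prog */

struct mlx5e_rq_like {			/* hypothetical slimmed-down rq */
	struct bpf_prog __rcu *xdp_prog;
};

bool run_xdp(struct mlx5e_rq_like *rq, struct bpf_prog *prog,
	     struct xdp_buff *xdp);
struct sk_buff *construct_skb(void *data, u32 len, u32 metasize);

static struct sk_buff *rx_packet(struct mlx5e_rq_like *rq,
				 struct xdp_buff *xdp)
{
	struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);

	if (prog && run_xdp(rq, prog, xdp))
		return NULL;	/* consumed: XDP_TX/REDIRECT/DROP */

	/* XDP_PASS: derive geometry from the (possibly adjusted) buffer */
	return construct_skb(xdp->data,
			     xdp->data_end - xdp->data,	  /* new length   */
			     xdp->data - xdp->data_meta); /* new metasize */
}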
*/ - return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt32); + return mlx5e_xsk_construct_skb(rq, xdp->data, xdp->data_end - xdp->data); } struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, @@ -83,6 +85,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, u32 cqe_bcnt) { struct xdp_buff *xdp = wi->di->xsk; + struct bpf_prog *prog; /* wi->offset is not used in this function, because xdp->data and the * DMA address point directly to the necessary place. Furthermore, the @@ -101,12 +104,13 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, return NULL; } - if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp))) + prog = rcu_dereference(rq->xdp_prog); + if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) return NULL; /* page/packet was consumed by XDP */ /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse * will be handled by mlx5e_put_rx_frag. * On SKB allocation failure, NULL is returned. */ - return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt); + return mlx5e_xsk_construct_skb(rq, xdp->data, xdp->data_end - xdp->data); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 074a44b281b6..4b8699f39200 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -34,6 +34,7 @@ #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/bitmap.h> +#include <linux/filter.h> #include <net/ip6_checksum.h> #include <net/page_pool.h> #include <net/inet_ecn.h> @@ -373,12 +374,15 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, int i; for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) { + u16 headroom; + err = mlx5e_get_rx_frag(rq, frag); if (unlikely(err)) goto free_frags; + headroom = i == 0 ? 
rq->buff.headroom : 0; wqe->data[i].addr = cpu_to_be64(frag->di->addr + - frag->offset + rq->buff.headroom); + frag->offset + headroom); } return 0; @@ -1520,11 +1524,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, { struct mlx5e_dma_info *di = wi->di; u16 rx_headroom = rq->buff.headroom; - struct xdp_buff xdp; + struct bpf_prog *prog; struct sk_buff *skb; + u32 metasize = 0; void *va, *data; u32 frag_size; - u32 metasize; va = page_address(di->page) + wi->offset; data = va + rx_headroom; @@ -1532,16 +1536,22 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset, frag_size, DMA_FROM_DEVICE); - net_prefetchw(va); /* xdp_frame data area */ net_prefetch(data); - mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); - if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp)) - return NULL; /* page/packet was consumed by XDP */ + prog = rcu_dereference(rq->xdp_prog); + if (prog) { + struct xdp_buff xdp; + + net_prefetchw(va); /* xdp_frame data area */ + mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); + if (mlx5e_xdp_handle(rq, di, prog, &xdp)) + return NULL; /* page/packet was consumed by XDP */ - rx_headroom = xdp.data - xdp.data_hard_start; + rx_headroom = xdp.data - xdp.data_hard_start; + metasize = xdp.data - xdp.data_meta; + cqe_bcnt = xdp.data_end - xdp.data; + } frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - metasize = xdp.data - xdp.data_meta; skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); if (unlikely(!skb)) return NULL; @@ -1557,43 +1567,45 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; - struct mlx5e_wqe_frag_info *head_wi = wi; - u16 headlen = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt); - u16 frag_headlen = headlen; - u16 byte_cnt = cqe_bcnt - headlen; + u16 rx_headroom = rq->buff.headroom; + struct mlx5e_dma_info *di = wi->di; + u32 frag_consumed_bytes; + u32 first_frag_size; struct sk_buff *skb; + void *va; + + va = page_address(di->page) + wi->offset; + frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); + first_frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + frag_consumed_bytes); + + dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset, + first_frag_size, DMA_FROM_DEVICE); + net_prefetch(va + rx_headroom); /* XDP is not supported in this configuration, as incoming packets * might spread among multiple pages. 
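The mlx5e_skb_from_cqe_nonlinear() rework in this hunk (continued just below) drops the old napi_alloc_skb() plus mlx5e_copy_skb_header() scheme: instead of copying up to MLX5E_RX_MAX_HEAD bytes into a freshly allocated linear buffer, the skb is now built directly on top of the first fragment's page (taking an extra page reference, since the page pool keeps its own), and each remaining fragment is attached as a paged frag. A sketch of the loop structure; frag_info, build_linear_skb() and add_skb_frag() are placeholders for the driver's own helpers:

#include <linux/skbuff.h>

struct frag_info { void *va; struct page *page; u32 offset; u32 frag_size; };

struct sk_buff *build_linear_skb(void *va, u32 headroom, u32 len);
void add_skb_frag(struct sk_buff *skb, struct page *page, u32 off, u32 len);

static struct sk_buff *skb_from_frags(struct frag_info *f, u32 cqe_bcnt,
				      u32 headroom)
{
	u32 first = min_t(u32, f->frag_size, cqe_bcnt);
	struct sk_buff *skb;

	skb = build_linear_skb(f->va, headroom, first);	/* no header memcpy */
	if (unlikely(!skb))
		return NULL;
	page_ref_inc(f->page);	/* skb and page pool now co-own the page */

	for (cqe_bcnt -= first, f++; cqe_bcnt; f++) {
		u32 n = min_t(u32, f->frag_size, cqe_bcnt);

		add_skb_frag(skb, f->page, f->offset, n);
		cqe_bcnt -= n;
	}
	return skb;
}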
*/ - skb = napi_alloc_skb(rq->cq.napi, - ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long))); - if (unlikely(!skb)) { - rq->stats->buff_alloc_err++; + skb = mlx5e_build_linear_skb(rq, va, first_frag_size, rx_headroom, + frag_consumed_bytes, 0); + if (unlikely(!skb)) return NULL; - } - net_prefetchw(skb->data); + page_ref_inc(di->page); + + cqe_bcnt -= frag_consumed_bytes; + frag_info++; + wi++; - while (byte_cnt) { - u16 frag_consumed_bytes = - min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt); + while (cqe_bcnt) { + frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset + frag_headlen, + mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset, frag_consumed_bytes, frag_info->frag_stride); - byte_cnt -= frag_consumed_bytes; - frag_headlen = 0; + cqe_bcnt -= frag_consumed_bytes; frag_info++; wi++; } - /* copy header */ - mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, head_wi->offset, - headlen); - /* skb linear part was allocated with headlen and aligned to long */ - skb->tail += headlen; - skb->len += headlen; - return skb; } @@ -1836,12 +1848,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, { struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; u16 rx_headroom = rq->buff.headroom; - u32 cqe_bcnt32 = cqe_bcnt; - struct xdp_buff xdp; + struct bpf_prog *prog; struct sk_buff *skb; + u32 metasize = 0; void *va, *data; u32 frag_size; - u32 metasize; /* Check packet size. Note LRO doesn't use linear SKB */ if (unlikely(cqe_bcnt > rq->hw_mtu)) { @@ -1851,24 +1862,30 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, va = page_address(di->page) + head_offset; data = va + rx_headroom; - frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32); + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset, frag_size, DMA_FROM_DEVICE); - net_prefetchw(va); /* xdp_frame data area */ net_prefetch(data); - mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp); - if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) { - if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) - __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ - return NULL; /* page/packet was consumed by XDP */ - } + prog = rcu_dereference(rq->xdp_prog); + if (prog) { + struct xdp_buff xdp; + + net_prefetchw(va); /* xdp_frame data area */ + mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); + if (mlx5e_xdp_handle(rq, di, prog, &xdp)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } - rx_headroom = xdp.data - xdp.data_hard_start; - frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32); - metasize = xdp.data - xdp.data_meta; - skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32, metasize); + rx_headroom = xdp.data - xdp.data_hard_start; + metasize = xdp.data - xdp.data_meta; + cqe_bcnt = xdp.data_end - xdp.data; + } + frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); + skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); if (unlikely(!skb)) return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 336e4d04c5f2..bdc870f9c2f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -521,14 +521,15 @@ 
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) memset(s, 0, sizeof(*s)); + for (i = 0; i < priv->channels.num; i++) /* for active channels only */ + mlx5e_stats_update_stats_rq_page_pool(priv->channels.c[i]); + for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; int j; - mlx5e_stats_update_stats_rq_page_pool(priv->channels.c[i]); - mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq); mlx5e_stats_grp_sw_update_stats_xdpsq(s, &channel_stats->rq_xdpsq); mlx5e_stats_grp_sw_update_stats_ch_stats(s, &channel_stats->ch); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 973281bdb4a2..bac5160837c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -474,6 +474,7 @@ struct mlx5_esw_flow_attr { int src_port_rewrite_act_id; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5_rx_tun_attr *rx_tun_attr; + struct ethhdr eth; struct mlx5_pkt_reformat *decap_pkt_reformat; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 743422acc3d8..850937cd8bf9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -570,6 +570,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, for (i = 0; i < num_actions; i++) { struct mlx5dr_action_dest_tbl *dest_tbl; + struct mlx5dr_icm_chunk *chunk; struct mlx5dr_action *action; int max_actions_type = 1; u32 action_type; @@ -598,9 +599,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, matcher->tbl->level, dest_tbl->tbl->level); } - attr.final_icm_addr = rx_rule ? - dest_tbl->tbl->rx.s_anchor->chunk->icm_addr : - dest_tbl->tbl->tx.s_anchor->chunk->icm_addr; + chunk = rx_rule ? 
dest_tbl->tbl->rx.s_anchor->chunk : + dest_tbl->tbl->tx.s_anchor->chunk; + attr.final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk); } else { struct mlx5dr_cmd_query_flow_table_details output; int ret; @@ -1123,7 +1124,8 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, } action->rewrite->data = (void *)hw_actions; - action->rewrite->index = (action->rewrite->chunk->icm_addr - + action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr + (action->rewrite->chunk) - dmn->info.caps.hdr_modify_icm_addr) / ACTION_CACHE_LINE_SIZE; @@ -1702,7 +1704,7 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, action->rewrite->modify_ttl = modify_ttl; action->rewrite->data = (u8 *)hw_actions; action->rewrite->num_of_actions = num_hw_actions; - action->rewrite->index = (chunk->icm_addr - + action->rewrite->index = (mlx5dr_icm_pool_get_chunk_icm_addr(chunk) - dmn->info.caps.hdr_modify_icm_addr) / ACTION_CACHE_LINE_SIZE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c index d232f1ea34a2..d5998ef59be4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -217,7 +217,8 @@ dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste, DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1; } - dr_dump_hex_print(hw_ste_dump, (char *)ste->hw_ste, DR_STE_SIZE_REDUCED); + dr_dump_hex_print(hw_ste_dump, (char *)mlx5dr_ste_get_hw_ste(ste), + DR_STE_SIZE_REDUCED); seq_printf(file, "%d,0x%llx,0x%llx,%s\n", mem_rec_type, dr_dump_icm_to_idx(mlx5dr_ste_get_icm_addr(ste)), rule_id, @@ -346,16 +347,19 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx, const u64 matcher_id) { enum dr_dump_rec_type rec_type; + u64 s_icm_addr, e_icm_addr; int i, ret; rec_type = is_rx ? DR_DUMP_REC_TYPE_MATCHER_RX : DR_DUMP_REC_TYPE_MATCHER_TX; + s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->s_htbl->chunk); + e_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(matcher_rx_tx->e_anchor->chunk); seq_printf(file, "%d,0x%llx,0x%llx,%d,0x%llx,0x%llx\n", rec_type, DR_DBG_PTR_TO_ID(matcher_rx_tx), matcher_id, matcher_rx_tx->num_of_builders, - dr_dump_icm_to_idx(matcher_rx_tx->s_htbl->chunk->icm_addr), - dr_dump_icm_to_idx(matcher_rx_tx->e_anchor->chunk->icm_addr)); + dr_dump_icm_to_idx(s_icm_addr), + dr_dump_icm_to_idx(e_icm_addr)); for (i = 0; i < matcher_rx_tx->num_of_builders; i++) { ret = dr_dump_matcher_builder(file, @@ -426,12 +430,14 @@ dr_dump_table_rx_tx(struct seq_file *file, bool is_rx, const u64 table_id) { enum dr_dump_rec_type rec_type; + u64 s_icm_addr; rec_type = is_rx ? 
DR_DUMP_REC_TYPE_TABLE_RX : DR_DUMP_REC_TYPE_TABLE_TX; + s_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(table_rx_tx->s_anchor->chunk); seq_printf(file, "%d,0x%llx,0x%llx\n", rec_type, table_id, - dr_dump_icm_to_idx(table_rx_tx->s_anchor->chunk->icm_addr)); + dr_dump_icm_to_idx(s_icm_addr)); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index e289cfdbce07..4ca67fa24cc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -57,6 +57,36 @@ static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev, return mlx5_core_create_mkey(mdev, mkey, in, inlen); } +u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk) +{ + u32 offset = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type); + + return (u64)offset * chunk->seg; +} + +u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk) +{ + return chunk->buddy_mem->icm_mr->mkey; +} + +u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk) +{ + u32 size = mlx5dr_icm_pool_dm_type_to_entry_size(chunk->buddy_mem->pool->icm_type); + + return (u64)chunk->buddy_mem->icm_mr->icm_start_addr + size * chunk->seg; +} + +u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk) +{ + return mlx5dr_icm_pool_chunk_size_to_byte(chunk->size, + chunk->buddy_mem->pool->icm_type); +} + +u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk) +{ + return mlx5dr_icm_pool_chunk_size_to_entries(chunk->size); +} + static struct mlx5dr_icm_mr * dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) { @@ -158,12 +188,13 @@ static void dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk, int offset) static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) { + int num_of_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; memset(chunk->hw_ste_arr, 0, - chunk->num_of_entries * dr_icm_buddy_get_ste_size(buddy)); + num_of_entries * dr_icm_buddy_get_ste_size(buddy)); memset(chunk->ste_arr, 0, - chunk->num_of_entries * sizeof(chunk->ste_arr[0])); + num_of_entries * sizeof(chunk->ste_arr[0])); } static enum mlx5dr_icm_type @@ -177,7 +208,7 @@ static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk, { enum mlx5dr_icm_type icm_type = get_chunk_icm_type(chunk); - buddy->used_memory -= chunk->byte_size; + buddy->used_memory -= mlx5dr_icm_pool_get_chunk_byte_size(chunk); list_del(&chunk->chunk_list); if (icm_type == DR_ICM_TYPE_STE) @@ -298,21 +329,14 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool, offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg; - chunk->rkey = buddy_mem_pool->icm_mr->mkey; - chunk->mr_addr = offset; - chunk->icm_addr = - (uintptr_t)buddy_mem_pool->icm_mr->icm_start_addr + offset; - chunk->num_of_entries = - mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); - chunk->byte_size = - mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type); chunk->seg = seg; + chunk->size = chunk_size; chunk->buddy_mem = buddy_mem_pool; if (pool->icm_type == DR_ICM_TYPE_STE) dr_icm_chunk_ste_init(chunk, offset); - buddy_mem_pool->used_memory += chunk->byte_size; + buddy_mem_pool->used_memory += mlx5dr_icm_pool_get_chunk_byte_size(chunk); INIT_LIST_HEAD(&chunk->chunk_list); /* chunk now is part of the used_list */ @@ -336,6 +360,7 @@ static bool dr_icm_pool_is_sync_required(struct 
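The block of mlx5dr_icm_pool_get_chunk_*() helpers introduced above is the heart of this steering series: rkey, mr_addr, icm_addr, byte_size and num_of_entries used to be cached in every struct mlx5dr_icm_chunk and are now recomputed on demand from the owning buddy allocator plus the chunk's segment number, shrinking a structure that exists per ICM chunk (see the dr_types.h hunk near the end of this series). The address arithmetic in isolation, over a simplified mirror of the chunk layout:

#include <stdint.h>

struct buddy_mem {			/* simplified, not the real layout */
	uint64_t icm_start_addr;
	uint32_t entry_size;		/* bytes per entry for this ICM type */
};

struct icm_chunk {
	struct buddy_mem *buddy;
	unsigned int seg;		/* index of the chunk inside the buddy */
	uint8_t size;			/* log2 of the number of entries */
};

static uint64_t chunk_icm_addr(const struct icm_chunk *c)
{
	/* base of the buddy's ICM area plus this chunk's segment offset */
	return c->buddy->icm_start_addr +
	       (uint64_t)c->buddy->entry_size * c->seg;
}

static uint32_t chunk_num_entries(const struct icm_chunk *c)
{
	return 1U << c->size;		/* the chunk size enum is an order */
}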
mlx5dr_icm_pool *pool) static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) { struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy; + u32 num_entries; int err; err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); @@ -348,9 +373,9 @@ static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) struct mlx5dr_icm_chunk *chunk, *tmp_chunk; list_for_each_entry_safe(chunk, tmp_chunk, &buddy->hot_list, chunk_list) { - mlx5dr_buddy_free_mem(buddy, chunk->seg, - ilog2(chunk->num_of_entries)); - pool->hot_memory_size -= chunk->byte_size; + num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); + mlx5dr_buddy_free_mem(buddy, chunk->seg, ilog2(num_entries)); + pool->hot_memory_size -= mlx5dr_icm_pool_get_chunk_byte_size(chunk); dr_icm_chunk_destroy(chunk, buddy); } @@ -448,7 +473,7 @@ void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk) /* move the memory to the waiting list AKA "hot" */ mutex_lock(&pool->mutex); list_move_tail(&chunk->chunk_list, &buddy->hot_list); - pool->hot_memory_size += chunk->byte_size; + pool->hot_memory_size += mlx5dr_icm_pool_get_chunk_byte_size(chunk); /* Check if we have chunks that are waiting for sync-ste */ if (dr_icm_pool_is_sync_required(pool)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index a4b5b415df90..0726848eb3ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -705,7 +705,7 @@ static int dr_nic_matcher_connect(struct mlx5dr_domain *dmn, /* Connect start hash table to end anchor */ info.type = CONNECT_MISS; - info.miss_icm_addr = curr_nic_matcher->e_anchor->chunk->icm_addr; + info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(curr_nic_matcher->e_anchor->chunk); ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, curr_nic_matcher->s_htbl, &info, false); @@ -726,12 +726,14 @@ static int dr_nic_matcher_connect(struct mlx5dr_domain *dmn, return ret; /* Update the pointing ste and next hash table */ - curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->ste_arr; - prev_htbl->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl; + curr_nic_matcher->s_htbl->pointing_ste = prev_htbl->chunk->ste_arr; + prev_htbl->chunk->ste_arr[0].next_htbl = curr_nic_matcher->s_htbl; if (next_nic_matcher) { - next_nic_matcher->s_htbl->pointing_ste = curr_nic_matcher->e_anchor->ste_arr; - curr_nic_matcher->e_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + next_nic_matcher->s_htbl->pointing_ste = + curr_nic_matcher->e_anchor->chunk->ste_arr; + curr_nic_matcher->e_anchor->chunk->ste_arr[0].next_htbl = + next_nic_matcher->s_htbl; } return 0; @@ -1043,12 +1045,12 @@ static int dr_matcher_disconnect_nic(struct mlx5dr_domain *dmn, if (next_nic_matcher) { info.type = CONNECT_HIT; info.hit_next_htbl = next_nic_matcher->s_htbl; - next_nic_matcher->s_htbl->pointing_ste = prev_anchor->ste_arr; - prev_anchor->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; + next_nic_matcher->s_htbl->pointing_ste = prev_anchor->chunk->ste_arr; + prev_anchor->chunk->ste_arr[0].next_htbl = next_nic_matcher->s_htbl; } else { info.type = CONNECT_MISS; info.miss_icm_addr = nic_tbl->default_icm_addr; - prev_anchor->ste_arr[0].next_htbl = NULL; + prev_anchor->chunk->ste_arr[0].next_htbl = NULL; } return mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, prev_anchor, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index b4374578425b..ddfaf7891188 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -21,12 +21,12 @@ static int dr_rule_append_to_miss_list(struct mlx5dr_ste_ctx *ste_ctx, if (!ste_info_last) return -ENOMEM; - mlx5dr_ste_set_miss_addr(ste_ctx, last_ste->hw_ste, + mlx5dr_ste_set_miss_addr(ste_ctx, mlx5dr_ste_get_hw_ste(last_ste), mlx5dr_ste_get_icm_addr(new_last_ste)); list_add_tail(&new_last_ste->miss_list_node, miss_list); mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_CTRL, - 0, last_ste->hw_ste, + 0, mlx5dr_ste_get_hw_ste(last_ste), ste_info_last, send_list, true); return 0; @@ -41,6 +41,7 @@ dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; struct mlx5dr_ste_htbl *new_htbl; struct mlx5dr_ste *ste; + u64 icm_addr; /* Create new table for miss entry */ new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, @@ -53,9 +54,9 @@ dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, } /* One and only entry, never grows */ - ste = new_htbl->ste_arr; - mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, - nic_matcher->e_anchor->chunk->icm_addr); + ste = new_htbl->chunk->ste_arr; + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, icm_addr); mlx5dr_htbl_get(new_htbl); return ste; @@ -79,7 +80,7 @@ dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher, ste->htbl->pointing_ste = orig_ste->htbl->pointing_ste; /* In collision entry, all members share the same miss_list_head */ - ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste); + ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(orig_ste); /* Next table */ if (mlx5dr_ste_create_next_htbl(matcher, nic_matcher, ste, hw_ste, @@ -107,9 +108,11 @@ dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info, * is already written to the hw. 
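dr_rule_append_to_miss_list(), at the top of this dr_rule.c diff, is the collision idiom the rest of the file builds on: STEs whose tags hash to the same bucket are chained through their miss addresses, so appending a new tail means pointing the old tail's miss address at the new entry and queueing only the control portion (DR_STE_SIZE_CTRL) of the old tail for write-back. Reduced to the data structure move, with an illustrative entry type:

#include <stdint.h>

struct ste_entry {		/* illustrative, not the driver's mlx5dr_ste */
	uint64_t icm_addr;	/* where this entry lives in device memory */
	uint64_t miss_addr;	/* where the device goes on lookup miss */
};

static void miss_chain_append(struct ste_entry *old_tail,
			      struct ste_entry *new_tail)
{
	/* Hardware walks miss_addr on a tag mismatch, so this single
	 * field update is what links the new entry into the chain. */
	old_tail->miss_addr = new_tail->icm_addr;
}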
*/ if (ste_info->size == DR_STE_SIZE_CTRL) - memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_CTRL); + memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste), + ste_info->data, DR_STE_SIZE_CTRL); else - memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED); + memcpy(mlx5dr_ste_get_hw_ste(ste_info->ste), + ste_info->data, DR_STE_SIZE_REDUCED); ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data, ste_info->size, ste_info->offset); @@ -159,7 +162,7 @@ dr_rule_find_ste_in_miss_list(struct list_head *miss_list, u8 *hw_ste) /* Check if hw_ste is present in the list */ list_for_each_entry(ste, miss_list, miss_list_node) { - if (mlx5dr_ste_equal_tag(ste->hw_ste, hw_ste)) + if (mlx5dr_ste_equal_tag(mlx5dr_ste_get_hw_ste(ste), hw_ste)) return ste; } @@ -185,7 +188,7 @@ dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher, new_ste->htbl->pointing_ste = col_ste->htbl->pointing_ste; /* In collision entry, all members share the same miss_list_head */ - new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste); + new_ste->htbl->chunk->miss_list = mlx5dr_ste_get_miss_list(col_ste); /* Update the previous from the list */ ret = dr_rule_append_to_miss_list(dmn->ste_ctx, new_ste, @@ -235,6 +238,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, bool use_update_list = false; u8 hw_ste[DR_STE_SIZE] = {}; struct mlx5dr_ste *new_ste; + u64 icm_addr; int new_idx; u8 sb_idx; @@ -243,12 +247,12 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask); /* Copy STE control and tag */ - memcpy(hw_ste, cur_ste->hw_ste, DR_STE_SIZE_REDUCED); - mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, - nic_matcher->e_anchor->chunk->icm_addr); + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + memcpy(hw_ste, mlx5dr_ste_get_hw_ste(cur_ste), DR_STE_SIZE_REDUCED); + mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr); new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl); - new_ste = &new_htbl->ste_arr[new_idx]; + new_ste = &new_htbl->chunk->ste_arr[new_idx]; if (mlx5dr_ste_is_not_used(new_ste)) { mlx5dr_htbl_get(new_htbl); @@ -269,7 +273,7 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, use_update_list = true; } - memcpy(new_ste->hw_ste, hw_ste, DR_STE_SIZE_REDUCED); + memcpy(mlx5dr_ste_get_hw_ste(new_ste), hw_ste, DR_STE_SIZE_REDUCED); new_htbl->ctrl.num_of_valid_entries++; @@ -334,7 +338,7 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, int err = 0; int i; - cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk_size); + cur_entries = mlx5dr_icm_pool_chunk_size_to_entries(cur_htbl->chunk->size); if (cur_entries < 1) { mlx5dr_dbg(matcher->tbl->dmn, "Invalid number of entries\n"); @@ -342,7 +346,7 @@ static int dr_rule_rehash_copy_htbl(struct mlx5dr_matcher *matcher, } for (i = 0; i < cur_entries; i++) { - cur_ste = &cur_htbl->ste_arr[i]; + cur_ste = &cur_htbl->chunk->ste_arr[i]; if (mlx5dr_ste_is_not_used(cur_ste)) /* Empty, nothing to copy */ continue; @@ -398,7 +402,7 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule, /* Write new table to HW */ info.type = CONNECT_MISS; - info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; + info.miss_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); mlx5dr_ste_set_formatted_ste(dmn->ste_ctx, dmn->info.caps.gvmi, nic_dmn->type, @@ -446,21 +450,21 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule, * (48B len) which works only on first 32B */ 
mlx5dr_ste_set_hit_addr(dmn->ste_ctx, - prev_htbl->ste_arr[0].hw_ste, - new_htbl->chunk->icm_addr, - new_htbl->chunk->num_of_entries); + prev_htbl->chunk->hw_ste_arr, + mlx5dr_icm_pool_get_chunk_icm_addr(new_htbl->chunk), + mlx5dr_icm_pool_get_chunk_num_of_entries(new_htbl->chunk)); - ste_to_update = &prev_htbl->ste_arr[0]; + ste_to_update = &prev_htbl->chunk->ste_arr[0]; } else { mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx, - cur_htbl->pointing_ste->hw_ste, + mlx5dr_ste_get_hw_ste(cur_htbl->pointing_ste), new_htbl); ste_to_update = cur_htbl->pointing_ste; } mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_CTRL, - 0, ste_to_update->hw_ste, ste_info, - update_list, false); + 0, mlx5dr_ste_get_hw_ste(ste_to_update), + ste_info, update_list, false); return new_htbl; @@ -489,10 +493,10 @@ static struct mlx5dr_ste_htbl *dr_rule_rehash(struct mlx5dr_rule *rule, struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; enum mlx5dr_icm_chunk_size new_size; - new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk_size); + new_size = mlx5dr_icm_next_higher_chunk(cur_htbl->chunk->size); new_size = min_t(u32, new_size, dmn->info.max_log_sw_icm_sz); - if (new_size == cur_htbl->chunk_size) + if (new_size == cur_htbl->chunk->size) return NULL; /* Skip rehash, we already at the max size */ return dr_rule_rehash_htbl(rule, nic_rule, cur_htbl, ste_location, @@ -659,13 +663,13 @@ static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl, struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl; int threshold; - if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size) + if (dmn->info.max_log_sw_icm_sz <= htbl->chunk->size) return false; if (!mlx5dr_ste_htbl_may_grow(htbl)) return false; - if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk_size) + if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk->size) return false; threshold = mlx5dr_ste_htbl_increase_threshold(htbl); @@ -755,6 +759,7 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, { struct mlx5dr_domain *dmn = matcher->tbl->dmn; struct mlx5dr_ste_send_info *ste_info; + u64 icm_addr; /* Take ref on table, only on first time this ste is used */ mlx5dr_htbl_get(cur_htbl); @@ -762,8 +767,8 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, /* new entry -> new branch */ list_add_tail(&ste->miss_list_node, miss_list); - mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, - nic_matcher->e_anchor->chunk->icm_addr); + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr); ste->ste_chain_location = ste_location; @@ -822,7 +827,7 @@ dr_rule_handle_ste_branch(struct mlx5dr_rule *rule, again: index = mlx5dr_ste_calc_hash_index(hw_ste, cur_htbl); miss_list = &cur_htbl->chunk->miss_list[index]; - ste = &cur_htbl->ste_arr[index]; + ste = &cur_htbl->chunk->ste_arr[index]; if (mlx5dr_ste_is_not_used(ste)) { if (dr_rule_handle_empty_entry(matcher, nic_matcher, cur_htbl, @@ -858,7 +863,7 @@ again: ste_location, send_ste_list); if (!new_htbl) { mlx5dr_err(dmn, "Failed creating rehash table, htbl-log_size: %d\n", - cur_htbl->chunk_size); + cur_htbl->chunk->size); mlx5dr_htbl_put(cur_htbl); } else { cur_htbl = new_htbl; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 00aef47d7682..ef19a66f5233 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -407,17 +407,17 @@ static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, int *iterations, int *num_stes) { + u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); int alloc_size; - if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) { - *iterations = htbl->chunk->byte_size / - dmn->send_ring->max_post_send_size; + if (chunk_byte_size > dmn->send_ring->max_post_send_size) { + *iterations = chunk_byte_size / dmn->send_ring->max_post_send_size; *byte_size = dmn->send_ring->max_post_send_size; alloc_size = *byte_size; *num_stes = *byte_size / DR_STE_SIZE; } else { *iterations = 1; - *num_stes = htbl->chunk->num_of_entries; + *num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk); alloc_size = *num_stes * DR_STE_SIZE; } @@ -453,7 +453,7 @@ int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, send_info.write.length = size; send_info.write.lkey = 0; send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset; - send_info.rkey = ste->htbl->chunk->rkey; + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk); return dr_postsend_icm_data(dmn, &send_info); } @@ -462,7 +462,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, struct mlx5dr_ste_htbl *htbl, u8 *formatted_ste, u8 *mask) { - u32 byte_size = htbl->chunk->byte_size; + u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); int num_stes_per_iter; int iterations; u8 *data; @@ -486,7 +486,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, * need to add the bit_mask */ for (j = 0; j < num_stes_per_iter; j++) { - struct mlx5dr_ste *ste = &htbl->ste_arr[ste_index + j]; + struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j]; u32 ste_off = j * DR_STE_SIZE; if (mlx5dr_ste_is_not_used(ste)) { @@ -495,7 +495,8 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, } else { /* Copy data */ memcpy(data + ste_off, - htbl->ste_arr[ste_index + j].hw_ste, + htbl->chunk->hw_ste_arr + + DR_STE_SIZE_REDUCED * (ste_index + j), DR_STE_SIZE_REDUCED); /* Copy bit_mask */ memcpy(data + ste_off + DR_STE_SIZE_REDUCED, @@ -511,8 +512,8 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, send_info.write.length = byte_size; send_info.write.lkey = 0; send_info.remote_addr = - mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); - send_info.rkey = htbl->chunk->rkey; + mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk); ret = dr_postsend_icm_data(dmn, &send_info); if (ret) @@ -530,7 +531,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, u8 *ste_init_data, bool update_hw_ste) { - u32 byte_size = htbl->chunk->byte_size; + u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); int iterations; int num_stes; u8 *copy_dst; @@ -546,7 +547,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, if (update_hw_ste) { /* Copy the reduced STE to hash table ste_arr */ for (i = 0; i < num_stes; i++) { - copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; + copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED; memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED); } } @@ -568,8 +569,8 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, send_info.write.length = byte_size; send_info.write.lkey = 0; send_info.remote_addr = - mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index); - send_info.rkey = htbl->chunk->rkey; + 
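dr_get_tbl_copy_details() above sizes htbl write-back so a single RDMA post never exceeds the send ring's max_post_send_size: oversized chunks are split into equal iterations of the maximum post size, while small ones go out in one post covering every STE. The arithmetic on its own (assuming, as the power-of-two ICM chunk sizes guarantee, that a larger chunk is a whole multiple of the post size):

#include <stdint.h>

static void tbl_copy_details(uint32_t chunk_bytes, uint32_t max_post_bytes,
			     uint32_t ste_size, int *iterations,
			     uint32_t *byte_size, int *num_stes)
{
	if (chunk_bytes > max_post_bytes) {
		*iterations = chunk_bytes / max_post_bytes;
		*byte_size = max_post_bytes;
	} else {
		*iterations = 1;
		*byte_size = chunk_bytes;
	}
	*num_stes = *byte_size / ste_size;	/* STEs per posted buffer */
}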
mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk); ret = dr_postsend_icm_data(dmn, &send_info); if (ret) @@ -591,8 +592,9 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, send_info.write.length = action->rewrite->num_of_actions * DR_MODIFY_ACTION_SIZE; send_info.write.lkey = 0; - send_info.remote_addr = action->rewrite->chunk->mr_addr; - send_info.rkey = action->rewrite->chunk->rkey; + send_info.remote_addr = + mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk); + send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk); ret = dr_postsend_icm_data(dmn, &send_info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 518e949847a3..09ebd3088857 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -25,6 +25,7 @@ bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps) u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) { + u32 num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk); struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; u8 masked[DR_STE_SIZE_TAG] = {}; u32 crc32, index; @@ -32,7 +33,7 @@ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) int i; /* Don't calculate CRC if the result is predicted */ - if (htbl->chunk->num_of_entries == 1 || htbl->byte_mask == 0) + if (num_entries == 1 || htbl->byte_mask == 0) return 0; /* Mask tag using byte mask, bit per byte */ @@ -45,7 +46,7 @@ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) } crc32 = dr_ste_crc32_calc(masked, DR_STE_SIZE_TAG); - index = crc32 & (htbl->chunk->num_of_entries - 1); + index = crc32 & (num_entries - 1); return index; } @@ -96,13 +97,11 @@ void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, } static void dr_ste_always_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste *ste, u64 miss_addr) + u8 *hw_ste, u64 miss_addr) { - u8 *hw_ste_p = ste->hw_ste; - - ste_ctx->set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE); - ste_ctx->set_miss_addr(hw_ste_p, miss_addr); - dr_ste_set_always_miss((struct dr_hw_ste_format *)ste->hw_ste); + ste_ctx->set_next_lu_type(hw_ste, MLX5DR_STE_LU_TYPE_DONT_CARE); + ste_ctx->set_miss_addr(hw_ste, miss_addr); + dr_ste_set_always_miss((struct dr_hw_ste_format *)hw_ste); } void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx, @@ -113,37 +112,45 @@ void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx, u64 mlx5dr_ste_get_icm_addr(struct mlx5dr_ste *ste) { - u32 index = ste - ste->htbl->ste_arr; + u64 base_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(ste->htbl->chunk); + u32 index = ste - ste->htbl->chunk->ste_arr; - return ste->htbl->chunk->icm_addr + DR_STE_SIZE * index; + return base_icm_addr + DR_STE_SIZE * index; } u64 mlx5dr_ste_get_mr_addr(struct mlx5dr_ste *ste) { - u32 index = ste - ste->htbl->ste_arr; + u32 index = ste - ste->htbl->chunk->ste_arr; - return ste->htbl->chunk->mr_addr + DR_STE_SIZE * index; + return mlx5dr_icm_pool_get_chunk_mr_addr(ste->htbl->chunk) + DR_STE_SIZE * index; +} + +u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste) +{ + u64 index = ste - ste->htbl->chunk->ste_arr; + + return ste->htbl->chunk->hw_ste_arr + DR_STE_SIZE_REDUCED * index; } struct list_head *mlx5dr_ste_get_miss_list(struct mlx5dr_ste *ste) { - u32 index = ste - 
ste->htbl->ste_arr; + u32 index = ste - ste->htbl->chunk->ste_arr; - return &ste->htbl->miss_list[index]; + return &ste->htbl->chunk->miss_list[index]; } static void dr_ste_always_hit_htbl(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste *ste, + u8 *hw_ste, struct mlx5dr_ste_htbl *next_htbl) { struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; - u8 *hw_ste = ste->hw_ste; ste_ctx->set_byte_mask(hw_ste, next_htbl->byte_mask); ste_ctx->set_next_lu_type(hw_ste, next_htbl->lu_type); - ste_ctx->set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); + ste_ctx->set_hit_addr(hw_ste, mlx5dr_icm_pool_get_chunk_icm_addr(chunk), + mlx5dr_icm_pool_get_chunk_num_of_entries(chunk)); - dr_ste_set_always_hit((struct dr_hw_ste_format *)ste->hw_ste); + dr_ste_set_always_hit((struct dr_hw_ste_format *)hw_ste); } bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, @@ -166,7 +173,8 @@ bool mlx5dr_ste_is_last_in_rule(struct mlx5dr_matcher_rx_tx *nic_matcher, */ static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src) { - memcpy(dst->hw_ste, src->hw_ste, DR_STE_SIZE_REDUCED); + memcpy(mlx5dr_ste_get_hw_ste(dst), mlx5dr_ste_get_hw_ste(src), + DR_STE_SIZE_REDUCED); dst->next_htbl = src->next_htbl; if (dst->next_htbl) dst->next_htbl->pointing_ste = dst; @@ -184,18 +192,17 @@ dr_ste_remove_head_ste(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_htbl *stats_tbl) { u8 tmp_data_ste[DR_STE_SIZE] = {}; - struct mlx5dr_ste tmp_ste = {}; u64 miss_addr; - tmp_ste.hw_ste = tmp_data_ste; + miss_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); /* Use temp ste because dr_ste_always_miss_addr * touches bit_mask area which doesn't exist at ste->hw_ste. + * Need to use a full-sized (DR_STE_SIZE) hw_ste. */ - memcpy(tmp_ste.hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED); - miss_addr = nic_matcher->e_anchor->chunk->icm_addr; - dr_ste_always_miss_addr(ste_ctx, &tmp_ste, miss_addr); - memcpy(ste->hw_ste, tmp_ste.hw_ste, DR_STE_SIZE_REDUCED); + memcpy(tmp_data_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED); + dr_ste_always_miss_addr(ste_ctx, tmp_data_ste, miss_addr); + memcpy(mlx5dr_ste_get_hw_ste(ste), tmp_data_ste, DR_STE_SIZE_REDUCED); list_del_init(&ste->miss_list_node); @@ -237,7 +244,7 @@ dr_ste_replace_head_ste(struct mlx5dr_matcher_rx_tx *nic_matcher, mlx5dr_rule_set_last_member(next_ste->rule_rx_tx, ste, false); /* Copy all 64 hw_ste bytes */ - memcpy(hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED); + memcpy(hw_ste, mlx5dr_ste_get_hw_ste(ste), DR_STE_SIZE_REDUCED); sb_idx = ste->ste_chain_location - 1; mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask); @@ -273,12 +280,13 @@ static void dr_ste_remove_middle_ste(struct mlx5dr_ste_ctx *ste_ctx, if (WARN_ON(!prev_ste)) return; - miss_addr = ste_ctx->get_miss_addr(ste->hw_ste); - ste_ctx->set_miss_addr(prev_ste->hw_ste, miss_addr); + miss_addr = ste_ctx->get_miss_addr(mlx5dr_ste_get_hw_ste(ste)); + ste_ctx->set_miss_addr(mlx5dr_ste_get_hw_ste(prev_ste), miss_addr); mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_CTRL, 0, - prev_ste->hw_ste, ste_info, - send_ste_list, true /* Copy data*/); + mlx5dr_ste_get_hw_ste(prev_ste), + ste_info, send_ste_list, + true /* Copy data*/); list_del_init(&ste->miss_list_node); @@ -364,9 +372,11 @@ void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx, u8 *hw_ste, struct mlx5dr_ste_htbl *next_htbl) { - struct mlx5dr_icm_chunk *chunk = next_htbl->chunk; + u64 icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(next_htbl->chunk); 
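mlx5dr_ste_get_icm_addr(), get_mr_addr(), get_hw_ste() and get_miss_list() above all rest on one invariant: an STE's index inside its chunk's ste_arr addresses the matching slot in every parallel per-chunk array (hw_ste_arr at DR_STE_SIZE_REDUCED stride, miss_list, and the ICM and MR address spaces at DR_STE_SIZE stride), which is what lets the per-STE hw_ste pointer be deleted from struct mlx5dr_ste. In miniature:

#include <stddef.h>
#include <stdint.h>

#define STE_SIZE	 64	/* on-device entry stride */
#define STE_SIZE_REDUCED 48	/* cached copy stride */

struct ste { int refcount; };	/* bookkeeping only, no hw_ste pointer */

struct chunk {
	struct ste *ste_arr;	/* parallel arrays share one indexing */
	uint8_t    *hw_ste_arr;
	uint64_t    icm_base;
};

static uint8_t *ste_hw(const struct chunk *c, const struct ste *s)
{
	size_t idx = s - c->ste_arr;	/* pointer difference is the index */

	return c->hw_ste_arr + idx * STE_SIZE_REDUCED;
}

static uint64_t ste_icm_addr(const struct chunk *c, const struct ste *s)
{
	return c->icm_base + (uint64_t)(s - c->ste_arr) * STE_SIZE;
}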
+ u32 num_entries = + mlx5dr_icm_pool_get_chunk_num_of_entries(next_htbl->chunk); - ste_ctx->set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); + ste_ctx->set_hit_addr(hw_ste, icm_addr, num_entries); } void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx, @@ -385,15 +395,22 @@ void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_htbl_connect_info *connect_info) { bool is_rx = nic_type == DR_DOMAIN_NIC_TYPE_RX; - struct mlx5dr_ste ste = {}; + u8 tmp_hw_ste[DR_STE_SIZE] = {0}; ste_ctx->ste_init(formatted_ste, htbl->lu_type, is_rx, gvmi); - ste.hw_ste = formatted_ste; + /* Use temp ste because dr_ste_always_miss_addr/hit_htbl + * touches bit_mask area which doesn't exist at ste->hw_ste. + * Need to use a full-sized (DR_STE_SIZE) hw_ste. + */ + memcpy(tmp_hw_ste, formatted_ste, DR_STE_SIZE_REDUCED); if (connect_info->type == CONNECT_HIT) - dr_ste_always_hit_htbl(ste_ctx, &ste, connect_info->hit_next_htbl); + dr_ste_always_hit_htbl(ste_ctx, tmp_hw_ste, + connect_info->hit_next_htbl); else - dr_ste_always_miss_addr(ste_ctx, &ste, connect_info->miss_icm_addr); + dr_ste_always_miss_addr(ste_ctx, tmp_hw_ste, + connect_info->miss_icm_addr); + memcpy(formatted_ste, tmp_hw_ste, DR_STE_SIZE_REDUCED); } int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, @@ -444,7 +461,8 @@ int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher, /* Write new table to HW */ info.type = CONNECT_MISS; - info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr; + info.miss_icm_addr = + mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); if (mlx5dr_ste_htbl_init_and_postsend(dmn, nic_dmn, next_htbl, &info, false)) { mlx5dr_info(dmn, "Failed writing table to HW\n"); @@ -470,6 +488,7 @@ struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, { struct mlx5dr_icm_chunk *chunk; struct mlx5dr_ste_htbl *htbl; + u32 num_entries; int i; htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); @@ -483,22 +502,18 @@ struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool, htbl->chunk = chunk; htbl->lu_type = lu_type; htbl->byte_mask = byte_mask; - htbl->ste_arr = chunk->ste_arr; - htbl->hw_ste_arr = chunk->hw_ste_arr; - htbl->miss_list = chunk->miss_list; htbl->refcount = 0; + num_entries = mlx5dr_icm_pool_get_chunk_num_of_entries(chunk); - for (i = 0; i < chunk->num_of_entries; i++) { - struct mlx5dr_ste *ste = &htbl->ste_arr[i]; + for (i = 0; i < num_entries; i++) { + struct mlx5dr_ste *ste = &chunk->ste_arr[i]; - ste->hw_ste = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; ste->htbl = htbl; ste->refcount = 0; INIT_LIST_HEAD(&ste->miss_list_node); - INIT_LIST_HEAD(&htbl->miss_list[i]); + INIT_LIST_HEAD(&chunk->miss_list[i]); } - htbl->chunk_size = chunk_size; return htbl; out_free_htbl: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index f5f2d356e75f..e5f6412baea9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -10,6 +10,7 @@ static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, struct mlx5dr_matcher_rx_tx *last_nic_matcher = NULL; struct mlx5dr_htbl_connect_info info; struct mlx5dr_ste_htbl *last_htbl; + struct mlx5dr_icm_chunk *chunk; int ret; if (!list_empty(&nic_tbl->nic_matcher_list)) @@ -22,13 +23,14 @@ static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, else last_htbl = nic_tbl->s_anchor; - if (action) - 
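The mlx5dr_ste_set_formatted_ste() change a little further down this hunk repeats the staging pattern already used in dr_ste_remove_head_ste(): cached STE copies are only DR_STE_SIZE_REDUCED bytes, but the always-hit and always-miss builders write into the bit-mask area beyond that, so writers stage the entry in a full DR_STE_SIZE stack buffer and copy the reduced prefix back. The idiom by itself, as a sketch:

#include <stdint.h>
#include <string.h>

#define STE_SIZE	 64
#define STE_SIZE_REDUCED 48

static void edit_reduced_ste(uint8_t *reduced_copy,
			     void (*builder)(uint8_t *full_ste))
{
	uint8_t tmp[STE_SIZE] = { 0 };	/* full-size scratch entry */

	memcpy(tmp, reduced_copy, STE_SIZE_REDUCED);
	builder(tmp);			/* may touch bytes 48..63 freely */
	memcpy(reduced_copy, tmp, STE_SIZE_REDUCED);
}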
nic_tbl->default_icm_addr = - nic_tbl->nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX ? - action->dest_tbl->tbl->rx.s_anchor->chunk->icm_addr : - action->dest_tbl->tbl->tx.s_anchor->chunk->icm_addr; - else + if (action) { + chunk = nic_tbl->nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX ? + action->dest_tbl->tbl->rx.s_anchor->chunk : + action->dest_tbl->tbl->tx.s_anchor->chunk; + nic_tbl->default_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk); + } else { nic_tbl->default_icm_addr = nic_tbl->nic_dmn->default_icm_addr; + } info.type = CONNECT_MISS; info.miss_icm_addr = nic_tbl->default_icm_addr; @@ -222,10 +224,10 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) int ret; if (tbl->rx.s_anchor) - icm_addr_rx = tbl->rx.s_anchor->chunk->icm_addr; + icm_addr_rx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->rx.s_anchor->chunk); if (tbl->tx.s_anchor) - icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr; + icm_addr_tx = mlx5dr_icm_pool_get_chunk_icm_addr(tbl->tx.s_anchor->chunk); ft_attr.table_type = tbl->table_type; ft_attr.icm_addr_rx = icm_addr_rx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 88092fabf55b..46866a5fc5ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -147,10 +147,12 @@ struct mlx5dr_matcher_rx_tx; struct mlx5dr_ste_ctx; struct mlx5dr_ste { - u8 *hw_ste; /* refcount: indicates the num of rules that using this ste */ u32 refcount; + /* this ste is part of a rule, located in ste's chain */ + u8 ste_chain_location; + /* attached to the miss_list head at each htbl entry */ struct list_head miss_list_node; @@ -161,9 +163,6 @@ struct mlx5dr_ste { /* The rule this STE belongs to */ struct mlx5dr_rule_rx_tx *rule_rx_tx; - - /* this ste is part of a rule, located in ste's chain */ - u8 ste_chain_location; }; struct mlx5dr_ste_htbl_ctrl { @@ -181,14 +180,7 @@ struct mlx5dr_ste_htbl { u16 byte_mask; u32 refcount; struct mlx5dr_icm_chunk *chunk; - struct mlx5dr_ste *ste_arr; - u8 *hw_ste_arr; - - struct list_head *miss_list; - - enum mlx5dr_icm_chunk_size chunk_size; struct mlx5dr_ste *pointing_ste; - struct mlx5dr_ste_htbl_ctrl ctrl; }; @@ -1097,16 +1089,12 @@ int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr, struct mlx5dr_icm_chunk { struct mlx5dr_icm_buddy_mem *buddy_mem; struct list_head chunk_list; - u32 rkey; - u32 num_of_entries; - u32 byte_size; - u64 icm_addr; - u64 mr_addr; /* indicates the index of this chunk in the whole memory, * used for deleting the chunk from the buddy */ unsigned int seg; + enum mlx5dr_icm_chunk_size size; /* Memory optimisation */ struct mlx5dr_ste *ste_arr; @@ -1146,6 +1134,13 @@ int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, enum mlx5dr_ipv outer_ipv, enum mlx5dr_ipv inner_ipv); +u64 mlx5dr_icm_pool_get_chunk_mr_addr(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_rkey(struct mlx5dr_icm_chunk *chunk); +u64 mlx5dr_icm_pool_get_chunk_icm_addr(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_num_of_entries(struct mlx5dr_icm_chunk *chunk); +u32 mlx5dr_icm_pool_get_chunk_byte_size(struct mlx5dr_icm_chunk *chunk); +u8 *mlx5dr_ste_get_hw_ste(struct mlx5dr_ste *ste); + static inline int mlx5dr_icm_pool_dm_type_to_entry_size(enum mlx5dr_icm_type icm_type) { @@ -1178,7 +1173,7 @@ static inline int mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl) { int num_of_entries = - 
mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size); + mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk->size); /* Threshold is 50%, one is added to table of size 1 */ return (num_of_entries + 1) / 2; @@ -1187,7 +1182,7 @@ mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl) static inline bool mlx5dr_ste_htbl_may_grow(struct mlx5dr_ste_htbl *htbl) { - if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask) + if (htbl->chunk->size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask) return false; return true; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 0bf1d64644ba..b13e0f8d232a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1217,36 +1217,37 @@ static void mlxsw_core_fw_params_unregister(struct mlxsw_core *mlxsw_core) ARRAY_SIZE(mlxsw_core_fw_devlink_params)); } +static void *__dl_port(struct devlink_port *devlink_port) +{ + return container_of(devlink_port, struct mlxsw_core_port, devlink_port); +} + static int mlxsw_devlink_port_split(struct devlink *devlink, - unsigned int port_index, + struct devlink_port *port, unsigned int count, struct netlink_ext_ack *extack) { + struct mlxsw_core_port *mlxsw_core_port = __dl_port(port); struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - if (port_index >= mlxsw_core->max_ports) { - NL_SET_ERR_MSG_MOD(extack, "Port index exceeds maximum number of ports"); - return -EINVAL; - } if (!mlxsw_core->driver->port_split) return -EOPNOTSUPP; - return mlxsw_core->driver->port_split(mlxsw_core, port_index, count, - extack); + return mlxsw_core->driver->port_split(mlxsw_core, + mlxsw_core_port->local_port, + count, extack); } static int mlxsw_devlink_port_unsplit(struct devlink *devlink, - unsigned int port_index, + struct devlink_port *port, struct netlink_ext_ack *extack) { + struct mlxsw_core_port *mlxsw_core_port = __dl_port(port); struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - if (port_index >= mlxsw_core->max_ports) { - NL_SET_ERR_MSG_MOD(extack, "Port index exceeds maximum number of ports"); - return -EINVAL; - } if (!mlxsw_core->driver->port_unsplit) return -EOPNOTSUPP; - return mlxsw_core->driver->port_unsplit(mlxsw_core, port_index, + return mlxsw_core->driver->port_unsplit(mlxsw_core, + mlxsw_core_port->local_port, extack); } @@ -1280,11 +1281,6 @@ mlxsw_devlink_sb_pool_set(struct devlink *devlink, extack); } -static void *__dl_port(struct devlink_port *devlink_port) -{ - return container_of(devlink_port, struct mlxsw_core_port, devlink_port); -} - static int mlxsw_devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type port_type) { @@ -2983,7 +2979,7 @@ static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port, attrs.switch_id.id_len = switch_id_len; mlxsw_core_port->local_port = local_port; devlink_port_attrs_set(devlink_port, &attrs); - err = devlink_port_register(devlink, devlink_port, local_port); + err = devl_port_register(devlink, devlink_port, local_port); if (err) memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); return err; @@ -2995,7 +2991,7 @@ static void __mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u16 local_port &mlxsw_core->ports[local_port]; struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; - devlink_port_unregister(devlink_port); + devl_port_unregister(devlink_port); memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c 
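The mlxsw_devlink_port_split() and port_unsplit() hunks above follow a devlink core API change: the callbacks now receive the struct devlink_port itself rather than a numeric index, so the core has already resolved and validated the port and the driver's bounds check (with its "Port index exceeds maximum number of ports" message) can go. Recovering the embedding driver structure is plain container_of(); in generic, self-contained form (with a simplified version of the kernel macro):

#include <stddef.h>

/* Simplified container_of(), reproduced so the sketch stands alone. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct devlink_port { int type; };		/* stand-in */

struct core_port {
	unsigned int local_port;
	struct devlink_port devlink_port;	/* embedded, not referenced */
};

static struct core_port *to_core_port(struct devlink_port *dl_port)
{
	return container_of(dl_port, struct core_port, devlink_port);
}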
b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 060209983438..3bc012dafd08 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -422,6 +422,7 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, struct netlink_ext_ack *extack) { struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core); + struct devlink *devlink = priv_to_devlink(mlxsw_core); int err; mlxsw_m->core = mlxsw_core; @@ -437,7 +438,9 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, return err; } + devl_lock(devlink); err = mlxsw_m_ports_create(mlxsw_m); + devl_unlock(devlink); if (err) { dev_err(mlxsw_m->bus_info->dev, "Failed to create ports\n"); return err; @@ -449,8 +452,11 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, static void mlxsw_m_fini(struct mlxsw_core *mlxsw_core) { struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core); + struct devlink *devlink = priv_to_devlink(mlxsw_core); + devl_lock(devlink); mlxsw_m_ports_remove(mlxsw_m); + devl_unlock(devlink); } static const struct mlxsw_config_profile mlxsw_m_config_profile; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 7b7b17183d10..8eb05090ffec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2818,6 +2818,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct devlink *devlink = priv_to_devlink(mlxsw_core); int err; mlxsw_sp->core = mlxsw_core; @@ -2978,7 +2979,9 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_sample_trigger_init; } + devl_lock(devlink); err = mlxsw_sp_ports_create(mlxsw_sp); + devl_unlock(devlink); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); goto err_ports_create; @@ -3159,8 +3162,12 @@ static int mlxsw_sp4_init(struct mlxsw_core *mlxsw_core, static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct devlink *devlink = priv_to_devlink(mlxsw_core); + devl_lock(devlink); mlxsw_sp_ports_remove(mlxsw_sp); + devl_unlock(devlink); + rhashtable_destroy(&mlxsw_sp->sample_trigger_ht); mlxsw_sp_port_module_info_fini(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index d024983815da..2b3eb5ed8233 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -5225,7 +5225,6 @@ static irqreturn_t netdev_intr(int irq, void *dev_id) * Linux network device functions */ -static unsigned long next_jiffies; #ifdef CONFIG_NET_POLL_CONTROLLER static void netdev_netpoll(struct net_device *dev) @@ -5411,10 +5410,12 @@ static int netdev_open(struct net_device *dev) struct dev_info *hw_priv = priv->adapter; struct ksz_hw *hw = &hw_priv->hw; struct ksz_port *port = &priv->port; + unsigned long next_jiffies; int i; int p; int rc = 0; + next_jiffies = jiffies + HZ * 2; priv->multicast = 0; priv->promiscuous = 0; @@ -5428,10 +5429,7 @@ static int netdev_open(struct net_device *dev) if (rc) return rc; for (i = 0; i < hw->mib_port_cnt; i++) { - if (next_jiffies < jiffies) - next_jiffies = jiffies + HZ * 2; - else - next_jiffies += HZ * 1; + next_jiffies += HZ * 1; hw_priv->counter[i].time = next_jiffies; hw->port_mib[i].state = media_disconnected; port_init_cnt(hw, i); @@ -6563,6 +6561,7 @@ static 
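The minimal.c and spectrum.c hunks here pair with the devl_port_register()/devl_port_unregister() switch in core.c above: the devl_* registration variants assert that the devlink instance lock is held, so init and fini paths that create or remove ports outside a devlink callback must now bracket those calls with devl_lock()/devl_unlock() themselves. The resulting shape, as a sketch in which ports_create() stands in for the driver's helper:

#include <net/devlink.h>

struct mlxsw_core;
int ports_create(struct mlxsw_core *core);	/* placeholder */
struct devlink *priv_to_devlink(struct mlxsw_core *core);

static int driver_ports_init(struct mlxsw_core *core)
{
	struct devlink *devlink = priv_to_devlink(core);
	int err;

	devl_lock(devlink);		/* devl_port_register() asserts this */
	err = ports_create(core);	/* registers one devlink port per port */
	devl_unlock(devlink);

	return err;
}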
void mib_read_work(struct work_struct *work) struct dev_info *hw_priv = container_of(work, struct dev_info, mib_read); struct ksz_hw *hw = &hw_priv->hw; + unsigned long next_jiffies; struct ksz_port_mib *mib; int i; diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index 5f1e7b8bad4f..c8fe8b31f07b 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -7,6 +7,8 @@ #include <linux/phy.h> #include "lan743x_main.h" #include "lan743x_ethtool.h" +#include <linux/sched.h> +#include <linux/iopoll.h> /* eeprom */ #define LAN743X_EEPROM_MAGIC (0x74A5) @@ -19,6 +21,10 @@ #define OTP_INDICATOR_1 (0xF3) #define OTP_INDICATOR_2 (0xF7) +#define LOCK_TIMEOUT_MAX_CNT (100) // 1 sec (10 msec * 100) + +#define LAN743X_CSR_READ_OP(offset) lan743x_csr_read(adapter, offset) + static int lan743x_otp_power_up(struct lan743x_adapter *adapter) { u32 reg_value; @@ -149,6 +155,217 @@ static int lan743x_otp_write(struct lan743x_adapter *adapter, u32 offset, return 0; } +static int lan743x_hs_syslock_acquire(struct lan743x_adapter *adapter, + u16 timeout) +{ + u16 timeout_cnt = 0; + u32 val; + + do { + spin_lock(&adapter->eth_syslock_spinlock); + if (adapter->eth_syslock_acquire_cnt == 0) { + lan743x_csr_write(adapter, ETH_SYSTEM_SYS_LOCK_REG, + SYS_LOCK_REG_ENET_SS_LOCK_); + val = lan743x_csr_read(adapter, + ETH_SYSTEM_SYS_LOCK_REG); + if (val & SYS_LOCK_REG_ENET_SS_LOCK_) { + adapter->eth_syslock_acquire_cnt++; + WARN_ON(adapter->eth_syslock_acquire_cnt == 0); + spin_unlock(&adapter->eth_syslock_spinlock); + break; + } + } else { + adapter->eth_syslock_acquire_cnt++; + WARN_ON(adapter->eth_syslock_acquire_cnt == 0); + spin_unlock(&adapter->eth_syslock_spinlock); + break; + } + + spin_unlock(&adapter->eth_syslock_spinlock); + + if (timeout_cnt++ < timeout) + usleep_range(10000, 11000); + else + return -ETIMEDOUT; + } while (true); + + return 0; +} + +static void lan743x_hs_syslock_release(struct lan743x_adapter *adapter) +{ + u32 val; + + spin_lock(&adapter->eth_syslock_spinlock); + WARN_ON(adapter->eth_syslock_acquire_cnt == 0); + + if (adapter->eth_syslock_acquire_cnt) { + adapter->eth_syslock_acquire_cnt--; + if (adapter->eth_syslock_acquire_cnt == 0) { + lan743x_csr_write(adapter, ETH_SYSTEM_SYS_LOCK_REG, 0); + val = lan743x_csr_read(adapter, + ETH_SYSTEM_SYS_LOCK_REG); + WARN_ON((val & SYS_LOCK_REG_ENET_SS_LOCK_) != 0); + } + } + + spin_unlock(&adapter->eth_syslock_spinlock); +} + +static void lan743x_hs_otp_power_up(struct lan743x_adapter *adapter) +{ + u32 reg_value; + + reg_value = lan743x_csr_read(adapter, HS_OTP_PWR_DN); + if (reg_value & OTP_PWR_DN_PWRDN_N_) { + reg_value &= ~OTP_PWR_DN_PWRDN_N_; + lan743x_csr_write(adapter, HS_OTP_PWR_DN, reg_value); + /* To flush the posted write so the subsequent delay is + * guaranteed to happen after the write at the hardware + */ + lan743x_csr_read(adapter, HS_OTP_PWR_DN); + udelay(1); + } +} + +static void lan743x_hs_otp_power_down(struct lan743x_adapter *adapter) +{ + u32 reg_value; + + reg_value = lan743x_csr_read(adapter, HS_OTP_PWR_DN); + if (!(reg_value & OTP_PWR_DN_PWRDN_N_)) { + reg_value |= OTP_PWR_DN_PWRDN_N_; + lan743x_csr_write(adapter, HS_OTP_PWR_DN, reg_value); + /* To flush the posted write so the subsequent delay is + * guaranteed to happen after the write at the hardware + */ + lan743x_csr_read(adapter, HS_OTP_PWR_DN); + udelay(1); + } +} + +static void lan743x_hs_otp_set_address(struct lan743x_adapter *adapter, + u32
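lan743x_hs_syslock_acquire() and lan743x_hs_syslock_release() above implement a reference-counted wrapper around a hardware semaphore register: the ENET_SS_LOCK bit is only written on the 0-to-1 and 1-to-0 transitions of a driver-side counter, so nested acquisitions on the same host are cheap and the register round-trip happens once per outermost user, while contention with the lock's other owner is handled by the 10 ms polling loop bounded by LOCK_TIMEOUT_MAX_CNT. A stripped-down model of the counting logic; hw_try_lock()/hw_unlock() stand in for the CSR write-and-readback, and the driver holds eth_syslock_spinlock around cnt:

#include <stdbool.h>

bool hw_try_lock(void);		/* placeholder for the lock CSR access */
void hw_unlock(void);

struct syslock { unsigned int cnt; };

static bool syslock_get(struct syslock *l)
{
	if (l->cnt == 0 && !hw_try_lock())
		return false;	/* caller sleeps 10 ms and retries */
	l->cnt++;		/* nested users on this host just count */
	return true;
}

static void syslock_put(struct syslock *l)
{
	if (--l->cnt == 0)
		hw_unlock();	/* last user releases the hardware bit */
}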
address) +{ + lan743x_csr_write(adapter, HS_OTP_ADDR_HIGH, (address >> 8) & 0x03); + lan743x_csr_write(adapter, HS_OTP_ADDR_LOW, address & 0xFF); +} + +static void lan743x_hs_otp_read_go(struct lan743x_adapter *adapter) +{ + lan743x_csr_write(adapter, HS_OTP_FUNC_CMD, OTP_FUNC_CMD_READ_); + lan743x_csr_write(adapter, HS_OTP_CMD_GO, OTP_CMD_GO_GO_); +} + +static int lan743x_hs_otp_cmd_cmplt_chk(struct lan743x_adapter *adapter) +{ + u32 val; + + return readx_poll_timeout(LAN743X_CSR_READ_OP, HS_OTP_STATUS, val, + !(val & OTP_STATUS_BUSY_), + 80, 10000); +} + +static int lan743x_hs_otp_read(struct lan743x_adapter *adapter, u32 offset, + u32 length, u8 *data) +{ + int ret; + int i; + + ret = lan743x_hs_syslock_acquire(adapter, LOCK_TIMEOUT_MAX_CNT); + if (ret < 0) + return ret; + + lan743x_hs_otp_power_up(adapter); + + ret = lan743x_hs_otp_cmd_cmplt_chk(adapter); + if (ret < 0) + goto power_down; + + lan743x_hs_syslock_release(adapter); + + for (i = 0; i < length; i++) { + ret = lan743x_hs_syslock_acquire(adapter, + LOCK_TIMEOUT_MAX_CNT); + if (ret < 0) + return ret; + + lan743x_hs_otp_set_address(adapter, offset + i); + + lan743x_hs_otp_read_go(adapter); + ret = lan743x_hs_otp_cmd_cmplt_chk(adapter); + if (ret < 0) + goto power_down; + + data[i] = lan743x_csr_read(adapter, HS_OTP_READ_DATA); + + lan743x_hs_syslock_release(adapter); + } + + ret = lan743x_hs_syslock_acquire(adapter, + LOCK_TIMEOUT_MAX_CNT); + if (ret < 0) + return ret; + +power_down: + lan743x_hs_otp_power_down(adapter); + lan743x_hs_syslock_release(adapter); + + return ret; +} + +static int lan743x_hs_otp_write(struct lan743x_adapter *adapter, u32 offset, + u32 length, u8 *data) +{ + int ret; + int i; + + ret = lan743x_hs_syslock_acquire(adapter, LOCK_TIMEOUT_MAX_CNT); + if (ret < 0) + return ret; + + lan743x_hs_otp_power_up(adapter); + + ret = lan743x_hs_otp_cmd_cmplt_chk(adapter); + if (ret < 0) + goto power_down; + + /* set to BYTE program mode */ + lan743x_csr_write(adapter, HS_OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_); + + lan743x_hs_syslock_release(adapter); + + for (i = 0; i < length; i++) { + ret = lan743x_hs_syslock_acquire(adapter, + LOCK_TIMEOUT_MAX_CNT); + if (ret < 0) + return ret; + + lan743x_hs_otp_set_address(adapter, offset + i); + + lan743x_csr_write(adapter, HS_OTP_PRGM_DATA, data[i]); + lan743x_csr_write(adapter, HS_OTP_TST_CMD, + OTP_TST_CMD_PRGVRFY_); + lan743x_csr_write(adapter, HS_OTP_CMD_GO, OTP_CMD_GO_GO_); + + ret = lan743x_hs_otp_cmd_cmplt_chk(adapter); + if (ret < 0) + goto power_down; + + lan743x_hs_syslock_release(adapter); + } + + ret = lan743x_hs_syslock_acquire(adapter, LOCK_TIMEOUT_MAX_CNT); + if (ret < 0) + return ret; + +power_down: + lan743x_hs_otp_power_down(adapter); + lan743x_hs_syslock_release(adapter); + + return ret; +} + static int lan743x_eeprom_wait(struct lan743x_adapter *adapter) { unsigned long start_time = jiffies; @@ -263,6 +480,100 @@ static int lan743x_eeprom_write(struct lan743x_adapter *adapter, return 0; } +static int lan743x_hs_eeprom_cmd_cmplt_chk(struct lan743x_adapter *adapter) +{ + u32 val; + + return readx_poll_timeout(LAN743X_CSR_READ_OP, HS_E2P_CMD, val, + (!(val & HS_E2P_CMD_EPC_BUSY_) || + (val & HS_E2P_CMD_EPC_TIMEOUT_)), + 50, 10000); +} + +static int lan743x_hs_eeprom_read(struct lan743x_adapter *adapter, + u32 offset, u32 length, u8 *data) +{ + int retval; + u32 val; + int i; + + retval = lan743x_hs_syslock_acquire(adapter, LOCK_TIMEOUT_MAX_CNT); + if (retval < 0) + return retval; + + retval = lan743x_hs_eeprom_cmd_cmplt_chk(adapter); + 
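lan743x_hs_otp_cmd_cmplt_chk() above uses readx_poll_timeout() from <linux/iopoll.h> (hence the new include): the macro repeatedly evaluates op(addr) into val, sleeping between reads, until the condition holds or the timeout in microseconds expires, returning 0 or -ETIMEDOUT. LAN743X_CSR_READ_OP() exists only to give the poller a single-argument read op; it picks up the adapter variable from the calling scope. A rough open-coded equivalent, using the hypothetical name hs_otp_wait_not_busy():

        /* Roughly what the readx_poll_timeout() call above boils down to
         * (sketch; the real macro also performs one final read after the
         * deadline before declaring a timeout).
         */
        static int hs_otp_wait_not_busy(struct lan743x_adapter *adapter)
        {
                unsigned long deadline = jiffies + usecs_to_jiffies(10000);
                u32 val;

                for (;;) {
                        val = lan743x_csr_read(adapter, HS_OTP_STATUS);
                        if (!(val & OTP_STATUS_BUSY_))
                                return 0;
                        if (time_after(jiffies, deadline))
                                return -ETIMEDOUT;
                        usleep_range(80, 160);
                }
        }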
lan743x_hs_syslock_release(adapter); + if (retval < 0) + return retval; + + for (i = 0; i < length; i++) { + retval = lan743x_hs_syslock_acquire(adapter, + LOCK_TIMEOUT_MAX_CNT); + if (retval < 0) + return retval; + + val = HS_E2P_CMD_EPC_BUSY_ | HS_E2P_CMD_EPC_CMD_READ_; + val |= (offset & HS_E2P_CMD_EPC_ADDR_MASK_); + lan743x_csr_write(adapter, HS_E2P_CMD, val); + retval = lan743x_hs_eeprom_cmd_cmplt_chk(adapter); + if (retval < 0) { + lan743x_hs_syslock_release(adapter); + return retval; + } + + val = lan743x_csr_read(adapter, HS_E2P_DATA); + + lan743x_hs_syslock_release(adapter); + + data[i] = val & 0xFF; + offset++; + } + + return 0; +} + +static int lan743x_hs_eeprom_write(struct lan743x_adapter *adapter, + u32 offset, u32 length, u8 *data) +{ + int retval; + u32 val; + int i; + + retval = lan743x_hs_syslock_acquire(adapter, LOCK_TIMEOUT_MAX_CNT); + if (retval < 0) + return retval; + + retval = lan743x_hs_eeprom_cmd_cmplt_chk(adapter); + lan743x_hs_syslock_release(adapter); + if (retval < 0) + return retval; + + for (i = 0; i < length; i++) { + retval = lan743x_hs_syslock_acquire(adapter, + LOCK_TIMEOUT_MAX_CNT); + if (retval < 0) + return retval; + + /* Fill data register */ + val = data[i]; + lan743x_csr_write(adapter, HS_E2P_DATA, val); + + /* Send "write" command */ + val = HS_E2P_CMD_EPC_BUSY_ | HS_E2P_CMD_EPC_CMD_WRITE_; + val |= (offset & HS_E2P_CMD_EPC_ADDR_MASK_); + lan743x_csr_write(adapter, HS_E2P_CMD, val); + + retval = lan743x_hs_eeprom_cmd_cmplt_chk(adapter); + lan743x_hs_syslock_release(adapter); + if (retval < 0) + return retval; + + offset++; + } + + return 0; +} + static void lan743x_ethtool_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -304,10 +615,21 @@ static int lan743x_ethtool_get_eeprom(struct net_device *netdev, struct lan743x_adapter *adapter = netdev_priv(netdev); int ret = 0; - if (adapter->flags & LAN743X_ADAPTER_FLAG_OTP) - ret = lan743x_otp_read(adapter, ee->offset, ee->len, data); - else - ret = lan743x_eeprom_read(adapter, ee->offset, ee->len, data); + if (adapter->flags & LAN743X_ADAPTER_FLAG_OTP) { + if (adapter->is_pci11x1x) + ret = lan743x_hs_otp_read(adapter, ee->offset, + ee->len, data); + else + ret = lan743x_otp_read(adapter, ee->offset, + ee->len, data); + } else { + if (adapter->is_pci11x1x) + ret = lan743x_hs_eeprom_read(adapter, ee->offset, + ee->len, data); + else + ret = lan743x_eeprom_read(adapter, ee->offset, + ee->len, data); + } return ret; } @@ -321,13 +643,22 @@ static int lan743x_ethtool_set_eeprom(struct net_device *netdev, if (adapter->flags & LAN743X_ADAPTER_FLAG_OTP) { /* Beware! OTP is One Time Programming ONLY! 
*/ if (ee->magic == LAN743X_OTP_MAGIC) { - ret = lan743x_otp_write(adapter, ee->offset, - ee->len, data); + if (adapter->is_pci11x1x) + ret = lan743x_hs_otp_write(adapter, ee->offset, + ee->len, data); + else + ret = lan743x_otp_write(adapter, ee->offset, + ee->len, data); } } else { if (ee->magic == LAN743X_EEPROM_MAGIC) { - ret = lan743x_eeprom_write(adapter, ee->offset, - ee->len, data); + if (adapter->is_pci11x1x) + ret = lan743x_hs_eeprom_write(adapter, + ee->offset, + ee->len, data); + else + ret = lan743x_eeprom_write(adapter, ee->offset, + ee->len, data); } } @@ -365,6 +696,14 @@ static const char lan743x_set1_sw_cnt_strings[][ETH_GSTRING_LEN] = { "RX Queue 3 Frames", }; +static const char lan743x_tx_queue_cnt_strings[][ETH_GSTRING_LEN] = { + "TX Queue 0 Frames", + "TX Queue 1 Frames", + "TX Queue 2 Frames", + "TX Queue 3 Frames", + "TX Total Queue Frames", +}; + static const char lan743x_set2_hw_cnt_strings[][ETH_GSTRING_LEN] = { "RX Total Frames", "EEE RX LPI Transitions", @@ -462,6 +801,8 @@ static const char lan743x_priv_flags_strings[][ETH_GSTRING_LEN] = { static void lan743x_ethtool_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { + struct lan743x_adapter *adapter = netdev_priv(netdev); + switch (stringset) { case ETH_SS_STATS: memcpy(data, lan743x_set0_hw_cnt_strings, @@ -473,6 +814,13 @@ static void lan743x_ethtool_get_strings(struct net_device *netdev, sizeof(lan743x_set1_sw_cnt_strings)], lan743x_set2_hw_cnt_strings, sizeof(lan743x_set2_hw_cnt_strings)); + if (adapter->is_pci11x1x) { + memcpy(&data[sizeof(lan743x_set0_hw_cnt_strings) + + sizeof(lan743x_set1_sw_cnt_strings) + + sizeof(lan743x_set2_hw_cnt_strings)], + lan743x_tx_queue_cnt_strings, + sizeof(lan743x_tx_queue_cnt_strings)); + } break; case ETH_SS_PRIV_FLAGS: memcpy(data, lan743x_priv_flags_strings, @@ -486,7 +834,9 @@ static void lan743x_ethtool_get_ethtool_stats(struct net_device *netdev, u64 *data) { struct lan743x_adapter *adapter = netdev_priv(netdev); + u64 total_queue_count = 0; int data_index = 0; + u64 pkt_cnt; u32 buf; int i; @@ -500,6 +850,14 @@ static void lan743x_ethtool_get_ethtool_stats(struct net_device *netdev, buf = lan743x_csr_read(adapter, lan743x_set2_hw_cnt_addr[i]); data[data_index++] = (u64)buf; } + if (adapter->is_pci11x1x) { + for (i = 0; i < ARRAY_SIZE(adapter->tx); i++) { + pkt_cnt = (u64)(adapter->tx[i].frame_count); + data[data_index++] = pkt_cnt; + total_queue_count += pkt_cnt; + } + data[data_index++] = total_queue_count; + } } static u32 lan743x_ethtool_get_priv_flags(struct net_device *netdev) @@ -520,6 +878,8 @@ static int lan743x_ethtool_set_priv_flags(struct net_device *netdev, u32 flags) static int lan743x_ethtool_get_sset_count(struct net_device *netdev, int sset) { + struct lan743x_adapter *adapter = netdev_priv(netdev); + switch (sset) { case ETH_SS_STATS: { @@ -528,6 +888,8 @@ static int lan743x_ethtool_get_sset_count(struct net_device *netdev, int sset) ret = ARRAY_SIZE(lan743x_set0_hw_cnt_strings); ret += ARRAY_SIZE(lan743x_set1_sw_cnt_strings); ret += ARRAY_SIZE(lan743x_set2_hw_cnt_strings); + if (adapter->is_pci11x1x) + ret += ARRAY_SIZE(lan743x_tx_queue_cnt_strings); return ret; } case ETH_SS_PRIV_FLAGS: diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 5282d25a6f9b..9ac0c2b96a15 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1776,6 +1776,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, 
dev_kfree_skb_irq(skb); goto unlock; } + tx->frame_count++; if (gso) lan743x_tx_frame_add_lso(tx, frame_length, nr_frags); @@ -2868,6 +2869,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, adapter->used_tx_channels = PCI11X1X_USED_TX_CHANNELS; adapter->max_vector_count = PCI11X1X_MAX_VECTOR_COUNT; pci11x1x_strap_get_status(adapter); + spin_lock_init(&adapter->eth_syslock_spinlock); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 2c8e76b4e1f7..1ca5f3216403 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -86,6 +86,40 @@ #define E2P_DATA (0x044) +/* Hearthstone top level & System Reg Addresses */ +#define ETH_CTRL_REG_ADDR_BASE (0x0000) +#define ETH_SYS_REG_ADDR_BASE (0x4000) +#define CONFIG_REG_ADDR_BASE (0x0000) +#define ETH_EEPROM_REG_ADDR_BASE (0x0E00) +#define ETH_OTP_REG_ADDR_BASE (0x1000) +#define SYS_LOCK_REG (0x00A0) +#define SYS_LOCK_REG_MAIN_LOCK_ BIT(7) +#define SYS_LOCK_REG_GEN_PERI_LOCK_ BIT(5) +#define SYS_LOCK_REG_SPI_PERI_LOCK_ BIT(4) +#define SYS_LOCK_REG_SMBUS_PERI_LOCK_ BIT(3) +#define SYS_LOCK_REG_UART_SS_LOCK_ BIT(2) +#define SYS_LOCK_REG_ENET_SS_LOCK_ BIT(1) +#define SYS_LOCK_REG_USB_SS_LOCK_ BIT(0) +#define ETH_SYSTEM_SYS_LOCK_REG (ETH_SYS_REG_ADDR_BASE + \ + CONFIG_REG_ADDR_BASE + \ + SYS_LOCK_REG) +#define HS_EEPROM_REG_ADDR_BASE (ETH_SYS_REG_ADDR_BASE + \ + ETH_EEPROM_REG_ADDR_BASE) +#define HS_E2P_CMD (HS_EEPROM_REG_ADDR_BASE + 0x0000) +#define HS_E2P_CMD_EPC_BUSY_ BIT(31) +#define HS_E2P_CMD_EPC_CMD_WRITE_ GENMASK(29, 28) +#define HS_E2P_CMD_EPC_CMD_READ_ (0x0) +#define HS_E2P_CMD_EPC_TIMEOUT_ BIT(17) +#define HS_E2P_CMD_EPC_ADDR_MASK_ GENMASK(15, 0) +#define HS_E2P_DATA (HS_EEPROM_REG_ADDR_BASE + 0x0004) +#define HS_E2P_DATA_MASK_ GENMASK(7, 0) +#define HS_E2P_CFG (HS_EEPROM_REG_ADDR_BASE + 0x0008) +#define HS_E2P_CFG_I2C_PULSE_MASK_ GENMASK(19, 16) +#define HS_E2P_CFG_EEPROM_SIZE_SEL_ BIT(12) +#define HS_E2P_CFG_I2C_BAUD_RATE_MASK_ GENMASK(9, 8) +#define HS_E2P_CFG_TEST_EEPR_TO_BYP_ BIT(0) +#define HS_E2P_PAD_CTL (HS_EEPROM_REG_ADDR_BASE + 0x000C) + #define GPIO_CFG0 (0x050) #define GPIO_CFG0_GPIO_DIR_BIT_(bit) BIT(16 + (bit)) #define GPIO_CFG0_GPIO_DATA_BIT_(bit) BIT(0 + (bit)) @@ -302,6 +336,7 @@ #define INT_MOD_CFG9 (0x7E4) #define PTP_CMD_CTL (0x0A00) +#define PTP_CMD_CTL_PTP_LTC_TARGET_READ_ BIT(13) #define PTP_CMD_CTL_PTP_CLK_STP_NSEC_ BIT(6) #define PTP_CMD_CTL_PTP_CLOCK_STEP_SEC_ BIT(5) #define PTP_CMD_CTL_PTP_CLOCK_LOAD_ BIT(4) @@ -323,9 +358,51 @@ (((value) & 0x7) << (1 + ((channel) << 2))) #define PTP_GENERAL_CONFIG_RELOAD_ADD_X_(channel) (BIT((channel) << 2)) +#define HS_PTP_GENERAL_CONFIG (0x0A04) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_X_MASK_(channel) \ + (0xf << (4 + ((channel) << 2))) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_100NS_ (0) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_500NS_ (1) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_1US_ (2) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_5US_ (3) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_10US_ (4) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_50US_ (5) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_100US_ (6) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_500US_ (7) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_1MS_ (8) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_5MS_ (9) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_10MS_ (10) +#define 
HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_50MS_ (11) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_100MS_ (12) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_200MS_ (13) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_TOGG_ (14) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_INT_ (15) +#define HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_X_SET_(channel, value) \ + (((value) & 0xf) << (4 + ((channel) << 2))) +#define HS_PTP_GENERAL_CONFIG_EVENT_POL_X_(channel) (BIT(1 + ((channel) * 2))) +#define HS_PTP_GENERAL_CONFIG_RELOAD_ADD_X_(channel) (BIT((channel) * 2)) + #define PTP_INT_STS (0x0A08) +#define PTP_INT_IO_FE_MASK_ GENMASK(31, 24) +#define PTP_INT_IO_FE_SHIFT_ (24) +#define PTP_INT_IO_FE_SET_(channel) BIT(24 + (channel)) +#define PTP_INT_IO_RE_MASK_ GENMASK(23, 16) +#define PTP_INT_IO_RE_SHIFT_ (16) +#define PTP_INT_IO_RE_SET_(channel) BIT(16 + (channel)) +#define PTP_INT_TX_TS_OVRFL_INT_ BIT(14) +#define PTP_INT_TX_SWTS_ERR_INT_ BIT(13) +#define PTP_INT_TX_TS_INT_ BIT(12) +#define PTP_INT_RX_TS_OVRFL_INT_ BIT(9) +#define PTP_INT_RX_TS_INT_ BIT(8) +#define PTP_INT_TIMER_INT_B_ BIT(1) +#define PTP_INT_TIMER_INT_A_ BIT(0) #define PTP_INT_EN_SET (0x0A0C) +#define PTP_INT_EN_FE_EN_SET_(channel) BIT(24 + (channel)) +#define PTP_INT_EN_RE_EN_SET_(channel) BIT(16 + (channel)) +#define PTP_INT_EN_TIMER_SET_(channel) BIT(channel) #define PTP_INT_EN_CLR (0x0A10) +#define PTP_INT_EN_FE_EN_CLR_(channel) BIT(24 + (channel)) +#define PTP_INT_EN_RE_EN_CLR_(channel) BIT(16 + (channel)) #define PTP_INT_BIT_TX_SWTS_ERR_ BIT(13) #define PTP_INT_BIT_TX_TS_ BIT(12) #define PTP_INT_BIT_TIMER_B_ BIT(1) @@ -343,6 +420,16 @@ #define PTP_CLOCK_TARGET_NS_X(channel) (0x0A34 + ((channel) << 4)) #define PTP_CLOCK_TARGET_RELOAD_SEC_X(channel) (0x0A38 + ((channel) << 4)) #define PTP_CLOCK_TARGET_RELOAD_NS_X(channel) (0x0A3C + ((channel) << 4)) +#define PTP_LTC_SET_SEC_HI (0x0A50) +#define PTP_LTC_SET_SEC_HI_SEC_47_32_MASK_ GENMASK(15, 0) +#define PTP_VERSION (0x0A54) +#define PTP_VERSION_TX_UP_MASK_ GENMASK(31, 24) +#define PTP_VERSION_TX_LO_MASK_ GENMASK(23, 16) +#define PTP_VERSION_RX_UP_MASK_ GENMASK(15, 8) +#define PTP_VERSION_RX_LO_MASK_ GENMASK(7, 0) +#define PTP_IO_SEL (0x0A58) +#define PTP_IO_SEL_MASK_ GENMASK(10, 8) +#define PTP_IO_SEL_SHIFT_ (8) #define PTP_LATENCY (0x0A5C) #define PTP_LATENCY_TX_SET_(tx_latency) (((u32)(tx_latency)) << 16) #define PTP_LATENCY_RX_SET_(rx_latency) \ @@ -367,6 +454,59 @@ #define PTP_TX_MSG_HEADER_MSG_TYPE_ (0x000F0000) #define PTP_TX_MSG_HEADER_MSG_TYPE_SYNC_ (0x00000000) +#define PTP_TX_CAP_INFO (0x0AB8) +#define PTP_TX_CAP_INFO_TX_CH_MASK_ GENMASK(1, 0) +#define PTP_TX_DOMAIN (0x0ABC) +#define PTP_TX_DOMAIN_MASK_ GENMASK(23, 16) +#define PTP_TX_DOMAIN_RANGE_EN_ BIT(15) +#define PTP_TX_DOMAIN_RANGE_MASK_ GENMASK(7, 0) +#define PTP_TX_SDOID (0x0AC0) +#define PTP_TX_SDOID_MASK_ GENMASK(23, 16) +#define PTP_TX_SDOID_RANGE_EN_ BIT(15) +#define PTP_TX_SDOID_11_0_MASK_ GENMASK(7, 0) +#define PTP_IO_CAP_CONFIG (0x0AC4) +#define PTP_IO_CAP_CONFIG_LOCK_FE_(channel) BIT(24 + (channel)) +#define PTP_IO_CAP_CONFIG_LOCK_RE_(channel) BIT(16 + (channel)) +#define PTP_IO_CAP_CONFIG_FE_CAP_EN_(channel) BIT(8 + (channel)) +#define PTP_IO_CAP_CONFIG_RE_CAP_EN_(channel) BIT(0 + (channel)) +#define PTP_IO_RE_LTC_SEC_CAP_X (0x0AC8) +#define PTP_IO_RE_LTC_NS_CAP_X (0x0ACC) +#define PTP_IO_FE_LTC_SEC_CAP_X (0x0AD0) +#define PTP_IO_FE_LTC_NS_CAP_X (0x0AD4) +#define PTP_IO_EVENT_OUTPUT_CFG (0x0AD8) +#define PTP_IO_EVENT_OUTPUT_CFG_SEL_(channel) BIT(16 + (channel)) +#define PTP_IO_EVENT_OUTPUT_CFG_EN_(channel) BIT(0 + (channel)) 
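The HS_PTP_GENERAL_CONFIG event widths above are 4-bit codes packed one nibble per event channel; the _X_MASK_ and _X_SET_ macros select and fill a given channel's nibble. A short sketch of updating one channel without disturbing the other (channel and width chosen purely for illustration, adapter assumed in scope):

        /* Sketch: set a 10 ms event width on event channel 1, preserving
         * channel 0's nibble via read-modify-write.
         */
        u32 gen_cfg = lan743x_csr_read(adapter, HS_PTP_GENERAL_CONFIG);

        gen_cfg &= ~HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_X_MASK_(1);
        gen_cfg |= HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_X_SET_(1,
                        HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_10MS_);
        lan743x_csr_write(adapter, HS_PTP_GENERAL_CONFIG, gen_cfg);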
+#define PTP_IO_PIN_CFG (0x0ADC) +#define PTP_IO_PIN_CFG_OBUF_TYPE_(channel) BIT(0 + (channel)) +#define PTP_LTC_RD_SEC_HI (0x0AF0) +#define PTP_LTC_RD_SEC_HI_SEC_47_32_MASK_ GENMASK(15, 0) +#define PTP_LTC_RD_SEC_LO (0x0AF4) +#define PTP_LTC_RD_NS (0x0AF8) +#define PTP_LTC_RD_NS_29_0_MASK_ GENMASK(29, 0) +#define PTP_LTC_RD_SUBNS (0x0AFC) +#define PTP_RX_USER_MAC_HI (0x0B00) +#define PTP_RX_USER_MAC_HI_47_32_MASK_ GENMASK(15, 0) +#define PTP_RX_USER_MAC_LO (0x0B04) +#define PTP_RX_USER_IP_ADDR_0 (0x0B20) +#define PTP_RX_USER_IP_ADDR_1 (0x0B24) +#define PTP_RX_USER_IP_ADDR_2 (0x0B28) +#define PTP_RX_USER_IP_ADDR_3 (0x0B2C) +#define PTP_RX_USER_IP_MASK_0 (0x0B30) +#define PTP_RX_USER_IP_MASK_1 (0x0B34) +#define PTP_RX_USER_IP_MASK_2 (0x0B38) +#define PTP_RX_USER_IP_MASK_3 (0x0B3C) +#define PTP_TX_USER_MAC_HI (0x0B40) +#define PTP_TX_USER_MAC_HI_47_32_MASK_ GENMASK(15, 0) +#define PTP_TX_USER_MAC_LO (0x0B44) +#define PTP_TX_USER_IP_ADDR_0 (0x0B60) +#define PTP_TX_USER_IP_ADDR_1 (0x0B64) +#define PTP_TX_USER_IP_ADDR_2 (0x0B68) +#define PTP_TX_USER_IP_ADDR_3 (0x0B6C) +#define PTP_TX_USER_IP_MASK_0 (0x0B70) +#define PTP_TX_USER_IP_MASK_1 (0x0B74) +#define PTP_TX_USER_IP_MASK_2 (0x0B78) +#define PTP_TX_USER_IP_MASK_3 (0x0B7C) + #define DMAC_CFG (0xC00) #define DMAC_CFG_COAL_EN_ BIT(16) #define DMAC_CFG_CH_ARB_SEL_RX_HIGH_ (0x00000000) @@ -522,6 +662,20 @@ #define OTP_STATUS (0x1030) #define OTP_STATUS_BUSY_ BIT(0) +/* Hearthstone OTP block registers */ +#define HS_OTP_BLOCK_BASE (ETH_SYS_REG_ADDR_BASE + \ + ETH_OTP_REG_ADDR_BASE) +#define HS_OTP_PWR_DN (HS_OTP_BLOCK_BASE + 0x0) +#define HS_OTP_ADDR_HIGH (HS_OTP_BLOCK_BASE + 0x4) +#define HS_OTP_ADDR_LOW (HS_OTP_BLOCK_BASE + 0x8) +#define HS_OTP_PRGM_DATA (HS_OTP_BLOCK_BASE + 0x10) +#define HS_OTP_PRGM_MODE (HS_OTP_BLOCK_BASE + 0x14) +#define HS_OTP_READ_DATA (HS_OTP_BLOCK_BASE + 0x18) +#define HS_OTP_FUNC_CMD (HS_OTP_BLOCK_BASE + 0x20) +#define HS_OTP_TST_CMD (HS_OTP_BLOCK_BASE + 0x24) +#define HS_OTP_CMD_GO (HS_OTP_BLOCK_BASE + 0x28) +#define HS_OTP_STATUS (HS_OTP_BLOCK_BASE + 0x30) + /* MAC statistics registers */ #define STAT_RX_FCS_ERRORS (0x1200) #define STAT_RX_ALIGNMENT_ERRORS (0x1204) @@ -715,6 +869,7 @@ struct lan743x_tx { int last_tail; struct napi_struct napi; + u32 frame_count; struct sk_buff *overflow_skb; }; @@ -772,6 +927,10 @@ struct lan743x_adapter { struct lan743x_rx rx[LAN743X_USED_RX_CHANNELS]; bool is_pci11x1x; bool is_sgmii_en; + /* protect ethernet syslock */ + spinlock_t eth_syslock_spinlock; + bool eth_syslock_en; + u32 eth_syslock_acquire_cnt; u8 max_tx_channels; u8 used_tx_channels; u8 max_vector_count; diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c index ec082594bbbd..6a11e2ceb013 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.c +++ b/drivers/net/ethernet/microchip/lan743x_ptp.c @@ -25,6 +25,18 @@ static void lan743x_ptp_clock_set(struct lan743x_adapter *adapter, u32 seconds, u32 nano_seconds, u32 sub_nano_seconds); +static int lan743x_get_channel(u32 ch_map) +{ + int idx; + + for (idx = 0; idx < 32; idx++) { + if (ch_map & (0x1 << idx)) + return idx; + } + + return -EINVAL; +} + int lan743x_gpio_init(struct lan743x_adapter *adapter) { struct lan743x_gpio *gpio = &adapter->gpio; @@ -179,6 +191,8 @@ static void lan743x_ptp_release_event_ch(struct lan743x_adapter *adapter, static void lan743x_ptp_clock_get(struct lan743x_adapter *adapter, u32 *seconds, u32 *nano_seconds, u32 *sub_nano_seconds); +static void lan743x_ptp_io_clock_get(struct lan743x_adapter 
*adapter, + u32 *sec, u32 *nsec, u32 *sub_nsec); static void lan743x_ptp_clock_step(struct lan743x_adapter *adapter, s64 time_step_ns); @@ -407,7 +421,11 @@ static int lan743x_ptpci_gettime64(struct ptp_clock_info *ptpci, u32 nano_seconds = 0; u32 seconds = 0; - lan743x_ptp_clock_get(adapter, &seconds, &nano_seconds, NULL); + if (adapter->is_pci11x1x) + lan743x_ptp_io_clock_get(adapter, &seconds, &nano_seconds, + NULL); + else + lan743x_ptp_clock_get(adapter, &seconds, &nano_seconds, NULL); ts->tv_sec = seconds; ts->tv_nsec = nano_seconds; @@ -671,6 +689,322 @@ failed: return ret; } +static void lan743x_ptp_io_perout_off(struct lan743x_adapter *adapter, + u32 index) +{ + struct lan743x_ptp *ptp = &adapter->ptp; + int perout_pin; + int event_ch; + u32 gen_cfg; + int val; + + event_ch = ptp->ptp_io_perout[index]; + if (event_ch >= 0) { + /* set target to far in the future, effectively disabling it */ + lan743x_csr_write(adapter, + PTP_CLOCK_TARGET_SEC_X(event_ch), + 0xFFFF0000); + lan743x_csr_write(adapter, + PTP_CLOCK_TARGET_NS_X(event_ch), + 0); + + gen_cfg = lan743x_csr_read(adapter, HS_PTP_GENERAL_CONFIG); + gen_cfg &= ~(HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_X_MASK_ + (event_ch)); + gen_cfg &= ~(HS_PTP_GENERAL_CONFIG_EVENT_POL_X_(event_ch)); + gen_cfg |= HS_PTP_GENERAL_CONFIG_RELOAD_ADD_X_(event_ch); + lan743x_csr_write(adapter, HS_PTP_GENERAL_CONFIG, gen_cfg); + if (event_ch) + lan743x_csr_write(adapter, PTP_INT_STS, + PTP_INT_TIMER_INT_B_); + else + lan743x_csr_write(adapter, PTP_INT_STS, + PTP_INT_TIMER_INT_A_); + lan743x_ptp_release_event_ch(adapter, event_ch); + ptp->ptp_io_perout[index] = -1; + } + + perout_pin = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT, index); + + /* Deselect Event output */ + val = lan743x_csr_read(adapter, PTP_IO_EVENT_OUTPUT_CFG); + + /* Disables the output of Local Time Target compare events */ + val &= ~PTP_IO_EVENT_OUTPUT_CFG_EN_(perout_pin); + lan743x_csr_write(adapter, PTP_IO_EVENT_OUTPUT_CFG, val); + + /* Configure as an open-drain driver */ + val = lan743x_csr_read(adapter, PTP_IO_PIN_CFG); + val &= ~PTP_IO_PIN_CFG_OBUF_TYPE_(perout_pin); + lan743x_csr_write(adapter, PTP_IO_PIN_CFG, val); + /* Dummy read to make sure the write operation succeeded */ + val = lan743x_csr_read(adapter, PTP_IO_PIN_CFG); +} + +static int lan743x_ptp_io_perout(struct lan743x_adapter *adapter, int on, + struct ptp_perout_request *perout_request) +{ + struct lan743x_ptp *ptp = &adapter->ptp; + u32 period_sec, period_nsec; + u32 start_sec, start_nsec; + u32 pulse_sec, pulse_nsec; + int pulse_width; + int perout_pin; + int event_ch; + u32 gen_cfg; + u32 index; + int val; + + index = perout_request->index; + event_ch = ptp->ptp_io_perout[index]; + + if (on) { + perout_pin = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT, index); + if (perout_pin < 0) + return -EBUSY; + } else { + lan743x_ptp_io_perout_off(adapter, index); + return 0; + } + + if (event_ch >= LAN743X_PTP_N_EVENT_CHAN) { + /* already on, turn off first */ + lan743x_ptp_io_perout_off(adapter, index); + } + + event_ch = lan743x_ptp_reserve_event_ch(adapter, index); + if (event_ch < 0) { + netif_warn(adapter, drv, adapter->netdev, + "Failed to reserve event channel %d for PEROUT\n", + index); + goto failed; + } + ptp->ptp_io_perout[index] = event_ch; + + if (perout_request->flags & PTP_PEROUT_DUTY_CYCLE) { + pulse_sec = perout_request->on.sec; + pulse_sec += perout_request->on.nsec / 1000000000; + pulse_nsec = perout_request->on.nsec % 1000000000; + } else { + pulse_sec = perout_request->period.sec; + pulse_sec +=
perout_request->period.nsec / 1000000000; + pulse_nsec = perout_request->period.nsec % 1000000000; + } + + if (pulse_sec == 0) { + if (pulse_nsec >= 400000000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_200MS_; + } else if (pulse_nsec >= 200000000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_100MS_; + } else if (pulse_nsec >= 100000000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_50MS_; + } else if (pulse_nsec >= 20000000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_10MS_; + } else if (pulse_nsec >= 10000000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_5MS_; + } else if (pulse_nsec >= 2000000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_1MS_; + } else if (pulse_nsec >= 1000000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_500US_; + } else if (pulse_nsec >= 200000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_100US_; + } else if (pulse_nsec >= 100000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_50US_; + } else if (pulse_nsec >= 20000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_10US_; + } else if (pulse_nsec >= 10000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_5US_; + } else if (pulse_nsec >= 2000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_1US_; + } else if (pulse_nsec >= 1000) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_500NS_; + } else if (pulse_nsec >= 200) { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_100NS_; + } else { + netif_warn(adapter, drv, adapter->netdev, + "perout period too small, min is 200ns\n"); + goto failed; + } + } else { + pulse_width = HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_200MS_; + } + + /* turn off by setting target far in future */ + lan743x_csr_write(adapter, + PTP_CLOCK_TARGET_SEC_X(event_ch), + 0xFFFF0000); + lan743x_csr_write(adapter, + PTP_CLOCK_TARGET_NS_X(event_ch), 0); + + /* Configure to pulse every period */ + gen_cfg = lan743x_csr_read(adapter, HS_PTP_GENERAL_CONFIG); + gen_cfg &= ~(HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_X_MASK_(event_ch)); + gen_cfg |= HS_PTP_GENERAL_CONFIG_CLOCK_EVENT_X_SET_ + (event_ch, pulse_width); + gen_cfg |= HS_PTP_GENERAL_CONFIG_EVENT_POL_X_(event_ch); + gen_cfg &= ~(HS_PTP_GENERAL_CONFIG_RELOAD_ADD_X_(event_ch)); + lan743x_csr_write(adapter, HS_PTP_GENERAL_CONFIG, gen_cfg); + + /* set the reload to one toggle cycle */ + period_sec = perout_request->period.sec; + period_sec += perout_request->period.nsec / 1000000000; + period_nsec = perout_request->period.nsec % 1000000000; + lan743x_csr_write(adapter, + PTP_CLOCK_TARGET_RELOAD_SEC_X(event_ch), + period_sec); + lan743x_csr_write(adapter, + PTP_CLOCK_TARGET_RELOAD_NS_X(event_ch), + period_nsec); + + start_sec = perout_request->start.sec; + start_sec += perout_request->start.nsec / 1000000000; + start_nsec = perout_request->start.nsec % 1000000000; + + /* set the start time */ + lan743x_csr_write(adapter, + PTP_CLOCK_TARGET_SEC_X(event_ch), + start_sec); + lan743x_csr_write(adapter, + PTP_CLOCK_TARGET_NS_X(event_ch), + start_nsec); + + /* Enable LTC Target Read */ + val = lan743x_csr_read(adapter, PTP_CMD_CTL); + val |= PTP_CMD_CTL_PTP_LTC_TARGET_READ_; + lan743x_csr_write(adapter, PTP_CMD_CTL, val); + + /* Configure as a push-pull driver */ + val = lan743x_csr_read(adapter, PTP_IO_PIN_CFG); + val |= PTP_IO_PIN_CFG_OBUF_TYPE_(perout_pin); + lan743x_csr_write(adapter, PTP_IO_PIN_CFG, val); + + /* Select Event output */ + val = lan743x_csr_read(adapter, PTP_IO_EVENT_OUTPUT_CFG); + if (event_ch) + /* Channel B as the output */ + val |= PTP_IO_EVENT_OUTPUT_CFG_SEL_(perout_pin); + else
+ /* Channel A as the output */ + val &= ~PTP_IO_EVENT_OUTPUT_CFG_SEL_(perout_pin); + + /* Enables the output of Local Time Target compare events */ + val |= PTP_IO_EVENT_OUTPUT_CFG_EN_(perout_pin); + lan743x_csr_write(adapter, PTP_IO_EVENT_OUTPUT_CFG, val); + + return 0; + +failed: + lan743x_ptp_io_perout_off(adapter, index); + return -ENODEV; +} + +static void lan743x_ptp_io_extts_off(struct lan743x_adapter *adapter, + u32 index) +{ + struct lan743x_ptp *ptp = &adapter->ptp; + struct lan743x_extts *extts; + int val; + + extts = &ptp->extts[index]; + /* PTP Interrupt Enable Clear Register */ + if (extts->flags & PTP_FALLING_EDGE) + val = PTP_INT_EN_FE_EN_CLR_(index); + else + val = PTP_INT_EN_RE_EN_CLR_(index); + lan743x_csr_write(adapter, PTP_INT_EN_CLR, val); + + /* Disables PTP-IO edge lock */ + val = lan743x_csr_read(adapter, PTP_IO_CAP_CONFIG); + if (extts->flags & PTP_FALLING_EDGE) { + val &= ~PTP_IO_CAP_CONFIG_LOCK_FE_(index); + val &= ~PTP_IO_CAP_CONFIG_FE_CAP_EN_(index); + } else { + val &= ~PTP_IO_CAP_CONFIG_LOCK_RE_(index); + val &= ~PTP_IO_CAP_CONFIG_RE_CAP_EN_(index); + } + lan743x_csr_write(adapter, PTP_IO_CAP_CONFIG, val); + + /* PTP-IO De-select register */ + val = lan743x_csr_read(adapter, PTP_IO_SEL); + val &= ~PTP_IO_SEL_MASK_; + lan743x_csr_write(adapter, PTP_IO_SEL, val); + + /* Clear timestamp */ + memset(&extts->ts, 0, sizeof(struct timespec64)); + extts->flags = 0; +} + +static int lan743x_ptp_io_event_cap_en(struct lan743x_adapter *adapter, + u32 flags, u32 channel) +{ + struct lan743x_ptp *ptp = &adapter->ptp; + int val; + + if ((flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + + mutex_lock(&ptp->command_lock); + /* PTP-IO Event Capture Enable */ + val = lan743x_csr_read(adapter, PTP_IO_CAP_CONFIG); + if (flags & PTP_FALLING_EDGE) { + val &= ~PTP_IO_CAP_CONFIG_LOCK_RE_(channel); + val &= ~PTP_IO_CAP_CONFIG_RE_CAP_EN_(channel); + val |= PTP_IO_CAP_CONFIG_LOCK_FE_(channel); + val |= PTP_IO_CAP_CONFIG_FE_CAP_EN_(channel); + } else { + /* Rising edge as default */ + val &= ~PTP_IO_CAP_CONFIG_LOCK_FE_(channel); + val &= ~PTP_IO_CAP_CONFIG_FE_CAP_EN_(channel); + val |= PTP_IO_CAP_CONFIG_LOCK_RE_(channel); + val |= PTP_IO_CAP_CONFIG_RE_CAP_EN_(channel); + } + lan743x_csr_write(adapter, PTP_IO_CAP_CONFIG, val); + + /* PTP-IO Select */ + val = lan743x_csr_read(adapter, PTP_IO_SEL); + val &= ~PTP_IO_SEL_MASK_; + val |= channel << PTP_IO_SEL_SHIFT_; + lan743x_csr_write(adapter, PTP_IO_SEL, val); + + /* PTP Interrupt Enable Register */ + if (flags & PTP_FALLING_EDGE) + val = PTP_INT_EN_FE_EN_SET_(channel); + else + val = PTP_INT_EN_RE_EN_SET_(channel); + lan743x_csr_write(adapter, PTP_INT_EN_SET, val); + + mutex_unlock(&ptp->command_lock); + + return 0; +} + +static int lan743x_ptp_io_extts(struct lan743x_adapter *adapter, int on, + struct ptp_extts_request *extts_request) +{ + struct lan743x_ptp *ptp = &adapter->ptp; + u32 flags = extts_request->flags; + u32 index = extts_request->index; + struct lan743x_extts *extts; + int extts_pin; + int ret = 0; + + extts = &ptp->extts[index]; + + if (on) { + extts_pin = ptp_find_pin(ptp->ptp_clock, PTP_PF_EXTTS, index); + if (extts_pin < 0) + return -EBUSY; + + ret = lan743x_ptp_io_event_cap_en(adapter, flags, index); + if (!ret) + extts->flags = flags; + } else { + lan743x_ptp_io_extts_off(adapter, index); + } + + return ret; +} + static int lan743x_ptpci_enable(struct ptp_clock_info
*ptpci, if (request) { switch (request->type) { case PTP_CLK_REQ_EXTTS: + if (request->extts.index < ptpci->n_ext_ts) + return lan743x_ptp_io_extts(adapter, on, + &request->extts); return -EINVAL; case PTP_CLK_REQ_PEROUT: - if (request->perout.index < ptpci->n_per_out) - return lan743x_ptp_perout(adapter, on, + if (request->perout.index < ptpci->n_per_out) { + if (adapter->is_pci11x1x) + return lan743x_ptp_io_perout(adapter, on, + &request->perout); + else + return lan743x_ptp_perout(adapter, on, &request->perout); + } return -EINVAL; case PTP_CLK_REQ_PPS: return -EINVAL; @@ -715,8 +1057,8 @@ static int lan743x_ptpci_verify_pin_config(struct ptp_clock_info *ptp, switch (func) { case PTP_PF_NONE: case PTP_PF_PEROUT: - break; case PTP_PF_EXTTS: + break; case PTP_PF_PHYSYNC: default: result = -1; @@ -725,6 +1067,33 @@ static int lan743x_ptpci_verify_pin_config(struct ptp_clock_info *ptp, return result; } +static void lan743x_ptp_io_event_clock_get(struct lan743x_adapter *adapter, + bool fe, u8 channel, + struct timespec64 *ts) +{ + struct lan743x_ptp *ptp = &adapter->ptp; + struct lan743x_extts *extts; + u32 sec, nsec; + + mutex_lock(&ptp->command_lock); + if (fe) { + sec = lan743x_csr_read(adapter, PTP_IO_FE_LTC_SEC_CAP_X); + nsec = lan743x_csr_read(adapter, PTP_IO_FE_LTC_NS_CAP_X); + } else { + sec = lan743x_csr_read(adapter, PTP_IO_RE_LTC_SEC_CAP_X); + nsec = lan743x_csr_read(adapter, PTP_IO_RE_LTC_NS_CAP_X); + } + + mutex_unlock(&ptp->command_lock); + + /* Update Local timestamp */ + extts = &ptp->extts[channel]; + extts->ts.tv_sec = sec; + extts->ts.tv_nsec = nsec; + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + static long lan743x_ptpci_do_aux_work(struct ptp_clock_info *ptpci) { struct lan743x_ptp *ptp = @@ -733,41 +1102,121 @@ static long lan743x_ptpci_do_aux_work(struct ptp_clock_info *ptpci) container_of(ptp, struct lan743x_adapter, ptp); u32 cap_info, cause, header, nsec, seconds; bool new_timestamp_available = false; + struct ptp_clock_event ptp_event; + struct timespec64 ts; + int ptp_int_sts; int count = 0; + int channel; + s64 ns; - while ((count < 100) && - (lan743x_csr_read(adapter, PTP_INT_STS) & PTP_INT_BIT_TX_TS_)) { + ptp_int_sts = lan743x_csr_read(adapter, PTP_INT_STS); + while ((count < 100) && ptp_int_sts) { count++; - cap_info = lan743x_csr_read(adapter, PTP_CAP_INFO); - - if (PTP_CAP_INFO_TX_TS_CNT_GET_(cap_info) > 0) { - seconds = lan743x_csr_read(adapter, - PTP_TX_EGRESS_SEC); - nsec = lan743x_csr_read(adapter, PTP_TX_EGRESS_NS); - cause = (nsec & - PTP_TX_EGRESS_NS_CAPTURE_CAUSE_MASK_); - header = lan743x_csr_read(adapter, - PTP_TX_MSG_HEADER); - - if (cause == PTP_TX_EGRESS_NS_CAPTURE_CAUSE_SW_) { - nsec &= PTP_TX_EGRESS_NS_TS_NS_MASK_; - lan743x_ptp_tx_ts_enqueue_ts(adapter, - seconds, nsec, - header); - new_timestamp_available = true; - } else if (cause == - PTP_TX_EGRESS_NS_CAPTURE_CAUSE_AUTO_) { - netif_err(adapter, drv, adapter->netdev, - "Auto capture cause not supported\n"); + + if (ptp_int_sts & PTP_INT_BIT_TX_TS_) { + cap_info = lan743x_csr_read(adapter, PTP_CAP_INFO); + + if (PTP_CAP_INFO_TX_TS_CNT_GET_(cap_info) > 0) { + seconds = lan743x_csr_read(adapter, + PTP_TX_EGRESS_SEC); + nsec = lan743x_csr_read(adapter, + PTP_TX_EGRESS_NS); + cause = (nsec & + PTP_TX_EGRESS_NS_CAPTURE_CAUSE_MASK_); + header = lan743x_csr_read(adapter, + PTP_TX_MSG_HEADER); + + if (cause == + PTP_TX_EGRESS_NS_CAPTURE_CAUSE_SW_) { + nsec &= PTP_TX_EGRESS_NS_TS_NS_MASK_; + lan743x_ptp_tx_ts_enqueue_ts(adapter, + seconds, + nsec, + header); + new_timestamp_available = true; + } 
else if (cause == + PTP_TX_EGRESS_NS_CAPTURE_CAUSE_AUTO_) { + netif_err(adapter, drv, adapter->netdev, + "Auto capture cause not supported\n"); + } else { + netif_warn(adapter, drv, adapter->netdev, + "unknown tx timestamp capture cause\n"); + } } else { netif_warn(adapter, drv, adapter->netdev, - "unknown tx timestamp capture cause\n"); + "TX TS INT but no TX TS CNT\n"); } - } else { - netif_warn(adapter, drv, adapter->netdev, - "TX TS INT but no TX TS CNT\n"); + lan743x_csr_write(adapter, PTP_INT_STS, + PTP_INT_BIT_TX_TS_); } - lan743x_csr_write(adapter, PTP_INT_STS, PTP_INT_BIT_TX_TS_); + + if (ptp_int_sts & PTP_INT_IO_FE_MASK_) { + do { + channel = lan743x_get_channel((ptp_int_sts & + PTP_INT_IO_FE_MASK_) >> + PTP_INT_IO_FE_SHIFT_); + if (channel >= 0 && + channel < PCI11X1X_PTP_IO_MAX_CHANNELS) { + lan743x_ptp_io_event_clock_get(adapter, + true, + channel, + &ts); + /* PTP Falling Event post */ + ns = timespec64_to_ns(&ts); + ptp_event.timestamp = ns; + ptp_event.index = channel; + ptp_event.type = PTP_CLOCK_EXTTS; + ptp_clock_event(ptp->ptp_clock, + &ptp_event); + lan743x_csr_write(adapter, PTP_INT_STS, + PTP_INT_IO_FE_SET_ + (channel)); + ptp_int_sts &= ~(1 << + (PTP_INT_IO_FE_SHIFT_ + + channel)); + } else { + /* Clear falling event interrupts */ + lan743x_csr_write(adapter, PTP_INT_STS, + PTP_INT_IO_FE_MASK_); + ptp_int_sts &= ~PTP_INT_IO_FE_MASK_; + } + } while (ptp_int_sts & PTP_INT_IO_FE_MASK_); + } + + if (ptp_int_sts & PTP_INT_IO_RE_MASK_) { + do { + channel = lan743x_get_channel((ptp_int_sts & + PTP_INT_IO_RE_MASK_) >> + PTP_INT_IO_RE_SHIFT_); + if (channel >= 0 && + channel < PCI11X1X_PTP_IO_MAX_CHANNELS) { + lan743x_ptp_io_event_clock_get(adapter, + false, + channel, + &ts); + /* PTP Rising Event post */ + ns = timespec64_to_ns(&ts); + ptp_event.timestamp = ns; + ptp_event.index = channel; + ptp_event.type = PTP_CLOCK_EXTTS; + ptp_clock_event(ptp->ptp_clock, + &ptp_event); + lan743x_csr_write(adapter, PTP_INT_STS, + PTP_INT_IO_RE_SET_ + (channel)); + ptp_int_sts &= ~(1 << + (PTP_INT_IO_RE_SHIFT_ + + channel)); + } else { + /* Clear Rising event interrupt */ + lan743x_csr_write(adapter, PTP_INT_STS, + PTP_INT_IO_RE_MASK_); + ptp_int_sts &= ~PTP_INT_IO_RE_MASK_; + } + } while (ptp_int_sts & PTP_INT_IO_RE_MASK_); + } + + ptp_int_sts = lan743x_csr_read(adapter, PTP_INT_STS); } if (new_timestamp_available) @@ -802,6 +1251,28 @@ static void lan743x_ptp_clock_get(struct lan743x_adapter *adapter, mutex_unlock(&ptp->command_lock); } +static void lan743x_ptp_io_clock_get(struct lan743x_adapter *adapter, + u32 *sec, u32 *nsec, u32 *sub_nsec) +{ + struct lan743x_ptp *ptp = &adapter->ptp; + + mutex_lock(&ptp->command_lock); + lan743x_csr_write(adapter, PTP_CMD_CTL, PTP_CMD_CTL_PTP_CLOCK_READ_); + lan743x_ptp_wait_till_cmd_done(adapter, PTP_CMD_CTL_PTP_CLOCK_READ_); + + if (sec) + (*sec) = lan743x_csr_read(adapter, PTP_LTC_RD_SEC_LO); + + if (nsec) + (*nsec) = lan743x_csr_read(adapter, PTP_LTC_RD_NS); + + if (sub_nsec) + (*sub_nsec) = + lan743x_csr_read(adapter, PTP_LTC_RD_SUBNS); + + mutex_unlock(&ptp->command_lock); +} + static void lan743x_ptp_clock_step(struct lan743x_adapter *adapter, s64 time_step_ns) { @@ -815,8 +1286,12 @@ static void lan743x_ptp_clock_step(struct lan743x_adapter *adapter, if (time_step_ns > 15000000000LL) { /* convert to clock set */ - lan743x_ptp_clock_get(adapter, &unsigned_seconds, - &nano_seconds, NULL); + if (adapter->is_pci11x1x) + lan743x_ptp_io_clock_get(adapter, &unsigned_seconds, + &nano_seconds, NULL); + else + lan743x_ptp_clock_get(adapter, 
&unsigned_seconds, + &nano_seconds, NULL); unsigned_seconds += div_u64_rem(time_step_ns, 1000000000LL, &remainder); nano_seconds += remainder; @@ -831,8 +1306,13 @@ static void lan743x_ptp_clock_step(struct lan743x_adapter *adapter, /* convert to clock set */ time_step_ns = -time_step_ns; - lan743x_ptp_clock_get(adapter, &unsigned_seconds, - &nano_seconds, NULL); + if (adapter->is_pci11x1x) { + lan743x_ptp_io_clock_get(adapter, &unsigned_seconds, + &nano_seconds, NULL); + } else { + lan743x_ptp_clock_get(adapter, &unsigned_seconds, + &nano_seconds, NULL); + } unsigned_seconds -= div_u64_rem(time_step_ns, 1000000000LL, &remainder); nano_seconds_step = remainder; @@ -1061,6 +1541,8 @@ int lan743x_ptp_open(struct lan743x_adapter *adapter) n_pins = LAN7430_N_GPIO; break; case ID_REV_ID_LAN7431_: + case ID_REV_ID_A011_: + case ID_REV_ID_A041_: n_pins = LAN7431_N_GPIO; break; default: @@ -1088,10 +1570,10 @@ int lan743x_ptp_open(struct lan743x_adapter *adapter) adapter->netdev->dev_addr); ptp->ptp_clock_info.max_adj = LAN743X_PTP_MAX_FREQ_ADJ_IN_PPB; ptp->ptp_clock_info.n_alarm = 0; - ptp->ptp_clock_info.n_ext_ts = 0; + ptp->ptp_clock_info.n_ext_ts = LAN743X_PTP_N_EXTTS; ptp->ptp_clock_info.n_per_out = LAN743X_PTP_N_EVENT_CHAN; ptp->ptp_clock_info.n_pins = n_pins; - ptp->ptp_clock_info.pps = 0; + ptp->ptp_clock_info.pps = LAN743X_PTP_N_PPS; ptp->ptp_clock_info.pin_config = ptp->pin_config; ptp->ptp_clock_info.adjfine = lan743x_ptpci_adjfine; ptp->ptp_clock_info.adjfreq = lan743x_ptpci_adjfreq; diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.h b/drivers/net/ethernet/microchip/lan743x_ptp.h index 7663bf5d2e33..e26d4eff7133 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.h +++ b/drivers/net/ethernet/microchip/lan743x_ptp.h @@ -18,6 +18,9 @@ */ #define LAN743X_PTP_N_EVENT_CHAN 2 #define LAN743X_PTP_N_PEROUT LAN743X_PTP_N_EVENT_CHAN +#define LAN743X_PTP_N_EXTTS 4 +#define LAN743X_PTP_N_PPS 0 +#define PCI11X1X_PTP_IO_MAX_CHANNELS 8 struct lan743x_adapter; @@ -60,6 +63,11 @@ struct lan743x_ptp_perout { int gpio_pin; /* GPIO pin where output appears */ }; +struct lan743x_extts { + int flags; + struct timespec64 ts; +}; + struct lan743x_ptp { int flags; @@ -72,6 +80,8 @@ struct lan743x_ptp { unsigned long used_event_ch; struct lan743x_ptp_perout perout[LAN743X_PTP_N_PEROUT]; + int ptp_io_perout[LAN743X_PTP_N_PEROUT]; /* PTP event channel (0=channel A, 1=channel B) */ + struct lan743x_extts extts[LAN743X_PTP_N_EXTTS]; bool leds_multiplexed; bool led_enabled[LAN7430_N_LED]; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 9a8e4f201eb1..82b1b3c9a065 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -286,7 +286,8 @@ static void sparx5_fdb_call_notifiers(enum switchdev_notifier_type type, } int sparx5_add_mact_entry(struct sparx5 *sparx5, - struct sparx5_port *port, + struct net_device *dev, + u16 portno, const unsigned char *addr, u16 vid) { struct sparx5_mact_entry *mact_entry; @@ -302,14 +303,14 @@ int sparx5_add_mact_entry(struct sparx5 *sparx5, * mact thread to start the frame will reach CPU and the CPU will * add the entry but without the extern_learn flag. 
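With n_ext_ts raised to LAN743X_PTP_N_EXTTS above, external timestamping becomes reachable through the standard PTP character device ABI. A minimal user-space sketch (device path and channel number are illustrative, error handling trimmed):

        /* Arm rising-edge external timestamps on channel 0 of /dev/ptp0
         * and read back one event. Illustrative only.
         */
        #include <fcntl.h>
        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <unistd.h>
        #include <linux/ptp_clock.h>

        int main(void)
        {
                struct ptp_extts_request req = {
                        .index = 0,
                        .flags = PTP_ENABLE_FEATURE | PTP_RISING_EDGE,
                };
                struct ptp_extts_event ev;
                int fd = open("/dev/ptp0", O_RDWR);

                if (fd < 0 || ioctl(fd, PTP_EXTTS_REQUEST, &req))
                        return 1;
                if (read(fd, &ev, sizeof(ev)) == sizeof(ev))
                        printf("extts[%u] %lld.%09u\n", ev.index,
                               (long long)ev.t.sec, ev.t.nsec);
                return 0;
        }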
*/ - mact_entry = find_mact_entry(sparx5, addr, vid, port->portno); + mact_entry = find_mact_entry(sparx5, addr, vid, portno); if (mact_entry) goto update_hw; /* Add the entry in SW MAC table not to get the notification when * SW is pulling again */ - mact_entry = alloc_mact_entry(sparx5, addr, vid, port->portno); + mact_entry = alloc_mact_entry(sparx5, addr, vid, portno); if (!mact_entry) return -ENOMEM; @@ -318,13 +319,13 @@ int sparx5_add_mact_entry(struct sparx5 *sparx5, mutex_unlock(&sparx5->mact_lock); update_hw: - ret = sparx5_mact_learn(sparx5, port->portno, addr, vid); + ret = sparx5_mact_learn(sparx5, portno, addr, vid); /* New entry? */ if (mact_entry->flags == 0) { mact_entry->flags |= MAC_ENT_LOCK; /* Don't age this */ sparx5_fdb_call_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE, addr, vid, - port->ndev, true); + dev, true); } return ret; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 33892dfc3b2f..df68a0891029 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -305,7 +305,8 @@ bool sparx5_mact_getnext(struct sparx5 *sparx5, int sparx5_mact_forget(struct sparx5 *sparx5, const unsigned char mac[ETH_ALEN], u16 vid); int sparx5_add_mact_entry(struct sparx5 *sparx5, - struct sparx5_port *port, + struct net_device *dev, + u16 portno, const unsigned char *addr, u16 vid); int sparx5_del_mact_entry(struct sparx5 *sparx5, const unsigned char *addr, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c index cd110c31e5a4..0ed1ea7727c5 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ptp.c @@ -45,7 +45,7 @@ static u64 sparx5_ptp_get_1ppm(struct sparx5 *sparx5) res = 920535763834; break; default: - WARN_ON("Invalid core clock"); + WARN(1, "Invalid core clock"); break; } @@ -67,7 +67,7 @@ static u64 sparx5_ptp_get_nominal_value(struct sparx5 *sparx5) res = 0x0CC6666666666666; break; default: - WARN_ON("Invalid core clock"); + WARN(1, "Invalid core clock"); break; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c index dacb87f49552..2d5de1c06fab 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c @@ -16,6 +16,7 @@ struct sparx5_switchdev_event_work { struct work_struct work; struct switchdev_notifier_fdb_info fdb_info; struct net_device *dev; + struct sparx5 *sparx5; unsigned long event; }; @@ -247,31 +248,34 @@ static void sparx5_switchdev_bridge_fdb_event_work(struct work_struct *work) struct switchdev_notifier_fdb_info *fdb_info; struct sparx5_port *port; struct sparx5 *sparx5; + bool host_addr; rtnl_lock(); - if (!sparx5_netdevice_check(dev)) - goto out; - - port = netdev_priv(dev); - sparx5 = port->sparx5; + if (!sparx5_netdevice_check(dev)) { + host_addr = true; + sparx5 = switchdev_work->sparx5; + } else { + host_addr = false; + sparx5 = switchdev_work->sparx5; + port = netdev_priv(dev); + } fdb_info = &switchdev_work->fdb_info; switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: - if (!fdb_info->added_by_user) - break; - sparx5_add_mact_entry(sparx5, port, fdb_info->addr, - fdb_info->vid); + if (host_addr) + sparx5_add_mact_entry(sparx5, dev, PGID_CPU, + fdb_info->addr, fdb_info->vid); + else + sparx5_add_mact_entry(sparx5, port->ndev, 
port->portno, + fdb_info->addr, fdb_info->vid); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: - if (!fdb_info->added_by_user) - break; sparx5_del_mact_entry(sparx5, fdb_info->addr, fdb_info->vid); break; } -out: rtnl_unlock(); kfree(switchdev_work->fdb_info.addr); kfree(switchdev_work); @@ -283,15 +287,18 @@ static void sparx5_schedule_work(struct work_struct *work) queue_work(sparx5_owq, work); } -static int sparx5_switchdev_event(struct notifier_block *unused, +static int sparx5_switchdev_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); struct sparx5_switchdev_event_work *switchdev_work; struct switchdev_notifier_fdb_info *fdb_info; struct switchdev_notifier_info *info = ptr; + struct sparx5 *spx5; int err; + spx5 = container_of(nb, struct sparx5, switchdev_nb); + switch (event) { case SWITCHDEV_PORT_ATTR_SET: err = switchdev_handle_port_attr_set(dev, ptr, @@ -307,6 +314,7 @@ static int sparx5_switchdev_event(struct notifier_block *unused, switchdev_work->dev = dev; switchdev_work->event = event; + switchdev_work->sparx5 = spx5; fdb_info = container_of(info, struct switchdev_notifier_fdb_info, @@ -333,54 +341,6 @@ err_addr_alloc: return NOTIFY_BAD; } -static void sparx5_sync_port_dev_addr(struct sparx5 *sparx5, - struct sparx5_port *port, - u16 vid, bool add) -{ - if (!port || - !test_bit(port->portno, sparx5->bridge_mask)) - return; /* Skip null/host interfaces */ - - /* Bridge connects to vid? */ - if (add) { - /* Add port MAC address from the VLAN */ - sparx5_mact_learn(sparx5, PGID_CPU, - port->ndev->dev_addr, vid); - } else { - /* Control port addr visibility depending on - * port VLAN connectivity. - */ - if (test_bit(port->portno, sparx5->vlan_mask[vid])) - sparx5_mact_learn(sparx5, PGID_CPU, - port->ndev->dev_addr, vid); - else - sparx5_mact_forget(sparx5, - port->ndev->dev_addr, vid); - } -} - -static void sparx5_sync_bridge_dev_addr(struct net_device *dev, - struct sparx5 *sparx5, - u16 vid, bool add) -{ - int i; - - /* First, handle bridge address'es */ - if (add) { - sparx5_mact_learn(sparx5, PGID_CPU, dev->dev_addr, - vid); - sparx5_mact_learn(sparx5, PGID_BCAST, dev->broadcast, - vid); - } else { - sparx5_mact_forget(sparx5, dev->dev_addr, vid); - sparx5_mact_forget(sparx5, dev->broadcast, vid); - } - - /* Now look at bridged ports */ - for (i = 0; i < SPX5_PORTS; i++) - sparx5_sync_port_dev_addr(sparx5, sparx5->ports[i], vid, add); -} - static int sparx5_handle_port_vlan_add(struct net_device *dev, struct notifier_block *nb, const struct switchdev_obj_port_vlan *v) @@ -392,7 +352,9 @@ static int sparx5_handle_port_vlan_add(struct net_device *dev, container_of(nb, struct sparx5, switchdev_blocking_nb); - sparx5_sync_bridge_dev_addr(dev, sparx5, v->vid, true); + /* Flood broadcast to CPU */ + sparx5_mact_learn(sparx5, PGID_BCAST, dev->broadcast, + v->vid); return 0; } @@ -438,7 +400,7 @@ static int sparx5_handle_port_vlan_del(struct net_device *dev, container_of(nb, struct sparx5, switchdev_blocking_nb); - sparx5_sync_bridge_dev_addr(dev, sparx5, vid, false); + sparx5_mact_forget(sparx5, dev->broadcast, vid); return 0; } @@ -449,9 +411,6 @@ static int sparx5_handle_port_vlan_del(struct net_device *dev, if (ret) return ret; - /* Delete the port MAC address with the matching VLAN information */ - sparx5_mact_forget(port->sparx5, port->ndev->dev_addr, vid); - return 0; } diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 21134125a6e4..e443bd8b2d09 100644 --- 
a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2907,6 +2907,198 @@ void ocelot_port_bridge_flags(struct ocelot *ocelot, int port, } EXPORT_SYMBOL(ocelot_port_bridge_flags); +int ocelot_port_get_default_prio(struct ocelot *ocelot, int port) +{ + int val = ocelot_read_gix(ocelot, ANA_PORT_QOS_CFG, port); + + return ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(val); +} +EXPORT_SYMBOL_GPL(ocelot_port_get_default_prio); + +int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio) +{ + if (prio >= OCELOT_NUM_TC) + return -ERANGE; + + ocelot_rmw_gix(ocelot, + ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL(prio), + ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_M, + ANA_PORT_QOS_CFG, + port); + + return 0; +} +EXPORT_SYMBOL_GPL(ocelot_port_set_default_prio); + +int ocelot_port_get_dscp_prio(struct ocelot *ocelot, int port, u8 dscp) +{ + int qos_cfg = ocelot_read_gix(ocelot, ANA_PORT_QOS_CFG, port); + int dscp_cfg = ocelot_read_rix(ocelot, ANA_DSCP_CFG, dscp); + + /* Return error if DSCP prioritization isn't enabled */ + if (!(qos_cfg & ANA_PORT_QOS_CFG_QOS_DSCP_ENA)) + return -EOPNOTSUPP; + + if (qos_cfg & ANA_PORT_QOS_CFG_DSCP_TRANSLATE_ENA) { + dscp = ANA_DSCP_CFG_DSCP_TRANSLATE_VAL_X(dscp_cfg); + /* Re-read ANA_DSCP_CFG for the translated DSCP */ + dscp_cfg = ocelot_read_rix(ocelot, ANA_DSCP_CFG, dscp); + } + + /* If the DSCP value is not trusted, the QoS classification falls back + * to VLAN PCP or port-based default. + */ + if (!(dscp_cfg & ANA_DSCP_CFG_DSCP_TRUST_ENA)) + return -EOPNOTSUPP; + + return ANA_DSCP_CFG_QOS_DSCP_VAL_X(dscp_cfg); +} +EXPORT_SYMBOL_GPL(ocelot_port_get_dscp_prio); + +int ocelot_port_add_dscp_prio(struct ocelot *ocelot, int port, u8 dscp, u8 prio) +{ + int mask, val; + + if (prio >= OCELOT_NUM_TC) + return -ERANGE; + + /* There is at least one app table priority (this one), so we need to + * make sure DSCP prioritization is enabled on the port. + * Also make sure DSCP translation is disabled + * (dcbnl doesn't support it). + */ + mask = ANA_PORT_QOS_CFG_QOS_DSCP_ENA | + ANA_PORT_QOS_CFG_DSCP_TRANSLATE_ENA; + + ocelot_rmw_gix(ocelot, ANA_PORT_QOS_CFG_QOS_DSCP_ENA, mask, + ANA_PORT_QOS_CFG, port); + + /* Trust this DSCP value and map it to the given QoS class */ + val = ANA_DSCP_CFG_DSCP_TRUST_ENA | ANA_DSCP_CFG_QOS_DSCP_VAL(prio); + + ocelot_write_rix(ocelot, val, ANA_DSCP_CFG, dscp); + + return 0; +} +EXPORT_SYMBOL_GPL(ocelot_port_add_dscp_prio); + +int ocelot_port_del_dscp_prio(struct ocelot *ocelot, int port, u8 dscp, u8 prio) +{ + int dscp_cfg = ocelot_read_rix(ocelot, ANA_DSCP_CFG, dscp); + int mask, i; + + /* During a "dcb app replace" command, the new app table entry will be + * added first, then the old one will be deleted. But the hardware only + * supports one QoS class per DSCP value (duh), so if we blindly delete + * the app table entry for this DSCP value, we end up deleting the + * entry with the new priority. Avoid that by checking whether user + * space wants to delete the priority which is currently configured, or + * something else which is no longer current. + */ + if (ANA_DSCP_CFG_QOS_DSCP_VAL_X(dscp_cfg) != prio) + return 0; + + /* Untrust this DSCP value */ + ocelot_write_rix(ocelot, 0, ANA_DSCP_CFG, dscp); + + for (i = 0; i < 64; i++) { + int dscp_cfg = ocelot_read_rix(ocelot, ANA_DSCP_CFG, i); + + /* There are still app table entries on the port, so we need to + * keep DSCP enabled, nothing to do. 
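The "dcb app replace" comment above is worth unpacking: dcbnl installs the new app table entry before deleting the old one, so by the time the delete callback runs, the hardware already holds the replacement priority. A sketch of the sequence the driver must tolerate (port and DSCP values illustrative, ocelot and port assumed in scope):

        /* User space replaces DSCP 46 -> prio 5 with DSCP 46 -> prio 6.
         * dcbnl calls setapp(new) first, then delapp(old); the delete of
         * the stale prio 5 must not clobber the freshly written prio 6.
         */
        ocelot_port_add_dscp_prio(ocelot, port, 46, 6); /* programs prio 6 */
        ocelot_port_del_dscp_prio(ocelot, port, 46, 5); /* mismatch: no-op */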
+ */ + if (dscp_cfg & ANA_DSCP_CFG_DSCP_TRUST_ENA) + return 0; + } + + /* Disable DSCP QoS classification if there isn't any trusted + * DSCP value left. + */ + mask = ANA_PORT_QOS_CFG_QOS_DSCP_ENA | + ANA_PORT_QOS_CFG_DSCP_TRANSLATE_ENA; + + ocelot_rmw_gix(ocelot, 0, mask, ANA_PORT_QOS_CFG, port); + + return 0; +} +EXPORT_SYMBOL_GPL(ocelot_port_del_dscp_prio); + +struct ocelot_mirror *ocelot_mirror_get(struct ocelot *ocelot, int to, + struct netlink_ext_ack *extack) +{ + struct ocelot_mirror *m = ocelot->mirror; + + if (m) { + if (m->to != to) { + NL_SET_ERR_MSG_MOD(extack, + "Mirroring already configured towards different egress port"); + return ERR_PTR(-EBUSY); + } + + refcount_inc(&m->refcount); + return m; + } + + m = kzalloc(sizeof(*m), GFP_KERNEL); + if (!m) + return ERR_PTR(-ENOMEM); + + m->to = to; + refcount_set(&m->refcount, 1); + ocelot->mirror = m; + + /* Program the mirror port to hardware */ + ocelot_write(ocelot, BIT(to), ANA_MIRRORPORTS); + + return m; +} + +void ocelot_mirror_put(struct ocelot *ocelot) +{ + struct ocelot_mirror *m = ocelot->mirror; + + if (!refcount_dec_and_test(&m->refcount)) + return; + + ocelot_write(ocelot, 0, ANA_MIRRORPORTS); + ocelot->mirror = NULL; + kfree(m); +} + +int ocelot_port_mirror_add(struct ocelot *ocelot, int from, int to, + bool ingress, struct netlink_ext_ack *extack) +{ + struct ocelot_mirror *m = ocelot_mirror_get(ocelot, to, extack); + + if (IS_ERR(m)) + return PTR_ERR(m); + + if (ingress) { + ocelot_rmw_gix(ocelot, ANA_PORT_PORT_CFG_SRC_MIRROR_ENA, + ANA_PORT_PORT_CFG_SRC_MIRROR_ENA, + ANA_PORT_PORT_CFG, from); + } else { + ocelot_rmw(ocelot, BIT(from), BIT(from), + ANA_EMIRRORPORTS); + } + + return 0; +} +EXPORT_SYMBOL_GPL(ocelot_port_mirror_add); + +void ocelot_port_mirror_del(struct ocelot *ocelot, int from, bool ingress) +{ + if (ingress) { + ocelot_rmw_gix(ocelot, 0, ANA_PORT_PORT_CFG_SRC_MIRROR_ENA, + ANA_PORT_PORT_CFG, from); + } else { + ocelot_rmw(ocelot, 0, BIT(from), ANA_EMIRRORPORTS); + } + + ocelot_mirror_put(ocelot); +} +EXPORT_SYMBOL_GPL(ocelot_port_mirror_del); + void ocelot_init_port(struct ocelot *ocelot, int port) { struct ocelot_port *ocelot_port = ocelot->ports[port]; diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index f8dc0d75eb5d..d0fa8ab6cc81 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -38,7 +38,8 @@ struct ocelot_port_tc { bool block_shared; unsigned long offload_cnt; - + unsigned long ingress_mirred_id; + unsigned long egress_mirred_id; unsigned long police_id; }; @@ -111,6 +112,10 @@ int ocelot_trap_add(struct ocelot *ocelot, int port, void (*populate)(struct ocelot_vcap_filter *f)); int ocelot_trap_del(struct ocelot *ocelot, int port, unsigned long cookie); +struct ocelot_mirror *ocelot_mirror_get(struct ocelot *ocelot, int to, + struct netlink_ext_ack *extack); +void ocelot_mirror_put(struct ocelot *ocelot); + extern struct notifier_block ocelot_netdevice_nb; extern struct notifier_block ocelot_switchdev_nb; extern struct notifier_block ocelot_switchdev_blocking_nb; diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index b3f5418dc622..03b5e59d033e 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -61,6 +61,12 @@ static int ocelot_chain_to_block(int chain, bool ingress) */ static int ocelot_chain_to_lookup(int chain) { + /* Backwards compatibility with older, single-chain tc-flower + * offload support in Ocelot + */ 
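ocelot_mirror_get()/ocelot_mirror_put() above funnel all mirroring users through one refcounted object because the hardware has a single monitor-port register (ANA_MIRRORPORTS): tc-matchall mirrors and VCAP IS2 mirror actions must all target the same egress port, and the register is cleared only when the last user drops its reference. A sketch of that contract (port numbers illustrative, ocelot and extack assumed in scope):

        /* First user programs ANA_MIRRORPORTS = BIT(3). */
        struct ocelot_mirror *m = ocelot_mirror_get(ocelot, 3, extack);

        /* A different destination while port 3 is active fails with
         * -EBUSY and an extack message explaining the conflict.
         */
        struct ocelot_mirror *other = ocelot_mirror_get(ocelot, 5, extack);

        ocelot_mirror_put(ocelot); /* last put clears ANA_MIRRORPORTS */

From user space this surfaces through commands such as tc filter add dev swp0 ingress matchall skip_sw action mirred egress mirror dev swp1 (interface names illustrative).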
+ if (chain == 0) + return 0; + return (chain / VCAP_LOOKUP) % 10; } @@ -69,7 +75,15 @@ static int ocelot_chain_to_lookup(int chain) */ static int ocelot_chain_to_pag(int chain) { - int lookup = ocelot_chain_to_lookup(chain); + int lookup; + + /* Backwards compatibility with older, single-chain tc-flower + * offload support in Ocelot + */ + if (chain == 0) + return 0; + + lookup = ocelot_chain_to_lookup(chain); /* calculate PAG value as chain index relative to the first PAG */ return chain - VCAP_IS2_CHAIN(lookup, 0); @@ -345,6 +359,27 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, filter->action.port_mask = BIT(egress_port); filter->type = OCELOT_VCAP_FILTER_OFFLOAD; break; + case FLOW_ACTION_MIRRED: + if (filter->block_id != VCAP_IS2) { + NL_SET_ERR_MSG_MOD(extack, + "Mirror action can only be offloaded to VCAP IS2"); + return -EOPNOTSUPP; + } + if (filter->goto_target != -1) { + NL_SET_ERR_MSG_MOD(extack, + "Last action must be GOTO"); + return -EOPNOTSUPP; + } + egress_port = ocelot->ops->netdev_to_port(a->dev); + if (egress_port < 0) { + NL_SET_ERR_MSG_MOD(extack, + "Destination not an ocelot port"); + return -EOPNOTSUPP; + } + filter->egress_port.value = egress_port; + filter->action.mirror_ena = true; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + break; case FLOW_ACTION_VLAN_POP: if (filter->block_id != VCAP_IS1) { NL_SET_ERR_MSG_MOD(extack, diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 5767e38c0c5a..247bc105bdd2 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -20,6 +20,8 @@ #define OCELOT_MAC_QUIRKS OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP +static bool ocelot_netdevice_dev_check(const struct net_device *dev); + static struct ocelot *devlink_port_to_ocelot(struct devlink_port *dlp) { return devlink_priv(dlp->devlink); @@ -216,14 +218,14 @@ int ocelot_setup_tc_cls_flower(struct ocelot_port_private *priv, } } -static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv, - struct tc_cls_matchall_offload *f, - bool ingress) +static int ocelot_setup_tc_cls_matchall_police(struct ocelot_port_private *priv, + struct tc_cls_matchall_offload *f, + bool ingress, + struct netlink_ext_ack *extack) { - struct netlink_ext_ack *extack = f->common.extack; + struct flow_action_entry *action = &f->rule->action.entries[0]; struct ocelot *ocelot = priv->port.ocelot; struct ocelot_policer pol = { 0 }; - struct flow_action_entry *action; int port = priv->chip_port; int err; @@ -232,6 +234,119 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv, return -EOPNOTSUPP; } + if (priv->tc.police_id && priv->tc.police_id != f->cookie) { + NL_SET_ERR_MSG_MOD(extack, + "Only one policer per port is supported"); + return -EEXIST; + } + + err = ocelot_policer_validate(&f->rule->action, action, extack); + if (err) + return err; + + pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8; + pol.burst = action->police.burst; + + err = ocelot_port_policer_add(ocelot, port, &pol); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Could not add policer"); + return err; + } + + priv->tc.police_id = f->cookie; + priv->tc.offload_cnt++; + + return 0; +} + +static int ocelot_setup_tc_cls_matchall_mirred(struct ocelot_port_private *priv, + struct tc_cls_matchall_offload *f, + bool ingress, + struct netlink_ext_ack *extack) +{ + struct flow_action *action = &f->rule->action; + struct ocelot *ocelot = priv->port.ocelot; + struct ocelot_port_private *other_priv; + 
const struct flow_action_entry *a; + int err; + + if (f->common.protocol != htons(ETH_P_ALL)) + return -EOPNOTSUPP; + + if (!flow_action_basic_hw_stats_check(action, extack)) + return -EOPNOTSUPP; + + a = &action->entries[0]; + if (!a->dev) + return -EINVAL; + + if (!ocelot_netdevice_dev_check(a->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Destination not an ocelot port"); + return -EOPNOTSUPP; + } + + other_priv = netdev_priv(a->dev); + + err = ocelot_port_mirror_add(ocelot, priv->chip_port, + other_priv->chip_port, ingress, extack); + if (err) + return err; + + if (ingress) + priv->tc.ingress_mirred_id = f->cookie; + else + priv->tc.egress_mirred_id = f->cookie; + priv->tc.offload_cnt++; + + return 0; +} + +static int ocelot_del_tc_cls_matchall_police(struct ocelot_port_private *priv, + struct netlink_ext_ack *extack) +{ + struct ocelot *ocelot = priv->port.ocelot; + int port = priv->chip_port; + int err; + + err = ocelot_port_policer_del(ocelot, port); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Could not delete policer"); + return err; + } + + priv->tc.police_id = 0; + priv->tc.offload_cnt--; + + return 0; +} + +static int ocelot_del_tc_cls_matchall_mirred(struct ocelot_port_private *priv, + bool ingress, + struct netlink_ext_ack *extack) +{ + struct ocelot *ocelot = priv->port.ocelot; + int port = priv->chip_port; + + ocelot_port_mirror_del(ocelot, port, ingress); + + if (ingress) + priv->tc.ingress_mirred_id = 0; + else + priv->tc.egress_mirred_id = 0; + priv->tc.offload_cnt--; + + return 0; +} + +static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv, + struct tc_cls_matchall_offload *f, + bool ingress) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct flow_action_entry *action; + switch (f->command) { case TC_CLSMATCHALL_REPLACE: if (!flow_offload_has_one_action(&f->rule->action)) { @@ -242,53 +357,41 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv, if (priv->tc.block_shared) { NL_SET_ERR_MSG_MOD(extack, - "Rate limit is not supported on shared blocks"); + "Matchall offloads not supported on shared blocks"); return -EOPNOTSUPP; } action = &f->rule->action.entries[0]; - if (action->id != FLOW_ACTION_POLICE) { + switch (action->id) { + case FLOW_ACTION_POLICE: + return ocelot_setup_tc_cls_matchall_police(priv, f, + ingress, + extack); + break; + case FLOW_ACTION_MIRRED: + return ocelot_setup_tc_cls_matchall_mirred(priv, f, + ingress, + extack); + default: NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); return -EOPNOTSUPP; } - if (priv->tc.police_id && priv->tc.police_id != f->cookie) { - NL_SET_ERR_MSG_MOD(extack, - "Only one policer per port is supported"); - return -EEXIST; - } - - err = ocelot_policer_validate(&f->rule->action, action, - extack); - if (err) - return err; - - pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8; - pol.burst = action->police.burst; - - err = ocelot_port_policer_add(ocelot, port, &pol); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Could not add policer"); - return err; - } - - priv->tc.police_id = f->cookie; - priv->tc.offload_cnt++; - return 0; + break; case TC_CLSMATCHALL_DESTROY: - if (priv->tc.police_id != f->cookie) + action = &f->rule->action.entries[0]; + + if (f->cookie == priv->tc.police_id) + return ocelot_del_tc_cls_matchall_police(priv, extack); + else if (f->cookie == priv->tc.ingress_mirred_id || + f->cookie == priv->tc.egress_mirred_id) + return ocelot_del_tc_cls_matchall_mirred(priv, ingress, + extack); + else return -ENOENT; - err = ocelot_port_policer_del(ocelot, 
port); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "Could not delete policer"); - return err; - } - priv->tc.police_id = 0; - priv->tc.offload_cnt--; - return 0; + break; case TC_CLSMATCHALL_STATS: default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index b976d480aeb3..c8701ac955a8 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -335,6 +335,7 @@ static void is2_action_set(struct ocelot *ocelot, struct vcap_data *data, vcap_action_set(vcap, data, VCAP_IS2_ACT_MASK_MODE, a->mask_mode); vcap_action_set(vcap, data, VCAP_IS2_ACT_PORT_MASK, a->port_mask); + vcap_action_set(vcap, data, VCAP_IS2_ACT_MIRROR_ENA, a->mirror_ena); vcap_action_set(vcap, data, VCAP_IS2_ACT_POLICE_ENA, a->police_ena); vcap_action_set(vcap, data, VCAP_IS2_ACT_POLICE_IDX, a->pol_ix); vcap_action_set(vcap, data, VCAP_IS2_ACT_CPU_QU_NUM, a->cpu_qu_num); @@ -955,14 +956,21 @@ int ocelot_vcap_policer_del(struct ocelot *ocelot, u32 pol_ix) } EXPORT_SYMBOL(ocelot_vcap_policer_del); -static int ocelot_vcap_filter_add_to_block(struct ocelot *ocelot, - struct ocelot_vcap_block *block, - struct ocelot_vcap_filter *filter) +static int +ocelot_vcap_filter_add_aux_resources(struct ocelot *ocelot, + struct ocelot_vcap_filter *filter, + struct netlink_ext_ack *extack) { - struct ocelot_vcap_filter *tmp; - struct list_head *pos, *n; + struct ocelot_mirror *m; int ret; + if (filter->block_id == VCAP_IS2 && filter->action.mirror_ena) { + m = ocelot_mirror_get(ocelot, filter->egress_port.value, + extack); + if (IS_ERR(m)) + return PTR_ERR(m); + } + if (filter->block_id == VCAP_IS2 && filter->action.police_ena) { ret = ocelot_vcap_policer_add(ocelot, filter->action.pol_ix, &filter->action.pol); @@ -970,6 +978,33 @@ static int ocelot_vcap_filter_add_to_block(struct ocelot *ocelot, return ret; } + return 0; +} + +static void +ocelot_vcap_filter_del_aux_resources(struct ocelot *ocelot, + struct ocelot_vcap_filter *filter) +{ + if (filter->block_id == VCAP_IS2 && filter->action.police_ena) + ocelot_vcap_policer_del(ocelot, filter->action.pol_ix); + + if (filter->block_id == VCAP_IS2 && filter->action.mirror_ena) + ocelot_mirror_put(ocelot); +} + +static int ocelot_vcap_filter_add_to_block(struct ocelot *ocelot, + struct ocelot_vcap_block *block, + struct ocelot_vcap_filter *filter, + struct netlink_ext_ack *extack) +{ + struct ocelot_vcap_filter *tmp; + struct list_head *pos, *n; + int ret; + + ret = ocelot_vcap_filter_add_aux_resources(ocelot, filter, extack); + if (ret) + return ret; + block->count++; if (list_empty(&block->rules)) { @@ -1168,7 +1203,7 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, } /* Add filter to the linked list */ - ret = ocelot_vcap_filter_add_to_block(ocelot, block, filter); + ret = ocelot_vcap_filter_add_to_block(ocelot, block, filter, extack); if (ret) return ret; @@ -1199,11 +1234,7 @@ static void ocelot_vcap_block_remove_filter(struct ocelot *ocelot, list_for_each_entry_safe(tmp, n, &block->rules, list) { if (ocelot_vcap_filter_equal(filter, tmp)) { - if (tmp->block_id == VCAP_IS2 && - tmp->action.police_ena) - ocelot_vcap_policer_del(ocelot, - tmp->action.pol_ix); - + ocelot_vcap_filter_del_aux_resources(ocelot, tmp); list_del(&tmp->list); kfree(tmp); } diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 2c40a3959f94..1b9421e844a9 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ 
b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -956,7 +956,7 @@ nfp_flower_meter_action(struct nfp_app *app, meter_id = action->hw_index; if (!nfp_flower_search_meter_entry(app, meter_id)) { NL_SET_ERR_MSG_MOD(extack, - "can not offload flow table with unsupported police action.\n"); + "can not offload flow table with unsupported police action."); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index ac1dcfa1d179..4d960a9641b3 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -266,7 +266,7 @@ nfp_flower_reprs_reify(struct nfp_app *app, enum nfp_repr_type type, int i, err, count = 0; reprs = rcu_dereference_protected(app->reprs[type], - lockdep_is_held(&app->pf->lock)); + nfp_app_is_locked(app)); if (!reprs) return 0; @@ -295,7 +295,7 @@ nfp_flower_wait_repr_reify(struct nfp_app *app, atomic_t *replies, int tot_repl) if (!tot_repl) return 0; - lockdep_assert_held(&app->pf->lock); + assert_nfp_app_locked(app); if (!wait_event_timeout(priv->reify_wait_queue, atomic_read(replies) >= tot_repl, NFP_FL_REPLY_TIMEOUT)) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 3a973282b2bb..09f250e74dfa 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -121,7 +121,7 @@ struct nfp_reprs * nfp_reprs_get_locked(struct nfp_app *app, enum nfp_repr_type type) { return rcu_dereference_protected(app->reprs[type], - lockdep_is_held(&app->pf->lock)); + nfp_app_is_locked(app)); } struct nfp_reprs * diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 3e9baff07100..dd56207df246 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -75,7 +75,7 @@ extern const struct nfp_app_type app_abm; * @bpf: BPF ndo offload-related calls * @xdp_offload: offload an XDP program * @eswitch_mode_get: get SR-IOV eswitch mode - * @eswitch_mode_set: set SR-IOV eswitch mode (under pf->lock) + * @eswitch_mode_set: set SR-IOV eswitch mode * @sriov_enable: app-specific sriov initialisation * @sriov_disable: app-specific sriov clean-up * @dev_get: get representor or internal port representing netdev @@ -174,6 +174,16 @@ struct nfp_app { void *priv; }; +static inline void assert_nfp_app_locked(struct nfp_app *app) +{ + devl_assert_locked(priv_to_devlink(app->pf)); +} + +static inline bool nfp_app_is_locked(struct nfp_app *app) +{ + return devl_lock_is_held(priv_to_devlink(app->pf)); +} + void nfp_check_rhashtable_empty(void *ptr, void *arg); bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index bea978df7713..48b95566b52b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -26,12 +26,11 @@ nfp_devlink_fill_eth_port(struct nfp_port *port, } static int -nfp_devlink_fill_eth_port_from_id(struct nfp_pf *pf, unsigned int port_index, +nfp_devlink_fill_eth_port_from_id(struct nfp_pf *pf, + struct devlink_port *dl_port, struct nfp_eth_table_port *copy) { - struct nfp_port *port; - - port = nfp_port_from_id(pf, NFP_PORT_PHYS_PORT, port_index); + struct nfp_port *port = container_of(dl_port, struct nfp_port, 
dl_port); return nfp_devlink_fill_eth_port(port, copy); } @@ -62,7 +61,7 @@ nfp_devlink_set_lanes(struct nfp_pf *pf, unsigned int idx, unsigned int lanes) } static int -nfp_devlink_port_split(struct devlink *devlink, unsigned int port_index, +nfp_devlink_port_split(struct devlink *devlink, struct devlink_port *port, unsigned int count, struct netlink_ext_ack *extack) { struct nfp_pf *pf = devlink_priv(devlink); @@ -70,33 +69,25 @@ nfp_devlink_port_split(struct devlink *devlink, unsigned int port_index, unsigned int lanes; int ret; - mutex_lock(&pf->lock); - rtnl_lock(); - ret = nfp_devlink_fill_eth_port_from_id(pf, port_index, &eth_port); + ret = nfp_devlink_fill_eth_port_from_id(pf, port, &eth_port); rtnl_unlock(); if (ret) - goto out; + return ret; - if (eth_port.port_lanes % count) { - ret = -EINVAL; - goto out; - } + if (eth_port.port_lanes % count) + return -EINVAL; /* Special case the 100G CXP -> 2x40G split */ lanes = eth_port.port_lanes / count; if (eth_port.lanes == 10 && count == 2) lanes = 8 / count; - ret = nfp_devlink_set_lanes(pf, eth_port.index, lanes); -out: - mutex_unlock(&pf->lock); - - return ret; + return nfp_devlink_set_lanes(pf, eth_port.index, lanes); } static int -nfp_devlink_port_unsplit(struct devlink *devlink, unsigned int port_index, +nfp_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, struct netlink_ext_ack *extack) { struct nfp_pf *pf = devlink_priv(devlink); @@ -104,29 +95,21 @@ nfp_devlink_port_unsplit(struct devlink *devlink, unsigned int port_index, unsigned int lanes; int ret; - mutex_lock(&pf->lock); - rtnl_lock(); - ret = nfp_devlink_fill_eth_port_from_id(pf, port_index, &eth_port); + ret = nfp_devlink_fill_eth_port_from_id(pf, port, &eth_port); rtnl_unlock(); if (ret) - goto out; + return ret; - if (!eth_port.is_split) { - ret = -EINVAL; - goto out; - } + if (!eth_port.is_split) + return -EINVAL; /* Special case the 100G CXP -> 2x40G unsplit */ lanes = eth_port.port_lanes; if (eth_port.port_lanes == 8) lanes = 10; - ret = nfp_devlink_set_lanes(pf, eth_port.index, lanes); -out: - mutex_unlock(&pf->lock); - - return ret; + return nfp_devlink_set_lanes(pf, eth_port.index, lanes); } static int @@ -163,9 +146,9 @@ static int nfp_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct nfp_pf *pf = devlink_priv(devlink); int ret; - mutex_lock(&pf->lock); + devl_lock(devlink); ret = nfp_app_eswitch_mode_set(pf->app, mode); - mutex_unlock(&pf->lock); + devl_unlock(devlink); return ret; } @@ -375,12 +358,12 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) devlink = priv_to_devlink(app->pf); - return devlink_port_register(devlink, &port->dl_port, port->eth_id); + return devl_port_register(devlink, &port->dl_port, port->eth_id); } void nfp_devlink_port_unregister(struct nfp_port *port) { - devlink_port_unregister(&port->dl_port); + devl_port_unregister(&port->dl_port); } void nfp_devlink_port_type_eth_set(struct nfp_port *port) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 8693f9905fbe..eeda39e34f84 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -227,6 +227,7 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs) { #ifdef CONFIG_PCI_IOV struct nfp_pf *pf = pci_get_drvdata(pdev); + struct devlink *devlink; int err; if (num_vfs > pf->limit_vfs) { @@ -241,7 +242,8 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs) return err; } -
mutex_lock(&pf->lock); + devlink = priv_to_devlink(pf); + devl_lock(devlink); err = nfp_app_sriov_enable(pf->app, num_vfs); if (err) { @@ -255,11 +257,11 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs) dev_dbg(&pdev->dev, "Created %d VFs.\n", pf->num_vfs); - mutex_unlock(&pf->lock); + devl_unlock(devlink); return num_vfs; err_sriov_disable: - mutex_unlock(&pf->lock); + devl_unlock(devlink); pci_disable_sriov(pdev); return err; #endif @@ -270,8 +272,10 @@ static int nfp_pcie_sriov_disable(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct nfp_pf *pf = pci_get_drvdata(pdev); + struct devlink *devlink; - mutex_lock(&pf->lock); + devlink = priv_to_devlink(pf); + devl_lock(devlink); /* If the VFs are assigned we cannot shut down SR-IOV without * causing issues, so just leave the hardware available but */ if (pci_vfs_assigned(pdev)) { dev_warn(&pdev->dev, "Disabling while VFs assigned - VFs will not be deallocated\n"); - mutex_unlock(&pf->lock); + devl_unlock(devlink); return -EPERM; } @@ -287,7 +291,7 @@ static int nfp_pcie_sriov_disable(struct pci_dev *pdev) pf->num_vfs = 0; - mutex_unlock(&pf->lock); + devl_unlock(devlink); pci_disable_sriov(pdev); dev_dbg(&pdev->dev, "Removed VFs.\n"); @@ -707,7 +711,6 @@ static int nfp_pci_probe(struct pci_dev *pdev, pf = devlink_priv(devlink); INIT_LIST_HEAD(&pf->vnics); INIT_LIST_HEAD(&pf->ports); - mutex_init(&pf->lock); pci_set_drvdata(pdev, pf); pf->pdev = pdev; pf->dev_info = dev_info; @@ -798,7 +801,6 @@ err_disable_msix: destroy_workqueue(pf->wq); err_pci_priv_unset: pci_set_drvdata(pdev, NULL); - mutex_destroy(&pf->lock); devlink_free(devlink); err_rel_regions: pci_release_regions(pdev); @@ -835,7 +837,6 @@ static void __nfp_pci_shutdown(struct pci_dev *pdev, bool unload_fw) kfree(pf->eth_tbl); kfree(pf->nspi); - mutex_destroy(&pf->lock); devlink_free(priv_to_devlink(pf)); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index 9c72a0ad18ea..f56ca11de134 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -13,7 +13,6 @@ #include <linux/list.h> #include <linux/types.h> #include <linux/msi.h> -#include <linux/mutex.h> #include <linux/pci.h> #include <linux/workqueue.h> #include <net/devlink.h> @@ -85,7 +84,8 @@ struct nfp_dumpspec { * @port_refresh_work: Work entry for taking netdevs out * @shared_bufs: Array of shared buffer structures if FW has any SBs * @num_shared_bufs: Number of elements in @shared_bufs - * @lock: Protects all fields which may change after probe + * + * Fields which may change after probe are protected by the devlink instance lock.
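+ * Take it with devl_lock()/devl_unlock() on priv_to_devlink(pf); the + * nfp_app_is_locked()/assert_nfp_app_locked() helpers in nfp_app.h + * express the same requirement for the representor table lookups.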
*/ struct nfp_pf { struct pci_dev *pdev; @@ -141,8 +141,6 @@ struct nfp_pf { struct nfp_shared_buf *shared_bufs; unsigned int num_shared_bufs; - - struct mutex lock; }; extern struct pci_driver nfp_netvf_pci_driver; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index a18b99c93ab3..09a0a2076c6e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -308,6 +308,7 @@ err_prev_deinit: static int nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) { + struct devlink *devlink = priv_to_devlink(pf); u8 __iomem *ctrl_bar; int err; @@ -315,9 +316,9 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) if (IS_ERR(pf->app)) return PTR_ERR(pf->app); - mutex_lock(&pf->lock); + devl_lock(devlink); err = nfp_app_init(pf->app); - mutex_unlock(&pf->lock); + devl_unlock(devlink); if (err) goto err_free; @@ -344,9 +345,9 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) err_unmap: nfp_cpp_area_release_free(pf->ctrl_vnic_bar); err_app_clean: - mutex_lock(&pf->lock); + devl_lock(devlink); nfp_app_clean(pf->app); - mutex_unlock(&pf->lock); + devl_unlock(devlink); err_free: nfp_app_free(pf->app); pf->app = NULL; @@ -355,14 +356,16 @@ err_free: static void nfp_net_pf_app_clean(struct nfp_pf *pf) { + struct devlink *devlink = priv_to_devlink(pf); + if (pf->ctrl_vnic) { nfp_net_pf_free_vnic(pf, pf->ctrl_vnic); nfp_cpp_area_release_free(pf->ctrl_vnic_bar); } - mutex_lock(&pf->lock); + devl_lock(devlink); nfp_app_clean(pf->app); - mutex_unlock(&pf->lock); + devl_unlock(devlink); nfp_app_free(pf->app); pf->app = NULL; @@ -548,12 +551,13 @@ nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port, int nfp_net_refresh_port_table_sync(struct nfp_pf *pf) { + struct devlink *devlink = priv_to_devlink(pf); struct nfp_eth_table *eth_table; struct nfp_net *nn, *next; struct nfp_port *port; int err; - lockdep_assert_held(&pf->lock); + devl_assert_locked(devlink); /* Check for nfp_net_pci_remove() racing against us */ if (list_empty(&pf->vnics)) @@ -602,10 +606,11 @@ static void nfp_net_refresh_vnics(struct work_struct *work) { struct nfp_pf *pf = container_of(work, struct nfp_pf, port_refresh_work); + struct devlink *devlink = priv_to_devlink(pf); - mutex_lock(&pf->lock); + devl_lock(devlink); nfp_net_refresh_port_table_sync(pf); - mutex_unlock(&pf->lock); + devl_unlock(devlink); } void nfp_net_refresh_port_table(struct nfp_port *port) @@ -711,7 +716,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (err) goto err_shared_buf_unreg; - mutex_lock(&pf->lock); + devl_lock(devlink); pf->ddir = nfp_net_debugfs_device_add(pf->pdev); /* Allocate the vnics and do basic init */ @@ -731,7 +736,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (err) goto err_stop_app; - mutex_unlock(&pf->lock); + devl_unlock(devlink); devlink_register(devlink); return 0; @@ -744,7 +749,7 @@ err_free_vnics: nfp_net_pf_free_vnics(pf); err_clean_ddir: nfp_net_debugfs_dir_clean(&pf->ddir); - mutex_unlock(&pf->lock); + devl_unlock(devlink); nfp_devlink_params_unregister(pf); err_shared_buf_unreg: nfp_shared_buf_unregister(pf); @@ -758,10 +763,11 @@ err_unmap: void nfp_net_pci_remove(struct nfp_pf *pf) { + struct devlink *devlink = priv_to_devlink(pf); struct nfp_net *nn, *next; devlink_unregister(priv_to_devlink(pf)); - mutex_lock(&pf->lock); + devl_lock(devlink); list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) { if 
(!nfp_net_is_data_vnic(nn)) continue; @@ -773,7 +779,7 @@ void nfp_net_pci_remove(struct nfp_pf *pf) /* stop app first, to avoid double free of ctrl vNIC's ddir */ nfp_net_debugfs_dir_clean(&pf->ddir); - mutex_unlock(&pf->lock); + devl_unlock(devlink); nfp_devlink_params_unregister(pf); nfp_shared_buf_unregister(pf); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 181ac8e789a3..ba3fa7eac98d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -20,7 +20,7 @@ struct net_device * nfp_repr_get_locked(struct nfp_app *app, struct nfp_reprs *set, unsigned int id) { return rcu_dereference_protected(set->reprs[id], - lockdep_is_held(&app->pf->lock)); + nfp_app_is_locked(app)); } static void @@ -476,7 +476,7 @@ nfp_reprs_clean_and_free_by_type(struct nfp_app *app, enum nfp_repr_type type) int i; reprs = rcu_dereference_protected(app->reprs[type], - lockdep_is_held(&app->pf->lock)); + nfp_app_is_locked(app)); if (!reprs) return; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c index 93c5bfc0510b..4f2308570dcf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c @@ -75,23 +75,6 @@ int nfp_port_set_features(struct net_device *netdev, netdev_features_t features) return 0; } -struct nfp_port * -nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id) -{ - struct nfp_port *port; - - lockdep_assert_held(&pf->lock); - - if (type != NFP_PORT_PHYS_PORT) - return NULL; - - list_for_each_entry(port, &pf->ports, port_list) - if (port->eth_id == id) - return port; - - return NULL; -} - struct nfp_eth_table_port *__nfp_port_get_eth_port(struct nfp_port *port) { if (!port) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h index df316b9e891d..d1ebe6c72f7f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -106,8 +106,6 @@ nfp_port_set_features(struct net_device *netdev, netdev_features_t features); struct nfp_port *nfp_port_from_netdev(struct net_device *netdev); int nfp_port_get_port_parent_id(struct net_device *netdev, struct netdev_phys_item_id *ppid); -struct nfp_port * -nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id); struct nfp_eth_table_port *__nfp_port_get_eth_port(struct nfp_port *port); struct nfp_eth_table_port *nfp_port_get_eth_port(struct nfp_port *port); diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 12105f62cbdd..03650022d444 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -191,7 +191,7 @@ IV. Notes Thanks to Kim Stearns of Packet Engines for providing a pair of G-NIC boards. Thanks to Bruce Faust of Digitalscape for providing both their SYM53C885 board -and an AlphaStation to verifty the Alpha port! +and an AlphaStation to verify the Alpha port! IVb. 
References diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 67014eb76969..33f5c5698ccb 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1397,8 +1397,11 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) rtl_lock_config_regs(tp); device_set_wakeup_enable(tp_to_dev(tp), wolopts); - rtl_set_d3_pll_down(tp, !wolopts); - tp->dev->wol_enabled = wolopts ? 1 : 0; + + if (tp->dash_type == RTL_DASH_NONE) { + rtl_set_d3_pll_down(tp, !wolopts); + tp->dev->wol_enabled = wolopts ? 1 : 0; + } } static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) @@ -4938,6 +4941,9 @@ static int rtl8169_runtime_idle(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); + if (tp->dash_type != RTL_DASH_NONE) + return -EBUSY; + if (!netif_running(tp->dev) || !netif_carrier_ok(tp->dev)) pm_schedule_suspend(device, 10000); @@ -4978,7 +4984,8 @@ static void rtl_shutdown(struct pci_dev *pdev) /* Restore original MAC address */ rtl_rar_set(tp, tp->dev->perm_addr); - if (system_state == SYSTEM_POWER_OFF) { + if (system_state == SYSTEM_POWER_OFF && + tp->dash_type == RTL_DASH_NONE) { if (tp->saved_wolopts) rtl_wol_shutdown_quirk(tp); @@ -5449,7 +5456,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* configure chip for default features */ rtl8169_set_features(dev, dev->features); - rtl_set_d3_pll_down(tp, true); + if (tp->dash_type == RTL_DASH_NONE) { + rtl_set_d3_pll_down(tp, true); + } else { + rtl_set_d3_pll_down(tp, false); + dev->wol_enabled = 1; + } jumbo_max = rtl_jumbo_max(tp); if (jumbo_max) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 58c0feaa8131..6ff88df58767 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -9,7 +9,6 @@ #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_net.h> -#include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/stmmac.h> @@ -40,6 +39,33 @@ #define ETH_FINE_DLY_GTXC BIT(1) #define ETH_FINE_DLY_RXC BIT(0) +/* Peri Configuration register for mt8195 */ +#define MT8195_PERI_ETH_CTRL0 0xFD0 +#define MT8195_RMII_CLK_SRC_INTERNAL BIT(28) +#define MT8195_RMII_CLK_SRC_RXC BIT(27) +#define MT8195_ETH_INTF_SEL GENMASK(26, 24) +#define MT8195_RGMII_TXC_PHASE_CTRL BIT(22) +#define MT8195_EXT_PHY_MODE BIT(21) +#define MT8195_DLY_GTXC_INV BIT(12) +#define MT8195_DLY_GTXC_ENABLE BIT(5) +#define MT8195_DLY_GTXC_STAGES GENMASK(4, 0) + +#define MT8195_PERI_ETH_CTRL1 0xFD4 +#define MT8195_DLY_RXC_INV BIT(25) +#define MT8195_DLY_RXC_ENABLE BIT(18) +#define MT8195_DLY_RXC_STAGES GENMASK(17, 13) +#define MT8195_DLY_TXC_INV BIT(12) +#define MT8195_DLY_TXC_ENABLE BIT(5) +#define MT8195_DLY_TXC_STAGES GENMASK(4, 0) + +#define MT8195_PERI_ETH_CTRL2 0xFD8 +#define MT8195_DLY_RMII_RXC_INV BIT(25) +#define MT8195_DLY_RMII_RXC_ENABLE BIT(18) +#define MT8195_DLY_RMII_RXC_STAGES GENMASK(17, 13) +#define MT8195_DLY_RMII_TXC_INV BIT(12) +#define MT8195_DLY_RMII_TXC_ENABLE BIT(5) +#define MT8195_DLY_RMII_TXC_STAGES GENMASK(4, 0) + struct mac_delay_struct { u32 tx_delay; u32 rx_delay; @@ -50,19 +76,21 @@ struct mac_delay_struct { struct mediatek_dwmac_plat_data { const struct mediatek_dwmac_variant *variant; struct mac_delay_struct mac_delay; + struct clk *rmii_internal_clk; struct clk_bulk_data *clks; - struct 
device_node *np; struct regmap *peri_regmap; + struct device_node *np; struct device *dev; phy_interface_t phy_mode; - int num_clks_to_config; bool rmii_clk_from_mac; bool rmii_rxc; + bool mac_wol; }; struct mediatek_dwmac_variant { int (*dwmac_set_phy_interface)(struct mediatek_dwmac_plat_data *plat); int (*dwmac_set_delay)(struct mediatek_dwmac_plat_data *plat); + void (*dwmac_fix_mac_speed)(void *priv, unsigned int speed); /* clock ids to be requested */ const char * const *clk_list; @@ -75,7 +103,11 @@ struct mediatek_dwmac_variant { /* list of clocks required for mac */ static const char * const mt2712_dwmac_clk_l[] = { - "axi", "apb", "mac_main", "ptp_ref", "rmii_internal" + "axi", "apb", "mac_main", "ptp_ref" +}; + +static const char * const mt8195_dwmac_clk_l[] = { + "axi", "apb", "mac_cg", "mac_main", "ptp_ref" }; static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat) @@ -84,23 +116,12 @@ static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat) int rmii_rxc = plat->rmii_rxc ? RMII_CLK_SRC_RXC : 0; u32 intf_val = 0; - /* The clock labeled as "rmii_internal" in mt2712_dwmac_clk_l is needed - * only in RMII(when MAC provides the reference clock), and useless for - * RGMII/MII/RMII(when PHY provides the reference clock). - * num_clks_to_config indicates the real number of clocks should be - * configured, equals to (plat->variant->num_clks - 1) in default for all the case, - * then +1 for rmii_clk_from_mac case. - */ - plat->num_clks_to_config = plat->variant->num_clks - 1; - /* select phy interface in top control domain */ switch (plat->phy_mode) { case PHY_INTERFACE_MODE_MII: intf_val |= PHY_INTF_MII; break; case PHY_INTERFACE_MODE_RMII: - if (plat->rmii_clk_from_mac) - plat->num_clks_to_config++; intf_val |= (PHY_INTF_RMII | rmii_rxc | rmii_clk_from_mac); break; case PHY_INTERFACE_MODE_RGMII: @@ -268,6 +289,193 @@ static const struct mediatek_dwmac_variant mt2712_gmac_variant = { .tx_delay_max = 17600, }; +static int mt8195_set_interface(struct mediatek_dwmac_plat_data *plat) +{ + int rmii_clk_from_mac = plat->rmii_clk_from_mac ? MT8195_RMII_CLK_SRC_INTERNAL : 0; + int rmii_rxc = plat->rmii_rxc ? 
MT8195_RMII_CLK_SRC_RXC : 0; + u32 intf_val = 0; + + /* select phy interface in top control domain */ + switch (plat->phy_mode) { + case PHY_INTERFACE_MODE_MII: + intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, PHY_INTF_MII); + break; + case PHY_INTERFACE_MODE_RMII: + intf_val |= (rmii_rxc | rmii_clk_from_mac); + intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, PHY_INTF_RMII); + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: + intf_val |= FIELD_PREP(MT8195_ETH_INTF_SEL, PHY_INTF_RGMII); + break; + default: + dev_err(plat->dev, "phy interface not supported\n"); + return -EINVAL; + } + + /* MT8195 only supports an external PHY */ + intf_val |= MT8195_EXT_PHY_MODE; + + regmap_write(plat->peri_regmap, MT8195_PERI_ETH_CTRL0, intf_val); + + return 0; +} + +static void mt8195_delay_ps2stage(struct mediatek_dwmac_plat_data *plat) +{ + struct mac_delay_struct *mac_delay = &plat->mac_delay; + + /* 290ps per stage */ + mac_delay->tx_delay /= 290; + mac_delay->rx_delay /= 290; +} + +static void mt8195_delay_stage2ps(struct mediatek_dwmac_plat_data *plat) +{ + struct mac_delay_struct *mac_delay = &plat->mac_delay; + + /* 290ps per stage */ + mac_delay->tx_delay *= 290; + mac_delay->rx_delay *= 290; +} + +static int mt8195_set_delay(struct mediatek_dwmac_plat_data *plat) +{ + struct mac_delay_struct *mac_delay = &plat->mac_delay; + u32 gtxc_delay_val = 0, delay_val = 0, rmii_delay_val = 0; + + mt8195_delay_ps2stage(plat); + + switch (plat->phy_mode) { + case PHY_INTERFACE_MODE_MII: + delay_val |= FIELD_PREP(MT8195_DLY_TXC_ENABLE, !!mac_delay->tx_delay); + delay_val |= FIELD_PREP(MT8195_DLY_TXC_STAGES, mac_delay->tx_delay); + delay_val |= FIELD_PREP(MT8195_DLY_TXC_INV, mac_delay->tx_inv); + + delay_val |= FIELD_PREP(MT8195_DLY_RXC_ENABLE, !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(MT8195_DLY_RXC_STAGES, mac_delay->rx_delay); + delay_val |= FIELD_PREP(MT8195_DLY_RXC_INV, mac_delay->rx_inv); + break; + case PHY_INTERFACE_MODE_RMII: + if (plat->rmii_clk_from_mac) { + /* case 1: the MAC provides the RMII reference clock, + * and the clock is output on the TXC pin. + * The egress timing can be adjusted by the RMII_TXC delay macro circuit. + * The ingress timing can be adjusted by the RMII_RXC delay macro circuit. + */ + rmii_delay_val |= FIELD_PREP(MT8195_DLY_RMII_TXC_ENABLE, + !!mac_delay->tx_delay); + rmii_delay_val |= FIELD_PREP(MT8195_DLY_RMII_TXC_STAGES, + mac_delay->tx_delay); + rmii_delay_val |= FIELD_PREP(MT8195_DLY_RMII_TXC_INV, + mac_delay->tx_inv); + + rmii_delay_val |= FIELD_PREP(MT8195_DLY_RMII_RXC_ENABLE, + !!mac_delay->rx_delay); + rmii_delay_val |= FIELD_PREP(MT8195_DLY_RMII_RXC_STAGES, + mac_delay->rx_delay); + rmii_delay_val |= FIELD_PREP(MT8195_DLY_RMII_RXC_INV, + mac_delay->rx_inv); + } else { + /* case 2: the RMII reference clock comes from an external PHY, + * and the property "rmii_rxc" indicates which pin (TXC/RXC) + * the reference clk is connected to. The reference clock is a + * received signal, so rx_delay/rx_inv are used to indicate + * the reference clock timing adjustment. + */ + if (plat->rmii_rxc) { + /* the external RMII reference clock is connected + * to the RXC pin, so the reference clock will be adjusted + * by the RXC delay macro circuit.
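+ * (rx_delay has already been converted from picoseconds to 290ps + * hardware stages by mt8195_delay_ps2stage() above, e.g. a requested + * 1450ps delay programs 5 stages.)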
+ */ + delay_val |= FIELD_PREP(MT8195_DLY_RXC_ENABLE, + !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(MT8195_DLY_RXC_STAGES, + mac_delay->rx_delay); + delay_val |= FIELD_PREP(MT8195_DLY_RXC_INV, + mac_delay->rx_inv); + } else { + /* the external RMII reference clock is connected + * to the TXC pin, so the reference clock will be adjusted + * by the TXC delay macro circuit. + */ + delay_val |= FIELD_PREP(MT8195_DLY_TXC_ENABLE, + !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(MT8195_DLY_TXC_STAGES, + mac_delay->rx_delay); + delay_val |= FIELD_PREP(MT8195_DLY_TXC_INV, + mac_delay->rx_inv); + } + } + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_ID: + gtxc_delay_val |= FIELD_PREP(MT8195_DLY_GTXC_ENABLE, !!mac_delay->tx_delay); + gtxc_delay_val |= FIELD_PREP(MT8195_DLY_GTXC_STAGES, mac_delay->tx_delay); + gtxc_delay_val |= FIELD_PREP(MT8195_DLY_GTXC_INV, mac_delay->tx_inv); + + delay_val |= FIELD_PREP(MT8195_DLY_RXC_ENABLE, !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(MT8195_DLY_RXC_STAGES, mac_delay->rx_delay); + delay_val |= FIELD_PREP(MT8195_DLY_RXC_INV, mac_delay->rx_inv); + + break; + default: + dev_err(plat->dev, "phy interface not supported\n"); + return -EINVAL; + } + + regmap_update_bits(plat->peri_regmap, + MT8195_PERI_ETH_CTRL0, + MT8195_RGMII_TXC_PHASE_CTRL | + MT8195_DLY_GTXC_INV | + MT8195_DLY_GTXC_ENABLE | + MT8195_DLY_GTXC_STAGES, + gtxc_delay_val); + regmap_write(plat->peri_regmap, MT8195_PERI_ETH_CTRL1, delay_val); + regmap_write(plat->peri_regmap, MT8195_PERI_ETH_CTRL2, rmii_delay_val); + + mt8195_delay_stage2ps(plat); + + return 0; +} + +static void mt8195_fix_mac_speed(void *priv, unsigned int speed) +{ + struct mediatek_dwmac_plat_data *priv_plat = priv; + + if ((phy_interface_mode_is_rgmii(priv_plat->phy_mode))) { + /* prefer the 2ns fixed delay controlled by TXC_PHASE_CTRL + * when the link speed is 1Gbps on an RGMII interface, + * and fall back to the delay macro circuit for 10/100Mbps link speeds.
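+ * (The 2ns figure is the usual RGMII 90-degree phase shift: one + * quarter of the 8ns period of the 125MHz TXC clock used at 1Gbps.)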
+ */ + if (speed == SPEED_1000) + regmap_update_bits(priv_plat->peri_regmap, + MT8195_PERI_ETH_CTRL0, + MT8195_RGMII_TXC_PHASE_CTRL | + MT8195_DLY_GTXC_ENABLE | + MT8195_DLY_GTXC_INV | + MT8195_DLY_GTXC_STAGES, + MT8195_RGMII_TXC_PHASE_CTRL); + else + mt8195_set_delay(priv_plat); + } +} + +static const struct mediatek_dwmac_variant mt8195_gmac_variant = { + .dwmac_set_phy_interface = mt8195_set_interface, + .dwmac_set_delay = mt8195_set_delay, + .dwmac_fix_mac_speed = mt8195_fix_mac_speed, + .clk_list = mt8195_dwmac_clk_l, + .num_clks = ARRAY_SIZE(mt8195_dwmac_clk_l), + .dma_bit_mask = 35, + .rx_delay_max = 9280, + .tx_delay_max = 9280, +}; + static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat) { struct mac_delay_struct *mac_delay = &plat->mac_delay; @@ -308,6 +516,7 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat) mac_delay->rx_inv = of_property_read_bool(plat->np, "mediatek,rxc-inverse"); plat->rmii_rxc = of_property_read_bool(plat->np, "mediatek,rmii-rxc"); plat->rmii_clk_from_mac = of_property_read_bool(plat->np, "mediatek,rmii-clk-from-mac"); + plat->mac_wol = of_property_read_bool(plat->np, "mediatek,mac-wol"); return 0; } @@ -315,18 +524,34 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat) static int mediatek_dwmac_clk_init(struct mediatek_dwmac_plat_data *plat) { const struct mediatek_dwmac_variant *variant = plat->variant; - int i, num = variant->num_clks; + int i, ret; - plat->clks = devm_kcalloc(plat->dev, num, sizeof(*plat->clks), GFP_KERNEL); + plat->clks = devm_kcalloc(plat->dev, variant->num_clks, sizeof(*plat->clks), GFP_KERNEL); if (!plat->clks) return -ENOMEM; - for (i = 0; i < num; i++) + for (i = 0; i < variant->num_clks; i++) plat->clks[i].id = variant->clk_list[i]; - plat->num_clks_to_config = variant->num_clks; + ret = devm_clk_bulk_get(plat->dev, variant->num_clks, plat->clks); + if (ret) + return ret; + + /* The clock labeled as "rmii_internal" is needed only in RMII (when + * the MAC provides the reference clock), and is useless for RGMII/MII or + * RMII (when the PHY provides the reference clock). + * So the "rmii_internal" clock is requested and configured only when + * the RMII reference clock comes from the MAC.
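+ * (Otherwise rmii_internal_clk is left NULL, which the common clk + * framework accepts as a no-op in clk_prepare_enable() and + * clk_disable_unprepare().)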
+ */ + if (plat->rmii_clk_from_mac) { + plat->rmii_internal_clk = devm_clk_get(plat->dev, "rmii_internal"); + if (IS_ERR(plat->rmii_internal_clk)) + ret = PTR_ERR(plat->rmii_internal_clk); + } else { + plat->rmii_internal_clk = NULL; + } - return devm_clk_bulk_get(plat->dev, num, plat->clks); + return ret; } static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) @@ -335,44 +560,117 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) const struct mediatek_dwmac_variant *variant = plat->variant; int ret; - ret = dma_set_mask_and_coherent(plat->dev, DMA_BIT_MASK(variant->dma_bit_mask)); - if (ret) { - dev_err(plat->dev, "No suitable DMA available, err = %d\n", ret); - return ret; + if (variant->dwmac_set_phy_interface) { + ret = variant->dwmac_set_phy_interface(plat); + if (ret) { + dev_err(plat->dev, "failed to set phy interface, err = %d\n", ret); + return ret; + } } - ret = variant->dwmac_set_phy_interface(plat); - if (ret) { - dev_err(plat->dev, "failed to set phy interface, err = %d\n", ret); - return ret; + if (variant->dwmac_set_delay) { + ret = variant->dwmac_set_delay(plat); + if (ret) { + dev_err(plat->dev, "failed to set delay value, err = %d\n", ret); + return ret; + } } - ret = variant->dwmac_set_delay(plat); + ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks); if (ret) { - dev_err(plat->dev, "failed to set delay value, err = %d\n", ret); + dev_err(plat->dev, "failed to enable clks, err = %d\n", ret); return ret; } - ret = clk_bulk_prepare_enable(plat->num_clks_to_config, plat->clks); + ret = clk_prepare_enable(plat->rmii_internal_clk); if (ret) { - dev_err(plat->dev, "failed to enable clks, err = %d\n", ret); - return ret; + dev_err(plat->dev, "failed to enable rmii internal clk, err = %d\n", ret); + goto err_clk; } - pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); - return 0; + +err_clk: + clk_bulk_disable_unprepare(variant->num_clks, plat->clks); + return ret; } static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv) { struct mediatek_dwmac_plat_data *plat = priv; + const struct mediatek_dwmac_variant *variant = plat->variant; - clk_bulk_disable_unprepare(plat->num_clks_to_config, plat->clks); + clk_disable_unprepare(plat->rmii_internal_clk); + clk_bulk_disable_unprepare(variant->num_clks, plat->clks); +} - pm_runtime_put_sync(&pdev->dev); - pm_runtime_disable(&pdev->dev); +static int mediatek_dwmac_clks_config(void *priv, bool enabled) +{ + struct mediatek_dwmac_plat_data *plat = priv; + const struct mediatek_dwmac_variant *variant = plat->variant; + int ret = 0; + + if (enabled) { + ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks); + if (ret) { + dev_err(plat->dev, "failed to enable clks, err = %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(plat->rmii_internal_clk); + if (ret) { + dev_err(plat->dev, "failed to enable rmii internal clk, err = %d\n", ret); + return ret; + } + } else { + clk_disable_unprepare(plat->rmii_internal_clk); + clk_bulk_disable_unprepare(variant->num_clks, plat->clks); + } + + return ret; +} + +static int mediatek_dwmac_common_data(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct mediatek_dwmac_plat_data *priv_plat) +{ + int i; + + plat->interface = priv_plat->phy_mode; + plat->use_phy_wol = priv_plat->mac_wol ? 
0 : 1; + plat->riwt_off = 1; + plat->maxmtu = ETH_DATA_LEN; + plat->addr64 = priv_plat->variant->dma_bit_mask; + plat->bsp_priv = priv_plat; + plat->init = mediatek_dwmac_init; + plat->exit = mediatek_dwmac_exit; + plat->clks_config = mediatek_dwmac_clks_config; + if (priv_plat->variant->dwmac_fix_mac_speed) + plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed; + + plat->safety_feat_cfg = devm_kzalloc(&pdev->dev, + sizeof(*plat->safety_feat_cfg), + GFP_KERNEL); + if (!plat->safety_feat_cfg) + return -ENOMEM; + + plat->safety_feat_cfg->tsoee = 1; + plat->safety_feat_cfg->mrxpee = 0; + plat->safety_feat_cfg->mestee = 1; + plat->safety_feat_cfg->mrxee = 1; + plat->safety_feat_cfg->mtxee = 1; + plat->safety_feat_cfg->epsi = 0; + plat->safety_feat_cfg->edpp = 1; + plat->safety_feat_cfg->prtyen = 1; + plat->safety_feat_cfg->tmouten = 1; + + for (i = 0; i < plat->tx_queues_to_use; i++) { + /* Default TX Q0 to use TSO and rest TXQ for TBS */ + if (i > 0) + plat->tx_queues_cfg[i].tbs_en = 1; + } + + return 0; } static int mediatek_dwmac_probe(struct platform_device *pdev) @@ -411,15 +709,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); - plat_dat->interface = priv_plat->phy_mode; - plat_dat->has_gmac4 = 1; - plat_dat->has_gmac = 0; - plat_dat->pmt = 0; - plat_dat->riwt_off = 1; - plat_dat->maxmtu = ETH_DATA_LEN; - plat_dat->bsp_priv = priv_plat; - plat_dat->init = mediatek_dwmac_init; - plat_dat->exit = mediatek_dwmac_exit; + mediatek_dwmac_common_data(pdev, plat_dat, priv_plat); mediatek_dwmac_init(pdev, priv_plat); ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); @@ -434,6 +724,8 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) static const struct of_device_id mediatek_dwmac_match[] = { { .compatible = "mediatek,mt2712-gmac", .data = &mt2712_gmac_variant }, + { .compatible = "mediatek,mt8195-gmac", + .data = &mt8195_gmac_variant }, { } }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c1bfd89a5a1f..4a4b3651ab3e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1668,7 +1668,7 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) { struct stmmac_priv *priv = netdev_priv(dev); u32 rx_count = priv->plat->rx_queues_to_use; - u32 queue; + int queue; int ret; /* RX INITIALIZATION */ @@ -1695,9 +1695,6 @@ err_init_rx_buffers: rx_q->buf_alloc_num = 0; rx_q->xsk_pool = NULL; - if (queue == 0) - break; - queue--; } diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index ba8ad76313a9..42460c0885fc 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -7909,7 +7909,7 @@ static int niu_ldg_assign_ldn(struct niu *np, struct niu_parent *parent, * won't get any interrupts and that's painful to debug. 
*/ if (nr64(LDG_NUM(ldn)) != ldg) { - dev_err(np->device, "Port %u, mis-matched LDG assignment for ldn %d, should be %d is %llu\n", + dev_err(np->device, "Port %u, mismatched LDG assignment for ldn %d, should be %d is %llu\n", np->port, ldn, ldg, (unsigned long long) nr64(LDG_NUM(ldn))); return -EINVAL; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index b818e4579f6f..16507bff652a 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2082,7 +2082,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device, netcp->tx_pool_region_id = temp[1]; if (netcp->tx_pool_size < MAX_SKB_FRAGS) { - dev_err(dev, "tx-pool size too small, must be atleast(%ld)\n", + dev_err(dev, "tx-pool size too small, must be at least %ld\n", MAX_SKB_FRAGS); ret = -ENODEV; goto quit; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index a895ff756093..8f30660224c5 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -56,6 +56,7 @@ struct geneve_config { bool use_udp6_rx_checksums; bool ttl_inherit; enum ifla_geneve_df df; + bool inner_proto_inherit; }; /* Pseudo network device */ @@ -251,17 +252,24 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, } } - skb_reset_mac_header(skb); - skb->protocol = eth_type_trans(skb, geneve->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - if (tun_dst) skb_dst_set(skb, &tun_dst->dst); - /* Ignore packet loops (and multicast echo) */ - if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) { - geneve->dev->stats.rx_errors++; - goto drop; + if (gnvh->proto_type == htons(ETH_P_TEB)) { + skb_reset_mac_header(skb); + skb->protocol = eth_type_trans(skb, geneve->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + + /* Ignore packet loops (and multicast echo) */ + if (ether_addr_equal(eth_hdr(skb)->h_source, + geneve->dev->dev_addr)) { + geneve->dev->stats.rx_errors++; + goto drop; + } + } else { + skb_reset_mac_header(skb); + skb->dev = geneve->dev; + skb->pkt_type = PACKET_HOST; } oiph = skb_network_header(skb); @@ -345,6 +353,7 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct genevehdr *geneveh; struct geneve_dev *geneve; struct geneve_sock *gs; + __be16 inner_proto; int opts_len; /* Need UDP and Geneve header to be present */ @@ -356,7 +365,11 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(geneveh->ver != GENEVE_VER)) goto drop; - if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) + inner_proto = geneveh->proto_type; + + if (unlikely((inner_proto != htons(ETH_P_TEB) && + inner_proto != htons(ETH_P_IP) && + inner_proto != htons(ETH_P_IPV6)))) goto drop; gs = rcu_dereference_sk_user_data(sk); @@ -367,9 +380,14 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!geneve) goto drop; + if (unlikely((!geneve->cfg.inner_proto_inherit && + inner_proto != htons(ETH_P_TEB)))) { + geneve->dev->stats.rx_dropped++; + goto drop; + } + opts_len = geneveh->opt_len * 4; - if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, - htons(ETH_P_TEB), + if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto, !net_eq(geneve->net, dev_net(geneve->dev)))) { geneve->dev->stats.rx_dropped++; goto drop; @@ -717,7 +735,8 @@ static int geneve_stop(struct net_device *dev) } static void geneve_build_header(struct genevehdr *geneveh, - const struct ip_tunnel_info *info) + const struct ip_tunnel_info *info, + __be16 inner_proto) { 
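	/* The on-wire protocol type is now supplied by the caller: ETH_P_TEB for classic Ethernet payloads, or the skb's own protocol when the device was created with inner_proto_inherit (see geneve_build_skb()). */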
geneveh->ver = GENEVE_VER; geneveh->opt_len = info->options_len / 4; @@ -725,7 +744,7 @@ static void geneve_build_header(struct genevehdr *geneveh, geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT); geneveh->rsvd1 = 0; tunnel_id_to_vni(info->key.tun_id, geneveh->vni); - geneveh->proto_type = htons(ETH_P_TEB); + geneveh->proto_type = inner_proto; geneveh->rsvd2 = 0; if (info->key.tun_flags & TUNNEL_GENEVE_OPT) @@ -734,10 +753,12 @@ static void geneve_build_header(struct genevehdr *geneveh, static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, const struct ip_tunnel_info *info, - bool xnet, int ip_hdr_len) + bool xnet, int ip_hdr_len, + bool inner_proto_inherit) { bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); struct genevehdr *gnvh; + __be16 inner_proto; int min_headroom; int err; @@ -755,8 +776,9 @@ static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, goto free_dst; gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len); - geneve_build_header(gnvh, info); - skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB); + geneve_build_header(gnvh, info, inner_proto); + skb_set_inner_protocol(skb, inner_proto); return 0; free_dst: @@ -959,7 +981,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, } } - err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr)); + err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr), + geneve->cfg.inner_proto_inherit); if (unlikely(err)) return err; @@ -1038,7 +1061,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = key->ttl; ttl = ttl ? : ip6_dst_hoplimit(dst); } - err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr)); + err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr), + geneve->cfg.inner_proto_inherit); if (unlikely(err)) return err; @@ -1388,6 +1412,14 @@ static int geneve_configure(struct net *net, struct net_device *dev, dst_cache_reset(&geneve->cfg.info.dst_cache); memcpy(&geneve->cfg, cfg, sizeof(*cfg)); + if (geneve->cfg.inner_proto_inherit) { + dev->header_ops = NULL; + dev->type = ARPHRD_NONE; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->flags = IFF_NOARP; + } + err = register_netdevice(dev); if (err) return err; @@ -1561,10 +1593,18 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], #endif } + if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) { + if (changelink) { + attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT; + goto change_notsup; + } + cfg->inner_proto_inherit = true; + } + return 0; change_notsup: NL_SET_ERR_MSG_ATTR(extack, data[attrtype], - "Changing VNI, Port, endpoint IP address family, external, and UDP checksum attributes are not supported"); + "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported"); return -EOPNOTSUPP; } @@ -1799,6 +1839,10 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit)) goto nla_put_failure; + if (geneve->cfg.inner_proto_inherit && + nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 756714d4ad92..a208e2b1a9af 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1793,7 +1793,7 @@ static int gtp_genl_send_echo_req(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(rt)) { 
netdev_dbg(gtp->dev, "no route for echo request to %pI4\n", &dst_ip); - kfree_skb(skb_to_send); + kfree_skb(skb_to_send); return -ENODEV; } diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index a03d0b474641..3e69079ed694 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -982,10 +982,10 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr) bc->cfg.extmodem = 0; if (strstr(modestr,"extmodem")) bc->cfg.extmodem = 1; - if (strstr(modestr,"noloopback")) - bc->cfg.loopback = 0; if (strstr(modestr,"loopback")) bc->cfg.loopback = 1; + if (strstr(modestr, "noloopback")) + bc->cfg.loopback = 0; if ((cp = strstr(modestr,"fclk="))) { bc->cfg.fclk = simple_strtoul(cp+5, NULL, 0); if (bc->cfg.fclk < 1000000) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3646469433b1..fde1c492ca02 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1587,6 +1587,9 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, pcpu_sum = kvmalloc_array(num_possible_cpus(), sizeof(struct netvsc_ethtool_pcpu_stats), GFP_KERNEL); + if (!pcpu_sum) + return; + netvsc_get_pcpu_stats(dev, pcpu_sum); for_each_present_cpu(cpu) { struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu]; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 4b77819e9328..069e8824c264 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -889,7 +889,7 @@ static void macvlan_set_lockdep_class(struct net_device *dev) static int macvlan_init(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - const struct net_device *lowerdev = vlan->lowerdev; + struct net_device *lowerdev = vlan->lowerdev; struct macvlan_port *port = vlan->port; dev->state = (dev->state & ~MACVLAN_STATE_MASK) | @@ -911,6 +911,9 @@ static int macvlan_init(struct net_device *dev) port->count += 1; + /* Get macvlan's reference to lowerdev */ + dev_hold_track(lowerdev, &vlan->dev_tracker, GFP_KERNEL); + return 0; } @@ -1173,6 +1176,14 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_features_check = passthru_features_check, }; +static void macvlan_dev_free(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + + /* Get rid of the macvlan's reference to lowerdev */ + dev_put_track(vlan->lowerdev, &vlan->dev_tracker); +} + void macvlan_common_setup(struct net_device *dev) { ether_setup(dev); @@ -1184,6 +1195,7 @@ void macvlan_common_setup(struct net_device *dev) dev->priv_flags |= IFF_UNICAST_FLT | IFF_CHANGE_PROTO_DOWN; dev->netdev_ops = &macvlan_netdev_ops; dev->needs_free_netdev = true; + dev->priv_destructor = macvlan_dev_free; dev->header_ops = &macvlan_hard_header_ops; dev->ethtool_ops = &macvlan_ethtool_ops; } diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c index 7d2abaf2b2c9..64fb76c1e395 100644 --- a/drivers/net/mdio/mdio-mscc-miim.c +++ b/drivers/net/mdio/mdio-mscc-miim.c @@ -187,6 +187,13 @@ static const struct regmap_config mscc_miim_regmap_config = { .reg_stride = 4, }; +static const struct regmap_config mscc_miim_phy_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .name = "phy", +}; + int mscc_miim_setup(struct device *dev, struct mii_bus **pbus, const char *name, struct regmap *mii_regmap, int status_offset) { @@ -250,7 +257,7 @@ static int mscc_miim_probe(struct platform_device *pdev) } phy_regmap = devm_regmap_init_mmio(&pdev->dev, phy_regs, - 
&mscc_miim_regmap_config); + &mscc_miim_phy_regmap_config); if (IS_ERR(phy_regmap)) { dev_err(&pdev->dev, "Unable to create phy register regmap\n"); return PTR_ERR(phy_regmap); diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index a1cbfa44a1e1..5735e5b1a2cb 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o netdevsim-objs := \ - netdev.o dev.o ethtool.o fib.o bus.o health.o udp_tunnels.o + netdev.o dev.o ethtool.o fib.o bus.o health.o hwstats.o udp_tunnels.o ifeq ($(CONFIG_BPF_SYSCALL),y) netdevsim-objs += \ diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 08d7b465a0de..dbc8e88d2841 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1498,10 +1498,14 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, if (err) goto err_health_exit; - err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + err = nsim_dev_hwstats_init(nsim_dev); if (err) goto err_psample_exit; + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_hwstats_exit; + nsim_dev->take_snapshot = debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, @@ -1509,6 +1513,8 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, &nsim_dev_take_snapshot_fops); return 0; +err_hwstats_exit: + nsim_dev_hwstats_exit(nsim_dev); err_psample_exit: nsim_dev_psample_exit(nsim_dev); err_health_exit: @@ -1595,15 +1601,21 @@ int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) if (err) goto err_bpf_dev_exit; - err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + err = nsim_dev_hwstats_init(nsim_dev); if (err) goto err_psample_exit; + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_hwstats_exit; + nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; devlink_set_features(devlink, DEVLINK_F_RELOAD); devlink_register(devlink); return 0; +err_hwstats_exit: + nsim_dev_hwstats_exit(nsim_dev); err_psample_exit: nsim_dev_psample_exit(nsim_dev); err_bpf_dev_exit: @@ -1648,6 +1660,7 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) mutex_unlock(&nsim_dev->vfs_lock); nsim_dev_port_del_all(nsim_dev); + nsim_dev_hwstats_exit(nsim_dev); nsim_dev_psample_exit(nsim_dev); nsim_dev_health_exit(nsim_dev); nsim_fib_destroy(devlink, nsim_dev->fib_data); diff --git a/drivers/net/netdevsim/hwstats.c b/drivers/net/netdevsim/hwstats.c new file mode 100644 index 000000000000..605a38e16db0 --- /dev/null +++ b/drivers/net/netdevsim/hwstats.c @@ -0,0 +1,486 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/debugfs.h> + +#include "netdevsim.h" + +#define NSIM_DEV_HWSTATS_TRAFFIC_MS 100 + +static struct list_head * +nsim_dev_hwstats_get_list_head(struct nsim_dev_hwstats *hwstats, + enum netdev_offload_xstats_type type) +{ + switch (type) { + case NETDEV_OFFLOAD_XSTATS_TYPE_L3: + return &hwstats->l3_list; + } + + WARN_ON_ONCE(1); + return NULL; +} + +static void nsim_dev_hwstats_traffic_bump(struct nsim_dev_hwstats *hwstats, + enum netdev_offload_xstats_type type) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + struct list_head *hwsdev_list; + + hwsdev_list = nsim_dev_hwstats_get_list_head(hwstats, type); + if (WARN_ON(!hwsdev_list)) + return; + + list_for_each_entry(hwsdev, hwsdev_list, list) { + if (hwsdev->enabled) { + hwsdev->stats.rx_packets += 1; + hwsdev->stats.tx_packets += 2; + hwsdev->stats.rx_bytes += 100; + hwsdev->stats.tx_bytes += 300; + } + } +} + +static void 
nsim_dev_hwstats_traffic_work(struct work_struct *work) +{ + struct nsim_dev_hwstats *hwstats; + + hwstats = container_of(work, struct nsim_dev_hwstats, traffic_dw.work); + mutex_lock(&hwstats->hwsdev_list_lock); + nsim_dev_hwstats_traffic_bump(hwstats, NETDEV_OFFLOAD_XSTATS_TYPE_L3); + mutex_unlock(&hwstats->hwsdev_list_lock); + + schedule_delayed_work(&hwstats->traffic_dw, + msecs_to_jiffies(NSIM_DEV_HWSTATS_TRAFFIC_MS)); +} + +static struct nsim_dev_hwstats_netdev * +nsim_dev_hwslist_find_hwsdev(struct list_head *hwsdev_list, + int ifindex) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + + list_for_each_entry(hwsdev, hwsdev_list, list) { + if (hwsdev->netdev->ifindex == ifindex) + return hwsdev; + } + + return NULL; +} + +static int nsim_dev_hwsdev_enable(struct nsim_dev_hwstats_netdev *hwsdev, + struct netlink_ext_ack *extack) +{ + if (hwsdev->fail_enable) { + hwsdev->fail_enable = false; + NL_SET_ERR_MSG_MOD(extack, "Stats enablement set to fail"); + return -ECANCELED; + } + + hwsdev->enabled = true; + return 0; +} + +static void nsim_dev_hwsdev_disable(struct nsim_dev_hwstats_netdev *hwsdev) +{ + hwsdev->enabled = false; + memset(&hwsdev->stats, 0, sizeof(hwsdev->stats)); +} + +static int +nsim_dev_hwsdev_report_delta(struct nsim_dev_hwstats_netdev *hwsdev, + struct netdev_notifier_offload_xstats_info *info) +{ + netdev_offload_xstats_report_delta(info->report_delta, &hwsdev->stats); + memset(&hwsdev->stats, 0, sizeof(hwsdev->stats)); + return 0; +} + +static void +nsim_dev_hwsdev_report_used(struct nsim_dev_hwstats_netdev *hwsdev, + struct netdev_notifier_offload_xstats_info *info) +{ + if (hwsdev->enabled) + netdev_offload_xstats_report_used(info->report_used); +} + +static int nsim_dev_hwstats_event_off_xstats(struct nsim_dev_hwstats *hwstats, + struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_offload_xstats_info *info; + struct nsim_dev_hwstats_netdev *hwsdev; + struct list_head *hwsdev_list; + int err = 0; + + info = ptr; + hwsdev_list = nsim_dev_hwstats_get_list_head(hwstats, info->type); + if (!hwsdev_list) + return 0; + + mutex_lock(&hwstats->hwsdev_list_lock); + + hwsdev = nsim_dev_hwslist_find_hwsdev(hwsdev_list, dev->ifindex); + if (!hwsdev) + goto out; + + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + err = nsim_dev_hwsdev_enable(hwsdev, info->info.extack); + break; + case NETDEV_OFFLOAD_XSTATS_DISABLE: + nsim_dev_hwsdev_disable(hwsdev); + break; + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + nsim_dev_hwsdev_report_used(hwsdev, info); + break; + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + err = nsim_dev_hwsdev_report_delta(hwsdev, info); + break; + } + +out: + mutex_unlock(&hwstats->hwsdev_list_lock); + return err; +} + +static void nsim_dev_hwsdev_fini(struct nsim_dev_hwstats_netdev *hwsdev) +{ + dev_put(hwsdev->netdev); + kfree(hwsdev); +} + +static void +__nsim_dev_hwstats_event_unregister(struct nsim_dev_hwstats *hwstats, + struct net_device *dev, + enum netdev_offload_xstats_type type) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + struct list_head *hwsdev_list; + + hwsdev_list = nsim_dev_hwstats_get_list_head(hwstats, type); + if (WARN_ON(!hwsdev_list)) + return; + + hwsdev = nsim_dev_hwslist_find_hwsdev(hwsdev_list, dev->ifindex); + if (!hwsdev) + return; + + list_del(&hwsdev->list); + nsim_dev_hwsdev_fini(hwsdev); +} + +static void nsim_dev_hwstats_event_unregister(struct nsim_dev_hwstats *hwstats, + struct net_device *dev) +{ + mutex_lock(&hwstats->hwsdev_list_lock); + __nsim_dev_hwstats_event_unregister(hwstats, dev, + 
NETDEV_OFFLOAD_XSTATS_TYPE_L3); + mutex_unlock(&hwstats->hwsdev_list_lock); +} + +static int nsim_dev_hwstats_event(struct nsim_dev_hwstats *hwstats, + struct net_device *dev, + unsigned long event, void *ptr) +{ + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + case NETDEV_OFFLOAD_XSTATS_DISABLE: + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return nsim_dev_hwstats_event_off_xstats(hwstats, dev, + event, ptr); + case NETDEV_UNREGISTER: + nsim_dev_hwstats_event_unregister(hwstats, dev); + break; + } + + return 0; +} + +static int nsim_dev_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nsim_dev_hwstats *hwstats; + int err = 0; + + hwstats = container_of(nb, struct nsim_dev_hwstats, netdevice_nb); + err = nsim_dev_hwstats_event(hwstats, dev, event, ptr); + if (err) + return notifier_from_errno(err); + + return NOTIFY_OK; +} + +static int +nsim_dev_hwstats_enable_ifindex(struct nsim_dev_hwstats *hwstats, + int ifindex, + enum netdev_offload_xstats_type type, + struct list_head *hwsdev_list) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + struct nsim_dev *nsim_dev; + struct net_device *netdev; + bool notify = false; + struct net *net; + int err = 0; + + nsim_dev = container_of(hwstats, struct nsim_dev, hwstats); + net = nsim_dev_net(nsim_dev); + + rtnl_lock(); + mutex_lock(&hwstats->hwsdev_list_lock); + hwsdev = nsim_dev_hwslist_find_hwsdev(hwsdev_list, ifindex); + if (hwsdev) + goto out_unlock_list; + + netdev = dev_get_by_index(net, ifindex); + if (!netdev) { + err = -ENODEV; + goto out_unlock_list; + } + + hwsdev = kzalloc(sizeof(*hwsdev), GFP_KERNEL); + if (!hwsdev) { + err = -ENOMEM; + goto out_put_netdev; + } + + hwsdev->netdev = netdev; + list_add_tail(&hwsdev->list, hwsdev_list); + mutex_unlock(&hwstats->hwsdev_list_lock); + + if (netdev_offload_xstats_enabled(netdev, type)) { + nsim_dev_hwsdev_enable(hwsdev, NULL); + notify = true; + } + + if (notify) + rtnl_offload_xstats_notify(netdev); + rtnl_unlock(); + return err; + +out_put_netdev: + dev_put(netdev); +out_unlock_list: + mutex_unlock(&hwstats->hwsdev_list_lock); + rtnl_unlock(); + return err; +} + +static int +nsim_dev_hwstats_disable_ifindex(struct nsim_dev_hwstats *hwstats, + int ifindex, + enum netdev_offload_xstats_type type, + struct list_head *hwsdev_list) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + int err = 0; + + rtnl_lock(); + mutex_lock(&hwstats->hwsdev_list_lock); + hwsdev = nsim_dev_hwslist_find_hwsdev(hwsdev_list, ifindex); + if (hwsdev) + list_del(&hwsdev->list); + mutex_unlock(&hwstats->hwsdev_list_lock); + + if (!hwsdev) { + err = -ENOENT; + goto unlock_out; + } + + if (netdev_offload_xstats_enabled(hwsdev->netdev, type)) { + netdev_offload_xstats_push_delta(hwsdev->netdev, type, + &hwsdev->stats); + rtnl_offload_xstats_notify(hwsdev->netdev); + } + nsim_dev_hwsdev_fini(hwsdev); + +unlock_out: + rtnl_unlock(); + return err; +} + +static int +nsim_dev_hwstats_fail_ifindex(struct nsim_dev_hwstats *hwstats, + int ifindex, + enum netdev_offload_xstats_type type, + struct list_head *hwsdev_list) +{ + struct nsim_dev_hwstats_netdev *hwsdev; + int err = 0; + + mutex_lock(&hwstats->hwsdev_list_lock); + + hwsdev = nsim_dev_hwslist_find_hwsdev(hwsdev_list, ifindex); + if (!hwsdev) { + err = -ENOENT; + goto err_hwsdev_list_unlock; + } + + hwsdev->fail_enable = true; + +err_hwsdev_list_unlock: + mutex_unlock(&hwstats->hwsdev_list_lock); + return err; +} + +enum 
nsim_dev_hwstats_do { + NSIM_DEV_HWSTATS_DO_DISABLE, + NSIM_DEV_HWSTATS_DO_ENABLE, + NSIM_DEV_HWSTATS_DO_FAIL, +}; + +struct nsim_dev_hwstats_fops { + const struct file_operations fops; + enum nsim_dev_hwstats_do action; + enum netdev_offload_xstats_type type; +}; + +static ssize_t +nsim_dev_hwstats_do_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev_hwstats *hwstats = file->private_data; + struct nsim_dev_hwstats_fops *hwsfops; + struct list_head *hwsdev_list; + int ifindex; + int err; + + hwsfops = container_of(debugfs_real_fops(file), + struct nsim_dev_hwstats_fops, fops); + + err = kstrtoint_from_user(data, count, 0, &ifindex); + if (err) + return err; + + hwsdev_list = nsim_dev_hwstats_get_list_head(hwstats, hwsfops->type); + if (WARN_ON(!hwsdev_list)) + return -EINVAL; + + switch (hwsfops->action) { + case NSIM_DEV_HWSTATS_DO_DISABLE: + err = nsim_dev_hwstats_disable_ifindex(hwstats, ifindex, + hwsfops->type, + hwsdev_list); + break; + case NSIM_DEV_HWSTATS_DO_ENABLE: + err = nsim_dev_hwstats_enable_ifindex(hwstats, ifindex, + hwsfops->type, + hwsdev_list); + break; + case NSIM_DEV_HWSTATS_DO_FAIL: + err = nsim_dev_hwstats_fail_ifindex(hwstats, ifindex, + hwsfops->type, + hwsdev_list); + break; + } + if (err) + return err; + + return count; +} + +#define NSIM_DEV_HWSTATS_FOPS(ACTION, TYPE) \ + { \ + .fops = { \ + .open = simple_open, \ + .write = nsim_dev_hwstats_do_write, \ + .llseek = generic_file_llseek, \ + .owner = THIS_MODULE, \ + }, \ + .action = ACTION, \ + .type = TYPE, \ + } + +static const struct nsim_dev_hwstats_fops nsim_dev_hwstats_l3_disable_fops = + NSIM_DEV_HWSTATS_FOPS(NSIM_DEV_HWSTATS_DO_DISABLE, + NETDEV_OFFLOAD_XSTATS_TYPE_L3); + +static const struct nsim_dev_hwstats_fops nsim_dev_hwstats_l3_enable_fops = + NSIM_DEV_HWSTATS_FOPS(NSIM_DEV_HWSTATS_DO_ENABLE, + NETDEV_OFFLOAD_XSTATS_TYPE_L3); + +static const struct nsim_dev_hwstats_fops nsim_dev_hwstats_l3_fail_fops = + NSIM_DEV_HWSTATS_FOPS(NSIM_DEV_HWSTATS_DO_FAIL, + NETDEV_OFFLOAD_XSTATS_TYPE_L3); + +#undef NSIM_DEV_HWSTATS_FOPS + +int nsim_dev_hwstats_init(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_hwstats *hwstats = &nsim_dev->hwstats; + struct net *net = nsim_dev_net(nsim_dev); + int err; + + mutex_init(&hwstats->hwsdev_list_lock); + INIT_LIST_HEAD(&hwstats->l3_list); + + hwstats->netdevice_nb.notifier_call = nsim_dev_netdevice_event; + err = register_netdevice_notifier_net(net, &hwstats->netdevice_nb); + if (err) + goto err_mutex_destroy; + + hwstats->ddir = debugfs_create_dir("hwstats", nsim_dev->ddir); + if (IS_ERR(hwstats->ddir)) { + err = PTR_ERR(hwstats->ddir); + goto err_unregister_notifier; + } + + hwstats->l3_ddir = debugfs_create_dir("l3", hwstats->ddir); + if (IS_ERR(hwstats->l3_ddir)) { + err = PTR_ERR(hwstats->l3_ddir); + goto err_remove_hwstats_recursive; + } + + debugfs_create_file("enable_ifindex", 0600, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_enable_fops.fops); + debugfs_create_file("disable_ifindex", 0600, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_disable_fops.fops); + debugfs_create_file("fail_next_enable", 0600, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_fail_fops.fops); + + INIT_DELAYED_WORK(&hwstats->traffic_dw, + &nsim_dev_hwstats_traffic_work); + schedule_delayed_work(&hwstats->traffic_dw, + msecs_to_jiffies(NSIM_DEV_HWSTATS_TRAFFIC_MS)); + return 0; + +err_remove_hwstats_recursive: + debugfs_remove_recursive(hwstats->ddir); +err_unregister_notifier: + unregister_netdevice_notifier_net(net, 
&hwstats->netdevice_nb); +err_mutex_destroy: + mutex_destroy(&hwstats->hwsdev_list_lock); + return err; +} + +static void nsim_dev_hwsdev_list_wipe(struct nsim_dev_hwstats *hwstats, + enum netdev_offload_xstats_type type) +{ + struct nsim_dev_hwstats_netdev *hwsdev, *tmp; + struct list_head *hwsdev_list; + + hwsdev_list = nsim_dev_hwstats_get_list_head(hwstats, type); + if (WARN_ON(!hwsdev_list)) + return; + + mutex_lock(&hwstats->hwsdev_list_lock); + list_for_each_entry_safe(hwsdev, tmp, hwsdev_list, list) { + list_del(&hwsdev->list); + nsim_dev_hwsdev_fini(hwsdev); + } + mutex_unlock(&hwstats->hwsdev_list_lock); +} + +void nsim_dev_hwstats_exit(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_hwstats *hwstats = &nsim_dev->hwstats; + struct net *net = nsim_dev_net(nsim_dev); + + cancel_delayed_work_sync(&hwstats->traffic_dw); + debugfs_remove_recursive(hwstats->ddir); + unregister_netdevice_notifier_net(net, &hwstats->netdevice_nb); + nsim_dev_hwsdev_list_wipe(hwstats, NETDEV_OFFLOAD_XSTATS_TYPE_L3); + mutex_destroy(&hwstats->hwsdev_list_lock); +} diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index c49771f27f17..128f229d9b4d 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -184,6 +184,28 @@ struct nsim_dev_health { int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink); void nsim_dev_health_exit(struct nsim_dev *nsim_dev); +struct nsim_dev_hwstats_netdev { + struct list_head list; + struct net_device *netdev; + struct rtnl_hw_stats64 stats; + bool enabled; + bool fail_enable; +}; + +struct nsim_dev_hwstats { + struct dentry *ddir; + struct dentry *l3_ddir; + + struct mutex hwsdev_list_lock; /* protects hwsdev list(s) */ + struct list_head l3_list; + + struct notifier_block netdevice_nb; + struct delayed_work traffic_dw; +}; + +int nsim_dev_hwstats_init(struct nsim_dev *nsim_dev); +void nsim_dev_hwstats_exit(struct nsim_dev *nsim_dev); + #if IS_ENABLED(CONFIG_PSAMPLE) int nsim_dev_psample_init(struct nsim_dev *nsim_dev); void nsim_dev_psample_exit(struct nsim_dev *nsim_dev); @@ -261,6 +283,7 @@ struct nsim_dev { bool fail_reload; struct devlink_region *dummy_region; struct nsim_dev_health health; + struct nsim_dev_hwstats hwstats; struct flow_action_cookie *fa_cookie; spinlock_t fa_cookie_lock; /* protects fa_cookie */ bool fail_trap_group_set; diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 902495afcb38..ea7571a2b39b 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -220,6 +220,7 @@ config MEDIATEK_GE_PHY config MICREL_PHY tristate "Micrel PHYs" + depends on PTP_1588_CLOCK_OPTIONAL help Supports the KSZ9021, VSC8201, KS8001 PHYs. 
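The netdevsim hwstats module above doubles as a reference consumer of the NETDEV_OFFLOAD_XSTATS_* notifier events: ENABLE/DISABLE toggle collection, REPORT_USED answers whether hardware is collecting for a netdev, and REPORT_DELTA hands the accumulated counters back to the core and zeroes them. A minimal sketch of the same contract from a hardware driver's point of view follows; it is not part of the patch, and struct foo_hwstats and foo_lookup() are hypothetical stand-ins for driver state:

#include <linux/netdevice.h>
#include <linux/notifier.h>

struct foo_hwstats {
	struct rtnl_hw_stats64 stats;	/* accumulated by the HW/driver */
	bool enabled;
};

/* hypothetical: map a netdev to the driver's per-port stats state */
static struct foo_hwstats *foo_lookup(struct net_device *dev);

static int foo_netdevice_event(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct netdev_notifier_offload_xstats_info *info;
	struct foo_hwstats *hw;

	/* ptr carries the offload-xstats info only for these four events */
	switch (event) {
	case NETDEV_OFFLOAD_XSTATS_ENABLE:
	case NETDEV_OFFLOAD_XSTATS_DISABLE:
	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
		break;
	default:
		return NOTIFY_DONE;
	}

	info = ptr;
	if (info->type != NETDEV_OFFLOAD_XSTATS_TYPE_L3)
		return NOTIFY_DONE;

	hw = foo_lookup(dev);
	if (!hw)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_OFFLOAD_XSTATS_ENABLE:
		hw->enabled = true;		/* start HW collection */
		break;
	case NETDEV_OFFLOAD_XSTATS_DISABLE:
		hw->enabled = false;
		memset(&hw->stats, 0, sizeof(hw->stats));
		break;
	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
		if (hw->enabled)
			netdev_offload_xstats_report_used(info->report_used);
		break;
	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
		/* hand counters to the core, then restart from zero */
		netdev_offload_xstats_report_delta(info->report_delta,
						   &hw->stats);
		memset(&hw->stats, 0, sizeof(hw->stats));
		break;
	}

	return NOTIFY_OK;
}

As in the netdevsim code above, the notifier would be registered per netns with register_netdevice_notifier_net().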
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 2429db614b59..2702faf7b0f6 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1687,8 +1687,8 @@ static int marvell_suspend(struct phy_device *phydev) int err; /* Suspend the fiber mode first */ - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, - phydev->supported)) { + if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; @@ -1722,8 +1722,8 @@ static int marvell_resume(struct phy_device *phydev) int err; /* Resume the fiber mode first */ - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, - phydev->supported)) { + if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index ebfeeb3c67c1..7e3017e7a1c0 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -2685,3 +2685,6 @@ MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl); MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver"); MODULE_AUTHOR("Nagaraju Lakkaraju"); MODULE_LICENSE("Dual MIT/GPL"); + +MODULE_FIRMWARE(MSCC_VSC8584_REVB_INT8051_FW); +MODULE_FIRMWARE(MSCC_VSC8574_REVB_INT8051_FW); diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index c1512c9925a6..15aa5ac1ff49 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -75,6 +75,12 @@ static const struct sfp_quirk sfp_quirks[] = { .part = "MA5671A", .modes = sfp_quirk_2500basex, }, { + // Lantech 8330-262D-E can operate at 2500base-X, but + // incorrectly reports 2500MBd NRZ in its EEPROM + .vendor = "Lantech", + .part = "8330-262D-E", + .modes = sfp_quirk_2500basex, + }, { .vendor = "UBNT", .part = "UF-INSTANT", .modes = sfp_quirk_ubnt_uf_instant, diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 714cafcf6c6c..85e362461d71 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -472,14 +472,13 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, memset(&fl6, 0, sizeof(fl6)); /* needed to match OIF rule */ - fl6.flowi6_oif = dev->ifindex; + fl6.flowi6_l3mdev = dev->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = iph->nexthdr; - fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst) || dst == dst_null) @@ -551,10 +550,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, memset(&fl4, 0, sizeof(fl4)); /* needed to match OIF rule */ - fl4.flowi4_oif = vrf_dev->ifindex; + fl4.flowi4_l3mdev = vrf_dev->ifindex; fl4.flowi4_iif = LOOPBACK_IFINDEX; fl4.flowi4_tos = RT_TOS(ip4h->tos); - fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF; + fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_proto = ip4h->protocol; fl4.daddr = ip4h->daddr; fl4.saddr = ip4h->saddr; diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 2b78ed89e1b6..cd438f76f284 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2611,36 +2611,9 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ath10k_mac_handle_beacon(ar, skb); if (ieee80211_is_beacon(hdr->frame_control) || - ieee80211_is_probe_resp(hdr->frame_control)) { - struct ieee80211_mgmt *mgmt = (void
*)skb->data; - enum cfg80211_bss_frame_type ftype; - u8 *ies; - int ies_ch; - + ieee80211_is_probe_resp(hdr->frame_control)) status->boottime_ns = ktime_get_boottime_ns(); - if (!ar->scan_channel) - goto drop; - - ies = mgmt->u.beacon.variable; - - if (ieee80211_is_beacon(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_BEACON; - else - ftype = CFG80211_BSS_FTYPE_PRESP; - - ies_ch = cfg80211_get_ies_channel_number(mgmt->u.beacon.variable, - skb_tail_pointer(skb) - ies, - sband->band, ftype); - - if (ies_ch > 0 && ies_ch != channel) { - ath10k_dbg(ar, ATH10K_DBG_MGMT, - "channel mismatched ds channel %d scan channel %d\n", - ies_ch, channel); - goto drop; - } - } - ath10k_dbg(ar, ATH10K_DBG_MGMT, "event mgmt rx skb %pK len %d ftype %02x stype %02x\n", skb, skb->len, @@ -2654,10 +2627,6 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ieee80211_rx_ni(ar->hw, skb); return 0; - -drop: - dev_kfree_skb(skb); - return 0; } static int freq_to_idx(struct ath10k *ar, int freq) diff --git a/drivers/phy/freescale/Kconfig b/drivers/phy/freescale/Kconfig index c3669c28ea9f..0e91cd99c36b 100644 --- a/drivers/phy/freescale/Kconfig +++ b/drivers/phy/freescale/Kconfig @@ -22,3 +22,13 @@ config PHY_FSL_IMX8M_PCIE help Enable this to add support for the PCIE PHY as found on i.MX8M family of SOCs. + +config PHY_FSL_LYNX_28G + tristate "Freescale Layerscape Lynx 28G SerDes PHY support" + depends on OF + select GENERIC_PHY + help + Enable this to add support for the Lynx SerDes 28G PHY as + found on NXP's Layerscape platforms such as LX2160A. + Used to change the protocol running on SerDes lanes at runtime. + Only useful for a restricted set of Ethernet protocols. diff --git a/drivers/phy/freescale/Makefile b/drivers/phy/freescale/Makefile index 55d07c742ab0..3518d5dbe8a7 100644 --- a/drivers/phy/freescale/Makefile +++ b/drivers/phy/freescale/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_PHY_FSL_IMX8MQ_USB) += phy-fsl-imx8mq-usb.o obj-$(CONFIG_PHY_MIXEL_MIPI_DPHY) += phy-fsl-imx8-mipi-dphy.o obj-$(CONFIG_PHY_FSL_IMX8M_PCIE) += phy-fsl-imx8m-pcie.o +obj-$(CONFIG_PHY_FSL_LYNX_28G) += phy-fsl-lynx-28g.o diff --git a/drivers/phy/freescale/phy-fsl-lynx-28g.c b/drivers/phy/freescale/phy-fsl-lynx-28g.c new file mode 100644 index 000000000000..569f12af2aaf --- /dev/null +++ b/drivers/phy/freescale/phy-fsl-lynx-28g.c @@ -0,0 +1,623 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (c) 2021-2022 NXP. 
*/ + +#include <linux/module.h> +#include <linux/phy.h> +#include <linux/phy/phy.h> +#include <linux/platform_device.h> +#include <linux/workqueue.h> + +#define LYNX_28G_NUM_LANE 8 +#define LYNX_28G_NUM_PLL 2 + +/* General registers per SerDes block */ +#define LYNX_28G_PCC8 0x10a0 +#define LYNX_28G_PCC8_SGMII 0x1 +#define LYNX_28G_PCC8_SGMII_DIS 0x0 + +#define LYNX_28G_PCCC 0x10b0 +#define LYNX_28G_PCCC_10GBASER 0x9 +#define LYNX_28G_PCCC_USXGMII 0x1 +#define LYNX_28G_PCCC_SXGMII_DIS 0x0 + +#define LYNX_28G_LNa_PCC_OFFSET(lane) (4 * (LYNX_28G_NUM_LANE - (lane->id) - 1)) + +/* Per PLL registers */ +#define LYNX_28G_PLLnRSTCTL(pll) (0x400 + (pll) * 0x100 + 0x0) +#define LYNX_28G_PLLnRSTCTL_DIS(rstctl) (((rstctl) & BIT(24)) >> 24) +#define LYNX_28G_PLLnRSTCTL_LOCK(rstctl) (((rstctl) & BIT(23)) >> 23) + +#define LYNX_28G_PLLnCR0(pll) (0x400 + (pll) * 0x100 + 0x4) +#define LYNX_28G_PLLnCR0_REFCLK_SEL(cr0) (((cr0) & GENMASK(20, 16))) +#define LYNX_28G_PLLnCR0_REFCLK_SEL_100MHZ 0x0 +#define LYNX_28G_PLLnCR0_REFCLK_SEL_125MHZ 0x10000 +#define LYNX_28G_PLLnCR0_REFCLK_SEL_156MHZ 0x20000 +#define LYNX_28G_PLLnCR0_REFCLK_SEL_150MHZ 0x30000 +#define LYNX_28G_PLLnCR0_REFCLK_SEL_161MHZ 0x40000 + +#define LYNX_28G_PLLnCR1(pll) (0x400 + (pll) * 0x100 + 0x8) +#define LYNX_28G_PLLnCR1_FRATE_SEL(cr1) (((cr1) & GENMASK(28, 24))) +#define LYNX_28G_PLLnCR1_FRATE_5G_10GVCO 0x0 +#define LYNX_28G_PLLnCR1_FRATE_5G_25GVCO 0x10000000 +#define LYNX_28G_PLLnCR1_FRATE_10G_20GVCO 0x6000000 + +/* Per SerDes lane registers */ +/* Lane a General Control Register */ +#define LYNX_28G_LNaGCR0(lane) (0x800 + (lane) * 0x100 + 0x0) +#define LYNX_28G_LNaGCR0_PROTO_SEL_MSK GENMASK(7, 3) +#define LYNX_28G_LNaGCR0_PROTO_SEL_SGMII 0x8 +#define LYNX_28G_LNaGCR0_PROTO_SEL_XFI 0x50 +#define LYNX_28G_LNaGCR0_IF_WIDTH_MSK GENMASK(2, 0) +#define LYNX_28G_LNaGCR0_IF_WIDTH_10_BIT 0x0 +#define LYNX_28G_LNaGCR0_IF_WIDTH_20_BIT 0x2 + +/* Lane a Tx Reset Control Register */ +#define LYNX_28G_LNaTRSTCTL(lane) (0x800 + (lane) * 0x100 + 0x20) +#define LYNX_28G_LNaTRSTCTL_HLT_REQ BIT(27) +#define LYNX_28G_LNaTRSTCTL_RST_DONE BIT(30) +#define LYNX_28G_LNaTRSTCTL_RST_REQ BIT(31) + +/* Lane a Tx General Control Register */ +#define LYNX_28G_LNaTGCR0(lane) (0x800 + (lane) * 0x100 + 0x24) +#define LYNX_28G_LNaTGCR0_USE_PLLF 0x0 +#define LYNX_28G_LNaTGCR0_USE_PLLS BIT(28) +#define LYNX_28G_LNaTGCR0_USE_PLL_MSK BIT(28) +#define LYNX_28G_LNaTGCR0_N_RATE_FULL 0x0 +#define LYNX_28G_LNaTGCR0_N_RATE_HALF 0x1000000 +#define LYNX_28G_LNaTGCR0_N_RATE_QUARTER 0x2000000 +#define LYNX_28G_LNaTGCR0_N_RATE_MSK GENMASK(26, 24) + +#define LYNX_28G_LNaTECR0(lane) (0x800 + (lane) * 0x100 + 0x30) + +/* Lane a Rx Reset Control Register */ +#define LYNX_28G_LNaRRSTCTL(lane) (0x800 + (lane) * 0x100 + 0x40) +#define LYNX_28G_LNaRRSTCTL_HLT_REQ BIT(27) +#define LYNX_28G_LNaRRSTCTL_RST_DONE BIT(30) +#define LYNX_28G_LNaRRSTCTL_RST_REQ BIT(31) +#define LYNX_28G_LNaRRSTCTL_CDR_LOCK BIT(12) + +/* Lane a Rx General Control Register */ +#define LYNX_28G_LNaRGCR0(lane) (0x800 + (lane) * 0x100 + 0x44) +#define LYNX_28G_LNaRGCR0_USE_PLLF 0x0 +#define LYNX_28G_LNaRGCR0_USE_PLLS BIT(28) +#define LYNX_28G_LNaRGCR0_USE_PLL_MSK BIT(28) +#define LYNX_28G_LNaRGCR0_N_RATE_MSK GENMASK(26, 24) +#define LYNX_28G_LNaRGCR0_N_RATE_FULL 0x0 +#define LYNX_28G_LNaRGCR0_N_RATE_HALF 0x1000000 +#define LYNX_28G_LNaRGCR0_N_RATE_QUARTER 0x2000000 + +#define LYNX_28G_LNaRGCR1(lane) (0x800 + (lane) * 0x100 + 0x48) + +#define LYNX_28G_LNaRECR0(lane) (0x800 +
(lane) * 0x100 + 0x50) +#define LYNX_28G_LNaRECR1(lane) (0x800 + (lane) * 0x100 + 0x54) +#define LYNX_28G_LNaRECR2(lane) (0x800 + (lane) * 0x100 + 0x58) + +#define LYNX_28G_LNaRSCCR0(lane) (0x800 + (lane) * 0x100 + 0x74) + +#define LYNX_28G_LNaPSS(lane) (0x1000 + (lane) * 0x4) +#define LYNX_28G_LNaPSS_TYPE(pss) (((pss) & GENMASK(30, 24)) >> 24) +#define LYNX_28G_LNaPSS_TYPE_SGMII 0x4 +#define LYNX_28G_LNaPSS_TYPE_XFI 0x28 + +#define LYNX_28G_SGMIIaCR1(lane) (0x1804 + (lane) * 0x10) +#define LYNX_28G_SGMIIaCR1_SGPCS_EN BIT(11) +#define LYNX_28G_SGMIIaCR1_SGPCS_DIS 0x0 +#define LYNX_28G_SGMIIaCR1_SGPCS_MSK BIT(11) + +struct lynx_28g_priv; + +struct lynx_28g_pll { + struct lynx_28g_priv *priv; + u32 rstctl, cr0, cr1; + int id; + DECLARE_PHY_INTERFACE_MASK(supported); +}; + +struct lynx_28g_lane { + struct lynx_28g_priv *priv; + struct phy *phy; + bool powered_up; + bool init; + unsigned int id; + phy_interface_t interface; +}; + +struct lynx_28g_priv { + void __iomem *base; + struct device *dev; + struct lynx_28g_pll pll[LYNX_28G_NUM_PLL]; + struct lynx_28g_lane lane[LYNX_28G_NUM_LANE]; + + struct delayed_work cdr_check; +}; + +static void lynx_28g_rmw(struct lynx_28g_priv *priv, unsigned long off, + u32 val, u32 mask) +{ + void __iomem *reg = priv->base + off; + u32 orig, tmp; + + orig = ioread32(reg); + tmp = orig & ~mask; + tmp |= val; + iowrite32(tmp, reg); +} + +#define lynx_28g_lane_rmw(lane, reg, val, mask) \ + lynx_28g_rmw((lane)->priv, LYNX_28G_##reg(lane->id), \ + LYNX_28G_##reg##_##val, LYNX_28G_##reg##_##mask) +#define lynx_28g_lane_read(lane, reg) \ + ioread32((lane)->priv->base + LYNX_28G_##reg((lane)->id)) +#define lynx_28g_pll_read(pll, reg) \ + ioread32((pll)->priv->base + LYNX_28G_##reg((pll)->id)) + +static bool lynx_28g_supports_interface(struct lynx_28g_priv *priv, int intf) +{ + int i; + + for (i = 0; i < LYNX_28G_NUM_PLL; i++) { + if (LYNX_28G_PLLnRSTCTL_DIS(priv->pll[i].rstctl)) + continue; + + if (test_bit(intf, priv->pll[i].supported)) + return true; + } + + return false; +} + +static struct lynx_28g_pll *lynx_28g_pll_get(struct lynx_28g_priv *priv, + phy_interface_t intf) +{ + struct lynx_28g_pll *pll; + int i; + + for (i = 0; i < LYNX_28G_NUM_PLL; i++) { + pll = &priv->pll[i]; + + if (LYNX_28G_PLLnRSTCTL_DIS(pll->rstctl)) + continue; + + if (test_bit(intf, pll->supported)) + return pll; + } + + return NULL; +} + +static void lynx_28g_lane_set_nrate(struct lynx_28g_lane *lane, + struct lynx_28g_pll *pll, + phy_interface_t intf) +{ + switch (LYNX_28G_PLLnCR1_FRATE_SEL(pll->cr1)) { + case LYNX_28G_PLLnCR1_FRATE_5G_10GVCO: + case LYNX_28G_PLLnCR1_FRATE_5G_25GVCO: + switch (intf) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + lynx_28g_lane_rmw(lane, LNaTGCR0, N_RATE_QUARTER, N_RATE_MSK); + lynx_28g_lane_rmw(lane, LNaRGCR0, N_RATE_QUARTER, N_RATE_MSK); + break; + default: + break; + } + break; + case LYNX_28G_PLLnCR1_FRATE_10G_20GVCO: + switch (intf) { + case PHY_INTERFACE_MODE_10GBASER: + case PHY_INTERFACE_MODE_USXGMII: + lynx_28g_lane_rmw(lane, LNaTGCR0, N_RATE_FULL, N_RATE_MSK); + lynx_28g_lane_rmw(lane, LNaRGCR0, N_RATE_FULL, N_RATE_MSK); + break; + default: + break; + } + break; + default: + break; + } +} + +static void lynx_28g_lane_set_pll(struct lynx_28g_lane *lane, + struct lynx_28g_pll *pll) +{ + if (pll->id == 0) { + lynx_28g_lane_rmw(lane, LNaTGCR0, USE_PLLF, USE_PLL_MSK); + lynx_28g_lane_rmw(lane, LNaRGCR0, USE_PLLF, USE_PLL_MSK); + } else { + lynx_28g_lane_rmw(lane, LNaTGCR0, USE_PLLS, USE_PLL_MSK); + lynx_28g_lane_rmw(lane, 
LNaRGCR0, USE_PLLS, USE_PLL_MSK); + } +} + +static void lynx_28g_cleanup_lane(struct lynx_28g_lane *lane) +{ + u32 lane_offset = LYNX_28G_LNa_PCC_OFFSET(lane); + struct lynx_28g_priv *priv = lane->priv; + + /* Cleanup the protocol configuration registers of the current protocol */ + switch (lane->interface) { + case PHY_INTERFACE_MODE_10GBASER: + lynx_28g_rmw(priv, LYNX_28G_PCCC, + LYNX_28G_PCCC_SXGMII_DIS << lane_offset, + GENMASK(3, 0) << lane_offset); + break; + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + lynx_28g_rmw(priv, LYNX_28G_PCC8, + LYNX_28G_PCC8_SGMII_DIS << lane_offset, + GENMASK(3, 0) << lane_offset); + break; + default: + break; + } +} + +static void lynx_28g_lane_set_sgmii(struct lynx_28g_lane *lane) +{ + u32 lane_offset = LYNX_28G_LNa_PCC_OFFSET(lane); + struct lynx_28g_priv *priv = lane->priv; + struct lynx_28g_pll *pll; + + lynx_28g_cleanup_lane(lane); + + /* Setup the lane to run in SGMII */ + lynx_28g_rmw(priv, LYNX_28G_PCC8, + LYNX_28G_PCC8_SGMII << lane_offset, + GENMASK(3, 0) << lane_offset); + + /* Setup the protocol select and SerDes parallel interface width */ + lynx_28g_lane_rmw(lane, LNaGCR0, PROTO_SEL_SGMII, PROTO_SEL_MSK); + lynx_28g_lane_rmw(lane, LNaGCR0, IF_WIDTH_10_BIT, IF_WIDTH_MSK); + + /* Switch to the PLL that works with this interface type */ + pll = lynx_28g_pll_get(priv, PHY_INTERFACE_MODE_SGMII); + lynx_28g_lane_set_pll(lane, pll); + + /* Choose the portion of clock net to be used on this lane */ + lynx_28g_lane_set_nrate(lane, pll, PHY_INTERFACE_MODE_SGMII); + + /* Enable the SGMII PCS */ + lynx_28g_lane_rmw(lane, SGMIIaCR1, SGPCS_EN, SGPCS_MSK); + + /* Configure the appropriate equalization parameters for the protocol */ + iowrite32(0x00808006, priv->base + LYNX_28G_LNaTECR0(lane->id)); + iowrite32(0x04310000, priv->base + LYNX_28G_LNaRGCR1(lane->id)); + iowrite32(0x9f800000, priv->base + LYNX_28G_LNaRECR0(lane->id)); + iowrite32(0x001f0000, priv->base + LYNX_28G_LNaRECR1(lane->id)); + iowrite32(0x00000000, priv->base + LYNX_28G_LNaRECR2(lane->id)); + iowrite32(0x00000000, priv->base + LYNX_28G_LNaRSCCR0(lane->id)); +} + +static void lynx_28g_lane_set_10gbaser(struct lynx_28g_lane *lane) +{ + u32 lane_offset = LYNX_28G_LNa_PCC_OFFSET(lane); + struct lynx_28g_priv *priv = lane->priv; + struct lynx_28g_pll *pll; + + lynx_28g_cleanup_lane(lane); + + /* Enable the SXGMII lane */ + lynx_28g_rmw(priv, LYNX_28G_PCCC, + LYNX_28G_PCCC_10GBASER << lane_offset, + GENMASK(3, 0) << lane_offset); + + /* Setup the protocol select and SerDes parallel interface width */ + lynx_28g_lane_rmw(lane, LNaGCR0, PROTO_SEL_XFI, PROTO_SEL_MSK); + lynx_28g_lane_rmw(lane, LNaGCR0, IF_WIDTH_20_BIT, IF_WIDTH_MSK); + + /* Switch to the PLL that works with this interface type */ + pll = lynx_28g_pll_get(priv, PHY_INTERFACE_MODE_10GBASER); + lynx_28g_lane_set_pll(lane, pll); + + /* Choose the portion of clock net to be used on this lane */ + lynx_28g_lane_set_nrate(lane, pll, PHY_INTERFACE_MODE_10GBASER); + + /* Disable the SGMII PCS */ + lynx_28g_lane_rmw(lane, SGMIIaCR1, SGPCS_DIS, SGPCS_MSK); + + /* Configure the appropriate equalization parameters for the protocol */ + iowrite32(0x10808307, priv->base + LYNX_28G_LNaTECR0(lane->id)); + iowrite32(0x10000000, priv->base + LYNX_28G_LNaRGCR1(lane->id)); + iowrite32(0x00000000, priv->base + LYNX_28G_LNaRECR0(lane->id)); + iowrite32(0x001f0000, priv->base + LYNX_28G_LNaRECR1(lane->id)); + iowrite32(0x81000020, priv->base + LYNX_28G_LNaRECR2(lane->id)); + iowrite32(0x00002000, priv->base + 
LYNX_28G_LNaRSCCR0(lane->id)); +} + +static int lynx_28g_power_off(struct phy *phy) +{ + struct lynx_28g_lane *lane = phy_get_drvdata(phy); + u32 trstctl, rrstctl; + + if (!lane->powered_up) + return 0; + + /* Issue a halt request */ + lynx_28g_lane_rmw(lane, LNaTRSTCTL, HLT_REQ, HLT_REQ); + lynx_28g_lane_rmw(lane, LNaRRSTCTL, HLT_REQ, HLT_REQ); + + /* Wait until the halting process is complete */ + do { + trstctl = lynx_28g_lane_read(lane, LNaTRSTCTL); + rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); + } while ((trstctl & LYNX_28G_LNaTRSTCTL_HLT_REQ) || + (rrstctl & LYNX_28G_LNaRRSTCTL_HLT_REQ)); + + lane->powered_up = false; + + return 0; +} + +static int lynx_28g_power_on(struct phy *phy) +{ + struct lynx_28g_lane *lane = phy_get_drvdata(phy); + u32 trstctl, rrstctl; + + if (lane->powered_up) + return 0; + + /* Issue a reset request on the lane */ + lynx_28g_lane_rmw(lane, LNaTRSTCTL, RST_REQ, RST_REQ); + lynx_28g_lane_rmw(lane, LNaRRSTCTL, RST_REQ, RST_REQ); + + /* Wait until the reset sequence is completed */ + do { + trstctl = lynx_28g_lane_read(lane, LNaTRSTCTL); + rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); + } while (!(trstctl & LYNX_28G_LNaTRSTCTL_RST_DONE) || + !(rrstctl & LYNX_28G_LNaRRSTCTL_RST_DONE)); + + lane->powered_up = true; + + return 0; +} + +static int lynx_28g_set_mode(struct phy *phy, enum phy_mode mode, int submode) +{ + struct lynx_28g_lane *lane = phy_get_drvdata(phy); + struct lynx_28g_priv *priv = lane->priv; + int powered_up = lane->powered_up; + int err = 0; + + if (mode != PHY_MODE_ETHERNET) + return -EOPNOTSUPP; + + if (lane->interface == PHY_INTERFACE_MODE_NA) + return -EOPNOTSUPP; + + if (!lynx_28g_supports_interface(priv, submode)) + return -EOPNOTSUPP; + + /* If the lane is powered up, put the lane into the halt state while + * the reconfiguration is being done. + */ + if (powered_up) + lynx_28g_power_off(phy); + + switch (submode) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + lynx_28g_lane_set_sgmii(lane); + break; + case PHY_INTERFACE_MODE_10GBASER: + lynx_28g_lane_set_10gbaser(lane); + break; + default: + err = -EOPNOTSUPP; + goto out; + } + + lane->interface = submode; + +out: + /* Power up the lane if necessary */ + if (powered_up) + lynx_28g_power_on(phy); + + return err; +} + +static int lynx_28g_validate(struct phy *phy, enum phy_mode mode, int submode, + union phy_configure_opts *opts __always_unused) +{ + struct lynx_28g_lane *lane = phy_get_drvdata(phy); + struct lynx_28g_priv *priv = lane->priv; + + if (mode != PHY_MODE_ETHERNET) + return -EOPNOTSUPP; + + if (!lynx_28g_supports_interface(priv, submode)) + return -EOPNOTSUPP; + + return 0; +} + +static int lynx_28g_init(struct phy *phy) +{ + struct lynx_28g_lane *lane = phy_get_drvdata(phy); + + /* Mark the lane as initialized */ + lane->init = true; + + /* SerDes lanes are powered on at boot time. Any lane that is managed + * by this driver will get powered down at init time aka at dpaa2-eth + * probe time.
+ */ + lane->powered_up = true; + lynx_28g_power_off(phy); + + return 0; +} + +static const struct phy_ops lynx_28g_ops = { + .init = lynx_28g_init, + .power_on = lynx_28g_power_on, + .power_off = lynx_28g_power_off, + .set_mode = lynx_28g_set_mode, + .validate = lynx_28g_validate, + .owner = THIS_MODULE, +}; + +static void lynx_28g_pll_read_configuration(struct lynx_28g_priv *priv) +{ + struct lynx_28g_pll *pll; + int i; + + for (i = 0; i < LYNX_28G_NUM_PLL; i++) { + pll = &priv->pll[i]; + pll->priv = priv; + pll->id = i; + + pll->rstctl = lynx_28g_pll_read(pll, PLLnRSTCTL); + pll->cr0 = lynx_28g_pll_read(pll, PLLnCR0); + pll->cr1 = lynx_28g_pll_read(pll, PLLnCR1); + + if (LYNX_28G_PLLnRSTCTL_DIS(pll->rstctl)) + continue; + + switch (LYNX_28G_PLLnCR1_FRATE_SEL(pll->cr1)) { + case LYNX_28G_PLLnCR1_FRATE_5G_10GVCO: + case LYNX_28G_PLLnCR1_FRATE_5G_25GVCO: + /* 5GHz clock net */ + __set_bit(PHY_INTERFACE_MODE_1000BASEX, pll->supported); + __set_bit(PHY_INTERFACE_MODE_SGMII, pll->supported); + break; + case LYNX_28G_PLLnCR1_FRATE_10G_20GVCO: + /* 10.3125GHz clock net */ + __set_bit(PHY_INTERFACE_MODE_10GBASER, pll->supported); + break; + default: + /* 6GHz, 12.890625GHz, 8GHz */ + break; + } + } +} + +#define work_to_lynx(w) container_of((w), struct lynx_28g_priv, cdr_check.work) + +static void lynx_28g_cdr_lock_check(struct work_struct *work) +{ + struct lynx_28g_priv *priv = work_to_lynx(work); + struct lynx_28g_lane *lane; + u32 rrstctl; + int i; + + for (i = 0; i < LYNX_28G_NUM_LANE; i++) { + lane = &priv->lane[i]; + + if (!lane->init) + continue; + + if (!lane->powered_up) + continue; + + rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); + if (!(rrstctl & LYNX_28G_LNaRRSTCTL_CDR_LOCK)) { + lynx_28g_lane_rmw(lane, LNaRRSTCTL, RST_REQ, RST_REQ); + do { + rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); + } while (!(rrstctl & LYNX_28G_LNaRRSTCTL_RST_DONE)); + } + } + queue_delayed_work(system_power_efficient_wq, &priv->cdr_check, + msecs_to_jiffies(1000)); +} + +static void lynx_28g_lane_read_configuration(struct lynx_28g_lane *lane) +{ + u32 pss, protocol; + + pss = lynx_28g_lane_read(lane, LNaPSS); + protocol = LYNX_28G_LNaPSS_TYPE(pss); + switch (protocol) { + case LYNX_28G_LNaPSS_TYPE_SGMII: + lane->interface = PHY_INTERFACE_MODE_SGMII; + break; + case LYNX_28G_LNaPSS_TYPE_XFI: + lane->interface = PHY_INTERFACE_MODE_10GBASER; + break; + default: + lane->interface = PHY_INTERFACE_MODE_NA; + } +} + +static struct phy *lynx_28g_xlate(struct device *dev, + struct of_phandle_args *args) +{ + struct lynx_28g_priv *priv = dev_get_drvdata(dev); + int idx = args->args[0]; + + if (WARN_ON(idx >= LYNX_28G_NUM_LANE)) + return ERR_PTR(-EINVAL); + + return priv->lane[idx].phy; +} + +static int lynx_28g_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct phy_provider *provider; + struct lynx_28g_priv *priv; + int i; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + priv->dev = &pdev->dev; + + priv->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); + + lynx_28g_pll_read_configuration(priv); + + for (i = 0; i < LYNX_28G_NUM_LANE; i++) { + struct lynx_28g_lane *lane = &priv->lane[i]; + struct phy *phy; + + memset(lane, 0, sizeof(*lane)); + + phy = devm_phy_create(&pdev->dev, NULL, &lynx_28g_ops); + if (IS_ERR(phy)) + return PTR_ERR(phy); + + lane->priv = priv; + lane->phy = phy; + lane->id = i; + phy_set_drvdata(phy, lane); + lynx_28g_lane_read_configuration(lane); + } + + 
dev_set_drvdata(dev, priv); + + INIT_DELAYED_WORK(&priv->cdr_check, lynx_28g_cdr_lock_check); + + queue_delayed_work(system_power_efficient_wq, &priv->cdr_check, + msecs_to_jiffies(1000)); + + provider = devm_of_phy_provider_register(&pdev->dev, lynx_28g_xlate); + + return PTR_ERR_OR_ZERO(provider); +} + +static const struct of_device_id lynx_28g_of_match_table[] = { + { .compatible = "fsl,lynx-28g" }, + { }, +}; +MODULE_DEVICE_TABLE(of, lynx_28g_of_match_table); + +static struct platform_driver lynx_28g_driver = { + .probe = lynx_28g_probe, + .driver = { + .name = "lynx-28g", + .of_match_table = lynx_28g_of_match_table, + }, +}; +module_platform_driver(lynx_28g_driver); + +MODULE_AUTHOR("Ioana Ciornei <ioana.ciornei@nxp.com>"); +MODULE_DESCRIPTION("Lynx 28G SerDes PHY driver for Layerscape SoCs"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 56b04a7bba3a..d64a1ce5f5bc 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -970,13 +970,25 @@ ptp_ocp_verify(struct ptp_clock_info *ptp_info, unsigned pin, struct ptp_ocp *bp = container_of(ptp_info, struct ptp_ocp, ptp_info); char buf[16]; - if (func != PTP_PF_PEROUT) + switch (func) { + case PTP_PF_NONE: + sprintf(buf, "IN: None"); + break; + case PTP_PF_EXTTS: + /* Allow timestamps, but require sysfs configuration. */ + return 0; + case PTP_PF_PEROUT: + /* channel 0 is 1PPS from PHC. + * channels 1..4 are the frequency generators. + */ + if (chan) + sprintf(buf, "OUT: GEN%d", chan); + else + sprintf(buf, "OUT: PHC"); + break; + default: return -EOPNOTSUPP; - - if (chan) - sprintf(buf, "OUT: GEN%d", chan); - else - sprintf(buf, "OUT: PHC"); + } return ptp_ocp_sma_store(bp, buf, pin + 1); } @@ -2922,7 +2934,7 @@ _signal_summary_show(struct seq_file *s, struct ptp_ocp *bp, int nr) return; on = signal->running; - sprintf(label, "GEN%d", nr); + sprintf(label, "GEN%d", nr + 1); seq_printf(s, "%7s: %s, period:%llu duty:%d%% phase:%llu pol:%d", label, on ? " ON" : "OFF", signal->period, signal->duty, signal->phase, @@ -2947,7 +2959,7 @@ _frequency_summary_show(struct seq_file *s, int nr, if (!reg) return; - sprintf(label, "FREQ%d", nr); + sprintf(label, "FREQ%d", nr + 1); val = ioread32(&reg->ctrl); on = val & 1; val = (val >> 8) & 0xff; @@ -2971,11 +2983,12 @@ ptp_ocp_summary_show(struct seq_file *s, void *data) { struct device *dev = s->private; struct ptp_system_timestamp sts; - u16 sma_val[4][2], ctrl, val; struct ts_reg __iomem *ts_reg; struct timespec64 ts; struct ptp_ocp *bp; + u16 sma_val[4][2]; char *src, *buf; + u32 ctrl, val; bool on, map; int i; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 082380c03a3e..1768362115c6 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1170,7 +1170,9 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, goto done; } - if (msg.size == 0) { + if ((msg.type == VHOST_IOTLB_UPDATE || + msg.type == VHOST_IOTLB_INVALIDATE) && + msg.size == 0) { ret = -EINVAL; goto done; } diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 37f0b4274113..e6c9d41db1de 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -753,7 +753,8 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) /* Iterating over all connections for all CIDs to find orphans is * inefficient. Room for improvement here.
*/ - vsock_for_each_connected_socket(vhost_vsock_reset_orphans); + vsock_for_each_connected_socket(&vhost_transport.transport, + vhost_vsock_reset_orphans); /* Don't check the owner, because we are in the release path, so we * need to stop the vsock device in any case. diff --git a/fs/afs/write.c b/fs/afs/write.c index 5e9157d0da29..f447c902318d 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -703,7 +703,7 @@ static int afs_writepages_region(struct address_space *mapping, struct folio *folio; struct page *head_page; ssize_t ret; - int n; + int n, skips = 0; _enter("%llx,%llx,", start, end); @@ -754,8 +754,15 @@ static int afs_writepages_region(struct address_space *mapping, #ifdef CONFIG_AFS_FSCACHE folio_wait_fscache(folio); #endif + } else { + start += folio_size(folio); } folio_put(folio); + if (wbc->sync_mode == WB_SYNC_NONE) { + if (skips >= 5 || need_resched()) + break; + skips++; + } continue; } diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 83f41bd0c3a9..35465109d9c4 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -28,6 +28,11 @@ struct cachefiles_xattr { static const char cachefiles_xattr_cache[] = XATTR_USER_PREFIX "CacheFiles.cache"; +struct cachefiles_vol_xattr { + __be32 reserved; /* Reserved, should be 0 */ + __u8 data[]; /* netfs volume coherency data */ +} __packed; + /* * set the state xattr on a cache file */ @@ -185,6 +190,7 @@ void cachefiles_prepare_to_write(struct fscache_cookie *cookie) */ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) { + struct cachefiles_vol_xattr *buf; unsigned int len = volume->vcookie->coherency_len; const void *p = volume->vcookie->coherency; struct dentry *dentry = volume->dentry; @@ -192,10 +198,17 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) _enter("%x,#%d", volume->vcookie->debug_id, len); + len += sizeof(*buf); + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return false; + buf->reserved = cpu_to_be32(0); + memcpy(buf->data, p, len - sizeof(*buf)); + ret = cachefiles_inject_write_error(); if (ret == 0) ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, - p, len, 0); + buf, len, 0); if (ret < 0) { trace_cachefiles_vfs_error(NULL, d_inode(dentry), ret, cachefiles_trace_setxattr_error); @@ -209,6 +222,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) cachefiles_coherency_vol_set_ok); } + kfree(buf); _leave(" = %d", ret); return ret == 0; } @@ -218,7 +232,7 @@ bool cachefiles_set_volume_xattr(struct cachefiles_volume *volume) */ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) { - struct cachefiles_xattr *buf; + struct cachefiles_vol_xattr *buf; struct dentry *dentry = volume->dentry; unsigned int len = volume->vcookie->coherency_len; const void *p = volume->vcookie->coherency; @@ -228,6 +242,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) _enter(""); + len += sizeof(*buf); buf = kmalloc(len, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -245,7 +260,9 @@ "Failed to read xattr with error %zd", xlen); } why = cachefiles_coherency_vol_check_xattr; - } else if (memcmp(buf->data, p, len) != 0) { + } else if (buf->reserved != cpu_to_be32(0)) { + why = cachefiles_coherency_vol_check_resv; + } else if (memcmp(buf->data, p, len - sizeof(*buf)) != 0) { why = cachefiles_coherency_vol_check_cmp; } else { why = cachefiles_coherency_vol_check_ok; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2772dec9dcea..8bde30fa5387 100644 ---
a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1105,17 +1105,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) goto read_super_error; } - root = d_make_root(inode); - if (!root) { - status = -ENOMEM; - mlog_errno(status); - goto read_super_error; - } - - sb->s_root = root; - - ocfs2_complete_mount_recovery(osb); - osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj); if (!osb->osb_dev_kset) { @@ -1133,6 +1122,17 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) goto read_super_error; } + root = d_make_root(inode); + if (!root) { + status = -ENOMEM; + mlog_errno(status); + goto read_super_error; + } + + sb->s_root = root; + + ocfs2_complete_mount_recovery(osb); + if (ocfs2_mount_local(osb)) snprintf(nodestr, sizeof(nodestr), "local"); else diff --git a/fs/pipe.c b/fs/pipe.c index cc28623a67b6..2667db9506e2 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -253,7 +253,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) */ was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); for (;;) { - unsigned int head = pipe->head; + /* Read ->head with a barrier vs post_one_notification() */ + unsigned int head = smp_load_acquire(&pipe->head); unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; @@ -831,10 +832,8 @@ void free_pipe_info(struct pipe_inode_info *pipe) int i; #ifdef CONFIG_WATCH_QUEUE - if (pipe->watch_queue) { + if (pipe->watch_queue) watch_queue_clear(pipe->watch_queue); - put_watch_queue(pipe->watch_queue); - } #endif (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0); @@ -844,6 +843,10 @@ void free_pipe_info(struct pipe_inode_info *pipe) if (buf->ops) pipe_buf_release(pipe, buf); } +#ifdef CONFIG_WATCH_QUEUE + if (pipe->watch_queue) + put_watch_queue(pipe->watch_queue); +#endif if (pipe->tmp_page) __free_page(pipe->tmp_page); kfree(pipe->bufs); diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index b712217f7030..1ed52441972f 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_VOID: case ARPHRD_NONE: case ARPHRD_RAWIP: + case ARPHRD_PIMREG: return false; default: return true; diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 3aae023a9353..d62ef428e3aa 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -119,6 +119,9 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, struct bridge_vlan_info *p_vinfo); +bool br_mst_enabled(const struct net_device *dev); +int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids); +int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state); #else static inline bool br_vlan_enabled(const struct net_device *dev) { @@ -151,6 +154,22 @@ static inline int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid, { return -EINVAL; } + +static inline bool br_mst_enabled(const struct net_device *dev) +{ + return false; +} + +static inline int br_mst_get_info(const struct net_device *dev, u16 msti, + unsigned long *vids) +{ + return -EINVAL; +} +static inline int br_mst_get_state(const struct net_device *dev, u16 msti, + u8 *state) +{ + return -EINVAL; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 10c94a3936ca..b42294739063 100644 --- a/include/linux/if_macvlan.h +++ 
b/include/linux/if_macvlan.h @@ -21,6 +21,7 @@ struct macvlan_dev { struct hlist_node hlist; struct macvlan_port *port; struct net_device *lowerdev; + netdevice_tracker dev_tracker; void *accel_priv; struct vlan_pcpu_stats __percpu *pcpu_stats; diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 467b94257105..0cc65d216701 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -329,6 +329,12 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #define lockdep_assert_none_held_once() \ lockdep_assert_once(!current->lockdep_depth) +/* + * Ensure that softirq is handled within the callchain and not delayed and + * handled by chance. + */ +#define lockdep_assert_softirq_will_run() \ + lockdep_assert_once(hardirq_count() | softirq_count()) #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) @@ -414,6 +420,7 @@ extern int lockdep_is_held(const void *); #define lockdep_assert_held_read(l) do { (void)(l); } while (0) #define lockdep_assert_held_once(l) do { (void)(l); } while (0) #define lockdep_assert_none_held_once() do { } while (0) +#define lockdep_assert_softirq_will_run() do { } while (0) #define lockdep_recursing(tsk) (0) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 00a914b0716e..96cd740d94a3 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1009,9 +1009,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); void mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); -int mlx5_buf_alloc(struct mlx5_core_dev *dev, - int size, struct mlx5_frag_buf *buf); -void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_frag_buf *buf, int node); void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf); @@ -1039,7 +1036,6 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); -void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0d994710b335..8cbe96ce0a2c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3858,10 +3858,14 @@ static inline struct net_device_core_stats *dev_core_stats(struct net_device *de #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ - struct net_device_core_stats *p = dev_core_stats(dev); \ + struct net_device_core_stats *p; \ + \ + preempt_disable(); \ + p = dev_core_stats(dev); \ \ if (p) \ local_inc(&p->FIELD); \ + preempt_enable(); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index c994d1b2cdba..3b9a40ae8bdb 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -28,7 +28,8 @@ struct watch_type_filter { struct watch_filter { union { struct rcu_head rcu; - unsigned long type_filter[2]; /* Bitmask of accepted types */ + /* Bitmask of accepted types */ + DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); }; u32 nr_filters; /* Number 
of filters */ struct watch_type_filter filters[]; diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index ab207677e0a8..f742e50207fb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_remove_sock(struct vsock_sock *vsk); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); diff --git a/include/net/devlink.h b/include/net/devlink.h index 8d5349d2fb68..fd89a17adea1 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1197,9 +1197,9 @@ struct devlink_ops { struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); - int (*port_split)(struct devlink *devlink, unsigned int port_index, + int (*port_split)(struct devlink *devlink, struct devlink_port *port, unsigned int count, struct netlink_ext_ack *extack); - int (*port_unsplit)(struct devlink *devlink, unsigned int port_index, + int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port, struct netlink_ext_ack *extack); int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index, u16 pool_index, @@ -1479,6 +1479,17 @@ void *devlink_priv(struct devlink *devlink); struct devlink *priv_to_devlink(void *priv); struct device *devlink_to_dev(const struct devlink *devlink); +/* Devlink instance explicit locking */ +void devl_lock(struct devlink *devlink); +void devl_unlock(struct devlink *devlink); +void devl_assert_locked(struct devlink *devlink); +bool devl_lock_is_held(struct devlink *devlink); + +int devl_port_register(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index); +void devl_port_unregister(struct devlink_port *devlink_port); + struct ib_device; struct net *devlink_net(const struct devlink *devlink); diff --git a/include/net/dsa.h b/include/net/dsa.h index 9d16505fc0e2..934958fda962 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -893,6 +893,18 @@ struct dsa_switch_ops { struct ethtool_ts_info *ts); /* + * DCB ops + */ + int (*port_get_default_prio)(struct dsa_switch *ds, int port); + int (*port_set_default_prio)(struct dsa_switch *ds, int port, + u8 prio); + int (*port_get_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp); + int (*port_add_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, + u8 prio); + int (*port_del_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, + u8 prio); + + /* * Suspend and resume */ int (*suspend)(struct dsa_switch *ds); @@ -945,7 +957,10 @@ struct dsa_switch_ops { struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); + int (*port_mst_state_set)(struct dsa_switch *ds, int port, + const struct switchdev_mst_state *state); void (*port_fast_age)(struct dsa_switch *ds, int port); + int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid); int (*port_pre_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); @@ -964,6 +979,9 @@ struct dsa_switch_ops { struct netlink_ext_ack *extack); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); + int (*vlan_msti_set)(struct 
dsa_switch *ds, struct dsa_bridge bridge, + const struct switchdev_vlan_msti *msti); + /* * Forwarding database */ @@ -1010,7 +1028,7 @@ struct dsa_switch_ops { struct flow_cls_offload *cls, bool ingress); int (*port_mirror_add)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, - bool ingress); + bool ingress, struct netlink_ext_ack *extack); void (*port_mirror_del)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); int (*port_policer_add)(struct dsa_switch *ds, int port, diff --git a/include/net/flow.h b/include/net/flow.h index 58beb16a49b8..987bd511d652 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -29,6 +29,7 @@ struct flowi_tunnel { struct flowi_common { int flowic_oif; int flowic_iif; + int flowic_l3mdev; __u32 flowic_mark; __u8 flowic_tos; __u8 flowic_scope; @@ -36,7 +37,6 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 -#define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; kuid_t flowic_uid; struct flowi_tunnel flowic_tun_key; @@ -70,6 +70,7 @@ struct flowi4 { struct flowi_common __fl_common; #define flowi4_oif __fl_common.flowic_oif #define flowi4_iif __fl_common.flowic_iif +#define flowi4_l3mdev __fl_common.flowic_l3mdev #define flowi4_mark __fl_common.flowic_mark #define flowi4_tos __fl_common.flowic_tos #define flowi4_scope __fl_common.flowic_scope @@ -102,6 +103,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; + fl4->flowi4_l3mdev = 0; fl4->flowi4_mark = mark; fl4->flowi4_tos = tos; fl4->flowi4_scope = scope; @@ -132,6 +134,7 @@ struct flowi6 { struct flowi_common __fl_common; #define flowi6_oif __fl_common.flowic_oif #define flowi6_iif __fl_common.flowic_iif +#define flowi6_l3mdev __fl_common.flowic_l3mdev #define flowi6_mark __fl_common.flowic_mark #define flowi6_scope __fl_common.flowic_scope #define flowi6_proto __fl_common.flowic_proto @@ -177,6 +180,7 @@ struct flowi { } u; #define flowi_oif u.__fl_common.flowic_oif #define flowi_iif u.__fl_common.flowic_iif +#define flowi_l3mdev u.__fl_common.flowic_l3mdev #define flowi_mark u.__fl_common.flowic_mark #define flowi_tos u.__fl_common.flowic_tos #define flowi_scope u.__fl_common.flowic_scope diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 92267d23083e..021778a7e1af 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -150,6 +150,8 @@ enum flow_action_id { FLOW_ACTION_PPPOE_PUSH, FLOW_ACTION_JUMP, FLOW_ACTION_PIPE, + FLOW_ACTION_VLAN_PUSH_ETH, + FLOW_ACTION_VLAN_POP_ETH, NUM_FLOW_ACTIONS, }; @@ -211,6 +213,10 @@ struct flow_action_entry { __be16 proto; u8 prio; } vlan; + struct { /* FLOW_ACTION_VLAN_PUSH_ETH */ + unsigned char dst[ETH_ALEN]; + unsigned char src[ETH_ALEN]; + } vlan_push_eth; struct { /* FLOW_ACTION_MANGLE */ /* FLOW_ACTION_ADD */ enum flow_action_mangle_base htype; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8731d5bcb47d..b08b70989d2c 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -97,7 +97,6 @@ struct nf_conn { unsigned long status; u16 cpu; - u16 local_origin:1; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 3e424d40fae3..aa0171d5786d 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -19,6 +19,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, 
SWITCHDEV_ATTR_ID_PORT_STP_STATE, + SWITCHDEV_ATTR_ID_PORT_MST_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_PORT_MROUTER, @@ -27,7 +28,14 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL, SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, + SWITCHDEV_ATTR_ID_BRIDGE_MST, SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, + SWITCHDEV_ATTR_ID_VLAN_MSTI, +}; + +struct switchdev_mst_state { + u16 msti; + u8 state; }; struct switchdev_brport_flags { @@ -35,6 +43,11 @@ struct switchdev_brport_flags { unsigned long mask; }; +struct switchdev_vlan_msti { + u16 vid; + u16 msti; +}; + struct switchdev_attr { struct net_device *orig_dev; enum switchdev_attr_id id; @@ -43,13 +56,16 @@ struct switchdev_attr { void (*complete)(struct net_device *dev, int err, void *priv); union { u8 stp_state; /* PORT_STP_STATE */ + struct switchdev_mst_state mst_state; /* PORT_MST_STATE */ struct switchdev_brport_flags brport_flags; /* PORT_BRIDGE_FLAGS */ bool mrouter; /* PORT_MROUTER */ clock_t ageing_time; /* BRIDGE_AGEING_TIME */ bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */ + bool mst; /* BRIDGE_MST */ bool mc_disabled; /* MC_DISABLED */ u8 mrp_port_role; /* MRP_PORT_ROLE */ + struct switchdev_vlan_msti vlan_msti; /* VLAN_MSTI */ } u; }; diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index f94b8bc26f9e..a97600f742de 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -78,4 +78,14 @@ static inline u8 tcf_vlan_push_prio(const struct tc_action *a) return tcfv_push_prio; } + +static inline void tcf_vlan_push_eth(unsigned char *src, unsigned char *dest, + const struct tc_action *a) +{ + rcu_read_lock(); + memcpy(dest, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_dst, ETH_ALEN); + memcpy(src, rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_src, ETH_ALEN); + rcu_read_unlock(); +} + #endif /* __NET_TC_VLAN_H */ diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index ee3c59639d70..9b4e6c78d0f4 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -642,6 +642,11 @@ struct ocelot_lag_fdb { struct list_head list; }; +struct ocelot_mirror { + refcount_t refcount; + int to; +}; + struct ocelot_port { struct ocelot *ocelot; @@ -723,6 +728,7 @@ struct ocelot { struct ocelot_vcap_block block[3]; struct ocelot_vcap_policer vcap_pol; struct vcap_props *vcap; + struct ocelot_mirror *mirror; struct ocelot_psfp_list psfp; @@ -869,6 +875,11 @@ int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); void ocelot_port_bridge_flags(struct ocelot *ocelot, int port, struct switchdev_brport_flags val); +int ocelot_port_get_default_prio(struct ocelot *ocelot, int port); +int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio); +int ocelot_port_get_dscp_prio(struct ocelot *ocelot, int port, u8 dscp); +int ocelot_port_add_dscp_prio(struct ocelot *ocelot, int port, u8 dscp, u8 prio); +int ocelot_port_del_dscp_prio(struct ocelot *ocelot, int port, u8 dscp, u8 prio); int ocelot_port_bridge_join(struct ocelot *ocelot, int port, struct net_device *bridge, int bridge_num, struct netlink_ext_ack *extack); @@ -903,6 +914,9 @@ int ocelot_get_max_mtu(struct ocelot *ocelot, int port); int ocelot_port_policer_add(struct ocelot *ocelot, int port, struct ocelot_policer *pol); int ocelot_port_policer_del(struct ocelot *ocelot, int port); +int ocelot_port_mirror_add(struct ocelot 
*ocelot, int from, int to, + bool ingress, struct netlink_ext_ack *extack); +void ocelot_port_mirror_del(struct ocelot *ocelot, int from, bool ingress); int ocelot_cls_flower_replace(struct ocelot *ocelot, int port, struct flow_cls_offload *f, bool ingress); int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port, diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index deb2ad9eb0a5..7b2bf9b1fe69 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -654,6 +654,7 @@ struct ocelot_vcap_action { enum ocelot_mask_mode mask_mode; unsigned long port_mask; bool police_ena; + bool mirror_ena; struct ocelot_policer pol; u32 pol_ix; }; @@ -697,6 +698,7 @@ struct ocelot_vcap_filter { unsigned long ingress_port_mask; /* For VCAP ES0 */ struct ocelot_vcap_port ingress_port; + /* For VCAP IS2 mirrors and ES0 */ struct ocelot_vcap_port egress_port; enum ocelot_vcap_bit dmac_mc; diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index c6f5aa74db89..2c530637e10a 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -56,6 +56,7 @@ enum cachefiles_coherency_trace { cachefiles_coherency_set_ok, cachefiles_coherency_vol_check_cmp, cachefiles_coherency_vol_check_ok, + cachefiles_coherency_vol_check_resv, cachefiles_coherency_vol_check_xattr, cachefiles_coherency_vol_set_fail, cachefiles_coherency_vol_set_ok, @@ -139,6 +140,7 @@ enum cachefiles_error_trace { EM(cachefiles_coherency_set_ok, "SET ok ") \ EM(cachefiles_coherency_vol_check_cmp, "VOL BAD cmp ") \ EM(cachefiles_coherency_vol_check_ok, "VOL OK ") \ + EM(cachefiles_coherency_vol_check_resv, "VOL BAD resv") \ EM(cachefiles_coherency_vol_check_xattr,"VOL BAD xatt") \ EM(cachefiles_coherency_vol_set_fail, "VOL SET fail") \ E_(cachefiles_coherency_vol_set_ok, "VOL SET ok ") diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2711c3522010..a86a7e7b811f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -122,6 +122,7 @@ enum { IFLA_BRIDGE_VLAN_TUNNEL_INFO, IFLA_BRIDGE_MRP, IFLA_BRIDGE_CFM, + IFLA_BRIDGE_MST, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) @@ -453,6 +454,21 @@ enum { #define IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX (__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX - 1) +enum { + IFLA_BRIDGE_MST_UNSPEC, + IFLA_BRIDGE_MST_ENTRY, + __IFLA_BRIDGE_MST_MAX, +}; +#define IFLA_BRIDGE_MST_MAX (__IFLA_BRIDGE_MST_MAX - 1) + +enum { + IFLA_BRIDGE_MST_ENTRY_UNSPEC, + IFLA_BRIDGE_MST_ENTRY_MSTI, + IFLA_BRIDGE_MST_ENTRY_STATE, + __IFLA_BRIDGE_MST_ENTRY_MAX, +}; +#define IFLA_BRIDGE_MST_ENTRY_MAX (__IFLA_BRIDGE_MST_ENTRY_MAX - 1) + struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; @@ -564,6 +580,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, + BRIDGE_VLANDB_GOPTS_MSTI, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) @@ -759,6 +776,7 @@ struct br_mcast_stats { enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, BR_BOOLOPT_MCAST_VLAN_SNOOPING, + BR_BOOLOPT_MST_ENABLE, BR_BOOLOPT_MAX }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bd24c7dc10a2..cc284c048e69 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -842,6 +842,7 @@ enum { IFLA_GENEVE_LABEL, IFLA_GENEVE_TTL_INHERIT, IFLA_GENEVE_DF, + IFLA_GENEVE_INNER_PROTO_INHERIT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX 
(__IFLA_GENEVE_MAX - 1) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 51530aade46e..83849a37db5b 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -817,6 +817,7 @@ enum { #define RTEXT_FILTER_MRP (1 << 4) #define RTEXT_FILTER_CFM_CONFIG (1 << 5) #define RTEXT_FILTER_CFM_STATUS (1 << 6) +#define RTEXT_FILTER_MST (1 << 7) /* End of information exported to user level */ diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config index 07df6d93c4df..e8db8d938661 100644 --- a/kernel/configs/debug.config +++ b/kernel/configs/debug.config @@ -16,6 +16,7 @@ CONFIG_SYMBOLIC_ERRNAME=y # # Compile-time checks and compiler options # +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_FRAME_WARN=2048 diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a4b462b6f944..6105b7036482 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7790,7 +7790,7 @@ int ftrace_is_dead(void) /** * register_ftrace_function - register a function for profiling - * @ops - ops structure that holds the function for profiling. + * @ops: ops structure that holds the function for profiling. * * Register a function to be called by all functions in the * kernel. @@ -7817,7 +7817,7 @@ EXPORT_SYMBOL_GPL(register_ftrace_function); /** * unregister_ftrace_function - unregister a function for profiling. - * @ops - ops structure that holds the function to unregister + * @ops: ops structure that holds the function to unregister * * Unregister a function that was added to be called by ftrace profiling. */ diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index cfddb30e65ab..5e3c62a08fc0 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1387,6 +1387,26 @@ static int run_osnoise(void) } /* + * In some cases, notably when running on a nohz_full CPU with + * a stopped tick PREEMPT_RCU has no way to account for QSs. + * This will eventually cause unwarranted noise as PREEMPT_RCU + * will force preemption as the means of ending the current + * grace period. We avoid this problem by calling + * rcu_momentary_dyntick_idle(), which performs a zero duration + * EQS allowing PREEMPT_RCU to end the current grace period. + * This call shouldn't be wrapped inside an RCU critical + * section. + * + * Note that in non PREEMPT_RCU kernels QSs are handled through + * cond_resched() + */ + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { + local_irq_disable(); + rcu_momentary_dyntick_idle(); + local_irq_enable(); + } + + /* * For the non-preemptive kernel config: let threads runs, if * they so wish. */ @@ -2200,6 +2220,17 @@ static void osnoise_workload_stop(void) if (osnoise_has_registered_instances()) return; + /* + * If callbacks were already disabled in a previous stop + * call, there is no need to disable then again. + * + * For instance, this happens when tracing is stopped via: + * echo 0 > tracing_on + * echo nop > current_tracer. 
+ */ + if (!trace_osnoise_callback_enabled) + return; + trace_osnoise_callback_enabled = false; /* * Make sure that ftrace_nmi_enter/exit() see diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 9c9eb20dd2c5..00703444a219 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -54,6 +54,7 @@ static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe, bit += page->index; set_bit(bit, wqueue->notes_bitmap); + generic_pipe_buf_release(pipe, buf); } // No try_steal function => no stealing @@ -112,7 +113,7 @@ static bool post_one_notification(struct watch_queue *wqueue, buf->offset = offset; buf->len = len; buf->flags = PIPE_BUF_FLAG_WHOLE; - pipe->head = head + 1; + smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */ if (!test_and_clear_bit(note, wqueue->notes_bitmap)) { spin_unlock_irq(&pipe->rd_wait.lock); @@ -219,7 +220,6 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes) struct page **pages; unsigned long *bitmap; unsigned long user_bufs; - unsigned int bmsize; int ret, i, nr_pages; if (!wqueue) @@ -243,7 +243,8 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes) goto error; } - ret = pipe_resize_ring(pipe, nr_notes); + nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE; + ret = pipe_resize_ring(pipe, roundup_pow_of_two(nr_notes)); if (ret < 0) goto error; @@ -258,17 +259,15 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes) pages[i]->index = i * WATCH_QUEUE_NOTES_PER_PAGE; } - bmsize = (nr_notes + BITS_PER_LONG - 1) / BITS_PER_LONG; - bmsize *= sizeof(unsigned long); - bitmap = kmalloc(bmsize, GFP_KERNEL); + bitmap = bitmap_alloc(nr_notes, GFP_KERNEL); if (!bitmap) goto error_p; - memset(bitmap, 0xff, bmsize); + bitmap_fill(bitmap, nr_notes); wqueue->notes = pages; wqueue->notes_bitmap = bitmap; wqueue->nr_pages = nr_pages; - wqueue->nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE; + wqueue->nr_notes = nr_notes; return 0; error_p: @@ -320,7 +319,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe, tf[i].info_mask & WATCH_INFO_LENGTH) goto err_filter; /* Ignore any unknown types */ - if (tf[i].type >= sizeof(wfilter->type_filter) * 8) + if (tf[i].type >= WATCH_TYPE__NR) continue; nr_filter++; } @@ -336,7 +335,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe, q = wfilter->filters; for (i = 0; i < filter.nr_filters; i++) { - if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG) + if (tf[i].type >= WATCH_TYPE__NR) continue; q->type = tf[i].type; @@ -371,6 +370,7 @@ static void __put_watch_queue(struct kref *kref) for (i = 0; i < wqueue->nr_pages; i++) __free_page(wqueue->notes[i]); + bitmap_free(wqueue->notes_bitmap); wfilter = rcu_access_pointer(wqueue->filter); if (wfilter) @@ -566,7 +566,7 @@ void watch_queue_clear(struct watch_queue *wqueue) rcu_read_lock(); spin_lock_bh(&wqueue->lock); - /* Prevent new additions and prevent notifications from happening */ + /* Prevent new notifications from being stored. */ wqueue->defunct = true; while (!hlist_empty(&wqueue->watches)) { diff --git a/mm/swap_state.c b/mm/swap_state.c index 8d4104242100..ee67164531c0 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -478,7 +478,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * __read_swap_cache_async(), which has set SWAP_HAS_CACHE * in swap_map, but not yet added its page to swap cache. 
*/ - cond_resched(); + schedule_timeout_uninterruptible(1); } /* diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 08bf6c839e25..7825c129742a 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -280,7 +280,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) const struct vlan_priority_tci_mapping *mp = vlan->egress_priority_map[i]; while (mp) { - seq_printf(seq, "%u:%hu ", + seq_printf(seq, "%u:%d ", mp->priority, ((mp->vlan_qos >> 13) & 0x7)); mp = mp->next; } diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 7fb9a021873b..24bd1c0a9a5a 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -20,7 +20,7 @@ obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o br_multicast_eht.o -bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o +bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o br_mst.o bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o diff --git a/net/bridge/br.c b/net/bridge/br.c index b1dea3febeea..96e91d69a9a8 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -265,6 +265,9 @@ int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on, case BR_BOOLOPT_MCAST_VLAN_SNOOPING: err = br_multicast_toggle_vlan_snooping(br, on, extack); break; + case BR_BOOLOPT_MST_ENABLE: + err = br_mst_set_enabled(br, on, extack); + break; default: /* shouldn't be called with unsupported options */ WARN_ON(1); @@ -281,6 +284,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt) return br_opt_get(br, BROPT_NO_LL_LEARN); case BR_BOOLOPT_MCAST_VLAN_SNOOPING: return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED); + case BR_BOOLOPT_MST_ENABLE: + return br_opt_get(br, BROPT_MST_ENABLED); default: /* shouldn't be called with unsupported options */ WARN_ON(1); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e0c13fcc50ed..196417859c4a 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -78,13 +78,22 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb u16 vid = 0; u8 state; - if (!p || p->state == BR_STATE_DISABLED) + if (!p) goto drop; br = p->br; + + if (br_mst_is_enabled(br)) { + state = BR_STATE_FORWARDING; + } else { + if (p->state == BR_STATE_DISABLED) + goto drop; + + state = p->state; + } + brmctx = &p->br->multicast_ctx; pmctx = &p->multicast_ctx; - state = p->state; if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid, &state, &vlan)) goto out; @@ -370,9 +379,13 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; forward: + if (br_mst_is_enabled(p->br)) + goto defer_stp_filtering; + switch (p->state) { case BR_STATE_FORWARDING: case BR_STATE_LEARNING: +defer_stp_filtering: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c new file mode 100644 index 000000000000..ee680adcee17 --- /dev/null +++ b/net/bridge/br_mst.c @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Bridge Multiple Spanning Tree Support + * + * Authors: + * Tobias Waldekranz <tobias@waldekranz.com> + */ + +#include <linux/kernel.h> +#include <net/switchdev.h> + +#include "br_private.h" + +DEFINE_STATIC_KEY_FALSE(br_mst_used); + +bool br_mst_enabled(const struct net_device *dev) +{ + if (!netif_is_bridge_master(dev)) + return false; + + return br_opt_get(netdev_priv(dev), BROPT_MST_ENABLED); +} 
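The MST machinery introduced in br_mst.c is driven through switchdev: br_mst_set_enabled(), br_mst_vlan_set_msti() and br_mst_set_state() below each raise one of the new attributes. A minimal sketch of the consuming side, assuming a hypothetical driver (the foo_* helpers are illustrative; only the switchdev attribute ids and payload fields come from this series):

#include <net/switchdev.h>

static int foo_port_attr_set(struct net_device *dev, const void *ctx,
			     const struct switchdev_attr *attr,
			     struct netlink_ext_ack *extack)
{
	switch (attr->id) {
	case SWITCHDEV_ATTR_ID_BRIDGE_MST:
		/* bridge-wide enable/disable of per-MSTI state */
		return foo_mst_enable(dev, attr->u.mst);
	case SWITCHDEV_ATTR_ID_VLAN_MSTI:
		/* bind a VLAN to an MST instance */
		return foo_vlan_msti_set(dev, attr->u.vlan_msti.vid,
					 attr->u.vlan_msti.msti);
	case SWITCHDEV_ATTR_ID_PORT_MST_STATE:
		/* per-port, per-MSTI STP state; MSTI 0 (CST) is still
		 * delivered via SWITCHDEV_ATTR_ID_PORT_STP_STATE
		 */
		return foo_mst_state_set(dev, attr->u.mst_state.msti,
					 attr->u.mst_state.state);
	default:
		return -EOPNOTSUPP;
	}
}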
+EXPORT_SYMBOL_GPL(br_mst_enabled); + +int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids) +{ + const struct net_bridge_vlan_group *vg; + const struct net_bridge_vlan *v; + const struct net_bridge *br; + + ASSERT_RTNL(); + + if (!netif_is_bridge_master(dev)) + return -EINVAL; + + br = netdev_priv(dev); + if (!br_opt_get(br, BROPT_MST_ENABLED)) + return -EINVAL; + + vg = br_vlan_group(br); + + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (v->msti == msti) + __set_bit(v->vid, vids); + } + + return 0; +} +EXPORT_SYMBOL_GPL(br_mst_get_info); + +int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state) +{ + const struct net_bridge_port *p = NULL; + const struct net_bridge_vlan_group *vg; + const struct net_bridge_vlan *v; + + ASSERT_RTNL(); + + p = br_port_get_check_rtnl(dev); + if (!p || !br_opt_get(p->br, BROPT_MST_ENABLED)) + return -EINVAL; + + vg = nbp_vlan_group(p); + + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (v->brvlan->msti == msti) { + *state = v->state; + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(br_mst_get_state); + +static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v, + u8 state) +{ + struct net_bridge_vlan_group *vg = nbp_vlan_group(p); + + if (v->state == state) + return; + + br_vlan_set_state(v, state); + + if (v->vid == vg->pvid) + br_vlan_set_pvid_state(vg, state); +} + +int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, + struct netlink_ext_ack *extack) +{ + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_PORT_MST_STATE, + .orig_dev = p->dev, + .u.mst_state = { + .msti = msti, + .state = state, + }, + }; + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + int err; + + vg = nbp_vlan_group(p); + if (!vg) + return 0; + + /* MSTI 0 (CST) state changes are notified via the regular + * SWITCHDEV_ATTR_ID_PORT_STP_STATE. + */ + if (msti) { + err = switchdev_port_attr_set(p->dev, &attr, extack); + if (err && err != -EOPNOTSUPP) + return err; + } + + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (v->brvlan->msti != msti) + continue; + + br_mst_vlan_set_state(p, v, state); + } + + return 0; +} + +static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti) +{ + struct net_bridge_vlan_group *vg = nbp_vlan_group(pv->port); + struct net_bridge_vlan *v; + + list_for_each_entry(v, &vg->vlan_list, vlist) { + /* If this port already has a defined state in this + * MSTI (through some other VLAN membership), inherit + * it. + */ + if (v != pv && v->brvlan->msti == msti) { + br_mst_vlan_set_state(pv->port, pv, v->state); + return; + } + } + + /* Otherwise, start out in a new MSTI with all ports disabled. 
*/ + return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED); +} + +int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti) +{ + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_VLAN_MSTI, + .orig_dev = mv->br->dev, + .u.vlan_msti = { + .vid = mv->vid, + .msti = msti, + }, + }; + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *pv; + struct net_bridge_port *p; + int err; + + if (mv->msti == msti) + return 0; + + err = switchdev_port_attr_set(mv->br->dev, &attr, NULL); + if (err && err != -EOPNOTSUPP) + return err; + + mv->msti = msti; + + list_for_each_entry(p, &mv->br->port_list, list) { + vg = nbp_vlan_group(p); + + pv = br_vlan_find(vg, mv->vid); + if (pv) + br_mst_vlan_sync_state(pv, msti); + } + + return 0; +} + +void br_mst_vlan_init_state(struct net_bridge_vlan *v) +{ + /* VLANs always start out in MSTI 0 (CST) */ + v->msti = 0; + + if (br_vlan_is_master(v)) + v->state = BR_STATE_FORWARDING; + else + v->state = v->port->state; +} + +int br_mst_set_enabled(struct net_bridge *br, bool on, + struct netlink_ext_ack *extack) +{ + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_BRIDGE_MST, + .orig_dev = br->dev, + .u.mst = on, + }; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + int err; + + list_for_each_entry(p, &br->port_list, list) { + vg = nbp_vlan_group(p); + + if (!vg->num_vlans) + continue; + + NL_SET_ERR_MSG(extack, + "MST mode can't be changed while VLANs exist"); + return -EBUSY; + } + + if (br_opt_get(br, BROPT_MST_ENABLED) == on) + return 0; + + err = switchdev_port_attr_set(br->dev, &attr, extack); + if (err && err != -EOPNOTSUPP) + return err; + + if (on) + static_branch_enable(&br_mst_used); + else + static_branch_disable(&br_mst_used); + + br_opt_toggle(br, BROPT_MST_ENABLED, on); + return 0; +} + +size_t br_mst_info_size(const struct net_bridge_vlan_group *vg) +{ + DECLARE_BITMAP(seen, VLAN_N_VID) = { 0 }; + const struct net_bridge_vlan *v; + size_t sz; + + /* IFLA_BRIDGE_MST */ + sz = nla_total_size(0); + + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + if (test_bit(v->brvlan->msti, seen)) + continue; + + /* IFLA_BRIDGE_MST_ENTRY */ + sz += nla_total_size(0) + + /* IFLA_BRIDGE_MST_ENTRY_MSTI */ + nla_total_size(sizeof(u16)) + + /* IFLA_BRIDGE_MST_ENTRY_STATE */ + nla_total_size(sizeof(u8)); + + __set_bit(v->brvlan->msti, seen); + } + + return sz; +} + +int br_mst_fill_info(struct sk_buff *skb, + const struct net_bridge_vlan_group *vg) +{ + DECLARE_BITMAP(seen, VLAN_N_VID) = { 0 }; + const struct net_bridge_vlan *v; + struct nlattr *nest; + int err = 0; + + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (test_bit(v->brvlan->msti, seen)) + continue; + + nest = nla_nest_start_noflag(skb, IFLA_BRIDGE_MST_ENTRY); + if (!nest || + nla_put_u16(skb, IFLA_BRIDGE_MST_ENTRY_MSTI, v->brvlan->msti) || + nla_put_u8(skb, IFLA_BRIDGE_MST_ENTRY_STATE, v->state)) { + err = -EMSGSIZE; + break; + } + nla_nest_end(skb, nest); + + __set_bit(v->brvlan->msti, seen); + } + + return err; +} + +static const struct nla_policy br_mst_nl_policy[IFLA_BRIDGE_MST_ENTRY_MAX + 1] = { + [IFLA_BRIDGE_MST_ENTRY_MSTI] = NLA_POLICY_RANGE(NLA_U16, + 1, /* 0 reserved for CST */ + VLAN_N_VID - 1), + [IFLA_BRIDGE_MST_ENTRY_STATE] = NLA_POLICY_RANGE(NLA_U8, + BR_STATE_DISABLED, + BR_STATE_BLOCKING), +}; + +static int br_mst_process_one(struct net_bridge_port *p, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MST_ENTRY_MAX + 1]; + u16 msti; + u8 state; + int err; + + err = nla_parse_nested(tb, 
IFLA_BRIDGE_MST_ENTRY_MAX, attr, + br_mst_nl_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MST_ENTRY_MSTI]) { + NL_SET_ERR_MSG_MOD(extack, "MSTI not specified"); + return -EINVAL; + } + + if (!tb[IFLA_BRIDGE_MST_ENTRY_STATE]) { + NL_SET_ERR_MSG_MOD(extack, "State not specified"); + return -EINVAL; + } + + msti = nla_get_u16(tb[IFLA_BRIDGE_MST_ENTRY_MSTI]); + state = nla_get_u8(tb[IFLA_BRIDGE_MST_ENTRY_STATE]); + + return br_mst_set_state(p, msti, state, extack); +} + +int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *attr; + int err, msts = 0; + int rem; + + if (!br_opt_get(p->br, BROPT_MST_ENABLED)) { + NL_SET_ERR_MSG_MOD(extack, "Can't modify MST state when MST is disabled"); + return -EBUSY; + } + + nla_for_each_nested(attr, mst_attr, rem) { + switch (nla_type(attr)) { + case IFLA_BRIDGE_MST_ENTRY: + err = br_mst_process_one(p, attr, extack); + break; + default: + continue; + } + + msts++; + if (err) + break; + } + + if (!msts) { + NL_SET_ERR_MSG_MOD(extack, "Found no MST entries to process"); + err = -EINVAL; + } + + return err; +} diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 7d4432ca9a20..a8d90fa8621e 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -119,6 +119,9 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev, /* Each VLAN is returned in bridge_vlan_info along with flags */ vinfo_sz += num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); + if (filter_mask & RTEXT_FILTER_MST) + vinfo_sz += br_mst_info_size(vg); + if (!(filter_mask & RTEXT_FILTER_CFM_STATUS)) return vinfo_sz; @@ -485,7 +488,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, RTEXT_FILTER_BRVLAN_COMPRESSED | RTEXT_FILTER_MRP | RTEXT_FILTER_CFM_CONFIG | - RTEXT_FILTER_CFM_STATUS)) { + RTEXT_FILTER_CFM_STATUS | + RTEXT_FILTER_MST)) { af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af) goto nla_put_failure; @@ -564,7 +568,28 @@ static int br_fill_ifinfo(struct sk_buff *skb, nla_nest_end(skb, cfm_nest); } + if ((filter_mask & RTEXT_FILTER_MST) && + br_opt_get(br, BROPT_MST_ENABLED) && port) { + const struct net_bridge_vlan_group *vg = nbp_vlan_group(port); + struct nlattr *mst_nest; + int err; + + if (!vg || !vg->num_vlans) + goto done; + + mst_nest = nla_nest_start(skb, IFLA_BRIDGE_MST); + if (!mst_nest) + goto nla_put_failure; + + err = br_mst_fill_info(skb, vg); + if (err) + goto nla_put_failure; + + nla_nest_end(skb, mst_nest); + } + done: + if (af) nla_nest_end(skb, af); nlmsg_end(skb, nlh); @@ -803,6 +828,23 @@ static int br_afspec(struct net_bridge *br, if (err) return err; break; + case IFLA_BRIDGE_MST: + if (!p) { + NL_SET_ERR_MSG(extack, + "MST states can only be set on bridge ports"); + return -EINVAL; + } + + if (cmd != RTM_SETLINK) { + NL_SET_ERR_MSG(extack, + "MST states can only be set through RTM_SETLINK"); + return -EINVAL; + } + + err = br_mst_process(p, attr, extack); + if (err) + return err; + break; } } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 48bc61ebc211..18ccc3d5d296 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -178,6 +178,7 @@ enum { * @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context * @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast * context + * @msti: if MASTER flag set, this holds the VLANs MST instance * @vlist: sorted list of VLAN entries * @rcu: used for entry destruction * @@ -210,6 +211,8 @@ struct 
net_bridge_vlan { struct net_bridge_mcast_port port_mcast_ctx; }; + u16 msti; + struct list_head vlist; struct rcu_head rcu; @@ -445,6 +448,7 @@ enum net_bridge_opts { BROPT_NO_LL_LEARN, BROPT_VLAN_BRIDGE_BINDING, BROPT_MCAST_VLAN_SNOOPING_ENABLED, + BROPT_MST_ENABLED, }; struct net_bridge { @@ -1765,6 +1769,63 @@ static inline bool br_vlan_state_allowed(u8 state, bool learn_allow) } #endif +/* br_mst.c */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +DECLARE_STATIC_KEY_FALSE(br_mst_used); +static inline bool br_mst_is_enabled(struct net_bridge *br) +{ + return static_branch_unlikely(&br_mst_used) && + br_opt_get(br, BROPT_MST_ENABLED); +} + +int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, + struct netlink_ext_ack *extack); +int br_mst_vlan_set_msti(struct net_bridge_vlan *v, u16 msti); +void br_mst_vlan_init_state(struct net_bridge_vlan *v); +int br_mst_set_enabled(struct net_bridge *br, bool on, + struct netlink_ext_ack *extack); +size_t br_mst_info_size(const struct net_bridge_vlan_group *vg); +int br_mst_fill_info(struct sk_buff *skb, + const struct net_bridge_vlan_group *vg); +int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, + struct netlink_ext_ack *extack); +#else +static inline bool br_mst_is_enabled(struct net_bridge *br) +{ + return false; +} + +static inline int br_mst_set_state(struct net_bridge_port *p, u16 msti, + u8 state, struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline int br_mst_set_enabled(struct net_bridge *br, bool on, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline size_t br_mst_info_size(const struct net_bridge_vlan_group *vg) +{ + return 0; +} + +static inline int br_mst_fill_info(struct sk_buff *skb, + const struct net_bridge_vlan_group *vg) +{ + return -EOPNOTSUPP; +} + +static inline int br_mst_process(struct net_bridge_port *p, + const struct nlattr *mst_attr, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} +#endif + struct nf_br_ops { int (*br_dev_xmit_hook)(struct sk_buff *skb); }; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 1d80f34a139c..7d27b2e6038f 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -43,6 +43,12 @@ void br_set_state(struct net_bridge_port *p, unsigned int state) return; p->state = state; + if (br_opt_get(p->br, BROPT_MST_ENABLED)) { + err = br_mst_set_state(p, 0, state, NULL); + if (err) + br_warn(p->br, "error setting MST state on port %u(%s)\n", + p->port_no, netdev_name(p->dev)); + } err = switchdev_port_attr_set(p->dev, &attr, NULL); if (err && err != -EOPNOTSUPP) br_warn(p->br, "error setting offload STP state on port %u(%s)\n", diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 6f6a70121a5e..8cc44c367231 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -331,6 +331,46 @@ br_switchdev_fdb_replay(const struct net_device *br_dev, const void *ctx, return err; } +static int br_switchdev_vlan_attr_replay(struct net_device *br_dev, + const void *ctx, + struct notifier_block *nb, + struct netlink_ext_ack *extack) +{ + struct switchdev_notifier_port_attr_info attr_info = { + .info = { + .dev = br_dev, + .extack = extack, + .ctx = ctx, + }, + }; + struct net_bridge *br = netdev_priv(br_dev); + struct net_bridge_vlan_group *vg; + struct switchdev_attr attr; + struct net_bridge_vlan *v; + int err; + + attr_info.attr = &attr; + attr.orig_dev = br_dev; + + vg = br_vlan_group(br); + + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (v->msti) { + attr.id = 
SWITCHDEV_ATTR_ID_VLAN_MSTI; + attr.u.vlan_msti.vid = v->vid; + attr.u.vlan_msti.msti = v->msti; + + err = nb->notifier_call(nb, SWITCHDEV_PORT_ATTR_SET, + &attr_info); + err = notifier_to_errno(err); + if (err) + return err; + } + } + + return 0; +} + static int br_switchdev_vlan_replay_one(struct notifier_block *nb, struct net_device *dev, @@ -425,6 +465,12 @@ static int br_switchdev_vlan_replay(struct net_device *br_dev, return err; } + if (adding) { + err = br_switchdev_vlan_attr_replay(br_dev, ctx, nb, extack); + if (err) + return err; + } + return 0; } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 7557e90b60e1..0f5e75ccac79 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -226,6 +226,24 @@ static void nbp_vlan_rcu_free(struct rcu_head *rcu) kfree(v); } +static void br_vlan_init_state(struct net_bridge_vlan *v) +{ + struct net_bridge *br; + + if (br_vlan_is_master(v)) + br = v->br; + else + br = v->port->br; + + if (br_opt_get(br, BROPT_MST_ENABLED)) { + br_mst_vlan_init_state(v); + return; + } + + v->state = BR_STATE_FORWARDING; + v->msti = 0; +} + /* This is the shared VLAN add function which works for both ports and bridge * devices. There are four possible calls to this function in terms of the * vlan entry type: @@ -322,7 +340,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, } /* set the state before publishing */ - v->state = BR_STATE_FORWARDING; + br_vlan_init_state(v); err = rhashtable_lookup_insert_fast(&vg->vlan_hash, &v->vnode, br_vlan_rht_params); diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index a6382973b3e7..a2724d03278c 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -99,6 +99,11 @@ static int br_vlan_modify_state(struct net_bridge_vlan_group *vg, return -EBUSY; } + if (br_opt_get(br, BROPT_MST_ENABLED)) { + NL_SET_ERR_MSG_MOD(extack, "Can't modify vlan state directly when MST is enabled"); + return -EBUSY; + } + if (v->state == state) return 0; @@ -291,6 +296,7 @@ bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *r_end) { return v_curr->vid - r_end->vid == 1 && + v_curr->msti == r_end->msti && ((v_curr->priv_flags ^ r_end->priv_flags) & BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0 && br_multicast_ctx_options_equal(&v_curr->br_mcast_ctx, @@ -379,6 +385,9 @@ bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, #endif #endif + if (nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_MSTI, v_opts->msti)) + goto out_err; + nla_nest_end(skb, nest); return true; @@ -410,6 +419,7 @@ static size_t rtnl_vlan_global_opts_nlmsg_size(const struct net_bridge_vlan *v) + nla_total_size(0) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */ + br_rports_size(&v->br_mcast_ctx) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */ #endif + + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_GOPTS_MSTI */ + nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */ } @@ -559,6 +569,15 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, } #endif #endif + if (tb[BRIDGE_VLANDB_GOPTS_MSTI]) { + u16 msti; + + msti = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_MSTI]); + err = br_mst_vlan_set_msti(v, msti); + if (err) + return err; + *changed = true; + } return 0; } @@ -578,6 +597,7 @@ static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = { [BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL] = { .type = NLA_U64 }, [BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 }, 
[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 }, + [BRIDGE_VLANDB_GOPTS_MSTI] = NLA_POLICY_MAX(NLA_U16, VLAN_N_VID - 1), }; int br_vlan_rtm_process_global_options(struct net_device *dev, diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index ebfb2a5c59e4..73242962be5d 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -381,7 +381,7 @@ static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb) protoff = skb_network_offset(skb) + ip_hdrlen(skb); break; case htons(ETH_P_IPV6): { - unsigned char pnum = ipv6_hdr(skb)->nexthdr; + unsigned char pnum = ipv6_hdr(skb)->nexthdr; __be16 frag_off; protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, diff --git a/net/can/isotp.c b/net/can/isotp.c index d4c0b4704987..f6f8ba1f816d 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1046,26 +1046,29 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, { struct sock *sk = sock->sk; struct sk_buff *skb; - int err = 0; - int noblock; + struct isotp_sock *so = isotp_sk(sk); + int noblock = flags & MSG_DONTWAIT; + int ret = 0; - noblock = flags & MSG_DONTWAIT; - flags &= ~MSG_DONTWAIT; + if (flags & ~(MSG_DONTWAIT | MSG_TRUNC)) + return -EINVAL; - skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!so->bound) + return -EADDRNOTAVAIL; + + flags &= ~MSG_DONTWAIT; + skb = skb_recv_datagram(sk, flags, noblock, &ret); if (!skb) - return err; + return ret; if (size < skb->len) msg->msg_flags |= MSG_TRUNC; else size = skb->len; - err = memcpy_to_msg(msg, skb->data, size); - if (err < 0) { - skb_free_datagram(sk, skb); - return err; - } + ret = memcpy_to_msg(msg, skb->data, size); + if (ret < 0) + goto out_err; sock_recv_timestamp(msg, sk, skb); @@ -1075,9 +1078,13 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } + /* set length of return value */ + ret = (flags & MSG_TRUNC) ? 
skb->len : size; + +out_err: skb_free_datagram(sk, skb); - return size; + return ret; } static int isotp_release(struct socket *sock) @@ -1148,6 +1155,7 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) struct net *net = sock_net(sk); int ifindex; struct net_device *dev; + canid_t tx_id, rx_id; int err = 0; int notify_enetdown = 0; int do_rx_reg = 1; @@ -1155,8 +1163,18 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (len < ISOTP_MIN_NAMELEN) return -EINVAL; - if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) - return -EADDRNOTAVAIL; + /* sanitize tx/rx CAN identifiers */ + tx_id = addr->can_addr.tp.tx_id; + if (tx_id & CAN_EFF_FLAG) + tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK); + else + tx_id &= CAN_SFF_MASK; + + rx_id = addr->can_addr.tp.rx_id; + if (rx_id & CAN_EFF_FLAG) + rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK); + else + rx_id &= CAN_SFF_MASK; if (!addr->can_ifindex) return -ENODEV; @@ -1168,21 +1186,13 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) do_rx_reg = 0; /* do not validate rx address for functional addressing */ - if (do_rx_reg) { - if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) { - err = -EADDRNOTAVAIL; - goto out; - } - - if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) { - err = -EADDRNOTAVAIL; - goto out; - } + if (do_rx_reg && rx_id == tx_id) { + err = -EADDRNOTAVAIL; + goto out; } if (so->bound && addr->can_ifindex == so->ifindex && - addr->can_addr.tp.rx_id == so->rxid && - addr->can_addr.tp.tx_id == so->txid) + rx_id == so->rxid && tx_id == so->txid) goto out; dev = dev_get_by_index(net, addr->can_ifindex); @@ -1206,16 +1216,14 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) ifindex = dev->ifindex; if (do_rx_reg) { - can_rx_register(net, dev, addr->can_addr.tp.rx_id, - SINGLE_MASK(addr->can_addr.tp.rx_id), + can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id), isotp_rcv, sk, "isotp", sk); /* no consecutive frame echo skb in flight */ so->cfecho = 0; /* register for echo skb's */ - can_rx_register(net, dev, addr->can_addr.tp.tx_id, - SINGLE_MASK(addr->can_addr.tp.tx_id), + can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id), isotp_rcv_echo, sk, "isotpe", sk); } @@ -1239,8 +1247,8 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) /* switch to new settings */ so->ifindex = ifindex; - so->rxid = addr->can_addr.tp.rx_id; - so->txid = addr->can_addr.tp.tx_id; + so->rxid = rx_id; + so->txid = tx_id; so->bound = 1; out: diff --git a/net/core/dev.c b/net/core/dev.c index 8d25ec5b3af7..75bab5b0dbae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4265,6 +4265,9 @@ static inline void ____napi_schedule(struct softnet_data *sd, { struct task_struct *thread; + lockdep_assert_softirq_will_run(); + lockdep_assert_irqs_disabled(); + if (test_bit(NAPI_STATE_THREADED, &napi->state)) { /* Paired with smp_mb__before_atomic() in * napi_enable()/dev_set_threaded(). 
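The two lockdep assertions added to ____napi_schedule() above encode a context rule rather than change behaviour: the caller must guarantee that the raised NET_RX_SOFTIRQ will actually run, i.e. it must be in hardirq context or have softirqs disabled. A sketch of a conforming task-context caller (demo_deliver is a hypothetical helper; this is essentially what netif_rx() itself open-codes around __netif_rx()):

#include <linux/netdevice.h>

static int demo_deliver(struct sk_buff *skb)
{
	int ret;

	local_bh_disable();	/* softirq_count() != 0: assertion holds */
	ret = __netif_rx(skb);	/* enqueue to backlog, raise NET_RX_SOFTIRQ */
	local_bh_enable();	/* pending softirq runs on this CPU here */

	return ret;
}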
@@ -4872,7 +4875,7 @@ int __netif_rx(struct sk_buff *skb) { int ret; - lockdep_assert_once(hardirq_count() | softirq_count()); + lockdep_assert_softirq_will_run(); trace_netif_rx_entry(skb); ret = netif_rx_internal(skb); diff --git a/net/core/devlink.c b/net/core/devlink.c index fcd9f6d85cf1..f2a277053ec6 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -225,6 +225,33 @@ struct devlink *__must_check devlink_try_get(struct devlink *devlink) return NULL; } +void devl_assert_locked(struct devlink *devlink) +{ + lockdep_assert_held(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_assert_locked); + +#ifdef CONFIG_LOCKDEP +/* For use in conjunction with LOCKDEP only e.g. rcu_dereference_protected() */ +bool devl_lock_is_held(struct devlink *devlink) +{ + return lockdep_is_held(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_lock_is_held); +#endif + +void devl_lock(struct devlink *devlink) +{ + mutex_lock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_lock); + +void devl_unlock(struct devlink *devlink) +{ + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_unlock); + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { @@ -1541,35 +1568,20 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, return 0; } -static int devlink_port_split(struct devlink *devlink, u32 port_index, - u32 count, struct netlink_ext_ack *extack) - -{ - if (devlink->ops->port_split) - return devlink->ops->port_split(devlink, port_index, count, - extack); - return -EOPNOTSUPP; -} - static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info) { + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; - struct devlink_port *devlink_port; - u32 port_index; u32 count; - if (!info->attrs[DEVLINK_ATTR_PORT_INDEX] || - !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]) + if (!info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]) return -EINVAL; + if (!devlink->ops->port_split) + return -EOPNOTSUPP; - devlink_port = devlink_port_get_from_info(devlink, info); - port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]); - if (IS_ERR(devlink_port)) - return -EINVAL; - if (!devlink_port->attrs.splittable) { /* Split ports cannot be split. 
*/ if (devlink_port->attrs.split) @@ -1584,29 +1596,19 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, return -EINVAL; } - return devlink_port_split(devlink, port_index, count, info->extack); -} - -static int devlink_port_unsplit(struct devlink *devlink, u32 port_index, - struct netlink_ext_ack *extack) - -{ - if (devlink->ops->port_unsplit) - return devlink->ops->port_unsplit(devlink, port_index, extack); - return -EOPNOTSUPP; + return devlink->ops->port_split(devlink, devlink_port, count, + info->extack); } static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, struct genl_info *info) { + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; - u32 port_index; - - if (!info->attrs[DEVLINK_ATTR_PORT_INDEX]) - return -EINVAL; - port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); - return devlink_port_unsplit(devlink, port_index, info->extack); + if (!devlink->ops->port_unsplit) + return -EOPNOTSUPP; + return devlink->ops->port_unsplit(devlink, devlink_port, info->extack); } static int devlink_port_new_notifiy(struct devlink *devlink, @@ -8645,14 +8647,14 @@ static const struct genl_small_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_split_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_PORT_UNSPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_unsplit_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_PORT_NEW, @@ -9249,6 +9251,32 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port) cancel_delayed_work_sync(&devlink_port->type_warn_dw); } +int devl_port_register(struct devlink *devlink, + struct devlink_port *devlink_port, + unsigned int port_index) +{ + lockdep_assert_held(&devlink->lock); + + if (devlink_port_index_exists(devlink, port_index)) + return -EEXIST; + + WARN_ON(devlink_port->devlink); + devlink_port->devlink = devlink; + devlink_port->index = port_index; + spin_lock_init(&devlink_port->type_lock); + INIT_LIST_HEAD(&devlink_port->reporter_list); + mutex_init(&devlink_port->reporters_lock); + list_add_tail(&devlink_port->list, &devlink->port_list); + INIT_LIST_HEAD(&devlink_port->param_list); + INIT_LIST_HEAD(&devlink_port->region_list); + + INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn); + devlink_port_type_warn_schedule(devlink_port); + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + return 0; +} +EXPORT_SYMBOL_GPL(devl_port_register); + /** * devlink_port_register - Register devlink port * @@ -9266,29 +9294,28 @@ int devlink_port_register(struct devlink *devlink, struct devlink_port *devlink_port, unsigned int port_index) { - mutex_lock(&devlink->lock); - if (devlink_port_index_exists(devlink, port_index)) { - mutex_unlock(&devlink->lock); - return -EEXIST; - } + int err; - WARN_ON(devlink_port->devlink); - devlink_port->devlink = devlink; - devlink_port->index = port_index; - spin_lock_init(&devlink_port->type_lock); - INIT_LIST_HEAD(&devlink_port->reporter_list); - mutex_init(&devlink_port->reporters_lock); - list_add_tail(&devlink_port->list, &devlink->port_list); - INIT_LIST_HEAD(&devlink_port->param_list); - INIT_LIST_HEAD(&devlink_port->region_list); + mutex_lock(&devlink->lock); + err = 
devl_port_register(devlink, devlink_port, port_index); mutex_unlock(&devlink->lock); - INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn); - devlink_port_type_warn_schedule(devlink_port); - devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); - return 0; + return err; } EXPORT_SYMBOL_GPL(devlink_port_register); +void devl_port_unregister(struct devlink_port *devlink_port) +{ + lockdep_assert_held(&devlink_port->devlink->lock); + + devlink_port_type_warn_cancel(devlink_port); + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); + list_del(&devlink_port->list); + WARN_ON(!list_empty(&devlink_port->reporter_list)); + WARN_ON(!list_empty(&devlink_port->region_list)); + mutex_destroy(&devlink_port->reporters_lock); +} +EXPORT_SYMBOL_GPL(devl_port_unregister); + /** * devlink_port_unregister - Unregister devlink port * @@ -9298,14 +9325,9 @@ void devlink_port_unregister(struct devlink_port *devlink_port) { struct devlink *devlink = devlink_port->devlink; - devlink_port_type_warn_cancel(devlink_port); - devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); mutex_lock(&devlink->lock); - list_del(&devlink_port->list); + devl_port_unregister(devlink_port); mutex_unlock(&devlink->lock); - WARN_ON(!list_empty(&devlink_port->reporter_list)); - WARN_ON(!list_empty(&devlink_port->region_list)); - mutex_destroy(&devlink_port->reporters_lock); } EXPORT_SYMBOL_GPL(devlink_port_unregister); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index bef1aaa7ec1c..47b334226f4e 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1497,6 +1497,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) const char *user_protocol; master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); if (!master) return -EPROBE_DEFER; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index f20bdd8ea0a8..5d3f4a67dce1 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -215,6 +215,9 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops); int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); +int dsa_port_set_mst_state(struct dsa_port *dp, + const struct switchdev_mst_state *state, + struct netlink_ext_ack *extack); int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); void dsa_port_disable_rt(struct dsa_port *dp); @@ -234,6 +237,10 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct netlink_ext_ack *extack); bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock); +int dsa_port_mst_enable(struct dsa_port *dp, bool on, + struct netlink_ext_ack *extack); +int dsa_port_vlan_msti(struct dsa_port *dp, + const struct switchdev_vlan_msti *msti); int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, bool targeted_match); int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, diff --git a/net/dsa/port.c b/net/dsa/port.c index 58291df14cdb..32d472a82241 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -30,12 +30,11 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) return dsa_tree_notify(dp->ds->dst, e, v); } -static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp) +static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp, u16 vid) { struct net_device *brport_dev = 
dsa_port_to_bridge_port(dp); struct switchdev_notifier_fdb_info info = { - /* flush all VLANs */ - .vid = 0, + .vid = vid, }; /* When the port becomes standalone it has already left the bridge. @@ -57,7 +56,42 @@ static void dsa_port_fast_age(const struct dsa_port *dp) ds->ops->port_fast_age(ds, dp->index); - dsa_port_notify_bridge_fdb_flush(dp); + /* flush all VLANs */ + dsa_port_notify_bridge_fdb_flush(dp, 0); +} + +static int dsa_port_vlan_fast_age(const struct dsa_port *dp, u16 vid) +{ + struct dsa_switch *ds = dp->ds; + int err; + + if (!ds->ops->port_vlan_fast_age) + return -EOPNOTSUPP; + + err = ds->ops->port_vlan_fast_age(ds, dp->index, vid); + + if (!err) + dsa_port_notify_bridge_fdb_flush(dp, vid); + + return err; +} + +static int dsa_port_msti_fast_age(const struct dsa_port *dp, u16 msti) +{ + DECLARE_BITMAP(vids, VLAN_N_VID) = { 0 }; + int err, vid; + + err = br_mst_get_info(dsa_port_bridge_dev_get(dp), msti, vids); + if (err) + return err; + + for_each_set_bit(vid, vids, VLAN_N_VID) { + err = dsa_port_vlan_fast_age(dp, vid); + if (err) + return err; + } + + return 0; } static bool dsa_port_can_configure_learning(struct dsa_port *dp) @@ -118,6 +152,42 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state, pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } +int dsa_port_set_mst_state(struct dsa_port *dp, + const struct switchdev_mst_state *state, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dp->ds; + u8 prev_state; + int err; + + if (!ds->ops->port_mst_state_set) + return -EOPNOTSUPP; + + err = br_mst_get_state(dsa_port_to_bridge_port(dp), state->msti, + &prev_state); + if (err) + return err; + + err = ds->ops->port_mst_state_set(ds, dp->index, state); + if (err) + return err; + + if (!(dp->learning && + (prev_state == BR_STATE_LEARNING || + prev_state == BR_STATE_FORWARDING) && + (state->state == BR_STATE_DISABLED || + state->state == BR_STATE_BLOCKING || + state->state == BR_STATE_LISTENING))) + return 0; + + err = dsa_port_msti_fast_age(dp, state->msti); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "Unable to flush associated VLANs"); + + return 0; +} + int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy) { struct dsa_switch *ds = dp->ds; @@ -321,6 +391,16 @@ static void dsa_port_bridge_destroy(struct dsa_port *dp, kfree(bridge); } +static bool dsa_port_supports_mst(struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + + return ds->ops->vlan_msti_set && + ds->ops->port_mst_state_set && + ds->ops->port_vlan_fast_age && + dsa_port_can_configure_learning(dp); +} + int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, struct netlink_ext_ack *extack) { @@ -334,6 +414,9 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, struct net_device *brport_dev; int err; + if (br_mst_enabled(br) && !dsa_port_supports_mst(dp)) + return -EOPNOTSUPP; + /* Here the interface is already bridged. Reflect the current * configuration so that drivers can program their chips accordingly. 
*/ @@ -735,6 +818,17 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock) return 0; } +int dsa_port_mst_enable(struct dsa_port *dp, bool on, + struct netlink_ext_ack *extack) +{ + if (on && !dsa_port_supports_mst(dp)) { + NL_SET_ERR_MSG_MOD(extack, "Hardware does not support MST"); + return -EINVAL; + } + + return 0; +} + int dsa_port_pre_bridge_flags(const struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack) @@ -778,6 +872,17 @@ int dsa_port_bridge_flags(struct dsa_port *dp, return 0; } +int dsa_port_vlan_msti(struct dsa_port *dp, + const struct switchdev_vlan_msti *msti) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->vlan_msti_set) + return -EOPNOTSUPP; + + return ds->ops->vlan_msti_set(ds, *dp->bridge, msti); +} + int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, bool targeted_match) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a61a7c54af20..d1a3be158d8d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -19,6 +19,7 @@ #include <net/tc_act/tc_mirred.h> #include <linux/if_bridge.h> #include <linux/if_hsr.h> +#include <net/dcbnl.h> #include <linux/netpoll.h> #include "dsa_priv.h" @@ -450,6 +451,12 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, ret = dsa_port_set_state(dp, attr->u.stp_state, true); break; + case SWITCHDEV_ATTR_ID_PORT_MST_STATE: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_set_mst_state(dp, &attr->u.mst_state, extack); + break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) return -EOPNOTSUPP; @@ -463,6 +470,12 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, ret = dsa_port_ageing_time(dp, attr->u.ageing_time); break; + case SWITCHDEV_ATTR_ID_BRIDGE_MST: + if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_mst_enable(dp, attr->u.mst, extack); + break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) return -EOPNOTSUPP; @@ -476,6 +489,12 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack); break; + case SWITCHDEV_ATTR_ID_VLAN_MSTI: + if (!dsa_port_offloads_bridge_dev(dp, attr->orig_dev)) + return -EOPNOTSUPP; + + ret = dsa_port_vlan_msti(dp, &attr->u.vlan_msti); + break; default: ret = -EOPNOTSUPP; break; @@ -1154,6 +1173,7 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev, struct tc_cls_matchall_offload *cls, bool ingress) { + struct netlink_ext_ack *extack = cls->common.extack; struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_mall_mirror_tc_entry *mirror; @@ -1191,7 +1211,7 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev, mirror->to_local_port = to_dp->index; mirror->ingress = ingress; - err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress); + err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress, extack); if (err) { kfree(mall_tc_entry); return err; @@ -1852,6 +1872,209 @@ out_master_failed: return err; } +static int __maybe_unused +dsa_slave_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + unsigned long mask, new_prio; + int err, port = dp->index; + + if (!ds->ops->port_set_default_prio) + return -EOPNOTSUPP; + + err = dcb_ieee_setapp(dev, 
app); + if (err) + return err; + + mask = dcb_ieee_getapp_mask(dev, app); + new_prio = __fls(mask); + + err = ds->ops->port_set_default_prio(ds, port, new_prio); + if (err) { + dcb_ieee_delapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused +dsa_slave_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + unsigned long mask, new_prio; + int err, port = dp->index; + u8 dscp = app->protocol; + + if (!ds->ops->port_add_dscp_prio) + return -EOPNOTSUPP; + + if (dscp >= 64) { + netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n", + dscp); + return -EINVAL; + } + + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + mask = dcb_ieee_getapp_mask(dev, app); + new_prio = __fls(mask); + + err = ds->ops->port_add_dscp_prio(ds, port, dscp, new_prio); + if (err) { + dcb_ieee_delapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused dsa_slave_dcbnl_ieee_setapp(struct net_device *dev, + struct dcb_app *app) +{ + switch (app->selector) { + case IEEE_8021QAZ_APP_SEL_ETHERTYPE: + switch (app->protocol) { + case 0: + return dsa_slave_dcbnl_set_default_prio(dev, app); + default: + return -EOPNOTSUPP; + } + break; + case IEEE_8021QAZ_APP_SEL_DSCP: + return dsa_slave_dcbnl_add_dscp_prio(dev, app); + default: + return -EOPNOTSUPP; + } +} + +static int __maybe_unused +dsa_slave_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + unsigned long mask, new_prio; + int err, port = dp->index; + + if (!ds->ops->port_set_default_prio) + return -EOPNOTSUPP; + + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + mask = dcb_ieee_getapp_mask(dev, app); + new_prio = mask ? __fls(mask) : 0; + + err = ds->ops->port_set_default_prio(ds, port, new_prio); + if (err) { + dcb_ieee_setapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused +dsa_slave_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + int err, port = dp->index; + u8 dscp = app->protocol; + + if (!ds->ops->port_del_dscp_prio) + return -EOPNOTSUPP; + + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + err = ds->ops->port_del_dscp_prio(ds, port, dscp, app->priority); + if (err) { + dcb_ieee_setapp(dev, app); + return err; + } + + return 0; +} + +static int __maybe_unused dsa_slave_dcbnl_ieee_delapp(struct net_device *dev, + struct dcb_app *app) +{ + switch (app->selector) { + case IEEE_8021QAZ_APP_SEL_ETHERTYPE: + switch (app->protocol) { + case 0: + return dsa_slave_dcbnl_del_default_prio(dev, app); + default: + return -EOPNOTSUPP; + } + break; + case IEEE_8021QAZ_APP_SEL_DSCP: + return dsa_slave_dcbnl_del_dscp_prio(dev, app); + default: + return -EOPNOTSUPP; + } +} + +/* Pre-populate the DCB application priority table with the priorities + * configured during switch setup, which we read from hardware here. 
+ */ +static int dsa_slave_dcbnl_init(struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + int err; + + if (ds->ops->port_get_default_prio) { + int prio = ds->ops->port_get_default_prio(ds, port); + struct dcb_app app = { + .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE, + .protocol = 0, + .priority = prio, + }; + + if (prio < 0) + return prio; + + err = dcb_ieee_setapp(dev, &app); + if (err) + return err; + } + + if (ds->ops->port_get_dscp_prio) { + int protocol; + + for (protocol = 0; protocol < 64; protocol++) { + struct dcb_app app = { + .selector = IEEE_8021QAZ_APP_SEL_DSCP, + .protocol = protocol, + }; + int prio; + + prio = ds->ops->port_get_dscp_prio(ds, port, protocol); + if (prio == -EOPNOTSUPP) + continue; + if (prio < 0) + return prio; + + app.priority = prio; + + err = dcb_ieee_setapp(dev, &app); + if (err) + return err; + } + } + + return 0; +} + static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_drvinfo = dsa_slave_get_drvinfo, .get_regs_len = dsa_slave_get_regs_len, @@ -1881,6 +2104,11 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .self_test = dsa_slave_net_selftest, }; +static const struct dcbnl_rtnl_ops __maybe_unused dsa_slave_dcbnl_ops = { + .ieee_setapp = dsa_slave_dcbnl_ieee_setapp, + .ieee_delapp = dsa_slave_dcbnl_ieee_delapp, +}; + static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -2105,6 +2333,9 @@ int dsa_slave_create(struct dsa_port *port) return -ENOMEM; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; +#if IS_ENABLED(CONFIG_DCB) + slave_dev->dcbnl_ops = &dsa_slave_dcbnl_ops; +#endif if (!is_zero_ether_addr(port->mac)) eth_hw_addr_set(slave_dev, port->mac); else @@ -2162,6 +2393,17 @@ int dsa_slave_create(struct dsa_port *port) goto out_phy; } + if (IS_ENABLED(CONFIG_DCB)) { + ret = dsa_slave_dcbnl_init(slave_dev); + if (ret) { + netdev_err(slave_dev, + "failed to initialize DCB: %pe\n", + ERR_PTR(ret)); + rtnl_unlock(); + goto out_unregister; + } + } + ret = netdev_upper_dev_link(master, slave_dev, NULL); rtnl_unlock(); @@ -2624,6 +2866,9 @@ static int dsa_slave_fdb_event(struct net_device *dev, if (ctx && ctx != dp) return 0; + if (!dp->bridge) + return 0; + if (switchdev_fdb_is_dynamically_learned(fdb_info)) { if (dsa_port_offloads_bridge_port(dp, orig_dev)) return 0; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7408051632ac..af8209f912ab 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -291,7 +291,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, - .flowi4_oif = l3mdev_master_ifindex_rcu(dev), + .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, .flowi4_scope = scope, @@ -353,9 +353,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); - if (!fl4.flowi4_iif) - fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; + fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev); + fl4.flowi4_iif = oif ? 
: LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c5a29703185a..cc8e84ef2ae4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2234,7 +2234,7 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb) { - if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) + if (fl4->flowi4_oif) goto check_saddr; #ifdef CONFIG_IP_ROUTE_MULTIPATH diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2af2b99e0bea..fb0e49c36c2e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1429,11 +1429,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) return false; - if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { - if (flp->flowi4_oif && - flp->flowi4_oif != nhc->nhc_oif) - return false; - } + if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif) + return false; return true; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f444f5983405..63f3256a407d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2263,6 +2263,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ + fl4.flowi4_l3mdev = 0; fl4.flowi4_oif = 0; fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; @@ -2738,8 +2739,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, res->fi = NULL; res->table = NULL; if (fl4->flowi4_oif && - (ipv4_is_multicast(fl4->daddr) || - !netif_index_is_l3_master(net, fl4->flowi4_oif))) { + (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { /* Apparently, routing tables are wrong. Assume, * that the destination is on link. 
* diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 9e83bcb6bc99..6fde0b184791 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, memset(fl4, 0, sizeof(*fl4)); fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; - fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif); + fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); fl4->flowi4_mark = mark; if (saddr) fl4->saddr = saddr->a4; - fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF; - rt = __ip_route_output_key(net, fl4); if (!IS_ERR(rt)) return &rt->dst; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b0ffbcd5432d..55d604c9b3b3 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -812,8 +812,7 @@ int esp6_input_done2(struct sk_buff *skb, int err) struct tcphdr *th; offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); - - if (offset < 0) { + if (offset == -1) { err = -EINVAL; goto out; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e69fac576970..e23f058166af 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1035,8 +1035,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } @@ -1476,8 +1475,8 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - if (mtu < fragheaderlen || - ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr)) + if (mtu <= fragheaderlen || + ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr)) goto emsgsize; maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6188712f24b0..2fa10e60cccd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1209,9 +1209,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_node *fn; struct rt6_info *rt; - if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) - flags &= ~RT6_LOOKUP_F_IFACE; - rcu_read_lock(); fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: @@ -2181,9 +2178,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; - if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) - oif = 0; - redo_rt6_select: rt6_select(net, fn, oif, res, strict); if (res->f6i == net->ipv6.fib6_null_entry) { @@ -3058,12 +3052,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_info *rt; struct fib6_node *fn; - /* l3mdev_update_flow overrides oif if the device is enslaved; in - * this case we must match on the real ingress device, so reset it - */ - if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) - fl6->flowi6_oif = skb->dev->ifindex; - /* Get the "current" route for this destination and * check if the redirect has come from appropriate router. 
* diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 55bb2cbae13d..e64e427a51cf 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, int err; memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif); - fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; + fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif); fl6.flowi6_mark = mark; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) diff --git a/net/key/af_key.c b/net/key/af_key.c index 9bf52a09b5ff..fd51db3be91c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1699,7 +1699,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad xfrm_probe_algs(); - supp_skb = compose_sadb_supported(hdr, GFP_KERNEL); + supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<<hdr->sadb_msg_satype); diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index 17927966abb3..4eb8892fb2ff 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -250,25 +250,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct net_device *dev; int rc = 0; - rcu_read_lock(); + /* update flow ensures flowi_l3mdev is set when relevant */ + if (!fl->flowi_l3mdev) + return 0; - dev = dev_get_by_index_rcu(net, fl->flowi_oif); - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_fib_table) { - arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); - rc = 1; - goto out; - } + rcu_read_lock(); - dev = dev_get_by_index_rcu(net, fl->flowi_iif); + dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev); if (dev && netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_fib_table) { arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); rc = 1; - goto out; } -out: rcu_read_unlock(); return rc; @@ -277,31 +271,28 @@ out: void l3mdev_update_flow(struct net *net, struct flowi *fl) { struct net_device *dev; - int ifindex; rcu_read_lock(); if (fl->flowi_oif) { dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev) { - ifindex = l3mdev_master_ifindex_rcu(dev); - if (ifindex) { - fl->flowi_oif = ifindex; - fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; - goto out; - } + if (!fl->flowi_l3mdev) + fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); + + /* oif set to L3mdev directs lookup to its table; + * reset to avoid oif match in fib_lookup + */ + if (netif_is_l3_master(dev)) + fl->flowi_oif = 0; + goto out; } } - if (fl->flowi_iif) { + if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) { dev = dev_get_by_index_rcu(net, fl->flowi_iif); - if (dev) { - ifindex = l3mdev_master_ifindex_rcu(dev); - if (ifindex) { - fl->flowi_iif = ifindex; - fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; - } - } + if (dev) + fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); } out: diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9b7f9c966f73..d1a58ed357a4 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1757,9 +1757,6 @@ resolve_normal_ct(struct nf_conn *tmpl, return 0; if (IS_ERR(h)) return PTR_ERR(h); - - ct = nf_ct_tuplehash_to_ctrack(h); - ct->local_origin = state->hook == NF_INET_LOCAL_OUT; } ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 12f793d8fe0c..3b516cffc779 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ 
b/net/netfilter/nf_conntrack_proto_udp.c @@ -63,10 +63,8 @@ static bool udp_error(struct sk_buff *skb, } /* Packet with no checksum */ - if (!hdr->check) { - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (!hdr->check) return false; - } /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index b90eca7a2f22..e66a375075c9 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -39,8 +39,14 @@ flow_offload_fill_dir(struct flow_offload *flow, ft->l3proto = ctt->src.l3num; ft->l4proto = ctt->dst.protonum; - ft->src_port = ctt->src.u.tcp.port; - ft->dst_port = ctt->dst.u.tcp.port; + + switch (ctt->dst.protonum) { + case IPPROTO_TCP: + case IPPROTO_UDP: + ft->src_port = ctt->src.u.tcp.port; + ft->dst_port = ctt->dst.u.tcp.port; + break; + } } struct flow_offload *flow_offload_alloc(struct nf_conn *ct) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index f1d387129f02..010b0b4eb05d 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -172,6 +172,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, struct flow_ports *ports; unsigned int thoff; struct iphdr *iph; + u8 ipproto; if (!pskb_may_pull(skb, sizeof(*iph) + offset)) return -1; @@ -185,13 +186,19 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, thoff += offset; - switch (iph->protocol) { + ipproto = iph->protocol; + switch (ipproto) { case IPPROTO_TCP: *hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: *hdrsize = sizeof(struct udphdr); break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + *hdrsize = sizeof(struct gre_base_hdr); + break; +#endif default: return -1; } @@ -202,15 +209,29 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, if (!pskb_may_pull(skb, thoff + *hdrsize)) return -1; + switch (ipproto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + break; + case IPPROTO_GRE: { + struct gre_base_hdr *greh; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) + return -1; + break; + } + } + iph = (struct iphdr *)(skb_network_header(skb) + offset); - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v4.s_addr = iph->saddr; tuple->dst_v4.s_addr = iph->daddr; - tuple->src_port = ports->source; - tuple->dst_port = ports->dest; tuple->l3proto = AF_INET; - tuple->l4proto = iph->protocol; + tuple->l4proto = ipproto; tuple->iifidx = dev->ifindex; nf_flow_tuple_encap(skb, tuple); @@ -521,6 +542,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, struct flow_ports *ports; struct ipv6hdr *ip6h; unsigned int thoff; + u8 nexthdr; thoff = sizeof(*ip6h) + offset; if (!pskb_may_pull(skb, thoff)) @@ -528,13 +550,19 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); - switch (ip6h->nexthdr) { + nexthdr = ip6h->nexthdr; + switch (nexthdr) { case IPPROTO_TCP: *hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: *hdrsize = sizeof(struct udphdr); break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + *hdrsize = sizeof(struct gre_base_hdr); + break; +#endif default: return -1; } @@ -545,15 
+573,29 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, if (!pskb_may_pull(skb, thoff + *hdrsize)) return -1; + switch (nexthdr) { + case IPPROTO_TCP: + case IPPROTO_UDP: + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + break; + case IPPROTO_GRE: { + struct gre_base_hdr *greh; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) + return -1; + break; + } + } + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v6 = ip6h->saddr; tuple->dst_v6 = ip6h->daddr; - tuple->src_port = ports->source; - tuple->dst_port = ports->dest; tuple->l3proto = AF_INET6; - tuple->l4proto = ip6h->nexthdr; + tuple->l4proto = nexthdr; tuple->iifidx = dev->ifindex; nf_flow_tuple_encap(skb, tuple); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index fc4265acd9c4..cac4468a8a6a 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -174,6 +174,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match, match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP); break; case IPPROTO_UDP: + case IPPROTO_GRE: break; default: return -EOPNOTSUPP; @@ -182,15 +183,22 @@ static int nf_flow_rule_match(struct nf_flow_match *match, key->basic.ip_proto = tuple->l4proto; mask->basic.ip_proto = 0xff; - key->tp.src = tuple->src_port; - mask->tp.src = 0xffff; - key->tp.dst = tuple->dst_port; - mask->tp.dst = 0xffff; - match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) | BIT(FLOW_DISSECTOR_KEY_CONTROL) | - BIT(FLOW_DISSECTOR_KEY_BASIC) | - BIT(FLOW_DISSECTOR_KEY_PORTS); + BIT(FLOW_DISSECTOR_KEY_BASIC); + + switch (tuple->l4proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + key->tp.src = tuple->src_port; + mask->tp.src = 0xffff; + key->tp.dst = tuple->dst_port; + mask->tp.dst = 0xffff; + + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS); + break; + } + return 0; } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 58c06ac10179..7981be526f26 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -494,38 +494,6 @@ another_round: goto another_round; } -static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple) -{ - u16 sp, dp; - - switch (tuple->dst.protonum) { - case IPPROTO_TCP: - sp = ntohs(tuple->src.u.tcp.port); - dp = ntohs(tuple->dst.u.tcp.port); - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - sp = ntohs(tuple->src.u.udp.port); - dp = ntohs(tuple->dst.u.udp.port); - break; - default: - return false; - } - - /* IANA: System port range: 1-1023, - * user port range: 1024-49151, - * private port range: 49152-65535. - * - * Linux default ephemeral port range is 32768-60999. - * - * Enforce port remapping if sport is significantly lower - * than dport to prevent NAT port shadowing, i.e. - * accidental match of 'new' inbound connection vs. - * existing outbound one. - */ - return sp < 16384 && dp >= 32768; -} - /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, * we change the source to map into the range. 
For NF_INET_PRE_ROUTING * and NF_INET_LOCAL_OUT, we change the destination to map into the @@ -539,17 +507,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { - bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL; const struct nf_conntrack_zone *zone; struct net *net = nf_ct_net(ct); zone = nf_ct_zone(ct); - if (maniptype == NF_NAT_MANIP_SRC && - !random_port && - !ct->local_origin) - random_port = tuple_force_port_remap(orig_tuple); - /* 1) If this srcip/proto/src-proto-part is currently mapped, * and that same mapping gives a unique tuple within the given * range, use that. @@ -558,7 +520,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, * So far, we don't do local source mappings, so multiple * manips not an issue. */ - if (maniptype == NF_NAT_MANIP_SRC && !random_port) { + if (maniptype == NF_NAT_MANIP_SRC && + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (in_range(orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -582,7 +545,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, */ /* Only bother mapping if it's not already in range and unique */ - if (!random_port) { + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c86748b3873b..e37ac88efa0a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1072,6 +1072,30 @@ static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg, return strcmp(obj->key.name, k->name); } +static bool nft_supported_family(u8 family) +{ + return false +#ifdef CONFIG_NF_TABLES_INET + || family == NFPROTO_INET +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + || family == NFPROTO_IPV4 +#endif +#ifdef CONFIG_NF_TABLES_ARP + || family == NFPROTO_ARP +#endif +#ifdef CONFIG_NF_TABLES_NETDEV + || family == NFPROTO_NETDEV +#endif +#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) + || family == NFPROTO_BRIDGE +#endif +#ifdef CONFIG_NF_TABLES_IPV6 + || family == NFPROTO_IPV6 +#endif + ; +} + static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { @@ -1086,6 +1110,9 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, u32 flags = 0; int err; + if (!nft_supported_family(family)) + return -EOPNOTSUPP; + lockdep_assert_held(&nft_net->commit_mutex); attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask, @@ -8260,6 +8287,12 @@ void nf_tables_trans_destroy_flush_work(void) } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); +static bool nft_expr_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + return false; +} + static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) { const struct nft_expr *expr, *last; @@ -8307,8 +8340,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha nft_rule_for_each_expr(expr, last, rule) { track.cur = expr; - if (expr->ops->reduce && - expr->ops->reduce(&track, expr)) { + if (nft_expr_reduce(&track, expr)) { expr = track.cur; continue; } diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 0af34ad41479..731b5d87ef45 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -298,6 +298,19 @@ static void 
nft_flow_offload_eval(const struct nft_expr *expr, break; case IPPROTO_UDP: break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: { + struct nf_conntrack_tuple *tuple; + + if (ct->status & IPS_NAT_MASK) + goto out; + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + /* No support for GRE v1 */ + if (tuple->src.u.gre.key || tuple->dst.u.gre.key) + goto out; + break; + } +#endif default: goto out; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1b93ce1a5600..c39c09899fd0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2318,8 +2318,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, copy_skb = skb_get(skb); skb_head = skb->data; } - if (copy_skb) + if (copy_skb) { + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); + } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { @@ -3464,6 +3467,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + const size_t max_len = min(sizeof(skb->cb), + sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled @@ -3486,6 +3491,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(struct sockaddr_ll); } } + if (WARN_ON_ONCE(copy_len > max_len)) { + copy_len = max_len; + msg->msg_namelen = copy_len; + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 89e46f66e3d9..6a34f7b80a6d 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -420,6 +420,19 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, break; case IPPROTO_UDP: break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: { + struct nf_conntrack_tuple *tuple; + + if (ct->status & IPS_NAT_MASK) + return; + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + /* No support for GRE v1 */ + if (tuple->src.u.gre.key || tuple->dst.u.gre.key) + return; + break; + } +#endif default: return; } @@ -439,6 +452,8 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, struct flow_ports *ports; unsigned int thoff; struct iphdr *iph; + size_t hdrsize; + u8 ipproto; if (!pskb_network_may_pull(skb, sizeof(*iph))) return false; @@ -450,29 +465,54 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, unlikely(thoff != sizeof(struct iphdr))) return false; - if (iph->protocol != IPPROTO_TCP && - iph->protocol != IPPROTO_UDP) + ipproto = iph->protocol; + switch (ipproto) { + case IPPROTO_TCP: + hdrsize = sizeof(struct tcphdr); + break; + case IPPROTO_UDP: + hdrsize = sizeof(*ports); + break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + hdrsize = sizeof(struct gre_base_hdr); + break; +#endif + default: return false; + } if (iph->ttl <= 1) return false; - if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ? 
- thoff + sizeof(struct tcphdr) : - thoff + sizeof(*ports))) + if (!pskb_network_may_pull(skb, thoff + hdrsize)) return false; - iph = ip_hdr(skb); - if (iph->protocol == IPPROTO_TCP) + switch (ipproto) { + case IPPROTO_TCP: *tcph = (void *)(skb_network_header(skb) + thoff); + fallthrough; + case IPPROTO_UDP: + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + break; + case IPPROTO_GRE: { + struct gre_base_hdr *greh; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) + return false; + break; + } + } + + iph = ip_hdr(skb); - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v4.s_addr = iph->saddr; tuple->dst_v4.s_addr = iph->daddr; - tuple->src_port = ports->source; - tuple->dst_port = ports->dest; tuple->l3proto = AF_INET; - tuple->l4proto = iph->protocol; + tuple->l4proto = ipproto; return true; } @@ -485,36 +525,63 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, struct flow_ports *ports; struct ipv6hdr *ip6h; unsigned int thoff; + size_t hdrsize; + u8 nexthdr; if (!pskb_network_may_pull(skb, sizeof(*ip6h))) return false; ip6h = ipv6_hdr(skb); + thoff = sizeof(*ip6h); - if (ip6h->nexthdr != IPPROTO_TCP && - ip6h->nexthdr != IPPROTO_UDP) - return false; + nexthdr = ip6h->nexthdr; + switch (nexthdr) { + case IPPROTO_TCP: + hdrsize = sizeof(struct tcphdr); + break; + case IPPROTO_UDP: + hdrsize = sizeof(*ports); + break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + hdrsize = sizeof(struct gre_base_hdr); + break; +#endif + default: + return false; + } if (ip6h->hop_limit <= 1) return false; - thoff = sizeof(*ip6h); - if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ?
- thoff + sizeof(struct tcphdr) : - thoff + sizeof(*ports))) + if (!pskb_network_may_pull(skb, thoff + hdrsize)) return false; - ip6h = ipv6_hdr(skb); - if (ip6h->nexthdr == IPPROTO_TCP) + switch (nexthdr) { + case IPPROTO_TCP: *tcph = (void *)(skb_network_header(skb) + thoff); + fallthrough; + case IPPROTO_UDP: + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + break; + case IPPROTO_GRE: { + struct gre_base_hdr *greh; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) + return false; + break; + } + } + + ip6h = ipv6_hdr(skb); - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v6 = ip6h->saddr; tuple->dst_v6 = ip6h->daddr; - tuple->src_port = ports->source; - tuple->dst_port = ports->dest; tuple->l3proto = AF_INET6; - tuple->l4proto = ip6h->nexthdr; + tuple->l4proto = nexthdr; return true; } diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 756e2dcde1cd..883454c4f921 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -390,6 +390,13 @@ static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, entry->vlan.proto = tcf_vlan_push_proto(act); entry->vlan.prio = tcf_vlan_push_prio(act); break; + case TCA_VLAN_ACT_POP_ETH: + entry->id = FLOW_ACTION_VLAN_POP_ETH; + break; + case TCA_VLAN_ACT_PUSH_ETH: + entry->id = FLOW_ACTION_VLAN_PUSH_ETH; + tcf_vlan_push_eth(entry->vlan_push_eth.src, entry->vlan_push_eth.dst, act); + break; default: return -EOPNOTSUPP; } @@ -407,6 +414,12 @@ static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, case TCA_VLAN_ACT_MODIFY: fl_action->id = FLOW_ACTION_VLAN_MANGLE; break; + case TCA_VLAN_ACT_POP_ETH: + fl_action->id = FLOW_ACTION_VLAN_POP_ETH; + break; + case TCA_VLAN_ACT_PUSH_ETH: + fl_action->id = FLOW_ACTION_VLAN_PUSH_ETH; + break; default: return -EOPNOTSUPP; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 38baeb189d4e..f04abf662ec6 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_remove_sock); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)) { int i; @@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, &vsock_connected_table[i], - connected_table) + connected_table) { + if (vsk->transport != transport) + continue; + fn(sk_vsock(vsk)); + } } spin_unlock_bh(&vsock_table_lock); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index fb3302fff627..5afc194a58bb 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -24,6 +24,7 @@ static struct workqueue_struct *virtio_vsock_workqueue; static struct virtio_vsock __rcu *the_virtio_vsock; static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ +static struct virtio_transport virtio_transport; /* forward declaration */ struct virtio_vsock { struct virtio_device *vdev; @@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock, switch (le32_to_cpu(event->id)) { case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: virtio_vsock_update_guest_cid(vsock); - 
vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vsock_for_each_connected_socket(&virtio_transport.transport, + virtio_vsock_reset_sock); break; } } @@ -662,7 +664,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev) synchronize_rcu(); /* Reset all connected sockets when the device disappear */ - vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vsock_for_each_connected_socket(&virtio_transport.transport, + virtio_vsock_reset_sock); /* Stop all work handlers to make sure no one is accessing the device, * so we can safely call virtio_reset_device(). diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 7aef34e32bdf..b17dc9745188 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; static int PROTOCOL_OVERRIDE = -1; +static struct vsock_transport vmci_transport; /* forward declaration */ + /* Helper function to convert from a VMCI error code to a VSock error code. */ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) @@ -882,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id, const struct vmci_event_data *e_data, void *client_data) { - vsock_for_each_connected_socket(vmci_transport_handle_detach); + vsock_for_each_connected_socket(&vmci_transport, + vmci_transport_handle_detach); } static void vmci_transport_recv_pkt_work(struct work_struct *work) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index b3edde68bc3e..323e251ed37b 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2 + /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index ab4e53715d8a..65d147974f8d 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ /* FREE! 
( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCEs for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 740ae764537e..134612bde0cb 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -106,7 +106,7 @@ static void nest_epollfd(void) printinfo("Nesting level(s): %d\n", nested); epollfdp = calloc(nested, sizeof(int)); - if (!epollfd) + if (!epollfdp) err(EXIT_FAILURE, "calloc"); for (i = 0; i < nested; i++) { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9739b05b999e..24997925ae00 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1648,6 +1648,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, { struct parse_events_term *term; struct list_head *list = NULL; + struct list_head *orig_head = NULL; struct perf_pmu *pmu = NULL; int ok = 0; char *config; @@ -1674,7 +1675,6 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, } list_add_tail(&term->list, head); - /* Add it for all PMUs that support the alias */ list = malloc(sizeof(struct list_head)); if (!list) @@ -1687,13 +1687,15 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, list_for_each_entry(alias, &pmu->aliases, list) { if (!strcasecmp(alias->name, str)) { + parse_events_copy_term_list(head, &orig_head); if (!parse_events_add_pmu(parse_state, list, - pmu->name, head, + pmu->name, orig_head, true, true)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; } + parse_events_terms__delete(orig_head); } } } @@ -2193,7 +2195,7 @@ int perf_pmu__test_parse_init(void) for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) { tmp->type = symbols[i].type; tmp->symbol = strdup(symbols[i].symbol); - if (!list->symbol) + if (!tmp->symbol) goto err_free; } diff --git a/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh new file mode 100755 index 000000000000..941ba4c485c9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + l3_monitor_test +" +NUM_NETIFS=0 +source $lib_dir/lib.sh + +swp=$NETIF_NO_CABLE + +cleanup() +{ + pre_cleanup +} + +l3_monitor_test() +{ + hw_stats_monitor_test $swp l3 \ + "ip addr add dev $swp 192.0.2.1/28" \ + "ip addr del dev $swp 192.0.2.1/28" +} + +trap cleanup EXIT + +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh new file mode 100755 index 000000000000..fe1898402987 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh @@ -0,0 +1,421 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + l3_reporting_test + l3_fail_next_test + l3_counter_test 
+ l3_rollback_test + l3_monitor_test +" + +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR_1=1337 +DEV_ADDR_2=1057 +DEV_ADDR_3=5417 +NUM_NETIFS=0 +source $lib_dir/lib.sh + +DUMMY_IFINDEX= + +DEV_ADDR() +{ + local n=$1; shift + local var=DEV_ADDR_$n + + echo ${!var} +} + +DEV() +{ + echo netdevsim$(DEV_ADDR $1) +} + +DEVLINK_DEV() +{ + echo netdevsim/$(DEV $1) +} + +SYSFS_NET_DIR() +{ + echo /sys/bus/netdevsim/devices/$(DEV $1)/net/ +} + +DEBUGFS_DIR() +{ + echo /sys/kernel/debug/netdevsim/$(DEV $1)/ +} + +nsim_add() +{ + local n=$1; shift + + echo "$(DEV_ADDR $n) 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $(SYSFS_NET_DIR $n) ] ; do :; done +} + +nsim_reload() +{ + local n=$1; shift + local ns=$1; shift + + devlink dev reload $(DEVLINK_DEV $n) netns $ns + + if [ $? -ne 0 ]; then + echo "Failed to reload $(DEV $n) into netns \"testns1\"" + exit 1 + fi + +} + +nsim_del() +{ + local n=$1; shift + + echo "$(DEV_ADDR $n)" > ${NETDEVSIM_PATH}/del_device +} + +nsim_hwstats_toggle() +{ + local action=$1; shift + local instance=$1; shift + local netdev=$1; shift + local type=$1; shift + + local ifindex=$($IP -j link show dev $netdev | jq '.[].ifindex') + + echo $ifindex > $(DEBUGFS_DIR $instance)/hwstats/$type/$action +} + +nsim_hwstats_enable() +{ + nsim_hwstats_toggle enable_ifindex "$@" +} + +nsim_hwstats_disable() +{ + nsim_hwstats_toggle disable_ifindex "$@" +} + +nsim_hwstats_fail_next_enable() +{ + nsim_hwstats_toggle fail_next_enable "$@" +} + +setup_prepare() +{ + modprobe netdevsim &> /dev/null + nsim_add 1 + nsim_add 2 + nsim_add 3 + + ip netns add testns1 + + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + nsim_reload 1 testns1 + nsim_reload 2 testns1 + nsim_reload 3 testns1 + + IP="ip -n testns1" + + $IP link add name dummy1 type dummy + $IP link set dev dummy1 up + DUMMY_IFINDEX=$($IP -j link show dev dummy1 | jq '.[].ifindex') +} + +cleanup() +{ + pre_cleanup + + $IP link del name dummy1 + ip netns del testns1 + nsim_del 3 + nsim_del 2 + nsim_del 1 + modprobe -r netdevsim &> /dev/null +} + +netdev_hwstats_used() +{ + local netdev=$1; shift + local type=$1; shift + + $IP -j stats show dev "$netdev" group offload subgroup hw_stats_info | + jq '.[].info.l3_stats.used' +} + +netdev_check_used() +{ + local netdev=$1; shift + local type=$1; shift + + [[ $(netdev_hwstats_used $netdev $type) == "true" ]] +} + +netdev_check_unused() +{ + local netdev=$1; shift + local type=$1; shift + + [[ $(netdev_hwstats_used $netdev $type) == "false" ]] +} + +netdev_hwstats_request() +{ + local netdev=$1; shift + local type=$1; shift + + $IP -j stats show dev "$netdev" group offload subgroup hw_stats_info | + jq ".[].info.${type}_stats.request" +} + +netdev_check_requested() +{ + local netdev=$1; shift + local type=$1; shift + + [[ $(netdev_hwstats_request $netdev $type) == "true" ]] +} + +netdev_check_unrequested() +{ + local netdev=$1; shift + local type=$1; shift + + [[ $(netdev_hwstats_request $netdev $type) == "false" ]] +} + +reporting_test() +{ + local type=$1; shift + local instance=1 + + RET=0 + + [[ -n $(netdev_hwstats_used dummy1 $type) ]] + check_err $? "$type stats not reported" + + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used before either device or netdevsim request" + + nsim_hwstats_enable $instance dummy1 $type + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used before device request" + netdev_check_unrequested dummy1 $type + check_err $? 
"$type stats reported as requested before device request" + + $IP stats set dev dummy1 ${type}_stats on + netdev_check_used dummy1 $type + check_err $? "$type stats reported as not used after both device and netdevsim request" + netdev_check_requested dummy1 $type + check_err $? "$type stats reported as not requested after device request" + + nsim_hwstats_disable $instance dummy1 $type + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used after netdevsim request withdrawn" + + nsim_hwstats_enable $instance dummy1 $type + netdev_check_used dummy1 $type + check_err $? "$type stats reported as not used after netdevsim request reenabled" + + $IP stats set dev dummy1 ${type}_stats off + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used after device request withdrawn" + netdev_check_unrequested dummy1 $type + check_err $? "$type stats reported as requested after device request withdrawn" + + nsim_hwstats_disable $instance dummy1 $type + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used after both requests withdrawn" + + log_test "Reporting of $type stats usage" +} + +l3_reporting_test() +{ + reporting_test l3 +} + +__fail_next_test() +{ + local instance=$1; shift + local type=$1; shift + + RET=0 + + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used before either device or netdevsim request" + + nsim_hwstats_enable $instance dummy1 $type + nsim_hwstats_fail_next_enable $instance dummy1 $type + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used before device request" + netdev_check_unrequested dummy1 $type + check_err $? "$type stats reported as requested before device request" + + $IP stats set dev dummy1 ${type}_stats on 2>/dev/null + check_fail $? "$type stats request not bounced as it should have been" + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used after bounce" + netdev_check_unrequested dummy1 $type + check_err $? "$type stats reported as requested after bounce" + + $IP stats set dev dummy1 ${type}_stats on + check_err $? "$type stats request failed when it shouldn't have" + netdev_check_used dummy1 $type + check_err $? "$type stats reported as not used after both device and netdevsim request" + netdev_check_requested dummy1 $type + check_err $? "$type stats reported as not requested after device request" + + $IP stats set dev dummy1 ${type}_stats off + nsim_hwstats_disable $instance dummy1 $type + + log_test "Injected failure of $type stats enablement (netdevsim #$instance)" +} + +fail_next_test() +{ + __fail_next_test 1 "$@" + __fail_next_test 2 "$@" + __fail_next_test 3 "$@" +} + +l3_fail_next_test() +{ + fail_next_test l3 +} + +get_hwstat() +{ + local netdev=$1; shift + local type=$1; shift + local selector=$1; shift + + $IP -j stats show dev $netdev group offload subgroup ${type}_stats | + jq ".[0].stats64.${selector}" +} + +counter_test() +{ + local type=$1; shift + local instance=1 + + RET=0 + + nsim_hwstats_enable $instance dummy1 $type + $IP stats set dev dummy1 ${type}_stats on + netdev_check_used dummy1 $type + check_err $? "$type stats reported as not used after both device and netdevsim request" + + # Netdevsim counts 10pps on ingress. We should see maybe a couple + # packets, unless things take a reeealy long time. + local pkts=$(get_hwstat dummy1 l3 rx.packets) + ((pkts < 10)) + check_err $? 
"$type stats show >= 10 packets after first enablement" + + sleep 2 + + local pkts=$(get_hwstat dummy1 l3 rx.packets) + ((pkts >= 20)) + check_err $? "$type stats show < 20 packets after 2s passed" + + $IP stats set dev dummy1 ${type}_stats off + + sleep 2 + + $IP stats set dev dummy1 ${type}_stats on + local pkts=$(get_hwstat dummy1 l3 rx.packets) + ((pkts < 10)) + check_err $? "$type stats show >= 10 packets after second enablement" + + $IP stats set dev dummy1 ${type}_stats off + nsim_hwstats_fail_next_enable $instance dummy1 $type + $IP stats set dev dummy1 ${type}_stats on 2>/dev/null + check_fail $? "$type stats request not bounced as it should have been" + + sleep 2 + + $IP stats set dev dummy1 ${type}_stats on + local pkts=$(get_hwstat dummy1 l3 rx.packets) + ((pkts < 10)) + check_err $? "$type stats show >= 10 packets after post-fail enablement" + + $IP stats set dev dummy1 ${type}_stats off + + log_test "Counter values in $type stats" +} + +l3_counter_test() +{ + counter_test l3 +} + +rollback_test() +{ + local type=$1; shift + + RET=0 + + nsim_hwstats_enable 1 dummy1 l3 + nsim_hwstats_enable 2 dummy1 l3 + nsim_hwstats_enable 3 dummy1 l3 + + # The three netdevsim instances are registered in order of their number + # one after another. It is reasonable to expect that whatever + # notifications take place hit no. 2 in between hitting nos. 1 and 3, + # whatever the actual order. This allows us to test that a fail caused + # by no. 2 does not leave the system in a partial state, and rolls + # everything back. + + nsim_hwstats_fail_next_enable 2 dummy1 l3 + $IP stats set dev dummy1 ${type}_stats on 2>/dev/null + check_fail $? "$type stats request not bounced as it should have been" + + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used after bounce" + netdev_check_unrequested dummy1 $type + check_err $? "$type stats reported as requested after bounce" + + sleep 2 + + $IP stats set dev dummy1 ${type}_stats on + check_err $? "$type stats request not upheld as it should have been" + + local pkts=$(get_hwstat dummy1 l3 rx.packets) + ((pkts < 10)) + check_err $? 
"$type stats show $pkts packets after post-fail enablement" + + $IP stats set dev dummy1 ${type}_stats off + + nsim_hwstats_disable 3 dummy1 l3 + nsim_hwstats_disable 2 dummy1 l3 + nsim_hwstats_disable 1 dummy1 l3 + + log_test "Failure in $type stats enablement rolled back" +} + +l3_rollback_test() +{ + rollback_test l3 +} + +l3_monitor_test() +{ + hw_stats_monitor_test dummy1 l3 \ + "nsim_hwstats_enable 1 dummy1 l3" \ + "nsim_hwstats_disable 1 dummy1 l3" \ + "$IP" +} + +trap cleanup EXIT + +setup_prepare +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index aed7845c08a8..bc2162909a1a 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -16,6 +16,8 @@ #include <linux/udp.h> #include <sys/socket.h> +#include "../kselftest.h" + enum { ERN_SUCCESS = 0, /* Well defined errors, callers may depend on these */ @@ -318,7 +320,7 @@ static const char *cs_ts_info2str(unsigned int info) [SCM_TSTAMP_ACK] = "ACK", }; - if (info < sizeof(names) / sizeof(names[0])) + if (info < ARRAY_SIZE(names)) return names[info]; return "unknown"; } diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 3f4c8cfe7aca..47c4d4b4a44a 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -750,7 +750,7 @@ ipv4_ping_vrf() log_start show_hint "Fails since address on vrf device is out of device scope" run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} - log_test_addr ${a} $? 1 "ping local, device bind" + log_test_addr ${a} $? 2 "ping local, device bind" done # diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 159afc7f0979..664b9ecaf228 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1498,3 +1498,63 @@ brmcast_check_sg_state() check_err_fail $should_fail $? "Entry $src has blocked flag" done } + +start_ip_monitor() +{ + local mtype=$1; shift + local ip=${1-ip}; shift + + # start the monitor in the background + tmpfile=`mktemp /var/run/nexthoptestXXX` + mpid=`($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null` + sleep 0.2 + echo "$mpid $tmpfile" +} + +stop_ip_monitor() +{ + local mpid=$1; shift + local tmpfile=$1; shift + local el=$1; shift + local what=$1; shift + + sleep 0.2 + kill $mpid + local lines=`grep '^\w' $tmpfile | wc -l` + test $lines -eq $el + check_err $? "$what: $lines lines of events, expected $el" + rm -rf $tmpfile +} + +hw_stats_monitor_test() +{ + local dev=$1; shift + local type=$1; shift + local make_suitable=$1; shift + local make_unsuitable=$1; shift + local ip=${1-ip}; shift + + RET=0 + + # Expect a notification about enablement. + local ipmout=$(start_ip_monitor stats "$ip") + $ip stats set dev $dev ${type}_stats on + stop_ip_monitor $ipmout 1 "${type}_stats enablement" + + # Expect a notification about offload. + local ipmout=$(start_ip_monitor stats "$ip") + $make_suitable + stop_ip_monitor $ipmout 1 "${type}_stats installation" + + # Expect a notification about loss of offload. 
+ local ipmout=$(start_ip_monitor stats "$ip") + $make_unsuitable + stop_ip_monitor $ipmout 1 "${type}_stats deinstallation" + + # Expect a notification about disablement + local ipmout=$(start_ip_monitor stats "$ip") + $ip stats set dev $dev ${type}_stats off + stop_ip_monitor $ipmout 1 "${type}_stats disablement" + + log_test "${type}_stats notifications" +} diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 3653d6468c67..1a736f700be4 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -53,6 +53,7 @@ #include <unistd.h> #include "psock_lib.h" +#include "../kselftest.h" #define RING_NUM_FRAMES 20 @@ -117,7 +118,7 @@ static void sock_fanout_set_cbpf(int fd) struct sock_fprog bpf_prog; bpf_prog.filter = bpf_filter; - bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); + bpf_prog.len = ARRAY_SIZE(bpf_filter); if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog, sizeof(bpf_prog))) { @@ -162,7 +163,7 @@ static void sock_fanout_set_ebpf(int fd) memset(&attr, 0, sizeof(attr)); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; attr.insns = (unsigned long) prog; - attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); + attr.insn_cnt = ARRAY_SIZE(prog); attr.license = (unsigned long) "GPL"; attr.log_buf = (unsigned long) log_buf, attr.log_size = sizeof(log_buf), diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c index c5489341cfb8..90026a27eac0 100644 --- a/tools/testing/selftests/net/toeplitz.c +++ b/tools/testing/selftests/net/toeplitz.c @@ -52,6 +52,8 @@ #include <sys/types.h> #include <unistd.h> +#include "../kselftest.h" + #define TOEPLITZ_KEY_MIN_LEN 40 #define TOEPLITZ_KEY_MAX_LEN 60 @@ -295,7 +297,7 @@ static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport) struct sock_fprog prog = {}; prog.filter = filter; - prog.len = sizeof(filter) / sizeof(struct sock_filter); + prog.len = ARRAY_SIZE(filter); if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) error(1, errno, "setsockopt filter"); } @@ -324,7 +326,7 @@ static void set_filter_null(int fd) struct sock_fprog prog = {}; prog.filter = filter; - prog.len = sizeof(filter) / sizeof(struct sock_filter); + prog.len = ARRAY_SIZE(filter); if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) error(1, errno, "setsockopt filter"); } diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 79fe627b9e81..eb8543b9a5c4 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -880,9 +880,8 @@ EOF return $ksft_skip fi - # test default behaviour. Packet from ns1 to ns0 is not redirected - # due to automatic port translation. - test_port_shadow "default" "ROUTER" + # test default behaviour. Packet from ns1 to ns0 is redirected to ns2. + test_port_shadow "default" "CLIENT" # test packet filter based mitigation: prevent forwarding of # packets claiming to come from the service port. 
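The nft_nat.sh change above tracks the nf_nat_core.c revert earlier in this diff: with the kernel's forced source-port remap gone, the spoofed packet from ns1 once again reaches the client by default, and port shadowing is instead countered by explicit rules. A minimal sketch of the packet-filter mitigation that the following test case exercises, assuming an illustrative LAN-facing interface "veth0" and service port 1405 (neither name is taken from this patch):

nft -f - <<EOF
table inet filter {
	chain forward {
		type filter hook forward priority 0; policy accept;
		# drop forwarded packets that claim to come from the service port
		iifname "veth0" udp sport 1405 drop
	}
}
EOF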
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index ea04f04c173e..ccb0f06ef9e3 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -21,7 +21,7 @@ NAMES = { 'BATCH_FILE': './batch.txt', 'BATCH_DIR': 'tmp', # Length of time in seconds to wait before terminating a command - 'TIMEOUT': 12, + 'TIMEOUT': 24, # Name of the namespace to use 'NS': 'tcut', # Directory containing eBPF test programs diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 1607322a112c..a14b5b800897 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests +LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h + include local_config.mk uname_M := $(shell uname -m 2>/dev/null || echo not) @@ -140,10 +142,6 @@ endif $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap -$(OUTPUT)/gup_test: ../../../../mm/gup_test.h - -$(OUTPUT)/hmm-tests: local_config.h - # HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
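For reference, the iproute2 workflow driven by the hw_stats selftests above is roughly the following; "dummy1" mirrors the netdevsim script, and the jq selectors are the same ones used by netdev_hwstats_used() and get_hwstat() (a usage sketch, not part of the patch):

# Request hardware-backed L3 stats on the device.
ip stats set dev dummy1 l3_stats on

# Check whether the request is actually bound to hardware ("used").
ip -j stats show dev dummy1 group offload subgroup hw_stats_info | jq '.[].info.l3_stats.used'

# Read the counters.
ip -j stats show dev dummy1 group offload subgroup l3_stats | jq '.[0].stats64.rx.packets'

# Watch enable/offload/disable notifications, as hw_stats_monitor_test does.
ip monitor stats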
