-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml42
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml50
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-net.txt10
-rw-r--r--Documentation/devicetree/bindings/net/mscc,miim.yaml61
-rw-r--r--Documentation/devicetree/bindings/net/mscc-miim.txt26
-rw-r--r--Documentation/networking/device_drivers/ethernet/index.rst1
-rw-r--r--Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst35
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7622.dtsi32
-rw-r--r--arch/arm64/include/asm/insn.h9
-rw-r--r--arch/arm64/lib/insn.c67
-rw-r--r--arch/arm64/net/bpf_jit.h17
-rw-r--r--arch/arm64/net/bpf_jit_comp.c255
-rw-r--r--arch/mips/configs/gpr_defconfig2
-rw-r--r--arch/mips/configs/mtx1_defconfig2
-rw-r--r--drivers/atm/Kconfig25
-rw-r--r--drivers/atm/Makefile1
-rw-r--r--drivers/atm/ambassador.c2400
-rw-r--r--drivers/atm/ambassador.h648
-rw-r--r--drivers/infiniband/core/device.c2
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c383
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c223
-rw-r--r--drivers/infiniband/hw/mlx5/main.c31
-rw-r--r--drivers/isdn/mISDN/socket.c2
-rw-r--r--drivers/net/dsa/mt7530.c330
-rw-r--r--drivers/net/dsa/mt7530.h26
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h10
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c304
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h16
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c191
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h16
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c22
-rw-r--r--drivers/net/ethernet/calxeda/xgmac.c2
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h2
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c22
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_devids.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c25
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c49
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c489
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c29
-rw-r--r--drivers/net/ethernet/marvell/Kconfig1
-rw-r--r--drivers/net/ethernet/marvell/Makefile1
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/Kconfig20
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/Makefile9
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c737
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_config.h204
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c256
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h170
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c194
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h299
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c463
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.c1177
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.h366
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h367
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_rx.c508
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_rx.h199
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_tx.c335
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_tx.h284
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_acl.c7
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_flower.c18
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router.c11
-rw-r--r--drivers/net/ethernet/mediatek/Kconfig4
-rw-r--r--drivers/net/ethernet/mediatek/Makefile5
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c131
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.h14
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe.c369
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe.h89
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c1
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe_offload.c189
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c878
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.h135
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed_debugfs.c175
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed_ops.c8
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed_regs.h251
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c179
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h96
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c125
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h156
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c (renamed from drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c)95
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c245
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c (renamed from drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c)51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h28
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c247
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h132
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c390
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h91
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c61
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c1582
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c622
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_thermal.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h83
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c35
-rw-r--r--drivers/net/ethernet/microchip/lan966x/Makefile2
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c842
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c59
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.h117
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_port.c3
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_regs.h106
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h26
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c4
-rw-r--r--drivers/net/ethernet/sfc/ef10.c2
-rw-r--r--drivers/net/ethernet/sfc/efx.h1
-rw-r--r--drivers/net/ethernet/sfc/efx_channels.c52
-rw-r--r--drivers/net/ethernet/sfc/efx_channels.h4
-rw-r--r--drivers/net/ethernet/sfc/efx_common.c4
-rw-r--r--drivers/net/ethernet/sfc/falcon/rx.c4
-rw-r--r--drivers/net/ethernet/sfc/farch.c1
-rw-r--r--drivers/net/ethernet/sfc/mcdi_pcol.h4
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h5
-rw-r--r--drivers/net/ethernet/sfc/tx.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c24
-rw-r--r--drivers/net/ethernet/ti/Kconfig1
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c33
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-qos.c180
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-qos.h8
-rw-r--r--drivers/net/ethernet/ti/cpsw.c38
-rw-r--r--drivers/net/ethernet/ti/cpsw_ale.c66
-rw-r--r--drivers/net/ethernet/ti/cpsw_ale.h2
-rw-r--r--drivers/net/ethernet/ti/cpsw_new.c6
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.c205
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.h9
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c2
-rw-r--r--drivers/net/ethernet/xscale/ptp_ixp46x.c2
-rw-r--r--drivers/net/hyperv/hyperv_net.h69
-rw-r--r--drivers/net/hyperv/netvsc.c16
-rw-r--r--drivers/net/hyperv/netvsc_bpf.c101
-rw-r--r--drivers/net/hyperv/netvsc_drv.c150
-rw-r--r--drivers/net/mdio/mdio-aspeed.c123
-rw-r--r--drivers/net/mdio/mdio-mscc-miim.c81
-rw-r--r--drivers/net/netdevsim/fib.c9
-rw-r--r--drivers/net/phy/micrel.c221
-rw-r--r--drivers/net/phy/phylink.c28
-rw-r--r--drivers/net/ppp/pppoe.c3
-rw-r--r--drivers/net/usb/cdc_ether.c3
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--drivers/net/usb/rndis_host.c47
-rw-r--r--drivers/net/wan/Kconfig28
-rw-r--r--drivers/net/wan/Makefile2
-rw-r--r--drivers/net/wan/lmc/Makefile18
-rw-r--r--drivers/net/wan/lmc/lmc.h33
-rw-r--r--drivers/net/wan/lmc/lmc_debug.c65
-rw-r--r--drivers/net/wan/lmc/lmc_debug.h52
-rw-r--r--drivers/net/wan/lmc/lmc_ioctl.h255
-rw-r--r--drivers/net/wan/lmc/lmc_main.c2009
-rw-r--r--drivers/net/wan/lmc/lmc_media.c1206
-rw-r--r--drivers/net/wan/lmc/lmc_proto.c106
-rw-r--r--drivers/net/wan/lmc/lmc_proto.h18
-rw-r--r--drivers/net/wan/lmc/lmc_var.h468
-rw-r--r--include/linux/icmpv6.h11
-rw-r--r--include/linux/mlx5/accel.h35
-rw-r--r--include/linux/mlx5/driver.h3
-rw-r--r--include/linux/mlx5/mlx5_ifc_fpga.h211
-rw-r--r--include/linux/mlx5/port.h2
-rw-r--r--include/linux/netdevice.h89
-rw-r--r--include/linux/phylink.h6
-rw-r--r--include/linux/skbuff.h37
-rw-r--r--include/linux/soc/mediatek/mtk_wed.h131
-rw-r--r--include/linux/usb/rndis_host.h1
-rw-r--r--include/linux/usb/usbnet.h1
-rw-r--r--include/net/act_api.h3
-rw-r--r--include/net/if_inet6.h8
-rw-r--r--include/net/ip_fib.h4
-rw-r--r--include/net/netfilter/nf_conntrack.h8
-rw-r--r--include/net/ping.h4
-rw-r--r--include/net/pkt_cls.h6
-rw-r--r--include/net/rtnetlink.h16
-rw-r--r--include/net/sctp/sctp.h2
-rw-r--r--include/net/sock.h12
-rw-r--r--include/net/strparser.h4
-rw-r--r--include/net/tc_act/tc_gact.h15
-rw-r--r--include/net/tc_act/tc_skbedit.h12
-rw-r--r--include/net/tcp.h33
-rw-r--r--include/net/tls.h15
-rw-r--r--include/net/udp.h8
-rw-r--r--include/rdma/ib_verbs.h8
-rw-r--r--include/trace/events/skb.h7
-rw-r--r--include/trace/events/tcp.h47
-rw-r--r--include/uapi/linux/btf.h4
-rw-r--r--include/uapi/linux/if_link.h5
-rw-r--r--include/uapi/linux/neighbour.h2
-rw-r--r--include/uapi/linux/netlink.h1
-rw-r--r--include/uapi/linux/tipc_config.h28
-rw-r--r--kernel/bpf/bpf_iter.c30
-rw-r--r--kernel/bpf/stackmap.c2
-rw-r--r--kernel/bpf/verifier.c61
-rw-r--r--kernel/trace/bpf_trace.c6
-rw-r--r--lib/test_bpf.c315
-rw-r--r--net/appletalk/ddp.c3
-rw-r--r--net/atm/common.c2
-rw-r--r--net/ax25/af_ax25.c3
-rw-r--r--net/bluetooth/af_bluetooth.c3
-rw-r--r--net/bluetooth/hci_sock.c3
-rw-r--r--net/bridge/br_device.c1
-rw-r--r--net/bridge/br_fdb.c157
-rw-r--r--net/bridge/br_mdb.c12
-rw-r--r--net/bridge/br_netlink.c9
-rw-r--r--net/bridge/br_private.h18
-rw-r--r--net/bridge/br_switchdev.c3
-rw-r--r--net/bridge/br_sysfs_br.c6
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/can/bcm.c5
-rw-r--r--net/can/isotp.c4
-rw-r--r--net/can/j1939/socket.c2
-rw-r--r--net/can/raw.c6
-rw-r--r--net/core/datagram.c5
-rw-r--r--net/core/dev.c22
-rw-r--r--net/core/dev.h91
-rw-r--r--net/core/dev_addr_lists.c2
-rw-r--r--net/core/dev_ioctl.c2
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/link_watch.c1
-rw-r--r--net/core/net-procfs.c2
-rw-r--r--net/core/net-sysfs.c1
-rw-r--r--net/core/page_pool.c15
-rw-r--r--net/core/rtnetlink.c120
-rw-r--r--net/core/sock.c35
-rw-r--r--net/core/sysctl_net_core.c2
-rw-r--r--net/dccp/dccp.h4
-rw-r--r--net/dccp/proto.c6
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/ieee802154/socket.c8
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/fib_trie.c10
-rw-r--r--net/ipv4/fou.c1
-rw-r--r--net/ipv4/icmp.c75
-rw-r--r--net/ipv4/ip_forward.c13
-rw-r--r--net/ipv4/ip_input.c1
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c4
-rw-r--r--net/ipv4/ping.c38
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/route.c10
-rw-r--r--net/ipv4/tcp.c27
-rw-r--r--net/ipv4/tcp_bbr.c20
-rw-r--r--net/ipv4/tcp_bic.c14
-rw-r--r--net/ipv4/tcp_bpf.c15
-rw-r--r--net/ipv4/tcp_cdg.c30
-rw-r--r--net/ipv4/tcp_cong.c30
-rw-r--r--net/ipv4/tcp_cubic.c22
-rw-r--r--net/ipv4/tcp_dctcp.c11
-rw-r--r--net/ipv4/tcp_highspeed.c18
-rw-r--r--net/ipv4/tcp_htcp.c10
-rw-r--r--net/ipv4/tcp_hybla.c18
-rw-r--r--net/ipv4/tcp_illinois.c12
-rw-r--r--net/ipv4/tcp_input.c36
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_lp.c6
-rw-r--r--net/ipv4/tcp_metrics.c12
-rw-r--r--net/ipv4/tcp_nv.c24
-rw-r--r--net/ipv4/tcp_output.c30
-rw-r--r--net/ipv4/tcp_rate.c2
-rw-r--r--net/ipv4/tcp_scalable.c4
-rw-r--r--net/ipv4/tcp_vegas.c21
-rw-r--r--net/ipv4/tcp_veno.c24
-rw-r--r--net/ipv4/tcp_westwood.c3
-rw-r--r--net/ipv4/tcp_yeah.c30
-rw-r--r--net/ipv4/udp.c11
-rw-r--r--net/ipv4/udp_bpf.c17
-rw-r--r--net/ipv4/udp_impl.h4
-rw-r--r--net/ipv6/addrconf.c30
-rw-r--r--net/ipv6/af_inet6.c5
-rw-r--r--net/ipv6/exthdrs.c44
-rw-r--r--net/ipv6/icmp.c31
-rw-r--r--net/ipv6/ip6_input.c41
-rw-r--r--net/ipv6/ip6_output.c9
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c4
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/ipv6/udp_impl.h4
-rw-r--r--net/iucv/af_iucv.c3
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_ip.c4
-rw-r--r--net/l2tp/l2tp_ip6.c4
-rw-r--r--net/l2tp/l2tp_ppp.c3
-rw-r--r--net/mctp/af_mctp.c2
-rw-r--r--net/mctp/test/route-test.c8
-rw-r--r--net/mpls/af_mpls.c3
-rw-r--r--net/mptcp/mptcp_diag.c105
-rw-r--r--net/mptcp/pm.c19
-rw-r--r--net/mptcp/pm_netlink.c43
-rw-r--r--net/mptcp/protocol.c22
-rw-r--r--net/mptcp/protocol.h1
-rw-r--r--net/mptcp/sockopt.c6
-rw-r--r--net/netfilter/nf_conntrack_bpf.c22
-rw-r--r--net/netfilter/nf_conntrack_ecache.c19
-rw-r--r--net/netfilter/nf_conntrack_netlink.c68
-rw-r--r--net/netfilter/nf_log_syslog.c136
-rw-r--r--net/netfilter/nf_tables_api.c6
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c14
-rw-r--r--net/netfilter/nft_bitwise.c13
-rw-r--r--net/netfilter/nft_fib.c4
-rw-r--r--net/netlink/af_netlink.c3
-rw-r--r--net/netrom/af_netrom.c3
-rw-r--r--net/nfc/core.c1
-rw-r--r--net/nfc/llcp_sock.c3
-rw-r--r--net/nfc/rawsock.c3
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/phonet/datagram.c4
-rw-r--r--net/phonet/pep.c7
-rw-r--r--net/qrtr/af_qrtr.c3
-rw-r--r--net/rose/af_rose.c3
-rw-r--r--net/sched/act_api.c4
-rw-r--r--net/sched/act_csum.c3
-rw-r--r--net/sched/act_ct.c3
-rw-r--r--net/sched/act_gact.c13
-rw-r--r--net/sched/act_gate.c3
-rw-r--r--net/sched/act_mirred.c4
-rw-r--r--net/sched/act_mpls.c10
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c20
-rw-r--r--net/sched/act_sample.c3
-rw-r--r--net/sched/act_skbedit.c10
-rw-r--r--net/sched/act_tunnel_key.c4
-rw-r--r--net/sched/act_vlan.c4
-rw-r--r--net/sched/cls_api.c22
-rw-r--r--net/sched/cls_flower.c14
-rw-r--r--net/sched/cls_matchall.c19
-rw-r--r--net/sctp/socket.c16
-rw-r--r--net/sctp/ulpevent.c2
-rw-r--r--net/sunrpc/svcsock.c2
-rw-r--r--net/sunrpc/xprtsock.c2
-rw-r--r--net/tls/tls_device.c6
-rw-r--r--net/tls/tls_sw.c485
-rw-r--r--net/unix/af_unix.c11
-rw-r--r--net/unix/unix_bpf.c5
-rw-r--r--net/vmw_vsock/vmci_transport.c5
-rw-r--r--net/x25/af_x25.c3
-rw-r--r--net/xfrm/espintcp.c4
-rw-r--r--samples/bpf/Makefile10
-rw-r--r--samples/bpf/syscall_tp_user.c3
-rw-r--r--samples/bpf/xdp_router_ipv4.bpf.c180
-rw-r--r--samples/bpf/xdp_router_ipv4_kern.c186
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c455
-rw-r--r--tools/bpf/bpftool/feature.c2
-rw-r--r--tools/bpf/bpftool/link.c3
-rw-r--r--tools/bpf/bpftool/prog.c1
-rw-r--r--tools/include/uapi/linux/btf.h4
-rw-r--r--tools/lib/bpf/Build3
-rw-r--r--tools/lib/bpf/Makefile2
-rw-r--r--tools/lib/bpf/btf.c6
-rw-r--r--tools/lib/bpf/libbpf.c488
-rw-r--r--tools/lib/bpf/libbpf.h41
-rw-r--r--tools/lib/bpf/libbpf.map1
-rw-r--r--tools/lib/bpf/libbpf_internal.h30
-rw-r--r--tools/lib/bpf/usdt.bpf.h259
-rw-r--r--tools/lib/bpf/usdt.c1335
-rw-r--r--tools/testing/selftests/bpf/Makefile25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c85
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netcnt.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_global_funcs.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c421
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c27
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs1.c8
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs2.c8
-rw-r--r--tools/testing/selftests/bpf/progs/perf_event_stackmap.c4
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h5
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func17.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_assign.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_pt_regs.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_urandom_usdt.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c96
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt_multispec.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c12
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c2
-rw-r--r--tools/testing/selftests/bpf/sdt-config.h6
-rw-r--r--tools/testing/selftests/bpf/sdt.h513
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py2
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c3
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py2
-rw-r--r--tools/testing/selftests/bpf/test_progs.c6
-rw-r--r--tools/testing/selftests/bpf/test_progs.h2
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c9
-rw-r--r--tools/testing/selftests/bpf/urandom_read.c63
-rw-r--r--tools/testing/selftests/bpf/urandom_read_aux.c9
-rw-r--r--tools/testing/selftests/bpf/urandom_read_lib1.c13
-rw-r--r--tools/testing/selftests/bpf/urandom_read_lib2.c8
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh12
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh103
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh38
-rwxr-xr-xtools/testing/selftests/netfilter/nft_fib.sh50
434 files changed, 18693 insertions, 15676 deletions
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml
new file mode 100644
index 000000000000..9fbeb626ab23
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt7622-pcie-mirror.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: MediaTek PCIE Mirror Controller for MT7622
+
+maintainers:
+ - Lorenzo Bianconi <lorenzo@kernel.org>
+ - Felix Fietkau <nbd@nbd.name>
+
+description:
+  The MediaTek PCIE mirror provides a configuration interface for the PCIE
+  controller on the MT7622 SoC.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt7622-pcie-mirror
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ pcie_mirror: pcie-mirror@10000400 {
+ compatible = "mediatek,mt7622-pcie-mirror", "syscon";
+ reg = <0 0x10000400 0 0x10>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
new file mode 100644
index 000000000000..787d6673f952
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,mt7622-wed.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: MediaTek Wireless Ethernet Dispatch Controller for MT7622
+
+maintainers:
+ - Lorenzo Bianconi <lorenzo@kernel.org>
+ - Felix Fietkau <nbd@nbd.name>
+
+description:
+  The MediaTek Wireless Ethernet Dispatch controller can be configured to
+  intercept and handle access to the WLAN DMA queues and PCIe interrupts
+  and to implement hardware flow offloading from Ethernet to WLAN.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - mediatek,mt7622-wed
+ - const: syscon
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ wed0: wed@1020a000 {
+ compatible = "mediatek,mt7622-wed","syscon";
+ reg = <0 0x1020a000 0 0x1000>;
+ interrupts = <GIC_SPI 214 IRQ_TYPE_LEVEL_LOW>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
index 72d03e07cf7c..f18d70189375 100644
--- a/Documentation/devicetree/bindings/net/mediatek-net.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -41,6 +41,16 @@ Required properties:
- mediatek,pctl: phandle to the syscon node that handles the ports slew rate
and driver current: only for MT2701 and MT7623 SoC
+Optional properties:
+- dma-coherent: present if DMA operations are coherent
+- mediatek,cci-control: phandle to the cache coherent interconnect node
+- mediatek,hifsys: phandle to the MediaTek hifsys controller used to provide
+  various clocks and resets to the system.
+- mediatek,wed: a list of phandles to the wireless ethernet dispatch nodes for
+  the MT7622 SoC.
+- mediatek,pcie-mirror: phandle to the MediaTek pcie-mirror controller for the
+  MT7622 SoC.
+
* Ethernet MAC node
Required properties:
diff --git a/Documentation/devicetree/bindings/net/mscc,miim.yaml b/Documentation/devicetree/bindings/net/mscc,miim.yaml
new file mode 100644
index 000000000000..2c451cfa4e0b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mscc,miim.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/mscc,miim.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microsemi MII Management Controller (MIIM)
+
+maintainers:
+ - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+allOf:
+ - $ref: "mdio.yaml#"
+
+properties:
+ compatible:
+ enum:
+ - mscc,ocelot-miim
+ - microchip,lan966x-miim
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ reg:
+ items:
+ - description: base address
+ - description: associated reset register for internal PHYs
+ minItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-frequency: true
+
+required:
+ - compatible
+ - reg
+ - "#address-cells"
+ - "#size-cells"
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ mdio@107009c {
+ compatible = "mscc,ocelot-miim";
+ reg = <0x107009c 0x36>, <0x10700f0 0x8>;
+ interrupts = <14>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/mscc-miim.txt b/Documentation/devicetree/bindings/net/mscc-miim.txt
deleted file mode 100644
index 70e0cb1ee485..000000000000
--- a/Documentation/devicetree/bindings/net/mscc-miim.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Microsemi MII Management Controller (MIIM) / MDIO
-=================================================
-
-Properties:
-- compatible: must be "mscc,ocelot-miim" or "microchip,lan966x-miim"
-- reg: The base address of the MDIO bus controller register bank. Optionally, a
- second register bank can be defined if there is an associated reset register
- for internal PHYs
-- #address-cells: Must be <1>.
-- #size-cells: Must be <0>. MDIO addresses have no size component.
-- interrupts: interrupt specifier (refer to the interrupt binding)
-
-Typically an MDIO bus might have several children.
-
-Example:
- mdio@107009c {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "mscc,ocelot-miim";
- reg = <0x107009c 0x36>, <0x10700f0 0x8>;
- interrupts = <14>;
-
- phy0: ethernet-phy@0 {
- reg = <0>;
- };
- };
diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 6b5dc203da2b..21a97703421d 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -39,6 +39,7 @@ Contents:
intel/iavf
intel/ice
marvell/octeontx2
+ marvell/octeon_ep
mellanox/mlx5
microsoft/netvsc
neterion/s2io
diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst
new file mode 100644
index 000000000000..bc562c49011b
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst
@@ -0,0 +1,35 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+====================================================================
+Linux kernel networking driver for Marvell's Octeon PCI Endpoint NIC
+====================================================================
+
+Network driver for Marvell's Octeon PCI Endpoint NIC.
+Copyright (c) 2020 Marvell International Ltd.
+
+Contents
+========
+
+- `Overview`_
+- `Supported Devices`_
+- `Interface Control`_
+
+Overview
+========
+This driver implements the networking functionality of Marvell's Octeon PCI
+Endpoint NIC.
+
+Supported Devices
+=================
+Currently, this driver supports the following devices:
+ * Network controller: Cavium, Inc. Device b200
+
+Interface Control
+=================
+Network interface control operations, such as changing the MTU, link speed,
+or link state (up/down), are done by writing commands to a mailbox command
+queue, a mailbox interface implemented through a reserved region in BAR4.
+The driver writes commands into the mailbox and the firmware on the Octeon
+device processes them. The firmware also sends unsolicited notifications to
+the driver, for events such as link changes, through a notification queue
+implemented as part of the mailbox interface.
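
To make the description above concrete, here is a minimal, self-contained
sketch of such a command ring in plain C. Every structure and name in it is
invented for illustration; the driver's real mailbox definitions live in
octep_ctrl_mbox.h and octep_ctrl_net.h and are not reproduced here.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Invented command layout, for illustration only. */
struct mbox_cmd {
	uint16_t opcode;		/* e.g. a hypothetical "set MTU" */
	uint16_t len;
	uint8_t data[60];
};

#define MBOX_DEPTH 16			/* must be a power of two here */

static struct mbox_cmd cmd_ring[MBOX_DEPTH];	/* stands in for the BAR4 region */
static unsigned int prod, cons;		/* host produces, firmware consumes */

static int mbox_post(const struct mbox_cmd *cmd)
{
	if (prod - cons == MBOX_DEPTH)
		return -1;		/* ring full, caller must retry */
	cmd_ring[prod % MBOX_DEPTH] = *cmd;
	prod++;				/* real hardware would ring a doorbell here */
	return 0;
}

int main(void)
{
	struct mbox_cmd cmd = { .opcode = 1, .len = 2 };	/* hypothetical MTU command */
	uint16_t mtu = 9000;

	memcpy(cmd.data, &mtu, sizeof(mtu));
	printf("posted: %s\n", mbox_post(&cmd) == 0 ? "ok" : "ring full");
	return 0;
}
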
diff --git a/MAINTAINERS b/MAINTAINERS
index 7341667e7313..84158e08e6e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11830,6 +11830,13 @@ S: Supported
F: Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt
F: drivers/mmc/host/sdhci-xenon*
+MARVELL OCTEON ENDPOINT DRIVER
+M: Veerasenareddy Burru <vburru@marvell.com>
+M: Abhijit Ayarekar <aayarekar@marvell.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/marvell/octeon_ep
+
MATROX FRAMEBUFFER DRIVER
L: linux-fbdev@vger.kernel.org
S: Orphan
diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
index 6f8cb3ad1e84..f232f8baf4e8 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
@@ -357,7 +357,7 @@
};
cci_control2: slave-if@5000 {
- compatible = "arm,cci-400-ctrl-if";
+ compatible = "arm,cci-400-ctrl-if", "syscon";
interface-type = "ace";
reg = <0x5000 0x1000>;
};
@@ -901,6 +901,11 @@
};
};
+ hifsys: syscon@1af00000 {
+ compatible = "mediatek,mt7622-hifsys", "syscon";
+ reg = <0 0x1af00000 0 0x70>;
+ };
+
ethsys: syscon@1b000000 {
compatible = "mediatek,mt7622-ethsys",
"syscon";
@@ -919,6 +924,26 @@
#dma-cells = <1>;
};
+ pcie_mirror: pcie-mirror@10000400 {
+ compatible = "mediatek,mt7622-pcie-mirror",
+ "syscon";
+ reg = <0 0x10000400 0 0x10>;
+ };
+
+ wed0: wed@1020a000 {
+ compatible = "mediatek,mt7622-wed",
+ "syscon";
+ reg = <0 0x1020a000 0 0x1000>;
+ interrupts = <GIC_SPI 214 IRQ_TYPE_LEVEL_LOW>;
+ };
+
+ wed1: wed@1020b000 {
+ compatible = "mediatek,mt7622-wed",
+ "syscon";
+ reg = <0 0x1020b000 0 0x1000>;
+ interrupts = <GIC_SPI 215 IRQ_TYPE_LEVEL_LOW>;
+ };
+
eth: ethernet@1b100000 {
compatible = "mediatek,mt7622-eth",
"mediatek,mt2701-eth",
@@ -945,6 +970,11 @@
power-domains = <&scpsys MT7622_POWER_DOMAIN_ETHSYS>;
mediatek,ethsys = <&ethsys>;
mediatek,sgmiisys = <&sgmiisys>;
+ cci-control-port = <&cci_control2>;
+ mediatek,wed = <&wed0>, <&wed1>;
+ mediatek,pcie-mirror = <&pcie_mirror>;
+ mediatek,hifsys = <&hifsys>;
+ dma-coherent;
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 1e5760d567ae..6aa2dc836db1 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -201,6 +201,8 @@ enum aarch64_insn_size_type {
enum aarch64_insn_ldst_type {
AARCH64_INSN_LDST_LOAD_REG_OFFSET,
AARCH64_INSN_LDST_STORE_REG_OFFSET,
+ AARCH64_INSN_LDST_LOAD_IMM_OFFSET,
+ AARCH64_INSN_LDST_STORE_IMM_OFFSET,
AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX,
AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
@@ -335,6 +337,7 @@ __AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00)
__AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400)
__AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400)
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
+__AARCH64_INSN_FUNCS(str_imm, 0x3FC00000, 0x39000000)
__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000)
__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000)
__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000)
@@ -342,6 +345,7 @@ __AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000)
__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000)
__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
+__AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
@@ -501,6 +505,11 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
enum aarch64_insn_register offset,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ unsigned int imm,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
enum aarch64_insn_register reg2,
enum aarch64_insn_register base,
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index 5e90887deec4..695d7368fadc 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -299,29 +299,24 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
return insn;
}
+static const u32 aarch64_insn_ldst_size[] = {
+ [AARCH64_INSN_SIZE_8] = 0,
+ [AARCH64_INSN_SIZE_16] = 1,
+ [AARCH64_INSN_SIZE_32] = 2,
+ [AARCH64_INSN_SIZE_64] = 3,
+};
+
static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type,
u32 insn)
{
u32 size;
- switch (type) {
- case AARCH64_INSN_SIZE_8:
- size = 0;
- break;
- case AARCH64_INSN_SIZE_16:
- size = 1;
- break;
- case AARCH64_INSN_SIZE_32:
- size = 2;
- break;
- case AARCH64_INSN_SIZE_64:
- size = 3;
- break;
- default:
+ if (type < AARCH64_INSN_SIZE_8 || type > AARCH64_INSN_SIZE_64) {
pr_err("%s: unknown size encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
+ size = aarch64_insn_ldst_size[type];
insn &= ~GENMASK(31, 30);
insn |= size << 30;
@@ -504,6 +499,50 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
offset);
}
+u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ unsigned int imm,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type)
+{
+ u32 insn;
+ u32 shift;
+
+ if (size < AARCH64_INSN_SIZE_8 || size > AARCH64_INSN_SIZE_64) {
+		pr_err("%s: unknown size encoding %d\n", __func__, size);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ shift = aarch64_insn_ldst_size[size];
+ if (imm & ~(BIT(12 + shift) - BIT(shift))) {
+ pr_err("%s: invalid imm: %d\n", __func__, imm);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ imm >>= shift;
+
+ switch (type) {
+ case AARCH64_INSN_LDST_LOAD_IMM_OFFSET:
+ insn = aarch64_insn_get_ldr_imm_value();
+ break;
+ case AARCH64_INSN_LDST_STORE_IMM_OFFSET:
+ insn = aarch64_insn_get_str_imm_value();
+ break;
+ default:
+ pr_err("%s: unknown load/store encoding %d\n", __func__, type);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_ldst_size(size, insn);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+ base);
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
+}
+
u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
enum aarch64_insn_register reg2,
enum aarch64_insn_register base,
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index dd59b5ad8fe4..194c95ccc1cf 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -66,6 +66,20 @@
#define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE)
#define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD)
+/* Load/store register (immediate offset) */
+#define A64_LS_IMM(Rt, Rn, imm, size, type) \
+ aarch64_insn_gen_load_store_imm(Rt, Rn, imm, \
+ AARCH64_INSN_SIZE_##size, \
+ AARCH64_INSN_LDST_##type##_IMM_OFFSET)
+#define A64_STRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, STORE)
+#define A64_LDRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, LOAD)
+#define A64_STRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, STORE)
+#define A64_LDRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, LOAD)
+#define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE)
+#define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD)
+#define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE)
+#define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD)
+
/* Load/store register pair */
#define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \
aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \
@@ -249,6 +263,9 @@
/* HINTs */
#define A64_HINT(x) aarch64_insn_gen_hint(x)
+#define A64_PACIASP A64_HINT(AARCH64_INSN_HINT_PACIASP)
+#define A64_AUTIASP A64_HINT(AARCH64_INSN_HINT_AUTIASP)
+
/* BTI */
#define A64_BTI_C A64_HINT(AARCH64_INSN_HINT_BTIC)
#define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index fcc675aa1670..8ab4035dea27 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -26,6 +26,7 @@
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
+#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
#define check_imm(bits, imm) do { \
if ((((imm) > 0) && ((imm) >> (bits))) || \
@@ -63,6 +64,7 @@ static const int bpf2a64[] = {
[TCALL_CNT] = A64_R(26),
/* temporary register for blinding constants */
[BPF_REG_AX] = A64_R(9),
+ [FP_BOTTOM] = A64_R(27),
};
struct jit_ctx {
@@ -73,6 +75,7 @@ struct jit_ctx {
int exentry_idx;
__le32 *image;
u32 stack_size;
+ int fpb_offset;
};
static inline void emit(const u32 insn, struct jit_ctx *ctx)
@@ -191,11 +194,53 @@ static bool is_addsub_imm(u32 imm)
return !(imm & ~0xfff) || !(imm & ~0xfff000);
}
+/*
+ * There are 3 types of AArch64 LDR/STR (immediate) instruction:
+ * Post-index, Pre-index, Unsigned offset.
+ *
+ * For BPF ldr/str, the "unsigned offset" type is sufficient.
+ *
+ * "Unsigned offset" type LDR(immediate) format:
+ *
+ * 3 2 1 0
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |x x|1 1 1 0 0 1 0 1| imm12 | Rn | Rt |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * scale
+ *
+ * "Unsigned offset" type STR(immediate) format:
+ * 3 2 1 0
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |x x|1 1 1 0 0 1 0 0| imm12 | Rn | Rt |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * scale
+ *
+ * The offset is calculated from imm12 and scale in the following way:
+ *
+ * offset = (u64)imm12 << scale
+ */
+static bool is_lsi_offset(int offset, int scale)
+{
+ if (offset < 0)
+ return false;
+
+ if (offset > (0xFFF << scale))
+ return false;
+
+ if (offset & ((1 << scale) - 1))
+ return false;
+
+ return true;
+}
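
To make the rules above concrete, here is a stand-alone user-space sketch that
mirrors is_lsi_offset() and classifies a few example offsets for 64-bit
accesses (scale = 3), where valid offsets are non-negative multiples of 8 up
to 0xFFF << 3 = 32760. The offsets chosen are arbitrary examples:

#include <stdbool.h>
#include <stdio.h>

/* Mirror of the kernel's is_lsi_offset(): the immediate is an unsigned,
 * scaled 12-bit field, so offsets must be non-negative, aligned to
 * 1 << scale and at most 0xFFF << scale. */
static bool is_lsi_offset(int offset, int scale)
{
	if (offset < 0)
		return false;
	if (offset > (0xFFF << scale))
		return false;
	if (offset & ((1 << scale) - 1))
		return false;
	return true;
}

int main(void)
{
	/* Arbitrary example offsets for BPF_DW accesses (scale = 3). */
	const int offs[] = { 0, 8, 12, -8, 32760, 32768 };
	unsigned int i;

	for (i = 0; i < sizeof(offs) / sizeof(offs[0]); i++)
		printf("offset %6d: %s\n", offs[i],
		       is_lsi_offset(offs[i], 3) ?
		       "single LDR/STR (immediate)" : "falls back to tmp register");
	return 0;
}
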
+
/* Tail call offset to jump into */
-#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
-#define PROLOGUE_OFFSET 8
+#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || \
+ IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)
+#define PROLOGUE_OFFSET 9
#else
-#define PROLOGUE_OFFSET 7
+#define PROLOGUE_OFFSET 8
#endif
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
@@ -207,6 +252,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
const u8 r9 = bpf2a64[BPF_REG_9];
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 tcc = bpf2a64[TCALL_CNT];
+ const u8 fpb = bpf2a64[FP_BOTTOM];
const int idx0 = ctx->idx;
int cur_offset;
@@ -233,8 +279,11 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
*
*/
+ /* Sign lr */
+ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
+ emit(A64_PACIASP, ctx);
/* BTI landing pad */
- if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+ else if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
emit(A64_BTI_C, ctx);
/* Save FP and LR registers to stay align with ARM64 AAPCS */
@@ -245,6 +294,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
emit(A64_PUSH(r6, r7, A64_SP), ctx);
emit(A64_PUSH(r8, r9, A64_SP), ctx);
emit(A64_PUSH(fp, tcc, A64_SP), ctx);
+ emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
/* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);
@@ -265,6 +315,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
emit(A64_BTI_J, ctx);
}
+ emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);
+
/* Stack must be multiples of 16B */
ctx->stack_size = round_up(prog->aux->stack_depth, 16);
@@ -512,10 +564,13 @@ static void build_epilogue(struct jit_ctx *ctx)
const u8 r8 = bpf2a64[BPF_REG_8];
const u8 r9 = bpf2a64[BPF_REG_9];
const u8 fp = bpf2a64[BPF_REG_FP];
+ const u8 fpb = bpf2a64[FP_BOTTOM];
/* We're done with BPF stack */
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
+ /* Restore x27 and x28 */
+ emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
/* Restore fs (x25) and x26 */
emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
@@ -529,6 +584,10 @@ static void build_epilogue(struct jit_ctx *ctx)
/* Set return value */
emit(A64_MOV(1, A64_R(0), r0), ctx);
+ /* Authenticate lr */
+ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
+ emit(A64_AUTIASP, ctx);
+
emit(A64_RET(A64_LR), ctx);
}
@@ -609,6 +668,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const u8 src = bpf2a64[insn->src_reg];
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
+ const u8 fp = bpf2a64[BPF_REG_FP];
+ const u8 fpb = bpf2a64[FP_BOTTOM];
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
@@ -617,6 +678,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
u8 jmp_cond;
s32 jmp_offset;
u32 a64_insn;
+ u8 src_adj;
+ u8 dst_adj;
+ int off_adj;
int ret;
switch (code) {
@@ -971,19 +1035,45 @@ emit_cond_jmp:
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
- emit_a64_mov_i(1, tmp, off, ctx);
+ if (ctx->fpb_offset > 0 && src == fp) {
+ src_adj = fpb;
+ off_adj = off + ctx->fpb_offset;
+ } else {
+ src_adj = src;
+ off_adj = off;
+ }
switch (BPF_SIZE(code)) {
case BPF_W:
- emit(A64_LDR32(dst, src, tmp), ctx);
+ if (is_lsi_offset(off_adj, 2)) {
+ emit(A64_LDR32I(dst, src_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ emit(A64_LDR32(dst, src, tmp), ctx);
+ }
break;
case BPF_H:
- emit(A64_LDRH(dst, src, tmp), ctx);
+ if (is_lsi_offset(off_adj, 1)) {
+ emit(A64_LDRHI(dst, src_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ emit(A64_LDRH(dst, src, tmp), ctx);
+ }
break;
case BPF_B:
- emit(A64_LDRB(dst, src, tmp), ctx);
+ if (is_lsi_offset(off_adj, 0)) {
+ emit(A64_LDRBI(dst, src_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ emit(A64_LDRB(dst, src, tmp), ctx);
+ }
break;
case BPF_DW:
- emit(A64_LDR64(dst, src, tmp), ctx);
+ if (is_lsi_offset(off_adj, 3)) {
+ emit(A64_LDR64I(dst, src_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ emit(A64_LDR64(dst, src, tmp), ctx);
+ }
break;
}
@@ -1010,21 +1100,47 @@ emit_cond_jmp:
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_B:
case BPF_ST | BPF_MEM | BPF_DW:
+ if (ctx->fpb_offset > 0 && dst == fp) {
+ dst_adj = fpb;
+ off_adj = off + ctx->fpb_offset;
+ } else {
+ dst_adj = dst;
+ off_adj = off;
+ }
/* Load imm to a register then store it */
- emit_a64_mov_i(1, tmp2, off, ctx);
emit_a64_mov_i(1, tmp, imm, ctx);
switch (BPF_SIZE(code)) {
case BPF_W:
- emit(A64_STR32(tmp, dst, tmp2), ctx);
+ if (is_lsi_offset(off_adj, 2)) {
+ emit(A64_STR32I(tmp, dst_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp2, off, ctx);
+ emit(A64_STR32(tmp, dst, tmp2), ctx);
+ }
break;
case BPF_H:
- emit(A64_STRH(tmp, dst, tmp2), ctx);
+ if (is_lsi_offset(off_adj, 1)) {
+ emit(A64_STRHI(tmp, dst_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp2, off, ctx);
+ emit(A64_STRH(tmp, dst, tmp2), ctx);
+ }
break;
case BPF_B:
- emit(A64_STRB(tmp, dst, tmp2), ctx);
+ if (is_lsi_offset(off_adj, 0)) {
+ emit(A64_STRBI(tmp, dst_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp2, off, ctx);
+ emit(A64_STRB(tmp, dst, tmp2), ctx);
+ }
break;
case BPF_DW:
- emit(A64_STR64(tmp, dst, tmp2), ctx);
+ if (is_lsi_offset(off_adj, 3)) {
+ emit(A64_STR64I(tmp, dst_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp2, off, ctx);
+ emit(A64_STR64(tmp, dst, tmp2), ctx);
+ }
break;
}
break;
@@ -1034,19 +1150,45 @@ emit_cond_jmp:
case BPF_STX | BPF_MEM | BPF_H:
case BPF_STX | BPF_MEM | BPF_B:
case BPF_STX | BPF_MEM | BPF_DW:
- emit_a64_mov_i(1, tmp, off, ctx);
+ if (ctx->fpb_offset > 0 && dst == fp) {
+ dst_adj = fpb;
+ off_adj = off + ctx->fpb_offset;
+ } else {
+ dst_adj = dst;
+ off_adj = off;
+ }
switch (BPF_SIZE(code)) {
case BPF_W:
- emit(A64_STR32(src, dst, tmp), ctx);
+ if (is_lsi_offset(off_adj, 2)) {
+ emit(A64_STR32I(src, dst_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ emit(A64_STR32(src, dst, tmp), ctx);
+ }
break;
case BPF_H:
- emit(A64_STRH(src, dst, tmp), ctx);
+ if (is_lsi_offset(off_adj, 1)) {
+ emit(A64_STRHI(src, dst_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ emit(A64_STRH(src, dst, tmp), ctx);
+ }
break;
case BPF_B:
- emit(A64_STRB(src, dst, tmp), ctx);
+ if (is_lsi_offset(off_adj, 0)) {
+ emit(A64_STRBI(src, dst_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ emit(A64_STRB(src, dst, tmp), ctx);
+ }
break;
case BPF_DW:
- emit(A64_STR64(src, dst, tmp), ctx);
+ if (is_lsi_offset(off_adj, 3)) {
+ emit(A64_STR64I(src, dst_adj, off_adj), ctx);
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ emit(A64_STR64(src, dst, tmp), ctx);
+ }
break;
}
break;
@@ -1069,6 +1211,79 @@ emit_cond_jmp:
return 0;
}
+/*
+ * Return 0 if FP may change at runtime, otherwise find the minimum negative
+ * offset to FP, convert it to a positive number, and align it down to 8
+ * bytes.
+ */
+static int find_fpb_offset(struct bpf_prog *prog)
+{
+ int i;
+ int offset = 0;
+
+ for (i = 0; i < prog->len; i++) {
+ const struct bpf_insn *insn = &prog->insnsi[i];
+ const u8 class = BPF_CLASS(insn->code);
+ const u8 mode = BPF_MODE(insn->code);
+ const u8 src = insn->src_reg;
+ const u8 dst = insn->dst_reg;
+ const s32 imm = insn->imm;
+ const s16 off = insn->off;
+
+ switch (class) {
+ case BPF_STX:
+ case BPF_ST:
+ /* fp holds atomic operation result */
+ if (class == BPF_STX && mode == BPF_ATOMIC &&
+ ((imm == BPF_XCHG ||
+ imm == (BPF_FETCH | BPF_ADD) ||
+ imm == (BPF_FETCH | BPF_AND) ||
+ imm == (BPF_FETCH | BPF_XOR) ||
+ imm == (BPF_FETCH | BPF_OR)) &&
+ src == BPF_REG_FP))
+ return 0;
+
+ if (mode == BPF_MEM && dst == BPF_REG_FP &&
+ off < offset)
+ offset = insn->off;
+ break;
+
+ case BPF_JMP32:
+ case BPF_JMP:
+ break;
+
+ case BPF_LDX:
+ case BPF_LD:
+ /* fp holds load result */
+ if (dst == BPF_REG_FP)
+ return 0;
+
+ if (class == BPF_LDX && mode == BPF_MEM &&
+ src == BPF_REG_FP && off < offset)
+ offset = off;
+ break;
+
+ case BPF_ALU:
+ case BPF_ALU64:
+ default:
+ /* fp holds ALU result */
+ if (dst == BPF_REG_FP)
+ return 0;
+ }
+ }
+
+ if (offset < 0) {
+		/*
+		 * -offset can safely be converted to a positive 'int',
+		 * since insn->off is 's16'
+		 */
+ offset = -offset;
+ /* align down to 8 bytes */
+ offset = ALIGN_DOWN(offset, 8);
+ }
+
+ return offset;
+}
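
A worked example of the rewrite this enables (the offsets are hypothetical):
if a program's only FP-relative accesses are at FP-16 and FP-48 and FP is
never redefined, find_fpb_offset() returns 48, the prologue sets
FPB = FP - 48, and the accesses become [FPB + 32] and [FPB + 0], both of
which pass is_lsi_offset() and so are emitted as single LDR/STR (immediate)
instructions. A stand-alone sketch of the arithmetic:

#include <stdio.h>

/* Same semantics as the kernel's ALIGN_DOWN() for power-of-two alignment. */
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

int main(void)
{
	/* Hypothetical FP-relative offsets used by a BPF program. */
	const int offs[] = { -16, -48 };
	int min = 0, fpb_offset;
	unsigned int i;

	for (i = 0; i < sizeof(offs) / sizeof(offs[0]); i++)
		if (offs[i] < min)
			min = offs[i];

	fpb_offset = ALIGN_DOWN(-min, 8);	/* 48 */
	printf("FPB = FP - %d\n", fpb_offset);
	for (i = 0; i < sizeof(offs) / sizeof(offs[0]); i++)
		printf("[fp%+d] -> [fpb+%d]\n", offs[i], offs[i] + fpb_offset);
	return 0;
}
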
+
static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
const struct bpf_prog *prog = ctx->prog;
@@ -1190,6 +1405,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}
+ ctx.fpb_offset = find_fpb_offset(prog);
+
/*
* 1. Initial fake pass to compute ctx->idx and ctx->offset.
*
diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig
index 5cb91509bb7c..b0489437621a 100644
--- a/arch/mips/configs/gpr_defconfig
+++ b/arch/mips/configs/gpr_defconfig
@@ -182,7 +182,6 @@ CONFIG_ATM_FIRESTREAM=m
CONFIG_ATM_ZATM=m
CONFIG_ATM_NICSTAR=m
CONFIG_ATM_IDT77252=m
-CONFIG_ATM_AMBASSADOR=m
CONFIG_ATM_HORIZON=m
CONFIG_ATM_IA=m
CONFIG_ATM_FORE200E=m
@@ -214,7 +213,6 @@ CONFIG_ATH_DEBUG=y
CONFIG_ATH5K=y
CONFIG_ATH5K_DEBUG=y
CONFIG_WAN=y
-CONFIG_LANMEDIA=m
CONFIG_HDLC=m
CONFIG_HDLC_RAW=m
CONFIG_HDLC_RAW_ETH=m
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 205d3b34528c..c98099f0b354 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -259,7 +259,6 @@ CONFIG_ATM_FIRESTREAM=m
CONFIG_ATM_ZATM=m
CONFIG_ATM_NICSTAR=m
CONFIG_ATM_IDT77252=m
-CONFIG_ATM_AMBASSADOR=m
CONFIG_ATM_HORIZON=m
CONFIG_ATM_IA=m
CONFIG_ATM_FORE200E=m
@@ -363,7 +362,6 @@ CONFIG_USB_AN2720=y
CONFIG_USB_EPSON2888=y
CONFIG_USB_SIERRA_NET=m
CONFIG_WAN=y
-CONFIG_LANMEDIA=m
CONFIG_HDLC=m
CONFIG_HDLC_RAW=m
CONFIG_HDLC_RAW_ETH=m
diff --git a/drivers/atm/Kconfig b/drivers/atm/Kconfig
index b9370bbca828..7be08e24955c 100644
--- a/drivers/atm/Kconfig
+++ b/drivers/atm/Kconfig
@@ -244,31 +244,6 @@ config ATM_IDT77252_USE_SUNI
depends on ATM_IDT77252
default y
-config ATM_AMBASSADOR
- tristate "Madge Ambassador (Collage PCI 155 Server)"
- depends on PCI && VIRT_TO_BUS
- select BITREVERSE
- help
- This is a driver for ATMizer based ATM card produced by Madge
- Networks Ltd. Say Y (or M to compile as a module named ambassador)
- here if you have one of these cards.
-
-config ATM_AMBASSADOR_DEBUG
- bool "Enable debugging messages"
- depends on ATM_AMBASSADOR
- help
- Somewhat useful debugging messages are available. The choice of
- messages is controlled by a bitmap. This may be specified as a
- module argument (kernel command line argument as well?), changed
- dynamically using an ioctl (not yet) or changed by sending the
- string "Dxxxx" to VCI 1023 (where x is a hex digit). See the file
- <file:drivers/atm/ambassador.h> for the meanings of the bits in the
- mask.
-
- When active, these messages can have a significant impact on the
- speed of the driver, and the size of your syslog files! When
- inactive, they will have only a modest impact on performance.
-
config ATM_HORIZON
tristate "Madge Horizon [Ultra] (Collage PCI 25 and Collage PCI 155 Client)"
depends on PCI && VIRT_TO_BUS
diff --git a/drivers/atm/Makefile b/drivers/atm/Makefile
index aa191616a72e..99ecbc280643 100644
--- a/drivers/atm/Makefile
+++ b/drivers/atm/Makefile
@@ -7,7 +7,6 @@ fore_200e-y := fore200e.o
obj-$(CONFIG_ATM_ZATM) += zatm.o uPD98402.o
obj-$(CONFIG_ATM_NICSTAR) += nicstar.o
-obj-$(CONFIG_ATM_AMBASSADOR) += ambassador.o
obj-$(CONFIG_ATM_HORIZON) += horizon.o
obj-$(CONFIG_ATM_IA) += iphase.o suni.o
obj-$(CONFIG_ATM_FORE200E) += fore_200e.o
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
deleted file mode 100644
index c039b8a4fefe..000000000000
--- a/drivers/atm/ambassador.c
+++ /dev/null
@@ -1,2400 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- Madge Ambassador ATM Adapter driver.
- Copyright (C) 1995-1999 Madge Networks Ltd.
-
-*/
-
-/* * dedicated to the memory of Graham Gordon 1971-1998 * */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/atmdev.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/poison.h>
-#include <linux/bitrev.h>
-#include <linux/mutex.h>
-#include <linux/firmware.h>
-#include <linux/ihex.h>
-#include <linux/slab.h>
-
-#include <linux/atomic.h>
-#include <asm/io.h>
-#include <asm/byteorder.h>
-
-#include "ambassador.h"
-
-#define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
-#define description_string "Madge ATM Ambassador driver"
-#define version_string "1.2.4"
-
-static inline void __init show_version (void) {
- printk ("%s version %s\n", description_string, version_string);
-}
-
-/*
-
- Theory of Operation
-
- I Hardware, detection, initialisation and shutdown.
-
- 1. Supported Hardware
-
- This driver is for the PCI ATMizer-based Ambassador card (except
- very early versions). It is not suitable for the similar EISA "TR7"
- card. Commercially, both cards are known as Collage Server ATM
- adapters.
-
- The loader supports image transfer to the card, image start and few
- other miscellaneous commands.
-
- Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
-
- The cards are big-endian.
-
- 2. Detection
-
- Standard PCI stuff, the early cards are detected and rejected.
-
- 3. Initialisation
-
- The cards are reset and the self-test results are checked. The
- microcode image is then transferred and started. This waits for a
- pointer to a descriptor containing details of the host-based queues
- and buffers and various parameters etc. Once they are processed
- normal operations may begin. The BIA is read using a microcode
- command.
-
- 4. Shutdown
-
- This may be accomplished either by a card reset or via the microcode
- shutdown command. Further investigation required.
-
- 5. Persistent state
-
- The card reset does not affect PCI configuration (good) or the
- contents of several other "shared run-time registers" (bad) which
- include doorbell and interrupt control as well as EEPROM and PCI
- control. The driver must be careful when modifying these registers
- not to touch bits it does not use and to undo any changes at exit.
-
- II Driver software
-
- 0. Generalities
-
- The adapter is quite intelligent (fast) and has a simple interface
- (few features). VPI is always zero, 1024 VCIs are supported. There
- is limited cell rate support. UBR channels can be capped and ABR
- (explicit rate, but not EFCI) is supported. There is no CBR or VBR
- support.
-
- 1. Driver <-> Adapter Communication
-
- Apart from the basic loader commands, the driver communicates
- through three entities: the command queue (CQ), the transmit queue
- pair (TXQ) and the receive queue pairs (RXQ). These three entities
- are set up by the host and passed to the microcode just after it has
- been started.
-
- All queues are host-based circular queues. They are contiguous and
- (due to hardware limitations) have some restrictions as to their
- locations in (bus) memory. They are of the "full means the same as
- empty so don't do that" variety since the adapter uses pointers
- internally.
-
- The queue pairs work as follows: one queue is for supply to the
- adapter, items in it are pending and are owned by the adapter; the
- other is the queue for return from the adapter, items in it have
- been dealt with by the adapter. The host adds items to the supply
- (TX descriptors and free RX buffer descriptors) and removes items
- from the return (TX and RX completions). The adapter deals with out
- of order completions.
-
- Interrupts (card to host) and the doorbell (host to card) are used
- for signalling.
-
- 1. CQ
-
- This is to communicate "open VC", "close VC", "get stats" etc. to
- the adapter. At most one command is retired every millisecond by the
- card. There is no out of order completion or notification. The
- driver needs to check the return code of the command, waiting as
- appropriate.
-
- 2. TXQ
-
- TX supply items are of variable length (scatter gather support) and
- so the queue items are (more or less) pointers to the real thing.
- Each TX supply item contains a unique, host-supplied handle (the skb
- bus address seems most sensible as this works for Alphas as well,
- there is no need to do any endian conversions on the handles).
-
- TX return items consist of just the handles above.
-
- 3. RXQ (up to 4 of these with different lengths and buffer sizes)
-
- RX supply items consist of a unique, host-supplied handle (the skb
- bus address again) and a pointer to the buffer data area.
-
- RX return items consist of the handle above, the VC, length and a
- status word. This just screams "oh so easy" doesn't it?
-
- Note on RX pool sizes:
-
- Each pool should have enough buffers to handle a back-to-back stream
- of minimum sized frames on a single VC. For example:
-
- frame spacing = 3us (about right)
-
- delay = IRQ lat + RX handling + RX buffer replenish = 20 (us) (a guess)
-
- min number of buffers for one VC = 1 + delay/spacing (buffers)
-
- delay/spacing = latency = (20+2)/3 = 7 (buffers) (rounding up)
-
- The 20us delay assumes that there is no need to sleep; if we need to
- sleep to get buffers we are going to drop frames anyway.
-
- In fact, each pool should have enough buffers to support the
- simultaneous reassembly of a separate frame on each VC and cope with
- the case in which frames complete in round robin cell fashion on
- each VC.
-
- Only one frame can complete at each cell arrival, so if "n" VCs are
- open, the worst case is to have them all complete frames together
- followed by all starting new frames together.
-
- desired number of buffers = n + delay/spacing
-
- These are the extreme requirements, however, they are "n+k" for some
- "k" so we have only the constant to choose. This is the argument
- rx_lats which current defaults to 7.
-
- Actually, "n ? n+k : 0" is better and this is what is implemented,
- subject to the limit given by the pool size.
-
- 4. Driver locking
-
- Simple spinlocks are used around the TX and RX queue mechanisms.
- Anyone with a faster, working method is welcome to implement it.
-
- The adapter command queue is protected with a spinlock. We always
- wait for commands to complete.
-
- A more complex form of locking is used around parts of the VC open
- and close functions. There are three reasons for a lock: 1. we need
- to do atomic rate reservation and release (not used yet), 2. Opening
- sometimes involves two adapter commands which must not be separated
- by another command on the same VC, 3. the changes to RX pool size
- must be atomic. The lock needs to be held over context switches, so
- we use a mutex rather than a spinlock.
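-
- In outline, the open and close paths below therefore look like:
-
-   mutex_lock (&dev->vcc_sf);
-   ... issue the adapter command(s), adjust RX pool sizes ...
-   mutex_unlock (&dev->vcc_sf);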
-
- III Hardware Features and Microcode Bugs
-
- 1. Byte Ordering
-
- *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
-
- 2. Memory access
-
- All structures that are not accessed using DMA must be 4-byte
- aligned (not a problem) and must not cross 4MB boundaries.
-
- There is a DMA memory hole at E0000000-E00000FF (groan).
-
- TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
- but for a hardware bug).
-
- RX buffers (DMA write) must not cross 16MB boundaries and must
- include spare trailing bytes up to the next 4-byte boundary; they
- will be written with rubbish.
-
- The PLX likes to prefetch; if reading up to 4 u32 past the end of
- each TX fragment is not a problem, then TX can be made to go a
- little faster by passing a flag at init that disables a prefetch
- workaround. We do not pass this flag. (new microcode only)
-
- Now we:
- . Note that alloc_skb rounds up size to a 16-byte boundary.
- . Ensure all areas do not traverse 4MB boundaries.
- . Ensure all areas do not start at an E00000xx bus address.
- (I cannot be certain, but this may always hold with Linux)
- . Make all failures cause a loud message.
- . Discard non-conforming SKBs (causes TX failure or RX fill delay).
- . Discard non-conforming TX fragment descriptors (the TX fails).
- In the future we could:
- . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
- . Segment TX areas into some/more fragments, when necessary.
- . Relax checks for non-DMA items (ignore hole).
- . Give scatter-gather (iovec) requirements using ???. (?)
-
- 3. VC close is broken (only for new microcode)
-
- The VC close adapter microcode command fails to do anything if any
- frames have been received on the VC but none have been transmitted.
- Frames continue to be reassembled and passed (with IRQ) to the
- driver.
-
- IV To Do List
-
- . Fix bugs!
-
- . Timer code may be broken.
-
- . Deal with buggy VC close (somehow) in microcode 12.
-
- . Handle interrupted and/or non-blocking writes - is this a job for
- the protocol layer?
-
- . Add code to break up TX fragments when they span 4MB boundaries.
-
- . Add SUNI phy layer (need to know where SUNI lives on card).
-
- . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
- leave extra headroom space for Ambassador TX descriptors.
-
- . Understand these elements of struct atm_vcc: recvq (proto?),
- sleep, callback, listenq, backlog_quota, reply and user_back.
-
- . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
-
- . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
-
- . Decide whether RX buffer recycling is or can be made completely safe;
- turn it back on. It looks like Werner is going to axe this.
-
- . Implement QoS changes on open VCs (involves extracting parts of VC open
- and close into separate functions and using them to make changes).
-
- . Hack on command queue so that someone can issue multiple commands and wait
- on the last one (OR only "no-op" or "wait" commands are waited for).
-
- . Eliminate need for while-schedule around do_command.
-
-*/
-
-static void do_housekeeping (struct timer_list *t);
-/********** globals **********/
-
-static unsigned short debug = 0;
-static unsigned int cmds = 8;
-static unsigned int txs = 32;
-static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
-static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
-static unsigned int rx_lats = 7;
-static unsigned char pci_lat = 0;
-
-static const unsigned long onegigmask = -1 << 30;
-
-/********** access to adapter **********/
-
-static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
- PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
-#ifdef AMB_MMIO
- dev->membase[addr / sizeof(u32)] = data;
-#else
- outl (data, dev->iobase + addr);
-#endif
-}
-
-static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
-#ifdef AMB_MMIO
- u32 data = dev->membase[addr / sizeof(u32)];
-#else
- u32 data = inl (dev->iobase + addr);
-#endif
- PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
- return data;
-}
-
-static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
- __be32 be = cpu_to_be32 (data);
- PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
-#ifdef AMB_MMIO
- dev->membase[addr / sizeof(u32)] = be;
-#else
- outl (be, dev->iobase + addr);
-#endif
-}
-
-static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
-#ifdef AMB_MMIO
- __be32 be = dev->membase[addr / sizeof(u32)];
-#else
- __be32 be = inl (dev->iobase + addr);
-#endif
- u32 data = be32_to_cpu (be);
- PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
- return data;
-}
-
-/********** dump routines **********/
-
-static inline void dump_registers (const amb_dev * dev) {
-#ifdef DEBUG_AMBASSADOR
- if (debug & DBG_REGS) {
- size_t i;
- PRINTD (DBG_REGS, "reading PLX control: ");
- for (i = 0x00; i < 0x30; i += sizeof(u32))
- rd_mem (dev, i);
- PRINTD (DBG_REGS, "reading mailboxes: ");
- for (i = 0x40; i < 0x60; i += sizeof(u32))
- rd_mem (dev, i);
- PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
- for (i = 0x60; i < 0x70; i += sizeof(u32))
- rd_mem (dev, i);
- }
-#else
- (void) dev;
-#endif
- return;
-}
-
-static inline void dump_loader_block (volatile loader_block * lb) {
-#ifdef DEBUG_AMBASSADOR
- unsigned int i;
- PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
- lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
- for (i = 0; i < MAX_COMMAND_DATA; ++i)
- PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
- PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
-#else
- (void) lb;
-#endif
- return;
-}
-
-static inline void dump_command (command * cmd) {
-#ifdef DEBUG_AMBASSADOR
- unsigned int i;
- PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
- cmd, /*be32_to_cpu*/ (cmd->request));
- for (i = 0; i < 3; ++i)
- PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
- PRINTDE (DBG_CMD, "");
-#else
- (void) cmd;
-#endif
- return;
-}
-
-static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) {
-#ifdef DEBUG_AMBASSADOR
- unsigned int i;
- unsigned char * data = skb->data;
- PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
- for (i=0; i<skb->len && i < 256;i++)
- PRINTDM (DBG_DATA, "%02x ", data[i]);
- PRINTDE (DBG_DATA,"");
-#else
- (void) prefix;
- (void) vc;
- (void) skb;
-#endif
- return;
-}
-
-/********** check memory areas for use by Ambassador **********/
-
-/* see limitations under Hardware Features */
-
-static int check_area (void * start, size_t length) {
- // assumes length > 0
- const u32 fourmegmask = -1 << 22;
- const u32 twofivesixmask = -1 << 8;
- const u32 starthole = 0xE0000000;
- u32 startaddress = virt_to_bus (start);
- u32 lastaddress = startaddress+length-1;
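-  // reject areas that straddle a 4MB boundary, and areas whose start
-  // falls in the E0000000-E00000FF DMA hole (see the Hardware
-  // Features notes above)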
- if ((startaddress ^ lastaddress) & fourmegmask ||
- (startaddress & twofivesixmask) == starthole) {
- PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
- startaddress, lastaddress);
- return -1;
- } else {
- return 0;
- }
-}
-
-/********** free an skb (as per ATM device driver documentation) **********/
-
-static void amb_kfree_skb (struct sk_buff * skb) {
- if (ATM_SKB(skb)->vcc->pop) {
- ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
- } else {
- dev_kfree_skb_any (skb);
- }
-}
-
-/********** TX completion **********/
-
-static void tx_complete (amb_dev * dev, tx_out * tx) {
- tx_simple * tx_descr = bus_to_virt (tx->handle);
- struct sk_buff * skb = tx_descr->skb;
-
- PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
-
- // VC layer stats
- atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
-
- // free the descriptor
- kfree (tx_descr);
-
- // free the skb
- amb_kfree_skb (skb);
-
- dev->stats.tx_ok++;
- return;
-}
-
-/********** RX completion **********/
-
-static void rx_complete (amb_dev * dev, rx_out * rx) {
- struct sk_buff * skb = bus_to_virt (rx->handle);
- u16 vc = be16_to_cpu (rx->vc);
- // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
- u16 status = be16_to_cpu (rx->status);
- u16 rx_len = be16_to_cpu (rx->length);
-
- PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
-
- // XXX move this in and add to VC stats ???
- if (!status) {
- struct atm_vcc * atm_vcc = dev->rxer[vc];
- dev->stats.rx.ok++;
-
- if (atm_vcc) {
-
- if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
-
- if (atm_charge (atm_vcc, skb->truesize)) {
-
- // prepare socket buffer
- ATM_SKB(skb)->vcc = atm_vcc;
- skb_put (skb, rx_len);
-
- dump_skb ("<<<", vc, skb);
-
- // VC layer stats
- atomic_inc(&atm_vcc->stats->rx);
- __net_timestamp(skb);
- // end of our responsibility
- atm_vcc->push (atm_vcc, skb);
- return;
-
- } else {
- // someone fix this (message), please!
- PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
- // drop stats incremented in atm_charge
- }
-
- } else {
- PRINTK (KERN_INFO, "dropped over-size frame");
- // should we count this?
- atomic_inc(&atm_vcc->stats->rx_drop);
- }
-
- } else {
- PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
- // this is an adapter bug, only in new version of microcode
- }
-
- } else {
- dev->stats.rx.error++;
- if (status & CRC_ERR)
- dev->stats.rx.badcrc++;
- if (status & LEN_ERR)
- dev->stats.rx.toolong++;
- if (status & ABORT_ERR)
- dev->stats.rx.aborted++;
- if (status & UNUSED_ERR)
- dev->stats.rx.unused++;
- }
-
- dev_kfree_skb_any (skb);
- return;
-}
-
-/*
-
- Note on queue handling.
-
- Here "give" and "take" refer to queue entries and a queue (pair)
- rather than frames to or from the host or adapter. Empty frame
- buffers are given to the RX queue pair and returned unused or
- containing RX frames. TX frames (well, pointers to TX fragment
- lists) are given to the TX queue pair, completions are returned.
-
-*/
-
-/********** command queue **********/
-
-// I really don't like this, but it's the best I can do at the moment
-
-// also, the callers are responsible for byte order as the microcode
-// sometimes does 16-bit accesses (yuk yuk yuk)
-
-static int command_do (amb_dev * dev, command * cmd) {
- amb_cq * cq = &dev->cq;
- volatile amb_cq_ptrs * ptrs = &cq->ptrs;
- command * my_slot;
-
- PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
-
- if (test_bit (dead, &dev->flags))
- return 0;
-
- spin_lock (&cq->lock);
-
- // if not full...
- if (cq->pending < cq->maximum) {
- // remember my slot for later
- my_slot = ptrs->in;
- PRINTD (DBG_CMD, "command in slot %p", my_slot);
-
- dump_command (cmd);
-
- // copy command in
- *ptrs->in = *cmd;
- cq->pending++;
- ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
-
- // mail the command
- wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
-
- if (cq->pending > cq->high)
- cq->high = cq->pending;
- spin_unlock (&cq->lock);
-
- // these comments were in a while-loop before, msleep removes the loop
- // go to sleep
- // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
- msleep(cq->pending);
-
- // wait for my slot to be reached (all waiters are here or above, until...)
- while (ptrs->out != my_slot) {
- PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule();
- }
-
- // wait on my slot (... one gets to its slot, and... )
- while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
- PRINTD (DBG_CMD, "wait: command slot completion");
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule();
- }
-
- PRINTD (DBG_CMD, "command complete");
- // update queue (... moves the queue along to the next slot)
- spin_lock (&cq->lock);
- cq->pending--;
- // copy command out
- *cmd = *ptrs->out;
- ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
- spin_unlock (&cq->lock);
-
- return 0;
- } else {
- cq->filled++;
- spin_unlock (&cq->lock);
- return -EAGAIN;
- }
-
-}
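-
-// a typical call site retries until the command queue has room, for
-// example (this pattern recurs throughout the driver):
-//
-//   command cmd;
-//   cmd.request = cpu_to_be32 (SRB_GET_VERSION);
-//   while (command_do (dev, &cmd))
-//     schedule();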
-
-/********** TX queue pair **********/
-
-static int tx_give (amb_dev * dev, tx_in * tx) {
- amb_txq * txq = &dev->txq;
- unsigned long flags;
-
- PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
-
- if (test_bit (dead, &dev->flags))
- return 0;
-
- spin_lock_irqsave (&txq->lock, flags);
-
- if (txq->pending < txq->maximum) {
- PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
-
- *txq->in.ptr = *tx;
- txq->pending++;
- txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
- // hand over the TX and ring the bell
- wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
- wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
-
- if (txq->pending > txq->high)
- txq->high = txq->pending;
- spin_unlock_irqrestore (&txq->lock, flags);
- return 0;
- } else {
- txq->filled++;
- spin_unlock_irqrestore (&txq->lock, flags);
- return -EAGAIN;
- }
-}
-
-static int tx_take (amb_dev * dev) {
- amb_txq * txq = &dev->txq;
- unsigned long flags;
-
- PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
-
- spin_lock_irqsave (&txq->lock, flags);
-
- if (txq->pending && txq->out.ptr->handle) {
- // deal with TX completion
- tx_complete (dev, txq->out.ptr);
- // mark unused again
- txq->out.ptr->handle = 0;
- // remove item
- txq->pending--;
- txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
-
- spin_unlock_irqrestore (&txq->lock, flags);
- return 0;
- } else {
-
- spin_unlock_irqrestore (&txq->lock, flags);
- return -1;
- }
-}
-
-/********** RX queue pairs **********/
-
-static int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
- amb_rxq * rxq = &dev->rxq[pool];
- unsigned long flags;
-
- PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
-
- spin_lock_irqsave (&rxq->lock, flags);
-
- if (rxq->pending < rxq->maximum) {
- PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
-
- *rxq->in.ptr = *rx;
- rxq->pending++;
- rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
- // hand over the RX buffer
- wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
-
- spin_unlock_irqrestore (&rxq->lock, flags);
- return 0;
- } else {
- spin_unlock_irqrestore (&rxq->lock, flags);
- return -1;
- }
-}
-
-static int rx_take (amb_dev * dev, unsigned char pool) {
- amb_rxq * rxq = &dev->rxq[pool];
- unsigned long flags;
-
- PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
-
- spin_lock_irqsave (&rxq->lock, flags);
-
- if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
- // deal with RX completion
- rx_complete (dev, rxq->out.ptr);
- // mark unused again
- rxq->out.ptr->status = 0;
- rxq->out.ptr->length = 0;
- // remove item
- rxq->pending--;
- rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
-
- if (rxq->pending < rxq->low)
- rxq->low = rxq->pending;
- spin_unlock_irqrestore (&rxq->lock, flags);
- return 0;
- } else {
- if (!rxq->pending && rxq->buffers_wanted)
- rxq->emptied++;
- spin_unlock_irqrestore (&rxq->lock, flags);
- return -1;
- }
-}
-
-/********** RX Pool handling **********/
-
-/* pre: buffers_wanted = 0, post: pending = 0 */
-static void drain_rx_pool (amb_dev * dev, unsigned char pool) {
- amb_rxq * rxq = &dev->rxq[pool];
-
- PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
-
- if (test_bit (dead, &dev->flags))
- return;
-
- /* we are not quite like the fill pool routines as we cannot just
- remove one buffer, we have to remove all of them, but we might as
- well pretend... */
- if (rxq->pending > rxq->buffers_wanted) {
- command cmd;
- cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
- cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
- while (command_do (dev, &cmd))
- schedule();
- /* the pool may also be emptied via the interrupt handler */
- while (rxq->pending > rxq->buffers_wanted)
- if (rx_take (dev, pool))
- schedule();
- }
-
- return;
-}
-
-static void drain_rx_pools (amb_dev * dev) {
- unsigned char pool;
-
- PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
-
- for (pool = 0; pool < NUM_RX_POOLS; ++pool)
- drain_rx_pool (dev, pool);
-}
-
-static void fill_rx_pool (amb_dev * dev, unsigned char pool,
- gfp_t priority)
-{
- rx_in rx;
- amb_rxq * rxq;
-
- PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
-
- if (test_bit (dead, &dev->flags))
- return;
-
- rxq = &dev->rxq[pool];
- while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
-
- struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
- if (!skb) {
- PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
- return;
- }
- if (check_area (skb->data, skb->truesize)) {
- dev_kfree_skb_any (skb);
- return;
- }
- // cast needed as there is no %? for pointer differences
- PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
- skb, skb->head, (long) skb_end_offset(skb));
- rx.handle = virt_to_bus (skb);
- rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
- if (rx_give (dev, &rx, pool))
- dev_kfree_skb_any (skb);
-
- }
-
- return;
-}
-
-// top up all RX pools
-static void fill_rx_pools (amb_dev * dev) {
- unsigned char pool;
-
- PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
-
- for (pool = 0; pool < NUM_RX_POOLS; ++pool)
- fill_rx_pool (dev, pool, GFP_ATOMIC);
-
- return;
-}
-
-/********** enable host interrupts **********/
-
-static void interrupts_on (amb_dev * dev) {
- wr_plain (dev, offsetof(amb_mem, interrupt_control),
- rd_plain (dev, offsetof(amb_mem, interrupt_control))
- | AMB_INTERRUPT_BITS);
-}
-
-/********** disable host interrupts **********/
-
-static void interrupts_off (amb_dev * dev) {
- wr_plain (dev, offsetof(amb_mem, interrupt_control),
- rd_plain (dev, offsetof(amb_mem, interrupt_control))
- &~ AMB_INTERRUPT_BITS);
-}
-
-/********** interrupt handling **********/
-
-static irqreturn_t interrupt_handler(int irq, void *dev_id) {
- amb_dev * dev = dev_id;
-
- PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
-
- {
- u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
-
- // for us or someone else sharing the same interrupt
- if (!interrupt) {
- PRINTD (DBG_IRQ, "irq not for me: %d", irq);
- return IRQ_NONE;
- }
-
- // definitely for us
- PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
- wr_plain (dev, offsetof(amb_mem, interrupt), -1);
- }
-
- {
- unsigned int irq_work = 0;
- unsigned char pool;
- for (pool = 0; pool < NUM_RX_POOLS; ++pool)
- while (!rx_take (dev, pool))
- ++irq_work;
- while (!tx_take (dev))
- ++irq_work;
-
- if (irq_work) {
- fill_rx_pools (dev);
-
- PRINTD (DBG_IRQ, "work done: %u", irq_work);
- } else {
- PRINTD (DBG_IRQ|DBG_WARN, "no work done");
- }
- }
-
- PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
- return IRQ_HANDLED;
-}
-
-/********** make rate (not quite as much fun as Horizon) **********/
-
-static int make_rate (unsigned int rate, rounding r,
- u16 * bits, unsigned int * actual) {
- unsigned char exp = -1; // hush gcc
- unsigned int man = -1; // hush gcc
-
- PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
-
- // rates in cells per second, ITU format (nasty 16-bit floating-point)
- // given 5-bit e and 9-bit m:
- // rate = EITHER (1+m/2^9)*2^e OR 0
- // bits = EITHER 1<<14 | e<<9 | m OR 0
- // (bit 15 is "reserved", bit 14 "non-zero")
- // smallest rate is 0 (special representation)
- // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
- // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
- // simple algorithm:
- // find position of top bit, this gives e
- // remove top bit and shift (rounding if feeling clever) by 9-e
-
- // ucode bug: please don't set bit 14! so 0 rate not representable
-
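-  // worked example (hypothetical figures): rate = 100000 cells/s
-  //   the top bit of 100000 is bit 16, so e = 16, remainder 34464
-  //   m = 34464 >> (16-9) = 269 when truncating (round down), i.e.
-  //   (1+269/512)*2^16 = 99968; rounding up gives m = 270 -> 100096
-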
- if (rate > 0xffc00000U) {
- // larger than largest representable rate
-
- if (r == round_up) {
- return -EINVAL;
- } else {
- exp = 31;
- man = 511;
- }
-
- } else if (rate) {
- // representable rate
-
- exp = 31;
- man = rate;
-
- // invariant: rate = man*2^(exp-31)
- while (!(man & (1<<31))) {
- exp = exp - 1;
- man = man<<1;
- }
-
- // man has top bit set
- // rate = (2^31+(man-2^31))*2^(exp-31)
- // rate = (1+(man-2^31)/2^31)*2^exp
- man = man<<1;
- man &= 0xffffffffU; // a nop on 32-bit systems
- // rate = (1+man/2^32)*2^exp
-
- // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
- // time to lose significance... we want m in the range 0 to 2^9-1
- // rounding presents a minor problem... we first decide which way
- // we are rounding (based on given rounding direction and possibly
- // the bits of the mantissa that are to be discarded).
-
- switch (r) {
- case round_down: {
- // just truncate
- man = man>>(32-9);
- break;
- }
- case round_up: {
- // check all bits that we are discarding
- if (man & (~0U>>9)) {
- man = (man>>(32-9)) + 1;
- if (man == (1<<9)) {
- // no need to check for round up outside of range
- man = 0;
- exp += 1;
- }
- } else {
- man = (man>>(32-9));
- }
- break;
- }
- case round_nearest: {
- // check msb that we are discarding
- if (man & (1<<(32-9-1))) {
- man = (man>>(32-9)) + 1;
- if (man == (1<<9)) {
- // no need to check for round up outside of range
- man = 0;
- exp += 1;
- }
- } else {
- man = (man>>(32-9));
- }
- break;
- }
- }
-
- } else {
- // zero rate - not representable
-
- if (r == round_down) {
- return -EINVAL;
- } else {
- exp = 0;
- man = 0;
- }
-
- }
-
- PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
-
- if (bits)
- *bits = /* (1<<14) | */ (exp<<9) | man;
-
- if (actual)
- *actual = (exp >= 9)
- ? (1 << exp) + (man << (exp-9))
- : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
-
- return 0;
-}
-
-/********** Linux ATM Operations **********/
-
-// some are not yet implemented while others do not make sense for
-// this device
-
-/********** Open a VC **********/
-
-static int amb_open (struct atm_vcc * atm_vcc)
-{
- int error;
-
- struct atm_qos * qos;
- struct atm_trafprm * txtp;
- struct atm_trafprm * rxtp;
- u16 tx_rate_bits = -1; // hush gcc
- u16 tx_vc_bits = -1; // hush gcc
- u16 tx_frame_bits = -1; // hush gcc
-
- amb_dev * dev = AMB_DEV(atm_vcc->dev);
- amb_vcc * vcc;
- unsigned char pool = -1; // hush gcc
- short vpi = atm_vcc->vpi;
- int vci = atm_vcc->vci;
-
- PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
-
-#ifdef ATM_VPI_UNSPEC
- // UNSPEC is deprecated, remove this code eventually
- if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
- PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
- return -EINVAL;
- }
-#endif
-
- if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
- 0 <= vci && vci < (1<<NUM_VCI_BITS))) {
- PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
- return -EINVAL;
- }
-
- qos = &atm_vcc->qos;
-
- if (qos->aal != ATM_AAL5) {
- PRINTD (DBG_QOS, "AAL not supported");
- return -EINVAL;
- }
-
- // traffic parameters
-
- PRINTD (DBG_QOS, "TX:");
- txtp = &qos->txtp;
- if (txtp->traffic_class != ATM_NONE) {
- switch (txtp->traffic_class) {
- case ATM_UBR: {
- // we take "the PCR" as a rate-cap
- int pcr = atm_pcr_goal (txtp);
- if (!pcr) {
- // no rate cap
- tx_rate_bits = 0;
- tx_vc_bits = TX_UBR;
- tx_frame_bits = TX_FRAME_NOTCAP;
- } else {
- rounding r;
- if (pcr < 0) {
- r = round_down;
- pcr = -pcr;
- } else {
- r = round_up;
- }
- error = make_rate (pcr, r, &tx_rate_bits, NULL);
- if (error)
- return error;
- tx_vc_bits = TX_UBR_CAPPED;
- tx_frame_bits = TX_FRAME_CAPPED;
- }
- break;
- }
-#if 0
- case ATM_ABR: {
- pcr = atm_pcr_goal (txtp);
- PRINTD (DBG_QOS, "pcr goal = %d", pcr);
- break;
- }
-#endif
- default: {
- // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
- PRINTD (DBG_QOS, "request for non-UBR denied");
- return -EINVAL;
- }
- }
- PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
- tx_rate_bits, tx_vc_bits);
- }
-
- PRINTD (DBG_QOS, "RX:");
- rxtp = &qos->rxtp;
- if (rxtp->traffic_class == ATM_NONE) {
- // do nothing
- } else {
- // choose an RX pool (arranged in increasing size)
- for (pool = 0; pool < NUM_RX_POOLS; ++pool)
- if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
- PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
- pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
- break;
- }
- if (pool == NUM_RX_POOLS) {
- PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
- "no pool suitable for VC (RX max_sdu %d is too large)",
- rxtp->max_sdu);
- return -EINVAL;
- }
-
- switch (rxtp->traffic_class) {
- case ATM_UBR: {
- break;
- }
-#if 0
- case ATM_ABR: {
- pcr = atm_pcr_goal (rxtp);
- PRINTD (DBG_QOS, "pcr goal = %d", pcr);
- break;
- }
-#endif
- default: {
- // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
- PRINTD (DBG_QOS, "request for non-UBR denied");
- return -EINVAL;
- }
- }
- }
-
- // get space for our vcc stuff
- vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
- if (!vcc) {
- PRINTK (KERN_ERR, "out of memory!");
- return -ENOMEM;
- }
- atm_vcc->dev_data = (void *) vcc;
-
- // no failures beyond this point
-
- // we are not really "immediately before allocating the connection
- // identifier in hardware", but it will just have to do!
- set_bit(ATM_VF_ADDR,&atm_vcc->flags);
-
- if (txtp->traffic_class != ATM_NONE) {
- command cmd;
-
- vcc->tx_frame_bits = tx_frame_bits;
-
- mutex_lock(&dev->vcc_sf);
- if (dev->rxer[vci]) {
- // RXer on the channel already, just modify rate...
- cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
- cmd.args.modify_rate.vc = cpu_to_be32 (vci); // vpi 0
- cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
- while (command_do (dev, &cmd))
- schedule();
- // ... and TX flags, preserving the RX pool
- cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
- cmd.args.modify_flags.vc = cpu_to_be32 (vci); // vpi 0
- cmd.args.modify_flags.flags = cpu_to_be32
- ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
- | (tx_vc_bits << SRB_FLAGS_SHIFT) );
- while (command_do (dev, &cmd))
- schedule();
- } else {
- // no RXer on the channel, just open (with pool zero)
- cmd.request = cpu_to_be32 (SRB_OPEN_VC);
- cmd.args.open.vc = cpu_to_be32 (vci); // vpi 0
- cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
- cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
- while (command_do (dev, &cmd))
- schedule();
- }
- dev->txer[vci].tx_present = 1;
- mutex_unlock(&dev->vcc_sf);
- }
-
- if (rxtp->traffic_class != ATM_NONE) {
- command cmd;
-
- vcc->rx_info.pool = pool;
-
- mutex_lock(&dev->vcc_sf);
- /* grow RX buffer pool */
- if (!dev->rxq[pool].buffers_wanted)
- dev->rxq[pool].buffers_wanted = rx_lats;
- dev->rxq[pool].buffers_wanted += 1;
- fill_rx_pool (dev, pool, GFP_KERNEL);
-
- if (dev->txer[vci].tx_present) {
- // TXer on the channel already
- // switch (from pool zero) to this pool, preserving the TX bits
- cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
- cmd.args.modify_flags.vc = cpu_to_be32 (vci); // vpi 0
- cmd.args.modify_flags.flags = cpu_to_be32
- ( (pool << SRB_POOL_SHIFT)
- | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
- } else {
- // no TXer on the channel, open the VC (with no rate info)
- cmd.request = cpu_to_be32 (SRB_OPEN_VC);
- cmd.args.open.vc = cpu_to_be32 (vci); // vpi 0
- cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
- cmd.args.open.rate = cpu_to_be32 (0);
- }
- while (command_do (dev, &cmd))
- schedule();
- // this link allows RX frames through
- dev->rxer[vci] = atm_vcc;
- mutex_unlock(&dev->vcc_sf);
- }
-
- // indicate readiness
- set_bit(ATM_VF_READY,&atm_vcc->flags);
-
- return 0;
-}
-
-/********** Close a VC **********/
-
-static void amb_close (struct atm_vcc * atm_vcc) {
- amb_dev * dev = AMB_DEV (atm_vcc->dev);
- amb_vcc * vcc = AMB_VCC (atm_vcc);
- u16 vci = atm_vcc->vci;
-
- PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
-
- // indicate unreadiness
- clear_bit(ATM_VF_READY,&atm_vcc->flags);
-
- // disable TXing
- if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
- command cmd;
-
- mutex_lock(&dev->vcc_sf);
- if (dev->rxer[vci]) {
- // RXer still on the channel, just modify rate... XXX not really needed
- cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
- cmd.args.modify_rate.vc = cpu_to_be32 (vci); // vpi 0
- cmd.args.modify_rate.rate = cpu_to_be32 (0);
- // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
- } else {
- // no RXer on the channel, close channel
- cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
- cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
- }
- dev->txer[vci].tx_present = 0;
- while (command_do (dev, &cmd))
- schedule();
- mutex_unlock(&dev->vcc_sf);
- }
-
- // disable RXing
- if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
- command cmd;
-
- // this is (the?) one reason why we need the amb_vcc struct
- unsigned char pool = vcc->rx_info.pool;
-
- mutex_lock(&dev->vcc_sf);
- if (dev->txer[vci].tx_present) {
- // TXer still on the channel, just go to pool zero XXX not really needed
- cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
- cmd.args.modify_flags.vc = cpu_to_be32 (vci); // vpi 0
- cmd.args.modify_flags.flags = cpu_to_be32
- (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
- } else {
- // no TXer on the channel, close the VC
- cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
- cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
- }
- // forget the rxer - no more skbs will be pushed
- if (atm_vcc != dev->rxer[vci])
-      PRINTK (KERN_ERR, "%s atm_vcc=%p rxer[vci]=%p",
-	      "arghhh! we're going to die!",
-	      atm_vcc, dev->rxer[vci]);
- dev->rxer[vci] = NULL;
- while (command_do (dev, &cmd))
- schedule();
-
- /* shrink RX buffer pool */
- dev->rxq[pool].buffers_wanted -= 1;
- if (dev->rxq[pool].buffers_wanted == rx_lats) {
- dev->rxq[pool].buffers_wanted = 0;
- drain_rx_pool (dev, pool);
- }
- mutex_unlock(&dev->vcc_sf);
- }
-
- // free our structure
- kfree (vcc);
-
- // say the VPI/VCI is free again
- clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
-
- return;
-}
-
-/********** Send **********/
-
-static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
- amb_dev * dev = AMB_DEV(atm_vcc->dev);
- amb_vcc * vcc = AMB_VCC(atm_vcc);
- u16 vc = atm_vcc->vci;
- unsigned int tx_len = skb->len;
- unsigned char * tx_data = skb->data;
- tx_simple * tx_descr;
- tx_in tx;
-
- if (test_bit (dead, &dev->flags))
- return -EIO;
-
- PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
- vc, tx_data, tx_len);
-
- dump_skb (">>>", vc, skb);
-
- if (!dev->txer[vc].tx_present) {
- PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
- return -EBADFD;
- }
-
- // this is a driver private field so we have to set it ourselves,
- // despite the fact that we are _required_ to use it to check for a
- // pop function
- ATM_SKB(skb)->vcc = atm_vcc;
-
- if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
- PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
- return -EIO;
- }
-
- if (check_area (skb->data, skb->len)) {
- atomic_inc(&atm_vcc->stats->tx_err);
- return -ENOMEM; // ?
- }
-
- // allocate memory for fragments
- tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
- if (!tx_descr) {
- PRINTK (KERN_ERR, "could not allocate TX descriptor");
- return -ENOMEM;
- }
- if (check_area (tx_descr, sizeof(tx_simple))) {
- kfree (tx_descr);
- return -ENOMEM;
- }
- PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
-
- tx_descr->skb = skb;
-
- tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
- tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
-
- tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
- tx_descr->tx_frag_end.vc = 0;
- tx_descr->tx_frag_end.next_descriptor_length = 0;
- tx_descr->tx_frag_end.next_descriptor = 0;
-#ifdef AMB_NEW_MICROCODE
- tx_descr->tx_frag_end.cpcs_uu = 0;
- tx_descr->tx_frag_end.cpi = 0;
- tx_descr->tx_frag_end.pad = 0;
-#endif
-
- tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
- tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
- tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
-
- while (tx_give (dev, &tx))
- schedule();
- return 0;
-}
-
-/********** Change QoS on a VC **********/
-
-// int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
-
-/********** Free RX Socket Buffer **********/
-
-#if 0
-static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
- amb_dev * dev = AMB_DEV (atm_vcc->dev);
- amb_vcc * vcc = AMB_VCC (atm_vcc);
- unsigned char pool = vcc->rx_info.pool;
- rx_in rx;
-
- // This may be unsafe for various reasons that I cannot really guess
- // at. However, I note that the ATM layer calls kfree_skb rather
- // than dev_kfree_skb at this point so we are at least covered as far
- // as buffer locking goes. There may be bugs if pcap clones RX skbs.
-
- PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
- skb, atm_vcc, vcc);
-
- rx.handle = virt_to_bus (skb);
- rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
-
- skb->data = skb->head;
- skb_reset_tail_pointer(skb);
- skb->len = 0;
-
- if (!rx_give (dev, &rx, pool)) {
- // success
- PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
- return;
- }
-
- // just do what the ATM layer would have done
- dev_kfree_skb_any (skb);
-
- return;
-}
-#endif
-
-/********** Proc File Output **********/
-
-static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
- amb_dev * dev = AMB_DEV (atm_dev);
- int left = *pos;
- unsigned char pool;
-
- PRINTD (DBG_FLOW, "amb_proc_read");
-
- /* more diagnostics here? */
-
- if (!left--) {
- amb_stats * s = &dev->stats;
- return sprintf (page,
- "frames: TX OK %lu, RX OK %lu, RX bad %lu "
- "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
- s->tx_ok, s->rx.ok, s->rx.error,
- s->rx.badcrc, s->rx.toolong,
- s->rx.aborted, s->rx.unused);
- }
-
- if (!left--) {
- amb_cq * c = &dev->cq;
- return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
- c->pending, c->high, c->maximum);
- }
-
- if (!left--) {
- amb_txq * t = &dev->txq;
- return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
- t->pending, t->maximum, t->high, t->filled);
- }
-
- if (!left--) {
- unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
- for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
- amb_rxq * r = &dev->rxq[pool];
- count += sprintf (page+count, " %u/%u/%u %u %u",
- r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
- }
- count += sprintf (page+count, ".\n");
- return count;
- }
-
- if (!left--) {
- unsigned int count = sprintf (page, "RX buffer sizes:");
- for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
- amb_rxq * r = &dev->rxq[pool];
- count += sprintf (page+count, " %u", r->buffer_size);
- }
- count += sprintf (page+count, ".\n");
- return count;
- }
-
-#if 0
- if (!left--) {
- // suni block etc?
- }
-#endif
-
- return 0;
-}
-
-/********** Operation Structure **********/
-
-static const struct atmdev_ops amb_ops = {
- .open = amb_open,
- .close = amb_close,
- .send = amb_send,
- .proc_read = amb_proc_read,
- .owner = THIS_MODULE,
-};
-
-/********** housekeeping **********/
-static void do_housekeeping (struct timer_list *t) {
- amb_dev * dev = from_timer(dev, t, housekeeping);
-
- // could collect device-specific (not driver/atm-linux) stats here
-
- // last resort refill once every ten seconds
- fill_rx_pools (dev);
- mod_timer(&dev->housekeeping, jiffies + 10*HZ);
-
- return;
-}
-
-/********** creation of communication queues **********/
-
-static int create_queues(amb_dev *dev, unsigned int cmds, unsigned int txs,
- unsigned int *rxs, unsigned int *rx_buffer_sizes)
-{
- unsigned char pool;
- size_t total = 0;
- void * memory;
- void * limit;
-
- PRINTD (DBG_FLOW, "create_queues %p", dev);
-
- total += cmds * sizeof(command);
-
- total += txs * (sizeof(tx_in) + sizeof(tx_out));
-
- for (pool = 0; pool < NUM_RX_POOLS; ++pool)
- total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));
-
- memory = kmalloc (total, GFP_KERNEL);
- if (!memory) {
- PRINTK (KERN_ERR, "could not allocate queues");
- return -ENOMEM;
- }
- if (check_area (memory, total)) {
- PRINTK (KERN_ERR, "queues allocated in nasty area");
- kfree (memory);
- return -ENOMEM;
- }
-
- limit = memory + total;
- PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);
-
- PRINTD (DBG_CMD, "command queue at %p", memory);
-
- {
- command * cmd = memory;
- amb_cq * cq = &dev->cq;
-
- cq->pending = 0;
- cq->high = 0;
- cq->maximum = cmds - 1;
-
- cq->ptrs.start = cmd;
- cq->ptrs.in = cmd;
- cq->ptrs.out = cmd;
- cq->ptrs.limit = cmd + cmds;
-
- memory = cq->ptrs.limit;
- }
-
- PRINTD (DBG_TX, "TX queue pair at %p", memory);
-
- {
- tx_in * in = memory;
- tx_out * out;
- amb_txq * txq = &dev->txq;
-
- txq->pending = 0;
- txq->high = 0;
- txq->filled = 0;
- txq->maximum = txs - 1;
-
- txq->in.start = in;
- txq->in.ptr = in;
- txq->in.limit = in + txs;
-
- memory = txq->in.limit;
- out = memory;
-
- txq->out.start = out;
- txq->out.ptr = out;
- txq->out.limit = out + txs;
-
- memory = txq->out.limit;
- }
-
- PRINTD (DBG_RX, "RX queue pairs at %p", memory);
-
- for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
- rx_in * in = memory;
- rx_out * out;
- amb_rxq * rxq = &dev->rxq[pool];
-
- rxq->buffer_size = rx_buffer_sizes[pool];
- rxq->buffers_wanted = 0;
-
- rxq->pending = 0;
- rxq->low = rxs[pool] - 1;
- rxq->emptied = 0;
- rxq->maximum = rxs[pool] - 1;
-
- rxq->in.start = in;
- rxq->in.ptr = in;
- rxq->in.limit = in + rxs[pool];
-
- memory = rxq->in.limit;
- out = memory;
-
- rxq->out.start = out;
- rxq->out.ptr = out;
- rxq->out.limit = out + rxs[pool];
-
- memory = rxq->out.limit;
- }
-
- if (memory == limit) {
- return 0;
- } else {
- PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
- kfree (limit - total);
- return -ENOMEM;
- }
-
-}
-
-/********** destruction of communication queues **********/
-
-static void destroy_queues (amb_dev * dev) {
- // all queues assumed empty
- void * memory = dev->cq.ptrs.start;
- // includes txq.in, txq.out, rxq[].in and rxq[].out
-
- PRINTD (DBG_FLOW, "destroy_queues %p", dev);
-
- PRINTD (DBG_INIT, "freeing queues at %p", memory);
- kfree (memory);
-
- return;
-}
-
-/********** basic loader commands and error handling **********/
-// centisecond timeouts - guessing away here
-static unsigned int command_timeouts [] = {
- [host_memory_test] = 15,
- [read_adapter_memory] = 2,
- [write_adapter_memory] = 2,
- [adapter_start] = 50,
- [get_version_number] = 10,
- [interrupt_host] = 1,
- [flash_erase_sector] = 1,
- [adap_download_block] = 1,
- [adap_erase_flash] = 1,
- [adap_run_in_iram] = 1,
- [adap_end_download] = 1
-};
-
-
-static unsigned int command_successes [] = {
- [host_memory_test] = COMMAND_PASSED_TEST,
- [read_adapter_memory] = COMMAND_READ_DATA_OK,
- [write_adapter_memory] = COMMAND_WRITE_DATA_OK,
- [adapter_start] = COMMAND_COMPLETE,
- [get_version_number] = COMMAND_COMPLETE,
- [interrupt_host] = COMMAND_COMPLETE,
- [flash_erase_sector] = COMMAND_COMPLETE,
- [adap_download_block] = COMMAND_COMPLETE,
- [adap_erase_flash] = COMMAND_COMPLETE,
- [adap_run_in_iram] = COMMAND_COMPLETE,
- [adap_end_download] = COMMAND_COMPLETE
-};
-
-static int decode_loader_result (loader_command cmd, u32 result)
-{
- int res;
- const char *msg;
-
- if (result == command_successes[cmd])
- return 0;
-
- switch (result) {
- case BAD_COMMAND:
- res = -EINVAL;
- msg = "bad command";
- break;
- case COMMAND_IN_PROGRESS:
- res = -ETIMEDOUT;
- msg = "command in progress";
- break;
- case COMMAND_PASSED_TEST:
- res = 0;
- msg = "command passed test";
- break;
- case COMMAND_FAILED_TEST:
- res = -EIO;
- msg = "command failed test";
- break;
- case COMMAND_READ_DATA_OK:
- res = 0;
- msg = "command read data ok";
- break;
- case COMMAND_READ_BAD_ADDRESS:
- res = -EINVAL;
- msg = "command read bad address";
- break;
- case COMMAND_WRITE_DATA_OK:
- res = 0;
- msg = "command write data ok";
- break;
- case COMMAND_WRITE_BAD_ADDRESS:
- res = -EINVAL;
- msg = "command write bad address";
- break;
- case COMMAND_WRITE_FLASH_FAILURE:
- res = -EIO;
- msg = "command write flash failure";
- break;
- case COMMAND_COMPLETE:
- res = 0;
- msg = "command complete";
- break;
- case COMMAND_FLASH_ERASE_FAILURE:
- res = -EIO;
- msg = "command flash erase failure";
- break;
- case COMMAND_WRITE_BAD_DATA:
- res = -EINVAL;
- msg = "command write bad data";
- break;
- default:
- res = -EINVAL;
- msg = "unknown error";
- PRINTD (DBG_LOAD|DBG_ERR,
- "decode_loader_result got %d=%x !",
- result, result);
- break;
- }
-
- PRINTK (KERN_ERR, "%s", msg);
- return res;
-}
-
-static int do_loader_command(volatile loader_block *lb, const amb_dev *dev,
- loader_command cmd)
-{
-
- unsigned long timeout;
-
- PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");
-
- /* do a command
-
- Set the return value to zero, set the command type and set the
- valid entry to the right magic value. The payload is already
- correctly byte-ordered so we leave it alone. Hit the doorbell
- with the bus address of this structure.
-
- */
-
- lb->result = 0;
- lb->command = cpu_to_be32 (cmd);
- lb->valid = cpu_to_be32 (DMA_VALID);
- // dump_registers (dev);
- // dump_loader_block (lb);
- wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
-
- timeout = command_timeouts[cmd] * 10;
-
- while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
- if (timeout) {
- timeout = msleep_interruptible(timeout);
- } else {
- PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
- dump_registers (dev);
- dump_loader_block (lb);
- return -ETIMEDOUT;
- }
-
- if (cmd == adapter_start) {
- // wait for start command to acknowledge...
- timeout = 100;
- while (rd_plain (dev, offsetof(amb_mem, doorbell)))
- if (timeout) {
- timeout = msleep_interruptible(timeout);
- } else {
- PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
- be32_to_cpu (lb->result));
- dump_registers (dev);
- return -ETIMEDOUT;
- }
- return 0;
- } else {
- return decode_loader_result (cmd, be32_to_cpu (lb->result));
- }
-
-}
-
-/* loader: determine loader version */
-
-static int get_loader_version(loader_block *lb, const amb_dev *dev,
- u32 *version)
-{
- int res;
-
- PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
-
- res = do_loader_command (lb, dev, get_version_number);
- if (res)
- return res;
- if (version)
- *version = be32_to_cpu (lb->payload.version);
- return 0;
-}
-
-/* loader: write memory data blocks */
-
-static int loader_write(loader_block *lb, const amb_dev *dev,
- const struct ihex_binrec *rec)
-{
- transfer_block * tb = &lb->payload.transfer;
-
- PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
-
- tb->address = rec->addr;
- tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
- memcpy(tb->data, rec->data, be16_to_cpu(rec->len));
- return do_loader_command (lb, dev, write_adapter_memory);
-}
-
-/* loader: verify memory data blocks */
-
-static int loader_verify(loader_block *lb, const amb_dev *dev,
- const struct ihex_binrec *rec)
-{
- transfer_block * tb = &lb->payload.transfer;
- int res;
-
- PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
-
- tb->address = rec->addr;
- tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
- res = do_loader_command (lb, dev, read_adapter_memory);
- if (!res && memcmp(tb->data, rec->data, be16_to_cpu(rec->len)))
- res = -EINVAL;
- return res;
-}
-
-/* loader: start microcode */
-
-static int loader_start(loader_block *lb, const amb_dev *dev, u32 address)
-{
- PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
-
- lb->payload.start = cpu_to_be32 (address);
- return do_loader_command (lb, dev, adapter_start);
-}
-
-/********** reset card **********/
-
-static inline void sf (const char * msg)
-{
- PRINTK (KERN_ERR, "self-test failed: %s", msg);
-}
-
-static int amb_reset (amb_dev * dev, int diags) {
- u32 word;
-
- PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
-
- word = rd_plain (dev, offsetof(amb_mem, reset_control));
- // put card into reset state
- wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
- // wait a short while
- udelay (10);
-#if 1
- // put card into known good state
- wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
- // clear all interrupts just in case
- wr_plain (dev, offsetof(amb_mem, interrupt), -1);
-#endif
- // clear self-test done flag
- wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
- // take card out of reset state
- wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS);
-
- if (diags) {
- unsigned long timeout;
- // 4.2 second wait
- msleep(4200);
- // half second time-out
- timeout = 500;
- while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
- if (timeout) {
- timeout = msleep_interruptible(timeout);
- } else {
- PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
- return -ETIMEDOUT;
- }
-
- // get results of self-test
- // XXX double check byte-order
- word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
- if (word & SELF_TEST_FAILURE) {
- if (word & GPINT_TST_FAILURE)
- sf ("interrupt");
- if (word & SUNI_DATA_PATTERN_FAILURE)
- sf ("SUNI data pattern");
- if (word & SUNI_DATA_BITS_FAILURE)
- sf ("SUNI data bits");
- if (word & SUNI_UTOPIA_FAILURE)
- sf ("SUNI UTOPIA interface");
- if (word & SUNI_FIFO_FAILURE)
- sf ("SUNI cell buffer FIFO");
- if (word & SRAM_FAILURE)
- sf ("bad SRAM");
- // better return value?
- return -EIO;
- }
-
- }
- return 0;
-}
-
-/********** transfer and start the microcode **********/
-
-static int ucode_init(loader_block *lb, amb_dev *dev)
-{
- const struct firmware *fw;
- unsigned long start_address;
- const struct ihex_binrec *rec;
- const char *errmsg = NULL;
- int res;
-
- res = request_ihex_firmware(&fw, "atmsar11.fw", &dev->pci_dev->dev);
- if (res) {
- PRINTK (KERN_ERR, "Cannot load microcode data");
- return res;
- }
-
- /* First record contains just the start address */
- rec = (const struct ihex_binrec *)fw->data;
- if (be16_to_cpu(rec->len) != sizeof(__be32) || be32_to_cpu(rec->addr)) {
- errmsg = "no start record";
- goto fail;
- }
- start_address = be32_to_cpup((__be32 *)rec->data);
-
- rec = ihex_next_binrec(rec);
-
- PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
-
- while (rec) {
- PRINTD (DBG_LOAD, "starting region (%x, %u)", be32_to_cpu(rec->addr),
- be16_to_cpu(rec->len));
- if (be16_to_cpu(rec->len) > 4 * MAX_TRANSFER_DATA) {
- errmsg = "record too long";
- goto fail;
- }
- if (be16_to_cpu(rec->len) & 3) {
- errmsg = "odd number of bytes";
- goto fail;
- }
- res = loader_write(lb, dev, rec);
- if (res)
- break;
-
- res = loader_verify(lb, dev, rec);
- if (res)
- break;
- rec = ihex_next_binrec(rec);
- }
- release_firmware(fw);
- if (!res)
- res = loader_start(lb, dev, start_address);
-
- return res;
-fail:
- release_firmware(fw);
- PRINTK(KERN_ERR, "Bad microcode data (%s)", errmsg);
- return -EINVAL;
-}
-
-/********** give adapter parameters **********/
-
-static inline __be32 bus_addr(void * addr) {
- return cpu_to_be32 (virt_to_bus (addr));
-}
-
-static int amb_talk(amb_dev *dev)
-{
- adap_talk_block a;
- unsigned char pool;
- unsigned long timeout;
-
- PRINTD (DBG_FLOW, "amb_talk %p", dev);
-
- a.command_start = bus_addr (dev->cq.ptrs.start);
- a.command_end = bus_addr (dev->cq.ptrs.limit);
- a.tx_start = bus_addr (dev->txq.in.start);
- a.tx_end = bus_addr (dev->txq.in.limit);
- a.txcom_start = bus_addr (dev->txq.out.start);
- a.txcom_end = bus_addr (dev->txq.out.limit);
-
- for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
- // the other "a" items are set up by the adapter
- a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
- a.rec_struct[pool].buffer_end = bus_addr (dev->rxq[pool].in.limit);
- a.rec_struct[pool].rx_start = bus_addr (dev->rxq[pool].out.start);
- a.rec_struct[pool].rx_end = bus_addr (dev->rxq[pool].out.limit);
- a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
- }
-
-#ifdef AMB_NEW_MICROCODE
- // disable fast PLX prefetching
- a.init_flags = 0;
-#endif
-
- // pass the structure
- wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
-
- // 2.2 second wait (must not touch doorbell during 2 second DMA test)
- msleep(2200);
- // give the adapter another half second?
- timeout = 500;
- while (rd_plain (dev, offsetof(amb_mem, doorbell)))
- if (timeout) {
- timeout = msleep_interruptible(timeout);
- } else {
- PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
- return -ETIMEDOUT;
- }
-
- return 0;
-}
-
-// get microcode version
-static void amb_ucode_version(amb_dev *dev)
-{
- u32 major;
- u32 minor;
- command cmd;
- cmd.request = cpu_to_be32 (SRB_GET_VERSION);
- while (command_do (dev, &cmd)) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule();
- }
- major = be32_to_cpu (cmd.args.version.major);
- minor = be32_to_cpu (cmd.args.version.minor);
- PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
-}
-
-// get end station address
-static void amb_esi(amb_dev *dev, u8 *esi)
-{
- u32 lower4;
- u16 upper2;
- command cmd;
-
- cmd.request = cpu_to_be32 (SRB_GET_BIA);
- while (command_do (dev, &cmd)) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule();
- }
- lower4 = be32_to_cpu (cmd.args.bia.lower4);
- upper2 = be32_to_cpu (cmd.args.bia.upper2);
- PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
-
- if (esi) {
- unsigned int i;
-
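-    // the BIA seems to hold each octet bit-reversed, hence the
-    // bitrev8 calls below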
- PRINTDB (DBG_INIT, "ESI:");
- for (i = 0; i < ESI_LEN; ++i) {
- if (i < 4)
- esi[i] = bitrev8(lower4>>(8*i));
- else
- esi[i] = bitrev8(upper2>>(8*(i-4)));
- PRINTDM (DBG_INIT, " %02x", esi[i]);
- }
-
- PRINTDE (DBG_INIT, "");
- }
-
- return;
-}
-
-static void fixup_plx_window (amb_dev *dev, loader_block *lb)
-{
- // fix up the PLX-mapped window base address to match the block
- unsigned long blb;
- u32 mapreg;
- blb = virt_to_bus(lb);
- // the kernel stack had better not ever cross a 1Gb boundary!
- mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
- mapreg &= ~onegigmask;
- mapreg |= blb & onegigmask;
- wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
- return;
-}
-
-static int amb_init(amb_dev *dev)
-{
- loader_block lb;
-
- u32 version;
-
- if (amb_reset (dev, 1)) {
- PRINTK (KERN_ERR, "card reset failed!");
- } else {
- fixup_plx_window (dev, &lb);
-
- if (get_loader_version (&lb, dev, &version)) {
- PRINTK (KERN_INFO, "failed to get loader version");
- } else {
- PRINTK (KERN_INFO, "loader version is %08x", version);
-
- if (ucode_init (&lb, dev)) {
- PRINTK (KERN_ERR, "microcode failure");
- } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
- PRINTK (KERN_ERR, "failed to get memory for queues");
- } else {
-
- if (amb_talk (dev)) {
- PRINTK (KERN_ERR, "adapter did not accept queues");
- } else {
-
- amb_ucode_version (dev);
- return 0;
-
- } /* amb_talk */
-
- destroy_queues (dev);
- } /* create_queues, ucode_init */
-
- amb_reset (dev, 0);
- } /* get_loader_version */
-
- } /* amb_reset */
-
- return -EINVAL;
-}
-
-static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev)
-{
- unsigned char pool;
-
- // set up known dev items straight away
- dev->pci_dev = pci_dev;
- pci_set_drvdata(pci_dev, dev);
-
- dev->iobase = pci_resource_start (pci_dev, 1);
- dev->irq = pci_dev->irq;
- dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
-
- // flags (currently only dead)
- dev->flags = 0;
-
- // Allocate cell rates (fibre)
- // ATM_OC3_PCR = 155520000/8/270*260/53 - 29/53
- // to be really pedantic, this should be ATM_OC3c_PCR
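-  // (155520000 bit/s / 8 = 19440000 byte/s; 260 of every 270 SONET
-  // columns carry payload, so * 260/270 = 18720000 byte/s; / 53
-  // byte cells = about 353207 cells/s)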
- dev->tx_avail = ATM_OC3_PCR;
- dev->rx_avail = ATM_OC3_PCR;
-
-  // mutex for txer/rxer modifications - we cannot use a
-  // spinlock as the critical region needs to switch processes
- mutex_init(&dev->vcc_sf);
- // queue manipulation spinlocks; we want atomic reads and
- // writes to the queue descriptors (handles IRQ and SMP)
- // consider replacing "int pending" -> "atomic_t available"
- // => problem related to who gets to move queue pointers
- spin_lock_init (&dev->cq.lock);
- spin_lock_init (&dev->txq.lock);
- for (pool = 0; pool < NUM_RX_POOLS; ++pool)
- spin_lock_init (&dev->rxq[pool].lock);
-}
-
-static void setup_pci_dev(struct pci_dev *pci_dev)
-{
- unsigned char lat;
-
- // enable bus master accesses
- pci_set_master(pci_dev);
-
- // frobnicate latency (upwards, usually)
- pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
-
- if (!pci_lat)
- pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
-
- if (lat != pci_lat) {
- PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
- lat, pci_lat);
- pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
- }
-}
-
-static int amb_probe(struct pci_dev *pci_dev,
- const struct pci_device_id *pci_ent)
-{
- amb_dev * dev;
- int err;
- unsigned int irq;
-
- err = pci_enable_device(pci_dev);
- if (err < 0) {
- PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
- goto out;
- }
-
- // read resources from PCI configuration space
- irq = pci_dev->irq;
-
- if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
- PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
- err = -EINVAL;
- goto out_disable;
- }
-
- PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
- " IO %llx, IRQ %u, MEM %p",
- (unsigned long long)pci_resource_start(pci_dev, 1),
- irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
-
- // check IO region
- err = pci_request_region(pci_dev, 1, DEV_LABEL);
- if (err < 0) {
- PRINTK (KERN_ERR, "IO range already in use!");
- goto out_disable;
- }
-
- dev = kzalloc(sizeof(amb_dev), GFP_KERNEL);
- if (!dev) {
- PRINTK (KERN_ERR, "out of memory!");
- err = -ENOMEM;
- goto out_release;
- }
-
- setup_dev(dev, pci_dev);
-
- err = amb_init(dev);
- if (err < 0) {
- PRINTK (KERN_ERR, "adapter initialisation failure");
- goto out_free;
- }
-
- setup_pci_dev(pci_dev);
-
- // grab (but share) IRQ and install handler
- err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev);
- if (err < 0) {
- PRINTK (KERN_ERR, "request IRQ failed!");
- goto out_reset;
- }
-
- dev->atm_dev = atm_dev_register (DEV_LABEL, &pci_dev->dev, &amb_ops, -1,
- NULL);
- if (!dev->atm_dev) {
- PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
- err = -EINVAL;
- goto out_free_irq;
- }
-
- PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
- dev->atm_dev->number, dev, dev->atm_dev);
- dev->atm_dev->dev_data = (void *) dev;
-
- // register our address
- amb_esi (dev, dev->atm_dev->esi);
-
- // 0 bits for vpi, 10 bits for vci
- dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
- dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
-
- timer_setup(&dev->housekeeping, do_housekeeping, 0);
- mod_timer(&dev->housekeeping, jiffies);
-
- // enable host interrupts
- interrupts_on (dev);
-
-out:
- return err;
-
-out_free_irq:
- free_irq(irq, dev);
-out_reset:
- amb_reset(dev, 0);
-out_free:
- kfree(dev);
-out_release:
- pci_release_region(pci_dev, 1);
-out_disable:
- pci_disable_device(pci_dev);
- goto out;
-}
-
-
-static void amb_remove_one(struct pci_dev *pci_dev)
-{
- struct amb_dev *dev;
-
- dev = pci_get_drvdata(pci_dev);
-
- PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
- del_timer_sync(&dev->housekeeping);
- // the drain should not be necessary
- drain_rx_pools(dev);
- interrupts_off(dev);
- amb_reset(dev, 0);
- free_irq(dev->irq, dev);
- pci_disable_device(pci_dev);
- destroy_queues(dev);
- atm_dev_deregister(dev->atm_dev);
- kfree(dev);
- pci_release_region(pci_dev, 1);
-}
-
-static void __init amb_check_args (void) {
- unsigned char pool;
- unsigned int max_rx_size;
-
-#ifdef DEBUG_AMBASSADOR
- PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
-#else
- if (debug)
- PRINTK (KERN_NOTICE, "no debugging support");
-#endif
-
- if (cmds < MIN_QUEUE_SIZE)
- PRINTK (KERN_NOTICE, "cmds has been raised to %u",
- cmds = MIN_QUEUE_SIZE);
-
- if (txs < MIN_QUEUE_SIZE)
- PRINTK (KERN_NOTICE, "txs has been raised to %u",
- txs = MIN_QUEUE_SIZE);
-
- for (pool = 0; pool < NUM_RX_POOLS; ++pool)
- if (rxs[pool] < MIN_QUEUE_SIZE)
- PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
- pool, rxs[pool] = MIN_QUEUE_SIZE);
-
- // buffers sizes should be greater than zero and strictly increasing
- max_rx_size = 0;
- for (pool = 0; pool < NUM_RX_POOLS; ++pool)
- if (rxs_bs[pool] <= max_rx_size)
- PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
- pool, rxs_bs[pool]);
- else
- max_rx_size = rxs_bs[pool];
-
- if (rx_lats < MIN_RX_BUFFERS)
- PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
- rx_lats = MIN_RX_BUFFERS);
-
- return;
-}
-
-/********** module stuff **********/
-
-MODULE_AUTHOR(maintainer_string);
-MODULE_DESCRIPTION(description_string);
-MODULE_LICENSE("GPL");
-MODULE_FIRMWARE("atmsar11.fw");
-module_param(debug, ushort, 0644);
-module_param(cmds, uint, 0);
-module_param(txs, uint, 0);
-module_param_array(rxs, uint, NULL, 0);
-module_param_array(rxs_bs, uint, NULL, 0);
-module_param(rx_lats, uint, 0);
-module_param(pci_lat, byte, 0);
-MODULE_PARM_DESC(debug, "debug bitmap, see .h file");
-MODULE_PARM_DESC(cmds, "number of command queue entries");
-MODULE_PARM_DESC(txs, "number of TX queue entries");
-MODULE_PARM_DESC(rxs, "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
-MODULE_PARM_DESC(rxs_bs, "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
-MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
-MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
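
A hypothetical invocation showing the per-pool array syntax for the two
array parameters (numbers purely illustrative):

	modprobe amb cmds=16 rxs=64,64,64,64 rxs_bs=2048,4096,8192,16384
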
-
-/********** module entry **********/
-
-static const struct pci_device_id amb_pci_tbl[] = {
- { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 },
- { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 },
- { 0, }
-};
-
-MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
-
-static struct pci_driver amb_driver = {
- .name = "amb",
- .probe = amb_probe,
- .remove = amb_remove_one,
- .id_table = amb_pci_tbl,
-};
-
-static int __init amb_module_init (void)
-{
- PRINTD (DBG_FLOW|DBG_INIT, "init_module");
-
- BUILD_BUG_ON(sizeof(amb_mem) != 4*16 + 4*12);
-
- show_version();
-
- amb_check_args();
-
- // get the juice
- return pci_register_driver(&amb_driver);
-}
-
-/********** module exit **********/
-
-static void __exit amb_module_exit (void)
-{
- PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
-
- pci_unregister_driver(&amb_driver);
-}
-
-module_init(amb_module_init);
-module_exit(amb_module_exit);
diff --git a/drivers/atm/ambassador.h b/drivers/atm/ambassador.h
deleted file mode 100644
index 086ceb8568dc..000000000000
--- a/drivers/atm/ambassador.h
+++ /dev/null
@@ -1,648 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- Madge Ambassador ATM Adapter driver.
- Copyright (C) 1995-1999 Madge Networks Ltd.
-
-*/
-
-#ifndef AMBASSADOR_H
-#define AMBASSADOR_H
-
-
-#ifdef CONFIG_ATM_AMBASSADOR_DEBUG
-#define DEBUG_AMBASSADOR
-#endif
-
-#define DEV_LABEL "amb"
-
-#ifndef PCI_VENDOR_ID_MADGE
-#define PCI_VENDOR_ID_MADGE 0x10B6
-#endif
-#ifndef PCI_VENDOR_ID_MADGE_AMBASSADOR
-#define PCI_DEVICE_ID_MADGE_AMBASSADOR 0x1001
-#endif
-#ifndef PCI_VENDOR_ID_MADGE_AMBASSADOR_BAD
-#define PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD 0x1002
-#endif
-
-// diagnostic output
-
-#define PRINTK(severity,format,args...) \
- printk(severity DEV_LABEL ": " format "\n" , ## args)
-
-#ifdef DEBUG_AMBASSADOR
-
-#define DBG_ERR 0x0001
-#define DBG_WARN 0x0002
-#define DBG_INFO 0x0004
-#define DBG_INIT 0x0008
-#define DBG_LOAD 0x0010
-#define DBG_VCC 0x0020
-#define DBG_QOS 0x0040
-#define DBG_CMD 0x0080
-#define DBG_TX 0x0100
-#define DBG_RX 0x0200
-#define DBG_SKB 0x0400
-#define DBG_POOL 0x0800
-#define DBG_IRQ 0x1000
-#define DBG_FLOW 0x2000
-#define DBG_REGS 0x4000
-#define DBG_DATA 0x8000
-#define DBG_MASK 0xffff
-
-/* the ## prevents the annoying double expansion of the macro arguments */
-/* KERN_INFO is used since KERN_DEBUG often does not make it to the console */
-#define PRINTDB(bits,format,args...) \
- ( (debug & (bits)) ? printk (KERN_INFO DEV_LABEL ": " format , ## args) : 1 )
-#define PRINTDM(bits,format,args...) \
- ( (debug & (bits)) ? printk (format , ## args) : 1 )
-#define PRINTDE(bits,format,args...) \
- ( (debug & (bits)) ? printk (format "\n" , ## args) : 1 )
-#define PRINTD(bits,format,args...) \
- ( (debug & (bits)) ? printk (KERN_INFO DEV_LABEL ": " format "\n" , ## args) : 1 )
-
-#else
-
-#define PRINTD(bits,format,args...)
-#define PRINTDB(bits,format,args...)
-#define PRINTDM(bits,format,args...)
-#define PRINTDE(bits,format,args...)
-
-#endif
-
-#define PRINTDD(bits,format,args...)
-#define PRINTDDB(sec,fmt,args...)
-#define PRINTDDM(sec,fmt,args...)
-#define PRINTDDE(sec,fmt,args...)
-
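A hedged sketch of how the three-part variants compose one log line:
PRINTDB opens it with the DEV_LABEL prefix, PRINTDM appends bare
fragments, and PRINTDE terminates it with a newline (the helper below
and its buf/len parameters are assumed for illustration):

static void dump_bytes_sketch(const unsigned char *buf, unsigned int len)
{
	unsigned int i;

	PRINTDB(DBG_DATA, "data:");
	for (i = 0; i < len; i++)
		PRINTDM(DBG_DATA, " %02x", buf[i]);
	PRINTDE(DBG_DATA, "");
}
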
-// tunable values (?)
-
-/* MUST be powers of two -- why? */
-#define COM_Q_ENTRIES 8
-#define TX_Q_ENTRIES 32
-#define RX_Q_ENTRIES 64
-
-// fixed values
-
-// guessing
-#define AMB_EXTENT 0x80
-
-// Minimum allowed size for an Ambassador queue
-#define MIN_QUEUE_SIZE 2
-
-// Ambassador microcode allows 1 to 4 pools; we use 4 (simpler)
-#define NUM_RX_POOLS 4
-
-// minimum RX buffers required to cope with replenishing delay
-#define MIN_RX_BUFFERS 1
-
-// minimum PCI latency we will tolerate (32 IS TOO SMALL)
-#define MIN_PCI_LATENCY 64 // 255
-
-// VCs supported by card (VPI always 0)
-#define NUM_VPI_BITS 0
-#define NUM_VCI_BITS 10
-#define NUM_VCS 1024
-
-/* The status field bits defined so far. */
-#define RX_ERR 0x8000 // always present if there is an error (hmm)
-#define CRC_ERR 0x4000 // AAL5 CRC error
-#define LEN_ERR 0x2000 // overlength frame
-#define ABORT_ERR 0x1000 // zero length field in received frame
-#define UNUSED_ERR 0x0800 // buffer returned unused
-
-// Adaptor commands
-
-#define SRB_OPEN_VC 0
-/* par_0: dwordswap(VC_number) */
-/* par_1: dwordswap(flags<<16) or wordswap(flags)*/
-/* flags: */
-
-/* LANE: 0x0004 */
-/* NOT_UBR: 0x0008 */
-/* ABR: 0x0010 */
-
-/* RxPool0: 0x0000 */
-/* RxPool1: 0x0020 */
-/* RxPool2: 0x0040 */
-/* RxPool3: 0x0060 */
-
-/* par_2: dwordswap(fp_rate<<16) or wordswap(fp_rate) */
-
-#define SRB_CLOSE_VC 1
-/* par_0: dwordswap(VC_number) */
-
-#define SRB_GET_BIA 2
-/* returns */
-/* par_0: dwordswap(half BIA) */
-/* par_1: dwordswap(half BIA) */
-
-#define SRB_GET_SUNI_STATS 3
-/* par_0: dwordswap(physical_host_address) */
-
-#define SRB_SET_BITS_8 4
-#define SRB_SET_BITS_16 5
-#define SRB_SET_BITS_32 6
-#define SRB_CLEAR_BITS_8 7
-#define SRB_CLEAR_BITS_16 8
-#define SRB_CLEAR_BITS_32 9
-/* par_0: dwordswap(ATMizer address) */
-/* par_1: dwordswap(mask) */
-
-#define SRB_SET_8 10
-#define SRB_SET_16 11
-#define SRB_SET_32 12
-/* par_0: dwordswap(ATMizer address) */
-/* par_1: dwordswap(data) */
-
-#define SRB_GET_32 13
-/* par_0: dwordswap(ATMizer address) */
-/* returns */
-/* par_1: dwordswap(ATMizer data) */
-
-#define SRB_GET_VERSION 14
-/* returns */
-/* par_0: dwordswap(Major Version) */
-/* par_1: dwordswap(Minor Version) */
-
-#define SRB_FLUSH_BUFFER_Q 15
-/* Only the flags that select the buffer pool; all others must be zero */
-/* par_0: dwordswap(flags<<16) or wordswap(flags)*/
-
-#define SRB_GET_DMA_SPEEDS 16
-/* returns */
-/* par_0: dwordswap(Read speed (bytes/sec)) */
-/* par_1: dwordswap(Write speed (bytes/sec)) */
-
-#define SRB_MODIFY_VC_RATE 17
-/* par_0: dwordswap(VC_number) */
-/* par_1: dwordswap(fp_rate<<16) or wordswap(fp_rate) */
-
-#define SRB_MODIFY_VC_FLAGS 18
-/* par_0: dwordswap(VC_number) */
-/* par_1: dwordswap(flags<<16) or wordswap(flags)*/
-
-/* flags: */
-
-/* LANE: 0x0004 */
-/* NOT_UBR: 0x0008 */
-/* ABR: 0x0010 */
-
-/* RxPool0: 0x0000 */
-/* RxPool1: 0x0020 */
-/* RxPool2: 0x0040 */
-/* RxPool3: 0x0060 */
-
-#define SRB_RATE_SHIFT 16
-#define SRB_POOL_SHIFT (SRB_FLAGS_SHIFT+5)
-#define SRB_FLAGS_SHIFT 16
-
-#define SRB_STOP_TASKING 19
-#define SRB_START_TASKING 20
-#define SRB_SHUT_DOWN 21
-#define MAX_SRB 21
-
-#define SRB_COMPLETE 0xffffffff
-
-#define TX_FRAME 0x80000000
-
-// number of types of SRB MUST be a power of two -- why?
-#define NUM_OF_SRB 32
-
-// number of bits of period info for rate
-#define MAX_RATE_BITS 6
-
-#define TX_UBR 0x0000
-#define TX_UBR_CAPPED 0x0008
-#define TX_ABR 0x0018
-#define TX_FRAME_NOTCAP 0x0000
-#define TX_FRAME_CAPPED 0x8000
-
-#define FP_155_RATE 0x24b1
-#define FP_25_RATE 0x1f9d
-
-/* #define VERSION_NUMBER 0x01000000 // initial release */
-/* #define VERSION_NUMBER 0x01010000 // fixed startup probs PLX MB0 not cleared */
-/* #define VERSION_NUMBER 0x01020000 // changed SUNI reset timings; allowed r/w onchip */
-
-/* #define VERSION_NUMBER 0x01030000 // clear local doorbell int reg on reset */
-/* #define VERSION_NUMBER 0x01040000 // PLX bug work around version PLUS */
-/* remove race conditions on basic interface */
-/* indicate to the host that diagnostics */
-/* have finished; if failed, how and what */
-/* failed */
-/* fix host memory test to fix PLX bug */
-/* allow flash upgrade and BIA upgrade directly */
-/* */
-#define VERSION_NUMBER 0x01050025 /* Jason's first hacked version. */
-/* Change in download algorithm */
-
-#define DMA_VALID 0xb728e149 /* completely random */
-
-#define FLASH_BASE 0xa0c00000
-#define FLASH_SIZE 0x00020000 /* 128K */
-#define BIA_BASE (FLASH_BASE+0x0001c000) /* Flash Sector 7 */
-#define BIA_ADDRESS ((void *)0xa0c1c000)
-#define PLX_BASE 0xe0000000
-
-typedef enum {
- host_memory_test = 1,
- read_adapter_memory,
- write_adapter_memory,
- adapter_start,
- get_version_number,
- interrupt_host,
- flash_erase_sector,
- adap_download_block = 0x20,
- adap_erase_flash,
- adap_run_in_iram,
- adap_end_download
-} loader_command;
-
-#define BAD_COMMAND (-1)
-#define COMMAND_IN_PROGRESS 1
-#define COMMAND_PASSED_TEST 2
-#define COMMAND_FAILED_TEST 3
-#define COMMAND_READ_DATA_OK 4
-#define COMMAND_READ_BAD_ADDRESS 5
-#define COMMAND_WRITE_DATA_OK 6
-#define COMMAND_WRITE_BAD_ADDRESS 7
-#define COMMAND_WRITE_FLASH_FAILURE 8
-#define COMMAND_COMPLETE 9
-#define COMMAND_FLASH_ERASE_FAILURE 10
-#define COMMAND_WRITE_BAD_DATA 11
-
-/* bit fields for mailbox[0] return values */
-
-#define GPINT_TST_FAILURE 0x00000001
-#define SUNI_DATA_PATTERN_FAILURE 0x00000002
-#define SUNI_DATA_BITS_FAILURE 0x00000004
-#define SUNI_UTOPIA_FAILURE 0x00000008
-#define SUNI_FIFO_FAILURE 0x00000010
-#define SRAM_FAILURE 0x00000020
-#define SELF_TEST_FAILURE 0x0000003f
-
-/* mailbox[1] = 0 in progress, -1 on completion */
-/* mailbox[2] = current test 00 00 test(8 bit) phase(8 bit) */
-/* mailbox[3] = last failure, 00 00 test(8 bit) phase(8 bit) */
-/* mailbox[4],mailbox[5],mailbox[6] random failure values */
-
-/* PLX/etc. memory map including command structure */
-
-/* These registers may also be memory mapped in PCI memory */
-
-#define UNUSED_LOADER_MAILBOXES 6
-
-typedef struct {
- u32 stuff[16];
- union {
- struct {
- u32 result;
- u32 ready;
- u32 stuff[UNUSED_LOADER_MAILBOXES];
- } loader;
- struct {
- u32 cmd_address;
- u32 tx_address;
- u32 rx_address[NUM_RX_POOLS];
- u32 gen_counter;
- u32 spare;
- } adapter;
- } mb;
- u32 doorbell;
- u32 interrupt;
- u32 interrupt_control;
- u32 reset_control;
-} amb_mem;
-
-/* RESET bit, IRQ (card to host) and doorbell (host to card) enable bits */
-#define AMB_RESET_BITS 0x40000000
-#define AMB_INTERRUPT_BITS 0x00000300
-#define AMB_DOORBELL_BITS 0x00030000
-
-/* loader commands */
-
-#define MAX_COMMAND_DATA 13
-#define MAX_TRANSFER_DATA 11
-
-typedef struct {
- __be32 address;
- __be32 count;
- __be32 data[MAX_TRANSFER_DATA];
-} transfer_block;
-
-typedef struct {
- __be32 result;
- __be32 command;
- union {
- transfer_block transfer;
- __be32 version;
- __be32 start;
- __be32 data[MAX_COMMAND_DATA];
- } payload;
- __be32 valid;
-} loader_block;
-
-/* command queue */
-
-/* Again all data are BIG ENDIAN */
-
-typedef struct {
- union {
- struct {
- __be32 vc;
- __be32 flags;
- __be32 rate;
- } open;
- struct {
- __be32 vc;
- __be32 rate;
- } modify_rate;
- struct {
- __be32 vc;
- __be32 flags;
- } modify_flags;
- struct {
- __be32 vc;
- } close;
- struct {
- __be32 lower4;
- __be32 upper2;
- } bia;
- struct {
- __be32 address;
- } suni;
- struct {
- __be32 major;
- __be32 minor;
- } version;
- struct {
- __be32 read;
- __be32 write;
- } speed;
- struct {
- __be32 flags;
- } flush;
- struct {
- __be32 address;
- __be32 data;
- } memory;
- __be32 par[3];
- } args;
- __be32 request;
-} command;
-
-/* transmit queues and associated structures */
-
-/* The host's transmit structure. All BIG ENDIAN; host address
- restricted to first 1GByte, but address passed to the card must
- have the top MS bit or'ed in. -- check this */
-
-/* TX is described by 1+ tx_frags followed by a tx_frag_end */
-
-typedef struct {
- __be32 bytes;
- __be32 address;
-} tx_frag;
-
-/* apart from handle, the fields here are for the adapter to play with
- and should be set to zero */
-
-typedef struct {
- u32 handle;
- u16 vc;
- u16 next_descriptor_length;
- u32 next_descriptor;
-#ifdef AMB_NEW_MICROCODE
- u8 cpcs_uu;
- u8 cpi;
- u16 pad;
-#endif
-} tx_frag_end;
-
-typedef struct {
- tx_frag tx_frag;
- tx_frag_end tx_frag_end;
- struct sk_buff * skb;
-} tx_simple;
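
A hedged sketch of filling the single-fragment descriptor that tx_simple
models (the helper name and the DMA address and handle parameters are
assumptions for illustration, not the driver's code):

static void fill_tx_simple(tx_simple *tx, struct sk_buff *skb,
			   u32 dma_addr, u32 handle)
{
	tx->tx_frag.bytes = cpu_to_be32(skb->len);
	tx->tx_frag.address = cpu_to_be32(dma_addr);

	/* apart from handle, the trailer fields belong to the adapter
	   and are left zero as required above */
	tx->tx_frag_end.handle = handle;
	tx->tx_frag_end.vc = 0;
	tx->tx_frag_end.next_descriptor_length = 0;
	tx->tx_frag_end.next_descriptor = 0;

	tx->skb = skb;
}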
-
-#if 0
-typedef union {
- tx_frag fragment;
- tx_frag_end end_of_list;
-} tx_descr;
-#endif
-
-/* this "points" to the sequence of fragments and trailer */
-
-typedef struct {
- __be16 vc;
- __be16 tx_descr_length;
- __be32 tx_descr_addr;
-} tx_in;
-
-/* handle is the handle from tx_in */
-
-typedef struct {
- u32 handle;
-} tx_out;
-
-/* receive frame structure */
-
-/* All BIG ENDIAN; handle is as passed from host; length is zero for
-   aborted frames and frames with errors. Header is actually the VC
-   number; lec-id is NOT yet supported. */
-
-typedef struct {
- u32 handle;
- __be16 vc;
- __be16 lec_id; // unused
- __be16 status;
- __be16 length;
-} rx_out;
-
-/* buffer supply structure */
-
-typedef struct {
- u32 handle;
- __be32 host_address;
-} rx_in;
-
-/* This first structure is the area in host memory where the adapter
- writes its pointer values. These pointer values are BIG ENDIAN and
- reside in the same 4MB 'page' as this structure. The host gives the
- adapter the address of this block by sending a doorbell interrupt
- to the adapter after downloading the code and setting it going. The
- addresses have the top 10 bits set to 1010000010b -- really?
-
- The host must initialise these before handing the block to the
- adapter. */
-
-typedef struct {
- __be32 command_start; /* SRB commands completions */
- __be32 command_end; /* SRB commands completions */
- __be32 tx_start;
- __be32 tx_end;
- __be32 txcom_start; /* tx completions */
- __be32 txcom_end; /* tx completions */
- struct {
- __be32 buffer_start;
- __be32 buffer_end;
- u32 buffer_q_get;
- u32 buffer_q_end;
- u32 buffer_aptr;
- __be32 rx_start; /* rx completions */
- __be32 rx_end;
- u32 rx_ptr;
- __be32 buffer_size; /* size of host buffer */
- } rec_struct[NUM_RX_POOLS];
-#ifdef AMB_NEW_MICROCODE
- u16 init_flags;
- u16 talk_block_spare;
-#endif
-} adap_talk_block;
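
Following the comment above, a schematic sketch of initialising the queue
bounds and publishing the block to the adapter; the helper, its
parameters and the single doorbell write are assumptions for
illustration (the real handshake also covers the tx and rx rings):

static int publish_talk_block(struct device *dev, u32 __iomem *doorbell,
			      u32 cq_start, u32 cq_end)
{
	adap_talk_block *tb;
	dma_addr_t bus;

	tb = dma_alloc_coherent(dev, sizeof(*tb), &bus, GFP_KERNEL);
	if (!tb)
		return -ENOMEM;

	/* queue bounds are big endian, like everything in this interface */
	tb->command_start = cpu_to_be32(cq_start);
	tb->command_end = cpu_to_be32(cq_end);

	/* hand the block's bus address to the adapter */
	writel(lower_32_bits(bus), doorbell);
	return 0;
}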
-
-/* This structure must be kept in line with the vcr image in sarmain.h
-
- This is the structure in the host filled in by the adapter by
- GET_SUNI_STATS */
-
-typedef struct {
- u8 racp_chcs;
- u8 racp_uhcs;
- u16 spare;
- u32 racp_rcell;
- u32 tacp_tcell;
- u32 flags;
- u32 dropped_cells;
- u32 dropped_frames;
-} suni_stats;
-
-typedef enum {
- dead
-} amb_flags;
-
-#define NEXTQ(current,start,limit) \
- ( (current)+1 < (limit) ? (current)+1 : (start) )
-
-typedef struct {
- command * start;
- command * in;
- command * out;
- command * limit;
-} amb_cq_ptrs;
-
-typedef struct {
- spinlock_t lock;
- unsigned int pending;
- unsigned int high;
- unsigned int filled;
- unsigned int maximum; // size - 1 (q implementation)
- amb_cq_ptrs ptrs;
-} amb_cq;
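
NEXTQ above provides the wrap-around step for these ring pointers; a
minimal enqueue sketch built on it (the function name and the
full-queue policy are assumptions for illustration):

static int cq_enqueue_sketch(amb_cq *cq, const command *cmd)
{
	command *next = NEXTQ(cq->ptrs.in, cq->ptrs.start, cq->ptrs.limit);

	/* keep one slot free so that in == out means empty */
	if (next == cq->ptrs.out)
		return -EBUSY;

	*cq->ptrs.in = *cmd;
	cq->ptrs.in = next;
	cq->pending++;
	return 0;
}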
-
-typedef struct {
- spinlock_t lock;
- unsigned int pending;
- unsigned int high;
- unsigned int filled;
- unsigned int maximum; // size - 1 (q implementation)
- struct {
- tx_in * start;
- tx_in * ptr;
- tx_in * limit;
- } in;
- struct {
- tx_out * start;
- tx_out * ptr;
- tx_out * limit;
- } out;
-} amb_txq;
-
-typedef struct {
- spinlock_t lock;
- unsigned int pending;
- unsigned int low;
- unsigned int emptied;
- unsigned int maximum; // size - 1 (q implementation)
- struct {
- rx_in * start;
- rx_in * ptr;
- rx_in * limit;
- } in;
- struct {
- rx_out * start;
- rx_out * ptr;
- rx_out * limit;
- } out;
- unsigned int buffers_wanted;
- unsigned int buffer_size;
-} amb_rxq;
-
-typedef struct {
- unsigned long tx_ok;
- struct {
- unsigned long ok;
- unsigned long error;
- unsigned long badcrc;
- unsigned long toolong;
- unsigned long aborted;
- unsigned long unused;
- } rx;
-} amb_stats;
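
The RX status bits defined earlier map naturally onto these counters; a
hedged sketch of the bookkeeping (the helper name and the exact
precedence among error bits are assumptions):

static void count_rx_status(amb_stats *stats, const rx_out *rx)
{
	u16 status = be16_to_cpu(rx->status);

	if (!(status & RX_ERR))
		stats->rx.ok++;
	else if (status & CRC_ERR)
		stats->rx.badcrc++;
	else if (status & LEN_ERR)
		stats->rx.toolong++;
	else if (status & ABORT_ERR)
		stats->rx.aborted++;
	else if (status & UNUSED_ERR)
		stats->rx.unused++;
	else
		stats->rx.error++;
}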
-
-// a single struct pointed to by atm_vcc->dev_data
-
-typedef struct {
- u8 tx_vc_bits:7;
- u8 tx_present:1;
-} amb_tx_info;
-
-typedef struct {
- unsigned char pool;
-} amb_rx_info;
-
-typedef struct {
- amb_rx_info rx_info;
- u16 tx_frame_bits;
- unsigned int tx_rate;
- unsigned int rx_rate;
-} amb_vcc;
-
-struct amb_dev {
- u8 irq;
- unsigned long flags;
- u32 iobase;
- u32 * membase;
-
- amb_cq cq;
- amb_txq txq;
- amb_rxq rxq[NUM_RX_POOLS];
-
- struct mutex vcc_sf;
- amb_tx_info txer[NUM_VCS];
- struct atm_vcc * rxer[NUM_VCS];
- unsigned int tx_avail;
- unsigned int rx_avail;
-
- amb_stats stats;
-
- struct atm_dev * atm_dev;
- struct pci_dev * pci_dev;
- struct timer_list housekeeping;
-};
-
-typedef struct amb_dev amb_dev;
-
-#define AMB_DEV(atm_dev) ((amb_dev *) (atm_dev)->dev_data)
-#define AMB_VCC(atm_vcc) ((amb_vcc *) (atm_vcc)->dev_data)
-
-/* rate rounding */
-
-typedef enum {
- round_up,
- round_down,
- round_nearest
-} rounding;
-
-#endif
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index a311df07b1bd..4deb60a3b43f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2613,7 +2613,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_counters);
SET_DEVICE_OP(dev_ops, create_cq);
SET_DEVICE_OP(dev_ops, create_flow);
- SET_DEVICE_OP(dev_ops, create_flow_action_esp);
SET_DEVICE_OP(dev_ops, create_qp);
SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
SET_DEVICE_OP(dev_ops, create_srq);
@@ -2676,7 +2675,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, modify_ah);
SET_DEVICE_OP(dev_ops, modify_cq);
SET_DEVICE_OP(dev_ops, modify_device);
- SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
SET_DEVICE_OP(dev_ops, modify_hw_stat);
SET_DEVICE_OP(dev_ops, modify_port);
SET_DEVICE_OP(dev_ops, modify_qp);
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index d42ed7ff223e..0ddcf6da66c4 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -46,385 +46,6 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
return action->device->ops.destroy_flow_action(action);
}
-static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
- u32 flags, bool is_modify)
-{
- u64 verbs_flags = flags;
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN))
- verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED;
-
- if (is_modify && uverbs_attr_is_valid(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS))
- verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS;
-
- return verbs_flags;
-};
-
-static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat)
-{
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm =
- &keymat->keymat.aes_gcm;
-
- if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return -EOPNOTSUPP;
-
- if (aes_gcm->key_len != 32 &&
- aes_gcm->key_len != 24 &&
- aes_gcm->key_len != 16)
- return -EINVAL;
-
- if (aes_gcm->icv_len != 16 &&
- aes_gcm->icv_len != 8 &&
- aes_gcm->icv_len != 12)
- return -EINVAL;
-
- return 0;
-}
-
-static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = {
- [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm,
-};
-
-static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify)
-{
-	/* This is used to modify an esp flow action with replay protection
-	 * enabled into one with it disabled. This is only supported via
-	 * modify, as in the create verb we can simply drop the REPLAY
-	 * attribute and achieve the same thing.
- */
- return is_modify ? 0 : -EINVAL;
-}
-
-static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify)
-{
- /* Some replay protections could always be enabled without validating
- * anything.
- */
- return 0;
-}
-
-static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify) = {
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none,
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok,
-};
-
-static int parse_esp_ip(enum ib_flow_spec_type proto,
- const void __user *val_ptr,
- size_t len, union ib_flow_spec *out)
-{
- int ret;
- const struct ib_uverbs_flow_ipv4_filter ipv4 = {
- .src_ip = cpu_to_be32(0xffffffffUL),
- .dst_ip = cpu_to_be32(0xffffffffUL),
- .proto = 0xff,
- .tos = 0xff,
- .ttl = 0xff,
- .flags = 0xff,
- };
- const struct ib_uverbs_flow_ipv6_filter ipv6 = {
- .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
- .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
- .flow_label = cpu_to_be32(0xffffffffUL),
- .next_hdr = 0xff,
- .traffic_class = 0xff,
- .hop_limit = 0xff,
- };
- union {
- struct ib_uverbs_flow_ipv4_filter ipv4;
- struct ib_uverbs_flow_ipv6_filter ipv6;
- } user_val = {};
- const void *user_pmask;
- size_t val_len;
-
- /* If the flow IPv4/IPv6 flow specifications are extended, the mask
- * should be changed as well.
- */
- BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) +
- sizeof(ipv4.flags) != sizeof(ipv4));
- BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) +
- sizeof(ipv6.reserved) != sizeof(ipv6));
-
- switch (proto) {
- case IB_FLOW_SPEC_IPV4:
- if (len > sizeof(user_val.ipv4) &&
- !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4),
- len - sizeof(user_val.ipv4)))
- return -EOPNOTSUPP;
-
- val_len = min_t(size_t, len, sizeof(user_val.ipv4));
- ret = copy_from_user(&user_val.ipv4, val_ptr,
- val_len);
- if (ret)
- return -EFAULT;
-
- user_pmask = &ipv4;
- break;
- case IB_FLOW_SPEC_IPV6:
- if (len > sizeof(user_val.ipv6) &&
- !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6),
- len - sizeof(user_val.ipv6)))
- return -EOPNOTSUPP;
-
- val_len = min_t(size_t, len, sizeof(user_val.ipv6));
- ret = copy_from_user(&user_val.ipv6, val_ptr,
- val_len);
- if (ret)
- return -EFAULT;
-
- user_pmask = &ipv6;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask,
- &user_val,
- val_len, out);
-}
-
-static int flow_action_esp_get_encap(struct ib_flow_spec_list *out,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uverbs_flow_action_esp_encap uverbs_encap;
- int ret;
-
- ret = uverbs_copy_from(&uverbs_encap, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP);
- if (ret)
- return ret;
-
- /* We currently support only one encap */
- if (uverbs_encap.next_ptr)
- return -EOPNOTSUPP;
-
- if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 &&
- uverbs_encap.type != IB_FLOW_SPEC_IPV6)
- return -EOPNOTSUPP;
-
- return parse_esp_ip(uverbs_encap.type,
- u64_to_user_ptr(uverbs_encap.val_ptr),
- uverbs_encap.len,
- &out->spec);
-}
-
-struct ib_flow_action_esp_attr {
- struct ib_flow_action_attrs_esp hdr;
- struct ib_flow_action_attrs_esp_keymats keymat;
- struct ib_flow_action_attrs_esp_replays replay;
- /* We currently support only one spec */
- struct ib_flow_spec_list encap;
-};
-
-#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
-static int parse_flow_action_esp(struct ib_device *ib_dev,
- struct uverbs_attr_bundle *attrs,
- struct ib_flow_action_esp_attr *esp_attr,
- bool is_modify)
-{
- struct ib_uverbs_flow_action_esp uverbs_esp = {};
- int ret;
-
- /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
- ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ESN);
- if (IS_UVERBS_COPY_ERR(ret))
- return ret;
-
- /* This can be called from FLOW_ACTION_ESP_MODIFY where
- * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional
- */
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) {
- ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS);
- if (ret)
- return ret;
-
- if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1))
- return -EOPNOTSUPP;
-
- esp_attr->hdr.spi = uverbs_esp.spi;
- esp_attr->hdr.seq = uverbs_esp.seq;
- esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad;
- esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts;
- }
- esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags,
- is_modify);
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) {
- esp_attr->keymat.protocol =
- uverbs_attr_get_enum_id(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
- ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat,
- attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
- if (ret)
- return ret;
-
- ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat);
- if (ret)
- return ret;
-
- esp_attr->hdr.keymat = &esp_attr->keymat;
- }
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) {
- esp_attr->replay.protocol =
- uverbs_attr_get_enum_id(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
-
- ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay,
- attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
- if (ret)
- return ret;
-
- ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay,
- is_modify);
- if (ret)
- return ret;
-
- esp_attr->hdr.replay = &esp_attr->replay;
- }
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) {
- ret = flow_action_esp_get_encap(&esp_attr->encap, attrs);
- if (ret)
- return ret;
-
- esp_attr->hdr.encap = &esp_attr->encap;
- }
-
- return 0;
-}
-
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
- struct ib_device *ib_dev = attrs->context->device;
- int ret;
- struct ib_flow_action *action;
- struct ib_flow_action_esp_attr esp_attr = {};
-
- if (!ib_dev->ops.create_flow_action_esp)
- return -EOPNOTSUPP;
-
- ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false);
- if (ret)
- return ret;
-
- /* No need to check as this attribute is marked as MANDATORY */
- action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr,
- attrs);
- if (IS_ERR(action))
- return PTR_ERR(action);
-
- uverbs_flow_action_fill_action(action, uobj, ib_dev,
- IB_FLOW_ACTION_ESP);
-
- return 0;
-}
-
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
- struct ib_flow_action *action = uobj->object;
- int ret;
- struct ib_flow_action_esp_attr esp_attr = {};
-
- if (!action->device->ops.modify_flow_action_esp)
- return -EOPNOTSUPP;
-
- ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true);
- if (ret)
- return ret;
-
- if (action->type != IB_FLOW_ACTION_ESP)
- return -EINVAL;
-
- return action->device->ops.modify_flow_action_esp(action,
- &esp_attr.hdr,
- attrs);
-}
-
-static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
- [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_STRUCT(
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm,
- aes_key),
- },
-};
-
-static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp,
- size),
- },
-};
-
-DECLARE_UVERBS_NAMED_METHOD(
- UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
- UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
- hard_limit_pkts),
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
- UVERBS_ATTR_TYPE(__u32),
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat,
- UA_MANDATORY),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD(
- UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
- UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_WRITE,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
- hard_limit_pkts),
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
- UVERBS_ATTR_TYPE(__u32),
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat,
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
- UA_OPTIONAL));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_FLOW_ACTION_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
@@ -435,9 +56,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_FLOW_ACTION,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY));
const struct uapi_definition uverbs_def_obj_flow_action[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 661ed2b44508..9c2886bc72cb 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -15,7 +15,6 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
-#include <linux/mlx5/accel.h>
#include <linux/mlx5/eswitch.h>
#include <net/inet_ecn.h>
#include "mlx5_ib.h"
@@ -148,16 +147,6 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
{
switch (maction->ib_action.type) {
- case IB_FLOW_ACTION_ESP:
- if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
- return -EINVAL;
- /* Currently only AES_GCM keymat is supported by the driver */
- action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
- action->action |= is_egress ?
- MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
- return 0;
case IB_FLOW_ACTION_UNSPECIFIED:
if (maction->flow_action_raw.sub_type ==
MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
@@ -368,14 +357,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev,
ib_spec->type & IB_FLOW_SPEC_INNER);
break;
case IB_FLOW_SPEC_ESP:
- if (ib_spec->esp.mask.seq)
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
- ntohl(ib_spec->esp.mask.spi));
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
- ntohl(ib_spec->esp.val.spi));
- break;
+ return -EOPNOTSUPP;
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
@@ -587,47 +569,6 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
return false;
}
-enum valid_spec {
- VALID_SPEC_INVALID,
- VALID_SPEC_VALID,
- VALID_SPEC_NA,
-};
-
-static enum valid_spec
-is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- const u32 *match_c = spec->match_criteria;
- bool is_crypto =
- (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
- bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
- bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /*
-	 * Currently only crypto is supported in egress; once regular egress
-	 * rules are supported, always return VALID_SPEC_NA.
- */
- if (!is_crypto)
- return VALID_SPEC_NA;
-
- return is_crypto && is_ipsec &&
- (!egress || (!is_drop &&
- !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
- VALID_SPEC_VALID : VALID_SPEC_INVALID;
-}
-
-static bool is_valid_spec(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
-	/* We currently only support ipsec egress flow */
- return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
-}
-
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
const struct ib_flow_attr *flow_attr,
bool check_inner)
@@ -1154,8 +1095,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
- if (is_egress &&
- !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
+ if (is_egress) {
err = -EINVAL;
goto free;
}
@@ -1740,149 +1680,6 @@ unlock:
return ERR_PTR(err);
}
-static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
-{
- u32 flags = 0;
-
- if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
- flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
-
- return flags;
-}
-
-#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \
- MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
-static struct ib_flow_action *
-mlx5_ib_create_flow_action_esp(struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_dev *mdev = to_mdev(device);
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
- struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
- struct mlx5_ib_flow_action *action;
- u64 action_flags;
- u64 flags;
- int err = 0;
-
- err = uverbs_get_flags64(
- &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
- if (err)
- return ERR_PTR(err);
-
- flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
-
-	/* We currently only support a subset of the standard features. Only a
- * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
- * (with overlap). Full offload mode isn't supported.
- */
- if (!attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
- return ERR_PTR(-EOPNOTSUPP);
-
- if (attr->keymat->protocol !=
- IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
- return ERR_PTR(-EOPNOTSUPP);
-
- aes_gcm = &attr->keymat->keymat.aes_gcm;
-
- if (aes_gcm->icv_len != 16 ||
- aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return ERR_PTR(-EOPNOTSUPP);
-
- action = kmalloc(sizeof(*action), GFP_KERNEL);
- if (!action)
- return ERR_PTR(-ENOMEM);
-
- action->esp_aes_gcm.ib_flags = attr->flags;
- memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
- sizeof(accel_attrs.keymat.aes_gcm.aes_key));
- accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
- memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
- sizeof(accel_attrs.keymat.aes_gcm.salt));
- memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
- sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
- accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
- accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
- accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
- accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
-
- action->esp_aes_gcm.ctx =
- mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
- if (IS_ERR(action->esp_aes_gcm.ctx)) {
- err = PTR_ERR(action->esp_aes_gcm.ctx);
- goto err_parse;
- }
-
- action->esp_aes_gcm.ib_flags = attr->flags;
-
- return &action->ib_action;
-
-err_parse:
- kfree(action);
- return ERR_PTR(err);
-}
-
-static int
-mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
- struct mlx5_accel_esp_xfrm_attrs accel_attrs;
- int err = 0;
-
- if (attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
- return -EOPNOTSUPP;
-
- /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
- * be modified.
- */
- if (!(maction->esp_aes_gcm.ib_flags &
- IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
- attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
- return -EINVAL;
-
- memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
- sizeof(accel_attrs));
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
- else
- accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
- &accel_attrs);
- if (err)
- return err;
-
- maction->esp_aes_gcm.ib_flags &=
- ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
- maction->esp_aes_gcm.ib_flags |=
- attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
-
- return 0;
-}
-
static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
{
switch (maction->flow_action_raw.sub_type) {
@@ -1906,13 +1703,6 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
struct mlx5_ib_flow_action *maction = to_mflow_act(action);
switch (action->type) {
- case IB_FLOW_ACTION_ESP:
- /*
-		 * We only support aes_gcm for now, so we implicitly know this is
-		 * the underlying crypto.
- */
- mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
- break;
case IB_FLOW_ACTION_UNSPECIFIED:
destroy_flow_action_raw(maction);
break;
@@ -2709,11 +2499,6 @@ static const struct ib_device_ops flow_ops = {
.destroy_flow_action = mlx5_ib_destroy_flow_action,
};
-static const struct ib_device_ops flow_ipsec_ops = {
- .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
- .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
-};
-
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
{
dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
@@ -2724,9 +2509,5 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
mutex_init(&dev->flow_db->lock);
ib_set_device_ops(&dev->ib_dev, &flow_ops);
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops);
-
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 32a0ea820573..61aa196d6484 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -41,7 +41,6 @@
#include "wr.h"
#include "restrack.h"
#include "counters.h"
-#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
@@ -906,10 +905,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_RX_HASH_SRC_PORT_UDP |
MLX5_RX_HASH_DST_PORT_UDP |
MLX5_RX_HASH_INNER;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- resp.rss_caps.rx_hash_fields_mask |=
- MLX5_RX_HASH_IPSEC_SPI;
resp.response_length += sizeof(resp.rss_caps);
}
} else {
@@ -1791,23 +1786,6 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
MLX5_CAP_GEN(dev->mdev,
num_of_uars_per_page) : 1;
-
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE) {
- if (mlx5_get_flow_namespace(dev->mdev,
- MLX5_FLOW_NAMESPACE_EGRESS))
- resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
- resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
- if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
- resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
- /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
- }
-
resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 :
bfregi->total_num_bfregs - bfregi->num_dyn_bfregs;
resp->num_ports = dev->num_ports;
@@ -3605,13 +3583,6 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR,
&UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY));
ADD_UVERBS_ATTRIBUTES_SIMPLE(
- mlx5_ib_flow_action,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- enum mlx5_ib_uapi_flow_action_flags));
-
-ADD_UVERBS_ATTRIBUTES_SIMPLE(
mlx5_ib_query_context,
UVERBS_OBJECT_DEVICE,
UVERBS_METHOD_QUERY_CONTEXT,
@@ -3628,8 +3599,6 @@ static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
UAPI_DEF_CHAIN(mlx5_ib_dm_defs),
- UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
- &mlx5_ib_flow_action),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index a6606736d8c5..2776ca5fc33f 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -121,7 +121,7 @@ mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (sk->sk_state == MISDN_CLOSED)
return 0;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
return err;
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 19f0035d4410..c4ea73d996e8 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -24,6 +24,11 @@
#include "mt7530.h"
+static struct mt753x_pcs *pcs_to_mt753x_pcs(struct phylink_pcs *pcs)
+{
+ return container_of(pcs, struct mt753x_pcs, pcs);
+}
+
/* String, offset, and register size in bytes if different from 4 bytes */
static const struct mt7530_mib_desc mt7530_mib[] = {
MIB_DESC(1, 0x00, "TxDrop"),
@@ -2389,35 +2394,30 @@ mt7531_setup(struct dsa_switch *ds)
return 0;
}
-static bool
-mt7530_phy_mode_supported(struct dsa_switch *ds, int port,
- const struct phylink_link_state *state)
+static void mt7530_mac_port_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
- struct mt7530_priv *priv = ds->priv;
-
switch (port) {
case 0 ... 4: /* Internal phy */
- if (state->interface != PHY_INTERFACE_MODE_GMII)
- return false;
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ config->supported_interfaces);
break;
+
case 5: /* 2nd cpu port with phy of port 0 or 4 / external phy */
- if (!phy_interface_mode_is_rgmii(state->interface) &&
- state->interface != PHY_INTERFACE_MODE_MII &&
- state->interface != PHY_INTERFACE_MODE_GMII)
- return false;
+ phy_interface_set_rgmii(config->supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_MII,
+ config->supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ config->supported_interfaces);
break;
+
case 6: /* 1st cpu port */
- if (state->interface != PHY_INTERFACE_MODE_RGMII &&
- state->interface != PHY_INTERFACE_MODE_TRGMII)
- return false;
+ __set_bit(PHY_INTERFACE_MODE_RGMII,
+ config->supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_TRGMII,
+ config->supported_interfaces);
break;
- default:
- dev_err(priv->dev, "%s: unsupported port: %i\n", __func__,
- port);
- return false;
}
-
- return true;
}
static bool mt7531_is_rgmii_port(struct mt7530_priv *priv, u32 port)
@@ -2425,42 +2425,35 @@ static bool mt7531_is_rgmii_port(struct mt7530_priv *priv, u32 port)
return (port == 5) && (priv->p5_intf_sel != P5_INTF_SEL_GMAC5_SGMII);
}
-static bool
-mt7531_phy_mode_supported(struct dsa_switch *ds, int port,
- const struct phylink_link_state *state)
+static void mt7531_mac_port_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
struct mt7530_priv *priv = ds->priv;
switch (port) {
case 0 ... 4: /* Internal phy */
- if (state->interface != PHY_INTERFACE_MODE_GMII)
- return false;
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ config->supported_interfaces);
break;
+
case 5: /* 2nd cpu port supports either rgmii or sgmii/8023z */
- if (mt7531_is_rgmii_port(priv, port))
- return phy_interface_mode_is_rgmii(state->interface);
+ if (mt7531_is_rgmii_port(priv, port)) {
+ phy_interface_set_rgmii(config->supported_interfaces);
+ break;
+ }
fallthrough;
+
case 6: /* 1st cpu port supports sgmii/8023z only */
- if (state->interface != PHY_INTERFACE_MODE_SGMII &&
- !phy_interface_mode_is_8023z(state->interface))
- return false;
+ __set_bit(PHY_INTERFACE_MODE_SGMII,
+ config->supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_1000BASEX,
+ config->supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_2500BASEX,
+ config->supported_interfaces);
+
+ config->mac_capabilities |= MAC_2500FD;
break;
- default:
- dev_err(priv->dev, "%s: unsupported port: %i\n", __func__,
- port);
- return false;
}
-
- return true;
-}
-
-static bool
-mt753x_phy_mode_supported(struct dsa_switch *ds, int port,
- const struct phylink_link_state *state)
-{
- struct mt7530_priv *priv = ds->priv;
-
- return priv->info->phy_mode_supported(ds, port, state);
}
static int
@@ -2533,30 +2526,11 @@ static int mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port,
return 0;
}
-static void mt7531_sgmii_validate(struct mt7530_priv *priv, int port,
- unsigned long *supported)
-{
-	/* Port5 supports either RGMII or SGMII.
- * Port6 supports SGMII only.
- */
- switch (port) {
- case 5:
- if (mt7531_is_rgmii_port(priv, port))
- break;
- fallthrough;
- case 6:
- phylink_set(supported, 1000baseX_Full);
- phylink_set(supported, 2500baseX_Full);
- phylink_set(supported, 2500baseT_Full);
- }
-}
-
-static void
-mt7531_sgmii_link_up_force(struct dsa_switch *ds, int port,
- unsigned int mode, phy_interface_t interface,
- int speed, int duplex)
+static void mt7531_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface, int speed, int duplex)
{
- struct mt7530_priv *priv = ds->priv;
+ struct mt7530_priv *priv = pcs_to_mt753x_pcs(pcs)->priv;
+ int port = pcs_to_mt753x_pcs(pcs)->port;
unsigned int val;
/* For adjusting speed and duplex of SGMII force mode. */
@@ -2582,6 +2556,9 @@ mt7531_sgmii_link_up_force(struct dsa_switch *ds, int port,
/* MT7531 SGMII 1G force mode can only work in full duplex mode,
 	 * whether or not MT7531_SGMII_FORCE_HALF_DUPLEX is set.
+ *
+ * The speed check is unnecessary as the MAC capabilities apply
+ * this restriction. --rmk
*/
if ((speed == SPEED_10 || speed == SPEED_100) &&
duplex != DUPLEX_FULL)
@@ -2657,9 +2634,10 @@ static int mt7531_sgmii_setup_mode_an(struct mt7530_priv *priv, int port,
return 0;
}
-static void mt7531_sgmii_restart_an(struct dsa_switch *ds, int port)
+static void mt7531_pcs_an_restart(struct phylink_pcs *pcs)
{
- struct mt7530_priv *priv = ds->priv;
+ struct mt7530_priv *priv = pcs_to_mt753x_pcs(pcs)->priv;
+ int port = pcs_to_mt753x_pcs(pcs)->port;
u32 val;
/* Only restart AN when AN is enabled */
@@ -2716,6 +2694,24 @@ mt753x_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
return priv->info->mac_port_config(ds, port, mode, state->interface);
}
+static struct phylink_pcs *
+mt753x_phylink_mac_select_pcs(struct dsa_switch *ds, int port,
+ phy_interface_t interface)
+{
+ struct mt7530_priv *priv = ds->priv;
+
+ switch (interface) {
+ case PHY_INTERFACE_MODE_TRGMII:
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_1000BASEX:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ return &priv->pcs[port].pcs;
+
+ default:
+ return NULL;
+ }
+}
+
static void
mt753x_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
const struct phylink_link_state *state)
@@ -2723,9 +2719,6 @@ mt753x_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
struct mt7530_priv *priv = ds->priv;
u32 mcr_cur, mcr_new;
- if (!mt753x_phy_mode_supported(ds, port, state))
- goto unsupported;
-
switch (port) {
case 0 ... 4: /* Internal phy */
if (state->interface != PHY_INTERFACE_MODE_GMII)
@@ -2780,17 +2773,6 @@ unsupported:
mt7530_write(priv, MT7530_PMCR_P(port), mcr_new);
}
-static void
-mt753x_phylink_mac_an_restart(struct dsa_switch *ds, int port)
-{
- struct mt7530_priv *priv = ds->priv;
-
- if (!priv->info->mac_pcs_an_restart)
- return;
-
- priv->info->mac_pcs_an_restart(ds, port);
-}
-
static void mt753x_phylink_mac_link_down(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface)
@@ -2800,16 +2782,13 @@ static void mt753x_phylink_mac_link_down(struct dsa_switch *ds, int port,
mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK);
}
-static void mt753x_mac_pcs_link_up(struct dsa_switch *ds, int port,
- unsigned int mode, phy_interface_t interface,
- int speed, int duplex)
+static void mt753x_phylink_pcs_link_up(struct phylink_pcs *pcs,
+ unsigned int mode,
+ phy_interface_t interface,
+ int speed, int duplex)
{
- struct mt7530_priv *priv = ds->priv;
-
- if (!priv->info->mac_pcs_link_up)
- return;
-
- priv->info->mac_pcs_link_up(ds, port, mode, interface, speed, duplex);
+ if (pcs->ops->pcs_link_up)
+ pcs->ops->pcs_link_up(pcs, mode, interface, speed, duplex);
}
static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
@@ -2822,8 +2801,6 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
struct mt7530_priv *priv = ds->priv;
u32 mcr;
- mt753x_mac_pcs_link_up(ds, port, mode, interface, speed, duplex);
-
mcr = PMCR_RX_EN | PMCR_TX_EN | PMCR_FORCE_LNK;
/* MT753x MAC works in 1G full duplex mode for all up-clocked
@@ -2903,81 +2880,51 @@ mt7531_cpu_port_config(struct dsa_switch *ds, int port)
return ret;
mt7530_write(priv, MT7530_PMCR_P(port),
PMCR_CPU_PORT_SETTING(priv->id));
+ mt753x_phylink_pcs_link_up(&priv->pcs[port].pcs, MLO_AN_FIXED,
+ interface, speed, DUPLEX_FULL);
mt753x_phylink_mac_link_up(ds, port, MLO_AN_FIXED, interface, NULL,
speed, DUPLEX_FULL, true, true);
return 0;
}
-static void
-mt7530_mac_port_validate(struct dsa_switch *ds, int port,
- unsigned long *supported)
+static void mt753x_phylink_get_caps(struct dsa_switch *ds, int port,
+ struct phylink_config *config)
{
- if (port == 5)
- phylink_set(supported, 1000baseX_Full);
-}
-
-static void mt7531_mac_port_validate(struct dsa_switch *ds, int port,
- unsigned long *supported)
-{
- struct mt7530_priv *priv = ds->priv;
-
- mt7531_sgmii_validate(priv, port, supported);
-}
-
-static void
-mt753x_phylink_validate(struct dsa_switch *ds, int port,
- unsigned long *supported,
- struct phylink_link_state *state)
-{
- __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
struct mt7530_priv *priv = ds->priv;
- if (state->interface != PHY_INTERFACE_MODE_NA &&
- !mt753x_phy_mode_supported(ds, port, state)) {
- linkmode_zero(supported);
- return;
- }
-
- phylink_set_port_modes(mask);
-
- if (state->interface != PHY_INTERFACE_MODE_TRGMII &&
- !phy_interface_mode_is_8023z(state->interface)) {
- phylink_set(mask, 10baseT_Half);
- phylink_set(mask, 10baseT_Full);
- phylink_set(mask, 100baseT_Half);
- phylink_set(mask, 100baseT_Full);
- phylink_set(mask, Autoneg);
- }
-
- /* This switch only supports 1G full-duplex. */
- if (state->interface != PHY_INTERFACE_MODE_MII)
- phylink_set(mask, 1000baseT_Full);
+ /* This switch only supports full-duplex at 1Gbps */
+ config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000FD;
- priv->info->mac_port_validate(ds, port, mask);
+ /* This driver does not make use of the speed, duplex, pause or the
+ * advertisement in its mac_config, so it is safe to mark this driver
+ * as non-legacy.
+ */
+ config->legacy_pre_march2020 = false;
- phylink_set(mask, Pause);
- phylink_set(mask, Asym_Pause);
+ priv->info->mac_port_get_caps(ds, port, config);
+}
- linkmode_and(supported, supported, mask);
- linkmode_and(state->advertising, state->advertising, mask);
+static int mt753x_pcs_validate(struct phylink_pcs *pcs,
+ unsigned long *supported,
+ const struct phylink_link_state *state)
+{
+	/* Autonegotiation is not supported in TRGMII or 802.3z modes */
+ if (state->interface == PHY_INTERFACE_MODE_TRGMII ||
+ phy_interface_mode_is_8023z(state->interface))
+ phylink_clear(supported, Autoneg);
- /* We can only operate at 2500BaseX or 1000BaseX. If requested
- * to advertise both, only report advertising at 2500BaseX.
- */
- phylink_helper_basex_speed(state);
+ return 0;
}
-static int
-mt7530_phylink_mac_link_state(struct dsa_switch *ds, int port,
- struct phylink_link_state *state)
+static void mt7530_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
{
- struct mt7530_priv *priv = ds->priv;
+ struct mt7530_priv *priv = pcs_to_mt753x_pcs(pcs)->priv;
+ int port = pcs_to_mt753x_pcs(pcs)->port;
u32 pmsr;
- if (port < 0 || port >= MT7530_NUM_PORTS)
- return -EINVAL;
-
pmsr = mt7530_read(priv, MT7530_PMSR_P(port));
state->link = (pmsr & PMSR_LINK);
@@ -3004,8 +2951,6 @@ mt7530_phylink_mac_link_state(struct dsa_switch *ds, int port,
state->pause |= MLO_PAUSE_RX;
if (pmsr & PMSR_TX_FC)
state->pause |= MLO_PAUSE_TX;
-
- return 1;
}
static int
@@ -3047,32 +2992,51 @@ mt7531_sgmii_pcs_get_state_an(struct mt7530_priv *priv, int port,
return 0;
}
-static int
-mt7531_phylink_mac_link_state(struct dsa_switch *ds, int port,
- struct phylink_link_state *state)
+static void mt7531_pcs_get_state(struct phylink_pcs *pcs,
+ struct phylink_link_state *state)
{
- struct mt7530_priv *priv = ds->priv;
+ struct mt7530_priv *priv = pcs_to_mt753x_pcs(pcs)->priv;
+ int port = pcs_to_mt753x_pcs(pcs)->port;
if (state->interface == PHY_INTERFACE_MODE_SGMII)
- return mt7531_sgmii_pcs_get_state_an(priv, port, state);
-
- return -EOPNOTSUPP;
+ mt7531_sgmii_pcs_get_state_an(priv, port, state);
+ else
+ state->link = false;
}
-static int
-mt753x_phylink_mac_link_state(struct dsa_switch *ds, int port,
- struct phylink_link_state *state)
+static int mt753x_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+ phy_interface_t interface,
+ const unsigned long *advertising,
+ bool permit_pause_to_mac)
{
- struct mt7530_priv *priv = ds->priv;
+ return 0;
+}
- return priv->info->mac_port_get_state(ds, port, state);
+static void mt7530_pcs_an_restart(struct phylink_pcs *pcs)
+{
}
+static const struct phylink_pcs_ops mt7530_pcs_ops = {
+ .pcs_validate = mt753x_pcs_validate,
+ .pcs_get_state = mt7530_pcs_get_state,
+ .pcs_config = mt753x_pcs_config,
+ .pcs_an_restart = mt7530_pcs_an_restart,
+};
+
+static const struct phylink_pcs_ops mt7531_pcs_ops = {
+ .pcs_validate = mt753x_pcs_validate,
+ .pcs_get_state = mt7531_pcs_get_state,
+ .pcs_config = mt753x_pcs_config,
+ .pcs_an_restart = mt7531_pcs_an_restart,
+ .pcs_link_up = mt7531_pcs_link_up,
+};
+
static int
mt753x_setup(struct dsa_switch *ds)
{
struct mt7530_priv *priv = ds->priv;
int ret = priv->info->sw_setup(ds);
+ int i;
if (ret)
return ret;
@@ -3085,6 +3049,13 @@ mt753x_setup(struct dsa_switch *ds)
if (ret && priv->irq)
mt7530_free_irq_common(priv);
+ /* Initialise the PCS devices */
+ for (i = 0; i < priv->ds->num_ports; i++) {
+ priv->pcs[i].pcs.ops = priv->info->pcs_ops;
+ priv->pcs[i].priv = priv;
+ priv->pcs[i].port = i;
+ }
+
return ret;
}
@@ -3144,10 +3115,9 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
.port_vlan_del = mt7530_port_vlan_del,
.port_mirror_add = mt753x_port_mirror_add,
.port_mirror_del = mt753x_port_mirror_del,
- .phylink_validate = mt753x_phylink_validate,
- .phylink_mac_link_state = mt753x_phylink_mac_link_state,
+ .phylink_get_caps = mt753x_phylink_get_caps,
+ .phylink_mac_select_pcs = mt753x_phylink_mac_select_pcs,
.phylink_mac_config = mt753x_phylink_mac_config,
- .phylink_mac_an_restart = mt753x_phylink_mac_an_restart,
.phylink_mac_link_down = mt753x_phylink_mac_link_down,
.phylink_mac_link_up = mt753x_phylink_mac_link_up,
.get_mac_eee = mt753x_get_mac_eee,
@@ -3157,39 +3127,34 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
static const struct mt753x_info mt753x_table[] = {
[ID_MT7621] = {
.id = ID_MT7621,
+ .pcs_ops = &mt7530_pcs_ops,
.sw_setup = mt7530_setup,
.phy_read = mt7530_phy_read,
.phy_write = mt7530_phy_write,
.pad_setup = mt7530_pad_clk_setup,
- .phy_mode_supported = mt7530_phy_mode_supported,
- .mac_port_validate = mt7530_mac_port_validate,
- .mac_port_get_state = mt7530_phylink_mac_link_state,
+ .mac_port_get_caps = mt7530_mac_port_get_caps,
.mac_port_config = mt7530_mac_config,
},
[ID_MT7530] = {
.id = ID_MT7530,
+ .pcs_ops = &mt7530_pcs_ops,
.sw_setup = mt7530_setup,
.phy_read = mt7530_phy_read,
.phy_write = mt7530_phy_write,
.pad_setup = mt7530_pad_clk_setup,
- .phy_mode_supported = mt7530_phy_mode_supported,
- .mac_port_validate = mt7530_mac_port_validate,
- .mac_port_get_state = mt7530_phylink_mac_link_state,
+ .mac_port_get_caps = mt7530_mac_port_get_caps,
.mac_port_config = mt7530_mac_config,
},
[ID_MT7531] = {
.id = ID_MT7531,
+ .pcs_ops = &mt7531_pcs_ops,
.sw_setup = mt7531_setup,
.phy_read = mt7531_ind_phy_read,
.phy_write = mt7531_ind_phy_write,
.pad_setup = mt7531_pad_setup,
.cpu_port_config = mt7531_cpu_port_config,
- .phy_mode_supported = mt7531_phy_mode_supported,
- .mac_port_validate = mt7531_mac_port_validate,
- .mac_port_get_state = mt7531_phylink_mac_link_state,
+ .mac_port_get_caps = mt7531_mac_port_get_caps,
.mac_port_config = mt7531_mac_config,
- .mac_pcs_an_restart = mt7531_sgmii_restart_an,
- .mac_pcs_link_up = mt7531_sgmii_link_up_force,
},
};
@@ -3246,9 +3211,8 @@ mt7530_probe(struct mdio_device *mdiodev)
*/
if (!priv->info->sw_setup || !priv->info->pad_setup ||
!priv->info->phy_read || !priv->info->phy_write ||
- !priv->info->phy_mode_supported ||
- !priv->info->mac_port_validate ||
- !priv->info->mac_port_get_state || !priv->info->mac_port_config)
+ !priv->info->mac_port_get_caps ||
+ !priv->info->mac_port_config)
return -EINVAL;
priv->id = priv->info->id;
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 91508e2feef9..71e36b69b96d 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -741,6 +741,12 @@ static const char *p5_intf_modes(unsigned int p5_interface)
struct mt7530_priv;
+struct mt753x_pcs {
+ struct phylink_pcs pcs;
+ struct mt7530_priv *priv;
+ int port;
+};
+
/* struct mt753x_info - This is the main data structure for holding the specific
* part for each supported device
* @sw_setup: Holding the handler to a device initialization
@@ -752,36 +758,27 @@ struct mt7530_priv;
* port
* @mac_port_validate: Holding the way to set additional validation for a
* certain MAC port
- * @mac_port_get_state: Holding the way getting the MAC/PCS state for a certain
- * MAC port
* @mac_port_config: Holding the way setting up the PHY attribute to a
* certain MAC port
- * @mac_pcs_an_restart Holding the way restarting PCS autonegotiation for a
- * certain MAC port
- * @mac_pcs_link_up: Holding the way setting up the PHY attribute to the pcs
- * of the certain MAC port
*/
struct mt753x_info {
enum mt753x_id id;
+ const struct phylink_pcs_ops *pcs_ops;
+
int (*sw_setup)(struct dsa_switch *ds);
int (*phy_read)(struct mt7530_priv *priv, int port, int regnum);
int (*phy_write)(struct mt7530_priv *priv, int port, int regnum, u16 val);
int (*pad_setup)(struct dsa_switch *ds, phy_interface_t interface);
int (*cpu_port_config)(struct dsa_switch *ds, int port);
- bool (*phy_mode_supported)(struct dsa_switch *ds, int port,
- const struct phylink_link_state *state);
+ void (*mac_port_get_caps)(struct dsa_switch *ds, int port,
+ struct phylink_config *config);
void (*mac_port_validate)(struct dsa_switch *ds, int port,
+ phy_interface_t interface,
unsigned long *supported);
- int (*mac_port_get_state)(struct dsa_switch *ds, int port,
- struct phylink_link_state *state);
int (*mac_port_config)(struct dsa_switch *ds, int port,
unsigned int mode,
phy_interface_t interface);
- void (*mac_pcs_an_restart)(struct dsa_switch *ds, int port);
- void (*mac_pcs_link_up)(struct dsa_switch *ds, int port,
- unsigned int mode, phy_interface_t interface,
- int speed, int duplex);
};
/* struct mt7530_priv - This is the main data structure for holding the state
@@ -823,6 +820,7 @@ struct mt7530_priv {
u8 mirror_tx;
struct mt7530_port ports[MT7530_NUM_PORTS];
+ struct mt753x_pcs pcs[MT7530_NUM_PORTS];
/* protect concurrent register access among processes */
struct mutex reg_mutex;
int irq;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 5caa75b41b73..4e9215bce4ad 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -2212,7 +2212,7 @@
* MAC DA 2. The reset default is set to mask out all parameters.
*/
#define NIG_REG_P0_LLH_PTP_PARAM_MASK 0x187a0
-/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set
+/* [RW 14] Mask register for the rules used in detecting PTP packets. Set
* each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} .
* 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP
* 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1;
@@ -2381,7 +2381,7 @@
* MAC DA 2. The reset default is set to mask out all parameters.
*/
#define NIG_REG_P1_LLH_PTP_PARAM_MASK 0x187c8
-/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set
+/* [RW 14] Mask register for the rules used in detecting PTP packets. Set
* each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} .
* 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP
* 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1;
@@ -2493,7 +2493,7 @@
* MAC DA 2. The reset default is set to mask out all parameters.
*/
#define NIG_REG_P0_TLLH_PTP_PARAM_MASK 0x187f0
-/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set
+/* [RW 14] Mask register for the rules used in detecting PTP packets. Set
* each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} .
* 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP
* 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1;
@@ -2529,7 +2529,7 @@
* MAC DA 2. The reset default is set to mask out all parameters.
*/
#define NIG_REG_P1_TLLH_PTP_PARAM_MASK 0x187f8
-/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set
+/* [RW 14] Mask register for the rules used in detecting PTP packets. Set
* each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} .
* 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP
* 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1;
@@ -6218,7 +6218,7 @@
#define AEU_INPUTS_ATTN_BITS_GPIO0_FUNCTION_0 (0x1<<2)
#define AEU_INPUTS_ATTN_BITS_IGU_PARITY_ERROR (0x1<<12)
#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_ROM_PARITY (0x1<<28)
-#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY (0x1<<31)
+#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_SCPAD_PARITY (0x1U<<31)
#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_RX_PARITY (0x1<<29)
#define AEU_INPUTS_ATTN_BITS_MCP_LATCHED_UMP_TX_PARITY (0x1<<30)
#define AEU_INPUTS_ATTN_BITS_MISC_HW_INTERRUPT (0x1<<15)
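
Besides the "regiser" spelling fixes, the one functional change in this header is the 0x1U on bit 31: shifting a signed int into the sign bit is undefined behaviour in C, while an unsigned shift is well defined. In miniature (the macro names here are ours):

	#define BIT31_UB	(0x1 << 31)	/* signed shift into the sign bit: UB */
	#define BIT31_OK	(0x1U << 31)	/* unsigned shift: well defined */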
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 874fad0a5cf8..0489c1c2e7dd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -56,6 +56,7 @@
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <net/page_pool.h>
+#include <linux/align.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
@@ -738,7 +739,6 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
page_pool_recycle_direct(rxr->page_pool, page);
return NULL;
}
- *mapping += bp->rx_dma_offset;
return page;
}
@@ -780,6 +780,7 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
if (!page)
return -ENOMEM;
+ mapping += bp->rx_dma_offset;
rx_buf->data = page;
rx_buf->data_ptr = page_address(page) + bp->rx_offset;
} else {
@@ -840,33 +841,41 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
u16 sw_prod = rxr->rx_sw_agg_prod;
unsigned int offset = 0;
- if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) {
- page = rxr->rx_page;
- if (!page) {
+ if (BNXT_RX_PAGE_MODE(bp)) {
+ page = __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp);
+
+ if (!page)
+ return -ENOMEM;
+
+ } else {
+ if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) {
+ page = rxr->rx_page;
+ if (!page) {
+ page = alloc_page(gfp);
+ if (!page)
+ return -ENOMEM;
+ rxr->rx_page = page;
+ rxr->rx_page_offset = 0;
+ }
+ offset = rxr->rx_page_offset;
+ rxr->rx_page_offset += BNXT_RX_PAGE_SIZE;
+ if (rxr->rx_page_offset == PAGE_SIZE)
+ rxr->rx_page = NULL;
+ else
+ get_page(page);
+ } else {
page = alloc_page(gfp);
if (!page)
return -ENOMEM;
- rxr->rx_page = page;
- rxr->rx_page_offset = 0;
}
- offset = rxr->rx_page_offset;
- rxr->rx_page_offset += BNXT_RX_PAGE_SIZE;
- if (rxr->rx_page_offset == PAGE_SIZE)
- rxr->rx_page = NULL;
- else
- get_page(page);
- } else {
- page = alloc_page(gfp);
- if (!page)
- return -ENOMEM;
- }
- mapping = dma_map_page_attrs(&pdev->dev, page, offset,
- BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE,
- DMA_ATTR_WEAK_ORDERING);
- if (dma_mapping_error(&pdev->dev, mapping)) {
- __free_page(page);
- return -EIO;
+ mapping = dma_map_page_attrs(&pdev->dev, page, offset,
+ BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE,
+ DMA_ATTR_WEAK_ORDERING);
+ if (dma_mapping_error(&pdev->dev, mapping)) {
+ __free_page(page);
+ return -EIO;
+ }
}
if (unlikely(test_bit(sw_prod, rxr->rx_agg_bmap)))
@@ -962,6 +971,39 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
rxr->rx_sw_agg_prod = sw_prod;
}
+static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
+ struct bnxt_rx_ring_info *rxr,
+ u16 cons, void *data, u8 *data_ptr,
+ dma_addr_t dma_addr,
+ unsigned int offset_and_len)
+{
+ unsigned int len = offset_and_len & 0xffff;
+ struct page *page = data;
+ u16 prod = rxr->rx_prod;
+ struct sk_buff *skb;
+ int err;
+
+ err = bnxt_alloc_rx_data(bp, rxr, prod, GFP_ATOMIC);
+ if (unlikely(err)) {
+ bnxt_reuse_rx_data(rxr, cons, data);
+ return NULL;
+ }
+ dma_addr -= bp->rx_dma_offset;
+ dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
+ DMA_ATTR_WEAK_ORDERING);
+ skb = build_skb(page_address(page), BNXT_PAGE_MODE_BUF_SIZE +
+ bp->rx_dma_offset);
+ if (!skb) {
+ __free_page(page);
+ return NULL;
+ }
+ skb_mark_for_recycle(skb);
+ skb_reserve(skb, bp->rx_dma_offset);
+ __skb_put(skb, len);
+
+ return skb;
+}
+
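
bnxt_rx_multi_page_skb() above follows the usual build_skb-over-page_pool recipe: unmap the page, wrap its memory in an skb without copying, and mark the skb so the page returns to its pool on free. A self-contained sketch under an assumed name (rx_page_to_skb is illustrative):

	#include <linux/skbuff.h>

	static struct sk_buff *rx_page_to_skb(struct page *page,
					      unsigned int headroom,
					      unsigned int len,
					      unsigned int frag_size)
	{
		struct sk_buff *skb = build_skb(page_address(page), frag_size);

		if (!skb)
			return NULL;
		skb_mark_for_recycle(skb);	/* recycle via page_pool on free */
		skb_reserve(skb, headroom);	/* skip the driver headroom */
		__skb_put(skb, len);		/* expose the received bytes */
		return skb;
	}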
static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
struct bnxt_rx_ring_info *rxr,
u16 cons, void *data, u8 *data_ptr,
@@ -984,7 +1026,6 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
dma_addr -= bp->rx_dma_offset;
dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
DMA_ATTR_WEAK_ORDERING);
- page_pool_release_page(rxr->page_pool, page);
if (unlikely(!payload))
payload = eth_get_headlen(bp->dev, data_ptr, len);
@@ -995,6 +1036,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
return NULL;
}
+ skb_mark_for_recycle(skb);
off = (void *)data_ptr - page_address(page);
skb_add_rx_frag(skb, 0, page, off, len, PAGE_SIZE);
memcpy(skb->data - NET_IP_ALIGN, data_ptr - NET_IP_ALIGN,
@@ -1038,22 +1080,24 @@ static struct sk_buff *bnxt_rx_skb(struct bnxt *bp,
return skb;
}
-static struct sk_buff *bnxt_rx_pages(struct bnxt *bp,
- struct bnxt_cp_ring_info *cpr,
- struct sk_buff *skb, u16 idx,
- u32 agg_bufs, bool tpa)
+static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
+ struct bnxt_cp_ring_info *cpr,
+ struct skb_shared_info *shinfo,
+ u16 idx, u32 agg_bufs, bool tpa,
+ struct xdp_buff *xdp)
{
struct bnxt_napi *bnapi = cpr->bnapi;
struct pci_dev *pdev = bp->pdev;
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
u16 prod = rxr->rx_agg_prod;
+ u32 i, total_frag_len = 0;
bool p5_tpa = false;
- u32 i;
if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa)
p5_tpa = true;
for (i = 0; i < agg_bufs; i++) {
+ skb_frag_t *frag = &shinfo->frags[i];
u16 cons, frag_len;
struct rx_agg_cmp *agg;
struct bnxt_sw_rx_agg_bd *cons_rx_buf;
@@ -1069,8 +1113,10 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp,
RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT;
cons_rx_buf = &rxr->rx_agg_ring[cons];
- skb_fill_page_desc(skb, i, cons_rx_buf->page,
- cons_rx_buf->offset, frag_len);
+ skb_frag_off_set(frag, cons_rx_buf->offset);
+ skb_frag_size_set(frag, frag_len);
+ __skb_frag_set_page(frag, cons_rx_buf->page);
+ shinfo->nr_frags = i + 1;
__clear_bit(cons, rxr->rx_agg_bmap);
/* It is possible for bnxt_alloc_rx_page() to allocate
@@ -1081,16 +1127,14 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp,
page = cons_rx_buf->page;
cons_rx_buf->page = NULL;
+ if (xdp && page_is_pfmemalloc(page))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+
if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_ATOMIC) != 0) {
- struct skb_shared_info *shinfo;
unsigned int nr_frags;
- shinfo = skb_shinfo(skb);
nr_frags = --shinfo->nr_frags;
__skb_frag_set_page(&shinfo->frags[nr_frags], NULL);
-
- dev_kfree_skb(skb);
-
cons_rx_buf->page = page;
/* Update prod since possibly some pages have been
@@ -1098,23 +1142,62 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp,
*/
rxr->rx_agg_prod = prod;
bnxt_reuse_rx_agg_bufs(cpr, idx, i, agg_bufs - i, tpa);
- return NULL;
+ return 0;
}
dma_unmap_page_attrs(&pdev->dev, mapping, BNXT_RX_PAGE_SIZE,
- DMA_FROM_DEVICE,
+ bp->rx_dir,
DMA_ATTR_WEAK_ORDERING);
- skb->data_len += frag_len;
- skb->len += frag_len;
- skb->truesize += PAGE_SIZE;
-
+ total_frag_len += frag_len;
prod = NEXT_RX_AGG(prod);
}
rxr->rx_agg_prod = prod;
+ return total_frag_len;
+}
+
+static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp,
+ struct bnxt_cp_ring_info *cpr,
+ struct sk_buff *skb, u16 idx,
+ u32 agg_bufs, bool tpa)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ u32 total_frag_len = 0;
+
+ total_frag_len = __bnxt_rx_agg_pages(bp, cpr, shinfo, idx,
+ agg_bufs, tpa, NULL);
+ if (!total_frag_len) {
+ dev_kfree_skb(skb);
+ return NULL;
+ }
+
+ skb->data_len += total_frag_len;
+ skb->len += total_frag_len;
+ skb->truesize += PAGE_SIZE * agg_bufs;
return skb;
}
+static u32 bnxt_rx_agg_pages_xdp(struct bnxt *bp,
+ struct bnxt_cp_ring_info *cpr,
+ struct xdp_buff *xdp, u16 idx,
+ u32 agg_bufs, bool tpa)
+{
+ struct skb_shared_info *shinfo = xdp_get_shared_info_from_buff(xdp);
+ u32 total_frag_len = 0;
+
+ if (!xdp_buff_has_frags(xdp))
+ shinfo->nr_frags = 0;
+
+ total_frag_len = __bnxt_rx_agg_pages(bp, cpr, shinfo,
+ idx, agg_bufs, tpa, xdp);
+ if (total_frag_len) {
+ xdp_buff_set_frags_flag(xdp);
+ shinfo->nr_frags = agg_bufs;
+ shinfo->xdp_frags_size = total_frag_len;
+ }
+ return total_frag_len;
+}
+
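
bnxt_rx_agg_pages_xdp() is the multi-buffer XDP half of the same helper: frags live in the xdp_buff's shared info, and the frags flag, frag count, and total size must stay consistent. Condensed to a sketch (xdp_attach_frags is our name):

	static void xdp_attach_frags(struct xdp_buff *xdp, u32 nr_frags,
				     u32 total_len)
	{
		struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);

		xdp_buff_set_frags_flag(xdp);	/* mark as multi-buffer */
		sinfo->nr_frags = nr_frags;
		sinfo->xdp_frags_size = total_len;
	}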
static int bnxt_agg_bufs_valid(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
u8 agg_bufs, u32 *raw_cons)
{
@@ -1644,7 +1727,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
}
if (agg_bufs) {
- skb = bnxt_rx_pages(bp, cpr, skb, idx, agg_bufs, true);
+ skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true);
if (!skb) {
/* Page reuse already handled by bnxt_rx_agg_pages_skb(). */
cpr->sw_stats.rx.rx_oom_discards += 1;
@@ -1729,8 +1812,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
struct bnxt_sw_rx_bd *rx_buf;
unsigned int len;
u8 *data_ptr, agg_bufs, cmp_type;
+ bool xdp_active = false;
dma_addr_t dma_addr;
struct sk_buff *skb;
+ struct xdp_buff xdp;
u32 flags, misc;
void *data;
int rc = 0;
@@ -1839,18 +1924,39 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
len = flags >> RX_CMP_LEN_SHIFT;
dma_addr = rx_buf->mapping;
- if (bnxt_rx_xdp(bp, rxr, cons, data, &data_ptr, &len, event)) {
- rc = 1;
- goto next_rx;
+ if (bnxt_xdp_attached(bp, rxr)) {
+ bnxt_xdp_buff_init(bp, rxr, cons, &data_ptr, &len, &xdp);
+ if (agg_bufs) {
+ u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp,
+ cp_cons, agg_bufs,
+ false);
+ if (!frag_len) {
+ cpr->sw_stats.rx.rx_oom_discards += 1;
+ rc = -ENOMEM;
+ goto next_rx;
+ }
+ }
+ xdp_active = true;
+ }
+
+ if (xdp_active) {
+ if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &len, event)) {
+ rc = 1;
+ goto next_rx;
+ }
}
if (len <= bp->rx_copy_thresh) {
skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr);
bnxt_reuse_rx_data(rxr, cons, data);
if (!skb) {
- if (agg_bufs)
- bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0,
- agg_bufs, false);
+ if (agg_bufs) {
+ if (!xdp_active)
+ bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0,
+ agg_bufs, false);
+ else
+ bnxt_xdp_buff_frags_free(rxr, &xdp);
+ }
cpr->sw_stats.rx.rx_oom_discards += 1;
rc = -ENOMEM;
goto next_rx;
@@ -1872,11 +1978,22 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
}
if (agg_bufs) {
- skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs, false);
- if (!skb) {
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
+ if (!xdp_active) {
+ skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, cp_cons, agg_bufs, false);
+ if (!skb) {
+ cpr->sw_stats.rx.rx_oom_discards += 1;
+ rc = -ENOMEM;
+ goto next_rx;
+ }
+ } else {
+ skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1);
+ if (!skb) {
+ /* we should be able to free the old skb here */
+ bnxt_xdp_buff_frags_free(rxr, &xdp);
+ cpr->sw_stats.rx.rx_oom_discards += 1;
+ rc = -ENOMEM;
+ goto next_rx;
+ }
}
}
@@ -2492,10 +2609,13 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) {
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
- if (bnapi->events & BNXT_AGG_EVENT)
- bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod);
}
+ if (bnapi->events & BNXT_AGG_EVENT) {
+ struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
+
+ bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+ }
bnapi->events = 0;
}
@@ -2872,14 +2992,23 @@ skip_rx_buf_free:
if (!page)
continue;
- dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping,
- BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE,
- DMA_ATTR_WEAK_ORDERING);
+ if (BNXT_RX_PAGE_MODE(bp)) {
+ dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping,
+ BNXT_RX_PAGE_SIZE, bp->rx_dir,
+ DMA_ATTR_WEAK_ORDERING);
+ rx_agg_buf->page = NULL;
+ __clear_bit(i, rxr->rx_agg_bmap);
- rx_agg_buf->page = NULL;
- __clear_bit(i, rxr->rx_agg_bmap);
+ page_pool_recycle_direct(rxr->page_pool, page);
+ } else {
+ dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping,
+ BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE,
+ DMA_ATTR_WEAK_ORDERING);
+ rx_agg_buf->page = NULL;
+ __clear_bit(i, rxr->rx_agg_bmap);
- __free_page(page);
+ __free_page(page);
+ }
}
skip_rx_agg_free:
@@ -3793,7 +3922,7 @@ void bnxt_set_ring_params(struct bnxt *bp)
/* 8 for CRC and VLAN */
rx_size = SKB_DATA_ALIGN(bp->dev->mtu + ETH_HLEN + NET_IP_ALIGN + 8);
- rx_space = rx_size + NET_SKB_PAD +
+ rx_space = rx_size + ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
bp->rx_copy_thresh = BNXT_RX_COPY_THRESH;
@@ -3834,9 +3963,15 @@ void bnxt_set_ring_params(struct bnxt *bp)
}
bp->rx_agg_ring_size = agg_ring_size;
bp->rx_agg_ring_mask = (bp->rx_agg_nr_pages * RX_DESC_CNT) - 1;
- rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN);
- rx_space = rx_size + NET_SKB_PAD +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ if (BNXT_RX_PAGE_MODE(bp)) {
+ rx_space = BNXT_PAGE_MODE_BUF_SIZE;
+ rx_size = BNXT_MAX_PAGE_MODE_MTU;
+ } else {
+ rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN);
+ rx_space = rx_size + NET_SKB_PAD +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ }
}
bp->rx_buf_use_size = rx_size;
@@ -3877,14 +4012,21 @@ void bnxt_set_ring_params(struct bnxt *bp)
int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
{
if (page_mode) {
- if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU)
- return -EOPNOTSUPP;
- bp->dev->max_mtu =
- min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
bp->flags &= ~BNXT_FLAG_AGG_RINGS;
- bp->flags |= BNXT_FLAG_NO_AGG_RINGS | BNXT_FLAG_RX_PAGE_MODE;
+ bp->flags |= BNXT_FLAG_RX_PAGE_MODE;
+
+ if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
+ bp->flags |= BNXT_FLAG_JUMBO;
+ bp->rx_skb_func = bnxt_rx_multi_page_skb;
+ bp->dev->max_mtu =
+ min_t(u16, bp->max_mtu, BNXT_MAX_MTU);
+ } else {
+ bp->flags |= BNXT_FLAG_NO_AGG_RINGS;
+ bp->rx_skb_func = bnxt_rx_page_skb;
+ bp->dev->max_mtu =
+ min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
+ }
bp->rx_dir = DMA_BIDIRECTIONAL;
- bp->rx_skb_func = bnxt_rx_page_skb;
/* Disable LRO or GRO_HW */
netdev_update_features(bp->dev);
} else {
@@ -5226,12 +5368,15 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id)
if (rc)
return rc;
- req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT |
- VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 |
- VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6);
- req->enables =
- cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID |
- VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID);
+ req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT);
+ req->enables = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID);
+
+ if (BNXT_RX_PAGE_MODE(bp) && !BNXT_RX_JUMBO_MODE(bp)) {
+ req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 |
+ VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6);
+ req->enables |=
+ cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID);
+ }
/* thresholds not implemented in firmware yet */
req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh);
req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh);
@@ -11031,6 +11176,9 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
if (bp->flags & BNXT_FLAG_NO_AGG_RINGS)
features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
+ if (!(bp->flags & BNXT_FLAG_TPA))
+ features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
+
if (!(features & NETIF_F_GRO))
features &= ~NETIF_F_GRO_HW;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 98453a78cbd0..a498ee297946 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -591,10 +591,12 @@ struct nqe_cn {
#define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT)
#define BNXT_MAX_MTU 9500
-#define BNXT_MAX_PAGE_MODE_MTU \
+#define BNXT_PAGE_MODE_BUF_SIZE \
((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \
- XDP_PACKET_HEADROOM - \
- SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)))
+ XDP_PACKET_HEADROOM)
+#define BNXT_MAX_PAGE_MODE_MTU	\
+	(BNXT_PAGE_MODE_BUF_SIZE - \
+	 SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)))
#define BNXT_MIN_PKT_SIZE 52
@@ -699,13 +701,12 @@ struct bnxt_sw_tx_bd {
};
DEFINE_DMA_UNMAP_ADDR(mapping);
DEFINE_DMA_UNMAP_LEN(len);
+ struct page *page;
u8 is_gso;
u8 is_push;
u8 action;
- union {
- unsigned short nr_frags;
- u16 rx_prod;
- };
+ unsigned short nr_frags;
+ u16 rx_prod;
};
struct bnxt_sw_rx_bd {
@@ -1817,6 +1818,7 @@ struct bnxt {
#define BNXT_SUPPORTS_TPA(bp) (!BNXT_CHIP_TYPE_NITRO_A0(bp) && \
(!((bp)->flags & BNXT_FLAG_CHIP_P5) || \
(bp)->max_tpa_v2) && !is_kdump_kernel())
+#define BNXT_RX_JUMBO_MODE(bp) ((bp)->flags & BNXT_FLAG_JUMBO)
#define BNXT_CHIP_SR2(bp) \
((bp)->chip_num == CHIP_NUM_58818)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 22e965e18fbc..b3a48d6675fe 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -3491,7 +3491,7 @@ static int bnxt_run_loopback(struct bnxt *bp)
dev_kfree_skb(skb);
return -EIO;
}
- bnxt_xmit_bd(bp, txr, map, pkt_size);
+ bnxt_xmit_bd(bp, txr, map, pkt_size, NULL);
/* Sync BD data before updating doorbell */
wmb();
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 03b1d6c04504..f02fe906dedb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -24,36 +24,91 @@ DEFINE_STATIC_KEY_FALSE(bnxt_xdp_locking_key);
struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
struct bnxt_tx_ring_info *txr,
- dma_addr_t mapping, u32 len)
+ dma_addr_t mapping, u32 len,
+ struct xdp_buff *xdp)
{
- struct bnxt_sw_tx_bd *tx_buf;
+ struct skb_shared_info *sinfo;
+ struct bnxt_sw_tx_bd *tx_buf, *first_buf;
struct tx_bd *txbd;
+ int num_frags = 0;
u32 flags;
u16 prod;
+ int i;
+
+ if (xdp && xdp_buff_has_frags(xdp)) {
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ num_frags = sinfo->nr_frags;
+ }
+ /* fill up the first buffer */
prod = txr->tx_prod;
tx_buf = &txr->tx_buf_ring[prod];
+ first_buf = tx_buf;
+ tx_buf->nr_frags = num_frags;
+ if (xdp)
+ tx_buf->page = virt_to_head_page(xdp->data);
txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
- flags = (len << TX_BD_LEN_SHIFT) | (1 << TX_BD_FLAGS_BD_CNT_SHIFT) |
- TX_BD_FLAGS_PACKET_END | bnxt_lhint_arr[len >> 9];
+ flags = ((len) << TX_BD_LEN_SHIFT) | ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT);
txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
txbd->tx_bd_opaque = prod;
txbd->tx_bd_haddr = cpu_to_le64(mapping);
+ /* now let us fill up the frags into the next buffers */
+	for (i = 0; i < num_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ struct bnxt_sw_tx_bd *frag_tx_buf;
+ struct pci_dev *pdev = bp->pdev;
+ dma_addr_t frag_mapping;
+ int frag_len;
+
+ prod = NEXT_TX(prod);
+ txr->tx_prod = prod;
+
+		/* fill the next BD with this frag */
+ frag_tx_buf = &txr->tx_buf_ring[prod];
+ frag_tx_buf->page = skb_frag_page(frag);
+
+ txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
+
+ frag_len = skb_frag_size(frag);
+ frag_mapping = skb_frag_dma_map(&pdev->dev, frag, 0,
+ frag_len, DMA_TO_DEVICE);
+
+ if (unlikely(dma_mapping_error(&pdev->dev, frag_mapping)))
+ return NULL;
+
+ dma_unmap_addr_set(frag_tx_buf, mapping, frag_mapping);
+
+ flags = frag_len << TX_BD_LEN_SHIFT;
+ txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
+ txbd->tx_bd_opaque = prod;
+ txbd->tx_bd_haddr = cpu_to_le64(frag_mapping);
+
+ len = frag_len;
+ }
+
+ flags &= ~TX_BD_LEN;
+ txbd->tx_bd_len_flags_type = cpu_to_le32(((len) << TX_BD_LEN_SHIFT) | flags |
+ TX_BD_FLAGS_PACKET_END);
+ /* Sync TX BD */
+ wmb();
prod = NEXT_TX(prod);
txr->tx_prod = prod;
- return tx_buf;
+
+ return first_buf;
}
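
bnxt_xmit_bd() now emits a head BD plus one BD per frag, with the total BD count encoded in the first descriptor's flags. The encoding, reduced to a helper (example_bd_flags is ours; the shift macros are the driver's own):

	static u32 example_bd_flags(u32 head_len, int num_frags)
	{
		/* the head buffer is one BD; each frag adds another */
		return (head_len << TX_BD_LEN_SHIFT) |
		       ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT);
	}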
static void __bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
- dma_addr_t mapping, u32 len, u16 rx_prod)
+ dma_addr_t mapping, u32 len, u16 rx_prod,
+ struct xdp_buff *xdp)
{
struct bnxt_sw_tx_bd *tx_buf;
- tx_buf = bnxt_xmit_bd(bp, txr, mapping, len);
+ tx_buf = bnxt_xmit_bd(bp, txr, mapping, len, xdp);
tx_buf->rx_prod = rx_prod;
tx_buf->action = XDP_TX;
}
static void __bnxt_xmit_xdp_redirect(struct bnxt *bp,
@@ -63,7 +118,7 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp,
{
struct bnxt_sw_tx_bd *tx_buf;
- tx_buf = bnxt_xmit_bd(bp, txr, mapping, len);
+ tx_buf = bnxt_xmit_bd(bp, txr, mapping, len, NULL);
tx_buf->action = XDP_REDIRECT;
tx_buf->xdpf = xdpf;
dma_unmap_addr_set(tx_buf, mapping, mapping);
@@ -78,7 +133,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
struct bnxt_sw_tx_bd *tx_buf;
u16 tx_cons = txr->tx_cons;
u16 last_tx_cons = tx_cons;
- int i;
+ int i, j, frags;
for (i = 0; i < nr_pkts; i++) {
tx_buf = &txr->tx_buf_ring[tx_cons];
@@ -96,6 +151,13 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
} else if (tx_buf->action == XDP_TX) {
rx_doorbell_needed = true;
last_tx_cons = tx_cons;
+
+ frags = tx_buf->nr_frags;
+ for (j = 0; j < frags; j++) {
+ tx_cons = NEXT_TX(tx_cons);
+ tx_buf = &txr->tx_buf_ring[tx_cons];
+ page_pool_recycle_direct(rxr->page_pool, tx_buf->page);
+ }
}
tx_cons = NEXT_TX(tx_cons);
}
@@ -103,7 +165,52 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
if (rx_doorbell_needed) {
tx_buf = &txr->tx_buf_ring[last_tx_cons];
bnxt_db_write(bp, &rxr->rx_db, tx_buf->rx_prod);
+ }
+}
+
+bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
+{
+ struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog);
+
+ return !!xdp_prog;
+}
+
+void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+ u16 cons, u8 **data_ptr, unsigned int *len,
+ struct xdp_buff *xdp)
+{
+ struct bnxt_sw_rx_bd *rx_buf;
+ struct pci_dev *pdev;
+ dma_addr_t mapping;
+ u32 offset;
+
+ pdev = bp->pdev;
+ rx_buf = &rxr->rx_buf_ring[cons];
+ offset = bp->rx_offset;
+
+ mapping = rx_buf->mapping - bp->rx_dma_offset;
+ dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
+
+ xdp_init_buff(xdp, BNXT_PAGE_MODE_BUF_SIZE + offset, &rxr->xdp_rxq);
+ xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false);
+}
+
+void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr,
+ struct xdp_buff *xdp)
+{
+ struct skb_shared_info *shinfo;
+ int i;
+
+ if (!xdp || !xdp_buff_has_frags(xdp))
+ return;
+ shinfo = xdp_get_shared_info_from_buff(xdp);
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ struct page *page = skb_frag_page(&shinfo->frags[i]);
+
+ page_pool_recycle_direct(rxr->page_pool, page);
}
+ shinfo->nr_frags = 0;
}
/* returns the following:
@@ -111,14 +218,14 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
* false - packet should be passed to the stack.
*/
bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
- struct page *page, u8 **data_ptr, unsigned int *len, u8 *event)
+ struct xdp_buff xdp, struct page *page, unsigned int *len, u8 *event)
{
struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog);
struct bnxt_tx_ring_info *txr;
struct bnxt_sw_rx_bd *rx_buf;
struct pci_dev *pdev;
- struct xdp_buff xdp;
dma_addr_t mapping;
+ u32 tx_needed = 1;
void *orig_data;
u32 tx_avail;
u32 offset;
@@ -128,16 +235,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
return false;
pdev = bp->pdev;
- rx_buf = &rxr->rx_buf_ring[cons];
offset = bp->rx_offset;
- mapping = rx_buf->mapping - bp->rx_dma_offset;
- dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
-
txr = rxr->bnapi->tx_ring;
/* BNXT_RX_PAGE_MODE(bp) when XDP enabled */
- xdp_init_buff(&xdp, PAGE_SIZE, &rxr->xdp_rxq);
- xdp_prepare_buff(&xdp, *data_ptr - offset, offset, *len, false);
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -150,26 +251,38 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
*event &= ~BNXT_RX_EVENT;
*len = xdp.data_end - xdp.data;
- if (orig_data != xdp.data) {
+ if (orig_data != xdp.data)
offset = xdp.data - xdp.data_hard_start;
- *data_ptr = xdp.data_hard_start + offset;
- }
+
switch (act) {
case XDP_PASS:
return false;
case XDP_TX:
- if (tx_avail < 1) {
+ rx_buf = &rxr->rx_buf_ring[cons];
+ mapping = rx_buf->mapping - bp->rx_dma_offset;
+ *event = 0;
+
+ if (unlikely(xdp_buff_has_frags(&xdp))) {
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(&xdp);
+
+ tx_needed += sinfo->nr_frags;
+ *event = BNXT_AGG_EVENT;
+ }
+
+ if (tx_avail < tx_needed) {
trace_xdp_exception(bp->dev, xdp_prog, act);
+ bnxt_xdp_buff_frags_free(rxr, &xdp);
bnxt_reuse_rx_data(rxr, cons, page);
return true;
}
- *event = BNXT_TX_EVENT;
dma_sync_single_for_device(&pdev->dev, mapping + offset, *len,
bp->rx_dir);
+
+ *event |= BNXT_TX_EVENT;
__bnxt_xmit_xdp(bp, txr, mapping + offset, *len,
- NEXT_RX(rxr->rx_prod));
+ NEXT_RX(rxr->rx_prod), &xdp);
bnxt_reuse_rx_data(rxr, cons, page);
return true;
case XDP_REDIRECT:
@@ -177,6 +290,8 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
* redirect is coming from a frame received by the
* bnxt_en driver.
*/
+ rx_buf = &rxr->rx_buf_ring[cons];
+ mapping = rx_buf->mapping - bp->rx_dma_offset;
dma_unmap_page_attrs(&pdev->dev, mapping,
PAGE_SIZE, bp->rx_dir,
DMA_ATTR_WEAK_ORDERING);
@@ -184,6 +299,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
/* if we are unable to allocate a new buffer, abort and reuse */
if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) {
trace_xdp_exception(bp->dev, xdp_prog, act);
+ bnxt_xdp_buff_frags_free(rxr, &xdp);
bnxt_reuse_rx_data(rxr, cons, page);
return true;
}
@@ -203,6 +319,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
trace_xdp_exception(bp->dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
+ bnxt_xdp_buff_frags_free(rxr, &xdp);
bnxt_reuse_rx_data(rxr, cons, page);
break;
}
@@ -270,8 +387,9 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
int tx_xdp = 0, rc, tc;
struct bpf_prog *old;
- if (prog && bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
- netdev_warn(dev, "MTU %d larger than largest XDP supported MTU %d.\n",
+ if (prog && !prog->aux->xdp_has_frags &&
+ bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
+ netdev_warn(dev, "MTU %d larger than %d without XDP frag support.\n",
bp->dev->mtu, BNXT_MAX_PAGE_MODE_MTU);
return -EOPNOTSUPP;
}
@@ -337,3 +455,26 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
return rc;
}
+
+struct sk_buff *
+bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags,
+ struct page_pool *pool, struct xdp_buff *xdp,
+ struct rx_cmp_ext *rxcmp1)
+{
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+
+ if (!skb)
+ return NULL;
+ skb_checksum_none_assert(skb);
+ if (RX_CMP_L4_CS_OK(rxcmp1)) {
+ if (bp->dev->features & NETIF_F_RXCSUM) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = RX_CMP_ENCAP(rxcmp1);
+ }
+ }
+ xdp_update_skb_shared_info(skb, num_frags,
+ sinfo->xdp_frags_size,
+ PAGE_SIZE * sinfo->nr_frags,
+ xdp_buff_is_frag_pfmemalloc(xdp));
+ return skb;
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
index 067bb5e821f5..505911ae095d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
@@ -14,13 +14,25 @@ DECLARE_STATIC_KEY_FALSE(bnxt_xdp_locking_key);
struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
struct bnxt_tx_ring_info *txr,
- dma_addr_t mapping, u32 len);
+ dma_addr_t mapping, u32 len,
+ struct xdp_buff *xdp);
void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts);
bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
- struct page *page, u8 **data_ptr, unsigned int *len,
+ struct xdp_buff xdp, struct page *page, unsigned int *len,
u8 *event);
int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp);
int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
struct xdp_frame **frames, u32 flags);
+bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr);
+
+void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+ u16 cons, u8 **data_ptr, unsigned int *len,
+ struct xdp_buff *xdp);
+void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr,
+ struct xdp_buff *xdp);
+struct sk_buff *bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb,
+ u8 num_frags, struct page_pool *pool,
+ struct xdp_buff *xdp,
+ struct rx_cmp_ext *rxcmp1);
#endif
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index e475be29845c..a5140d4d3baf 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -337,11 +337,9 @@ static int macb_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
struct macb *bp = bus->priv;
int status;
- status = pm_runtime_get_sync(&bp->pdev->dev);
- if (status < 0) {
- pm_runtime_put_noidle(&bp->pdev->dev);
+ status = pm_runtime_resume_and_get(&bp->pdev->dev);
+ if (status < 0)
goto mdio_pm_exit;
- }
status = macb_mdio_wait_for_idle(bp);
if (status < 0)
@@ -391,11 +389,9 @@ static int macb_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
struct macb *bp = bus->priv;
int status;
- status = pm_runtime_get_sync(&bp->pdev->dev);
- if (status < 0) {
- pm_runtime_put_noidle(&bp->pdev->dev);
+ status = pm_runtime_resume_and_get(&bp->pdev->dev);
+ if (status < 0)
goto mdio_pm_exit;
- }
status = macb_mdio_wait_for_idle(bp);
if (status < 0)
@@ -2753,9 +2749,9 @@ static int macb_open(struct net_device *dev)
netdev_dbg(bp->dev, "open\n");
- err = pm_runtime_get_sync(&bp->pdev->dev);
+ err = pm_runtime_resume_and_get(&bp->pdev->dev);
if (err < 0)
- goto pm_exit;
+ return err;
/* RX buffers initialization */
macb_init_rx_buffer_size(bp, bufsz);
@@ -4142,11 +4138,9 @@ static int at91ether_open(struct net_device *dev)
u32 ctl;
int ret;
- ret = pm_runtime_get_sync(&lp->pdev->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(&lp->pdev->dev);
+ ret = pm_runtime_resume_and_get(&lp->pdev->dev);
+ if (ret < 0)
return ret;
- }
/* Clear internal statistics */
ctl = macb_readl(lp, NCR);
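
All four call sites above switch to the same idiom: pm_runtime_resume_and_get() drops the usage count itself on failure, so the explicit pm_runtime_put_noidle() fix-up disappears. The resulting shape, sketched (example_open is ours):

	#include <linux/pm_runtime.h>

	static int example_open(struct device *dev)
	{
		int err;

		err = pm_runtime_resume_and_get(dev);
		if (err < 0)
			return err;	/* usage count already dropped */

		/* ... device work ... */

		pm_runtime_put(dev);
		return 0;
	}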
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index 457cb7121000..1281d1565ef8 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -1224,7 +1224,7 @@ static int xgmac_rx(struct xgmac_priv *priv, int limit)
* @budget : maximum number of packets that the current CPU can receive from
* all interfaces.
* Description :
- * This function implements the the reception process.
+ * This function implements the reception process.
* Also it runs the TX completion thread
*/
static int xgmac_poll(struct napi_struct *napi, int budget)
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h
index 9e2378013642..41714203ace8 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h
@@ -567,7 +567,7 @@ void chtls_shutdown(struct sock *sk, int how);
void chtls_destroy_sock(struct sock *sk);
int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int chtls_recvmsg(struct sock *sk, struct msghdr *msg,
- size_t len, int nonblock, int flags, int *addr_len);
+ size_t len, int flags, int *addr_len);
int chtls_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int send_tx_flowc_wr(struct sock *sk, int compl,
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
index c320cc8ca68d..539992dad8ba 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
@@ -1426,7 +1426,7 @@ static void chtls_cleanup_rbuf(struct sock *sk, int copied)
}
static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
struct chtls_hws *hws = &csk->tlshws;
@@ -1441,7 +1441,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
buffers_freed = 0;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
@@ -1616,7 +1616,7 @@ skip_copy:
* Peek at data in a socket's receive buffer.
*/
static int peekmsg(struct sock *sk, struct msghdr *msg,
- size_t len, int nonblock, int flags)
+ size_t len, int flags)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 peek_seq, offset;
@@ -1626,7 +1626,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg,
long timeo;
lock_sock(sk);
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
peek_seq = tp->copied_seq;
do {
@@ -1737,7 +1737,7 @@ found_ok_skb:
}
int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct tcp_sock *tp = tcp_sk(sk);
struct chtls_sock *csk;
@@ -1750,25 +1750,23 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
buffers_freed = 0;
if (unlikely(flags & MSG_OOB))
- return tcp_prot.recvmsg(sk, msg, len, nonblock, flags,
- addr_len);
+ return tcp_prot.recvmsg(sk, msg, len, flags, addr_len);
if (unlikely(flags & MSG_PEEK))
- return peekmsg(sk, msg, len, nonblock, flags);
+ return peekmsg(sk, msg, len, flags);
if (sk_can_busy_loop(sk) &&
skb_queue_empty_lockless(&sk->sk_receive_queue) &&
sk->sk_state == TCP_ESTABLISHED)
- sk_busy_loop(sk, nonblock);
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
lock_sock(sk);
csk = rcu_dereference_sk_user_data(sk);
if (is_tls_rx(csk))
- return chtls_pt_recvmsg(sk, msg, len, nonblock,
- flags, addr_len);
+ return chtls_pt_recvmsg(sk, msg, len, flags, addr_len);
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
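
With the nonblock argument removed from the recvmsg paths, non-blocking behaviour is derived solely from MSG_DONTWAIT in flags, as every converted call above does. The whole conversion in one helper (example_rcvtimeo is ours):

	static long example_rcvtimeo(struct sock *sk, int flags)
	{
		/* non-blocking iff the caller passed MSG_DONTWAIT */
		return sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	}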
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 55c6bce5da61..18558a019353 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -852,6 +852,7 @@ struct i40e_vsi {
u64 tx_busy;
u64 tx_linearize;
u64 tx_force_wb;
+ u64 tx_stopped;
u64 rx_buf_failed;
u64 rx_page_failed;
u64 rx_page_reuse;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 6aefffd83615..2819e261a126 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -47,6 +47,7 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw)
case I40E_DEV_ID_1G_BASE_T_X722:
case I40E_DEV_ID_10G_BASE_T_X722:
case I40E_DEV_ID_SFP_I_X722:
+ case I40E_DEV_ID_SFP_X722_A:
hw->mac.type = I40E_MAC_X722;
break;
default:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index be7c6f34d45c..c9dcd6d92c83 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -309,10 +309,11 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
tx_ring->stats.bytes,
tx_ring->tx_stats.restart_queue);
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n",
+ " tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld, tx_stopped = %lld\n",
i,
tx_ring->tx_stats.tx_busy,
- tx_ring->tx_stats.tx_done_old);
+ tx_ring->tx_stats.tx_done_old,
+ tx_ring->tx_stats.tx_stopped);
dev_info(&pf->pdev->dev,
" tx_rings[%i]: size = %i\n",
i, tx_ring->size);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h
index 1bcb0ec0f0c0..2610338002fe 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h
@@ -33,6 +33,7 @@
#define I40E_DEV_ID_1G_BASE_T_X722 0x37D1
#define I40E_DEV_ID_10G_BASE_T_X722 0x37D2
#define I40E_DEV_ID_SFP_I_X722 0x37D3
+#define I40E_DEV_ID_SFP_X722_A 0x0DDA
#endif /* _I40E_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index e48499624d22..610f00cbaff9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -293,12 +293,14 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = {
I40E_VSI_STAT("tx_linearize", tx_linearize),
I40E_VSI_STAT("tx_force_wb", tx_force_wb),
I40E_VSI_STAT("tx_busy", tx_busy),
+ I40E_VSI_STAT("tx_stopped", tx_stopped),
I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed),
I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
I40E_VSI_STAT("rx_cache_reuse", rx_page_reuse),
I40E_VSI_STAT("rx_cache_alloc", rx_page_alloc),
I40E_VSI_STAT("rx_cache_waive", rx_page_waive),
I40E_VSI_STAT("rx_cache_busy", rx_page_busy),
+ I40E_VSI_STAT("tx_restart", tx_restart),
};
/* These PF_STATs might look like duplicates of some NETDEV_STATs,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 6778df2177a1..358c2edc118d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -77,6 +77,7 @@ static const struct pci_device_id i40e_pci_tbl[] = {
{PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0},
+ {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722_A), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_X710_N3000), 0},
@@ -785,6 +786,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
unsigned int start;
u64 tx_linearize;
u64 tx_force_wb;
+ u64 tx_stopped;
u64 rx_p, rx_b;
u64 tx_p, tx_b;
u16 q;
@@ -804,6 +806,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
rx_b = rx_p = 0;
tx_b = tx_p = 0;
tx_restart = tx_busy = tx_linearize = tx_force_wb = 0;
+ tx_stopped = 0;
rx_page = 0;
rx_buf = 0;
rx_reuse = 0;
@@ -828,6 +831,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
tx_busy += p->tx_stats.tx_busy;
tx_linearize += p->tx_stats.tx_linearize;
tx_force_wb += p->tx_stats.tx_force_wb;
+ tx_stopped += p->tx_stats.tx_stopped;
/* locate Rx ring */
p = READ_ONCE(vsi->rx_rings[q]);
@@ -872,6 +876,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
vsi->tx_busy = tx_busy;
vsi->tx_linearize = tx_linearize;
vsi->tx_force_wb = tx_force_wb;
+ vsi->tx_stopped = tx_stopped;
vsi->rx_page_failed = rx_page;
vsi->rx_buf_failed = rx_buf;
vsi->rx_page_reuse = rx_reuse;
@@ -13436,8 +13441,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
np->vsi = vsi;
hw_enc_features = NETIF_F_SG |
- NETIF_F_IP_CSUM |
- NETIF_F_IPV6_CSUM |
+ NETIF_F_HW_CSUM |
NETIF_F_HIGHDMA |
NETIF_F_SOFT_FEATURES |
NETIF_F_TSO |
@@ -13468,6 +13472,23 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
/* record features VLANs can make use of */
netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
+#define I40E_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
+ NETIF_F_GSO_GRE_CSUM | \
+ NETIF_F_GSO_IPXIP4 | \
+ NETIF_F_GSO_IPXIP6 | \
+ NETIF_F_GSO_UDP_TUNNEL | \
+ NETIF_F_GSO_UDP_TUNNEL_CSUM)
+
+ netdev->gso_partial_features = I40E_GSO_PARTIAL_FEATURES;
+ netdev->features |= NETIF_F_GSO_PARTIAL |
+ I40E_GSO_PARTIAL_FEATURES;
+
+ netdev->mpls_features |= NETIF_F_SG;
+ netdev->mpls_features |= NETIF_F_HW_CSUM;
+ netdev->mpls_features |= NETIF_F_TSO;
+ netdev->mpls_features |= NETIF_F_TSO6;
+ netdev->mpls_features |= I40E_GSO_PARTIAL_FEATURES;
+
/* enable macvlan offloads */
netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 0eae5858f2fe..7bc1174edf6b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3,6 +3,7 @@
#include <linux/prefetch.h>
#include <linux/bpf_trace.h>
+#include <net/mpls.h>
#include <net/xdp.h>
#include "i40e.h"
#include "i40e_trace.h"
@@ -3015,6 +3016,7 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
{
struct sk_buff *skb = first->skb;
u64 cd_cmd, cd_tso_len, cd_mss;
+ __be16 protocol;
union {
struct iphdr *v4;
struct ipv6hdr *v6;
@@ -3026,7 +3028,7 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
unsigned char *hdr;
} l4;
u32 paylen, l4_offset;
- u16 gso_segs, gso_size;
+ u16 gso_size;
int err;
if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -3039,15 +3041,23 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
if (err < 0)
return err;
- ip.hdr = skb_network_header(skb);
- l4.hdr = skb_transport_header(skb);
+ protocol = vlan_get_protocol(skb);
+
+ if (eth_p_mpls(protocol))
+ ip.hdr = skb_inner_network_header(skb);
+ else
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_checksum_start(skb);
/* initialize outer IP header fields */
if (ip.v4->version == 4) {
ip.v4->tot_len = 0;
ip.v4->check = 0;
+
+ first->tx_flags |= I40E_TX_FLAGS_TSO;
} else {
ip.v6->payload_len = 0;
+ first->tx_flags |= I40E_TX_FLAGS_TSO;
}
if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
@@ -3100,10 +3110,9 @@ static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
/* pull values out of skb_shinfo */
gso_size = skb_shinfo(skb)->gso_size;
- gso_segs = skb_shinfo(skb)->gso_segs;
/* update GSO size and bytecount with header size */
- first->gso_segs = gso_segs;
+ first->gso_segs = skb_shinfo(skb)->gso_segs;
first->bytecount += (first->gso_segs - 1) * *hdr_len;
/* find the field values */
@@ -3187,13 +3196,27 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
unsigned char *exthdr;
u32 offset, cmd = 0;
__be16 frag_off;
+ __be16 protocol;
u8 l4_proto = 0;
if (skb->ip_summed != CHECKSUM_PARTIAL)
return 0;
- ip.hdr = skb_network_header(skb);
- l4.hdr = skb_transport_header(skb);
+ protocol = vlan_get_protocol(skb);
+
+ if (eth_p_mpls(protocol))
+ ip.hdr = skb_inner_network_header(skb);
+ else
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_checksum_start(skb);
+
+ /* set the tx_flags to indicate the IP protocol type. this is
+ * required so that checksum header computation below is accurate.
+ */
+ if (ip.v4->version == 4)
+ *tx_flags |= I40E_TX_FLAGS_IPV4;
+ else
+ *tx_flags |= I40E_TX_FLAGS_IPV6;
/* compute outer L2 header size */
offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
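
i40e_tso() and i40e_tx_enable_csum() now locate headers identically for MPLS-encapsulated packets: L3 is the inner network header when the EtherType is MPLS, and L4 always begins where checksumming starts. Factored into a sketch (example_locate_headers is ours):

	#include <net/mpls.h>

	static void example_locate_headers(struct sk_buff *skb,
					   unsigned char **l3,
					   unsigned char **l4)
	{
		__be16 proto = vlan_get_protocol(skb);

		*l3 = eth_p_mpls(proto) ? skb_inner_network_header(skb)
					: skb_network_header(skb);
		*l4 = skb_checksum_start(skb);	/* valid for CHECKSUM_PARTIAL */
	}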
@@ -3373,6 +3396,8 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
/* Memory barrier before checking head and tail */
smp_mb();
+ ++tx_ring->tx_stats.tx_stopped;
+
/* Check again in a case another CPU has just made room available. */
if (likely(I40E_DESC_UNUSED(tx_ring) < size))
return -EBUSY;
@@ -3749,7 +3774,6 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
struct i40e_tx_buffer *first;
u32 td_offset = 0;
u32 tx_flags = 0;
- __be16 protocol;
u32 td_cmd = 0;
u8 hdr_len = 0;
int tso, count;
@@ -3791,15 +3815,6 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
goto out_drop;
- /* obtain protocol of skb */
- protocol = vlan_get_protocol(skb);
-
- /* setup IPv4/IPv6 offloads */
- if (protocol == htons(ETH_P_IP))
- tx_flags |= I40E_TX_FLAGS_IPV4;
- else if (protocol == htons(ETH_P_IPV6))
- tx_flags |= I40E_TX_FLAGS_IPV6;
-
tso = i40e_tso(first, &hdr_len, &cd_type_cmd_tso_mss);
if (tso < 0)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index c471c2da313c..41f86e9535a0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -290,6 +290,7 @@ struct i40e_tx_queue_stats {
u64 tx_done_old;
u64 tx_linearize;
u64 tx_force_wb;
+ u64 tx_stopped;
int prev_pkt_ctr;
};
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 5b1198859da7..fde839ef0613 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3336,7 +3336,9 @@ static void ice_set_netdev_features(struct net_device *netdev)
vlano_features | tso_features;
/* add support for HW_CSUM on packets with MPLS header */
- netdev->mpls_features = NETIF_F_HW_CSUM;
+ netdev->mpls_features = NETIF_F_HW_CSUM |
+ NETIF_F_TSO |
+ NETIF_F_TSO6;
/* enable features */
netdev->features |= netdev->hw_features;
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 25b8f6f726eb..496250f9f8fc 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -30,12 +30,46 @@ static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0,
0x2, 0, 0, 0, 0, 0,
0x81, 0, 0, 0};
+enum {
+ ICE_PKT_VLAN = BIT(0),
+ ICE_PKT_OUTER_IPV6 = BIT(1),
+ ICE_PKT_TUN_GTPC = BIT(2),
+ ICE_PKT_TUN_GTPU = BIT(3),
+ ICE_PKT_TUN_NVGRE = BIT(4),
+ ICE_PKT_TUN_UDP = BIT(5),
+ ICE_PKT_INNER_IPV6 = BIT(6),
+ ICE_PKT_INNER_TCP = BIT(7),
+ ICE_PKT_INNER_UDP = BIT(8),
+ ICE_PKT_GTP_NOPAY = BIT(9),
+};
+
struct ice_dummy_pkt_offsets {
enum ice_protocol_type type;
u16 offset; /* ICE_PROTOCOL_LAST indicates end of list */
};
-static const struct ice_dummy_pkt_offsets dummy_gre_tcp_packet_offsets[] = {
+struct ice_dummy_pkt_profile {
+ const struct ice_dummy_pkt_offsets *offsets;
+ const u8 *pkt;
+ u32 match;
+ u16 pkt_len;
+};
+
+#define ICE_DECLARE_PKT_OFFSETS(type) \
+ static const struct ice_dummy_pkt_offsets \
+ ice_dummy_##type##_packet_offsets[]
+
+#define ICE_DECLARE_PKT_TEMPLATE(type) \
+ static const u8 ice_dummy_##type##_packet[]
+
+#define ICE_PKT_PROFILE(type, m) { \
+ .match = (m), \
+ .pkt = ice_dummy_##type##_packet, \
+ .pkt_len = sizeof(ice_dummy_##type##_packet), \
+ .offsets = ice_dummy_##type##_packet_offsets, \
+}
+
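+/* The three macros above let each dummy packet be declared once and
+ * referenced by name in a profile. A hypothetical use (the "example"
+ * packet and table name are ours, not part of this patch):
+ *
+ *	ICE_DECLARE_PKT_OFFSETS(example) = {
+ *		{ ICE_MAC_OFOS, 0 },
+ *		{ ICE_PROTOCOL_LAST, 0 },
+ *	};
+ *
+ *	ICE_DECLARE_PKT_TEMPLATE(example) = {
+ *		0x00, 0x00, 0x00, 0x00,	// ICE_MAC_OFOS 0
+ *	};
+ *
+ *	static const struct ice_dummy_pkt_profile example_profiles[] = {
+ *		ICE_PKT_PROFILE(example, ICE_PKT_INNER_TCP),
+ *	};
+ */
+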
+ICE_DECLARE_PKT_OFFSETS(gre_tcp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV4_OFOS, 14 },
@@ -47,7 +81,7 @@ static const struct ice_dummy_pkt_offsets dummy_gre_tcp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_gre_tcp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(gre_tcp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -82,7 +116,7 @@ static const u8 dummy_gre_tcp_packet[] = {
0x00, 0x00, 0x00, 0x00
};
-static const struct ice_dummy_pkt_offsets dummy_gre_udp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(gre_udp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV4_OFOS, 14 },
@@ -94,7 +128,7 @@ static const struct ice_dummy_pkt_offsets dummy_gre_udp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_gre_udp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(gre_udp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -126,7 +160,7 @@ static const u8 dummy_gre_udp_packet[] = {
0x00, 0x08, 0x00, 0x00,
};
-static const struct ice_dummy_pkt_offsets dummy_udp_tun_tcp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(udp_tun_tcp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV4_OFOS, 14 },
@@ -141,7 +175,7 @@ static const struct ice_dummy_pkt_offsets dummy_udp_tun_tcp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_udp_tun_tcp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_tcp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -179,7 +213,7 @@ static const u8 dummy_udp_tun_tcp_packet[] = {
0x00, 0x00, 0x00, 0x00
};
-static const struct ice_dummy_pkt_offsets dummy_udp_tun_udp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(udp_tun_udp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV4_OFOS, 14 },
@@ -194,7 +228,7 @@ static const struct ice_dummy_pkt_offsets dummy_udp_tun_udp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_udp_tun_udp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_udp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -229,8 +263,7 @@ static const u8 dummy_udp_tun_udp_packet[] = {
0x00, 0x08, 0x00, 0x00,
};
-static const struct ice_dummy_pkt_offsets
-dummy_gre_ipv6_tcp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(gre_ipv6_tcp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV4_OFOS, 14 },
@@ -242,7 +275,7 @@ dummy_gre_ipv6_tcp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_gre_ipv6_tcp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(gre_ipv6_tcp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -282,8 +315,7 @@ static const u8 dummy_gre_ipv6_tcp_packet[] = {
0x00, 0x00, 0x00, 0x00
};
-static const struct ice_dummy_pkt_offsets
-dummy_gre_ipv6_udp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(gre_ipv6_udp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV4_OFOS, 14 },
@@ -295,7 +327,7 @@ dummy_gre_ipv6_udp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_gre_ipv6_udp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(gre_ipv6_udp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -332,8 +364,7 @@ static const u8 dummy_gre_ipv6_udp_packet[] = {
0x00, 0x08, 0x00, 0x00,
};
-static const struct ice_dummy_pkt_offsets
-dummy_udp_tun_ipv6_tcp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(udp_tun_ipv6_tcp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV4_OFOS, 14 },
@@ -348,7 +379,7 @@ dummy_udp_tun_ipv6_tcp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_udp_tun_ipv6_tcp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_ipv6_tcp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -391,8 +422,7 @@ static const u8 dummy_udp_tun_ipv6_tcp_packet[] = {
0x00, 0x00, 0x00, 0x00
};
-static const struct ice_dummy_pkt_offsets
-dummy_udp_tun_ipv6_udp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(udp_tun_ipv6_udp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV4_OFOS, 14 },
@@ -407,7 +437,7 @@ dummy_udp_tun_ipv6_udp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_udp_tun_ipv6_udp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_ipv6_udp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -448,7 +478,7 @@ static const u8 dummy_udp_tun_ipv6_udp_packet[] = {
};
/* offset info for MAC + IPv4 + UDP dummy packet */
-static const struct ice_dummy_pkt_offsets dummy_udp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(udp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV4_OFOS, 14 },
@@ -457,7 +487,7 @@ static const struct ice_dummy_pkt_offsets dummy_udp_packet_offsets[] = {
};
/* Dummy packet for MAC + IPv4 + UDP */
-static const u8 dummy_udp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(udp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -477,7 +507,7 @@ static const u8 dummy_udp_packet[] = {
};
/* offset info for MAC + VLAN + IPv4 + UDP dummy packet */
-static const struct ice_dummy_pkt_offsets dummy_vlan_udp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(vlan_udp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_VLAN_OFOS, 12 },
{ ICE_ETYPE_OL, 16 },
@@ -487,7 +517,7 @@ static const struct ice_dummy_pkt_offsets dummy_vlan_udp_packet_offsets[] = {
};
/* C-tag (802.1Q), IPv4:UDP dummy packet */
-static const u8 dummy_vlan_udp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(vlan_udp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -509,7 +539,7 @@ static const u8 dummy_vlan_udp_packet[] = {
};
/* offset info for MAC + IPv4 + TCP dummy packet */
-static const struct ice_dummy_pkt_offsets dummy_tcp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(tcp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV4_OFOS, 14 },
@@ -518,7 +548,7 @@ static const struct ice_dummy_pkt_offsets dummy_tcp_packet_offsets[] = {
};
/* Dummy packet for MAC + IPv4 + TCP */
-static const u8 dummy_tcp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(tcp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -541,7 +571,7 @@ static const u8 dummy_tcp_packet[] = {
};
/* offset info for MAC + VLAN (C-tag, 802.1Q) + IPv4 + TCP dummy packet */
-static const struct ice_dummy_pkt_offsets dummy_vlan_tcp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(vlan_tcp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_VLAN_OFOS, 12 },
{ ICE_ETYPE_OL, 16 },
@@ -551,7 +581,7 @@ static const struct ice_dummy_pkt_offsets dummy_vlan_tcp_packet_offsets[] = {
};
/* C-tag (802.1Q), IPv4:TCP dummy packet */
-static const u8 dummy_vlan_tcp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(vlan_tcp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -575,7 +605,7 @@ static const u8 dummy_vlan_tcp_packet[] = {
0x00, 0x00, /* 2 bytes for 4 byte alignment */
};
-static const struct ice_dummy_pkt_offsets dummy_tcp_ipv6_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(tcp_ipv6) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV6_OFOS, 14 },
@@ -583,7 +613,7 @@ static const struct ice_dummy_pkt_offsets dummy_tcp_ipv6_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_tcp_ipv6_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(tcp_ipv6) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -611,8 +641,7 @@ static const u8 dummy_tcp_ipv6_packet[] = {
};
/* C-tag (802.1Q): IPv6 + TCP */
-static const struct ice_dummy_pkt_offsets
-dummy_vlan_tcp_ipv6_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(vlan_tcp_ipv6) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_VLAN_OFOS, 12 },
{ ICE_ETYPE_OL, 16 },
@@ -622,7 +651,7 @@ dummy_vlan_tcp_ipv6_packet_offsets[] = {
};
/* C-tag (802.1Q), IPv6 + TCP dummy packet */
-static const u8 dummy_vlan_tcp_ipv6_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(vlan_tcp_ipv6) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -652,7 +681,7 @@ static const u8 dummy_vlan_tcp_ipv6_packet[] = {
};
/* IPv6 + UDP */
-static const struct ice_dummy_pkt_offsets dummy_udp_ipv6_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(udp_ipv6) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_ETYPE_OL, 12 },
{ ICE_IPV6_OFOS, 14 },
@@ -661,7 +690,7 @@ static const struct ice_dummy_pkt_offsets dummy_udp_ipv6_packet_offsets[] = {
};
/* IPv6 + UDP dummy packet */
-static const u8 dummy_udp_ipv6_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(udp_ipv6) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -689,8 +718,7 @@ static const u8 dummy_udp_ipv6_packet[] = {
};
/* C-tag (802.1Q): IPv6 + UDP */
-static const struct ice_dummy_pkt_offsets
-dummy_vlan_udp_ipv6_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(vlan_udp_ipv6) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_VLAN_OFOS, 12 },
{ ICE_ETYPE_OL, 16 },
@@ -700,7 +728,7 @@ dummy_vlan_udp_ipv6_packet_offsets[] = {
};
/* C-tag (802.1Q), IPv6 + UDP dummy packet */
-static const u8 dummy_vlan_udp_ipv6_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(vlan_udp_ipv6) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -727,8 +755,7 @@ static const u8 dummy_vlan_udp_ipv6_packet[] = {
};
/* Outer IPv4 + Outer UDP + GTP + Inner IPv4 + Inner TCP */
-static const
-struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv4_tcp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4_tcp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_IPV4_OFOS, 14 },
{ ICE_UDP_OF, 34 },
@@ -738,7 +765,7 @@ struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv4_tcp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_ipv4_gtpu_ipv4_tcp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4_tcp) = {
0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -776,8 +803,7 @@ static const u8 dummy_ipv4_gtpu_ipv4_tcp_packet[] = {
};
/* Outer IPv4 + Outer UDP + GTP + Inner IPv4 + Inner UDP */
-static const
-struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv4_udp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4_udp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_IPV4_OFOS, 14 },
{ ICE_UDP_OF, 34 },
@@ -787,7 +813,7 @@ struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv4_udp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_ipv4_gtpu_ipv4_udp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4_udp) = {
0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -822,8 +848,7 @@ static const u8 dummy_ipv4_gtpu_ipv4_udp_packet[] = {
};
/* Outer IPv6 + Outer UDP + GTP + Inner IPv4 + Inner TCP */
-static const
-struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv6_tcp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv6_tcp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_IPV4_OFOS, 14 },
{ ICE_UDP_OF, 34 },
@@ -833,7 +858,7 @@ struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv6_tcp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_ipv4_gtpu_ipv6_tcp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv6_tcp) = {
0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -875,8 +900,7 @@ static const u8 dummy_ipv4_gtpu_ipv6_tcp_packet[] = {
0x00, 0x00, /* 2 bytes for 4 byte alignment */
};
-static const
-struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv6_udp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv6_udp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_IPV4_OFOS, 14 },
{ ICE_UDP_OF, 34 },
@@ -886,7 +910,7 @@ struct ice_dummy_pkt_offsets dummy_ipv4_gtpu_ipv6_udp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_ipv4_gtpu_ipv6_udp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv6_udp) = {
0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -925,8 +949,7 @@ static const u8 dummy_ipv4_gtpu_ipv6_udp_packet[] = {
0x00, 0x00, /* 2 bytes for 4 byte alignment */
};
-static const
-struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv4_tcp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv4_tcp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_IPV6_OFOS, 14 },
{ ICE_UDP_OF, 54 },
@@ -936,7 +959,7 @@ struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv4_tcp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_ipv6_gtpu_ipv4_tcp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv4_tcp) = {
0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -978,8 +1001,7 @@ static const u8 dummy_ipv6_gtpu_ipv4_tcp_packet[] = {
0x00, 0x00, /* 2 bytes for 4 byte alignment */
};
-static const
-struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv4_udp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv4_udp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_IPV6_OFOS, 14 },
{ ICE_UDP_OF, 54 },
@@ -989,7 +1011,7 @@ struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv4_udp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_ipv6_gtpu_ipv4_udp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv4_udp) = {
0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -1028,8 +1050,7 @@ static const u8 dummy_ipv6_gtpu_ipv4_udp_packet[] = {
0x00, 0x00, /* 2 bytes for 4 byte alignment */
};
-static const
-struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv6_tcp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv6_tcp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_IPV6_OFOS, 14 },
{ ICE_UDP_OF, 54 },
@@ -1039,7 +1060,7 @@ struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv6_tcp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_ipv6_gtpu_ipv6_tcp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv6_tcp) = {
0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -1086,8 +1107,7 @@ static const u8 dummy_ipv6_gtpu_ipv6_tcp_packet[] = {
0x00, 0x00, /* 2 bytes for 4 byte alignment */
};
-static const
-struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv6_udp_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv6_udp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_IPV6_OFOS, 14 },
{ ICE_UDP_OF, 54 },
@@ -1097,7 +1117,7 @@ struct ice_dummy_pkt_offsets dummy_ipv6_gtpu_ipv6_udp_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_ipv6_gtpu_ipv6_udp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv6_udp) = {
0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -1141,7 +1161,15 @@ static const u8 dummy_ipv6_gtpu_ipv6_udp_packet[] = {
0x00, 0x00, /* 2 bytes for 4 byte alignment */
};
-static const u8 dummy_ipv4_gtpu_ipv4_packet[] = {
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4) = {
+ { ICE_MAC_OFOS, 0 },
+ { ICE_IPV4_OFOS, 14 },
+ { ICE_UDP_OF, 34 },
+ { ICE_GTP_NO_PAY, 42 },
+ { ICE_PROTOCOL_LAST, 0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -1171,17 +1199,7 @@ static const u8 dummy_ipv4_gtpu_ipv4_packet[] = {
0x00, 0x00,
};
-static const
-struct ice_dummy_pkt_offsets dummy_ipv4_gtp_no_pay_packet_offsets[] = {
- { ICE_MAC_OFOS, 0 },
- { ICE_IPV4_OFOS, 14 },
- { ICE_UDP_OF, 34 },
- { ICE_GTP_NO_PAY, 42 },
- { ICE_PROTOCOL_LAST, 0 },
-};
-
-static const
-struct ice_dummy_pkt_offsets dummy_ipv6_gtp_no_pay_packet_offsets[] = {
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtp) = {
{ ICE_MAC_OFOS, 0 },
{ ICE_IPV6_OFOS, 14 },
{ ICE_UDP_OF, 54 },
@@ -1189,7 +1207,7 @@ struct ice_dummy_pkt_offsets dummy_ipv6_gtp_no_pay_packet_offsets[] = {
{ ICE_PROTOCOL_LAST, 0 },
};
-static const u8 dummy_ipv6_gtp_packet[] = {
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtp) = {
0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
@@ -1215,6 +1233,55 @@ static const u8 dummy_ipv6_gtp_packet[] = {
0x00, 0x00,
};
+static const struct ice_dummy_pkt_profile ice_dummy_pkt_profiles[] = {
+ ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPU | ICE_PKT_OUTER_IPV6 |
+ ICE_PKT_GTP_NOPAY),
+ ICE_PKT_PROFILE(ipv6_gtpu_ipv6_udp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_OUTER_IPV6 |
+ ICE_PKT_INNER_IPV6 |
+ ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(ipv6_gtpu_ipv6_tcp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_OUTER_IPV6 |
+ ICE_PKT_INNER_IPV6),
+ ICE_PKT_PROFILE(ipv6_gtpu_ipv4_udp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_OUTER_IPV6 |
+ ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(ipv6_gtpu_ipv4_tcp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_OUTER_IPV6),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv4, ICE_PKT_TUN_GTPU | ICE_PKT_GTP_NOPAY),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv6_udp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_INNER_IPV6 |
+ ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv6_tcp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_INNER_IPV6),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv4_udp, ICE_PKT_TUN_GTPU |
+ ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv4_tcp, ICE_PKT_TUN_GTPU),
+ ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPC | ICE_PKT_OUTER_IPV6),
+ ICE_PKT_PROFILE(ipv4_gtpu_ipv4, ICE_PKT_TUN_GTPC),
+ ICE_PKT_PROFILE(gre_ipv6_tcp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_IPV6 |
+ ICE_PKT_INNER_TCP),
+ ICE_PKT_PROFILE(gre_tcp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_TCP),
+ ICE_PKT_PROFILE(gre_ipv6_udp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_IPV6),
+ ICE_PKT_PROFILE(gre_udp, ICE_PKT_TUN_NVGRE),
+ ICE_PKT_PROFILE(udp_tun_ipv6_tcp, ICE_PKT_TUN_UDP |
+ ICE_PKT_INNER_IPV6 |
+ ICE_PKT_INNER_TCP),
+ ICE_PKT_PROFILE(udp_tun_tcp, ICE_PKT_TUN_UDP | ICE_PKT_INNER_TCP),
+ ICE_PKT_PROFILE(udp_tun_ipv6_udp, ICE_PKT_TUN_UDP |
+ ICE_PKT_INNER_IPV6),
+ ICE_PKT_PROFILE(udp_tun_udp, ICE_PKT_TUN_UDP),
+ ICE_PKT_PROFILE(vlan_udp_ipv6, ICE_PKT_OUTER_IPV6 | ICE_PKT_INNER_UDP |
+ ICE_PKT_VLAN),
+ ICE_PKT_PROFILE(udp_ipv6, ICE_PKT_OUTER_IPV6 | ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(vlan_udp, ICE_PKT_INNER_UDP | ICE_PKT_VLAN),
+ ICE_PKT_PROFILE(udp, ICE_PKT_INNER_UDP),
+ ICE_PKT_PROFILE(vlan_tcp_ipv6, ICE_PKT_OUTER_IPV6 | ICE_PKT_VLAN),
+ ICE_PKT_PROFILE(tcp_ipv6, ICE_PKT_OUTER_IPV6),
+ ICE_PKT_PROFILE(vlan_tcp, ICE_PKT_VLAN),
+ ICE_PKT_PROFILE(tcp, 0),
+};
+
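The profile table is ordered most-specific-first and terminated by ICE_PKT_PROFILE(tcp, 0), whose empty match mask accepts anything; that sentinel is what lets the lookup loop further below run without a bounds check. ICE_PKT_PROFILE() itself is defined earlier in the patch; it plausibly bundles the template, its length and the offsets array under the match bitmap, along these lines (the field names are confirmed by the code that consumes the table):

/* Assumed shape of ICE_PKT_PROFILE(), not quoted in this diff */
#define ICE_PKT_PROFILE(type, m) {				\
	.match		= (m),					\
	.pkt		= ice_dummy_pkt_##type,			\
	.pkt_len	= sizeof(ice_dummy_pkt_##type),		\
	.offsets	= ice_dummy_pkt_##type##_offsets,	\
}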
#define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE \
(offsetof(struct ice_aqc_sw_rules_elem, pdata.lkup_tx_rx.hdr) + \
(DUMMY_ETH_HDR_LEN * \
@@ -5501,212 +5568,66 @@ err_free_lkup_exts:
* structure per protocol header
* @lkups_cnt: number of protocols
* @tun_type: tunnel type
- * @pkt: dummy packet to fill according to filter match criteria
- * @pkt_len: packet length of dummy packet
- * @offsets: pointer to receive the pointer to the offsets for the packet
+ *
+ * Returns the &ice_dummy_pkt_profile corresponding to these lookup params.
*/
-static void
+static const struct ice_dummy_pkt_profile *
ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
- enum ice_sw_tunnel_type tun_type,
- const u8 **pkt, u16 *pkt_len,
- const struct ice_dummy_pkt_offsets **offsets)
+ enum ice_sw_tunnel_type tun_type)
{
- bool inner_tcp = false, inner_udp = false, outer_ipv6 = false;
- bool vlan = false, inner_ipv6 = false, gtp_no_pay = false;
+ const struct ice_dummy_pkt_profile *ret = ice_dummy_pkt_profiles;
+ u32 match = 0;
u16 i;
+ switch (tun_type) {
+ case ICE_SW_TUN_GTPC:
+ match |= ICE_PKT_TUN_GTPC;
+ break;
+ case ICE_SW_TUN_GTPU:
+ match |= ICE_PKT_TUN_GTPU;
+ break;
+ case ICE_SW_TUN_NVGRE:
+ match |= ICE_PKT_TUN_NVGRE;
+ break;
+ case ICE_SW_TUN_GENEVE:
+ case ICE_SW_TUN_VXLAN:
+ match |= ICE_PKT_TUN_UDP;
+ break;
+ default:
+ break;
+ }
+
for (i = 0; i < lkups_cnt; i++) {
if (lkups[i].type == ICE_UDP_ILOS)
- inner_udp = true;
+ match |= ICE_PKT_INNER_UDP;
else if (lkups[i].type == ICE_TCP_IL)
- inner_tcp = true;
+ match |= ICE_PKT_INNER_TCP;
else if (lkups[i].type == ICE_IPV6_OFOS)
- outer_ipv6 = true;
+ match |= ICE_PKT_OUTER_IPV6;
else if (lkups[i].type == ICE_VLAN_OFOS)
- vlan = true;
+ match |= ICE_PKT_VLAN;
else if (lkups[i].type == ICE_ETYPE_OL &&
lkups[i].h_u.ethertype.ethtype_id ==
cpu_to_be16(ICE_IPV6_ETHER_ID) &&
lkups[i].m_u.ethertype.ethtype_id ==
cpu_to_be16(0xFFFF))
- outer_ipv6 = true;
+ match |= ICE_PKT_OUTER_IPV6;
else if (lkups[i].type == ICE_ETYPE_IL &&
lkups[i].h_u.ethertype.ethtype_id ==
cpu_to_be16(ICE_IPV6_ETHER_ID) &&
lkups[i].m_u.ethertype.ethtype_id ==
cpu_to_be16(0xFFFF))
- inner_ipv6 = true;
+ match |= ICE_PKT_INNER_IPV6;
else if (lkups[i].type == ICE_IPV6_IL)
- inner_ipv6 = true;
+ match |= ICE_PKT_INNER_IPV6;
else if (lkups[i].type == ICE_GTP_NO_PAY)
- gtp_no_pay = true;
+ match |= ICE_PKT_GTP_NOPAY;
}
- if (tun_type == ICE_SW_TUN_GTPU) {
- if (outer_ipv6) {
- if (gtp_no_pay) {
- *pkt = dummy_ipv6_gtp_packet;
- *pkt_len = sizeof(dummy_ipv6_gtp_packet);
- *offsets = dummy_ipv6_gtp_no_pay_packet_offsets;
- } else if (inner_ipv6) {
- if (inner_udp) {
- *pkt = dummy_ipv6_gtpu_ipv6_udp_packet;
- *pkt_len = sizeof(dummy_ipv6_gtpu_ipv6_udp_packet);
- *offsets = dummy_ipv6_gtpu_ipv6_udp_packet_offsets;
- } else {
- *pkt = dummy_ipv6_gtpu_ipv6_tcp_packet;
- *pkt_len = sizeof(dummy_ipv6_gtpu_ipv6_tcp_packet);
- *offsets = dummy_ipv6_gtpu_ipv6_tcp_packet_offsets;
- }
- } else {
- if (inner_udp) {
- *pkt = dummy_ipv6_gtpu_ipv4_udp_packet;
- *pkt_len = sizeof(dummy_ipv6_gtpu_ipv4_udp_packet);
- *offsets = dummy_ipv6_gtpu_ipv4_udp_packet_offsets;
- } else {
- *pkt = dummy_ipv6_gtpu_ipv4_tcp_packet;
- *pkt_len = sizeof(dummy_ipv6_gtpu_ipv4_tcp_packet);
- *offsets = dummy_ipv6_gtpu_ipv4_tcp_packet_offsets;
- }
- }
- } else {
- if (gtp_no_pay) {
- *pkt = dummy_ipv4_gtpu_ipv4_packet;
- *pkt_len = sizeof(dummy_ipv4_gtpu_ipv4_packet);
- *offsets = dummy_ipv4_gtp_no_pay_packet_offsets;
- } else if (inner_ipv6) {
- if (inner_udp) {
- *pkt = dummy_ipv4_gtpu_ipv6_udp_packet;
- *pkt_len = sizeof(dummy_ipv4_gtpu_ipv6_udp_packet);
- *offsets = dummy_ipv4_gtpu_ipv6_udp_packet_offsets;
- } else {
- *pkt = dummy_ipv4_gtpu_ipv6_tcp_packet;
- *pkt_len = sizeof(dummy_ipv4_gtpu_ipv6_tcp_packet);
- *offsets = dummy_ipv4_gtpu_ipv6_tcp_packet_offsets;
- }
- } else {
- if (inner_udp) {
- *pkt = dummy_ipv4_gtpu_ipv4_udp_packet;
- *pkt_len = sizeof(dummy_ipv4_gtpu_ipv4_udp_packet);
- *offsets = dummy_ipv4_gtpu_ipv4_udp_packet_offsets;
- } else {
- *pkt = dummy_ipv4_gtpu_ipv4_tcp_packet;
- *pkt_len = sizeof(dummy_ipv4_gtpu_ipv4_tcp_packet);
- *offsets = dummy_ipv4_gtpu_ipv4_tcp_packet_offsets;
- }
- }
- }
- return;
- }
-
- if (tun_type == ICE_SW_TUN_GTPC) {
- if (outer_ipv6) {
- *pkt = dummy_ipv6_gtp_packet;
- *pkt_len = sizeof(dummy_ipv6_gtp_packet);
- *offsets = dummy_ipv6_gtp_no_pay_packet_offsets;
- } else {
- *pkt = dummy_ipv4_gtpu_ipv4_packet;
- *pkt_len = sizeof(dummy_ipv4_gtpu_ipv4_packet);
- *offsets = dummy_ipv4_gtp_no_pay_packet_offsets;
- }
- return;
- }
-
- if (tun_type == ICE_SW_TUN_NVGRE) {
- if (inner_tcp && inner_ipv6) {
- *pkt = dummy_gre_ipv6_tcp_packet;
- *pkt_len = sizeof(dummy_gre_ipv6_tcp_packet);
- *offsets = dummy_gre_ipv6_tcp_packet_offsets;
- return;
- }
- if (inner_tcp) {
- *pkt = dummy_gre_tcp_packet;
- *pkt_len = sizeof(dummy_gre_tcp_packet);
- *offsets = dummy_gre_tcp_packet_offsets;
- return;
- }
- if (inner_ipv6) {
- *pkt = dummy_gre_ipv6_udp_packet;
- *pkt_len = sizeof(dummy_gre_ipv6_udp_packet);
- *offsets = dummy_gre_ipv6_udp_packet_offsets;
- return;
- }
- *pkt = dummy_gre_udp_packet;
- *pkt_len = sizeof(dummy_gre_udp_packet);
- *offsets = dummy_gre_udp_packet_offsets;
- return;
- }
-
- if (tun_type == ICE_SW_TUN_VXLAN ||
- tun_type == ICE_SW_TUN_GENEVE) {
- if (inner_tcp && inner_ipv6) {
- *pkt = dummy_udp_tun_ipv6_tcp_packet;
- *pkt_len = sizeof(dummy_udp_tun_ipv6_tcp_packet);
- *offsets = dummy_udp_tun_ipv6_tcp_packet_offsets;
- return;
- }
- if (inner_tcp) {
- *pkt = dummy_udp_tun_tcp_packet;
- *pkt_len = sizeof(dummy_udp_tun_tcp_packet);
- *offsets = dummy_udp_tun_tcp_packet_offsets;
- return;
- }
- if (inner_ipv6) {
- *pkt = dummy_udp_tun_ipv6_udp_packet;
- *pkt_len = sizeof(dummy_udp_tun_ipv6_udp_packet);
- *offsets = dummy_udp_tun_ipv6_udp_packet_offsets;
- return;
- }
- *pkt = dummy_udp_tun_udp_packet;
- *pkt_len = sizeof(dummy_udp_tun_udp_packet);
- *offsets = dummy_udp_tun_udp_packet_offsets;
- return;
- }
+ while (ret->match && (match & ret->match) != ret->match)
+ ret++;
- if (inner_udp && !outer_ipv6) {
- if (vlan) {
- *pkt = dummy_vlan_udp_packet;
- *pkt_len = sizeof(dummy_vlan_udp_packet);
- *offsets = dummy_vlan_udp_packet_offsets;
- return;
- }
- *pkt = dummy_udp_packet;
- *pkt_len = sizeof(dummy_udp_packet);
- *offsets = dummy_udp_packet_offsets;
- return;
- } else if (inner_udp && outer_ipv6) {
- if (vlan) {
- *pkt = dummy_vlan_udp_ipv6_packet;
- *pkt_len = sizeof(dummy_vlan_udp_ipv6_packet);
- *offsets = dummy_vlan_udp_ipv6_packet_offsets;
- return;
- }
- *pkt = dummy_udp_ipv6_packet;
- *pkt_len = sizeof(dummy_udp_ipv6_packet);
- *offsets = dummy_udp_ipv6_packet_offsets;
- return;
- } else if ((inner_tcp && outer_ipv6) || outer_ipv6) {
- if (vlan) {
- *pkt = dummy_vlan_tcp_ipv6_packet;
- *pkt_len = sizeof(dummy_vlan_tcp_ipv6_packet);
- *offsets = dummy_vlan_tcp_ipv6_packet_offsets;
- return;
- }
- *pkt = dummy_tcp_ipv6_packet;
- *pkt_len = sizeof(dummy_tcp_ipv6_packet);
- *offsets = dummy_tcp_ipv6_packet_offsets;
- return;
- }
-
- if (vlan) {
- *pkt = dummy_vlan_tcp_packet;
- *pkt_len = sizeof(dummy_vlan_tcp_packet);
- *offsets = dummy_vlan_tcp_packet_offsets;
- } else {
- *pkt = dummy_tcp_packet;
- *pkt_len = sizeof(dummy_tcp_packet);
- *offsets = dummy_tcp_packet_offsets;
- }
+ return ret;
}
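Because of the table ordering, this linear scan returns the most specific template whose requirements are a subset of the requested match bits. The subset test, shown standalone with a hypothetical helper name:

/* Illustrative helper, not part of the patch */
static bool ice_pkt_profile_fits(u32 match, u32 profile_match)
{
	/* true when every bit the profile requires was requested */
	return (match & profile_match) == profile_match;
}

For example, a VXLAN filter with inner IPv6 UDP lookups builds match = ICE_PKT_TUN_UDP | ICE_PKT_INNER_IPV6 | ICE_PKT_INNER_UDP; the scan skips the TCP variants (they require ICE_PKT_INNER_TCP, which is absent) and stops at udp_tun_ipv6_udp.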
/**
@@ -5716,15 +5637,12 @@ ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
* structure per protocol header
* @lkups_cnt: number of protocols
* @s_rule: stores rule information from the match criteria
- * @dummy_pkt: dummy packet to fill according to filter match criteria
- * @pkt_len: packet length of dummy packet
- * @offsets: offset info for the dummy packet
+ * @profile: dummy packet profile (the template, its size and header offsets)
*/
static int
ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
struct ice_aqc_sw_rules_elem *s_rule,
- const u8 *dummy_pkt, u16 pkt_len,
- const struct ice_dummy_pkt_offsets *offsets)
+ const struct ice_dummy_pkt_profile *profile)
{
u8 *pkt;
u16 i;
@@ -5734,9 +5652,10 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
*/
pkt = s_rule->pdata.lkup_tx_rx.hdr;
- memcpy(pkt, dummy_pkt, pkt_len);
+ memcpy(pkt, profile->pkt, profile->pkt_len);
for (i = 0; i < lkups_cnt; i++) {
+ const struct ice_dummy_pkt_offsets *offsets = profile->offsets;
enum ice_protocol_type type;
u16 offset = 0, len = 0, j;
bool found = false;
@@ -5810,16 +5729,18 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
* indicated by the mask to make sure we don't improperly write
* over any significant packet data.
*/
- for (j = 0; j < len / sizeof(u16); j++)
- if (((u16 *)&lkups[i].m_u)[j])
- ((u16 *)(pkt + offset))[j] =
- (((u16 *)(pkt + offset))[j] &
- ~((u16 *)&lkups[i].m_u)[j]) |
- (((u16 *)&lkups[i].h_u)[j] &
- ((u16 *)&lkups[i].m_u)[j]);
+ for (j = 0; j < len / sizeof(u16); j++) {
+ u16 *ptr = (u16 *)(pkt + offset);
+ u16 mask = lkups[i].m_raw[j];
+
+ if (!mask)
+ continue;
+
+ ptr[j] = (ptr[j] & ~mask) | (lkups[i].h_raw[j] & mask);
+ }
}
- s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(pkt_len);
+ s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(profile->pkt_len);
return 0;
}
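The rewritten inner loop is a plain read-modify-write merge per 16-bit word; an equivalent standalone helper (hypothetical name) with a worked value:

/* Illustrative only -- equivalent of the merge in the loop above */
static u16 ice_merge_word(u16 pkt, u16 hdr, u16 mask)
{
	/* keep template bits outside the mask, take header bits inside it */
	return (pkt & ~mask) | (hdr & mask);
}

/* e.g. ice_merge_word(0x1234, 0xABCD, 0x00FF) == 0x12CD: the high byte
 * survives from the template, the low byte comes from the filter's
 * header value.
 */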
@@ -6042,12 +5963,11 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
struct ice_rule_query_data *added_entry)
{
struct ice_adv_fltr_mgmt_list_entry *m_entry, *adv_fltr = NULL;
- u16 rid = 0, i, pkt_len, rule_buf_sz, vsi_handle;
- const struct ice_dummy_pkt_offsets *pkt_offsets;
struct ice_aqc_sw_rules_elem *s_rule = NULL;
+ const struct ice_dummy_pkt_profile *profile;
+ u16 rid = 0, i, rule_buf_sz, vsi_handle;
struct list_head *rule_head;
struct ice_switch_info *sw;
- const u8 *pkt = NULL;
u16 word_cnt;
u32 act = 0;
int status;
@@ -6065,24 +5985,18 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
/* get # of words we need to match */
word_cnt = 0;
for (i = 0; i < lkups_cnt; i++) {
- u16 j, *ptr;
+ u16 j;
- ptr = (u16 *)&lkups[i].m_u;
- for (j = 0; j < sizeof(lkups->m_u) / sizeof(u16); j++)
- if (ptr[j] != 0)
+ for (j = 0; j < ARRAY_SIZE(lkups->m_raw); j++)
+ if (lkups[i].m_raw[j])
word_cnt++;
}
if (!word_cnt || word_cnt > ICE_MAX_CHAIN_WORDS)
return -EINVAL;
- /* make sure that we can locate a dummy packet */
- ice_find_dummy_packet(lkups, lkups_cnt, rinfo->tun_type, &pkt, &pkt_len,
- &pkt_offsets);
- if (!pkt) {
- status = -EINVAL;
- goto err_ice_add_adv_rule;
- }
+ /* locate a dummy packet */
+ profile = ice_find_dummy_packet(lkups, lkups_cnt, rinfo->tun_type);
if (!(rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI ||
rinfo->sw_act.fltr_act == ICE_FWD_TO_Q ||
@@ -6123,7 +6037,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
}
return status;
}
- rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE + pkt_len;
+ rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE + profile->pkt_len;
s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
if (!s_rule)
return -ENOMEM;
@@ -6183,8 +6097,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(rid);
s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act);
- status = ice_fill_adv_dummy_packet(lkups, lkups_cnt, s_rule, pkt,
- pkt_len, pkt_offsets);
+ status = ice_fill_adv_dummy_packet(lkups, lkups_cnt, s_rule, profile);
if (status)
goto err_ice_add_adv_rule;
@@ -6192,7 +6105,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
rinfo->tun_type != ICE_SW_TUN_AND_NON_TUN) {
status = ice_fill_adv_packet_tun(hw, rinfo->tun_type,
s_rule->pdata.lkup_tx_rx.hdr,
- pkt_offsets);
+ profile->offsets);
if (status)
goto err_ice_add_adv_rule;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index ed3d1d03befa..ecac75e71395 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -138,8 +138,16 @@ struct ice_update_recipe_lkup_idx_params {
struct ice_adv_lkup_elem {
enum ice_protocol_type type;
- union ice_prot_hdr h_u; /* Header values */
- union ice_prot_hdr m_u; /* Mask of header values to match */
+ union {
+ union ice_prot_hdr h_u; /* Header values */
+ /* Used to iterate over the headers */
+ u16 h_raw[sizeof(union ice_prot_hdr) / sizeof(u16)];
+ };
+ union {
+ union ice_prot_hdr m_u; /* Mask of header values to match */
+ /* Used to iterate over header mask */
+ u16 m_raw[sizeof(union ice_prot_hdr) / sizeof(u16)];
+ };
};
struct ice_sw_act_ctrl {
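The anonymous unions give every lookup element a second, word-granular view of the same storage, so callers can iterate without the (u16 *)& casts the old code needed. The idiom in miniature (kernel types assumed, simplified structures):

/* Minimal sketch of the aliasing idiom, not the real ice structures */
union hdr_words {
	struct { u16 sport, dport; } hdr;	/* typed view */
	u16 raw[2];				/* iteration view */
};

static u16 count_nonzero_words(const union hdr_words *w)
{
	u16 cnt = 0, i;

	for (i = 0; i < ARRAY_SIZE(w->raw); i++)
		if (w->raw[i])
			cnt++;
	return cnt;
}

This is exactly what the reworked word_cnt loop in ice_add_adv_rule() does with m_raw.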
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index f9bf008471c9..3f8b7274ed2f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -8,6 +8,7 @@
#include <linux/prefetch.h>
#include <linux/bpf_trace.h>
#include <net/dsfield.h>
+#include <net/mpls.h>
#include <net/xdp.h>
#include "ice_txrx_lib.h"
#include "ice_lib.h"
@@ -1748,18 +1749,24 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
if (skb->ip_summed != CHECKSUM_PARTIAL)
return 0;
- ip.hdr = skb_network_header(skb);
- l4.hdr = skb_transport_header(skb);
+ protocol = vlan_get_protocol(skb);
+
+ if (eth_p_mpls(protocol))
+ ip.hdr = skb_inner_network_header(skb);
+ else
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_checksum_start(skb);
/* compute outer L2 header size */
l2_len = ip.hdr - skb->data;
offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S;
- protocol = vlan_get_protocol(skb);
-
- if (protocol == htons(ETH_P_IP))
+	/* Set the tx_flags to indicate the IP protocol type. This is
+ * required so that checksum header computation below is accurate.
+ */
+ if (ip.v4->version == 4)
first->tx_flags |= ICE_TX_FLAGS_IPV4;
- else if (protocol == htons(ETH_P_IPV6))
+ else if (ip.v6->version == 6)
first->tx_flags |= ICE_TX_FLAGS_IPV6;
if (skb->encapsulation) {
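For MPLS-encapsulated frames vlan_get_protocol() returns the MPLS ethertype, so the old ETH_P_IP/ETH_P_IPV6 comparisons could never set the IP flags; reading the version nibble from the resolved IP header works for plain and MPLS traffic alike. A worked case, as comments (illustrative, not part of the patch):

/* For an MPLS-encapsulated IPv4 TCP frame:
 *   vlan_get_protocol(skb) == htons(ETH_P_MPLS_UC)  -> old checks fail
 *   ip.v4->version         == 4                     -> new check works
 * skb_checksum_start() likewise points at the L4 header that needs
 * checksumming regardless of the encapsulation.
 */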
@@ -1957,6 +1964,7 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
unsigned char *hdr;
} l4;
u64 cd_mss, cd_tso_len;
+ __be16 protocol;
u32 paylen;
u8 l4_start;
int err;
@@ -1972,8 +1980,13 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
return err;
/* cppcheck-suppress unreadVariable */
- ip.hdr = skb_network_header(skb);
- l4.hdr = skb_transport_header(skb);
+ protocol = vlan_get_protocol(skb);
+
+ if (eth_p_mpls(protocol))
+ ip.hdr = skb_inner_network_header(skb);
+ else
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_checksum_start(skb);
/* initialize outer IP header fields */
if (ip.v4->version == 4) {
diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index fe0989c0fc25..4cb55724001b 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -177,6 +177,7 @@ config SKY2_DEBUG
source "drivers/net/ethernet/marvell/octeontx2/Kconfig"
+source "drivers/net/ethernet/marvell/octeon_ep/Kconfig"
source "drivers/net/ethernet/marvell/prestera/Kconfig"
endif # NET_VENDOR_MARVELL
diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile
index 9f88fe822555..ceba4aa4f026 100644
--- a/drivers/net/ethernet/marvell/Makefile
+++ b/drivers/net/ethernet/marvell/Makefile
@@ -11,5 +11,6 @@ obj-$(CONFIG_MVPP2) += mvpp2/
obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o
obj-$(CONFIG_SKGE) += skge.o
obj-$(CONFIG_SKY2) += sky2.o
+obj-y += octeon_ep/
obj-y += octeontx2/
obj-y += prestera/
diff --git a/drivers/net/ethernet/marvell/octeon_ep/Kconfig b/drivers/net/ethernet/marvell/octeon_ep/Kconfig
new file mode 100644
index 000000000000..0d7db815340e
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/Kconfig
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Marvell's Octeon PCI Endpoint NIC Driver Configuration
+#
+
+config OCTEON_EP
+ tristate "Marvell Octeon PCI Endpoint NIC Driver"
+ depends on 64BIT
+ depends on PCI
+ depends on PTP_1588_CLOCK_OPTIONAL
+ help
+ This driver supports networking functionality of Marvell's
+ Octeon PCI Endpoint NIC.
+
+	  To know the list of devices supported by this driver, refer to
+	  the documentation in
+ <file:Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called octeon_ep.
diff --git a/drivers/net/ethernet/marvell/octeon_ep/Makefile b/drivers/net/ethernet/marvell/octeon_ep/Makefile
new file mode 100644
index 000000000000..2026c8118158
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Network driver for Marvell's Octeon PCI Endpoint NIC
+#
+
+obj-$(CONFIG_OCTEON_EP) += octeon_ep.o
+
+octeon_ep-y := octep_main.o octep_cn9k_pf.o octep_tx.o octep_rx.o \
+ octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
new file mode 100644
index 000000000000..1e47143c596d
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c
@@ -0,0 +1,737 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include "octep_config.h"
+#include "octep_main.h"
+#include "octep_regs_cn9k_pf.h"
+
+/* Names of Hardware non-queue generic interrupts */
+static char *cn93_non_ioq_msix_names[] = {
+ "epf_ire_rint",
+ "epf_ore_rint",
+ "epf_vfire_rint0",
+ "epf_vfire_rint1",
+ "epf_vfore_rint0",
+ "epf_vfore_rint1",
+ "epf_mbox_rint0",
+ "epf_mbox_rint1",
+ "epf_oei_rint",
+ "epf_dma_rint",
+ "epf_dma_vf_rint0",
+ "epf_dma_vf_rint1",
+ "epf_pp_vf_rint0",
+ "epf_pp_vf_rint1",
+ "epf_misc_rint",
+ "epf_rsvd",
+};
+
+/* Dump useful hardware CSRs for debug purposes */
+static void cn93_dump_regs(struct octep_device *oct, int qno)
+{
+ struct device *dev = &oct->pdev->dev;
+
+ dev_info(dev, "IQ-%d register dump\n", qno);
+ dev_info(dev, "R[%d]_IN_INSTR_DBELL[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_IN_INSTR_DBELL(qno),
+ octep_read_csr64(oct, CN93_SDP_R_IN_INSTR_DBELL(qno)));
+ dev_info(dev, "R[%d]_IN_CONTROL[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_IN_CONTROL(qno),
+ octep_read_csr64(oct, CN93_SDP_R_IN_CONTROL(qno)));
+ dev_info(dev, "R[%d]_IN_ENABLE[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_IN_ENABLE(qno),
+ octep_read_csr64(oct, CN93_SDP_R_IN_ENABLE(qno)));
+ dev_info(dev, "R[%d]_IN_INSTR_BADDR[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_IN_INSTR_BADDR(qno),
+ octep_read_csr64(oct, CN93_SDP_R_IN_INSTR_BADDR(qno)));
+ dev_info(dev, "R[%d]_IN_INSTR_RSIZE[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_IN_INSTR_RSIZE(qno),
+ octep_read_csr64(oct, CN93_SDP_R_IN_INSTR_RSIZE(qno)));
+ dev_info(dev, "R[%d]_IN_CNTS[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_IN_CNTS(qno),
+ octep_read_csr64(oct, CN93_SDP_R_IN_CNTS(qno)));
+ dev_info(dev, "R[%d]_IN_INT_LEVELS[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_IN_INT_LEVELS(qno),
+ octep_read_csr64(oct, CN93_SDP_R_IN_INT_LEVELS(qno)));
+ dev_info(dev, "R[%d]_IN_PKT_CNT[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_IN_PKT_CNT(qno),
+ octep_read_csr64(oct, CN93_SDP_R_IN_PKT_CNT(qno)));
+ dev_info(dev, "R[%d]_IN_BYTE_CNT[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_IN_BYTE_CNT(qno),
+ octep_read_csr64(oct, CN93_SDP_R_IN_BYTE_CNT(qno)));
+
+ dev_info(dev, "OQ-%d register dump\n", qno);
+ dev_info(dev, "R[%d]_OUT_SLIST_DBELL[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_OUT_SLIST_DBELL(qno),
+ octep_read_csr64(oct, CN93_SDP_R_OUT_SLIST_DBELL(qno)));
+ dev_info(dev, "R[%d]_OUT_CONTROL[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_OUT_CONTROL(qno),
+ octep_read_csr64(oct, CN93_SDP_R_OUT_CONTROL(qno)));
+ dev_info(dev, "R[%d]_OUT_ENABLE[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_OUT_ENABLE(qno),
+ octep_read_csr64(oct, CN93_SDP_R_OUT_ENABLE(qno)));
+ dev_info(dev, "R[%d]_OUT_SLIST_BADDR[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_OUT_SLIST_BADDR(qno),
+ octep_read_csr64(oct, CN93_SDP_R_OUT_SLIST_BADDR(qno)));
+ dev_info(dev, "R[%d]_OUT_SLIST_RSIZE[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_OUT_SLIST_RSIZE(qno),
+ octep_read_csr64(oct, CN93_SDP_R_OUT_SLIST_RSIZE(qno)));
+ dev_info(dev, "R[%d]_OUT_CNTS[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_OUT_CNTS(qno),
+ octep_read_csr64(oct, CN93_SDP_R_OUT_CNTS(qno)));
+ dev_info(dev, "R[%d]_OUT_INT_LEVELS[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_OUT_INT_LEVELS(qno),
+ octep_read_csr64(oct, CN93_SDP_R_OUT_INT_LEVELS(qno)));
+ dev_info(dev, "R[%d]_OUT_PKT_CNT[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_OUT_PKT_CNT(qno),
+ octep_read_csr64(oct, CN93_SDP_R_OUT_PKT_CNT(qno)));
+ dev_info(dev, "R[%d]_OUT_BYTE_CNT[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_OUT_BYTE_CNT(qno),
+ octep_read_csr64(oct, CN93_SDP_R_OUT_BYTE_CNT(qno)));
+ dev_info(dev, "R[%d]_ERR_TYPE[0x%llx]: 0x%016llx\n",
+ qno, CN93_SDP_R_ERR_TYPE(qno),
+ octep_read_csr64(oct, CN93_SDP_R_ERR_TYPE(qno)));
+}
+
+/* Reset Hardware Tx queue */
+static int cn93_reset_iq(struct octep_device *oct, int q_no)
+{
+ struct octep_config *conf = oct->conf;
+ u64 val = 0ULL;
+
+ dev_dbg(&oct->pdev->dev, "Reset PF IQ-%d\n", q_no);
+
+ /* Get absolute queue number */
+ q_no += conf->pf_ring_cfg.srn;
+
+ /* Disable the Tx/Instruction Ring */
+ octep_write_csr64(oct, CN93_SDP_R_IN_ENABLE(q_no), val);
+
+ /* clear the Instruction Ring packet/byte counts and doorbell CSRs */
+ octep_write_csr64(oct, CN93_SDP_R_IN_CNTS(q_no), val);
+ octep_write_csr64(oct, CN93_SDP_R_IN_INT_LEVELS(q_no), val);
+ octep_write_csr64(oct, CN93_SDP_R_IN_PKT_CNT(q_no), val);
+ octep_write_csr64(oct, CN93_SDP_R_IN_BYTE_CNT(q_no), val);
+ octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_BADDR(q_no), val);
+ octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_RSIZE(q_no), val);
+
+ val = 0xFFFFFFFF;
+ octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_DBELL(q_no), val);
+
+ return 0;
+}
+
+/* Reset Hardware Rx queue */
+static void cn93_reset_oq(struct octep_device *oct, int q_no)
+{
+ u64 val = 0ULL;
+
+ q_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+
+ /* Disable Output (Rx) Ring */
+ octep_write_csr64(oct, CN93_SDP_R_OUT_ENABLE(q_no), val);
+
+ /* Clear count CSRs */
+ val = octep_read_csr(oct, CN93_SDP_R_OUT_CNTS(q_no));
+ octep_write_csr(oct, CN93_SDP_R_OUT_CNTS(q_no), val);
+
+ octep_write_csr64(oct, CN93_SDP_R_OUT_PKT_CNT(q_no), 0xFFFFFFFFFULL);
+ octep_write_csr64(oct, CN93_SDP_R_OUT_SLIST_DBELL(q_no), 0xFFFFFFFF);
+}
+
+/* Reset all hardware Tx/Rx queues */
+static void octep_reset_io_queues_cn93_pf(struct octep_device *oct)
+{
+ struct pci_dev *pdev = oct->pdev;
+ int q;
+
+ dev_dbg(&pdev->dev, "Reset OCTEP_CN93 PF IO Queues\n");
+
+ for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) {
+ cn93_reset_iq(oct, q);
+ cn93_reset_oq(oct, q);
+ }
+}
+
+/* Initialize windowed addresses to access some hardware registers */
+static void octep_setup_pci_window_regs_cn93_pf(struct octep_device *oct)
+{
+ u8 __iomem *bar0_pciaddr = oct->mmio[0].hw_addr;
+
+ oct->pci_win_regs.pci_win_wr_addr = (u8 __iomem *)(bar0_pciaddr + CN93_SDP_WIN_WR_ADDR64);
+ oct->pci_win_regs.pci_win_rd_addr = (u8 __iomem *)(bar0_pciaddr + CN93_SDP_WIN_RD_ADDR64);
+ oct->pci_win_regs.pci_win_wr_data = (u8 __iomem *)(bar0_pciaddr + CN93_SDP_WIN_WR_DATA64);
+ oct->pci_win_regs.pci_win_rd_data = (u8 __iomem *)(bar0_pciaddr + CN93_SDP_WIN_RD_DATA64);
+}
+
+/* Configure Hardware mapping: inform hardware which rings belong to PF. */
+static void octep_configure_ring_mapping_cn93_pf(struct octep_device *oct)
+{
+ struct octep_config *conf = oct->conf;
+ struct pci_dev *pdev = oct->pdev;
+ u64 pf_srn = CFG_GET_PORTS_PF_SRN(oct->conf);
+ int q;
+
+ for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(conf); q++) {
+ u64 regval = 0;
+
+ if (oct->pcie_port)
+ regval = 8 << CN93_SDP_FUNC_SEL_EPF_BIT_POS;
+
+ octep_write_csr64(oct, CN93_SDP_EPVF_RING(pf_srn + q), regval);
+
+ regval = octep_read_csr64(oct, CN93_SDP_EPVF_RING(pf_srn + q));
+ dev_dbg(&pdev->dev, "Write SDP_EPVF_RING[0x%llx] = 0x%llx\n",
+ CN93_SDP_EPVF_RING(pf_srn + q), regval);
+ }
+}
+
+/* Initialize configuration limits and initial active config for the 93xx PF. */
+static void octep_init_config_cn93_pf(struct octep_device *oct)
+{
+ struct octep_config *conf = oct->conf;
+ struct pci_dev *pdev = oct->pdev;
+ u64 val;
+
+ /* Read ring configuration:
+ * PF ring count, number of VFs and rings per VF supported
+ */
+ val = octep_read_csr64(oct, CN93_SDP_EPF_RINFO);
+ conf->sriov_cfg.max_rings_per_vf = CN93_SDP_EPF_RINFO_RPVF(val);
+ conf->sriov_cfg.active_rings_per_vf = conf->sriov_cfg.max_rings_per_vf;
+ conf->sriov_cfg.max_vfs = CN93_SDP_EPF_RINFO_NVFS(val);
+ conf->sriov_cfg.active_vfs = conf->sriov_cfg.max_vfs;
+ conf->sriov_cfg.vf_srn = CN93_SDP_EPF_RINFO_SRN(val);
+
+ val = octep_read_csr64(oct, CN93_SDP_MAC_PF_RING_CTL(oct->pcie_port));
+ conf->pf_ring_cfg.srn = CN93_SDP_MAC_PF_RING_CTL_SRN(val);
+ conf->pf_ring_cfg.max_io_rings = CN93_SDP_MAC_PF_RING_CTL_RPPF(val);
+ conf->pf_ring_cfg.active_io_rings = conf->pf_ring_cfg.max_io_rings;
+ dev_info(&pdev->dev, "pf_srn=%u rpvf=%u nvfs=%u rppf=%u\n",
+ conf->pf_ring_cfg.srn, conf->sriov_cfg.active_rings_per_vf,
+ conf->sriov_cfg.active_vfs, conf->pf_ring_cfg.active_io_rings);
+
+ conf->iq.num_descs = OCTEP_IQ_MAX_DESCRIPTORS;
+ conf->iq.instr_type = OCTEP_64BYTE_INSTR;
+ conf->iq.pkind = 0;
+ conf->iq.db_min = OCTEP_DB_MIN;
+ conf->iq.intr_threshold = OCTEP_IQ_INTR_THRESHOLD;
+
+ conf->oq.num_descs = OCTEP_OQ_MAX_DESCRIPTORS;
+ conf->oq.buf_size = OCTEP_OQ_BUF_SIZE;
+ conf->oq.refill_threshold = OCTEP_OQ_REFILL_THRESHOLD;
+ conf->oq.oq_intr_pkt = OCTEP_OQ_INTR_PKT_THRESHOLD;
+ conf->oq.oq_intr_time = OCTEP_OQ_INTR_TIME_THRESHOLD;
+
+ conf->msix_cfg.non_ioq_msix = CN93_NUM_NON_IOQ_INTR;
+ conf->msix_cfg.ioq_msix = conf->pf_ring_cfg.active_io_rings;
+ conf->msix_cfg.non_ioq_msix_names = cn93_non_ioq_msix_names;
+
+ conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr + (0x400000ull * 7);
+}
+
+/* Setup registers for a hardware Tx Queue */
+static void octep_setup_iq_regs_cn93_pf(struct octep_device *oct, int iq_no)
+{
+ struct octep_iq *iq = oct->iq[iq_no];
+ u32 reset_instr_cnt;
+ u64 reg_val;
+
+ iq_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_IN_CONTROL(iq_no));
+
+	/* wait for the IDLE bit to be set to 1 */
+ if (!(reg_val & CN93_R_IN_CTL_IDLE)) {
+ do {
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_IN_CONTROL(iq_no));
+ } while (!(reg_val & CN93_R_IN_CTL_IDLE));
+ }
+
+ reg_val |= CN93_R_IN_CTL_RDSIZE;
+ reg_val |= CN93_R_IN_CTL_IS_64B;
+ reg_val |= CN93_R_IN_CTL_ESR;
+ octep_write_csr64(oct, CN93_SDP_R_IN_CONTROL(iq_no), reg_val);
+
+ /* Write the start of the input queue's ring and its size */
+ octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_BADDR(iq_no),
+ iq->desc_ring_dma);
+ octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_RSIZE(iq_no),
+ iq->max_count);
+
+ /* Remember the doorbell & instruction count register addr
+ * for this queue
+ */
+ iq->doorbell_reg = oct->mmio[0].hw_addr +
+ CN93_SDP_R_IN_INSTR_DBELL(iq_no);
+ iq->inst_cnt_reg = oct->mmio[0].hw_addr +
+ CN93_SDP_R_IN_CNTS(iq_no);
+ iq->intr_lvl_reg = oct->mmio[0].hw_addr +
+ CN93_SDP_R_IN_INT_LEVELS(iq_no);
+
+ /* Store the current instruction counter (used in flush_iq calculation) */
+ reset_instr_cnt = readl(iq->inst_cnt_reg);
+ writel(reset_instr_cnt, iq->inst_cnt_reg);
+
+	/* INTR_THRESHOLD is set to max (0xFFFFFFFF) to disable the INTR */
+ reg_val = CFG_GET_IQ_INTR_THRESHOLD(oct->conf) & 0xffffffff;
+ octep_write_csr64(oct, CN93_SDP_R_IN_INT_LEVELS(iq_no), reg_val);
+}
+
+/* Setup registers for a hardware Rx Queue */
+static void octep_setup_oq_regs_cn93_pf(struct octep_device *oct, int oq_no)
+{
+ u64 reg_val;
+ u64 oq_ctl = 0ULL;
+ u32 time_threshold = 0;
+ struct octep_oq *oq = oct->oq[oq_no];
+
+ oq_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_OUT_CONTROL(oq_no));
+
+	/* wait for the IDLE bit to be set to 1 */
+ if (!(reg_val & CN93_R_OUT_CTL_IDLE)) {
+ do {
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_OUT_CONTROL(oq_no));
+ } while (!(reg_val & CN93_R_OUT_CTL_IDLE));
+ }
+
+ reg_val &= ~(CN93_R_OUT_CTL_IMODE);
+ reg_val &= ~(CN93_R_OUT_CTL_ROR_P);
+ reg_val &= ~(CN93_R_OUT_CTL_NSR_P);
+ reg_val &= ~(CN93_R_OUT_CTL_ROR_I);
+ reg_val &= ~(CN93_R_OUT_CTL_NSR_I);
+ reg_val &= ~(CN93_R_OUT_CTL_ES_I);
+ reg_val &= ~(CN93_R_OUT_CTL_ROR_D);
+ reg_val &= ~(CN93_R_OUT_CTL_NSR_D);
+ reg_val &= ~(CN93_R_OUT_CTL_ES_D);
+ reg_val |= (CN93_R_OUT_CTL_ES_P);
+
+ octep_write_csr64(oct, CN93_SDP_R_OUT_CONTROL(oq_no), reg_val);
+ octep_write_csr64(oct, CN93_SDP_R_OUT_SLIST_BADDR(oq_no),
+ oq->desc_ring_dma);
+ octep_write_csr64(oct, CN93_SDP_R_OUT_SLIST_RSIZE(oq_no),
+ oq->max_count);
+
+ oq_ctl = octep_read_csr64(oct, CN93_SDP_R_OUT_CONTROL(oq_no));
+	oq_ctl &= ~0x7fffffULL;	/* clear the ISIZE and BSIZE (22-0) */
+	oq_ctl |= (oq->buffer_size & 0xffff);	/* populate the BSIZE (15-0) */
+ octep_write_csr64(oct, CN93_SDP_R_OUT_CONTROL(oq_no), oq_ctl);
+
+	/* Get the mapped address of the pkts_sent and pkts_credit regs */
+ oq->pkts_sent_reg = oct->mmio[0].hw_addr + CN93_SDP_R_OUT_CNTS(oq_no);
+ oq->pkts_credit_reg = oct->mmio[0].hw_addr +
+ CN93_SDP_R_OUT_SLIST_DBELL(oq_no);
+
+ time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf);
+ reg_val = ((u64)time_threshold << 32) |
+ CFG_GET_OQ_INTR_PKT(oct->conf);
+ octep_write_csr64(oct, CN93_SDP_R_OUT_INT_LEVELS(oq_no), reg_val);
+}
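SDP_R_OUT_INT_LEVELS packs the time threshold into the upper 32 bits and the packet-count threshold into the lower 32. With the defaults from octep_config.h:

/* time_threshold = OCTEP_OQ_INTR_TIME_THRESHOLD (10 us),
 * pkt threshold  = OCTEP_OQ_INTR_PKT_THRESHOLD  (1):
 *
 *   reg_val = (10ULL << 32) | 1 = 0x0000000A00000001
 */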
+
+/* Setup registers for a PF mailbox */
+static void octep_setup_mbox_regs_cn93_pf(struct octep_device *oct, int q_no)
+{
+ struct octep_mbox *mbox = oct->mbox[q_no];
+
+ mbox->q_no = q_no;
+
+ /* PF mbox interrupt reg */
+ mbox->mbox_int_reg = oct->mmio[0].hw_addr + CN93_SDP_EPF_MBOX_RINT(0);
+
+ /* PF to VF DATA reg. PF writes into this reg */
+ mbox->mbox_write_reg = oct->mmio[0].hw_addr + CN93_SDP_R_MBOX_PF_VF_DATA(q_no);
+
+ /* VF to PF DATA reg. PF reads from this reg */
+ mbox->mbox_read_reg = oct->mmio[0].hw_addr + CN93_SDP_R_MBOX_VF_PF_DATA(q_no);
+}
+
+/* Mailbox Interrupt handler */
+static void cn93_handle_pf_mbox_intr(struct octep_device *oct)
+{
+ u64 mbox_int_val = 0ULL, val = 0ULL, qno = 0ULL;
+
+ mbox_int_val = readq(oct->mbox[0]->mbox_int_reg);
+ for (qno = 0; qno < OCTEP_MAX_VF; qno++) {
+ val = readq(oct->mbox[qno]->mbox_read_reg);
+ dev_dbg(&oct->pdev->dev,
+ "PF MBOX READ: val:%llx from VF:%llx\n", val, qno);
+ }
+
+ writeq(mbox_int_val, oct->mbox[0]->mbox_int_reg);
+}
+
+/* Interrupts handler for all non-queue generic interrupts. */
+static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev)
+{
+ struct octep_device *oct = (struct octep_device *)dev;
+ struct pci_dev *pdev = oct->pdev;
+ u64 reg_val = 0;
+ int i = 0;
+
+ /* Check for IRERR INTR */
+ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_IRERR_RINT);
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "received IRERR_RINT intr: 0x%llx\n", reg_val);
+ octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT, reg_val);
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+ reg_val = octep_read_csr64(oct,
+ CN93_SDP_R_ERR_TYPE(i));
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "Received err type on IQ-%d: 0x%llx\n",
+ i, reg_val);
+ octep_write_csr64(oct, CN93_SDP_R_ERR_TYPE(i),
+ reg_val);
+ }
+ }
+ goto irq_handled;
+ }
+
+ /* Check for ORERR INTR */
+ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_ORERR_RINT);
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "Received ORERR_RINT intr: 0x%llx\n", reg_val);
+ octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT, reg_val);
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_ERR_TYPE(i));
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "Received err type on OQ-%d: 0x%llx\n",
+ i, reg_val);
+ octep_write_csr64(oct, CN93_SDP_R_ERR_TYPE(i),
+ reg_val);
+ }
+ }
+
+ goto irq_handled;
+ }
+
+ /* Check for VFIRE INTR */
+ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_VFIRE_RINT(0));
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "Received VFIRE_RINT intr: 0x%llx\n", reg_val);
+ octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT(0), reg_val);
+ goto irq_handled;
+ }
+
+ /* Check for VFORE INTR */
+ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_VFORE_RINT(0));
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "Received VFORE_RINT intr: 0x%llx\n", reg_val);
+ octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT(0), reg_val);
+ goto irq_handled;
+ }
+
+ /* Check for MBOX INTR */
+ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_MBOX_RINT(0));
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "Received MBOX_RINT intr: 0x%llx\n", reg_val);
+ cn93_handle_pf_mbox_intr(oct);
+ goto irq_handled;
+ }
+
+ /* Check for OEI INTR */
+ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_OEI_RINT);
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "Received OEI_EINT intr: 0x%llx\n", reg_val);
+ octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg_val);
+ queue_work(octep_wq, &oct->ctrl_mbox_task);
+ goto irq_handled;
+ }
+
+ /* Check for DMA INTR */
+ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_DMA_RINT);
+ if (reg_val) {
+ octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT, reg_val);
+ goto irq_handled;
+ }
+
+ /* Check for DMA VF INTR */
+ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT(0));
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "Received DMA_VF_RINT intr: 0x%llx\n", reg_val);
+ octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT(0), reg_val);
+ goto irq_handled;
+ }
+
+ /* Check for PPVF INTR */
+ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_PP_VF_RINT(0));
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "Received PP_VF_RINT intr: 0x%llx\n", reg_val);
+ octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT(0), reg_val);
+ goto irq_handled;
+ }
+
+ /* Check for MISC INTR */
+ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_MISC_RINT);
+ if (reg_val) {
+ dev_info(&pdev->dev,
+ "Received MISC_RINT intr: 0x%llx\n", reg_val);
+ octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT, reg_val);
+ goto irq_handled;
+ }
+
+	dev_info(&pdev->dev, "Reserved interrupts raised; Ignore\n");
+irq_handled:
+ return IRQ_HANDLED;
+}
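All of the *_RINT registers polled above are write-1-to-clear: the handler reads the pending bits and writes the same value back to acknowledge exactly those bits. The pattern isolated into a hypothetical helper:

/* Illustrative W1C acknowledge helper, not part of the driver */
static void cn93_ack_w1c(struct octep_device *oct, u64 reg)
{
	u64 pending = octep_read_csr64(oct, reg);

	if (pending)	/* writing 1s clears only the set bits */
		octep_write_csr64(oct, reg, pending);
}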
+
+/* Tx/Rx queue interrupt handler */
+static irqreturn_t octep_ioq_intr_handler_cn93_pf(void *data)
+{
+ struct octep_ioq_vector *vector = (struct octep_ioq_vector *)data;
+ struct octep_oq *oq = vector->oq;
+
+ napi_schedule_irqoff(oq->napi);
+ return IRQ_HANDLED;
+}
+
+/* soft reset of 93xx */
+static int octep_soft_reset_cn93_pf(struct octep_device *oct)
+{
+ dev_info(&oct->pdev->dev, "CN93XX: Doing soft reset\n");
+
+ octep_write_csr64(oct, CN93_SDP_WIN_WR_MASK_REG, 0xFF);
+
+ /* Set core domain reset bit */
+ OCTEP_PCI_WIN_WRITE(oct, CN93_RST_CORE_DOMAIN_W1S, 1);
+ /* Wait for 100ms as Octeon resets. */
+ mdelay(100);
+	/* Clear core domain reset bit */
+ OCTEP_PCI_WIN_WRITE(oct, CN93_RST_CORE_DOMAIN_W1C, 1);
+
+ return 0;
+}
+
+/* Re-initialize Octeon hardware registers */
+static void octep_reinit_regs_cn93_pf(struct octep_device *oct)
+{
+ u32 i;
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ oct->hw_ops.setup_iq_regs(oct, i);
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ oct->hw_ops.setup_oq_regs(oct, i);
+
+ oct->hw_ops.enable_interrupts(oct);
+ oct->hw_ops.enable_io_queues(oct);
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++)
+ writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg);
+}
+
+/* Enable all interrupts */
+static void octep_enable_interrupts_cn93_pf(struct octep_device *oct)
+{
+ u64 intr_mask = 0ULL;
+ int srn, num_rings, i;
+
+ srn = CFG_GET_PORTS_PF_SRN(oct->conf);
+ num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+
+ for (i = 0; i < num_rings; i++)
+ intr_mask |= (0x1ULL << (srn + i));
+
+ octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1S, intr_mask);
+ octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1S, intr_mask);
+ octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT_ENA_W1S, -1ULL);
+ octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1S, intr_mask);
+ octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1S, intr_mask);
+}
+
+/* Disable all interrupts */
+static void octep_disable_interrupts_cn93_pf(struct octep_device *oct)
+{
+ u64 intr_mask = 0ULL;
+ int srn, num_rings, i;
+
+ srn = CFG_GET_PORTS_PF_SRN(oct->conf);
+ num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+
+ for (i = 0; i < num_rings; i++)
+ intr_mask |= (0x1ULL << (srn + i));
+
+ octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask);
+ octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask);
+ octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT_ENA_W1C, -1ULL);
+ octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1C, intr_mask);
+ octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1C, intr_mask);
+}
+
+/* Get new Octeon Read Index: index of descriptor that Octeon reads next. */
+static u32 octep_update_iq_read_index_cn93_pf(struct octep_iq *iq)
+{
+ u32 pkt_in_done = readl(iq->inst_cnt_reg);
+ u32 last_done, new_idx;
+
+ last_done = pkt_in_done - iq->pkt_in_done;
+ iq->pkt_in_done = pkt_in_done;
+
+ new_idx = (iq->octep_read_index + last_done) % iq->max_count;
+
+ return new_idx;
+}
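pkt_in_done snapshots a free-running 32-bit hardware counter, so the unsigned subtraction yields the number of newly completed commands even across a counter wrap:

/* Wrap-around example (mod 2^32 arithmetic):
 *   iq->pkt_in_done (old snapshot) = 0xFFFFFFFE
 *   readl() (new value)            = 0x00000003
 *   last_done = 0x00000003 - 0xFFFFFFFE = 5
 */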
+
+/* Enable a hardware Tx Queue */
+static void octep_enable_iq_cn93_pf(struct octep_device *oct, int iq_no)
+{
+ u64 loop = HZ;
+ u64 reg_val;
+
+ iq_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+
+ octep_write_csr64(oct, CN93_SDP_R_IN_INSTR_DBELL(iq_no), 0xFFFFFFFF);
+
+ while (octep_read_csr64(oct, CN93_SDP_R_IN_INSTR_DBELL(iq_no)) &&
+ loop--) {
+ schedule_timeout_interruptible(1);
+ }
+
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_IN_INT_LEVELS(iq_no));
+ reg_val |= (0x1ULL << 62);
+ octep_write_csr64(oct, CN93_SDP_R_IN_INT_LEVELS(iq_no), reg_val);
+
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_IN_ENABLE(iq_no));
+ reg_val |= 0x1ULL;
+ octep_write_csr64(oct, CN93_SDP_R_IN_ENABLE(iq_no), reg_val);
+}
+
+/* Enable a hardware Rx Queue */
+static void octep_enable_oq_cn93_pf(struct octep_device *oct, int oq_no)
+{
+ u64 reg_val = 0ULL;
+
+ oq_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_OUT_INT_LEVELS(oq_no));
+ reg_val |= (0x1ULL << 62);
+ octep_write_csr64(oct, CN93_SDP_R_OUT_INT_LEVELS(oq_no), reg_val);
+
+ octep_write_csr64(oct, CN93_SDP_R_OUT_SLIST_DBELL(oq_no), 0xFFFFFFFF);
+
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_OUT_ENABLE(oq_no));
+ reg_val |= 0x1ULL;
+ octep_write_csr64(oct, CN93_SDP_R_OUT_ENABLE(oq_no), reg_val);
+}
+
+/* Enable all hardware Tx/Rx Queues assigned to PF */
+static void octep_enable_io_queues_cn93_pf(struct octep_device *oct)
+{
+ u8 q;
+
+ for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) {
+ octep_enable_iq_cn93_pf(oct, q);
+ octep_enable_oq_cn93_pf(oct, q);
+ }
+}
+
+/* Disable a hardware Tx Queue assigned to PF */
+static void octep_disable_iq_cn93_pf(struct octep_device *oct, int iq_no)
+{
+ u64 reg_val = 0ULL;
+
+ iq_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_IN_ENABLE(iq_no));
+ reg_val &= ~0x1ULL;
+ octep_write_csr64(oct, CN93_SDP_R_IN_ENABLE(iq_no), reg_val);
+}
+
+/* Disable a hardware Rx Queue assigned to PF */
+static void octep_disable_oq_cn93_pf(struct octep_device *oct, int oq_no)
+{
+ u64 reg_val = 0ULL;
+
+ oq_no += CFG_GET_PORTS_PF_SRN(oct->conf);
+ reg_val = octep_read_csr64(oct, CN93_SDP_R_OUT_ENABLE(oq_no));
+ reg_val &= ~0x1ULL;
+ octep_write_csr64(oct, CN93_SDP_R_OUT_ENABLE(oq_no), reg_val);
+}
+
+/* Disable all hardware Tx/Rx Queues assigned to PF */
+static void octep_disable_io_queues_cn93_pf(struct octep_device *oct)
+{
+ int q = 0;
+
+ for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) {
+ octep_disable_iq_cn93_pf(oct, q);
+ octep_disable_oq_cn93_pf(oct, q);
+ }
+}
+
+/* Dump hardware registers (including Tx/Rx queues) for debugging. */
+static void octep_dump_registers_cn93_pf(struct octep_device *oct)
+{
+ u8 srn, num_rings, q;
+
+ srn = CFG_GET_PORTS_PF_SRN(oct->conf);
+ num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+
+ for (q = srn; q < srn + num_rings; q++)
+ cn93_dump_regs(oct, q);
+}
+
+/**
+ * octep_device_setup_cn93_pf() - Setup Octeon device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * - initialize hardware operations.
+ * - get target side pcie port number for the device.
+ * - setup window access to hardware registers.
+ * - set initial configuration and max limits.
+ * - setup hardware mapping of rings to the PF device.
+ */
+void octep_device_setup_cn93_pf(struct octep_device *oct)
+{
+ oct->hw_ops.setup_iq_regs = octep_setup_iq_regs_cn93_pf;
+ oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cn93_pf;
+ oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cn93_pf;
+
+ oct->hw_ops.non_ioq_intr_handler = octep_non_ioq_intr_handler_cn93_pf;
+ oct->hw_ops.ioq_intr_handler = octep_ioq_intr_handler_cn93_pf;
+ oct->hw_ops.soft_reset = octep_soft_reset_cn93_pf;
+ oct->hw_ops.reinit_regs = octep_reinit_regs_cn93_pf;
+
+ oct->hw_ops.enable_interrupts = octep_enable_interrupts_cn93_pf;
+ oct->hw_ops.disable_interrupts = octep_disable_interrupts_cn93_pf;
+
+ oct->hw_ops.update_iq_read_idx = octep_update_iq_read_index_cn93_pf;
+
+ oct->hw_ops.enable_iq = octep_enable_iq_cn93_pf;
+ oct->hw_ops.enable_oq = octep_enable_oq_cn93_pf;
+ oct->hw_ops.enable_io_queues = octep_enable_io_queues_cn93_pf;
+
+ oct->hw_ops.disable_iq = octep_disable_iq_cn93_pf;
+ oct->hw_ops.disable_oq = octep_disable_oq_cn93_pf;
+ oct->hw_ops.disable_io_queues = octep_disable_io_queues_cn93_pf;
+ oct->hw_ops.reset_io_queues = octep_reset_io_queues_cn93_pf;
+
+ oct->hw_ops.dump_registers = octep_dump_registers_cn93_pf;
+
+ octep_setup_pci_window_regs_cn93_pf(oct);
+
+ oct->pcie_port = octep_read_csr64(oct, CN93_SDP_MAC_NUMBER) & 0xff;
+ dev_info(&oct->pdev->dev,
+ "Octeon device using PCIE Port %d\n", oct->pcie_port);
+
+ octep_init_config_cn93_pf(oct);
+ octep_configure_ring_mapping_cn93_pf(oct);
+}
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
new file mode 100644
index 000000000000..f208f3f9a447
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#ifndef _OCTEP_CONFIG_H_
+#define _OCTEP_CONFIG_H_
+
+/* Tx instruction types by length */
+#define OCTEP_32BYTE_INSTR 32
+#define OCTEP_64BYTE_INSTR 64
+
+/* Tx Queue: maximum descriptors per ring */
+#define OCTEP_IQ_MAX_DESCRIPTORS 1024
+/* Minimum input (Tx) requests to be enqueued to ring doorbell */
+#define OCTEP_DB_MIN 1
+/* Packet threshold for Tx queue interrupt */
+#define OCTEP_IQ_INTR_THRESHOLD 0x0
+
+/* Rx Queue: maximum descriptors per ring */
+#define OCTEP_OQ_MAX_DESCRIPTORS 1024
+
+/* Rx buffer size: Use page size buffers.
+ * Build skb from allocated page buffer once the packet is received.
+ * When a gathered packet is received, use the head page as the skb head
+ * and the page buffers of consecutive Rx descriptors as its fragments.
+ */
+#define OCTEP_OQ_BUF_SIZE (SKB_WITH_OVERHEAD(PAGE_SIZE))
+#define OCTEP_OQ_PKTS_PER_INTR 128
+#define OCTEP_OQ_REFILL_THRESHOLD (OCTEP_OQ_MAX_DESCRIPTORS / 4)
+
+#define OCTEP_OQ_INTR_PKT_THRESHOLD 1
+#define OCTEP_OQ_INTR_TIME_THRESHOLD 10
+
+#define OCTEP_MSIX_NAME_SIZE (IFNAMSIZ + 32)
+
+/* Tx Queue wake threshold
+ * wake up a stopped Tx queue if at least 2 descriptors are available.
+ * Even an skb with fragments consumes only one Tx queue descriptor entry.
+ */
+#define OCTEP_WAKE_QUEUE_THRESHOLD 2
+
+/* Minimum MTU supported by Octeon network interface */
+#define OCTEP_MIN_MTU ETH_MIN_MTU
+/* Maximum MTU supported by Octeon interface */
+#define OCTEP_MAX_MTU (10000 - (ETH_HLEN + ETH_FCS_LEN))
+/* Default MTU */
+#define OCTEP_DEFAULT_MTU 1500
+
+/* Macros to get octeon config params */
+#define CFG_GET_IQ_CFG(cfg) ((cfg)->iq)
+#define CFG_GET_IQ_NUM_DESC(cfg) ((cfg)->iq.num_descs)
+#define CFG_GET_IQ_INSTR_TYPE(cfg) ((cfg)->iq.instr_type)
+#define CFG_GET_IQ_PKIND(cfg) ((cfg)->iq.pkind)
+#define CFG_GET_IQ_INSTR_SIZE(cfg) (64)
+#define CFG_GET_IQ_DB_MIN(cfg) ((cfg)->iq.db_min)
+#define CFG_GET_IQ_INTR_THRESHOLD(cfg) ((cfg)->iq.intr_threshold)
+
+#define CFG_GET_OQ_NUM_DESC(cfg) ((cfg)->oq.num_descs)
+#define CFG_GET_OQ_BUF_SIZE(cfg) ((cfg)->oq.buf_size)
+#define CFG_GET_OQ_REFILL_THRESHOLD(cfg) ((cfg)->oq.refill_threshold)
+#define CFG_GET_OQ_INTR_PKT(cfg) ((cfg)->oq.oq_intr_pkt)
+#define CFG_GET_OQ_INTR_TIME(cfg) ((cfg)->oq.oq_intr_time)
+
+#define CFG_GET_PORTS_MAX_IO_RINGS(cfg) ((cfg)->pf_ring_cfg.max_io_rings)
+#define CFG_GET_PORTS_ACTIVE_IO_RINGS(cfg) ((cfg)->pf_ring_cfg.active_io_rings)
+#define CFG_GET_PORTS_PF_SRN(cfg) ((cfg)->pf_ring_cfg.srn)
+
+#define CFG_GET_DPI_PKIND(cfg) ((cfg)->core_cfg.dpi_pkind)
+#define CFG_GET_CORE_TICS_PER_US(cfg) ((cfg)->core_cfg.core_tics_per_us)
+#define CFG_GET_COPROC_TICS_PER_US(cfg) ((cfg)->core_cfg.coproc_tics_per_us)
+
+#define CFG_GET_MAX_VFS(cfg) ((cfg)->sriov_cfg.max_vfs)
+#define CFG_GET_ACTIVE_VFS(cfg) ((cfg)->sriov_cfg.active_vfs)
+#define CFG_GET_MAX_RPVF(cfg) ((cfg)->sriov_cfg.max_rings_per_vf)
+#define CFG_GET_ACTIVE_RPVF(cfg) ((cfg)->sriov_cfg.active_rings_per_vf)
+#define CFG_GET_VF_SRN(cfg) ((cfg)->sriov_cfg.vf_srn)
+
+#define CFG_GET_IOQ_MSIX(cfg) ((cfg)->msix_cfg.ioq_msix)
+#define CFG_GET_NON_IOQ_MSIX(cfg) ((cfg)->msix_cfg.non_ioq_msix)
+#define CFG_GET_NON_IOQ_MSIX_NAMES(cfg) ((cfg)->msix_cfg.non_ioq_msix_names)
+
+#define CFG_GET_CTRL_MBOX_MEM_ADDR(cfg) ((cfg)->ctrl_mbox_cfg.barmem_addr)
+
+/* Hardware Tx Queue configuration. */
+struct octep_iq_config {
+ /* Size of the Input queue (number of commands) */
+ u16 num_descs;
+
+ /* Command size - 32 or 64 bytes */
+ u16 instr_type;
+
+ /* pkind for packets sent to Octeon */
+ u16 pkind;
+
+ /* Minimum number of commands pending to be posted to Octeon before the
+ * driver rings the Input queue doorbell.
+ */
+ u16 db_min;
+
+ /* Trigger the IQ interrupt when processed cmd count reaches
+ * this level.
+ */
+ u32 intr_threshold;
+};
+
+/* Hardware Rx Queue configuration. */
+struct octep_oq_config {
+ /* Size of Output queue (number of descriptors) */
+ u16 num_descs;
+
+ /* Size of buffer in this Output queue. */
+ u16 buf_size;
+
+ /* The number of buffers that were consumed during packet processing
+ * by the driver on this Output queue before the driver attempts to
+ * replenish the descriptor ring with new buffers.
+ */
+ u16 refill_threshold;
+
+ /* Interrupt Coalescing (Packet Count). Octeon will interrupt the host
+ * only if it sent as many packets as specified by this field.
+ * The driver usually does not use packet count interrupt coalescing.
+ */
+ u32 oq_intr_pkt;
+
+ /* Interrupt Coalescing (Time Interval). Octeon will interrupt the host
+ * if at least one packet was sent in the time interval specified by
+ * this field. The driver uses time interval interrupt coalescing by
+ * default. The time is specified in microseconds.
+ */
+ u32 oq_intr_time;
+};
+
+/* Tx/Rx configuration */
+struct octep_pf_ring_config {
+ /* Max number of IOQs */
+ u16 max_io_rings;
+
+ /* Number of active IOQs */
+ u16 active_io_rings;
+
+ /* Starting IOQ number: this changes based on which PEM is used */
+ u16 srn;
+};
+
+/* Octeon Hardware SRIOV config */
+struct octep_sriov_config {
+ /* Max number of VF devices supported */
+ u16 max_vfs;
+
+ /* Number of VF devices enabled */
+ u16 active_vfs;
+
+ /* Max number of rings assigned to VF */
+ u8 max_rings_per_vf;
+
+ /* Number of rings enabled per VF */
+ u8 active_rings_per_vf;
+
+ /* Starting ring number of VFs: ring-0 of VF-0 of the PF */
+ u16 vf_srn;
+};
+
+/* Octeon MSI-x config. */
+struct octep_msix_config {
+ /* Number of IOQ interrupts */
+ u16 ioq_msix;
+
+ /* Number of Non IOQ interrupts */
+ u16 non_ioq_msix;
+
+ /* Names of Non IOQ interrupts */
+ char **non_ioq_msix_names;
+};
+
+struct octep_ctrl_mbox_config {
+ /* Barmem address for control mbox */
+ void __iomem *barmem_addr;
+};
+
+/* Data Structure to hold configuration limits and active config */
+struct octep_config {
+ /* Input Queue attributes. */
+ struct octep_iq_config iq;
+
+ /* Output Queue attributes. */
+ struct octep_oq_config oq;
+
+ /* NIC Port Configuration */
+ struct octep_pf_ring_config pf_ring_cfg;
+
+ /* SRIOV configuration of the PF */
+ struct octep_sriov_config sriov_cfg;
+
+ /* MSI-X interrupt config */
+ struct octep_msix_config msix_cfg;
+
+ /* ctrl mbox config */
+ struct octep_ctrl_mbox_config ctrl_mbox_cfg;
+};
+#endif /* _OCTEP_CONFIG_H_ */
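
For reference, a standalone sketch (not part of the patch) showing that the CFG_GET_* accessors above are plain field lookups over the config blob; the pared-down struct and values here are illustrative stand-ins:

#include <assert.h>

/* Pared-down stand-ins for struct octep_config and its accessors. */
struct iq_cfg_demo { unsigned short num_descs, instr_type; };
struct cfg_demo { struct iq_cfg_demo iq; };

#define DEMO_CFG_GET_IQ_NUM_DESC(cfg)	((cfg)->iq.num_descs)
#define DEMO_CFG_GET_IQ_INSTR_TYPE(cfg)	((cfg)->iq.instr_type)

int main(void)
{
	struct cfg_demo conf = { .iq = { .num_descs = 1024, .instr_type = 64 } };

	/* Each macro expands to a direct member access on the config. */
	assert(DEMO_CFG_GET_IQ_NUM_DESC(&conf) == 1024);
	assert(DEMO_CFG_GET_IQ_INSTR_TYPE(&conf) == 64);
	return 0;
}
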
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c
new file mode 100644
index 000000000000..8c196dadfad0
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mutex.h>
+#include <linux/jiffies.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/etherdevice.h>
+
+#include "octep_ctrl_mbox.h"
+#include "octep_config.h"
+#include "octep_main.h"
+
+/* Timeout in msecs for message response */
+#define OCTEP_CTRL_MBOX_MSG_TIMEOUT_MS 100
+/* Time in msecs to wait for message response */
+#define OCTEP_CTRL_MBOX_MSG_WAIT_MS 10
+
+#define OCTEP_CTRL_MBOX_INFO_MAGIC_NUM_OFFSET(m) (m)
+#define OCTEP_CTRL_MBOX_INFO_BARMEM_SZ_OFFSET(m) ((m) + 8)
+#define OCTEP_CTRL_MBOX_INFO_HOST_VERSION_OFFSET(m) ((m) + 16)
+#define OCTEP_CTRL_MBOX_INFO_HOST_STATUS_OFFSET(m) ((m) + 24)
+#define OCTEP_CTRL_MBOX_INFO_FW_VERSION_OFFSET(m) ((m) + 136)
+#define OCTEP_CTRL_MBOX_INFO_FW_STATUS_OFFSET(m) ((m) + 144)
+
+#define OCTEP_CTRL_MBOX_H2FQ_INFO_OFFSET(m) ((m) + OCTEP_CTRL_MBOX_INFO_SZ)
+#define OCTEP_CTRL_MBOX_H2FQ_PROD_OFFSET(m) (OCTEP_CTRL_MBOX_H2FQ_INFO_OFFSET(m))
+#define OCTEP_CTRL_MBOX_H2FQ_CONS_OFFSET(m) ((OCTEP_CTRL_MBOX_H2FQ_INFO_OFFSET(m)) + 4)
+#define OCTEP_CTRL_MBOX_H2FQ_ELEM_SZ_OFFSET(m) ((OCTEP_CTRL_MBOX_H2FQ_INFO_OFFSET(m)) + 8)
+#define OCTEP_CTRL_MBOX_H2FQ_ELEM_CNT_OFFSET(m) ((OCTEP_CTRL_MBOX_H2FQ_INFO_OFFSET(m)) + 12)
+
+#define OCTEP_CTRL_MBOX_F2HQ_INFO_OFFSET(m) ((m) + \
+ OCTEP_CTRL_MBOX_INFO_SZ + \
+ OCTEP_CTRL_MBOX_H2FQ_INFO_SZ)
+#define OCTEP_CTRL_MBOX_F2HQ_PROD_OFFSET(m) (OCTEP_CTRL_MBOX_F2HQ_INFO_OFFSET(m))
+#define OCTEP_CTRL_MBOX_F2HQ_CONS_OFFSET(m) ((OCTEP_CTRL_MBOX_F2HQ_INFO_OFFSET(m)) + 4)
+#define OCTEP_CTRL_MBOX_F2HQ_ELEM_SZ_OFFSET(m) ((OCTEP_CTRL_MBOX_F2HQ_INFO_OFFSET(m)) + 8)
+#define OCTEP_CTRL_MBOX_F2HQ_ELEM_CNT_OFFSET(m) ((OCTEP_CTRL_MBOX_F2HQ_INFO_OFFSET(m)) + 12)
+
+#define OCTEP_CTRL_MBOX_Q_OFFSET(m, i) ((m) + \
+ (sizeof(struct octep_ctrl_mbox_msg) * (i)))
+
+static u32 octep_ctrl_mbox_circq_inc(u32 index, u32 mask)
+{
+ return (index + 1) & mask;
+}
+
+static u32 octep_ctrl_mbox_circq_space(u32 pi, u32 ci, u32 mask)
+{
+ return mask - ((pi - ci) & mask);
+}
+
+static u32 octep_ctrl_mbox_circq_depth(u32 pi, u32 ci, u32 mask)
+{
+ return ((pi - ci) & mask);
+}
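
The index helpers above rely on elem_cnt being a power of two (as noted in octep_ctrl_mbox.h) so that mask = elem_cnt - 1 makes the wrap-around arithmetic come out right. A standalone sketch with an example queue of 8 elements:

#include <assert.h>

/* Mirrors octep_ctrl_mbox_circq_space(): one slot is kept free so that
 * pi == ci unambiguously means "empty".
 */
static unsigned int circq_space(unsigned int pi, unsigned int ci,
				unsigned int mask)
{
	return mask - ((pi - ci) & mask);
}

int main(void)
{
	unsigned int mask = 8 - 1;	/* elem_cnt = 8 */

	assert(circq_space(0, 0, mask) == 7);	/* empty: 7 usable slots */
	assert(circq_space(7, 0, mask) == 0);	/* full: producer 7 ahead */
	assert(circq_space(2, 7, mask) == 4);	/* wrapped: (2 - 7) & 7 == 3 used */
	return 0;
}
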
+
+int octep_ctrl_mbox_init(struct octep_ctrl_mbox *mbox)
+{
+ u64 version, magic_num, status;
+
+ if (!mbox)
+ return -EINVAL;
+
+ if (!mbox->barmem) {
+ pr_info("octep_ctrl_mbox : Invalid barmem %p\n", mbox->barmem);
+ return -EINVAL;
+ }
+
+ magic_num = readq(OCTEP_CTRL_MBOX_INFO_MAGIC_NUM_OFFSET(mbox->barmem));
+ if (magic_num != OCTEP_CTRL_MBOX_MAGIC_NUMBER) {
+ pr_info("octep_ctrl_mbox : Invalid magic number %llx\n", magic_num);
+ return -EINVAL;
+ }
+
+ version = readq(OCTEP_CTRL_MBOX_INFO_FW_VERSION_OFFSET(mbox->barmem));
+ if (version != OCTEP_DRV_VERSION) {
+ pr_info("octep_ctrl_mbox : Firmware version mismatch %llx != %x\n",
+ version, OCTEP_DRV_VERSION);
+ return -EINVAL;
+ }
+
+ status = readq(OCTEP_CTRL_MBOX_INFO_FW_STATUS_OFFSET(mbox->barmem));
+ if (status != OCTEP_CTRL_MBOX_STATUS_READY) {
+ pr_info("octep_ctrl_mbox : Firmware is not ready.\n");
+ return -EINVAL;
+ }
+
+ mbox->barmem_sz = readl(OCTEP_CTRL_MBOX_INFO_BARMEM_SZ_OFFSET(mbox->barmem));
+
+ writeq(mbox->version, OCTEP_CTRL_MBOX_INFO_HOST_VERSION_OFFSET(mbox->barmem));
+ writeq(OCTEP_CTRL_MBOX_STATUS_INIT, OCTEP_CTRL_MBOX_INFO_HOST_STATUS_OFFSET(mbox->barmem));
+
+ mbox->h2fq.elem_cnt = readl(OCTEP_CTRL_MBOX_H2FQ_ELEM_CNT_OFFSET(mbox->barmem));
+ mbox->h2fq.elem_sz = readl(OCTEP_CTRL_MBOX_H2FQ_ELEM_SZ_OFFSET(mbox->barmem));
+ mbox->h2fq.mask = (mbox->h2fq.elem_cnt - 1);
+ mutex_init(&mbox->h2fq_lock);
+
+ mbox->f2hq.elem_cnt = readl(OCTEP_CTRL_MBOX_F2HQ_ELEM_CNT_OFFSET(mbox->barmem));
+ mbox->f2hq.elem_sz = readl(OCTEP_CTRL_MBOX_F2HQ_ELEM_SZ_OFFSET(mbox->barmem));
+ mbox->f2hq.mask = (mbox->f2hq.elem_cnt - 1);
+ mutex_init(&mbox->f2hq_lock);
+
+ mbox->h2fq.hw_prod = OCTEP_CTRL_MBOX_H2FQ_PROD_OFFSET(mbox->barmem);
+ mbox->h2fq.hw_cons = OCTEP_CTRL_MBOX_H2FQ_CONS_OFFSET(mbox->barmem);
+ mbox->h2fq.hw_q = mbox->barmem +
+ OCTEP_CTRL_MBOX_INFO_SZ +
+ OCTEP_CTRL_MBOX_H2FQ_INFO_SZ +
+ OCTEP_CTRL_MBOX_F2HQ_INFO_SZ;
+
+ mbox->f2hq.hw_prod = OCTEP_CTRL_MBOX_F2HQ_PROD_OFFSET(mbox->barmem);
+ mbox->f2hq.hw_cons = OCTEP_CTRL_MBOX_F2HQ_CONS_OFFSET(mbox->barmem);
+ mbox->f2hq.hw_q = mbox->h2fq.hw_q +
+ ((mbox->h2fq.elem_sz + sizeof(union octep_ctrl_mbox_msg_hdr)) *
+ mbox->h2fq.elem_cnt);
+
+ /* ensure ready state is seen after everything is initialized */
+ wmb();
+ writeq(OCTEP_CTRL_MBOX_STATUS_READY, OCTEP_CTRL_MBOX_INFO_HOST_STATUS_OFFSET(mbox->barmem));
+
+ pr_info("Octep ctrl mbox : Init successful.\n");
+
+ return 0;
+}
+
+int octep_ctrl_mbox_send(struct octep_ctrl_mbox *mbox, struct octep_ctrl_mbox_msg *msg)
+{
+ unsigned long timeout = msecs_to_jiffies(OCTEP_CTRL_MBOX_MSG_TIMEOUT_MS);
+ unsigned long period = msecs_to_jiffies(OCTEP_CTRL_MBOX_MSG_WAIT_MS);
+ struct octep_ctrl_mbox_q *q;
+ unsigned long expire;
+ u64 *mbuf, *word0;
+ u8 __iomem *qidx;
+ u16 pi, ci;
+ int i;
+
+ if (!mbox || !msg)
+ return -EINVAL;
+
+ q = &mbox->h2fq;
+ pi = readl(q->hw_prod);
+ ci = readl(q->hw_cons);
+
+ if (!octep_ctrl_mbox_circq_space(pi, ci, q->mask))
+ return -ENOMEM;
+
+ qidx = OCTEP_CTRL_MBOX_Q_OFFSET(q->hw_q, pi);
+ mbuf = (u64 *)msg->msg;
+ word0 = &msg->hdr.word0;
+
+ mutex_lock(&mbox->h2fq_lock);
+ for (i = 1; i <= msg->hdr.sizew; i++)
+ writeq(*mbuf++, (qidx + (i * 8)));
+
+ writeq(*word0, qidx);
+
+ pi = octep_ctrl_mbox_circq_inc(pi, q->mask);
+ writel(pi, q->hw_prod);
+ mutex_unlock(&mbox->h2fq_lock);
+
+ /* don't check for notification response */
+ if (msg->hdr.flags & OCTEP_CTRL_MBOX_MSG_HDR_FLAG_NOTIFY)
+ return 0;
+
+ expire = jiffies + timeout;
+ while (true) {
+ *word0 = readq(qidx);
+ if (msg->hdr.flags == OCTEP_CTRL_MBOX_MSG_HDR_FLAG_RESP)
+ break;
+ schedule_timeout_interruptible(period);
+ if (signal_pending(current) || time_after(jiffies, expire)) {
+ pr_info("octep_ctrl_mbox: Timed out\n");
+ return -EBUSY;
+ }
+ }
+ mbuf = (u64 *)msg->msg;
+ for (i = 1; i <= msg->hdr.sizew; i++)
+ *mbuf++ = readq(qidx + (i * 8));
+
+ return 0;
+}
+
+int octep_ctrl_mbox_recv(struct octep_ctrl_mbox *mbox, struct octep_ctrl_mbox_msg *msg)
+{
+ struct octep_ctrl_mbox_q *q;
+ u32 count, pi, ci;
+ u8 __iomem *qidx;
+ u64 *mbuf;
+ int i;
+
+ if (!mbox || !msg)
+ return -EINVAL;
+
+ q = &mbox->f2hq;
+ pi = readl(q->hw_prod);
+ ci = readl(q->hw_cons);
+ count = octep_ctrl_mbox_circq_depth(pi, ci, q->mask);
+ if (!count)
+ return -EAGAIN;
+
+ qidx = OCTEP_CTRL_MBOX_Q_OFFSET(q->hw_q, ci);
+ mbuf = (u64 *)msg->msg;
+
+ mutex_lock(&mbox->f2hq_lock);
+
+ msg->hdr.word0 = readq(qidx);
+ for (i = 1; i <= msg->hdr.sizew; i++)
+ *mbuf++ = readq(qidx + (i * 8));
+
+ ci = octep_ctrl_mbox_circq_inc(ci, q->mask);
+ writel(ci, q->hw_cons);
+
+ mutex_unlock(&mbox->f2hq_lock);
+
+ if (msg->hdr.flags != OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ || !mbox->process_req)
+ return 0;
+
+ mbox->process_req(mbox->user_ctx, msg);
+ mbuf = (u64 *)msg->msg;
+ for (i = 1; i <= msg->hdr.sizew; i++)
+ writeq(*mbuf++, (qidx + (i * 8)));
+
+ writeq(msg->hdr.word0, qidx);
+
+ return 0;
+}
+
+int octep_ctrl_mbox_uninit(struct octep_ctrl_mbox *mbox)
+{
+ if (!mbox)
+ return -EINVAL;
+
+ writeq(OCTEP_CTRL_MBOX_STATUS_UNINIT,
+ OCTEP_CTRL_MBOX_INFO_HOST_STATUS_OFFSET(mbox->barmem));
+ /* ensure the UNINIT status write completes before teardown */
+ wmb();
+
+ mutex_destroy(&mbox->h2fq_lock);
+ mutex_destroy(&mbox->f2hq_lock);
+
+ writeq(OCTEP_CTRL_MBOX_STATUS_INVALID,
+ OCTEP_CTRL_MBOX_INFO_HOST_STATUS_OFFSET(mbox->barmem));
+ writeq(0, OCTEP_CTRL_MBOX_INFO_HOST_VERSION_OFFSET(mbox->barmem));
+
+ pr_info("Octep ctrl mbox : Uninit successful.\n");
+
+ return 0;
+}
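
A sketch of the intended call pattern for this API (the driver-side names here are hypothetical; the real wiring appears in octep_main.c later in this patch), assuming the declarations from octep_ctrl_mbox.h:

/* Hypothetical setup: point the mbox at BAR memory shared with the
 * firmware, optionally register a request callback, then initialize.
 */
static int example_mbox_setup(struct octep_ctrl_mbox *mbox,
			      u8 __iomem *barmem, u64 host_version)
{
	mbox->barmem = barmem;		/* validated against magic/fw status */
	mbox->version = host_version;	/* must match the firmware's version */
	mbox->process_req = NULL;	/* or a handler for fw-initiated requests */
	mbox->user_ctx = NULL;

	return octep_ctrl_mbox_init(mbox);
}
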
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h
new file mode 100644
index 000000000000..2dc5753cfec6
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+#ifndef __OCTEP_CTRL_MBOX_H__
+#define __OCTEP_CTRL_MBOX_H__
+
+/* barmem structure
+ * |===========================================|
+ * |Info (16 + 120 + 120 = 256 bytes) |
+ * |-------------------------------------------|
+ * |magic number (8 bytes) |
+ * |bar memory size (4 bytes) |
+ * |reserved (4 bytes) |
+ * |-------------------------------------------|
+ * |host version (8 bytes) |
+ * |host status (8 bytes) |
+ * |host reserved (104 bytes) |
+ * |-------------------------------------------|
+ * |fw version (8 bytes) |
+ * |fw status (8 bytes) |
+ * |fw reserved (104 bytes) |
+ * |===========================================|
+ * |Host to Fw Queue info (16 bytes) |
+ * |-------------------------------------------|
+ * |producer index (4 bytes) |
+ * |consumer index (4 bytes) |
+ * |element size (4 bytes) |
+ * |element count (4 bytes) |
+ * |===========================================|
+ * |Fw to Host Queue info (16 bytes) |
+ * |-------------------------------------------|
+ * |producer index (4 bytes) |
+ * |consumer index (4 bytes) |
+ * |element size (4 bytes) |
+ * |element count (4 bytes) |
+ * |===========================================|
+ * |Host to Fw Queue |
+ * |-------------------------------------------|
+ * |((elem_sz + hdr(8 bytes)) * elem_cnt) bytes|
+ * |===========================================|
+ * |===========================================|
+ * |Fw to Host Queue |
+ * |-------------------------------------------|
+ * |((elem_sz + hdr(8 bytes)) * elem_cnt) bytes|
+ * |===========================================|
+ */
+
+#define OCTEP_CTRL_MBOX_MAGIC_NUMBER 0xdeaddeadbeefbeefull
+
+/* Size of mbox info in bytes */
+#define OCTEP_CTRL_MBOX_INFO_SZ 256
+/* Size of mbox host-to-fw queue info in bytes */
+#define OCTEP_CTRL_MBOX_H2FQ_INFO_SZ 16
+/* Size of mbox fw-to-host queue info in bytes */
+#define OCTEP_CTRL_MBOX_F2HQ_INFO_SZ 16
+/* Size of mbox queue in bytes */
+#define OCTEP_CTRL_MBOX_Q_SZ(sz, cnt) (((sz) + 8) * (cnt))
+/* Size of mbox in bytes */
+#define OCTEP_CTRL_MBOX_SZ(hsz, hcnt, fsz, fcnt) (OCTEP_CTRL_MBOX_INFO_SZ + \
+ OCTEP_CTRL_MBOX_H2FQ_INFO_SZ + \
+ OCTEP_CTRL_MBOX_F2HQ_INFO_SZ + \
+ OCTEP_CTRL_MBOX_Q_SZ(hsz, hcnt) + \
+ OCTEP_CTRL_MBOX_Q_SZ(fsz, fcnt))
+
+/* Valid request message */
+#define OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ BIT(0)
+/* Valid response message */
+#define OCTEP_CTRL_MBOX_MSG_HDR_FLAG_RESP BIT(1)
+/* Valid notification, no response required */
+#define OCTEP_CTRL_MBOX_MSG_HDR_FLAG_NOTIFY BIT(2)
+
+enum octep_ctrl_mbox_status {
+ OCTEP_CTRL_MBOX_STATUS_INVALID = 0,
+ OCTEP_CTRL_MBOX_STATUS_INIT,
+ OCTEP_CTRL_MBOX_STATUS_READY,
+ OCTEP_CTRL_MBOX_STATUS_UNINIT
+};
+
+/* mbox message */
+union octep_ctrl_mbox_msg_hdr {
+ u64 word0;
+ struct {
+ /* OCTEP_CTRL_MBOX_MSG_HDR_FLAG_* */
+ u32 flags;
+ /* size of message in words excluding header */
+ u32 sizew;
+ };
+};
+
+/* mbox message */
+struct octep_ctrl_mbox_msg {
+ /* mbox transaction header */
+ union octep_ctrl_mbox_msg_hdr hdr;
+ /* pointer to message buffer */
+ void *msg;
+};
+
+/* Mbox queue */
+struct octep_ctrl_mbox_q {
+ /* q element size, should be aligned to unsigned long */
+ u16 elem_sz;
+ /* q element count, should be power of 2 */
+ u16 elem_cnt;
+ /* q mask */
+ u16 mask;
+ /* producer address in bar mem */
+ u8 __iomem *hw_prod;
+ /* consumer address in bar mem */
+ u8 __iomem *hw_cons;
+ /* q base address in bar mem */
+ u8 __iomem *hw_q;
+};
+
+struct octep_ctrl_mbox {
+ /* host driver version */
+ u64 version;
+ /* size of bar memory */
+ u32 barmem_sz;
+ /* pointer to BAR memory */
+ u8 __iomem *barmem;
+ /* user context for callback, can be null */
+ void *user_ctx;
+ /* callback handler for processing request, called from octep_ctrl_mbox_recv */
+ int (*process_req)(void *user_ctx, struct octep_ctrl_mbox_msg *msg);
+ /* host-to-fw queue */
+ struct octep_ctrl_mbox_q h2fq;
+ /* fw-to-host queue */
+ struct octep_ctrl_mbox_q f2hq;
+ /* lock for h2fq */
+ struct mutex h2fq_lock;
+ /* lock for f2hq */
+ struct mutex f2hq_lock;
+};
+
+/* Initialize control mbox.
+ *
+ * @param mbox: non-null pointer to struct octep_ctrl_mbox.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_ctrl_mbox_init(struct octep_ctrl_mbox *mbox);
+
+/* Send mbox message.
+ *
+ * @param mbox: non-null pointer to struct octep_ctrl_mbox.
+ * @param msg: non-null pointer to struct octep_ctrl_mbox_msg.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_ctrl_mbox_send(struct octep_ctrl_mbox *mbox, struct octep_ctrl_mbox_msg *msg);
+
+/* Retrieve mbox message.
+ *
+ * @param mbox: non-null pointer to struct octep_ctrl_mbox.
+ * @param msg: non-null pointer to struct octep_ctrl_mbox_msg.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_ctrl_mbox_recv(struct octep_ctrl_mbox *mbox, struct octep_ctrl_mbox_msg *msg);
+
+/* Uninitialize control mbox.
+ *
+ * @param mbox: non-null pointer to struct octep_ctrl_mbox.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_ctrl_mbox_uninit(struct octep_ctrl_mbox *mbox);
+
+#endif /* __OCTEP_CTRL_MBOX_H__ */
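
To make the sizing macros concrete, a worked example with an assumed queue geometry of 64-byte elements and 16 entries in each direction (illustrative numbers, not a hardware requirement):

/*   OCTEP_CTRL_MBOX_SZ(64, 16, 64, 16)
 *     = 256 + 16 + 16		(info block + two queue-info blocks)
 *     + (64 + 8) * 16		(host-to-fw queue; 8-byte header per element)
 *     + (64 + 8) * 16		(fw-to-host queue)
 *     = 2592 bytes of BAR memory
 */
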
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
new file mode 100644
index 000000000000..7c00c896ab98
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+
+#include "octep_config.h"
+#include "octep_main.h"
+#include "octep_ctrl_net.h"
+
+int octep_get_link_status(struct octep_device *oct)
+{
+ struct octep_ctrl_net_h2f_req req = {};
+ struct octep_ctrl_net_h2f_resp *resp;
+ struct octep_ctrl_mbox_msg msg = {};
+ int err;
+
+ req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_LINK_STATUS;
+ req.link.cmd = OCTEP_CTRL_NET_CMD_GET;
+
+ msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ;
+ msg.hdr.sizew = OCTEP_CTRL_NET_H2F_STATE_REQ_SZW;
+ msg.msg = &req;
+ err = octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg);
+ if (err)
+ return err;
+
+ resp = (struct octep_ctrl_net_h2f_resp *)&req;
+ return resp->link.state;
+}
+
+void octep_set_link_status(struct octep_device *oct, bool up)
+{
+ struct octep_ctrl_net_h2f_req req = {};
+ struct octep_ctrl_mbox_msg msg = {};
+
+ req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_LINK_STATUS;
+ req.link.cmd = OCTEP_CTRL_NET_CMD_SET;
+ req.link.state = (up) ? OCTEP_CTRL_NET_STATE_UP : OCTEP_CTRL_NET_STATE_DOWN;
+
+ msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ;
+ msg.hdr.sizew = OCTEP_CTRL_NET_H2F_STATE_REQ_SZW;
+ msg.msg = &req;
+ octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg);
+}
+
+void octep_set_rx_state(struct octep_device *oct, bool up)
+{
+ struct octep_ctrl_net_h2f_req req = {};
+ struct octep_ctrl_mbox_msg msg = {};
+
+ req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_RX_STATE;
+ req.link.cmd = OCTEP_CTRL_NET_CMD_SET;
+ req.link.state = (up) ? OCTEP_CTRL_NET_STATE_UP : OCTEP_CTRL_NET_STATE_DOWN;
+
+ msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ;
+ msg.hdr.sizew = OCTEP_CTRL_NET_H2F_STATE_REQ_SZW;
+ msg.msg = &req;
+ octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg);
+}
+
+int octep_get_mac_addr(struct octep_device *oct, u8 *addr)
+{
+ struct octep_ctrl_net_h2f_req req = {};
+ struct octep_ctrl_net_h2f_resp *resp;
+ struct octep_ctrl_mbox_msg msg = {};
+ int err;
+
+ req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_MAC;
+ req.link.cmd = OCTEP_CTRL_NET_CMD_GET;
+
+ msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ;
+ msg.hdr.sizew = OCTEP_CTRL_NET_H2F_MAC_REQ_SZW;
+ msg.msg = &req;
+ err = octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg);
+ if (err)
+ return err;
+
+ resp = (struct octep_ctrl_net_h2f_resp *)&req;
+ memcpy(addr, resp->mac.addr, ETH_ALEN);
+
+ return err;
+}
+
+int octep_set_mac_addr(struct octep_device *oct, u8 *addr)
+{
+ struct octep_ctrl_net_h2f_req req = {};
+ struct octep_ctrl_mbox_msg msg = {};
+
+ req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_MAC;
+ req.mac.cmd = OCTEP_CTRL_NET_CMD_SET;
+ memcpy(&req.mac.addr, addr, ETH_ALEN);
+
+ msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ;
+ msg.hdr.sizew = OCTEP_CTRL_NET_H2F_MAC_REQ_SZW;
+ msg.msg = &req;
+
+ return octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg);
+}
+
+int octep_set_mtu(struct octep_device *oct, int mtu)
+{
+ struct octep_ctrl_net_h2f_req req = {};
+ struct octep_ctrl_mbox_msg msg = {};
+
+ req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_MTU;
+ req.mtu.cmd = OCTEP_CTRL_NET_CMD_SET;
+ req.mtu.val = mtu;
+
+ msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ;
+ msg.hdr.sizew = OCTEP_CTRL_NET_H2F_MTU_REQ_SZW;
+ msg.msg = &req;
+
+ return octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg);
+}
+
+int octep_get_if_stats(struct octep_device *oct)
+{
+ void __iomem *iface_rx_stats;
+ void __iomem *iface_tx_stats;
+ struct octep_ctrl_net_h2f_req req = {};
+ struct octep_ctrl_mbox_msg msg = {};
+ int err;
+
+ req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_GET_IF_STATS;
+ req.mac.cmd = OCTEP_CTRL_NET_CMD_GET;
+ req.get_stats.offset = oct->ctrl_mbox_ifstats_offset;
+
+ msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ;
+ msg.hdr.sizew = OCTEP_CTRL_NET_H2F_GET_STATS_REQ_SZW;
+ msg.msg = &req;
+ err = octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg);
+ if (err)
+ return err;
+
+ iface_rx_stats = oct->ctrl_mbox.barmem + oct->ctrl_mbox_ifstats_offset;
+ iface_tx_stats = oct->ctrl_mbox.barmem + oct->ctrl_mbox_ifstats_offset +
+ sizeof(struct octep_iface_rx_stats);
+ memcpy_fromio(&oct->iface_rx_stats, iface_rx_stats, sizeof(struct octep_iface_rx_stats));
+ memcpy_fromio(&oct->iface_tx_stats, iface_tx_stats, sizeof(struct octep_iface_tx_stats));
+
+ return err;
+}
+
+int octep_get_link_info(struct octep_device *oct)
+{
+ struct octep_ctrl_net_h2f_req req = {};
+ struct octep_ctrl_net_h2f_resp *resp;
+ struct octep_ctrl_mbox_msg msg = {};
+ int err;
+
+ req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_LINK_INFO;
+ req.mac.cmd = OCTEP_CTRL_NET_CMD_GET;
+
+ msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ;
+ msg.hdr.sizew = OCTEP_CTRL_NET_H2F_LINK_INFO_REQ_SZW;
+ msg.msg = &req;
+ err = octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg);
+ if (err)
+ return err;
+
+ resp = (struct octep_ctrl_net_h2f_resp *)&req;
+ oct->link_info.supported_modes = resp->link_info.supported_modes;
+ oct->link_info.advertised_modes = resp->link_info.advertised_modes;
+ oct->link_info.autoneg = resp->link_info.autoneg;
+ oct->link_info.pause = resp->link_info.pause;
+ oct->link_info.speed = resp->link_info.speed;
+
+ return err;
+}
+
+int octep_set_link_info(struct octep_device *oct, struct octep_iface_link_info *link_info)
+{
+ struct octep_ctrl_net_h2f_req req = {};
+ struct octep_ctrl_mbox_msg msg = {};
+
+ req.hdr.cmd = OCTEP_CTRL_NET_H2F_CMD_LINK_INFO;
+ req.link_info.cmd = OCTEP_CTRL_NET_CMD_SET;
+ req.link_info.info.advertised_modes = link_info->advertised_modes;
+ req.link_info.info.autoneg = link_info->autoneg;
+ req.link_info.info.pause = link_info->pause;
+ req.link_info.info.speed = link_info->speed;
+
+ msg.hdr.flags = OCTEP_CTRL_MBOX_MSG_HDR_FLAG_REQ;
+ msg.hdr.sizew = OCTEP_CTRL_NET_H2F_LINK_INFO_REQ_SZW;
+ msg.msg = &req;
+
+ return octep_ctrl_mbox_send(&oct->ctrl_mbox, &msg);
+}
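
As a usage illustration (hypothetical, not part of the patch): a netdev callback built on these helpers might look roughly like the following; the actual .ndo_change_mtu hookup lives in octep_main.c later in this patch.

/* Hypothetical .ndo_change_mtu built on octep_set_mtu(). */
static int example_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct octep_device *oct = netdev_priv(netdev);
	int err;

	err = octep_set_mtu(oct, new_mtu);	/* tell the firmware first */
	if (err)
		return err;

	netdev->mtu = new_mtu;
	return 0;
}
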
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h
new file mode 100644
index 000000000000..f23b58381322
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+#ifndef __OCTEP_CTRL_NET_H__
+#define __OCTEP_CTRL_NET_H__
+
+/* Supported commands */
+enum octep_ctrl_net_cmd {
+ OCTEP_CTRL_NET_CMD_GET = 0,
+ OCTEP_CTRL_NET_CMD_SET,
+};
+
+/* Supported states */
+enum octep_ctrl_net_state {
+ OCTEP_CTRL_NET_STATE_DOWN = 0,
+ OCTEP_CTRL_NET_STATE_UP,
+};
+
+/* Supported replies */
+enum octep_ctrl_net_reply {
+ OCTEP_CTRL_NET_REPLY_OK = 0,
+ OCTEP_CTRL_NET_REPLY_GENERIC_FAIL,
+ OCTEP_CTRL_NET_REPLY_INVALID_PARAM,
+};
+
+/* Supported host to fw commands */
+enum octep_ctrl_net_h2f_cmd {
+ OCTEP_CTRL_NET_H2F_CMD_INVALID = 0,
+ OCTEP_CTRL_NET_H2F_CMD_MTU,
+ OCTEP_CTRL_NET_H2F_CMD_MAC,
+ OCTEP_CTRL_NET_H2F_CMD_GET_IF_STATS,
+ OCTEP_CTRL_NET_H2F_CMD_GET_XSTATS,
+ OCTEP_CTRL_NET_H2F_CMD_GET_Q_STATS,
+ OCTEP_CTRL_NET_H2F_CMD_LINK_STATUS,
+ OCTEP_CTRL_NET_H2F_CMD_RX_STATE,
+ OCTEP_CTRL_NET_H2F_CMD_LINK_INFO,
+};
+
+/* Supported fw to host commands */
+enum octep_ctrl_net_f2h_cmd {
+ OCTEP_CTRL_NET_F2H_CMD_INVALID = 0,
+ OCTEP_CTRL_NET_F2H_CMD_LINK_STATUS,
+};
+
+struct octep_ctrl_net_req_hdr {
+ /* sender id */
+ u16 sender;
+ /* receiver id */
+ u16 receiver;
+ /* enum octep_ctrl_net_h2f_cmd */
+ u16 cmd;
+ /* reserved */
+ u16 rsvd0;
+};
+
+/* get/set mtu request */
+struct octep_ctrl_net_h2f_req_cmd_mtu {
+ /* enum octep_ctrl_net_cmd */
+ u16 cmd;
+ /* 0-65535 */
+ u16 val;
+};
+
+/* get/set mac request */
+struct octep_ctrl_net_h2f_req_cmd_mac {
+ /* enum octep_ctrl_net_cmd */
+ u16 cmd;
+ /* xx:xx:xx:xx:xx:xx */
+ u8 addr[ETH_ALEN];
+};
+
+/* get if_stats, xstats, q_stats request */
+struct octep_ctrl_net_h2f_req_cmd_get_stats {
+ /* offset into barmem where fw should copy over stats */
+ u32 offset;
+};
+
+/* get/set link state, rx state */
+struct octep_ctrl_net_h2f_req_cmd_state {
+ /* enum octep_ctrl_net_cmd */
+ u16 cmd;
+ /* enum octep_ctrl_net_state */
+ u16 state;
+};
+
+/* link info */
+struct octep_ctrl_net_link_info {
+ /* Bitmap of Supported link speeds/modes */
+ u64 supported_modes;
+ /* Bitmap of Advertised link speeds/modes */
+ u64 advertised_modes;
+ /* Autonegotiation state. bit 0=disabled; bit 1=enabled */
+ u8 autoneg;
+ /* Pause frames setting. bit 0=disabled; bit 1=enabled */
+ u8 pause;
+ /* Negotiated link speed in Mbps */
+ u32 speed;
+};
+
+/* get/set link info */
+struct octep_ctrl_net_h2f_req_cmd_link_info {
+ /* enum octep_ctrl_net_cmd */
+ u16 cmd;
+ /* struct octep_ctrl_net_link_info */
+ struct octep_ctrl_net_link_info info;
+};
+
+/* Host to fw request data */
+struct octep_ctrl_net_h2f_req {
+ struct octep_ctrl_net_req_hdr hdr;
+ union {
+ struct octep_ctrl_net_h2f_req_cmd_mtu mtu;
+ struct octep_ctrl_net_h2f_req_cmd_mac mac;
+ struct octep_ctrl_net_h2f_req_cmd_get_stats get_stats;
+ struct octep_ctrl_net_h2f_req_cmd_state link;
+ struct octep_ctrl_net_h2f_req_cmd_state rx;
+ struct octep_ctrl_net_h2f_req_cmd_link_info link_info;
+ };
+} __packed;
+
+struct octep_ctrl_net_resp_hdr {
+ /* sender id */
+ u16 sender;
+ /* receiver id */
+ u16 receiver;
+ /* enum octep_ctrl_net_h2f_cmd */
+ u16 cmd;
+ /* enum octep_ctrl_net_reply */
+ u16 reply;
+};
+
+/* get mtu response */
+struct octep_ctrl_net_h2f_resp_cmd_mtu {
+ /* 0-65535 */
+ u16 val;
+};
+
+/* get mac response */
+struct octep_ctrl_net_h2f_resp_cmd_mac {
+ /* xx:xx:xx:xx:xx:xx */
+ u8 addr[ETH_ALEN];
+};
+
+/* get link state, rx state response */
+struct octep_ctrl_net_h2f_resp_cmd_state {
+ /* enum octep_ctrl_net_state */
+ u16 state;
+};
+
+/* Host to fw response data */
+struct octep_ctrl_net_h2f_resp {
+ struct octep_ctrl_net_resp_hdr hdr;
+ union {
+ struct octep_ctrl_net_h2f_resp_cmd_mtu mtu;
+ struct octep_ctrl_net_h2f_resp_cmd_mac mac;
+ struct octep_ctrl_net_h2f_resp_cmd_state link;
+ struct octep_ctrl_net_h2f_resp_cmd_state rx;
+ struct octep_ctrl_net_link_info link_info;
+ };
+} __packed;
+
+/* link state notification */
+struct octep_ctrl_net_f2h_req_cmd_state {
+ /* enum octep_ctrl_net_state */
+ u16 state;
+};
+
+/* Fw to host request data */
+struct octep_ctrl_net_f2h_req {
+ struct octep_ctrl_net_req_hdr hdr;
+ union {
+ struct octep_ctrl_net_f2h_req_cmd_state link;
+ };
+};
+
+/* Fw to host response data */
+struct octep_ctrl_net_f2h_resp {
+ struct octep_ctrl_net_resp_hdr hdr;
+};
+
+/* Size of host to fw octep_ctrl_mbox queue element */
+union octep_ctrl_net_h2f_data_sz {
+ struct octep_ctrl_net_h2f_req h2f_req;
+ struct octep_ctrl_net_h2f_resp h2f_resp;
+};
+
+/* Size of fw to host octep_ctrl_mbox queue element */
+union octep_ctrl_net_f2h_data_sz {
+ struct octep_ctrl_net_f2h_req f2h_req;
+ struct octep_ctrl_net_f2h_resp f2h_resp;
+};
+
+/* size of host to fw data in words */
+#define OCTEP_CTRL_NET_H2F_DATA_SZW ((sizeof(union octep_ctrl_net_h2f_data_sz)) / \
+ (sizeof(unsigned long)))
+
+/* size of fw to host data in words */
+#define OCTEP_CTRL_NET_F2H_DATA_SZW ((sizeof(union octep_ctrl_net_f2h_data_sz)) / \
+ (sizeof(unsigned long)))
+
+/* size in words of get/set mtu request */
+#define OCTEP_CTRL_NET_H2F_MTU_REQ_SZW 2
+/* size in words of get/set mac request */
+#define OCTEP_CTRL_NET_H2F_MAC_REQ_SZW 2
+/* size in words of get stats request */
+#define OCTEP_CTRL_NET_H2F_GET_STATS_REQ_SZW 2
+/* size in words of get/set state request */
+#define OCTEP_CTRL_NET_H2F_STATE_REQ_SZW 2
+/* size in words of get/set link info request */
+#define OCTEP_CTRL_NET_H2F_LINK_INFO_REQ_SZW 4
+
+/* size in words of get mtu response */
+#define OCTEP_CTRL_NET_H2F_GET_MTU_RESP_SZW 2
+/* size in words of set mtu response */
+#define OCTEP_CTRL_NET_H2F_SET_MTU_RESP_SZW 1
+/* size in words of get mac response */
+#define OCTEP_CTRL_NET_H2F_GET_MAC_RESP_SZW 2
+/* size in words of set mac response */
+#define OCTEP_CTRL_NET_H2F_SET_MAC_RESP_SZW 1
+/* size in words of get state response */
+#define OCTEP_CTRL_NET_H2F_GET_STATE_RESP_SZW 2
+/* size in words of set state response */
+#define OCTEP_CTRL_NET_H2F_SET_STATE_RESP_SZW 1
+/* size in words of get link info response */
+#define OCTEP_CTRL_NET_H2F_GET_LINK_INFO_RESP_SZW 4
+/* size in words of set link info response */
+#define OCTEP_CTRL_NET_H2F_SET_LINK_INFO_RESP_SZW 1
+
+/** Get link status from firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ *
+ * return value: link status (0=down, 1=up) on success; -errno on mbox failure.
+ */
+int octep_get_link_status(struct octep_device *oct);
+
+/** Set link status in firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ * @param up: boolean status.
+ */
+void octep_set_link_status(struct octep_device *oct, bool up);
+
+/** Set rx state in firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ * @param up: boolean status.
+ */
+void octep_set_rx_state(struct octep_device *oct, bool up);
+
+/** Get mac address from firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ * @param addr: non-null pointer to mac address.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_get_mac_addr(struct octep_device *oct, u8 *addr);
+
+/** Set mac address in firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ * @param addr: non-null pointer to mac address.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_set_mac_addr(struct octep_device *oct, u8 *addr);
+
+/** Set mtu in firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ * @param mtu: new MTU value.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_set_mtu(struct octep_device *oct, int mtu);
+
+/** Get interface statistics from firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_get_if_stats(struct octep_device *oct);
+
+/** Get link info from firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_get_link_info(struct octep_device *oct);
+
+/** Set link info in firmware.
+ *
+ * @param oct: non-null pointer to struct octep_device.
+ * @param link_info: non-null pointer to struct octep_iface_link_info.
+ *
+ * return value: 0 on success, -errno on failure.
+ */
+int octep_set_link_info(struct octep_device *oct, struct octep_iface_link_info *link_info);
+
+#endif /* __OCTEP_CTRL_NET_H__ */
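
To make the *_REQ_SZW values above concrete, a worked sizing example (assuming 8-byte words on a 64-bit host, since the mbox moves u64s; the request struct is __packed):

/*   state request     = 8 (hdr) + 4 (cmd + state)	-> 2 words
 *   mac request       = 8 (hdr) + 2 (cmd) + 6 (addr)	-> 2 words
 *   link-info request = 8 (hdr) + 2 (cmd) + 22 (info)	-> 4 words
 * matching OCTEP_CTRL_NET_H2F_{STATE,MAC,LINK_INFO}_REQ_SZW above.
 */
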
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c
new file mode 100644
index 000000000000..87ef129b269a
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+
+#include "octep_config.h"
+#include "octep_main.h"
+#include "octep_ctrl_net.h"
+
+static const char octep_gstrings_global_stats[][ETH_GSTRING_LEN] = {
+ "rx_packets",
+ "tx_packets",
+ "rx_bytes",
+ "tx_bytes",
+ "rx_alloc_errors",
+ "tx_busy_errors",
+ "rx_dropped",
+ "tx_dropped",
+ "tx_hw_pkts",
+ "tx_hw_octs",
+ "tx_hw_bcast",
+ "tx_hw_mcast",
+ "tx_hw_underflow",
+ "tx_hw_control",
+ "tx_less_than_64",
+ "tx_equal_64",
+ "tx_equal_65_to_127",
+ "tx_equal_128_to_255",
+ "tx_equal_256_to_511",
+ "tx_equal_512_to_1023",
+ "tx_equal_1024_to_1518",
+ "tx_greater_than_1518",
+ "rx_hw_pkts",
+ "rx_hw_bytes",
+ "rx_hw_bcast",
+ "rx_hw_mcast",
+ "rx_pause_pkts",
+ "rx_pause_bytes",
+ "rx_dropped_pkts_fifo_full",
+ "rx_dropped_bytes_fifo_full",
+ "rx_err_pkts",
+};
+
+#define OCTEP_GLOBAL_STATS_CNT (sizeof(octep_gstrings_global_stats) / ETH_GSTRING_LEN)
+
+static const char octep_gstrings_tx_q_stats[][ETH_GSTRING_LEN] = {
+ "tx_packets_posted[Q-%u]",
+ "tx_packets_completed[Q-%u]",
+ "tx_bytes[Q-%u]",
+ "tx_busy[Q-%u]",
+};
+
+#define OCTEP_TX_Q_STATS_CNT (sizeof(octep_gstrings_tx_q_stats) / ETH_GSTRING_LEN)
+
+static const char octep_gstrings_rx_q_stats[][ETH_GSTRING_LEN] = {
+ "rx_packets[Q-%u]",
+ "rx_bytes[Q-%u]",
+ "rx_alloc_errors[Q-%u]",
+};
+
+#define OCTEP_RX_Q_STATS_CNT (sizeof(octep_gstrings_rx_q_stats) / ETH_GSTRING_LEN)
+
+static void octep_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *info)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+
+ strscpy(info->driver, OCTEP_DRV_NAME, sizeof(info->driver));
+ strscpy(info->bus_info, pci_name(oct->pdev), sizeof(info->bus_info));
+}
+
+static void octep_get_strings(struct net_device *netdev,
+ u32 stringset, u8 *data)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+ u16 num_queues = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+ char *strings = (char *)data;
+ int i, j;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < OCTEP_GLOBAL_STATS_CNT; i++) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ octep_gstrings_global_stats[i]);
+ strings += ETH_GSTRING_LEN;
+ }
+
+ for (i = 0; i < num_queues; i++) {
+ for (j = 0; j < OCTEP_TX_Q_STATS_CNT; j++) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ octep_gstrings_tx_q_stats[j], i);
+ strings += ETH_GSTRING_LEN;
+ }
+ }
+
+ for (i = 0; i < num_queues; i++) {
+ for (j = 0; j < OCTEP_RX_Q_STATS_CNT; j++) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ octep_gstrings_rx_q_stats[j], i);
+ strings += ETH_GSTRING_LEN;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static int octep_get_sset_count(struct net_device *netdev, int sset)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+ u16 num_queues = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf);
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ return OCTEP_GLOBAL_STATS_CNT + (num_queues *
+ (OCTEP_TX_Q_STATS_CNT + OCTEP_RX_Q_STATS_CNT));
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void
+octep_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+ struct octep_iface_tx_stats *iface_tx_stats;
+ struct octep_iface_rx_stats *iface_rx_stats;
+ u64 rx_packets, rx_bytes;
+ u64 tx_packets, tx_bytes;
+ u64 rx_alloc_errors, tx_busy_errors;
+ int q, i;
+
+ rx_packets = 0;
+ rx_bytes = 0;
+ tx_packets = 0;
+ tx_bytes = 0;
+ rx_alloc_errors = 0;
+ tx_busy_errors = 0;
+
+ octep_get_if_stats(oct);
+ iface_tx_stats = &oct->iface_tx_stats;
+ iface_rx_stats = &oct->iface_rx_stats;
+
+ for (q = 0; q < oct->num_oqs; q++) {
+ struct octep_iq *iq = oct->iq[q];
+ struct octep_oq *oq = oct->oq[q];
+
+ tx_packets += iq->stats.instr_completed;
+ tx_bytes += iq->stats.bytes_sent;
+ tx_busy_errors += iq->stats.tx_busy;
+
+ rx_packets += oq->stats.packets;
+ rx_bytes += oq->stats.bytes;
+ rx_alloc_errors += oq->stats.alloc_failures;
+ }
+ i = 0;
+ data[i++] = rx_packets;
+ data[i++] = tx_packets;
+ data[i++] = rx_bytes;
+ data[i++] = tx_bytes;
+ data[i++] = rx_alloc_errors;
+ data[i++] = tx_busy_errors;
+ data[i++] = iface_rx_stats->dropped_pkts_fifo_full +
+ iface_rx_stats->err_pkts;
+ data[i++] = iface_tx_stats->xscol +
+ iface_tx_stats->xsdef;
+ data[i++] = iface_tx_stats->pkts;
+ data[i++] = iface_tx_stats->octs;
+ data[i++] = iface_tx_stats->bcst;
+ data[i++] = iface_tx_stats->mcst;
+ data[i++] = iface_tx_stats->undflw;
+ data[i++] = iface_tx_stats->ctl;
+ data[i++] = iface_tx_stats->hist_lt64;
+ data[i++] = iface_tx_stats->hist_eq64;
+ data[i++] = iface_tx_stats->hist_65to127;
+ data[i++] = iface_tx_stats->hist_128to255;
+ data[i++] = iface_tx_stats->hist_256to511;
+ data[i++] = iface_tx_stats->hist_512to1023;
+ data[i++] = iface_tx_stats->hist_1024to1518;
+ data[i++] = iface_tx_stats->hist_gt1518;
+ data[i++] = iface_rx_stats->pkts;
+ data[i++] = iface_rx_stats->octets;
+ data[i++] = iface_rx_stats->bcast_pkts;
+ data[i++] = iface_rx_stats->mcast_pkts;
+ data[i++] = iface_rx_stats->pause_pkts;
+ data[i++] = iface_rx_stats->pause_octets;
+ data[i++] = iface_rx_stats->dropped_pkts_fifo_full;
+ data[i++] = iface_rx_stats->dropped_octets_fifo_full;
+ data[i++] = iface_rx_stats->err_pkts;
+
+ /* Per Tx Queue stats */
+ for (q = 0; q < oct->num_iqs; q++) {
+ struct octep_iq *iq = oct->iq[q];
+
+ data[i++] = iq->stats.instr_posted;
+ data[i++] = iq->stats.instr_completed;
+ data[i++] = iq->stats.bytes_sent;
+ data[i++] = iq->stats.tx_busy;
+ }
+
+ /* Per Rx Queue stats */
+ for (q = 0; q < oct->num_oqs; q++) {
+ struct octep_oq *oq = oct->oq[q];
+
+ data[i++] = oq->stats.packets;
+ data[i++] = oq->stats.bytes;
+ data[i++] = oq->stats.alloc_failures;
+ }
+}
+
+#define OCTEP_SET_ETHTOOL_LINK_MODES_BITMAP(octep_speeds, ksettings, name) \
+{ \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_T)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseT_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_R)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseR_FEC); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_CR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseCR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_KR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseKR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_LR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseLR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_10GBASE_SR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseSR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_25GBASE_CR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseCR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_25GBASE_KR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseKR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_25GBASE_SR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseSR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_40GBASE_CR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseCR4_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_40GBASE_KR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseKR4_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_40GBASE_LR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseLR4_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_40GBASE_SR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseSR4_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_CR2)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseCR2_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_KR2)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseKR2_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_SR2)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseSR2_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_CR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseCR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_KR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseKR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_LR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseLR_ER_FR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_50GBASE_SR)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseSR_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_100GBASE_CR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseCR4_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_100GBASE_KR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseKR4_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_100GBASE_LR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseLR4_ER4_Full); \
+ if ((octep_speeds) & BIT(OCTEP_LINK_MODE_100GBASE_SR4)) \
+ ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseSR4_Full); \
+}
+
+static int octep_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+ struct octep_iface_link_info *link_info;
+ u32 advertised_modes, supported_modes;
+
+ ethtool_link_ksettings_zero_link_mode(cmd, supported);
+ ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+
+ octep_get_link_info(oct);
+
+ advertised_modes = oct->link_info.advertised_modes;
+ supported_modes = oct->link_info.supported_modes;
+ link_info = &oct->link_info;
+
+ OCTEP_SET_ETHTOOL_LINK_MODES_BITMAP(supported_modes, cmd, supported);
+ OCTEP_SET_ETHTOOL_LINK_MODES_BITMAP(advertised_modes, cmd, advertising);
+
+ if (link_info->autoneg) {
+ if (link_info->autoneg & OCTEP_LINK_MODE_AUTONEG_SUPPORTED)
+ ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+ if (link_info->autoneg & OCTEP_LINK_MODE_AUTONEG_ADVERTISED) {
+ ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
+ cmd->base.autoneg = AUTONEG_ENABLE;
+ } else {
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ }
+ } else {
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ }
+
+ if (link_info->pause) {
+ if (link_info->pause & OCTEP_LINK_MODE_PAUSE_SUPPORTED)
+ ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+ if (link_info->pause & OCTEP_LINK_MODE_PAUSE_ADVERTISED)
+ ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+ }
+
+ cmd->base.port = PORT_FIBRE;
+ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
+
+ if (netif_carrier_ok(netdev)) {
+ cmd->base.speed = link_info->speed;
+ cmd->base.duplex = DUPLEX_FULL;
+ } else {
+ cmd->base.speed = SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ }
+ return 0;
+}
+
+static int octep_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+ struct octep_iface_link_info link_info_new;
+ struct octep_iface_link_info *link_info;
+ u64 advertised = 0;
+ u8 autoneg = 0;
+ int err;
+
+ link_info = &oct->link_info;
+ memcpy(&link_info_new, link_info, sizeof(struct octep_iface_link_info));
+
+ /* Only full duplex is supported;
+ * assume full duplex when the duplex is unknown.
+ */
+ if (cmd->base.duplex != DUPLEX_FULL &&
+ cmd->base.duplex != DUPLEX_UNKNOWN)
+ return -EOPNOTSUPP;
+
+ if (cmd->base.autoneg == AUTONEG_ENABLE) {
+ if (!(link_info->autoneg & OCTEP_LINK_MODE_AUTONEG_SUPPORTED))
+ return -EOPNOTSUPP;
+ autoneg = 1;
+ }
+
+ if (!bitmap_subset(cmd->link_modes.advertising,
+ cmd->link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS))
+ return -EINVAL;
+
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 10000baseT_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_10GBASE_T);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 10000baseR_FEC))
+ advertised |= BIT(OCTEP_LINK_MODE_10GBASE_R);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 10000baseCR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_10GBASE_CR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 10000baseKR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_10GBASE_KR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 10000baseLR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_10GBASE_LR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 10000baseSR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_10GBASE_SR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 25000baseCR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_25GBASE_CR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 25000baseKR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_25GBASE_KR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 25000baseSR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_25GBASE_SR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 40000baseCR4_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_40GBASE_CR4);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 40000baseKR4_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_40GBASE_KR4);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 40000baseLR4_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_40GBASE_LR4);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 40000baseSR4_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_40GBASE_SR4);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 50000baseCR2_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_50GBASE_CR2);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 50000baseKR2_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_50GBASE_KR2);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 50000baseSR2_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_50GBASE_SR2);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 50000baseCR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_50GBASE_CR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 50000baseKR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_50GBASE_KR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 50000baseLR_ER_FR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_50GBASE_LR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 50000baseSR_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_50GBASE_SR);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 100000baseCR4_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_100GBASE_CR4);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 100000baseKR4_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_100GBASE_KR4);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 100000baseLR4_ER4_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_100GBASE_LR4);
+ if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+ 100000baseSR4_Full))
+ advertised |= BIT(OCTEP_LINK_MODE_100GBASE_SR4);
+
+ if (advertised == link_info->advertised_modes &&
+ cmd->base.speed == link_info->speed &&
+ cmd->base.autoneg == link_info->autoneg)
+ return 0;
+
+ link_info_new.advertised_modes = advertised;
+ link_info_new.speed = cmd->base.speed;
+ link_info_new.autoneg = autoneg;
+
+ err = octep_set_link_info(oct, &link_info_new);
+ if (err)
+ return err;
+
+ memcpy(link_info, &link_info_new, sizeof(struct octep_iface_link_info));
+ return 0;
+}
+
+static const struct ethtool_ops octep_ethtool_ops = {
+ .get_drvinfo = octep_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = octep_get_strings,
+ .get_sset_count = octep_get_sset_count,
+ .get_ethtool_stats = octep_get_ethtool_stats,
+ .get_link_ksettings = octep_get_link_ksettings,
+ .set_link_ksettings = octep_set_link_ksettings,
+};
+
+void octep_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &octep_ethtool_ops;
+}
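
A note on the ethtool stats contract (illustrative arithmetic, not code from the patch): the number of u64 values written by octep_get_ethtool_stats() must equal the count octep_get_sset_count() returns for ETH_SS_STATS.

/* Count check for the tables above:
 *   OCTEP_GLOBAL_STATS_CNT = 31 strings, matched by 31 data[i++] stores;
 *   per queue: OCTEP_TX_Q_STATS_CNT = 4 and OCTEP_RX_Q_STATS_CNT = 3,
 *   matched by the per-IQ and per-OQ fill-in loops.
 * Any new stat must be added to both the string table and the data
 * fill-in, in the same position.
 */
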
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
new file mode 100644
index 000000000000..5d39c857ea41
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -0,0 +1,1177 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/vmalloc.h>
+
+#include "octep_config.h"
+#include "octep_main.h"
+#include "octep_ctrl_net.h"
+
+struct workqueue_struct *octep_wq;
+
+/* Supported Devices */
+static const struct pci_device_id octep_pci_id_tbl[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_PF)},
+ {0, },
+};
+MODULE_DEVICE_TABLE(pci, octep_pci_id_tbl);
+
+MODULE_AUTHOR("Veerasenareddy Burru <vburru@marvell.com>");
+MODULE_DESCRIPTION(OCTEP_DRV_STRING);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(OCTEP_DRV_VERSION_STR);
+
+/**
+ * octep_alloc_ioq_vectors() - Allocate Tx/Rx Queue interrupt info.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Allocate resources to hold per Tx/Rx queue interrupt info.
+ * This information is passed to the interrupt handler; from it the NAPI
+ * poll is scheduled, and it gives quick access to the private data of the
+ * Tx/Rx queue corresponding to the interrupt being handled.
+ *
+ * Return: 0, on successful allocation of resources for all queue interrupts.
+ * -1, if failed to allocate any resource.
+ */
+static int octep_alloc_ioq_vectors(struct octep_device *oct)
+{
+ int i;
+ struct octep_ioq_vector *ioq_vector;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ oct->ioq_vector[i] = vzalloc(sizeof(*oct->ioq_vector[i]));
+ if (!oct->ioq_vector[i])
+ goto free_ioq_vector;
+
+ ioq_vector = oct->ioq_vector[i];
+ ioq_vector->iq = oct->iq[i];
+ ioq_vector->oq = oct->oq[i];
+ ioq_vector->octep_dev = oct;
+ }
+
+ dev_info(&oct->pdev->dev, "Allocated %d IOQ vectors\n", oct->num_oqs);
+ return 0;
+
+free_ioq_vector:
+ while (i) {
+ i--;
+ vfree(oct->ioq_vector[i]);
+ oct->ioq_vector[i] = NULL;
+ }
+ return -1;
+}
+
+/**
+ * octep_free_ioq_vectors() - Free Tx/Rx Queue interrupt vector info.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_free_ioq_vectors(struct octep_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ if (oct->ioq_vector[i]) {
+ vfree(oct->ioq_vector[i]);
+ oct->ioq_vector[i] = NULL;
+ }
+ }
+ netdev_info(oct->netdev, "Freed IOQ Vectors\n");
+}
+
+/**
+ * octep_enable_msix_range() - enable MSI-x interrupts.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Allocate and enable all MSI-x interrupts (queue and non-queue interrupts)
+ * for the Octeon device.
+ *
+ * Return: 0, on successfully enabling all MSI-x interrupts.
+ * -1, if failed to enable any MSI-x interrupt.
+ */
+static int octep_enable_msix_range(struct octep_device *oct)
+{
+ int num_msix, msix_allocated;
+ int i;
+
+ /* Queue interrupts plus generic (non-queue) interrupts */
+ num_msix = oct->num_oqs + CFG_GET_NON_IOQ_MSIX(oct->conf);
+ oct->msix_entries = kcalloc(num_msix,
+ sizeof(struct msix_entry), GFP_KERNEL);
+ if (!oct->msix_entries)
+ goto msix_alloc_err;
+
+ for (i = 0; i < num_msix; i++)
+ oct->msix_entries[i].entry = i;
+
+ msix_allocated = pci_enable_msix_range(oct->pdev, oct->msix_entries,
+ num_msix, num_msix);
+ if (msix_allocated != num_msix) {
+ dev_err(&oct->pdev->dev,
+ "Failed to enable %d msix irqs; got only %d\n",
+ num_msix, msix_allocated);
+ goto enable_msix_err;
+ }
+ oct->num_irqs = msix_allocated;
+ dev_info(&oct->pdev->dev, "MSI-X enabled successfully\n");
+
+ return 0;
+
+enable_msix_err:
+ if (msix_allocated > 0)
+ pci_disable_msix(oct->pdev);
+ kfree(oct->msix_entries);
+ oct->msix_entries = NULL;
+msix_alloc_err:
+ return -1;
+}
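
One design note on the call above, as a comment (the API semantics stated here are those of the PCI core):

/* pci_enable_msix_range(pdev, entries, min, max) returns the number of
 * vectors granted (between min and max) or a negative errno. Passing
 * min == max, as above, makes the allocation all-or-nothing: either
 * every queue gets its own vector (plus the non-queue vectors) or
 * MSI-X setup fails outright.
 */
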
+
+/**
+ * octep_disable_msix() - disable MSI-x interrupts.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Disable MSI-x on the Octeon device.
+ */
+static void octep_disable_msix(struct octep_device *oct)
+{
+ pci_disable_msix(oct->pdev);
+ kfree(oct->msix_entries);
+ oct->msix_entries = NULL;
+ dev_info(&oct->pdev->dev, "Disabled MSI-X\n");
+}
+
+/**
+ * octep_non_ioq_intr_handler() - common handler for all generic interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data.
+ *
+ * This is the common handler for all non-queue (generic) interrupts.
+ */
+static irqreturn_t octep_non_ioq_intr_handler(int irq, void *data)
+{
+ struct octep_device *oct = data;
+
+ return oct->hw_ops.non_ioq_intr_handler(oct);
+}
+
+/**
+ * octep_ioq_intr_handler() - handler for all Tx/Rx queue interrupts.
+ *
+ * @irq: Interrupt number.
+ * @data: interrupt data; contains pointers to Tx/Rx queue private data
+ * and the corresponding NAPI context.
+ *
+ * This is the common handler for all Tx/Rx queue interrupts.
+ */
+static irqreturn_t octep_ioq_intr_handler(int irq, void *data)
+{
+ struct octep_ioq_vector *ioq_vector = data;
+ struct octep_device *oct = ioq_vector->octep_dev;
+
+ return oct->hw_ops.ioq_intr_handler(ioq_vector);
+}
+
+/**
+ * octep_request_irqs() - Register interrupt handlers.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Register handlers for all queue and non-queue interrupts.
+ *
+ * Return: 0, on successful registration of all interrupt handlers.
+ * -1, on any error.
+ */
+static int octep_request_irqs(struct octep_device *oct)
+{
+ struct net_device *netdev = oct->netdev;
+ struct octep_ioq_vector *ioq_vector;
+ struct msix_entry *msix_entry;
+ char **non_ioq_msix_names;
+ int num_non_ioq_msix;
+ int ret, i;
+
+ num_non_ioq_msix = CFG_GET_NON_IOQ_MSIX(oct->conf);
+ non_ioq_msix_names = CFG_GET_NON_IOQ_MSIX_NAMES(oct->conf);
+
+ oct->non_ioq_irq_names = kcalloc(num_non_ioq_msix,
+ OCTEP_MSIX_NAME_SIZE, GFP_KERNEL);
+ if (!oct->non_ioq_irq_names)
+ goto alloc_err;
+
+ /* First few MSI-X interrupts are non-queue interrupts */
+ for (i = 0; i < num_non_ioq_msix; i++) {
+ char *irq_name;
+
+ irq_name = &oct->non_ioq_irq_names[i * OCTEP_MSIX_NAME_SIZE];
+ msix_entry = &oct->msix_entries[i];
+
+ snprintf(irq_name, OCTEP_MSIX_NAME_SIZE,
+ "%s-%s", netdev->name, non_ioq_msix_names[i]);
+ ret = request_irq(msix_entry->vector,
+ octep_non_ioq_intr_handler, 0,
+ irq_name, oct);
+ if (ret) {
+ netdev_err(netdev,
+ "request_irq failed for %s; err=%d",
+ irq_name, ret);
+ goto non_ioq_irq_err;
+ }
+ }
+
+ /* Request IRQs for Tx/Rx queues */
+ for (i = 0; i < oct->num_oqs; i++) {
+ ioq_vector = oct->ioq_vector[i];
+ msix_entry = &oct->msix_entries[i + num_non_ioq_msix];
+
+ snprintf(ioq_vector->name, sizeof(ioq_vector->name),
+ "%s-q%d", netdev->name, i);
+ ret = request_irq(msix_entry->vector,
+ octep_ioq_intr_handler, 0,
+ ioq_vector->name, ioq_vector);
+ if (ret) {
+ netdev_err(netdev,
+ "request_irq failed for Q-%d; err=%d",
+ i, ret);
+ goto ioq_irq_err;
+ }
+
+ cpumask_set_cpu(i % num_online_cpus(),
+ &ioq_vector->affinity_mask);
+ irq_set_affinity_hint(msix_entry->vector,
+ &ioq_vector->affinity_mask);
+ }
+
+ return 0;
+ioq_irq_err:
+ while (i > num_non_ioq_msix) {
+ --i;
+ irq_set_affinity_hint(oct->msix_entries[i].vector, NULL);
+ free_irq(oct->msix_entries[i].vector, oct->ioq_vector[i]);
+ }
+non_ioq_irq_err:
+ while (i) {
+ --i;
+ free_irq(oct->msix_entries[i].vector, oct);
+ }
+alloc_err:
+ return -1;
+}
+
+/**
+ * octep_free_irqs() - free all registered interrupts.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Free all queue and non-queue interrupts of the Octeon device.
+ */
+static void octep_free_irqs(struct octep_device *oct)
+{
+ int i;
+
+ /* First few MSI-X interrupts are non-queue interrupts; free them */
+ for (i = 0; i < CFG_GET_NON_IOQ_MSIX(oct->conf); i++)
+ free_irq(oct->msix_entries[i].vector, oct);
+ kfree(oct->non_ioq_irq_names);
+
+ /* Free IRQs for Input/Output (Tx/Rx) queues */
+ for (i = CFG_GET_NON_IOQ_MSIX(oct->conf); i < oct->num_irqs; i++) {
+ irq_set_affinity_hint(oct->msix_entries[i].vector, NULL);
+ free_irq(oct->msix_entries[i].vector,
+ oct->ioq_vector[i - CFG_GET_NON_IOQ_MSIX(oct->conf)]);
+ }
+ netdev_info(oct->netdev, "IRQs freed\n");
+}
+
+/**
+ * octep_setup_irqs() - setup interrupts for the Octeon device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Allocate data structures to hold per interrupt information, allocate/enable
+ * MSI-x interrupt and register interrupt handlers.
+ *
+ * Return: 0, on successful allocation and registration of all interrupts.
+ * -1, on any error.
+ */
+static int octep_setup_irqs(struct octep_device *oct)
+{
+ if (octep_alloc_ioq_vectors(oct))
+ goto ioq_vector_err;
+
+ if (octep_enable_msix_range(oct))
+ goto enable_msix_err;
+
+ if (octep_request_irqs(oct))
+ goto request_irq_err;
+
+ return 0;
+
+request_irq_err:
+ octep_disable_msix(oct);
+enable_msix_err:
+ octep_free_ioq_vectors(oct);
+ioq_vector_err:
+ return -1;
+}
+
+/**
+ * octep_clean_irqs() - free all interrupts and its resources.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_clean_irqs(struct octep_device *oct)
+{
+ octep_free_irqs(oct);
+ octep_disable_msix(oct);
+ octep_free_ioq_vectors(oct);
+}
+
+/**
+ * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ *
+ * @iq: Octeon Tx queue data structure.
+ * @oq: Octeon Rx queue data structure.
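+ *
+ * Acknowledge the packets processed so far by writing the counts back, then
+ * set the RESEND bit so that the hardware re-evaluates its counters and
+ * raises a fresh interrupt if more work arrived meanwhile (behavior implied
+ * by the RESEND writes below).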
+ */
+static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq)
+{
+ u32 pkts_pend = oq->pkts_pending;
+
+ netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no);
+ if (iq->pkts_processed) {
+ writel(iq->pkts_processed, iq->inst_cnt_reg);
+ iq->pkt_in_done -= iq->pkts_processed;
+ iq->pkts_processed = 0;
+ }
+ if (oq->last_pkt_count - pkts_pend) {
+ writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg);
+ oq->last_pkt_count = pkts_pend;
+ }
+
+ /* Flush the previous wrties before writing to RESEND bit */
+ wmb();
+ writeq(1UL << OCTEP_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg);
+ writeq(1UL << OCTEP_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg);
+}
+
+/**
+ * octep_napi_poll() - NAPI poll function for Tx/Rx.
+ *
+ * @napi: pointer to napi context.
+ * @budget: maximum number of packets to be processed in a single invocation.
+ *
+ * Return: number of Rx packets processed, or @budget if more work is pending.
+ */
+static int octep_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct octep_ioq_vector *ioq_vector =
+ container_of(napi, struct octep_ioq_vector, napi);
+ u32 tx_pending, rx_done;
+
+ tx_pending = octep_iq_process_completions(ioq_vector->iq, budget);
+ rx_done = octep_oq_process_rx(ioq_vector->oq, budget);
+
+ /* Need more polling if Tx completion processing is still pending or
+ * at least 'budget' Rx packets were processed.
+ */
+ if (tx_pending || rx_done >= budget)
+ return budget;
+
+ napi_complete(napi);
+ octep_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq);
+ return rx_done;
+}
+
+/**
+ * octep_napi_add() - Add NAPI poll for all Tx/Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_napi_add(struct octep_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ netdev_dbg(oct->netdev, "Adding NAPI on Q-%d\n", i);
+ netif_napi_add(oct->netdev, &oct->ioq_vector[i]->napi,
+ octep_napi_poll, 64);
+ oct->oq[i]->napi = &oct->ioq_vector[i]->napi;
+ }
+}
+
+/**
+ * octep_napi_delete() - delete NAPI poll callback for all Tx/Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_napi_delete(struct octep_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ netdev_dbg(oct->netdev, "Deleting NAPI on Q-%d\n", i);
+ netif_napi_del(&oct->ioq_vector[i]->napi);
+ oct->oq[i]->napi = NULL;
+ }
+}
+
+/**
+ * octep_napi_enable() - enable NAPI for all Tx/Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_napi_enable(struct octep_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ netdev_dbg(oct->netdev, "Enabling NAPI on Q-%d\n", i);
+ napi_enable(&oct->ioq_vector[i]->napi);
+ }
+}
+
+/**
+ * octep_napi_disable() - disable NAPI for all Tx/Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+static void octep_napi_disable(struct octep_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++) {
+ netdev_dbg(oct->netdev, "Disabling NAPI on Q-%d\n", i);
+ napi_disable(&oct->ioq_vector[i]->napi);
+ }
+}
+
+static void octep_link_up(struct net_device *netdev)
+{
+ netif_carrier_on(netdev);
+ netif_tx_start_all_queues(netdev);
+}
+
+/**
+ * octep_open() - start the octeon network device.
+ *
+ * @netdev: pointer to kernel network device.
+ *
+ * Set up Tx/Rx queues and interrupts, and enable hardware operation of the
+ * Tx/Rx queues and interrupts.
+ *
+ * Return: 0, on successfully setting up the device and bringing it up.
+ * -1, on any error.
+ */
+static int octep_open(struct net_device *netdev)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+ int err, ret;
+
+ netdev_info(netdev, "Starting netdev ...\n");
+ netif_carrier_off(netdev);
+
+ oct->hw_ops.reset_io_queues(oct);
+
+ if (octep_setup_iqs(oct))
+ goto setup_iq_err;
+ if (octep_setup_oqs(oct))
+ goto setup_oq_err;
+ if (octep_setup_irqs(oct))
+ goto setup_irq_err;
+
+ err = netif_set_real_num_tx_queues(netdev, oct->num_iqs);
+ if (err)
+ goto set_queues_err;
+ err = netif_set_real_num_rx_queues(netdev, oct->num_oqs);
+ if (err)
+ goto set_queues_err;
+
+ octep_napi_add(oct);
+ octep_napi_enable(oct);
+
+ oct->link_info.admin_up = 1;
+ octep_set_rx_state(oct, true);
+
+ ret = octep_get_link_status(oct);
+ if (!ret)
+ octep_set_link_status(oct, true);
+
+ /* Enable the input and output queues for this Octeon device */
+ oct->hw_ops.enable_io_queues(oct);
+
+ /* Enable Octeon device interrupts */
+ oct->hw_ops.enable_interrupts(oct);
+
+ octep_oq_dbell_init(oct);
+
+ ret = octep_get_link_status(oct);
+ if (ret)
+ octep_link_up(netdev);
+
+ return 0;
+
+set_queues_err:
+ octep_napi_disable(oct);
+ octep_napi_delete(oct);
+ octep_clean_irqs(oct);
+setup_irq_err:
+ octep_free_oqs(oct);
+setup_oq_err:
+ octep_free_iqs(oct);
+setup_iq_err:
+ return -1;
+}
+
+/**
+ * octep_stop() - stop the octeon network device.
+ *
+ * @netdev: pointer to kernel network device.
+ *
+ * Stop the device's Tx/Rx operations, bring down the link and
+ * free up all resources allocated for Tx/Rx queues and interrupts.
+ *
+ * Return: 0, always.
+ */
+static int octep_stop(struct net_device *netdev)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+
+ netdev_info(netdev, "Stopping the device ...\n");
+
+ /* Stop Tx from stack */
+ netif_tx_stop_all_queues(netdev);
+ netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+
+ octep_set_link_status(oct, false);
+ octep_set_rx_state(oct, false);
+
+ oct->link_info.admin_up = 0;
+ oct->link_info.oper_up = 0;
+
+ oct->hw_ops.disable_interrupts(oct);
+ octep_napi_disable(oct);
+ octep_napi_delete(oct);
+
+ octep_clean_irqs(oct);
+ octep_clean_iqs(oct);
+
+ oct->hw_ops.disable_io_queues(oct);
+ oct->hw_ops.reset_io_queues(oct);
+ octep_free_oqs(oct);
+ octep_free_iqs(oct);
+ netdev_info(netdev, "Device stopped !!\n");
+ return 0;
+}
+
+/**
+ * octep_iq_full_check() - check if a Tx queue is full.
+ *
+ * @iq: Octeon Tx queue data structure.
+ *
+ * Return: 0, if the Tx queue is not full.
+ * 1, if the Tx queue is full.
+ */
+static inline int octep_iq_full_check(struct octep_iq *iq)
+{
+ if (likely((iq->max_count - atomic_read(&iq->instr_pending)) >=
+ OCTEP_WAKE_QUEUE_THRESHOLD))
+ return 0;
+
+ /* Stop the queue if unable to send */
+ netif_stop_subqueue(iq->netdev, iq->q_no);
+
+ /* check again and restart the queue, in case NAPI has just freed
+ * enough Tx ring entries.
+ */
+ if (unlikely((iq->max_count - atomic_read(&iq->instr_pending)) >=
+ OCTEP_WAKE_QUEUE_THRESHOLD)) {
+ netif_start_subqueue(iq->netdev, iq->q_no);
+ iq->stats.restart_cnt++;
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * octep_start_xmit() - Enqueue packet to Octeon hardware Tx Queue.
+ *
+ * @skb: packet skbuff pointer.
+ * @netdev: kernel network device.
+ *
+ * Return: NETDEV_TX_BUSY, if Tx Queue is full.
+ * NETDEV_TX_OK, if successfully enqueued to hardware Tx queue.
+ */
+static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+ struct octep_tx_sglist_desc *sglist;
+ struct octep_tx_buffer *tx_buffer;
+ struct octep_tx_desc_hw *hw_desc;
+ struct skb_shared_info *shinfo;
+ struct octep_instr_hdr *ih;
+ struct octep_iq *iq;
+ skb_frag_t *frag;
+ u16 nr_frags, si;
+ u16 q_no, wi;
+
+ q_no = skb_get_queue_mapping(skb);
+ if (q_no >= oct->num_iqs) {
+ netdev_err(netdev, "Invalid Tx skb->queue_mapping=%d\n", q_no);
+ q_no = q_no % oct->num_iqs;
+ }
+
+ iq = oct->iq[q_no];
+ if (octep_iq_full_check(iq)) {
+ iq->stats.tx_busy++;
+ return NETDEV_TX_BUSY;
+ }
+
+ shinfo = skb_shinfo(skb);
+ nr_frags = shinfo->nr_frags;
+
+ wi = iq->host_write_index;
+ hw_desc = &iq->desc_ring[wi];
+ hw_desc->ih64 = 0;
+
+ tx_buffer = iq->buff_info + wi;
+ tx_buffer->skb = skb;
+
+ ih = &hw_desc->ih;
+ ih->tlen = skb->len;
+ ih->pkind = oct->pkind;
+
+ if (!nr_frags) {
+ tx_buffer->gather = 0;
+ tx_buffer->dma = dma_map_single(iq->dev, skb->data,
+ skb->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(iq->dev, tx_buffer->dma))
+ goto dma_map_err;
+ hw_desc->dptr = tx_buffer->dma;
+ } else {
+ /* Scatter/Gather */
+ dma_addr_t dma;
+ u16 len;
+
+ sglist = tx_buffer->sglist;
+
+ ih->gsz = nr_frags + 1;
+ ih->gather = 1;
+ tx_buffer->gather = 1;
+
+ len = skb_headlen(skb);
+ dma = dma_map_single(iq->dev, skb->data, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(iq->dev, dma))
+ goto dma_map_err;
+
+ dma_sync_single_for_cpu(iq->dev, tx_buffer->sglist_dma,
+ OCTEP_SGLIST_SIZE_PER_PKT,
+ DMA_TO_DEVICE);
+ memset(sglist, 0, OCTEP_SGLIST_SIZE_PER_PKT);
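+ /* Gather-list layout, as implied by the indexing below: each
+ * octep_tx_sglist_desc packs four lengths and four DMA pointers,
+ * with lengths stored in reverse order within a descriptor, so
+ * entry 'i' uses sglist[i >> 2].len[3 - (i & 3)] together with
+ * sglist[i >> 2].dma_ptr[i & 3].
+ */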
+ sglist[0].len[3] = len;
+ sglist[0].dma_ptr[0] = dma;
+
+ si = 1; /* entry 0 is main skb, mapped above */
+ frag = &shinfo->frags[0];
+ while (nr_frags--) {
+ len = skb_frag_size(frag);
+ dma = skb_frag_dma_map(iq->dev, frag, 0,
+ len, DMA_TO_DEVICE);
+ if (dma_mapping_error(iq->dev, dma))
+ goto dma_map_sg_err;
+
+ sglist[si >> 2].len[3 - (si & 3)] = len;
+ sglist[si >> 2].dma_ptr[si & 3] = dma;
+
+ frag++;
+ si++;
+ }
+ dma_sync_single_for_device(iq->dev, tx_buffer->sglist_dma,
+ OCTEP_SGLIST_SIZE_PER_PKT,
+ DMA_TO_DEVICE);
+
+ hw_desc->dptr = tx_buffer->sglist_dma;
+ }
+
+ /* Flush the hw descriptor before writing to doorbell */
+ wmb();
+
+ /* Ring Doorbell to notify the NIC there is a new packet */
+ writel(1, iq->doorbell_reg);
+ atomic_inc(&iq->instr_pending);
+ wi++;
+ if (wi == iq->max_count)
+ wi = 0;
+ iq->host_write_index = wi;
+
+ netdev_tx_sent_queue(iq->netdev_q, skb->len);
+ iq->stats.instr_posted++;
+ skb_tx_timestamp(skb);
+ return NETDEV_TX_OK;
+
+dma_map_sg_err:
+ if (si > 0) {
+ dma_unmap_single(iq->dev, sglist[0].dma_ptr[0],
+ sglist[0].len[3], DMA_TO_DEVICE);
+ sglist[0].len[3] = 0;
+ }
+ while (si > 1) {
+ --si;
+ dma_unmap_page(iq->dev, sglist[si >> 2].dma_ptr[si & 3],
+ sglist[si >> 2].len[3 - (si & 3)], DMA_TO_DEVICE);
+ sglist[si >> 2].len[3 - (si & 3)] = 0;
+ }
+ tx_buffer->gather = 0;
+dma_map_err:
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
+
+/**
+ * octep_get_stats64() - Get Octeon network device statistics.
+ *
+ * @netdev: kernel network device.
+ * @stats: pointer to stats structure to be filled in.
+ */
+static void octep_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+ struct octep_device *oct = netdev_priv(netdev);
+ int q;
+
+ octep_get_if_stats(oct);
+ tx_packets = 0;
+ tx_bytes = 0;
+ rx_packets = 0;
+ rx_bytes = 0;
+ for (q = 0; q < oct->num_oqs; q++) {
+ struct octep_iq *iq = oct->iq[q];
+ struct octep_oq *oq = oct->oq[q];
+
+ tx_packets += iq->stats.instr_completed;
+ tx_bytes += iq->stats.bytes_sent;
+ rx_packets += oq->stats.packets;
+ rx_bytes += oq->stats.bytes;
+ }
+ stats->tx_packets = tx_packets;
+ stats->tx_bytes = tx_bytes;
+ stats->rx_packets = rx_packets;
+ stats->rx_bytes = rx_bytes;
+ stats->multicast = oct->iface_rx_stats.mcast_pkts;
+ stats->rx_errors = oct->iface_rx_stats.err_pkts;
+ stats->collisions = oct->iface_tx_stats.xscol;
+ stats->tx_fifo_errors = oct->iface_tx_stats.undflw;
+}
+
+/**
+ * octep_tx_timeout_task - work queue task to handle Tx queue timeout.
+ *
+ * @work: pointer to Tx queue timeout work_struct
+ *
+ * Stop and start the device so that it frees up all queue resources
+ * and restarts the queues, which potentially clears the Tx queue
+ * timeout condition.
+ **/
+static void octep_tx_timeout_task(struct work_struct *work)
+{
+ struct octep_device *oct = container_of(work, struct octep_device,
+ tx_timeout_task);
+ struct net_device *netdev = oct->netdev;
+
+ rtnl_lock();
+ if (netif_running(netdev)) {
+ octep_stop(netdev);
+ octep_open(netdev);
+ }
+ rtnl_unlock();
+}
+
+/**
+ * octep_tx_timeout() - Handle Tx Queue timeout.
+ *
+ * @netdev: pointer to kernel network device.
+ * @txqueue: Timed out Tx queue number.
+ *
+ * Schedule a work to handle Tx queue timeout.
+ */
+static void octep_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+
+ queue_work(octep_wq, &oct->tx_timeout_task);
+}
+
+static int octep_set_mac(struct net_device *netdev, void *p)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+ struct sockaddr *addr = (struct sockaddr *)p;
+ int err;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ err = octep_set_mac_addr(oct, addr->sa_data);
+ if (err)
+ return err;
+
+ memcpy(oct->mac_addr, addr->sa_data, ETH_ALEN);
+ eth_hw_addr_set(netdev, addr->sa_data);
+
+ return 0;
+}
+
+static int octep_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct octep_device *oct = netdev_priv(netdev);
+ struct octep_iface_link_info *link_info;
+ int err = 0;
+
+ link_info = &oct->link_info;
+ if (link_info->mtu == new_mtu)
+ return 0;
+
+ err = octep_set_mtu(oct, new_mtu);
+ if (!err) {
+ oct->link_info.mtu = new_mtu;
+ netdev->mtu = new_mtu;
+ }
+
+ return err;
+}
+
+static const struct net_device_ops octep_netdev_ops = {
+ .ndo_open = octep_open,
+ .ndo_stop = octep_stop,
+ .ndo_start_xmit = octep_start_xmit,
+ .ndo_get_stats64 = octep_get_stats64,
+ .ndo_tx_timeout = octep_tx_timeout,
+ .ndo_set_mac_address = octep_set_mac,
+ .ndo_change_mtu = octep_change_mtu,
+};
+
+/**
+ * octep_ctrl_mbox_task - work queue task to handle ctrl mbox messages.
+ *
+ * @work: pointer to ctrl mbox work_struct
+ *
+ * Poll ctrl mbox message queue and handle control messages from firmware.
+ **/
+static void octep_ctrl_mbox_task(struct work_struct *work)
+{
+ struct octep_device *oct = container_of(work, struct octep_device,
+ ctrl_mbox_task);
+ struct net_device *netdev = oct->netdev;
+ struct octep_ctrl_net_f2h_req req = {};
+ struct octep_ctrl_mbox_msg msg;
+ int ret = 0;
+
+ msg.msg = &req;
+ while (true) {
+ ret = octep_ctrl_mbox_recv(&oct->ctrl_mbox, &msg);
+ if (ret)
+ break;
+
+ switch (req.hdr.cmd) {
+ case OCTEP_CTRL_NET_F2H_CMD_LINK_STATUS:
+ if (netif_running(netdev)) {
+ if (req.link.state) {
+ dev_info(&oct->pdev->dev, "netif_carrier_on\n");
+ netif_carrier_on(netdev);
+ } else {
+ dev_info(&oct->pdev->dev, "netif_carrier_off\n");
+ netif_carrier_off(netdev);
+ }
+ }
+ break;
+ default:
+ pr_info("Unknown mbox req : %u\n", req.hdr.cmd);
+ break;
+ }
+ }
+}
+
+/**
+ * octep_device_setup() - Setup Octeon Device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Set up Octeon device hardware operations, configuration and the control
+ * mailbox.
+ *
+ * Return: 0, on success. Negative value, on failure.
+ */
+int octep_device_setup(struct octep_device *oct)
+{
+ struct octep_ctrl_mbox *ctrl_mbox;
+ struct pci_dev *pdev = oct->pdev;
+ int i, ret;
+
+ /* allocate memory for oct->conf */
+ oct->conf = kzalloc(sizeof(*oct->conf), GFP_KERNEL);
+ if (!oct->conf)
+ return -ENOMEM;
+
+ /* Map BAR regions */
+ for (i = 0; i < OCTEP_MMIO_REGIONS; i++) {
+ oct->mmio[i].hw_addr =
+ ioremap(pci_resource_start(oct->pdev, i * 2),
+ pci_resource_len(oct->pdev, i * 2));
+ oct->mmio[i].mapped = 1;
+ }
+
+ oct->chip_id = pdev->device;
+ oct->rev_id = pdev->revision;
+ dev_info(&pdev->dev, "chip_id = 0x%x\n", pdev->device);
+
+ switch (oct->chip_id) {
+ case OCTEP_PCI_DEVICE_ID_CN93_PF:
+ dev_info(&pdev->dev,
+ "Setting up OCTEON CN93XX PF PASS%d.%d\n",
+ OCTEP_MAJOR_REV(oct), OCTEP_MINOR_REV(oct));
+ octep_device_setup_cn93_pf(oct);
+ break;
+ default:
+ dev_err(&pdev->dev,
+ "%s: unsupported device\n", __func__);
+ goto unsupported_dev;
+ }
+
+ oct->pkind = CFG_GET_IQ_PKIND(oct->conf);
+
+ /* Initialize control mbox */
+ ctrl_mbox = &oct->ctrl_mbox;
+ ctrl_mbox->version = OCTEP_DRV_VERSION;
+ ctrl_mbox->barmem = CFG_GET_CTRL_MBOX_MEM_ADDR(oct->conf);
+ ret = octep_ctrl_mbox_init(ctrl_mbox);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to initialize control mbox\n");
+ return -1;
+ }
+ oct->ctrl_mbox_ifstats_offset = OCTEP_CTRL_MBOX_SZ(ctrl_mbox->h2fq.elem_sz,
+ ctrl_mbox->h2fq.elem_cnt,
+ ctrl_mbox->f2hq.elem_sz,
+ ctrl_mbox->f2hq.elem_cnt);
+
+ return 0;
+
+unsupported_dev:
+ return -1;
+}
+
+/**
+ * octep_device_cleanup() - Cleanup Octeon Device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Cleanup Octeon device allocated resources.
+ */
+static void octep_device_cleanup(struct octep_device *oct)
+{
+ int i;
+
+ dev_info(&oct->pdev->dev, "Cleaning up Octeon Device ...\n");
+
+ for (i = 0; i < OCTEP_MAX_VF; i++) {
+ vfree(oct->mbox[i]);
+ oct->mbox[i] = NULL;
+ }
+
+ octep_ctrl_mbox_uninit(&oct->ctrl_mbox);
+
+ oct->hw_ops.soft_reset(oct);
+ for (i = 0; i < OCTEP_MMIO_REGIONS; i++) {
+ if (oct->mmio[i].mapped)
+ iounmap(oct->mmio[i].hw_addr);
+ }
+
+ kfree(oct->conf);
+ oct->conf = NULL;
+}
+
+/**
+ * octep_probe() - Octeon PCI device probe handler.
+ *
+ * @pdev: PCI device structure.
+ * @ent: entry in Octeon PCI device ID table.
+ *
+ * Initializes and enables the Octeon PCI device for network operations.
+ * Initializes the Octeon private data structure and registers a network device.
+ *
+ * Return: 0, on success. Negative error code, on failure.
+ */
+static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct octep_device *octep_dev = NULL;
+ struct net_device *netdev;
+ int err;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to enable PCI device\n");
+ return err;
+ }
+
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set DMA mask !!\n");
+ goto err_dma_mask;
+ }
+
+ err = pci_request_mem_regions(pdev, OCTEP_DRV_NAME);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to map PCI memory regions\n");
+ goto err_pci_regions;
+ }
+
+ pci_enable_pcie_error_reporting(pdev);
+ pci_set_master(pdev);
+
+ netdev = alloc_etherdev_mq(sizeof(struct octep_device),
+ OCTEP_MAX_QUEUES);
+ if (!netdev) {
+ dev_err(&pdev->dev, "Failed to allocate netdev\n");
+ err = -ENOMEM;
+ goto err_alloc_netdev;
+ }
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+
+ octep_dev = netdev_priv(netdev);
+ octep_dev->netdev = netdev;
+ octep_dev->pdev = pdev;
+ octep_dev->dev = &pdev->dev;
+ pci_set_drvdata(pdev, octep_dev);
+
+ err = octep_device_setup(octep_dev);
+ if (err) {
+ dev_err(&pdev->dev, "Device setup failed\n");
+ goto err_octep_config;
+ }
+ INIT_WORK(&octep_dev->tx_timeout_task, octep_tx_timeout_task);
+ INIT_WORK(&octep_dev->ctrl_mbox_task, octep_ctrl_mbox_task);
+
+ netdev->netdev_ops = &octep_netdev_ops;
+ octep_set_ethtool_ops(netdev);
+ netif_carrier_off(netdev);
+
+ netdev->hw_features = NETIF_F_SG;
+ netdev->features |= netdev->hw_features;
+ netdev->min_mtu = OCTEP_MIN_MTU;
+ netdev->max_mtu = OCTEP_MAX_MTU;
+ netdev->mtu = OCTEP_DEFAULT_MTU;
+
+ octep_get_mac_addr(octep_dev, octep_dev->mac_addr);
+ eth_hw_addr_set(netdev, octep_dev->mac_addr);
+
+ if (register_netdev(netdev)) {
+ dev_err(&pdev->dev, "Failed to register netdev\n");
+ goto register_dev_err;
+ }
+ dev_info(&pdev->dev, "Device probe successful\n");
+ return 0;
+
+register_dev_err:
+ octep_device_cleanup(octep_dev);
+err_octep_config:
+ free_netdev(netdev);
+err_alloc_netdev:
+ pci_disable_pcie_error_reporting(pdev);
+ pci_release_mem_regions(pdev);
+err_pci_regions:
+err_dma_mask:
+ pci_disable_device(pdev);
+ return err;
+}
+
+/**
+ * octep_remove() - Remove Octeon PCI device from driver control.
+ *
+ * @pdev: PCI device structure of the Octeon device.
+ *
+ * Cleanup all resources allocated for the Octeon device.
+ * Unregister from network device and disable the PCI device.
+ */
+static void octep_remove(struct pci_dev *pdev)
+{
+ struct octep_device *oct = pci_get_drvdata(pdev);
+ struct net_device *netdev;
+
+ if (!oct)
+ return;
+
+ cancel_work_sync(&oct->tx_timeout_task);
+ cancel_work_sync(&oct->ctrl_mbox_task);
+ netdev = oct->netdev;
+ if (netdev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(netdev);
+
+ octep_device_cleanup(oct);
+ pci_release_mem_regions(pdev);
+ free_netdev(netdev);
+ pci_disable_pcie_error_reporting(pdev);
+ pci_disable_device(pdev);
+}
+
+static struct pci_driver octep_driver = {
+ .name = OCTEP_DRV_NAME,
+ .id_table = octep_pci_id_tbl,
+ .probe = octep_probe,
+ .remove = octep_remove,
+};
+
+/**
+ * octep_init_module() - Module initialization.
+ *
+ * Create common resources for the driver and register the PCI driver.
+ */
+static int __init octep_init_module(void)
+{
+ int ret;
+
+ pr_info("%s: Loading %s ...\n", OCTEP_DRV_NAME, OCTEP_DRV_STRING);
+
+ /* work queue for all deferred tasks */
+ octep_wq = create_singlethread_workqueue(OCTEP_DRV_NAME);
+ if (!octep_wq) {
+ pr_err("%s: Failed to create common workqueue\n",
+ OCTEP_DRV_NAME);
+ return -ENOMEM;
+ }
+
+ ret = pci_register_driver(&octep_driver);
+ if (ret < 0) {
+ pr_err("%s: Failed to register PCI driver; err=%d\n",
+ OCTEP_DRV_NAME, ret);
+ destroy_workqueue(octep_wq);
+ return ret;
+ }
+
+ pr_info("%s: Loaded successfully !\n", OCTEP_DRV_NAME);
+
+ return ret;
+}
+
+/**
+ * octep_exit_module() - Module exit routine.
+ *
+ * Unregister the driver from the PCI subsystem and clean up common resources.
+ */
+static void __exit octep_exit_module(void)
+{
+ pr_info("%s: Unloading ...\n", OCTEP_DRV_NAME);
+
+ pci_unregister_driver(&octep_driver);
+ destroy_workqueue(octep_wq);
+
+ pr_info("%s: Unloading complete\n", OCTEP_DRV_NAME);
+}
+
+module_init(octep_init_module);
+module_exit(octep_exit_module);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
new file mode 100644
index 000000000000..520f2c3664f9
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
@@ -0,0 +1,366 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#ifndef _OCTEP_MAIN_H_
+#define _OCTEP_MAIN_H_
+
+#include "octep_tx.h"
+#include "octep_rx.h"
+#include "octep_ctrl_mbox.h"
+
+#define OCTEP_DRV_VERSION_MAJOR 1
+#define OCTEP_DRV_VERSION_MINOR 0
+#define OCTEP_DRV_VERSION_VARIANT 0
+
+#define OCTEP_DRV_VERSION ((OCTEP_DRV_VERSION_MAJOR << 16) + \
+ (OCTEP_DRV_VERSION_MINOR << 8) + \
+ OCTEP_DRV_VERSION_VARIANT)
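+/* e.g. version 1.0.0 encodes as (1 << 16) + (0 << 8) + 0 = 0x10000 */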
+
+#define OCTEP_DRV_VERSION_STR "1.0.0"
+#define OCTEP_DRV_NAME "octeon_ep"
+#define OCTEP_DRV_STRING "Marvell Octeon EndPoint NIC Driver"
+
+#define OCTEP_PCIID_CN93_PF 0xB200177d
+#define OCTEP_PCIID_CN93_VF 0xB203177d
+
+#define OCTEP_PCI_DEVICE_ID_CN93_PF 0xB200
+#define OCTEP_PCI_DEVICE_ID_CN93_VF 0xB203
+
+#define OCTEP_MAX_QUEUES 63
+#define OCTEP_MAX_IQ OCTEP_MAX_QUEUES
+#define OCTEP_MAX_OQ OCTEP_MAX_QUEUES
+#define OCTEP_MAX_VF 64
+
+#define OCTEP_MAX_MSIX_VECTORS OCTEP_MAX_OQ
+
+/* Flags to disable and enable Interrupts */
+#define OCTEP_INPUT_INTR (1)
+#define OCTEP_OUTPUT_INTR (2)
+#define OCTEP_MBOX_INTR (4)
+#define OCTEP_ALL_INTR 0xff
+
+#define OCTEP_IQ_INTR_RESEND_BIT 59
+#define OCTEP_OQ_INTR_RESEND_BIT 59
+
+#define OCTEP_MMIO_REGIONS 3
+/* PCI address space mapping information.
+ * Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of
+ * Octeon is mapped into a distinct region of the kernel's virtual
+ * address space.
+ */
+struct octep_mmio {
+ /* The ioremapped virtual address of the PCI BAR region. */
+ u8 __iomem *hw_addr;
+
+ /* Flag indicating the mapping was successful. */
+ int mapped;
+};
+
+struct octep_pci_win_regs {
+ u8 __iomem *pci_win_wr_addr;
+ u8 __iomem *pci_win_rd_addr;
+ u8 __iomem *pci_win_wr_data;
+ u8 __iomem *pci_win_rd_data;
+};
+
+struct octep_hw_ops {
+ void (*setup_iq_regs)(struct octep_device *oct, int q);
+ void (*setup_oq_regs)(struct octep_device *oct, int q);
+ void (*setup_mbox_regs)(struct octep_device *oct, int mbox);
+
+ irqreturn_t (*non_ioq_intr_handler)(void *ioq_vector);
+ irqreturn_t (*ioq_intr_handler)(void *ioq_vector);
+ int (*soft_reset)(struct octep_device *oct);
+ void (*reinit_regs)(struct octep_device *oct);
+ u32 (*update_iq_read_idx)(struct octep_iq *iq);
+
+ void (*enable_interrupts)(struct octep_device *oct);
+ void (*disable_interrupts)(struct octep_device *oct);
+
+ void (*enable_io_queues)(struct octep_device *oct);
+ void (*disable_io_queues)(struct octep_device *oct);
+ void (*enable_iq)(struct octep_device *oct, int q);
+ void (*disable_iq)(struct octep_device *oct, int q);
+ void (*enable_oq)(struct octep_device *oct, int q);
+ void (*disable_oq)(struct octep_device *oct, int q);
+ void (*reset_io_queues)(struct octep_device *oct);
+ void (*dump_registers)(struct octep_device *oct);
+};
+
+/* Octeon mailbox data */
+struct octep_mbox_data {
+ u32 cmd;
+ u32 total_len;
+ u32 recv_len;
+ u32 rsvd;
+ u64 *data;
+};
+
+/* Octeon device mailbox */
+struct octep_mbox {
+ /* A spinlock to protect access to this q_mbox. */
+ spinlock_t lock;
+
+ u32 q_no;
+ u32 state;
+
+ /* SLI_MAC_PF_MBOX_INT for PF, SLI_PKT_MBOX_INT for VF. */
+ u8 __iomem *mbox_int_reg;
+
+ /* SLI_PKT_PF_VF_MBOX_SIG(0) for PF,
+ * SLI_PKT_PF_VF_MBOX_SIG(1) for VF.
+ */
+ u8 __iomem *mbox_write_reg;
+
+ /* SLI_PKT_PF_VF_MBOX_SIG(1) for PF,
+ * SLI_PKT_PF_VF_MBOX_SIG(0) for VF.
+ */
+ u8 __iomem *mbox_read_reg;
+
+ struct octep_mbox_data mbox_data;
+};
+
+/* Tx/Rx queue vector per interrupt. */
+struct octep_ioq_vector {
+ char name[OCTEP_MSIX_NAME_SIZE];
+ struct napi_struct napi;
+ struct octep_device *octep_dev;
+ struct octep_iq *iq;
+ struct octep_oq *oq;
+ cpumask_t affinity_mask;
+};
+
+/* Octeon hardware/firmware offload capability flags. */
+#define OCTEP_CAP_TX_CHECKSUM BIT(0)
+#define OCTEP_CAP_RX_CHECKSUM BIT(1)
+#define OCTEP_CAP_TSO BIT(2)
+
+/* Link modes */
+enum octep_link_mode_bit_indices {
+ OCTEP_LINK_MODE_10GBASE_T = 0,
+ OCTEP_LINK_MODE_10GBASE_R,
+ OCTEP_LINK_MODE_10GBASE_CR,
+ OCTEP_LINK_MODE_10GBASE_KR,
+ OCTEP_LINK_MODE_10GBASE_LR,
+ OCTEP_LINK_MODE_10GBASE_SR,
+ OCTEP_LINK_MODE_25GBASE_CR,
+ OCTEP_LINK_MODE_25GBASE_KR,
+ OCTEP_LINK_MODE_25GBASE_SR,
+ OCTEP_LINK_MODE_40GBASE_CR4,
+ OCTEP_LINK_MODE_40GBASE_KR4,
+ OCTEP_LINK_MODE_40GBASE_LR4,
+ OCTEP_LINK_MODE_40GBASE_SR4,
+ OCTEP_LINK_MODE_50GBASE_CR2,
+ OCTEP_LINK_MODE_50GBASE_KR2,
+ OCTEP_LINK_MODE_50GBASE_SR2,
+ OCTEP_LINK_MODE_50GBASE_CR,
+ OCTEP_LINK_MODE_50GBASE_KR,
+ OCTEP_LINK_MODE_50GBASE_LR,
+ OCTEP_LINK_MODE_50GBASE_SR,
+ OCTEP_LINK_MODE_100GBASE_CR4,
+ OCTEP_LINK_MODE_100GBASE_KR4,
+ OCTEP_LINK_MODE_100GBASE_LR4,
+ OCTEP_LINK_MODE_100GBASE_SR4,
+ OCTEP_LINK_MODE_NBITS
+};
+
+/* Hardware interface link state information. */
+struct octep_iface_link_info {
+ /* Bitmap of Supported link speeds/modes. */
+ u64 supported_modes;
+
+ /* Bitmap of Advertised link speeds/modes. */
+ u64 advertised_modes;
+
+ /* Negotiated link speed in Mbps. */
+ u32 speed;
+
+ /* MTU */
+ u16 mtu;
+
+ /* Autonegotiation state. */
+#define OCTEP_LINK_MODE_AUTONEG_SUPPORTED BIT(0)
+#define OCTEP_LINK_MODE_AUTONEG_ADVERTISED BIT(1)
+ u8 autoneg;
+
+ /* Pause frames setting. */
+#define OCTEP_LINK_MODE_PAUSE_SUPPORTED BIT(0)
+#define OCTEP_LINK_MODE_PAUSE_ADVERTISED BIT(1)
+ u8 pause;
+
+ /* Admin state of the link (ifconfig <iface> up/down). */
+ u8 admin_up;
+
+ /* Operational state of the link: physical link is up/down. */
+ u8 oper_up;
+};
+
+/* The Octeon device specific private data structure.
+ * Each Octeon device has this structure to represent all its components.
+ */
+struct octep_device {
+ struct octep_config *conf;
+
+ /* Octeon Chip type. */
+ u16 chip_id;
+ u16 rev_id;
+
+ /* Device capabilities enabled */
+ u64 caps_enabled;
+ /* Device capabilities supported */
+ u64 caps_supported;
+
+ /* Pointer to basic Linux device */
+ struct device *dev;
+ /* Linux PCI device pointer */
+ struct pci_dev *pdev;
+ /* Netdev corresponding to the Octeon device */
+ struct net_device *netdev;
+
+ /* memory mapped io range */
+ struct octep_mmio mmio[OCTEP_MMIO_REGIONS];
+
+ /* MAC address */
+ u8 mac_addr[ETH_ALEN];
+
+ /* Tx queues (IQ: Instruction Queue) */
+ u16 num_iqs;
+ /* pkind value to be used in every Tx hardware descriptor */
+ u8 pkind;
+ /* Pointers to Octeon Tx queues */
+ struct octep_iq *iq[OCTEP_MAX_IQ];
+
+ /* Rx queues (OQ: Output Queue) */
+ u16 num_oqs;
+ /* Pointers to Octeon Rx queues */
+ struct octep_oq *oq[OCTEP_MAX_OQ];
+
+ /* Hardware port number of the PCIe interface */
+ u16 pcie_port;
+
+ /* PCI Window registers to access some hardware CSRs */
+ struct octep_pci_win_regs pci_win_regs;
+ /* Hardware operations */
+ struct octep_hw_ops hw_ops;
+
+ /* IRQ info */
+ u16 num_irqs;
+ u16 num_non_ioq_irqs;
+ char *non_ioq_irq_names;
+ struct msix_entry *msix_entries;
+ /* IOQ information for each corresponding MSI-X interrupt. */
+ struct octep_ioq_vector *ioq_vector[OCTEP_MAX_QUEUES];
+
+ /* Hardware Interface Tx statistics */
+ struct octep_iface_tx_stats iface_tx_stats;
+ /* Hardware Interface Rx statistics */
+ struct octep_iface_rx_stats iface_rx_stats;
+
+ /* Hardware Interface Link info like supported modes, aneg support */
+ struct octep_iface_link_info link_info;
+
+ /* Mailbox to talk to VFs */
+ struct octep_mbox *mbox[OCTEP_MAX_VF];
+
+ /* Work entry to handle Tx timeout */
+ struct work_struct tx_timeout_task;
+
+ /* control mbox over pf */
+ struct octep_ctrl_mbox ctrl_mbox;
+
+ /* offset for iface stats */
+ u32 ctrl_mbox_ifstats_offset;
+
+ /* Work entry to handle ctrl mbox interrupt */
+ struct work_struct ctrl_mbox_task;
+
+};
+
+static inline u16 OCTEP_MAJOR_REV(struct octep_device *oct)
+{
+ u16 rev = (oct->rev_id & 0xC) >> 2;
+
+ return (rev == 0) ? 1 : rev;
+}
+
+static inline u16 OCTEP_MINOR_REV(struct octep_device *oct)
+{
+ return (oct->rev_id & 0x3);
+}
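+
+/* e.g. rev_id 0x0 decodes as PASS1.0, rev_id 0x7 as PASS1.3 */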
+
+/* Octeon CSR read/write access APIs */
+#define octep_write_csr(octep_dev, reg_off, value) \
+ writel(value, (octep_dev)->mmio[0].hw_addr + (reg_off))
+
+#define octep_write_csr64(octep_dev, reg_off, val64) \
+ writeq(val64, (octep_dev)->mmio[0].hw_addr + (reg_off))
+
+#define octep_read_csr(octep_dev, reg_off) \
+ readl((octep_dev)->mmio[0].hw_addr + (reg_off))
+
+#define octep_read_csr64(octep_dev, reg_off) \
+ readq((octep_dev)->mmio[0].hw_addr + (reg_off))
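+
+/* Example usage (ring-enable CSR offset from octep_regs_cn9k_pf.h):
+ *	u64 val = octep_read_csr64(oct, CN93_SDP_R_IN_ENABLE(q));
+ *	octep_write_csr64(oct, CN93_SDP_R_IN_ENABLE(q), val | 0x1ULL);
+ */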
+
+/* Read windowed register.
+ * @param oct - pointer to the Octeon device.
+ * @param addr - Address of the register to read.
+ *
+ * This routine is called to read from the indirectly accessed
+ * Octeon registers that are visible through a PCI BAR0 mapped window
+ * register.
+ * @return - 64 bit value read from the register.
+ */
+static inline u64
+OCTEP_PCI_WIN_READ(struct octep_device *oct, u64 addr)
+{
+ u64 val64;
+
+ addr |= 1ull << 53; /* read 8 bytes */
+ writeq(addr, oct->pci_win_regs.pci_win_rd_addr);
+ val64 = readq(oct->pci_win_regs.pci_win_rd_data);
+
+ dev_dbg(&oct->pdev->dev,
+ "%s: reg: 0x%016llx val: 0x%016llx\n", __func__, addr, val64);
+
+ return val64;
+}
+
+/* Write windowed register.
+ * @param oct - pointer to the Octeon device.
+ * @param addr - Address of the register to write
+ * @param val - Value to write
+ *
+ * This routine is called to write to the indirectly accessed
+ * Octeon registers that are visible through a PCI BAR0 mapped window
+ * register.
+ * @return Nothing.
+ */
+static inline void
+OCTEP_PCI_WIN_WRITE(struct octep_device *oct, u64 addr, u64 val)
+{
+ writeq(addr, oct->pci_win_regs.pci_win_wr_addr);
+ writeq(val, oct->pci_win_regs.pci_win_wr_data);
+
+ dev_dbg(&oct->pdev->dev,
+ "%s: reg: 0x%016llx val: 0x%016llx\n", __func__, addr, val);
+}
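+
+/* Example: fetch the chip reset/boot status CSR through the BAR0 window
+ * (CN93_RST_BOOT as defined in octep_regs_cn9k_pf.h):
+ *	u64 boot = OCTEP_PCI_WIN_READ(oct, CN93_RST_BOOT);
+ */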
+
+extern struct workqueue_struct *octep_wq;
+
+int octep_device_setup(struct octep_device *oct);
+int octep_setup_iqs(struct octep_device *oct);
+void octep_free_iqs(struct octep_device *oct);
+void octep_clean_iqs(struct octep_device *oct);
+int octep_setup_oqs(struct octep_device *oct);
+void octep_free_oqs(struct octep_device *oct);
+void octep_oq_dbell_init(struct octep_device *oct);
+void octep_device_setup_cn93_pf(struct octep_device *oct);
+int octep_iq_process_completions(struct octep_iq *iq, u16 budget);
+int octep_oq_process_rx(struct octep_oq *oq, int budget);
+void octep_set_ethtool_ops(struct net_device *netdev);
+
+#endif /* _OCTEP_MAIN_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
new file mode 100644
index 000000000000..cc51149790ff
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h
@@ -0,0 +1,367 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#ifndef _OCTEP_REGS_CN9K_PF_H_
+#define _OCTEP_REGS_CN9K_PF_H_
+
+/* ############################ RST ######################### */
+#define CN93_RST_BOOT 0x000087E006001600ULL
+#define CN93_RST_CORE_DOMAIN_W1S 0x000087E006001820ULL
+#define CN93_RST_CORE_DOMAIN_W1C 0x000087E006001828ULL
+
+#define CN93_CONFIG_XPANSION_BAR 0x38
+#define CN93_CONFIG_PCIE_CAP 0x70
+#define CN93_CONFIG_PCIE_DEVCAP 0x74
+#define CN93_CONFIG_PCIE_DEVCTL 0x78
+#define CN93_CONFIG_PCIE_LINKCAP 0x7C
+#define CN93_CONFIG_PCIE_LINKCTL 0x80
+#define CN93_CONFIG_PCIE_SLOTCAP 0x84
+#define CN93_CONFIG_PCIE_SLOTCTL 0x88
+
+#define CN93_PCIE_SRIOV_FDL 0x188 /* 0x98 */
+#define CN93_PCIE_SRIOV_FDL_BIT_POS 0x10
+#define CN93_PCIE_SRIOV_FDL_MASK 0xFF
+
+#define CN93_CONFIG_PCIE_FLTMSK 0x720
+
+/* ################# Offsets of RING, EPF, MAC ######################### */
+#define CN93_RING_OFFSET (0x1ULL << 17)
+#define CN93_EPF_OFFSET (0x1ULL << 25)
+#define CN93_MAC_OFFSET (0x1ULL << 4)
+#define CN93_BIT_ARRAY_OFFSET (0x1ULL << 4)
+#define CN93_EPVF_RING_OFFSET (0x1ULL << 4)
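+
+/* Per-ring CSRs sit at a fixed stride, e.g. the ring-2 copy of a CSR block
+ * starting at 0x10010 is at 0x10010 + 2 * CN93_RING_OFFSET = 0x50010.
+ */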
+
+/* ################# Scratch Registers ######################### */
+#define CN93_SDP_EPF_SCRATCH 0x205E0
+
+/* ################# Window Registers ######################### */
+#define CN93_SDP_WIN_WR_ADDR64 0x20000
+#define CN93_SDP_WIN_RD_ADDR64 0x20010
+#define CN93_SDP_WIN_WR_DATA64 0x20020
+#define CN93_SDP_WIN_WR_MASK_REG 0x20030
+#define CN93_SDP_WIN_RD_DATA64 0x20040
+
+#define CN93_SDP_MAC_NUMBER 0x2C100
+
+/* ################# Global Privileged registers ######################### */
+#define CN93_SDP_EPF_RINFO 0x205F0
+
+#define CN93_SDP_EPF_RINFO_SRN(val) ((val) & 0xFF)
+#define CN93_SDP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF)
+#define CN93_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0xFF)
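+/* e.g. RINFO = 0x0040000200000080 decodes as SRN = 0x80, RPVF = 2,
+ * NVFS = 0x40
+ */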
+
+/* SDP Function select */
+#define CN93_SDP_FUNC_SEL_EPF_BIT_POS 8
+#define CN93_SDP_FUNC_SEL_FUNC_BIT_POS 0
+
+/* ##### RING IN (Into device from PCI: Tx Ring) REGISTERS #### */
+#define CN93_SDP_R_IN_CONTROL_START 0x10000
+#define CN93_SDP_R_IN_ENABLE_START 0x10010
+#define CN93_SDP_R_IN_INSTR_BADDR_START 0x10020
+#define CN93_SDP_R_IN_INSTR_RSIZE_START 0x10030
+#define CN93_SDP_R_IN_INSTR_DBELL_START 0x10040
+#define CN93_SDP_R_IN_CNTS_START 0x10050
+#define CN93_SDP_R_IN_INT_LEVELS_START 0x10060
+#define CN93_SDP_R_IN_PKT_CNT_START 0x10080
+#define CN93_SDP_R_IN_BYTE_CNT_START 0x10090
+
+#define CN93_SDP_R_IN_CONTROL(ring) \
+ (CN93_SDP_R_IN_CONTROL_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_ENABLE(ring) \
+ (CN93_SDP_R_IN_ENABLE_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_INSTR_BADDR(ring) \
+ (CN93_SDP_R_IN_INSTR_BADDR_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_INSTR_RSIZE(ring) \
+ (CN93_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_INSTR_DBELL(ring) \
+ (CN93_SDP_R_IN_INSTR_DBELL_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_CNTS(ring) \
+ (CN93_SDP_R_IN_CNTS_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_INT_LEVELS(ring) \
+ (CN93_SDP_R_IN_INT_LEVELS_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_PKT_CNT(ring) \
+ (CN93_SDP_R_IN_PKT_CNT_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_BYTE_CNT(ring) \
+ (CN93_SDP_R_IN_BYTE_CNT_START + ((ring) * CN93_RING_OFFSET))
+
+/* Rings per Virtual Function */
+#define CN93_R_IN_CTL_RPVF_MASK (0xF)
+#define CN93_R_IN_CTL_RPVF_POS (48)
+
+/* Number of instructions to be read in one MAC read request;
+ * set to the maximum value (4).
+ */
+#define CN93_R_IN_CTL_IDLE (0x1ULL << 28)
+#define CN93_R_IN_CTL_RDSIZE (0x3ULL << 25)
+#define CN93_R_IN_CTL_IS_64B (0x1ULL << 24)
+#define CN93_R_IN_CTL_D_NSR (0x1ULL << 8)
+#define CN93_R_IN_CTL_D_ESR (0x1ULL << 6)
+#define CN93_R_IN_CTL_D_ROR (0x1ULL << 5)
+#define CN93_R_IN_CTL_NSR (0x1ULL << 3)
+#define CN93_R_IN_CTL_ESR (0x1ULL << 1)
+#define CN93_R_IN_CTL_ROR (0x1ULL << 0)
+
+#define CN93_R_IN_CTL_MASK (CN93_R_IN_CTL_RDSIZE | CN93_R_IN_CTL_IS_64B)
+
+/* ##### RING OUT (out from device to PCI host: Rx Ring) REGISTERS #### */
+#define CN93_SDP_R_OUT_CNTS_START 0x10100
+#define CN93_SDP_R_OUT_INT_LEVELS_START 0x10110
+#define CN93_SDP_R_OUT_SLIST_BADDR_START 0x10120
+#define CN93_SDP_R_OUT_SLIST_RSIZE_START 0x10130
+#define CN93_SDP_R_OUT_SLIST_DBELL_START 0x10140
+#define CN93_SDP_R_OUT_CONTROL_START 0x10150
+#define CN93_SDP_R_OUT_ENABLE_START 0x10160
+#define CN93_SDP_R_OUT_PKT_CNT_START 0x10180
+#define CN93_SDP_R_OUT_BYTE_CNT_START 0x10190
+
+#define CN93_SDP_R_OUT_CONTROL(ring) \
+ (CN93_SDP_R_OUT_CONTROL_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_ENABLE(ring) \
+ (CN93_SDP_R_OUT_ENABLE_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_SLIST_BADDR(ring) \
+ (CN93_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_SLIST_RSIZE(ring) \
+ (CN93_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_SLIST_DBELL(ring) \
+ (CN93_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_CNTS(ring) \
+ (CN93_SDP_R_OUT_CNTS_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_INT_LEVELS(ring) \
+ (CN93_SDP_R_OUT_INT_LEVELS_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_PKT_CNT(ring) \
+ (CN93_SDP_R_OUT_PKT_CNT_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_BYTE_CNT(ring) \
+ (CN93_SDP_R_OUT_BYTE_CNT_START + ((ring) * CN93_RING_OFFSET))
+
+/*------------------ R_OUT Masks ----------------*/
+#define CN93_R_OUT_INT_LEVELS_BMODE BIT_ULL(63)
+#define CN93_R_OUT_INT_LEVELS_TIMET (32)
+
+#define CN93_R_OUT_CTL_IDLE BIT_ULL(40)
+#define CN93_R_OUT_CTL_ES_I BIT_ULL(34)
+#define CN93_R_OUT_CTL_NSR_I BIT_ULL(33)
+#define CN93_R_OUT_CTL_ROR_I BIT_ULL(32)
+#define CN93_R_OUT_CTL_ES_D BIT_ULL(30)
+#define CN93_R_OUT_CTL_NSR_D BIT_ULL(29)
+#define CN93_R_OUT_CTL_ROR_D BIT_ULL(28)
+#define CN93_R_OUT_CTL_ES_P BIT_ULL(26)
+#define CN93_R_OUT_CTL_NSR_P BIT_ULL(25)
+#define CN93_R_OUT_CTL_ROR_P BIT_ULL(24)
+#define CN93_R_OUT_CTL_IMODE BIT_ULL(23)
+
+/* ############### Interrupt Moderation Registers ############### */
+#define CN93_SDP_R_IN_INT_MDRT_CTL0_START 0x10280
+#define CN93_SDP_R_IN_INT_MDRT_CTL1_START 0x102A0
+#define CN93_SDP_R_IN_INT_MDRT_DBG_START 0x102C0
+
+#define CN93_SDP_R_OUT_INT_MDRT_CTL0_START 0x10380
+#define CN93_SDP_R_OUT_INT_MDRT_CTL1_START 0x103A0
+#define CN93_SDP_R_OUT_INT_MDRT_DBG_START 0x103C0
+
+#define CN93_SDP_R_IN_INT_MDRT_CTL0(ring) \
+ (CN93_SDP_R_IN_INT_MDRT_CTL0_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_INT_MDRT_CTL1(ring) \
+ (CN93_SDP_R_IN_INT_MDRT_CTL1_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_INT_MDRT_DBG(ring) \
+ (CN93_SDP_R_IN_INT_MDRT_DBG_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_INT_MDRT_CTL0(ring) \
+ (CN93_SDP_R_OUT_INT_MDRT_CTL0_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_INT_MDRT_CTL1(ring) \
+ (CN93_SDP_R_OUT_INT_MDRT_CTL1_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_INT_MDRT_DBG(ring) \
+ (CN93_SDP_R_OUT_INT_MDRT_DBG_START + ((ring) * CN93_RING_OFFSET))
+
+/* ##################### Mail Box Registers ########################## */
+/* INT register for VF. When a MBOX write from the PF happens to a VF,
+ * the corresponding bit is set in this register as well as in the
+ * PF_VF_INT register.
+ *
+ * This is a RO register; the interrupt can be cleared by writing 1 to
+ * PF_VF_INT.
+ */
+/* The first two registers below carry signalling/data from PF to VF;
+ * the last one carries data from VF to PF.
+ */
+#define CN93_SDP_R_MBOX_PF_VF_DATA_START 0x10210
+#define CN93_SDP_R_MBOX_PF_VF_INT_START 0x10220
+#define CN93_SDP_R_MBOX_VF_PF_DATA_START 0x10230
+
+#define CN93_SDP_R_MBOX_PF_VF_DATA(ring) \
+ (CN93_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_MBOX_PF_VF_INT(ring) \
+ (CN93_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_MBOX_VF_PF_DATA(ring) \
+ (CN93_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CN93_RING_OFFSET))
+
+/* ##################### Interrupt Registers ########################## */
+#define CN93_SDP_R_ERR_TYPE_START 0x10400
+
+#define CN93_SDP_R_ERR_TYPE(ring) \
+ (CN93_SDP_R_ERR_TYPE_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_MBOX_ISM_START 0x10500
+#define CN93_SDP_R_OUT_CNTS_ISM_START 0x10510
+#define CN93_SDP_R_IN_CNTS_ISM_START 0x10520
+
+#define CN93_SDP_R_MBOX_ISM(ring) \
+ (CN93_SDP_R_MBOX_ISM_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_OUT_CNTS_ISM(ring) \
+ (CN93_SDP_R_OUT_CNTS_ISM_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_R_IN_CNTS_ISM(ring) \
+ (CN93_SDP_R_IN_CNTS_ISM_START + ((ring) * CN93_RING_OFFSET))
+
+#define CN93_SDP_EPF_MBOX_RINT_START 0x20100
+#define CN93_SDP_EPF_MBOX_RINT_W1S_START 0x20120
+#define CN93_SDP_EPF_MBOX_RINT_ENA_W1C_START 0x20140
+#define CN93_SDP_EPF_MBOX_RINT_ENA_W1S_START 0x20160
+
+#define CN93_SDP_EPF_VFIRE_RINT_START 0x20180
+#define CN93_SDP_EPF_VFIRE_RINT_W1S_START 0x201A0
+#define CN93_SDP_EPF_VFIRE_RINT_ENA_W1C_START 0x201C0
+#define CN93_SDP_EPF_VFIRE_RINT_ENA_W1S_START 0x201E0
+
+#define CN93_SDP_EPF_IRERR_RINT 0x20200
+#define CN93_SDP_EPF_IRERR_RINT_W1S 0x20210
+#define CN93_SDP_EPF_IRERR_RINT_ENA_W1C 0x20220
+#define CN93_SDP_EPF_IRERR_RINT_ENA_W1S 0x20230
+
+#define CN93_SDP_EPF_VFORE_RINT_START 0x20240
+#define CN93_SDP_EPF_VFORE_RINT_W1S_START 0x20260
+#define CN93_SDP_EPF_VFORE_RINT_ENA_W1C_START 0x20280
+#define CN93_SDP_EPF_VFORE_RINT_ENA_W1S_START 0x202A0
+
+#define CN93_SDP_EPF_ORERR_RINT 0x20320
+#define CN93_SDP_EPF_ORERR_RINT_W1S 0x20330
+#define CN93_SDP_EPF_ORERR_RINT_ENA_W1C 0x20340
+#define CN93_SDP_EPF_ORERR_RINT_ENA_W1S 0x20350
+
+#define CN93_SDP_EPF_OEI_RINT 0x20360
+#define CN93_SDP_EPF_OEI_RINT_W1S 0x20370
+#define CN93_SDP_EPF_OEI_RINT_ENA_W1C 0x20380
+#define CN93_SDP_EPF_OEI_RINT_ENA_W1S 0x20390
+
+#define CN93_SDP_EPF_DMA_RINT 0x20400
+#define CN93_SDP_EPF_DMA_RINT_W1S 0x20410
+#define CN93_SDP_EPF_DMA_RINT_ENA_W1C 0x20420
+#define CN93_SDP_EPF_DMA_RINT_ENA_W1S 0x20430
+
+#define CN93_SDP_EPF_DMA_INT_LEVEL_START 0x20440
+#define CN93_SDP_EPF_DMA_CNT_START 0x20460
+#define CN93_SDP_EPF_DMA_TIM_START 0x20480
+
+#define CN93_SDP_EPF_MISC_RINT 0x204A0
+#define CN93_SDP_EPF_MISC_RINT_W1S 0x204B0
+#define CN93_SDP_EPF_MISC_RINT_ENA_W1C 0x204C0
+#define CN93_SDP_EPF_MISC_RINT_ENA_W1S 0x204D0
+
+#define CN93_SDP_EPF_DMA_VF_RINT_START 0x204E0
+#define CN93_SDP_EPF_DMA_VF_RINT_W1S_START 0x20500
+#define CN93_SDP_EPF_DMA_VF_RINT_ENA_W1C_START 0x20520
+#define CN93_SDP_EPF_DMA_VF_RINT_ENA_W1S_START 0x20540
+
+#define CN93_SDP_EPF_PP_VF_RINT_START 0x20560
+#define CN93_SDP_EPF_PP_VF_RINT_W1S_START 0x20580
+#define CN93_SDP_EPF_PP_VF_RINT_ENA_W1C_START 0x205A0
+#define CN93_SDP_EPF_PP_VF_RINT_ENA_W1S_START 0x205C0
+
+#define CN93_SDP_EPF_MBOX_RINT(index) \
+ (CN93_SDP_EPF_MBOX_RINT_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_MBOX_RINT_W1S(index) \
+ (CN93_SDP_EPF_MBOX_RINT_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_MBOX_RINT_ENA_W1C(index) \
+ (CN93_SDP_EPF_MBOX_RINT_ENA_W1C_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_MBOX_RINT_ENA_W1S(index) \
+ (CN93_SDP_EPF_MBOX_RINT_ENA_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+
+#define CN93_SDP_EPF_VFIRE_RINT(index) \
+ (CN93_SDP_EPF_VFIRE_RINT_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_VFIRE_RINT_W1S(index) \
+ (CN93_SDP_EPF_VFIRE_RINT_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_VFIRE_RINT_ENA_W1C(index) \
+ (CN93_SDP_EPF_VFIRE_RINT_ENA_W1C_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_VFIRE_RINT_ENA_W1S(index) \
+ (CN93_SDP_EPF_VFIRE_RINT_ENA_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+
+#define CN93_SDP_EPF_VFORE_RINT(index) \
+ (CN93_SDP_EPF_VFORE_RINT_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_VFORE_RINT_W1S(index) \
+ (CN93_SDP_EPF_VFORE_RINT_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_VFORE_RINT_ENA_W1C(index) \
+ (CN93_SDP_EPF_VFORE_RINT_ENA_W1C_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_VFORE_RINT_ENA_W1S(index) \
+ (CN93_SDP_EPF_VFORE_RINT_ENA_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+
+#define CN93_SDP_EPF_DMA_VF_RINT(index) \
+ (CN93_SDP_EPF_DMA_VF_RINT_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_DMA_VF_RINT_W1S(index) \
+ (CN93_SDP_EPF_DMA_VF_RINT_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_DMA_VF_RINT_ENA_W1C(index) \
+ (CN93_SDP_EPF_DMA_VF_RINT_ENA_W1C_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_DMA_VF_RINT_ENA_W1S(index) \
+ (CN93_SDP_EPF_DMA_VF_RINT_ENA_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+
+#define CN93_SDP_EPF_PP_VF_RINT(index) \
+ (CN93_SDP_EPF_PP_VF_RINT_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_PP_VF_RINT_W1S(index) \
+ (CN93_SDP_EPF_PP_VF_RINT_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_PP_VF_RINT_ENA_W1C(index) \
+ (CN93_SDP_EPF_PP_VF_RINT_ENA_W1C_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+#define CN93_SDP_EPF_PP_VF_RINT_ENA_W1S(index) \
+ (CN93_SDP_EPF_PP_VF_RINT_ENA_W1S_START + ((index) * CN93_BIT_ARRAY_OFFSET))
+
+/*------------------ Interrupt Masks ----------------*/
+#define CN93_INTR_R_SEND_ISM BIT_ULL(63)
+#define CN93_INTR_R_OUT_INT BIT_ULL(62)
+#define CN93_INTR_R_IN_INT BIT_ULL(61)
+#define CN93_INTR_R_MBOX_INT BIT_ULL(60)
+#define CN93_INTR_R_RESEND BIT_ULL(59)
+#define CN93_INTR_R_CLR_TIM BIT_ULL(58)
+
+/* ####################### Ring Mapping Registers ################################## */
+#define CN93_SDP_EPVF_RING_START 0x26000
+#define CN93_SDP_IN_RING_TB_MAP_START 0x28000
+#define CN93_SDP_IN_RATE_LIMIT_START 0x2A000
+#define CN93_SDP_MAC_PF_RING_CTL_START 0x2C000
+
+#define CN93_SDP_EPVF_RING(ring) \
+ (CN93_SDP_EPVF_RING_START + ((ring) * CN93_EPVF_RING_OFFSET))
+#define CN93_SDP_IN_RING_TB_MAP(ring) \
+ (CN93_SDP_IN_RING_TB_MAP_START + ((ring) * CN93_EPVF_RING_OFFSET))
+#define CN93_SDP_IN_RATE_LIMIT(ring) \
+ (CN93_SDP_IN_RATE_LIMIT_START + ((ring) * CN93_EPVF_RING_OFFSET))
+#define CN93_SDP_MAC_PF_RING_CTL(mac) \
+ (CN93_SDP_MAC_PF_RING_CTL_START + ((mac) * CN93_MAC_OFFSET))
+
+#define CN93_SDP_MAC_PF_RING_CTL_NPFS(val) ((val) & 0xF)
+#define CN93_SDP_MAC_PF_RING_CTL_SRN(val) (((val) >> 8) & 0xFF)
+#define CN93_SDP_MAC_PF_RING_CTL_RPPF(val) (((val) >> 16) & 0x3F)
+
+/* Number of non-queue interrupts in CN93xx */
+#define CN93_NUM_NON_IOQ_INTR 16
+#endif /* _OCTEP_REGS_CN9K_PF_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
new file mode 100644
index 000000000000..945947ec7723
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
@@ -0,0 +1,508 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/etherdevice.h>
+#include <linux/vmalloc.h>
+
+#include "octep_config.h"
+#include "octep_main.h"
+
+static void octep_oq_reset_indices(struct octep_oq *oq)
+{
+ oq->host_read_idx = 0;
+ oq->host_refill_idx = 0;
+ oq->refill_count = 0;
+ oq->last_pkt_count = 0;
+ oq->pkts_pending = 0;
+}
+
+/**
+ * octep_oq_fill_ring_buffers() - fill initial receive buffers for Rx ring.
+ *
+ * @oq: Octeon Rx queue data structure.
+ *
+ * Return: 0, if successfully filled receive buffers for all descriptors.
+ * -1, if failed to allocate a buffer or failed to map for DMA.
+ */
+static int octep_oq_fill_ring_buffers(struct octep_oq *oq)
+{
+ struct octep_oq_desc_hw *desc_ring = oq->desc_ring;
+ struct page *page;
+ u32 i;
+
+ for (i = 0; i < oq->max_count; i++) {
+ page = dev_alloc_page();
+ if (unlikely(!page)) {
+ dev_err(oq->dev, "Rx buffer alloc failed\n");
+ goto rx_buf_alloc_err;
+ }
+ desc_ring[i].buffer_ptr = dma_map_page(oq->dev, page, 0,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(oq->dev, desc_ring[i].buffer_ptr)) {
+ dev_err(oq->dev,
+ "OQ-%d buffer alloc: DMA mapping error!\n",
+ oq->q_no);
+ put_page(page);
+ goto dma_map_err;
+ }
+ oq->buff_info[i].page = page;
+ }
+
+ return 0;
+
+dma_map_err:
+rx_buf_alloc_err:
+ while (i) {
+ i--;
+ dma_unmap_page(oq->dev, desc_ring[i].buffer_ptr, PAGE_SIZE, DMA_FROM_DEVICE);
+ put_page(oq->buff_info[i].page);
+ oq->buff_info[i].page = NULL;
+ }
+
+ return -1;
+}
+
+/**
+ * octep_oq_refill() - refill buffers for used Rx ring descriptors.
+ *
+ * @oct: Octeon device private data structure.
+ * @oq: Octeon Rx queue data structure.
+ *
+ * Return: number of descriptors successfully refilled with receive buffers.
+ */
+static int octep_oq_refill(struct octep_device *oct, struct octep_oq *oq)
+{
+ struct octep_oq_desc_hw *desc_ring = oq->desc_ring;
+ struct page *page;
+ u32 refill_idx, i;
+
+ refill_idx = oq->host_refill_idx;
+ for (i = 0; i < oq->refill_count; i++) {
+ page = dev_alloc_page();
+ if (unlikely(!page)) {
+ dev_err(oq->dev, "refill: rx buffer alloc failed\n");
+ oq->stats.alloc_failures++;
+ break;
+ }
+
+ desc_ring[refill_idx].buffer_ptr = dma_map_page(oq->dev, page, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(oq->dev, desc_ring[refill_idx].buffer_ptr)) {
+ dev_err(oq->dev,
+ "OQ-%d buffer refill: DMA mapping error!\n",
+ oq->q_no);
+ put_page(page);
+ oq->stats.alloc_failures++;
+ break;
+ }
+ oq->buff_info[refill_idx].page = page;
+ refill_idx++;
+ if (refill_idx == oq->max_count)
+ refill_idx = 0;
+ }
+ oq->host_refill_idx = refill_idx;
+ oq->refill_count -= i;
+
+ return i;
+}
+
+/**
+ * octep_setup_oq() - Setup a Rx queue.
+ *
+ * @oct: Octeon device private data structure.
+ * @q_no: Rx queue number to be setup.
+ *
+ * Allocate resources for a Rx queue.
+ *
+ * Return: 0, on success. -1, on failure.
+ */
+static int octep_setup_oq(struct octep_device *oct, int q_no)
+{
+ struct octep_oq *oq;
+ u32 desc_ring_size;
+
+ oq = vzalloc(sizeof(*oq));
+ if (!oq)
+ goto create_oq_fail;
+ oct->oq[q_no] = oq;
+
+ oq->octep_dev = oct;
+ oq->netdev = oct->netdev;
+ oq->dev = &oct->pdev->dev;
+ oq->q_no = q_no;
+ oq->max_count = CFG_GET_OQ_NUM_DESC(oct->conf);
+ oq->ring_size_mask = oq->max_count - 1;
+ oq->buffer_size = CFG_GET_OQ_BUF_SIZE(oct->conf);
+ oq->max_single_buffer_size = oq->buffer_size - OCTEP_OQ_RESP_HW_SIZE;
+
+ /* When the hardware/firmware supports additional capabilities,
+ * an additional header is filled in by Octeon after the length field
+ * of Rx packets. This header contains additional packet information.
+ */
+ if (oct->caps_enabled)
+ oq->max_single_buffer_size -= OCTEP_OQ_RESP_HW_EXT_SIZE;
+
+ oq->refill_threshold = CFG_GET_OQ_REFILL_THRESHOLD(oct->conf);
+
+ desc_ring_size = oq->max_count * OCTEP_OQ_DESC_SIZE;
+ oq->desc_ring = dma_alloc_coherent(oq->dev, desc_ring_size,
+ &oq->desc_ring_dma, GFP_KERNEL);
+
+ if (unlikely(!oq->desc_ring)) {
+ dev_err(oq->dev,
+ "Failed to allocate DMA memory for OQ-%d !!\n", q_no);
+ goto desc_dma_alloc_err;
+ }
+
+ oq->buff_info = vzalloc(oq->max_count * OCTEP_OQ_RECVBUF_SIZE);
+ if (unlikely(!oq->buff_info)) {
+ dev_err(&oct->pdev->dev,
+ "Failed to allocate buffer info for OQ-%d\n", q_no);
+ goto buf_list_err;
+ }
+
+ if (octep_oq_fill_ring_buffers(oq))
+ goto oq_fill_buff_err;
+
+ octep_oq_reset_indices(oq);
+ oct->hw_ops.setup_oq_regs(oct, q_no);
+ oct->num_oqs++;
+
+ return 0;
+
+oq_fill_buff_err:
+ vfree(oq->buff_info);
+ oq->buff_info = NULL;
+buf_list_err:
+ dma_free_coherent(oq->dev, desc_ring_size,
+ oq->desc_ring, oq->desc_ring_dma);
+ oq->desc_ring = NULL;
+desc_dma_alloc_err:
+ vfree(oq);
+ oct->oq[q_no] = NULL;
+create_oq_fail:
+ return -1;
+}
+
+/**
+ * octep_oq_free_ring_buffers() - Free ring buffers.
+ *
+ * @oq: Octeon Rx queue data structure.
+ *
+ * Free receive buffers in unused Rx queue descriptors.
+ */
+static void octep_oq_free_ring_buffers(struct octep_oq *oq)
+{
+ struct octep_oq_desc_hw *desc_ring = oq->desc_ring;
+ int i;
+
+ if (!oq->desc_ring || !oq->buff_info)
+ return;
+
+ for (i = 0; i < oq->max_count; i++) {
+ if (oq->buff_info[i].page) {
+ dma_unmap_page(oq->dev, desc_ring[i].buffer_ptr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ put_page(oq->buff_info[i].page);
+ oq->buff_info[i].page = NULL;
+ desc_ring[i].buffer_ptr = 0;
+ }
+ }
+ octep_oq_reset_indices(oq);
+}
+
+/**
+ * octep_free_oq() - Free Rx queue resources.
+ *
+ * @oq: Octeon Rx queue data structure.
+ *
+ * Free all resources of a Rx queue.
+ */
+static int octep_free_oq(struct octep_oq *oq)
+{
+ struct octep_device *oct = oq->octep_dev;
+ int q_no = oq->q_no;
+
+ octep_oq_free_ring_buffers(oq);
+
+ if (oq->buff_info)
+ vfree(oq->buff_info);
+
+ if (oq->desc_ring)
+ dma_free_coherent(oq->dev,
+ oq->max_count * OCTEP_OQ_DESC_SIZE,
+ oq->desc_ring, oq->desc_ring_dma);
+
+ vfree(oq);
+ oct->oq[q_no] = NULL;
+ oct->num_oqs--;
+ return 0;
+}
+
+/**
+ * octep_setup_oqs() - setup resources for all Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Return: 0, on success. -1, on failure.
+ */
+int octep_setup_oqs(struct octep_device *oct)
+{
+ int i, retval = 0;
+
+ oct->num_oqs = 0;
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+ retval = octep_setup_oq(oct, i);
+ if (retval) {
+ dev_err(&oct->pdev->dev,
+ "Failed to setup OQ(RxQ)-%d.\n", i);
+ goto oq_setup_err;
+ }
+ dev_dbg(&oct->pdev->dev, "Successfully setup OQ(RxQ)-%d.\n", i);
+ }
+
+ return 0;
+
+oq_setup_err:
+ while (i) {
+ i--;
+ octep_free_oq(oct->oq[i]);
+ }
+ return -1;
+}
+
+/**
+ * octep_oq_dbell_init() - Initialize Rx queue doorbell.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Write number of descriptors to Rx queue doorbell register.
+ */
+void octep_oq_dbell_init(struct octep_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_oqs; i++)
+ writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg);
+}
+
+/**
+ * octep_free_oqs() - Free resources of all Rx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+void octep_free_oqs(struct octep_device *oct)
+{
+ int i;
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+ if (!oct->oq[i])
+ continue;
+ octep_free_oq(oct->oq[i]);
+ dev_dbg(&oct->pdev->dev,
+ "Successfully freed OQ(RxQ)-%d.\n", i);
+ }
+}
+
+/**
+ * octep_oq_check_hw_for_pkts() - Check for new Rx packets.
+ *
+ * @oct: Octeon device private data structure.
+ * @oq: Octeon Rx queue data structure.
+ *
+ * Return: packets received after previous check.
+ */
+static int octep_oq_check_hw_for_pkts(struct octep_device *oct,
+ struct octep_oq *oq)
+{
+ u32 pkt_count, new_pkts;
+
+ pkt_count = readl(oq->pkts_sent_reg);
+ new_pkts = pkt_count - oq->last_pkt_count;
+
+ /* Clear the hardware packet counter register if the Rx queue is
+ * being processed continuously within a single interrupt and the
+ * counter is approaching its maximum value.
+ * The counter is not cleared on every read, to save write cycles.
+ */
+ if (unlikely(pkt_count > 0xF0000000U)) {
+ writel(pkt_count, oq->pkts_sent_reg);
+ pkt_count = readl(oq->pkts_sent_reg);
+ new_pkts += pkt_count;
+ }
+ oq->last_pkt_count = pkt_count;
+ oq->pkts_pending += new_pkts;
+ return new_pkts;
+}
+
+/**
+ * __octep_oq_process_rx() - Process hardware Rx queue and push to stack.
+ *
+ * @oct: Octeon device private data structure.
+ * @oq: Octeon Rx queue data structure.
+ * @pkts_to_process: number of packets to be processed.
+ *
+ * Process the new packets in Rx queue.
+ * Packets larger than a single Rx buffer arrive across consecutive
+ * descriptors, but the count returned by the hardware accounts only for
+ * full packets, not fragments.
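+ * For example, with 2KB buffers a 5KB packet spans three descriptors but
+ * is counted as one packet.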
+ *
+ * Return: number of packets processed and pushed to stack.
+ */
+static int __octep_oq_process_rx(struct octep_device *oct,
+ struct octep_oq *oq, u16 pkts_to_process)
+{
+ struct octep_oq_resp_hw_ext *resp_hw_ext = NULL;
+ struct octep_rx_buffer *buff_info;
+ struct octep_oq_resp_hw *resp_hw;
+ u32 pkt, rx_bytes, desc_used;
+ struct sk_buff *skb;
+ u16 data_offset;
+ u32 read_idx;
+
+ read_idx = oq->host_read_idx;
+ rx_bytes = 0;
+ desc_used = 0;
+ for (pkt = 0; pkt < pkts_to_process; pkt++) {
+ buff_info = (struct octep_rx_buffer *)&oq->buff_info[read_idx];
+ dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ resp_hw = page_address(buff_info->page);
+ buff_info->page = NULL;
+
+ /* Convert the length field from big-endian to CPU byte order */
+ buff_info->len = be64_to_cpu(resp_hw->length);
+ if (oct->caps_enabled & OCTEP_CAP_RX_CHECKSUM) {
+ /* Extended response header is immediately after
+ * response header (resp_hw)
+ */
+ resp_hw_ext = (struct octep_oq_resp_hw_ext *)
+ (resp_hw + 1);
+ buff_info->len -= OCTEP_OQ_RESP_HW_EXT_SIZE;
+ /* Packet Data is immediately after
+ * extended response header.
+ */
+ data_offset = OCTEP_OQ_RESP_HW_SIZE +
+ OCTEP_OQ_RESP_HW_EXT_SIZE;
+ } else {
+ /* Data is immediately after
+ * Hardware Rx response header.
+ */
+ data_offset = OCTEP_OQ_RESP_HW_SIZE;
+ }
+ rx_bytes += buff_info->len;
+
+ if (buff_info->len <= oq->max_single_buffer_size) {
+ skb = build_skb((void *)resp_hw, PAGE_SIZE);
+ skb_reserve(skb, data_offset);
+ skb_put(skb, buff_info->len);
+ read_idx++;
+ desc_used++;
+ if (read_idx == oq->max_count)
+ read_idx = 0;
+ } else {
+ struct skb_shared_info *shinfo;
+ u16 data_len;
+
+ skb = build_skb((void *)resp_hw, PAGE_SIZE);
+ skb_reserve(skb, data_offset);
+ /* Head fragment includes response header(s);
+ * subsequent fragments contain only data.
+ */
+ skb_put(skb, oq->max_single_buffer_size);
+ read_idx++;
+ desc_used++;
+ if (read_idx == oq->max_count)
+ read_idx = 0;
+
+ shinfo = skb_shinfo(skb);
+ data_len = buff_info->len - oq->max_single_buffer_size;
+ while (data_len) {
+ dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ buff_info = (struct octep_rx_buffer *)
+ &oq->buff_info[read_idx];
+ if (data_len < oq->buffer_size) {
+ buff_info->len = data_len;
+ data_len = 0;
+ } else {
+ buff_info->len = oq->buffer_size;
+ data_len -= oq->buffer_size;
+ }
+
+ skb_add_rx_frag(skb, shinfo->nr_frags,
+ buff_info->page, 0,
+ buff_info->len,
+ buff_info->len);
+ buff_info->page = NULL;
+ read_idx++;
+ desc_used++;
+ if (read_idx == oq->max_count)
+ read_idx = 0;
+ }
+ }
+
+ skb->dev = oq->netdev;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ if (resp_hw_ext &&
+ resp_hw_ext->csum_verified == OCTEP_CSUM_VERIFIED)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ else
+ skb->ip_summed = CHECKSUM_NONE;
+ napi_gro_receive(oq->napi, skb);
+ }
+
+ oq->host_read_idx = read_idx;
+ oq->refill_count += desc_used;
+ oq->stats.packets += pkt;
+ oq->stats.bytes += rx_bytes;
+
+ return pkt;
+}
+
+/**
+ * octep_oq_process_rx() - Process Rx queue.
+ *
+ * @oq: Octeon Rx queue data structure.
+ * @budget: max number of packets that can be processed in one invocation.
+ *
+ * Check for newly received packets and process them.
+ * Keeps checking for new packets until budget is used or no new packets seen.
+ *
+ * Return: number of packets processed.
+ */
+int octep_oq_process_rx(struct octep_oq *oq, int budget)
+{
+ u32 pkts_available, pkts_processed, total_pkts_processed;
+ struct octep_device *oct = oq->octep_dev;
+
+ pkts_available = 0;
+ pkts_processed = 0;
+ total_pkts_processed = 0;
+ while (total_pkts_processed < budget) {
+ /* update pending count only when current one exhausted */
+ if (oq->pkts_pending == 0)
+ octep_oq_check_hw_for_pkts(oct, oq);
+ pkts_available = min(budget - total_pkts_processed,
+ oq->pkts_pending);
+ if (!pkts_available)
+ break;
+
+ pkts_processed = __octep_oq_process_rx(oct, oq,
+ pkts_available);
+ oq->pkts_pending -= pkts_processed;
+ total_pkts_processed += pkts_processed;
+ }
+
+ if (oq->refill_count >= oq->refill_threshold) {
+ u32 desc_refilled = octep_oq_refill(oct, oq);
+
+ /* flush pending writes before updating credits */
+ wmb();
+ writel(desc_refilled, oq->pkts_credit_reg);
+ }
+
+ return total_pkts_processed;
+}
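
For context, a hedged sketch of how a NAPI poll handler might drive
octep_oq_process_rx(); the container struct here is an assumption for
illustration, not necessarily the driver's actual per-queue layout:

	/* Hypothetical per-queue NAPI container (assumption). */
	struct octep_oq_poll_ctx {
		struct napi_struct napi;
		struct octep_oq *oq;
	};

	static int octep_napi_poll_sketch(struct napi_struct *napi, int budget)
	{
		struct octep_oq_poll_ctx *ctx =
			container_of(napi, struct octep_oq_poll_ctx, napi);
		int rx_done = octep_oq_process_rx(ctx->oq, budget);

		/* Budget not exhausted: stop polling; interrupt
		 * re-enabling would happen elsewhere.
		 */
		if (rx_done < budget)
			napi_complete_done(napi, rx_done);

		return rx_done;
	}
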
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.h
new file mode 100644
index 000000000000..782a24f27f3e
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#ifndef _OCTEP_RX_H_
+#define _OCTEP_RX_H_
+
+/* struct octep_oq_desc_hw - Octeon Hardware OQ descriptor format.
+ *
+ * The descriptor ring is made of descriptors which have 2 64-bit values:
+ *
+ * @buffer_ptr: DMA address of the skb->data
+ * @info_ptr: DMA address of host memory, used by hardware to update the
+ * packet count. This is currently unused, to save PCI writes.
+ */
+struct octep_oq_desc_hw {
+ dma_addr_t buffer_ptr;
+ u64 info_ptr;
+};
+
+#define OCTEP_OQ_DESC_SIZE (sizeof(struct octep_oq_desc_hw))
+
+#define OCTEP_CSUM_L4_VERIFIED 0x1
+#define OCTEP_CSUM_IP_VERIFIED 0x2
+#define OCTEP_CSUM_VERIFIED (OCTEP_CSUM_L4_VERIFIED | OCTEP_CSUM_IP_VERIFIED)
+
+/* Extended Response Header in packet data received from Hardware.
+ * Includes metadata like checksum status.
+ * This is valid only if the hardware/firmware has published support for it.
+ * This is at offset 0 of packet data (skb->data).
+ */
+struct octep_oq_resp_hw_ext {
+ /* Reserved. */
+ u64 reserved:62;
+
+ /* checksum verified. */
+ u64 csum_verified:2;
+};
+
+#define OCTEP_OQ_RESP_HW_EXT_SIZE (sizeof(struct octep_oq_resp_hw_ext))
+
+/* Length of Rx packet DMA'ed by Octeon to Host.
+ * This is in big-endian; it must be converted to CPU byte order.
+ * Octeon writes this at the beginning of Rx buffer (skb->data).
+ */
+struct octep_oq_resp_hw {
+ /* The Length of the packet. */
+ __be64 length;
+};
+
+#define OCTEP_OQ_RESP_HW_SIZE (sizeof(struct octep_oq_resp_hw))
+
+/* Pointer to data buffer.
+ * Driver keeps a pointer to the data buffer that it made available to
+ * the Octeon device. Since the descriptor ring keeps physical (bus)
+ * addresses, this field is required for the driver to keep track of
+ * the virtual address pointers. The fields are operated by
+ * OS-dependent routines.
+ */
+struct octep_rx_buffer {
+ struct page *page;
+
+ /* length from Rx hardware descriptor, after conversion to CPU byte order */
+ u64 len;
+};
+
+#define OCTEP_OQ_RECVBUF_SIZE (sizeof(struct octep_rx_buffer))
+
+/* Output Queue statistics. Each output queue has three stats fields. */
+struct octep_oq_stats {
+ /* Number of packets received from the Device. */
+ u64 packets;
+
+ /* Number of bytes received from the Device. */
+ u64 bytes;
+
+ /* Number of times failed to allocate buffers. */
+ u64 alloc_failures;
+};
+
+#define OCTEP_OQ_STATS_SIZE (sizeof(struct octep_oq_stats))
+
+/* Hardware interface Rx statistics */
+struct octep_iface_rx_stats {
+ /* Received packets */
+ u64 pkts;
+
+ /* Octets of received packets */
+ u64 octets;
+
+ /* Received PAUSE and Control packets */
+ u64 pause_pkts;
+
+ /* Received PAUSE and Control octets */
+ u64 pause_octets;
+
+ /* Filtered DMAC0 packets */
+ u64 dmac0_pkts;
+
+ /* Filtered DMAC0 octets */
+ u64 dmac0_octets;
+
+ /* Packets dropped due to RX FIFO full */
+ u64 dropped_pkts_fifo_full;
+
+ /* Octets dropped due to RX FIFO full */
+ u64 dropped_octets_fifo_full;
+
+ /* Error packets */
+ u64 err_pkts;
+
+ /* Filtered DMAC1 packets */
+ u64 dmac1_pkts;
+
+ /* Filtered DMAC1 octets */
+ u64 dmac1_octets;
+
+ /* NCSI-bound packets dropped */
+ u64 ncsi_dropped_pkts;
+
+ /* NCSI-bound octets dropped */
+ u64 ncsi_dropped_octets;
+
+ /* Multicast packets received. */
+ u64 mcast_pkts;
+
+ /* Broadcast packets received. */
+ u64 bcast_pkts;
+};
+
+/* The Descriptor Ring Output Queue structure.
+ * This structure has all the information required to implement an
+ * Octeon OQ.
+ */
+struct octep_oq {
+ u32 q_no;
+
+ struct octep_device *octep_dev;
+ struct net_device *netdev;
+ struct device *dev;
+
+ struct napi_struct *napi;
+
+ /* The receive buffer list. This list has the virtual addresses
+ * of the buffers.
+ */
+ struct octep_rx_buffer *buff_info;
+
+ /* Pointer to the mapped packet credit register.
+ * Host writes the number of info/buffer ptrs available to this register.
+ */
+ u8 __iomem *pkts_credit_reg;
+
+ /* Pointer to the mapped packet sent register.
+ * Octeon writes the number of packets DMA'ed to host memory
+ * in this register.
+ */
+ u8 __iomem *pkts_sent_reg;
+
+ /* Statistics for this OQ. */
+ struct octep_oq_stats stats;
+
+ /* Packets pending to be processed */
+ u32 pkts_pending;
+ u32 last_pkt_count;
+
+ /* Index in the ring where the driver should read the next packet */
+ u32 host_read_idx;
+
+ /* Number of descriptors in this ring. */
+ u32 max_count;
+ u32 ring_size_mask;
+
+ /* The number of descriptors pending refill. */
+ u32 refill_count;
+
+ /* Index in the ring where the driver will refill the
+ * descriptor's buffer
+ */
+ u32 host_refill_idx;
+ u32 refill_threshold;
+
+ /* The size of each buffer pointed by the buffer pointer. */
+ u32 buffer_size;
+ u32 max_single_buffer_size;
+
+ /* The 8B aligned descriptor ring starts at this address. */
+ struct octep_oq_desc_hw *desc_ring;
+
+ /* DMA mapped address of the OQ descriptor ring. */
+ dma_addr_t desc_ring_dma;
+};
+
+#define OCTEP_OQ_SIZE (sizeof(struct octep_oq))
+#endif /* _OCTEP_RX_H_ */
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
new file mode 100644
index 000000000000..511552bc3e87
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/etherdevice.h>
+#include <linux/vmalloc.h>
+
+#include "octep_config.h"
+#include "octep_main.h"
+
+/* Reset the various indices of the Tx queue data structure. */
+static void octep_iq_reset_indices(struct octep_iq *iq)
+{
+ iq->fill_cnt = 0;
+ iq->host_write_index = 0;
+ iq->octep_read_index = 0;
+ iq->flush_index = 0;
+ iq->pkts_processed = 0;
+ iq->pkt_in_done = 0;
+ atomic_set(&iq->instr_pending, 0);
+}
+
+/**
+ * octep_iq_process_completions() - Process Tx queue completions.
+ *
+ * @iq: Octeon Tx queue data structure.
+ * @budget: max number of completions to be processed in one invocation.
+ *
+ * Return: 1 if the entire budget was consumed (more completions may be
+ * pending), 0 otherwise.
+ */
+int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
+{
+ u32 compl_pkts, compl_bytes, compl_sg;
+ struct octep_device *oct = iq->octep_dev;
+ struct octep_tx_buffer *tx_buffer;
+ struct skb_shared_info *shinfo;
+ u32 fi = iq->flush_index;
+ struct sk_buff *skb;
+ u8 frags, i;
+
+ compl_pkts = 0;
+ compl_sg = 0;
+ compl_bytes = 0;
+ iq->octep_read_index = oct->hw_ops.update_iq_read_idx(iq);
+
+ while (likely(budget && (fi != iq->octep_read_index))) {
+ tx_buffer = iq->buff_info + fi;
+ skb = tx_buffer->skb;
+
+ fi++;
+ if (unlikely(fi == iq->max_count))
+ fi = 0;
+ compl_bytes += skb->len;
+ compl_pkts++;
+ budget--;
+
+ if (!tx_buffer->gather) {
+ dma_unmap_single(iq->dev, tx_buffer->dma,
+ tx_buffer->skb->len, DMA_TO_DEVICE);
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ /* Scatter/Gather */
+ shinfo = skb_shinfo(skb);
+ frags = shinfo->nr_frags;
+ compl_sg++;
+
+ dma_unmap_single(iq->dev, tx_buffer->sglist[0].dma_ptr[0],
+ tx_buffer->sglist[0].len[0], DMA_TO_DEVICE);
+
+ i = 1; /* entry 0 is main skb, unmapped above */
+ while (frags--) {
+ dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3],
+ tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE);
+ i++;
+ }
+
+ dev_kfree_skb_any(skb);
+ }
+
+ iq->pkts_processed += compl_pkts;
+ atomic_sub(compl_pkts, &iq->instr_pending);
+ iq->stats.instr_completed += compl_pkts;
+ iq->stats.bytes_sent += compl_bytes;
+ iq->stats.sgentry_sent += compl_sg;
+ iq->flush_index = fi;
+
+ netdev_tx_completed_queue(iq->netdev_q, compl_pkts, compl_bytes);
+
+ if (unlikely(__netif_subqueue_stopped(iq->netdev, iq->q_no)) &&
+ ((iq->max_count - atomic_read(&iq->instr_pending)) >
+ OCTEP_WAKE_QUEUE_THRESHOLD))
+ netif_wake_subqueue(iq->netdev, iq->q_no);
+ return !budget;
+}
+
+/**
+ * octep_iq_free_pending() - Free Tx buffers for pending completions.
+ *
+ * @iq: Octeon Tx queue data structure.
+ */
+static void octep_iq_free_pending(struct octep_iq *iq)
+{
+ struct octep_tx_buffer *tx_buffer;
+ struct skb_shared_info *shinfo;
+ u32 fi = iq->flush_index;
+ struct sk_buff *skb;
+ u8 frags, i;
+
+ while (fi != iq->host_write_index) {
+ tx_buffer = iq->buff_info + fi;
+ skb = tx_buffer->skb;
+
+ fi++;
+ if (unlikely(fi == iq->max_count))
+ fi = 0;
+
+ if (!tx_buffer->gather) {
+ dma_unmap_single(iq->dev, tx_buffer->dma,
+ tx_buffer->skb->len, DMA_TO_DEVICE);
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ /* Scatter/Gather */
+ shinfo = skb_shinfo(skb);
+ frags = shinfo->nr_frags;
+
+ dma_unmap_single(iq->dev,
+ tx_buffer->sglist[0].dma_ptr[0],
+ tx_buffer->sglist[0].len[0],
+ DMA_TO_DEVICE);
+
+ i = 1; /* entry 0 is main skb, unmapped above */
+ while (frags--) {
+ dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3],
+ tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE);
+ i++;
+ }
+
+ dev_kfree_skb_any(skb);
+ }
+
+ atomic_set(&iq->instr_pending, 0);
+ iq->flush_index = fi;
+ netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no));
+}
+
+/**
+ * octep_clean_iqs() - Clean Tx queues to shutdown the device.
+ *
+ * @oct: Octeon device private data structure.
+ *
+ * Free the buffers in Tx queue descriptors pending completion and
+ * reset the queue indices.
+ */
+void octep_clean_iqs(struct octep_device *oct)
+{
+ int i;
+
+ for (i = 0; i < oct->num_iqs; i++) {
+ octep_iq_free_pending(oct->iq[i]);
+ octep_iq_reset_indices(oct->iq[i]);
+ }
+}
+
+/**
+ * octep_setup_iq() - Setup a Tx queue.
+ *
+ * @oct: Octeon device private data structure.
+ * @q_no: Tx queue number to be setup.
+ *
+ * Allocate resources for a Tx queue.
+ */
+static int octep_setup_iq(struct octep_device *oct, int q_no)
+{
+ u32 desc_ring_size, buff_info_size, sglist_size;
+ struct octep_iq *iq;
+ int i;
+
+ iq = vzalloc(sizeof(*iq));
+ if (!iq)
+ goto iq_alloc_err;
+ oct->iq[q_no] = iq;
+
+ iq->octep_dev = oct;
+ iq->netdev = oct->netdev;
+ iq->dev = &oct->pdev->dev;
+ iq->q_no = q_no;
+ iq->max_count = CFG_GET_IQ_NUM_DESC(oct->conf);
+ iq->ring_size_mask = iq->max_count - 1;
+ iq->fill_threshold = CFG_GET_IQ_DB_MIN(oct->conf);
+ iq->netdev_q = netdev_get_tx_queue(iq->netdev, q_no);
+
+ /* Allocate memory for hardware queue descriptors */
+ desc_ring_size = OCTEP_IQ_DESC_SIZE * CFG_GET_IQ_NUM_DESC(oct->conf);
+ iq->desc_ring = dma_alloc_coherent(iq->dev, desc_ring_size,
+ &iq->desc_ring_dma, GFP_KERNEL);
+ if (unlikely(!iq->desc_ring)) {
+ dev_err(iq->dev,
+ "Failed to allocate DMA memory for IQ-%d\n", q_no);
+ goto desc_dma_alloc_err;
+ }
+
+ /* Allocate memory for hardware SGLIST descriptors */
+ sglist_size = OCTEP_SGLIST_SIZE_PER_PKT *
+ CFG_GET_IQ_NUM_DESC(oct->conf);
+ iq->sglist = dma_alloc_coherent(iq->dev, sglist_size,
+ &iq->sglist_dma, GFP_KERNEL);
+ if (unlikely(!iq->sglist)) {
+ dev_err(iq->dev,
+ "Failed to allocate DMA memory for IQ-%d SGLIST\n",
+ q_no);
+ goto sglist_alloc_err;
+ }
+
+ /* allocate memory to manage Tx packets pending completion */
+ buff_info_size = OCTEP_IQ_TXBUFF_INFO_SIZE * iq->max_count;
+ iq->buff_info = vzalloc(buff_info_size);
+ if (!iq->buff_info) {
+ dev_err(iq->dev,
+ "Failed to allocate buff info for IQ-%d\n", q_no);
+ goto buff_info_err;
+ }
+
+ /* Setup sglist addresses in tx_buffer entries */
+ for (i = 0; i < CFG_GET_IQ_NUM_DESC(oct->conf); i++) {
+ struct octep_tx_buffer *tx_buffer;
+
+ tx_buffer = &iq->buff_info[i];
+ tx_buffer->sglist =
+ &iq->sglist[i * OCTEP_SGLIST_ENTRIES_PER_PKT];
+ tx_buffer->sglist_dma =
+ iq->sglist_dma + (i * OCTEP_SGLIST_SIZE_PER_PKT);
+ }
+
+ octep_iq_reset_indices(iq);
+ oct->hw_ops.setup_iq_regs(oct, q_no);
+
+ oct->num_iqs++;
+ return 0;
+
+buff_info_err:
+ dma_free_coherent(iq->dev, sglist_size, iq->sglist, iq->sglist_dma);
+sglist_alloc_err:
+ dma_free_coherent(iq->dev, desc_ring_size,
+ iq->desc_ring, iq->desc_ring_dma);
+desc_dma_alloc_err:
+ vfree(iq);
+ oct->iq[q_no] = NULL;
+iq_alloc_err:
+ return -1;
+}
+
+/**
+ * octep_free_iq() - Free Tx queue resources.
+ *
+ * @iq: Octeon Tx queue data structure.
+ *
+ * Free all the resources allocated for a Tx queue.
+ */
+static void octep_free_iq(struct octep_iq *iq)
+{
+ struct octep_device *oct = iq->octep_dev;
+ u64 desc_ring_size, sglist_size;
+ int q_no = iq->q_no;
+
+ desc_ring_size = OCTEP_IQ_DESC_SIZE * CFG_GET_IQ_NUM_DESC(oct->conf);
+
+ vfree(iq->buff_info);
+
+ if (iq->desc_ring)
+ dma_free_coherent(iq->dev, desc_ring_size,
+ iq->desc_ring, iq->desc_ring_dma);
+
+ sglist_size = OCTEP_SGLIST_SIZE_PER_PKT *
+ CFG_GET_IQ_NUM_DESC(oct->conf);
+ if (iq->sglist)
+ dma_free_coherent(iq->dev, sglist_size,
+ iq->sglist, iq->sglist_dma);
+
+ vfree(iq);
+ oct->iq[q_no] = NULL;
+ oct->num_iqs--;
+}
+
+/**
+ * octep_setup_iqs() - setup resources for all Tx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+int octep_setup_iqs(struct octep_device *oct)
+{
+ int i;
+
+ oct->num_iqs = 0;
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+ if (octep_setup_iq(oct, i)) {
+ dev_err(&oct->pdev->dev,
+ "Failed to setup IQ(TxQ)-%d.\n", i);
+ goto iq_setup_err;
+ }
+ dev_dbg(&oct->pdev->dev, "Successfully setup IQ(TxQ)-%d.\n", i);
+ }
+
+ return 0;
+
+iq_setup_err:
+ while (i) {
+ i--;
+ octep_free_iq(oct->iq[i]);
+ }
+ return -1;
+}
+
+/**
+ * octep_free_iqs() - Free resources of all Tx queues.
+ *
+ * @oct: Octeon device private data structure.
+ */
+void octep_free_iqs(struct octep_device *oct)
+{
+ int i;
+
+ for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+ octep_free_iq(oct->iq[i]);
+ dev_dbg(&oct->pdev->dev,
+ "Successfully destroyed IQ(TxQ)-%d.\n", i);
+ }
+ oct->num_iqs = 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
new file mode 100644
index 000000000000..2ef57980eb47
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell Octeon EP (EndPoint) Ethernet Driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
+
+#ifndef _OCTEP_TX_H_
+#define _OCTEP_TX_H_
+
+#define IQ_SEND_OK 0
+#define IQ_SEND_STOP 1
+#define IQ_SEND_FAILED -1
+
+#define TX_BUFTYPE_NONE 0
+#define TX_BUFTYPE_NET 1
+#define TX_BUFTYPE_NET_SG 2
+#define NUM_TX_BUFTYPES 3
+
+/* Hardware format for Scatter/Gather list */
+struct octep_tx_sglist_desc {
+ u16 len[4];
+ dma_addr_t dma_ptr[4];
+};
+
+/* Each Scatter/Gather entry sent to hardware holds four pointers.
+ * So, the number of entries required is (MAX_SKB_FRAGS + 1)/4, where '+1'
+ * is for the main skb, which also goes to Octeon hardware as a gather buffer.
+ * To allocate sufficient SGLIST entries for a packet with the max fragments,
+ * round up by adding 3 before calculating the max SGLIST entries per packet.
+ */
+#define OCTEP_SGLIST_ENTRIES_PER_PKT ((MAX_SKB_FRAGS + 1 + 3) / 4)
+#define OCTEP_SGLIST_SIZE_PER_PKT \
+ (OCTEP_SGLIST_ENTRIES_PER_PKT * sizeof(struct octep_tx_sglist_desc))
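
This packed four-per-descriptor layout is what the
`sglist[i >> 2].dma_ptr[i & 3]` indexing in octep_tx.c relies on. A small
hedged sketch of the same addressing (the MAX_SKB_FRAGS value below is an
assumption for a common 4K-page configuration):

	/* Hedged sketch: fragment i of a packet lives in descriptor
	 * i / 4, slot i % 4. With MAX_SKB_FRAGS == 17,
	 * OCTEP_SGLIST_ENTRIES_PER_PKT == (17 + 1 + 3) / 4 == 5
	 * descriptors, covering up to 20 pointers for one head
	 * buffer plus 17 fragments.
	 */
	static inline dma_addr_t
	octep_sg_addr_sketch(struct octep_tx_sglist_desc *sg, unsigned int i)
	{
		return sg[i >> 2].dma_ptr[i & 3];
	}
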
+
+struct octep_tx_buffer {
+ struct sk_buff *skb;
+ dma_addr_t dma;
+ struct octep_tx_sglist_desc *sglist;
+ dma_addr_t sglist_dma;
+ u8 gather;
+};
+
+#define OCTEP_IQ_TXBUFF_INFO_SIZE (sizeof(struct octep_tx_buffer))
+
+/* Hardware interface Tx statistics */
+struct octep_iface_tx_stats {
+ /* Packets dropped due to excessive collisions */
+ u64 xscol;
+
+ /* Packets dropped due to excessive deferral */
+ u64 xsdef;
+
+ /* Packets sent that experienced multiple collisions before successful
+ * transmission
+ */
+ u64 mcol;
+
+ /* Packets sent that experienced a single collision before successful
+ * transmission
+ */
+ u64 scol;
+
+ /* Total octets sent on the interface */
+ u64 octs;
+
+ /* Total frames sent on the interface */
+ u64 pkts;
+
+ /* Packets sent with an octet count < 64 */
+ u64 hist_lt64;
+
+ /* Packets sent with an octet count == 64 */
+ u64 hist_eq64;
+
+ /* Packets sent with an octet count of 65–127 */
+ u64 hist_65to127;
+
+ /* Packets sent with an octet count of 128–255 */
+ u64 hist_128to255;
+
+ /* Packets sent with an octet count of 256–511 */
+ u64 hist_256to511;
+
+ /* Packets sent with an octet count of 512–1023 */
+ u64 hist_512to1023;
+
+ /* Packets sent with an octet count of 1024-1518 */
+ u64 hist_1024to1518;
+
+ /* Packets sent with an octet count of > 1518 */
+ u64 hist_gt1518;
+
+ /* Packets sent to a broadcast DMAC */
+ u64 bcst;
+
+ /* Packets sent to the multicast DMAC */
+ u64 mcst;
+
+ /* Packets sent that experienced a transmit underflow and were
+ * truncated
+ */
+ u64 undflw;
+
+ /* Control/PAUSE packets sent */
+ u64 ctl;
+};
+
+/* Input Queue statistics. Each input queue has seven stats fields. */
+struct octep_iq_stats {
+ /* Instructions posted to this queue. */
+ u64 instr_posted;
+
+ /* Instructions copied by hardware for processing. */
+ u64 instr_completed;
+
+ /* Instructions that could not be processed. */
+ u64 instr_dropped;
+
+ /* Bytes sent through this queue. */
+ u64 bytes_sent;
+
+ /* Gather entries sent through this queue. */
+ u64 sgentry_sent;
+
+ /* Number of transmit failures due to TX_BUSY */
+ u64 tx_busy;
+
+ /* Number of times the queue is restarted */
+ u64 restart_cnt;
+};
+
+/* The instruction (input) queue.
+ * The input queue is used to post raw (instruction) mode data or packet
+ * data from the host to the Octeon device. Each input queue (up to 4) of
+ * an Octeon device has one such structure to represent it.
+ */
+struct octep_iq {
+ u32 q_no;
+
+ struct octep_device *octep_dev;
+ struct net_device *netdev;
+ struct device *dev;
+ struct netdev_queue *netdev_q;
+
+ /* Index in input ring where driver should write the next packet */
+ u16 host_write_index;
+
+ /* Index in input ring where Octeon is expected to read next packet */
+ u16 octep_read_index;
+
+ /* This index aids in finding the window in the queue where Octeon
+ * has read the commands.
+ */
+ u16 flush_index;
+
+ /* Statistics for this input queue. */
+ struct octep_iq_stats stats;
+
+ /* This field keeps track of the instructions pending in this queue. */
+ atomic_t instr_pending;
+
+ /* Pointer to the Virtual Base addr of the input ring. */
+ struct octep_tx_desc_hw *desc_ring;
+
+ /* DMA mapped base address of the input descriptor ring. */
+ dma_addr_t desc_ring_dma;
+
+ /* Info of Tx buffers pending completion. */
+ struct octep_tx_buffer *buff_info;
+
+ /* Base pointer to Scatter/Gather lists for all ring descriptors. */
+ struct octep_tx_sglist_desc *sglist;
+
+ /* DMA mapped addr of Scatter Gather Lists */
+ dma_addr_t sglist_dma;
+
+ /* Octeon doorbell register for the ring. */
+ u8 __iomem *doorbell_reg;
+
+ /* Octeon instruction count register for this ring. */
+ u8 __iomem *inst_cnt_reg;
+
+ /* interrupt level register for this ring */
+ u8 __iomem *intr_lvl_reg;
+
+ /* Maximum no. of instructions in this queue. */
+ u32 max_count;
+ u32 ring_size_mask;
+
+ u32 pkt_in_done;
+ u32 pkts_processed;
+
+ u32 status;
+
+ /* Number of instructions pending to be posted to Octeon. */
+ u32 fill_cnt;
+
+ /* The max. number of instructions that can be held pending by the
+ * driver before ringing doorbell.
+ */
+ u32 fill_threshold;
+};
+
+/* Hardware Tx Instruction Header */
+struct octep_instr_hdr {
+ /* Data Len */
+ u64 tlen:16;
+
+ /* Reserved */
+ u64 rsvd:20;
+
+ /* PKIND for SDP */
+ u64 pkind:6;
+
+ /* Front Data size */
+ u64 fsz:6;
+
+ /* No. of entries in gather list */
+ u64 gsz:14;
+
+ /* Gather indicator; 1 = gather */
+ u64 gather:1;
+
+ /* Reserved3 */
+ u64 reserved3:1;
+};
+
+/* Hardware Tx completion response header */
+struct octep_instr_resp_hdr {
+ /* Request ID */
+ u64 rid:16;
+
+ /* PCIe port to use for response */
+ u64 pcie_port:3;
+
+ /* Scatter indicator 1=scatter */
+ u64 scatter:1;
+
+ /* Size of expected result OR number of entries in scatter list */
+ u64 rlenssz:14;
+
+ /* Desired destination port for result */
+ u64 dport:6;
+
+ /* Opcode Specific parameters */
+ u64 param:8;
+
+ /* Opcode for the return packet */
+ u64 opcode:16;
+};
+
+/* 64-byte Tx instruction format.
+ * Format of instruction for a 64-byte mode input queue.
+ *
+ * Only the first 16 bytes (dptr and ih) are mandatory; the rest are
+ * optional and filled by the driver based on firmware/hardware
+ * capabilities. These optional headers are together called Front Data,
+ * and their size is described by ih->fsz.
+ */
+struct octep_tx_desc_hw {
+ /* Pointer where the input data is available. */
+ u64 dptr;
+
+ /* Instruction Header. */
+ union {
+ struct octep_instr_hdr ih;
+ u64 ih64;
+ };
+
+ /* Pointer where the response for a RAW mode packet will be written
+ * by Octeon.
+ */
+ u64 rptr;
+
+ /* Input Instruction Response Header. */
+ struct octep_instr_resp_hdr irh;
+
+ /* Additional headers available in a 64-byte instruction. */
+ u64 exhdr[4];
+};
+
+#define OCTEP_IQ_DESC_SIZE (sizeof(struct octep_tx_desc_hw))
+#endif /* _OCTEP_TX_H_ */
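
To make the field roles concrete, here is a hedged sketch of populating
this descriptor for a simple linear packet. Field usage is inferred from
the comments above; the driver's actual transmit path is not shown in
this excerpt and may set more fields:

	/* Hedged sketch: fill a 64-byte instruction for a linear
	 * (non-gather) packet.
	 */
	static void octep_fill_desc_sketch(struct octep_tx_desc_hw *hw_desc,
					   dma_addr_t dma, u16 len, u8 pkind)
	{
		memset(hw_desc, 0, sizeof(*hw_desc));
		hw_desc->dptr = dma;		/* DMA address of packet data */
		hw_desc->ih.tlen = len;		/* total data length */
		hw_desc->ih.pkind = pkind;	/* SDP port kind */
		hw_desc->ih.gather = 0;		/* single contiguous buffer */
	}
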
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_acl.c b/drivers/net/ethernet/marvell/prestera/prestera_acl.c
index 47c899c08951..e5627782fac6 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_acl.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_acl.c
@@ -421,13 +421,6 @@ int prestera_acl_rule_add(struct prestera_switch *sw,
rule->re_arg.vtcam_id = ruleset->vtcam_id;
rule->re_key.prio = rule->priority;
- /* setup counter */
- rule->re_arg.count.valid = true;
- err = prestera_acl_chain_to_client(ruleset->ht_key.chain_index,
- &rule->re_arg.count.client);
- if (err)
- goto err_rule_add;
-
rule->re = prestera_acl_rule_entry_find(sw->acl, &rule->re_key);
err = WARN_ON(rule->re) ? -EEXIST : 0;
if (err)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
index 921959a980ee..c12b09ac6559 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c
@@ -70,6 +70,24 @@ static int prestera_flower_parse_actions(struct prestera_flow_block *block,
if (!flow_action_has_entries(flow_action))
return 0;
+ if (!flow_action_mixed_hw_stats_check(flow_action, extack))
+ return -EOPNOTSUPP;
+
+ act = flow_action_first_entry_get(flow_action);
+ if (act->hw_stats & FLOW_ACTION_HW_STATS_DISABLED) {
+ /* Nothing to do */
+ } else if (act->hw_stats & FLOW_ACTION_HW_STATS_DELAYED) {
+ /* setup counter first */
+ rule->re_arg.count.valid = true;
+ err = prestera_acl_chain_to_client(chain_index,
+ &rule->re_arg.count.client);
+ if (err)
+ return err;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type");
+ return -EOPNOTSUPP;
+ }
+
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_ACCEPT:
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c
index 6c5618cf4f08..3754d8aec76d 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c
@@ -4,6 +4,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/inetdevice.h>
+#include <net/inet_dscp.h>
#include <net/switchdev.h>
#include <linux/rhashtable.h>
@@ -26,7 +27,7 @@ struct prestera_kern_fib_cache {
/* Indicate if route is not overlapped by another table */
struct rhash_head ht_node; /* node of prestera_router */
struct fib_info *fi;
- u8 kern_tos;
+ dscp_t kern_dscp;
u8 kern_type;
bool reachable;
};
@@ -88,7 +89,7 @@ prestera_kern_fib_cache_destroy(struct prestera_switch *sw,
static struct prestera_kern_fib_cache *
prestera_kern_fib_cache_create(struct prestera_switch *sw,
struct prestera_kern_fib_cache_key *key,
- struct fib_info *fi, u8 tos, u8 type)
+ struct fib_info *fi, dscp_t dscp, u8 type)
{
struct prestera_kern_fib_cache *fib_cache;
int err;
@@ -100,7 +101,7 @@ prestera_kern_fib_cache_create(struct prestera_switch *sw,
memcpy(&fib_cache->key, key, sizeof(*key));
fib_info_hold(fi);
fib_cache->fi = fi;
- fib_cache->kern_tos = tos;
+ fib_cache->kern_dscp = dscp;
fib_cache->kern_type = type;
err = rhashtable_insert_fast(&sw->router->kern_fib_cache_ht,
@@ -132,7 +133,7 @@ __prestera_k_arb_fib_lpm_offload_set(struct prestera_switch *sw,
fri.tb_id = fc->key.kern_tb_id;
fri.dst = fc->key.addr.u.ipv4;
fri.dst_len = fc->key.prefix_len;
- fri.tos = fc->kern_tos;
+ fri.dscp = fc->kern_dscp;
fri.type = fc->kern_type;
/* flags begin */
fri.offload = offload;
@@ -305,7 +306,7 @@ prestera_k_arb_fib_evt(struct prestera_switch *sw,
if (replace) {
fib_cache = prestera_kern_fib_cache_create(sw, &fc_key,
fen_info->fi,
- fen_info->tos,
+ fen_info->dscp,
fen_info->type);
if (!fib_cache) {
dev_err(sw->dev->dev, "fib_cache == NULL");
diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
index 86d356b4388d..da4ec235d146 100644
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -7,6 +7,10 @@ config NET_VENDOR_MEDIATEK
if NET_VENDOR_MEDIATEK
+config NET_MEDIATEK_SOC_WED
+ depends on ARCH_MEDIATEK || COMPILE_TEST
+ def_bool NET_MEDIATEK_SOC != n
+
config NET_MEDIATEK_SOC
tristate "MediaTek SoC Gigabit Ethernet support"
depends on NET_DSA || !NET_DSA
diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
index 79d4cdbbcbf5..45ba0970504a 100644
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -5,4 +5,9 @@
obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
+mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed.o
+ifdef CONFIG_DEBUG_FS
+mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_debugfs.o
+endif
+obj-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_ops.o
obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index f02d07ec5ccb..18eebcaa6a76 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -9,6 +9,7 @@
#include <linux/of_device.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
+#include <linux/of_address.h>
#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
#include <linux/clk.h>
@@ -20,9 +21,11 @@
#include <linux/pinctrl/devinfo.h>
#include <linux/phylink.h>
#include <linux/jhash.h>
+#include <linux/bitfield.h>
#include <net/dsa.h>
#include "mtk_eth_soc.h"
+#include "mtk_wed.h"
static int mtk_msg_level = -1;
module_param_named(msg_level, mtk_msg_level, int, 0);
@@ -786,7 +789,7 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
dma_addr_t dma_addr;
int i;
- eth->scratch_ring = dma_alloc_coherent(eth->dev,
+ eth->scratch_ring = dma_alloc_coherent(eth->dma_dev,
cnt * sizeof(struct mtk_tx_dma),
&eth->phy_scratch_ring,
GFP_ATOMIC);
@@ -798,10 +801,10 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
if (unlikely(!eth->scratch_head))
return -ENOMEM;
- dma_addr = dma_map_single(eth->dev,
+ dma_addr = dma_map_single(eth->dma_dev,
eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
+ if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
return -ENOMEM;
phy_ring_tail = eth->phy_scratch_ring +
@@ -855,26 +858,26 @@ static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf,
{
if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
- dma_unmap_single(eth->dev,
+ dma_unmap_single(eth->dma_dev,
dma_unmap_addr(tx_buf, dma_addr0),
dma_unmap_len(tx_buf, dma_len0),
DMA_TO_DEVICE);
} else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
- dma_unmap_page(eth->dev,
+ dma_unmap_page(eth->dma_dev,
dma_unmap_addr(tx_buf, dma_addr0),
dma_unmap_len(tx_buf, dma_len0),
DMA_TO_DEVICE);
}
} else {
if (dma_unmap_len(tx_buf, dma_len0)) {
- dma_unmap_page(eth->dev,
+ dma_unmap_page(eth->dma_dev,
dma_unmap_addr(tx_buf, dma_addr0),
dma_unmap_len(tx_buf, dma_len0),
DMA_TO_DEVICE);
}
if (dma_unmap_len(tx_buf, dma_len1)) {
- dma_unmap_page(eth->dev,
+ dma_unmap_page(eth->dma_dev,
dma_unmap_addr(tx_buf, dma_addr1),
dma_unmap_len(tx_buf, dma_len1),
DMA_TO_DEVICE);
@@ -952,9 +955,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
if (skb_vlan_tag_present(skb))
txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb);
- mapped_addr = dma_map_single(eth->dev, skb->data,
+ mapped_addr = dma_map_single(eth->dma_dev, skb->data,
skb_headlen(skb), DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
+ if (unlikely(dma_mapping_error(eth->dma_dev, mapped_addr)))
return -ENOMEM;
WRITE_ONCE(itxd->txd1, mapped_addr);
@@ -993,10 +996,10 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
- mapped_addr = skb_frag_dma_map(eth->dev, frag, offset,
+ mapped_addr = skb_frag_dma_map(eth->dma_dev, frag, offset,
frag_map_size,
DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
+ if (unlikely(dma_mapping_error(eth->dma_dev, mapped_addr)))
goto err_dma;
if (i == nr_frags - 1 &&
@@ -1237,7 +1240,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
struct net_device *netdev;
unsigned int pktlen;
dma_addr_t dma_addr;
- u32 hash;
+ u32 hash, reason;
int mac;
ring = mtk_get_rx_ring(eth);
@@ -1274,18 +1277,18 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
netdev->stats.rx_dropped++;
goto release_desc;
}
- dma_addr = dma_map_single(eth->dev,
+ dma_addr = dma_map_single(eth->dma_dev,
new_data + NET_SKB_PAD +
eth->ip_align,
ring->buf_size,
DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(eth->dev, dma_addr))) {
+ if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr))) {
skb_free_frag(new_data);
netdev->stats.rx_dropped++;
goto release_desc;
}
- dma_unmap_single(eth->dev, trxd.rxd1,
+ dma_unmap_single(eth->dma_dev, trxd.rxd1,
ring->buf_size, DMA_FROM_DEVICE);
/* receive data */
@@ -1313,6 +1316,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
}
+ reason = FIELD_GET(MTK_RXD4_PPE_CPU_REASON, trxd.rxd4);
+ if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
+ mtk_ppe_check_skb(eth->ppe, skb,
+ trxd.rxd4 & MTK_RXD4_FOE_ENTRY);
+
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
(trxd.rxd2 & RX_DMA_VTAG))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
@@ -1558,7 +1566,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
if (!ring->buf)
goto no_tx_mem;
- ring->dma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
+ ring->dma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
&ring->phys, GFP_ATOMIC);
if (!ring->dma)
goto no_tx_mem;
@@ -1576,7 +1584,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
* descriptors in ring->dma_pdma.
*/
if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
- ring->dma_pdma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
+ ring->dma_pdma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
&ring->phys_pdma,
GFP_ATOMIC);
if (!ring->dma_pdma)
@@ -1635,7 +1643,7 @@ static void mtk_tx_clean(struct mtk_eth *eth)
}
if (ring->dma) {
- dma_free_coherent(eth->dev,
+ dma_free_coherent(eth->dma_dev,
MTK_DMA_SIZE * sizeof(*ring->dma),
ring->dma,
ring->phys);
@@ -1643,7 +1651,7 @@ static void mtk_tx_clean(struct mtk_eth *eth)
}
if (ring->dma_pdma) {
- dma_free_coherent(eth->dev,
+ dma_free_coherent(eth->dma_dev,
MTK_DMA_SIZE * sizeof(*ring->dma_pdma),
ring->dma_pdma,
ring->phys_pdma);
@@ -1688,18 +1696,18 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
return -ENOMEM;
}
- ring->dma = dma_alloc_coherent(eth->dev,
+ ring->dma = dma_alloc_coherent(eth->dma_dev,
rx_dma_size * sizeof(*ring->dma),
&ring->phys, GFP_ATOMIC);
if (!ring->dma)
return -ENOMEM;
for (i = 0; i < rx_dma_size; i++) {
- dma_addr_t dma_addr = dma_map_single(eth->dev,
+ dma_addr_t dma_addr = dma_map_single(eth->dma_dev,
ring->data[i] + NET_SKB_PAD + eth->ip_align,
ring->buf_size,
DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
+ if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
return -ENOMEM;
ring->dma[i].rxd1 = (unsigned int)dma_addr;
@@ -1735,7 +1743,7 @@ static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring)
continue;
if (!ring->dma[i].rxd1)
continue;
- dma_unmap_single(eth->dev,
+ dma_unmap_single(eth->dma_dev,
ring->dma[i].rxd1,
ring->buf_size,
DMA_FROM_DEVICE);
@@ -1746,7 +1754,7 @@ static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring)
}
if (ring->dma) {
- dma_free_coherent(eth->dev,
+ dma_free_coherent(eth->dma_dev,
ring->dma_size * sizeof(*ring->dma),
ring->dma,
ring->phys);
@@ -2099,7 +2107,7 @@ static void mtk_dma_free(struct mtk_eth *eth)
if (eth->netdev[i])
netdev_reset_queue(eth->netdev[i]);
if (eth->scratch_ring) {
- dma_free_coherent(eth->dev,
+ dma_free_coherent(eth->dma_dev,
MTK_DMA_SIZE * sizeof(struct mtk_tx_dma),
eth->scratch_ring,
eth->phy_scratch_ring);
@@ -2267,7 +2275,7 @@ static int mtk_open(struct net_device *dev)
if (err)
return err;
- if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
+ if (eth->soc->offload_version && mtk_ppe_start(eth->ppe) == 0)
gdm_config = MTK_GDMA_TO_PPE;
mtk_gdm_config(eth, gdm_config);
@@ -2341,7 +2349,7 @@ static int mtk_stop(struct net_device *dev)
mtk_dma_free(eth);
if (eth->soc->offload_version)
- mtk_ppe_stop(&eth->ppe);
+ mtk_ppe_stop(eth->ppe);
return 0;
}
@@ -2448,6 +2456,8 @@ static void mtk_dim_tx(struct work_struct *work)
static int mtk_hw_init(struct mtk_eth *eth)
{
+ u32 dma_mask = ETHSYS_DMA_AG_MAP_PDMA | ETHSYS_DMA_AG_MAP_QDMA |
+ ETHSYS_DMA_AG_MAP_PPE;
int i, val, ret;
if (test_and_set_bit(MTK_HW_INIT, &eth->state))
@@ -2460,6 +2470,10 @@ static int mtk_hw_init(struct mtk_eth *eth)
if (ret)
goto err_disable_pm;
+ if (eth->ethsys)
+ regmap_update_bits(eth->ethsys, ETHSYS_DMA_AG_MAP, dma_mask,
+ of_dma_is_coherent(eth->dma_dev->of_node) * dma_mask);
+
if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
ret = device_reset(eth->dev);
if (ret) {
@@ -3040,6 +3054,35 @@ free_netdev:
return err;
}
+void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev)
+{
+ struct net_device *dev, *tmp;
+ LIST_HEAD(dev_list);
+ int i;
+
+ rtnl_lock();
+
+ for (i = 0; i < MTK_MAC_COUNT; i++) {
+ dev = eth->netdev[i];
+
+ if (!dev || !(dev->flags & IFF_UP))
+ continue;
+
+ list_add_tail(&dev->close_list, &dev_list);
+ }
+
+ dev_close_many(&dev_list, false);
+
+ eth->dma_dev = dma_dev;
+
+ list_for_each_entry_safe(dev, tmp, &dev_list, close_list) {
+ list_del_init(&dev->close_list);
+ dev_open(dev, NULL);
+ }
+
+ rtnl_unlock();
+}
+
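A hedged usage note: a companion block (such as the WED support added
elsewhere in this series) could redirect DMA mapping to its own device
with a single call; the close/reopen sequence above then reallocates all
rings against the new device. The call site below is illustrative only:

	/* Illustrative (assumed) call site; actual WED wiring is not
	 * shown in this excerpt.
	 */
	mtk_eth_set_dma_device(eth, &wed_platform_dev->dev);
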
static int mtk_probe(struct platform_device *pdev)
{
struct device_node *mac_np;
@@ -3053,6 +3096,7 @@ static int mtk_probe(struct platform_device *pdev)
eth->soc = of_device_get_match_data(&pdev->dev);
eth->dev = &pdev->dev;
+ eth->dma_dev = &pdev->dev;
eth->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(eth->base))
return PTR_ERR(eth->base);
@@ -3101,6 +3145,16 @@ static int mtk_probe(struct platform_device *pdev)
}
}
+ if (of_dma_is_coherent(pdev->dev.of_node)) {
+ struct regmap *cci;
+
+ cci = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "cci-control-port");
+ /* enable CPU/bus coherency */
+ if (!IS_ERR(cci))
+ regmap_write(cci, 0, 3);
+ }
+
if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
eth->sgmii = devm_kzalloc(eth->dev, sizeof(*eth->sgmii),
GFP_KERNEL);
@@ -3123,6 +3177,22 @@ static int mtk_probe(struct platform_device *pdev)
}
}
+ for (i = 0;; i++) {
+ struct device_node *np = of_parse_phandle(pdev->dev.of_node,
+ "mediatek,wed", i);
+ static const u32 wdma_regs[] = {
+ MTK_WDMA0_BASE,
+ MTK_WDMA1_BASE
+ };
+ void __iomem *wdma;
+
+ if (!np || i >= ARRAY_SIZE(wdma_regs))
+ break;
+
+ wdma = eth->base + wdma_regs[i];
+ mtk_wed_add_hw(np, eth, wdma, i);
+ }
+
for (i = 0; i < 3; i++) {
if (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT) && i > 0)
eth->irq[i] = eth->irq[0];
@@ -3198,10 +3268,11 @@ static int mtk_probe(struct platform_device *pdev)
}
if (eth->soc->offload_version) {
- err = mtk_ppe_init(&eth->ppe, eth->dev,
- eth->base + MTK_ETH_PPE_BASE, 2);
- if (err)
+ eth->ppe = mtk_ppe_init(eth, eth->base + MTK_ETH_PPE_BASE, 2);
+ if (!eth->ppe) {
+ err = -ENOMEM;
goto err_free_dev;
+ }
err = mtk_eth_offload_init(eth);
if (err)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index c9d42be314b5..c98c7ee42c6f 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -295,6 +295,9 @@
#define MTK_GDM1_TX_GPCNT 0x2438
#define MTK_STAT_OFFSET 0x40
+#define MTK_WDMA0_BASE 0x2800
+#define MTK_WDMA1_BASE 0x2c00
+
/* QDMA descriptor txd4 */
#define TX_DMA_CHKSUM (0x7 << 29)
#define TX_DMA_TSO BIT(28)
@@ -465,6 +468,12 @@
#define RSTCTRL_FE BIT(6)
#define RSTCTRL_PPE BIT(31)
+/* ethernet dma channel agent map */
+#define ETHSYS_DMA_AG_MAP 0x408
+#define ETHSYS_DMA_AG_MAP_PDMA BIT(0)
+#define ETHSYS_DMA_AG_MAP_QDMA BIT(1)
+#define ETHSYS_DMA_AG_MAP_PPE BIT(2)
+
/* SGMII subsystem config registers */
/* Register to auto-negotiation restart */
#define SGMSYS_PCS_CONTROL_1 0x0
@@ -882,6 +891,7 @@ struct mtk_sgmii {
/* struct mtk_eth - This is the main data structure for holding the state
* of the driver
* @dev: The device pointer
+ * @dma_dev: The device pointer used for DMA mapping/allocation
* @base: The mapped register i/o base
* @page_lock: Make sure that register operations are atomic
* @tx_irq__lock: Make sure that IRQ register operations are atomic
@@ -925,6 +935,7 @@ struct mtk_sgmii {
struct mtk_eth {
struct device *dev;
+ struct device *dma_dev;
void __iomem *base;
spinlock_t page_lock;
spinlock_t tx_irq_lock;
@@ -974,7 +985,7 @@ struct mtk_eth {
u32 rx_dma_l4_valid;
int ip_align;
- struct mtk_ppe ppe;
+ struct mtk_ppe *ppe;
struct rhashtable flow_table;
};
@@ -1023,6 +1034,7 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
int mtk_eth_offload_init(struct mtk_eth *eth);
int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data);
+void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
#endif /* MTK_ETH_H */
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
index 3ad10c793308..683f89f8e3b2 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -6,9 +6,22 @@
#include <linux/iopoll.h>
#include <linux/etherdevice.h>
#include <linux/platform_device.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/dsa.h>
+#include "mtk_eth_soc.h"
#include "mtk_ppe.h"
#include "mtk_ppe_regs.h"
+static DEFINE_SPINLOCK(ppe_lock);
+
+static const struct rhashtable_params mtk_flow_l2_ht_params = {
+ .head_offset = offsetof(struct mtk_flow_entry, l2_node),
+ .key_offset = offsetof(struct mtk_flow_entry, data.bridge),
+ .key_len = offsetof(struct mtk_foe_bridge, key_end),
+ .automatic_shrinking = true,
+};
+
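The `key_len` above is derived from the zero-size `key_end` marker added
to struct mtk_foe_bridge in mtk_ppe.h by this patch; a short note on the
idiom:

	/* The empty 'struct {} key_end;' member marks where the hashed
	 * key stops, so the key length stays correct if fields are
	 * added or reordered before it:
	 *
	 *	key_len = offsetof(struct mtk_foe_bridge, key_end);
	 *	        (== dest_mac + src_mac + vlan, plus any padding)
	 */
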
static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
{
writel(val, ppe->base + reg);
@@ -41,6 +54,11 @@ static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
return ppe_m32(ppe, reg, val, 0);
}
+static u32 mtk_eth_timestamp(struct mtk_eth *eth)
+{
+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
+}
+
static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
{
int ret;
@@ -76,13 +94,6 @@ static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
u32 hash;
switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
- case MTK_PPE_PKT_TYPE_BRIDGE:
- hv1 = e->bridge.src_mac_lo;
- hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
- hv2 = e->bridge.src_mac_hi >> 16;
- hv2 ^= e->bridge.dest_mac_lo;
- hv3 = e->bridge.dest_mac_hi;
- break;
case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
hv1 = e->ipv4.orig.ports;
@@ -122,6 +133,9 @@ mtk_foe_entry_l2(struct mtk_foe_entry *entry)
{
int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ if (type == MTK_PPE_PKT_TYPE_BRIDGE)
+ return &entry->bridge.l2;
+
if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
return &entry->ipv6.l2;
@@ -133,6 +147,9 @@ mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
{
int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ if (type == MTK_PPE_PKT_TYPE_BRIDGE)
+ return &entry->bridge.ib2;
+
if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
return &entry->ipv6.ib2;
@@ -167,7 +184,12 @@ int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
entry->ipv6.ports = ports_pad;
- if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+ if (type == MTK_PPE_PKT_TYPE_BRIDGE) {
+ ether_addr_copy(entry->bridge.src_mac, src_mac);
+ ether_addr_copy(entry->bridge.dest_mac, dest_mac);
+ entry->bridge.ib2 = val;
+ l2 = &entry->bridge.l2;
+ } else if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
entry->ipv6.ib2 = val;
l2 = &entry->ipv6.l2;
} else {
@@ -329,32 +351,167 @@ int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
return 0;
}
+int mtk_foe_entry_set_wdma(struct mtk_foe_entry *entry, int wdma_idx, int txq,
+ int bss, int wcid)
+{
+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+ u32 *ib2 = mtk_foe_entry_ib2(entry);
+
+ *ib2 &= ~MTK_FOE_IB2_PORT_MG;
+ *ib2 |= MTK_FOE_IB2_WDMA_WINFO;
+ if (wdma_idx)
+ *ib2 |= MTK_FOE_IB2_WDMA_DEVIDX;
+
+ l2->vlan2 = FIELD_PREP(MTK_FOE_VLAN2_WINFO_BSS, bss) |
+ FIELD_PREP(MTK_FOE_VLAN2_WINFO_WCID, wcid) |
+ FIELD_PREP(MTK_FOE_VLAN2_WINFO_RING, txq);
+
+ return 0;
+}
+
static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
{
return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
}
-int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
- u16 timestamp)
+static bool
+mtk_flow_entry_match(struct mtk_flow_entry *entry, struct mtk_foe_entry *data)
+{
+ int type, len;
+
+ if ((data->ib1 ^ entry->data.ib1) & MTK_FOE_IB1_UDP)
+ return false;
+
+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->data.ib1);
+ if (type > MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+ len = offsetof(struct mtk_foe_entry, ipv6._rsv);
+ else
+ len = offsetof(struct mtk_foe_entry, ipv4.ib2);
+
+ return !memcmp(&entry->data.data, &data->data, len - 4);
+}
+
+static void
+__mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+{
+ struct hlist_head *head;
+ struct hlist_node *tmp;
+
+ if (entry->type == MTK_FLOW_TYPE_L2) {
+ rhashtable_remove_fast(&ppe->l2_flows, &entry->l2_node,
+ mtk_flow_l2_ht_params);
+
+ head = &entry->l2_flows;
+ hlist_for_each_entry_safe(entry, tmp, head, l2_data.list)
+ __mtk_foe_entry_clear(ppe, entry);
+ return;
+ }
+
+ hlist_del_init(&entry->list);
+ if (entry->hash != 0xffff) {
+ ppe->foe_table[entry->hash].ib1 &= ~MTK_FOE_IB1_STATE;
+ ppe->foe_table[entry->hash].ib1 |= FIELD_PREP(MTK_FOE_IB1_STATE,
+ MTK_FOE_STATE_BIND);
+ dma_wmb();
+ }
+ entry->hash = 0xffff;
+
+ if (entry->type != MTK_FLOW_TYPE_L2_SUBFLOW)
+ return;
+
+ hlist_del_init(&entry->l2_data.list);
+ kfree(entry);
+}
+
+static int __mtk_foe_entry_idle_time(struct mtk_ppe *ppe, u32 ib1)
+{
+ u16 timestamp;
+ u16 now;
+
+ now = mtk_eth_timestamp(ppe->eth) & MTK_FOE_IB1_BIND_TIMESTAMP;
+ timestamp = ib1 & MTK_FOE_IB1_BIND_TIMESTAMP;
+
+ if (timestamp > now)
+ return MTK_FOE_IB1_BIND_TIMESTAMP + 1 - timestamp + now;
+ else
+ return now - timestamp;
+}
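
A hedged worked example of the wrap handling above, assuming the
bind-timestamp field is a 15-bit mask (0x7fff); the actual width comes
from MTK_FOE_IB1_BIND_TIMESTAMP:

	u32 ts_mask = 0x7fff;			/* assumed field width */
	u32 timestamp = 0x7ffe, now = 0x0001;	/* counter wrapped past zero */
	int idle = (timestamp > now)
		? ts_mask + 1 - timestamp + now	/* == 0x8000 - 0x7ffe + 1 == 3 */
		: now - timestamp;
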
+
+static void
+mtk_flow_entry_update_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
{
+ struct mtk_flow_entry *cur;
struct mtk_foe_entry *hwe;
- u32 hash;
+ struct hlist_node *tmp;
+ int idle;
+
+ idle = __mtk_foe_entry_idle_time(ppe, entry->data.ib1);
+ hlist_for_each_entry_safe(cur, tmp, &entry->l2_flows, l2_data.list) {
+ int cur_idle;
+ u32 ib1;
+
+ hwe = &ppe->foe_table[cur->hash];
+ ib1 = READ_ONCE(hwe->ib1);
+
+ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND) {
+ cur->hash = 0xffff;
+ __mtk_foe_entry_clear(ppe, cur);
+ continue;
+ }
+
+ cur_idle = __mtk_foe_entry_idle_time(ppe, ib1);
+ if (cur_idle >= idle)
+ continue;
+
+ idle = cur_idle;
+ entry->data.ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
+ entry->data.ib1 |= hwe->ib1 & MTK_FOE_IB1_BIND_TIMESTAMP;
+ }
+}
+
+static void
+mtk_flow_entry_update(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+{
+ struct mtk_foe_entry *hwe;
+ struct mtk_foe_entry foe;
+
+ spin_lock_bh(&ppe_lock);
+
+ if (entry->type == MTK_FLOW_TYPE_L2) {
+ mtk_flow_entry_update_l2(ppe, entry);
+ goto out;
+ }
+
+ if (entry->hash == 0xffff)
+ goto out;
+
+ hwe = &ppe->foe_table[entry->hash];
+ memcpy(&foe, hwe, sizeof(foe));
+ if (!mtk_flow_entry_match(entry, &foe)) {
+ entry->hash = 0xffff;
+ goto out;
+ }
+
+ entry->data.ib1 = foe.ib1;
+
+out:
+ spin_unlock_bh(&ppe_lock);
+}
+
+static void
+__mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+ u16 hash)
+{
+ struct mtk_foe_entry *hwe;
+ u16 timestamp;
+ timestamp = mtk_eth_timestamp(ppe->eth);
timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
- hash = mtk_ppe_hash_entry(entry);
hwe = &ppe->foe_table[hash];
- if (!mtk_foe_entry_usable(hwe)) {
- hwe++;
- hash++;
-
- if (!mtk_foe_entry_usable(hwe))
- return -ENOSPC;
- }
-
memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
wmb();
hwe->ib1 = entry->ib1;
@@ -362,32 +519,195 @@ int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
dma_wmb();
mtk_ppe_cache_clear(ppe);
+}
- return hash;
+void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+{
+ spin_lock_bh(&ppe_lock);
+ __mtk_foe_entry_clear(ppe, entry);
+ spin_unlock_bh(&ppe_lock);
+}
+
+static int
+mtk_foe_entry_commit_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+{
+ entry->type = MTK_FLOW_TYPE_L2;
+
+ return rhashtable_insert_fast(&ppe->l2_flows, &entry->l2_node,
+ mtk_flow_l2_ht_params);
+}
+
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->data.ib1);
+ u32 hash;
+
+ if (type == MTK_PPE_PKT_TYPE_BRIDGE)
+ return mtk_foe_entry_commit_l2(ppe, entry);
+
+ hash = mtk_ppe_hash_entry(&entry->data);
+ entry->hash = 0xffff;
+ spin_lock_bh(&ppe_lock);
+ hlist_add_head(&entry->list, &ppe->foe_flow[hash / 2]);
+ spin_unlock_bh(&ppe_lock);
+
+ return 0;
+}
+
+static void
+mtk_foe_entry_commit_subflow(struct mtk_ppe *ppe, struct mtk_flow_entry *entry,
+ u16 hash)
+{
+ struct mtk_flow_entry *flow_info;
+ struct mtk_foe_entry foe, *hwe;
+ struct mtk_foe_mac_info *l2;
+ u32 ib1_mask = MTK_FOE_IB1_PACKET_TYPE | MTK_FOE_IB1_UDP;
+ int type;
+
+ flow_info = kzalloc(offsetof(struct mtk_flow_entry, l2_data.end),
+ GFP_ATOMIC);
+ if (!flow_info)
+ return;
+
+ flow_info->l2_data.base_flow = entry;
+ flow_info->type = MTK_FLOW_TYPE_L2_SUBFLOW;
+ flow_info->hash = hash;
+ hlist_add_head(&flow_info->list, &ppe->foe_flow[hash / 2]);
+ hlist_add_head(&flow_info->l2_data.list, &entry->l2_flows);
+
+ hwe = &ppe->foe_table[hash];
+ memcpy(&foe, hwe, sizeof(foe));
+ foe.ib1 &= ib1_mask;
+ foe.ib1 |= entry->data.ib1 & ~ib1_mask;
+
+ l2 = mtk_foe_entry_l2(&foe);
+ memcpy(l2, &entry->data.bridge.l2, sizeof(*l2));
+
+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, foe.ib1);
+ if (type == MTK_PPE_PKT_TYPE_IPV4_HNAPT)
+ memcpy(&foe.ipv4.new, &foe.ipv4.orig, sizeof(foe.ipv4.new));
+ else if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T && l2->etype == ETH_P_IP)
+ l2->etype = ETH_P_IPV6;
+
+ *mtk_foe_entry_ib2(&foe) = entry->data.bridge.ib2;
+
+ __mtk_foe_entry_commit(ppe, &foe, hash);
}
-int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash)
+{
+ struct hlist_head *head = &ppe->foe_flow[hash / 2];
+ struct mtk_foe_entry *hwe = &ppe->foe_table[hash];
+ struct mtk_flow_entry *entry;
+ struct mtk_foe_bridge key = {};
+ struct hlist_node *n;
+ struct ethhdr *eh;
+ bool found = false;
+ u8 *tag;
+
+ spin_lock_bh(&ppe_lock);
+
+ if (FIELD_GET(MTK_FOE_IB1_STATE, hwe->ib1) == MTK_FOE_STATE_BIND)
+ goto out;
+
+ hlist_for_each_entry_safe(entry, n, head, list) {
+ if (entry->type == MTK_FLOW_TYPE_L2_SUBFLOW) {
+ if (unlikely(FIELD_GET(MTK_FOE_IB1_STATE, hwe->ib1) ==
+ MTK_FOE_STATE_BIND))
+ continue;
+
+ entry->hash = 0xffff;
+ __mtk_foe_entry_clear(ppe, entry);
+ continue;
+ }
+
+ if (found || !mtk_flow_entry_match(entry, hwe)) {
+ if (entry->hash != 0xffff)
+ entry->hash = 0xffff;
+ continue;
+ }
+
+ entry->hash = hash;
+ __mtk_foe_entry_commit(ppe, &entry->data, hash);
+ found = true;
+ }
+
+ if (found)
+ goto out;
+
+ eh = eth_hdr(skb);
+ ether_addr_copy(key.dest_mac, eh->h_dest);
+ ether_addr_copy(key.src_mac, eh->h_source);
+ tag = skb->data - 2;
+ key.vlan = 0;
+ switch (skb->protocol) {
+#if IS_ENABLED(CONFIG_NET_DSA)
+ case htons(ETH_P_XDSA):
+ if (!netdev_uses_dsa(skb->dev) ||
+ skb->dev->dsa_ptr->tag_ops->proto != DSA_TAG_PROTO_MTK)
+ goto out;
+
+ tag += 4;
+ if (get_unaligned_be16(tag) != ETH_P_8021Q)
+ break;
+
+ fallthrough;
+#endif
+ case htons(ETH_P_8021Q):
+ key.vlan = get_unaligned_be16(tag + 2) & VLAN_VID_MASK;
+ break;
+ default:
+ break;
+ }
+
+ entry = rhashtable_lookup_fast(&ppe->l2_flows, &key, mtk_flow_l2_ht_params);
+ if (!entry)
+ goto out;
+
+ mtk_foe_entry_commit_subflow(ppe, entry, hash);
+
+out:
+ spin_unlock_bh(&ppe_lock);
+}
+
+int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+{
+ mtk_flow_entry_update(ppe, entry);
+
+ return __mtk_foe_entry_idle_time(ppe, entry->data.ib1);
+}
+
+struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base,
int version)
{
+ struct device *dev = eth->dev;
struct mtk_foe_entry *foe;
+ struct mtk_ppe *ppe;
+
+ ppe = devm_kzalloc(dev, sizeof(*ppe), GFP_KERNEL);
+ if (!ppe)
+ return NULL;
+
+ rhashtable_init(&ppe->l2_flows, &mtk_flow_l2_ht_params);
/* need to allocate a separate device, since the PPE's DMA access is
* not coherent.
*/
ppe->base = base;
+ ppe->eth = eth;
ppe->dev = dev;
ppe->version = version;
foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
&ppe->foe_phys, GFP_KERNEL);
if (!foe)
- return -ENOMEM;
+ return NULL;
ppe->foe_table = foe;
mtk_ppe_debugfs_init(ppe);
- return 0;
+ return ppe;
}
static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
@@ -443,7 +763,6 @@ int mtk_ppe_start(struct mtk_ppe *ppe)
MTK_PPE_FLOW_CFG_IP4_NAT |
MTK_PPE_FLOW_CFG_IP4_NAPT |
MTK_PPE_FLOW_CFG_IP4_DSLITE |
- MTK_PPE_FLOW_CFG_L2_BRIDGE |
MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
index 242fb8f2ae65..1f5cf1c9a947 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/bitfield.h>
+#include <linux/rhashtable.h>
#define MTK_ETH_PPE_BASE 0xc00
@@ -48,9 +49,9 @@ enum {
#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
#define MTK_FOE_IB2_MULTICAST BIT(8)
-#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
-#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
-#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
+#define MTK_FOE_IB2_WDMA_QID2 GENMASK(13, 12)
+#define MTK_FOE_IB2_WDMA_DEVIDX BIT(16)
+#define MTK_FOE_IB2_WDMA_WINFO BIT(17)
#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
@@ -58,9 +59,9 @@ enum {
#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
-#define MTK_FOE_VLAN2_WHNAT_BSS GEMMASK(5, 0)
-#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
-#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
+#define MTK_FOE_VLAN2_WINFO_BSS GENMASK(5, 0)
+#define MTK_FOE_VLAN2_WINFO_WCID GENMASK(13, 6)
+#define MTK_FOE_VLAN2_WINFO_RING GENMASK(15, 14)
enum {
MTK_FOE_STATE_INVALID,
@@ -84,19 +85,16 @@ struct mtk_foe_mac_info {
u16 src_mac_lo;
};
+/* software-only entry type */
struct mtk_foe_bridge {
- u32 dest_mac_hi;
+ u8 dest_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
+ u16 vlan;
- u16 src_mac_lo;
- u16 dest_mac_lo;
-
- u32 src_mac_hi;
+ struct {} key_end;
u32 ib2;
- u32 _rsv[5];
-
- u32 udf_tsid;
struct mtk_foe_mac_info l2;
};
@@ -235,7 +233,37 @@ enum {
MTK_PPE_CPU_REASON_INVALID = 0x1f,
};
+enum {
+ MTK_FLOW_TYPE_L4,
+ MTK_FLOW_TYPE_L2,
+ MTK_FLOW_TYPE_L2_SUBFLOW,
+};
+
+struct mtk_flow_entry {
+ union {
+ struct hlist_node list;
+ struct {
+ struct rhash_head l2_node;
+ struct hlist_head l2_flows;
+ };
+ };
+ u8 type;
+ s8 wed_index;
+ u16 hash;
+ union {
+ struct mtk_foe_entry data;
+ struct {
+ struct mtk_flow_entry *base_flow;
+ struct hlist_node list;
+ struct {} end;
+ } l2_data;
+ };
+ struct rhash_head node;
+ unsigned long cookie;
+};
+
struct mtk_ppe {
+ struct mtk_eth *eth;
struct device *dev;
void __iomem *base;
int version;
@@ -243,19 +271,35 @@ struct mtk_ppe {
struct mtk_foe_entry *foe_table;
dma_addr_t foe_phys;
+ u16 foe_check_time[MTK_PPE_ENTRIES];
+ struct hlist_head foe_flow[MTK_PPE_ENTRIES / 2];
+
+ struct rhashtable l2_flows;
+
void *acct_table;
};
-int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
- int version);
+struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int version);
int mtk_ppe_start(struct mtk_ppe *ppe);
int mtk_ppe_stop(struct mtk_ppe *ppe);
+void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash);
+
static inline void
-mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
+mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash)
{
- ppe->foe_table[hash].ib1 = 0;
- dma_wmb();
+ u16 now, diff;
+
+ if (!ppe)
+ return;
+
+ now = (u16)jiffies;
+ diff = now - ppe->foe_check_time[hash];
+ if (diff < HZ / 10)
+ return;
+
+ ppe->foe_check_time[hash] = now;
+ __mtk_ppe_check_skb(ppe, skb, hash);
}
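
mtk_ppe_check_skb() rate-limits the (comparatively expensive) call to
__mtk_ppe_check_skb() to once per HZ/10 per hash bucket, using truncated 16-bit
jiffies. A self-contained sketch of the same idiom, which stays correct across
the wrap at 0xffff as long as the true delta is below 65536 jiffies:

	static bool checked_recently(u16 *last)
	{
		u16 now = (u16)jiffies;

		/* unsigned subtraction handles the wraparound */
		if ((u16)(now - *last) < HZ / 10)
			return true;

		*last = now;
		return false;
	}
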
static inline int
@@ -281,8 +325,11 @@ int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
-int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
- u16 timestamp);
+int mtk_foe_entry_set_wdma(struct mtk_foe_entry *entry, int wdma_idx, int txq,
+ int bss, int wcid);
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
+void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
+int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
#endif
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
index d4b482340cb9..eb0b598f14e4 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
@@ -32,7 +32,6 @@ static const char *mtk_foe_pkt_type_str(int type)
static const char * const type_str[] = {
[MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
[MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
- [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
[MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
[MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
[MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index 7bb1f20002b5..1fe31058b0f2 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -6,10 +6,12 @@
#include <linux/if_ether.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <net/flow_offload.h>
#include <net/pkt_cls.h>
#include <net/dsa.h>
#include "mtk_eth_soc.h"
+#include "mtk_wed.h"
struct mtk_flow_data {
struct ethhdr eth;
@@ -19,11 +21,18 @@ struct mtk_flow_data {
__be32 src_addr;
__be32 dst_addr;
} v4;
+
+ struct {
+ struct in6_addr src_addr;
+ struct in6_addr dst_addr;
+ } v6;
};
__be16 src_port;
__be16 dst_port;
+ u16 vlan_in;
+
struct {
u16 id;
__be16 proto;
@@ -35,12 +44,6 @@ struct mtk_flow_data {
} pppoe;
};
-struct mtk_flow_entry {
- struct rhash_head node;
- unsigned long cookie;
- u16 hash;
-};
-
static const struct rhashtable_params mtk_flow_ht_params = {
.head_offset = offsetof(struct mtk_flow_entry, node),
.key_offset = offsetof(struct mtk_flow_entry, cookie),
@@ -48,12 +51,6 @@ static const struct rhashtable_params mtk_flow_ht_params = {
.automatic_shrinking = true,
};
-static u32
-mtk_eth_timestamp(struct mtk_eth *eth)
-{
- return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
-}
-
static int
mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
bool egress)
@@ -63,6 +60,14 @@ mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
data->v4.dst_addr, data->dst_port);
}
+static int
+mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
+{
+ return mtk_foe_entry_set_ipv6_tuple(foe,
+ data->v6.src_addr.s6_addr32, data->src_port,
+ data->v6.dst_addr.s6_addr32, data->dst_port);
+}
+
static void
mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
{
@@ -80,6 +85,35 @@ mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
memcpy(dest, src, act->mangle.mask ? 2 : 4);
}
+static int
+mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_info *info)
+{
+ struct net_device_path_ctx ctx = {
+ .dev = dev,
+ .daddr = addr,
+ };
+ struct net_device_path path = {};
+
+ if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED))
+ return -1;
+
+ if (!dev->netdev_ops->ndo_fill_forward_path)
+ return -1;
+
+ if (dev->netdev_ops->ndo_fill_forward_path(&ctx, &path))
+ return -1;
+
+ if (path.type != DEV_PATH_MTK_WDMA)
+ return -1;
+
+ info->wdma_idx = path.mtk_wdma.wdma_idx;
+ info->queue = path.mtk_wdma.queue;
+ info->bss = path.mtk_wdma.bss;
+ info->wcid = path.mtk_wdma.wcid;
+
+ return 0;
+}
+
static int
mtk_flow_mangle_ports(const struct flow_action_entry *act,
@@ -149,10 +183,20 @@ mtk_flow_get_dsa_port(struct net_device **dev)
static int
mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
- struct net_device *dev)
+ struct net_device *dev, const u8 *dest_mac,
+ int *wed_index)
{
+ struct mtk_wdma_info info = {};
int pse_port, dsa_port;
+ if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) {
+ mtk_foe_entry_set_wdma(foe, info.wdma_idx, info.queue, info.bss,
+ info.wcid);
+ pse_port = 3;
+ *wed_index = info.wdma_idx;
+ goto out;
+ }
+
dsa_port = mtk_flow_get_dsa_port(&dev);
if (dsa_port >= 0)
mtk_foe_entry_set_dsa(foe, dsa_port);
@@ -164,6 +208,7 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
else
return -EOPNOTSUPP;
+out:
mtk_foe_entry_set_pse_port(foe, pse_port);
return 0;
@@ -179,11 +224,10 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
struct net_device *odev = NULL;
struct mtk_flow_entry *entry;
int offload_type = 0;
+ int wed_index = -1;
u16 addr_type = 0;
- u32 timestamp;
u8 l4proto = 0;
int err = 0;
- int hash;
int i;
if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
@@ -215,9 +259,45 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
return -EOPNOTSUPP;
}
+ switch (addr_type) {
+ case 0:
+ offload_type = MTK_PPE_PKT_TYPE_BRIDGE;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_match_eth_addrs match;
+
+ flow_rule_match_eth_addrs(rule, &match);
+ memcpy(data.eth.h_dest, match.key->dst, ETH_ALEN);
+ memcpy(data.eth.h_source, match.key->src, ETH_ALEN);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_match_vlan match;
+
+ flow_rule_match_vlan(rule, &match);
+
+ if (match.key->vlan_tpid != cpu_to_be16(ETH_P_8021Q))
+ return -EOPNOTSUPP;
+
+ data.vlan_in = match.key->vlan_id;
+ }
+ break;
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
+ break;
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
flow_action_for_each(i, act, &rule->action) {
switch (act->id) {
case FLOW_ACTION_MANGLE:
+ if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
+ return -EOPNOTSUPP;
if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
mtk_flow_offload_mangle_eth(act, &data.eth);
break;
@@ -249,14 +329,6 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
}
}
- switch (addr_type) {
- case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
- offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
if (!is_valid_ether_addr(data.eth.h_source) ||
!is_valid_ether_addr(data.eth.h_dest))
return -EINVAL;
@@ -270,10 +342,13 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
struct flow_match_ports ports;
+ if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
+ return -EOPNOTSUPP;
+
flow_rule_match_ports(rule, &ports);
data.src_port = ports.key->src;
data.dst_port = ports.key->dst;
- } else {
+ } else if (offload_type != MTK_PPE_PKT_TYPE_BRIDGE) {
return -EOPNOTSUPP;
}
@@ -288,10 +363,24 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
mtk_flow_set_ipv4_addr(&foe, &data, false);
}
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs addrs;
+
+ flow_rule_match_ipv6_addrs(rule, &addrs);
+
+ data.v6.src_addr = addrs.key->src;
+ data.v6.dst_addr = addrs.key->dst;
+
+ mtk_flow_set_ipv6_addr(&foe, &data);
+ }
+
flow_action_for_each(i, act, &rule->action) {
if (act->id != FLOW_ACTION_MANGLE)
continue;
+ if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
+ return -EOPNOTSUPP;
+
switch (act->mangle.htype) {
case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
@@ -317,6 +406,9 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
return err;
}
+ if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
+ foe.bridge.vlan = data.vlan_in;
+
if (data.vlan.num == 1) {
if (data.vlan.proto != htons(ETH_P_8021Q))
return -EOPNOTSUPP;
@@ -326,33 +418,38 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
if (data.pppoe.num == 1)
mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
- err = mtk_flow_set_output_device(eth, &foe, odev);
+ err = mtk_flow_set_output_device(eth, &foe, odev, data.eth.h_dest,
+ &wed_index);
if (err)
return err;
+ if (wed_index >= 0 && (err = mtk_wed_flow_add(wed_index)) < 0)
+ return err;
+
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
entry->cookie = f->cookie;
- timestamp = mtk_eth_timestamp(eth);
- hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
- if (hash < 0) {
- err = hash;
+ memcpy(&entry->data, &foe, sizeof(entry->data));
+ entry->wed_index = wed_index;
+
+ if (mtk_foe_entry_commit(eth->ppe, entry) < 0)
goto free;
- }
- entry->hash = hash;
err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
mtk_flow_ht_params);
if (err < 0)
- goto clear_flow;
+ goto clear;
return 0;
-clear_flow:
- mtk_foe_entry_clear(&eth->ppe, hash);
+
+clear:
+ mtk_foe_entry_clear(eth->ppe, entry);
free:
kfree(entry);
+ if (wed_index >= 0)
+ mtk_wed_flow_remove(wed_index);
return err;
}
@@ -366,9 +463,11 @@ mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
if (!entry)
return -ENOENT;
- mtk_foe_entry_clear(&eth->ppe, entry->hash);
+ mtk_foe_entry_clear(eth->ppe, entry);
rhashtable_remove_fast(&eth->flow_table, &entry->node,
mtk_flow_ht_params);
+ if (entry->wed_index >= 0)
+ mtk_wed_flow_remove(entry->wed_index);
kfree(entry);
return 0;
@@ -378,7 +477,6 @@ static int
mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
{
struct mtk_flow_entry *entry;
- int timestamp;
u32 idle;
entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
@@ -386,11 +484,7 @@ mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
if (!entry)
return -ENOENT;
- timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
- if (timestamp < 0)
- return -ETIMEDOUT;
-
- idle = mtk_eth_timestamp(eth) - timestamp;
+ idle = mtk_foe_entry_idle_time(eth->ppe, entry);
f->stats.lastused = jiffies - idle * HZ;
return 0;
@@ -442,7 +536,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
struct flow_block_cb *block_cb;
flow_setup_cb_t *cb;
- if (!eth->ppe.foe_table)
+ if (!eth->ppe || !eth->ppe->foe_table)
return -EOPNOTSUPP;
if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
@@ -483,15 +577,18 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
- if (type == TC_SETUP_FT)
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ case TC_SETUP_FT:
return mtk_eth_setup_tc_block(dev, type_data);
-
- return -EOPNOTSUPP;
+ default:
+ return -EOPNOTSUPP;
+ }
}
int mtk_eth_offload_init(struct mtk_eth *eth)
{
- if (!eth->ppe.foe_table)
+ if (!eth->ppe || !eth->ppe->foe_table)
return 0;
return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
new file mode 100644
index 000000000000..5530f7991d1d
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -0,0 +1,878 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/bitfield.h>
+#include <linux/dma-mapping.h>
+#include <linux/skbuff.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/mfd/syscon.h>
+#include <linux/debugfs.h>
+#include <linux/soc/mediatek/mtk_wed.h>
+#include "mtk_eth_soc.h"
+#include "mtk_wed_regs.h"
+#include "mtk_wed.h"
+#include "mtk_ppe.h"
+
+#define MTK_PCIE_BASE(n) (0x1a143000 + (n) * 0x2000)
+
+#define MTK_WED_PKT_SIZE 1900
+#define MTK_WED_BUF_SIZE 2048
+#define MTK_WED_BUF_PER_PAGE (PAGE_SIZE / 2048)
+
+#define MTK_WED_TX_RING_SIZE 2048
+#define MTK_WED_WDMA_RING_SIZE 1024
+
+static struct mtk_wed_hw *hw_list[2];
+static DEFINE_MUTEX(hw_lock);
+
+static void
+wed_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val)
+{
+ regmap_update_bits(dev->hw->regs, reg, mask | val, val);
+}
+
+static void
+wed_set(struct mtk_wed_device *dev, u32 reg, u32 mask)
+{
+ return wed_m32(dev, reg, 0, mask);
+}
+
+static void
+wed_clr(struct mtk_wed_device *dev, u32 reg, u32 mask)
+{
+ return wed_m32(dev, reg, mask, 0);
+}
+
+static void
+wdma_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val)
+{
+ wdma_w32(dev, reg, (wdma_r32(dev, reg) & ~mask) | val);
+}
+
+static void
+wdma_set(struct mtk_wed_device *dev, u32 reg, u32 mask)
+{
+ wdma_m32(dev, reg, 0, mask);
+}
+
+static u32
+mtk_wed_read_reset(struct mtk_wed_device *dev)
+{
+ return wed_r32(dev, MTK_WED_RESET);
+}
+
+static void
+mtk_wed_reset(struct mtk_wed_device *dev, u32 mask)
+{
+ u32 status;
+
+ wed_w32(dev, MTK_WED_RESET, mask);
+ if (readx_poll_timeout(mtk_wed_read_reset, dev, status,
+ !(status & mask), 0, 1000))
+ WARN_ON_ONCE(1);
+}
+
+static struct mtk_wed_hw *
+mtk_wed_assign(struct mtk_wed_device *dev)
+{
+ struct mtk_wed_hw *hw;
+
+ hw = hw_list[pci_domain_nr(dev->wlan.pci_dev->bus)];
+ if (!hw || hw->wed_dev)
+ return NULL;
+
+ hw->wed_dev = dev;
+ return hw;
+}
+
+static int
+mtk_wed_buffer_alloc(struct mtk_wed_device *dev)
+{
+ struct mtk_wdma_desc *desc;
+ dma_addr_t desc_phys;
+ void **page_list;
+ int token = dev->wlan.token_start;
+ int ring_size;
+ int n_pages;
+ int i, page_idx;
+
+ ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
+ n_pages = ring_size / MTK_WED_BUF_PER_PAGE;
+
+ page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ dev->buf_ring.size = ring_size;
+ dev->buf_ring.pages = page_list;
+
+ desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc),
+ &desc_phys, GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ dev->buf_ring.desc = desc;
+ dev->buf_ring.desc_phys = desc_phys;
+
+ for (i = 0, page_idx = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) {
+ dma_addr_t page_phys, buf_phys;
+ struct page *page;
+ void *buf;
+ int s;
+
+ page = __dev_alloc_pages(GFP_KERNEL, 0);
+ if (!page)
+ return -ENOMEM;
+
+ page_phys = dma_map_page(dev->hw->dev, page, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(dev->hw->dev, page_phys)) {
+ __free_page(page);
+ return -ENOMEM;
+ }
+
+ page_list[page_idx++] = page;
+ dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ buf = page_to_virt(page);
+ buf_phys = page_phys;
+
+ for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) {
+ u32 txd_size;
+ u32 ctrl;
+
+ txd_size = dev->wlan.init_buf(buf, buf_phys, token++);
+
+ desc->buf0 = cpu_to_le32(buf_phys);
+ desc->buf1 = cpu_to_le32(buf_phys + txd_size);
+ ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) |
+ FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1,
+ MTK_WED_BUF_SIZE - txd_size) |
+ MTK_WDMA_DESC_CTRL_LAST_SEG1;
+ desc->ctrl = cpu_to_le32(ctrl);
+ desc->info = 0;
+ desc++;
+
+ buf += MTK_WED_BUF_SIZE;
+ buf_phys += MTK_WED_BUF_SIZE;
+ }
+
+ dma_sync_single_for_device(dev->hw->dev, page_phys, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+
+ return 0;
+}
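
Each page allocated above is carved into MTK_WED_BUF_PER_PAGE slots of
MTK_WED_BUF_SIZE bytes; the first txd_size bytes of a slot hold the TX
descriptor (buf0 points at it) and the remainder the payload (buf1). A
hypothetical helper, not part of this patch, locating slot n in the page list
built above:

	static void *wed_buf_addr(struct mtk_wed_device *dev, int n)
	{
		struct page *page = dev->buf_ring.pages[n / MTK_WED_BUF_PER_PAGE];

		return page_to_virt(page) +
		       (n % MTK_WED_BUF_PER_PAGE) * MTK_WED_BUF_SIZE;
	}
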
+
+static void
+mtk_wed_free_buffer(struct mtk_wed_device *dev)
+{
+ struct mtk_wdma_desc *desc = dev->buf_ring.desc;
+ void **page_list = dev->buf_ring.pages;
+ int page_idx;
+ int i;
+
+ if (!page_list)
+ return;
+
+ if (!desc)
+ goto free_pagelist;
+
+ for (i = 0, page_idx = 0; i < dev->buf_ring.size; i += MTK_WED_BUF_PER_PAGE) {
+ void *page = page_list[page_idx++];
+ dma_addr_t buf_addr;
+
+ if (!page)
+ break;
+
+ buf_addr = le32_to_cpu(desc[i].buf0);
+ dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ __free_page(page);
+ }
+
+ dma_free_coherent(dev->hw->dev, dev->buf_ring.size * sizeof(*desc),
+ desc, dev->buf_ring.desc_phys);
+
+free_pagelist:
+ kfree(page_list);
+}
+
+static void
+mtk_wed_free_ring(struct mtk_wed_device *dev, struct mtk_wed_ring *ring)
+{
+ if (!ring->desc)
+ return;
+
+ dma_free_coherent(dev->hw->dev, ring->size * sizeof(*ring->desc),
+ ring->desc, ring->desc_phys);
+}
+
+static void
+mtk_wed_free_tx_rings(struct mtk_wed_device *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++)
+ mtk_wed_free_ring(dev, &dev->tx_ring[i]);
+ for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++)
+ mtk_wed_free_ring(dev, &dev->tx_wdma[i]);
+}
+
+static void
+mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en)
+{
+ u32 mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK;
+
+ if (!dev->hw->num_flows)
+ mask &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
+
+ wed_w32(dev, MTK_WED_EXT_INT_MASK, en ? mask : 0);
+ wed_r32(dev, MTK_WED_EXT_INT_MASK);
+}
+
+static void
+mtk_wed_stop(struct mtk_wed_device *dev)
+{
+ regmap_write(dev->hw->mirror, dev->hw->index * 4, 0);
+ mtk_wed_set_ext_int(dev, false);
+
+ wed_clr(dev, MTK_WED_CTRL,
+ MTK_WED_CTRL_WDMA_INT_AGENT_EN |
+ MTK_WED_CTRL_WPDMA_INT_AGENT_EN |
+ MTK_WED_CTRL_WED_TX_BM_EN |
+ MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
+ wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0);
+ wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0);
+ wdma_w32(dev, MTK_WDMA_INT_MASK, 0);
+ wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
+ wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
+
+ wed_clr(dev, MTK_WED_GLO_CFG,
+ MTK_WED_GLO_CFG_TX_DMA_EN |
+ MTK_WED_GLO_CFG_RX_DMA_EN);
+ wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
+ MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
+ MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
+ wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
+ MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
+}
+
+static void
+mtk_wed_detach(struct mtk_wed_device *dev)
+{
+ struct device_node *wlan_node = dev->wlan.pci_dev->dev.of_node;
+ struct mtk_wed_hw *hw = dev->hw;
+
+ mutex_lock(&hw_lock);
+
+ mtk_wed_stop(dev);
+
+ wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX);
+ wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
+
+ mtk_wed_reset(dev, MTK_WED_RESET_WED);
+
+ mtk_wed_free_buffer(dev);
+ mtk_wed_free_tx_rings(dev);
+
+ if (of_dma_is_coherent(wlan_node))
+ regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP,
+ BIT(hw->index), BIT(hw->index));
+
+ if (!hw_list[!hw->index]->wed_dev &&
+ hw->eth->dma_dev != hw->eth->dev)
+ mtk_eth_set_dma_device(hw->eth, hw->eth->dev);
+
+ memset(dev, 0, sizeof(*dev));
+ module_put(THIS_MODULE);
+
+ hw->wed_dev = NULL;
+ mutex_unlock(&hw_lock);
+}
+
+static void
+mtk_wed_hw_init_early(struct mtk_wed_device *dev)
+{
+ u32 mask, set;
+ u32 offset;
+
+ mtk_wed_stop(dev);
+ mtk_wed_reset(dev, MTK_WED_RESET_WED);
+
+ mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE |
+ MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE |
+ MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE;
+ set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2) |
+ MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP |
+ MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY;
+ wed_m32(dev, MTK_WED_WDMA_GLO_CFG, mask, set);
+
+ wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_RX_INFO_PRERES);
+
+ offset = dev->hw->index ? 0x04000400 : 0;
+ wed_w32(dev, MTK_WED_WDMA_OFFSET0, 0x2a042a20 + offset);
+ wed_w32(dev, MTK_WED_WDMA_OFFSET1, 0x29002800 + offset);
+
+ wed_w32(dev, MTK_WED_PCIE_CFG_BASE, MTK_PCIE_BASE(dev->hw->index));
+ wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys);
+}
+
+static void
+mtk_wed_hw_init(struct mtk_wed_device *dev)
+{
+ if (dev->init_done)
+ return;
+
+ dev->init_done = true;
+ mtk_wed_set_ext_int(dev, false);
+ wed_w32(dev, MTK_WED_TX_BM_CTRL,
+ MTK_WED_TX_BM_CTRL_PAUSE |
+ FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM,
+ dev->buf_ring.size / 128) |
+ FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM,
+ MTK_WED_TX_RING_SIZE / 256));
+
+ wed_w32(dev, MTK_WED_TX_BM_BASE, dev->buf_ring.desc_phys);
+
+ wed_w32(dev, MTK_WED_TX_BM_TKID,
+ FIELD_PREP(MTK_WED_TX_BM_TKID_START,
+ dev->wlan.token_start) |
+ FIELD_PREP(MTK_WED_TX_BM_TKID_END,
+ dev->wlan.token_start + dev->wlan.nbuf - 1));
+
+ wed_w32(dev, MTK_WED_TX_BM_BUF_LEN, MTK_WED_PKT_SIZE);
+
+ wed_w32(dev, MTK_WED_TX_BM_DYN_THR,
+ FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, 1) |
+ MTK_WED_TX_BM_DYN_THR_HI);
+
+ mtk_wed_reset(dev, MTK_WED_RESET_TX_BM);
+
+ wed_set(dev, MTK_WED_CTRL,
+ MTK_WED_CTRL_WED_TX_BM_EN |
+ MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
+
+ wed_clr(dev, MTK_WED_TX_BM_CTRL, MTK_WED_TX_BM_CTRL_PAUSE);
+}
+
+static void
+mtk_wed_ring_reset(struct mtk_wdma_desc *desc, int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++) {
+ desc[i].buf0 = 0;
+ desc[i].ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
+ desc[i].buf1 = 0;
+ desc[i].info = 0;
+ }
+}
+
+static u32
+mtk_wed_check_busy(struct mtk_wed_device *dev)
+{
+ if (wed_r32(dev, MTK_WED_GLO_CFG) & MTK_WED_GLO_CFG_TX_DMA_BUSY)
+ return true;
+
+ if (wed_r32(dev, MTK_WED_WPDMA_GLO_CFG) &
+ MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY)
+ return true;
+
+ if (wed_r32(dev, MTK_WED_CTRL) & MTK_WED_CTRL_WDMA_INT_AGENT_BUSY)
+ return true;
+
+ if (wed_r32(dev, MTK_WED_WDMA_GLO_CFG) &
+ MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY)
+ return true;
+
+ if (wdma_r32(dev, MTK_WDMA_GLO_CFG) &
+ MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY)
+ return true;
+
+ if (wed_r32(dev, MTK_WED_CTRL) &
+ (MTK_WED_CTRL_WED_TX_BM_BUSY | MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY))
+ return true;
+
+ return false;
+}
+
+static int
+mtk_wed_poll_busy(struct mtk_wed_device *dev)
+{
+ int sleep = 15000;
+ int timeout = 100 * sleep;
+ u32 val;
+
+ return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep,
+ timeout, false, dev);
+}
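
mtk_wed_poll_busy() leans on the generic iopoll helpers: read_poll_timeout(op,
val, cond, sleep_us, timeout_us, sleep_before_read, args...) re-invokes
op(args...) every sleep_us until cond holds or timeout_us expires, so the call
above samples mtk_wed_check_busy() every 15ms for up to 1.5s. For MMIO registers
the same family offers readl_poll_timeout(); a minimal sketch, assuming a status
register whose bit 0 signals busy:

	#include <linux/iopoll.h>

	static int wait_not_busy(void __iomem *status_reg)
	{
		u32 val;

		/* sample every 10ms, give up after 1s */
		return readl_poll_timeout(status_reg, val,
					  !(val & BIT(0)), 10000, 1000000);
	}
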
+
+static void
+mtk_wed_reset_dma(struct mtk_wed_device *dev)
+{
+ bool busy = false;
+ u32 val;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++) {
+ struct mtk_wdma_desc *desc = dev->tx_ring[i].desc;
+
+ if (!desc)
+ continue;
+
+ mtk_wed_ring_reset(desc, MTK_WED_TX_RING_SIZE);
+ }
+
+ if (mtk_wed_poll_busy(dev))
+ busy = mtk_wed_check_busy(dev);
+
+ if (busy) {
+ mtk_wed_reset(dev, MTK_WED_RESET_WED_TX_DMA);
+ } else {
+ wed_w32(dev, MTK_WED_RESET_IDX,
+ MTK_WED_RESET_IDX_TX |
+ MTK_WED_RESET_IDX_RX);
+ wed_w32(dev, MTK_WED_RESET_IDX, 0);
+ }
+
+ wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX);
+ wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
+
+ if (busy) {
+ mtk_wed_reset(dev, MTK_WED_RESET_WDMA_INT_AGENT);
+ mtk_wed_reset(dev, MTK_WED_RESET_WDMA_RX_DRV);
+ } else {
+ wed_w32(dev, MTK_WED_WDMA_RESET_IDX,
+ MTK_WED_WDMA_RESET_IDX_RX | MTK_WED_WDMA_RESET_IDX_DRV);
+ wed_w32(dev, MTK_WED_WDMA_RESET_IDX, 0);
+
+ wed_set(dev, MTK_WED_WDMA_GLO_CFG,
+ MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE);
+
+ wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
+ MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE);
+ }
+
+ for (i = 0; i < 100; i++) {
+ val = wed_r32(dev, MTK_WED_TX_BM_INTF);
+ if (FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP, val) == 0x40)
+ break;
+ }
+
+ mtk_wed_reset(dev, MTK_WED_RESET_TX_FREE_AGENT);
+ mtk_wed_reset(dev, MTK_WED_RESET_TX_BM);
+
+ if (busy) {
+ mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT);
+ mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_TX_DRV);
+ mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_DRV);
+ } else {
+ wed_w32(dev, MTK_WED_WPDMA_RESET_IDX,
+ MTK_WED_WPDMA_RESET_IDX_TX |
+ MTK_WED_WPDMA_RESET_IDX_RX);
+ wed_w32(dev, MTK_WED_WPDMA_RESET_IDX, 0);
+ }
+}
+
+static int
+mtk_wed_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring,
+ int size)
+{
+ ring->desc = dma_alloc_coherent(dev->hw->dev,
+ size * sizeof(*ring->desc),
+ &ring->desc_phys, GFP_KERNEL);
+ if (!ring->desc)
+ return -ENOMEM;
+
+ ring->size = size;
+ mtk_wed_ring_reset(ring->desc, size);
+
+ return 0;
+}
+
+static int
+mtk_wed_wdma_ring_setup(struct mtk_wed_device *dev, int idx, int size)
+{
+ struct mtk_wed_ring *wdma = &dev->tx_wdma[idx];
+
+ if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE))
+ return -ENOMEM;
+
+ wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE,
+ wdma->desc_phys);
+ wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_COUNT,
+ size);
+ wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0);
+
+ wed_w32(dev, MTK_WED_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE,
+ wdma->desc_phys);
+ wed_w32(dev, MTK_WED_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_COUNT,
+ size);
+
+ return 0;
+}
+
+static void
+mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
+{
+ u32 wdma_mask;
+ u32 val;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++)
+ if (!dev->tx_wdma[i].desc)
+ mtk_wed_wdma_ring_setup(dev, i, 16);
+
+ wdma_mask = FIELD_PREP(MTK_WDMA_INT_MASK_RX_DONE, GENMASK(1, 0));
+
+ mtk_wed_hw_init(dev);
+
+ wed_set(dev, MTK_WED_CTRL,
+ MTK_WED_CTRL_WDMA_INT_AGENT_EN |
+ MTK_WED_CTRL_WPDMA_INT_AGENT_EN |
+ MTK_WED_CTRL_WED_TX_BM_EN |
+ MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
+
+ wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, MTK_WED_PCIE_INT_TRIGGER_STATUS);
+
+ wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER,
+ MTK_WED_WPDMA_INT_TRIGGER_RX_DONE |
+ MTK_WED_WPDMA_INT_TRIGGER_TX_DONE);
+
+ wed_set(dev, MTK_WED_WPDMA_INT_CTRL,
+ MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
+
+ wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, wdma_mask);
+ wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask);
+
+ wdma_w32(dev, MTK_WDMA_INT_MASK, wdma_mask);
+ wdma_w32(dev, MTK_WDMA_INT_GRP2, wdma_mask);
+
+ wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask);
+ wed_w32(dev, MTK_WED_INT_MASK, irq_mask);
+
+ wed_set(dev, MTK_WED_GLO_CFG,
+ MTK_WED_GLO_CFG_TX_DMA_EN |
+ MTK_WED_GLO_CFG_RX_DMA_EN);
+ wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
+ MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
+ MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
+ wed_set(dev, MTK_WED_WDMA_GLO_CFG,
+ MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
+
+ mtk_wed_set_ext_int(dev, true);
+ val = dev->wlan.wpdma_phys |
+ MTK_PCIE_MIRROR_MAP_EN |
+ FIELD_PREP(MTK_PCIE_MIRROR_MAP_WED_ID, dev->hw->index);
+
+ if (dev->hw->index)
+ val |= BIT(1);
+ val |= BIT(0);
+ regmap_write(dev->hw->mirror, dev->hw->index * 4, val);
+
+ dev->running = true;
+}
+
+static int
+mtk_wed_attach(struct mtk_wed_device *dev)
+ __releases(RCU)
+{
+ struct mtk_wed_hw *hw;
+ int ret = 0;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
+ "mtk_wed_attach without holding the RCU read lock");
+
+ if (pci_domain_nr(dev->wlan.pci_dev->bus) > 1 ||
+ !try_module_get(THIS_MODULE))
+ ret = -ENODEV;
+
+ rcu_read_unlock();
+
+ if (ret)
+ return ret;
+
+ mutex_lock(&hw_lock);
+
+ hw = mtk_wed_assign(dev);
+ if (!hw) {
+ module_put(THIS_MODULE);
+ ret = -ENODEV;
+ goto out;
+ }
+
+ dev_info(&dev->wlan.pci_dev->dev, "attaching wed device %d\n", hw->index);
+
+ dev->hw = hw;
+ dev->dev = hw->dev;
+ dev->irq = hw->irq;
+ dev->wdma_idx = hw->index;
+
+ if (hw->eth->dma_dev == hw->eth->dev &&
+ of_dma_is_coherent(hw->eth->dev->of_node))
+ mtk_eth_set_dma_device(hw->eth, hw->dev);
+
+ ret = mtk_wed_buffer_alloc(dev);
+ if (ret) {
+ mtk_wed_detach(dev);
+ goto out;
+ }
+
+ mtk_wed_hw_init_early(dev);
+ regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP, BIT(hw->index), 0);
+
+out:
+ mutex_unlock(&hw_lock);
+
+ return ret;
+}
+
+static int
+mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
+{
+ struct mtk_wed_ring *ring = &dev->tx_ring[idx];
+
+ /*
+ * Tx ring redirection:
+ * Instead of configuring the WLAN PDMA TX ring directly, the DMA ring
+ * allocated by the WLAN driver gets configured into the WED
+ * MTK_WED_RING_TX(n) registers.
+ *
+ * The WED driver posts its own DMA ring as the WLAN PDMA TX ring and
+ * configures it into the MTK_WED_WPDMA_RING_TX(n) registers.
+ * That ring gets filled with packets picked up from the WED TX ring
+ * and from WDMA RX.
+ */
+
+ BUG_ON(idx >= ARRAY_SIZE(dev->tx_ring));
+
+ if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE))
+ return -ENOMEM;
+
+ if (mtk_wed_wdma_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE))
+ return -ENOMEM;
+
+ ring->reg_base = MTK_WED_RING_TX(idx);
+ ring->wpdma = regs;
+
+ /* WED -> WPDMA */
+ wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_BASE, ring->desc_phys);
+ wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_COUNT, MTK_WED_TX_RING_SIZE);
+ wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_CPU_IDX, 0);
+
+ wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE,
+ ring->desc_phys);
+ wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_COUNT,
+ MTK_WED_TX_RING_SIZE);
+ wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0);
+
+ return 0;
+}
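
WLAN drivers reach this handler through the mtk_wed_ops table registered in
mtk_wed_add_hw() below; attach() is annotated __releases(RCU) because the caller
holds the RCU read lock taken to dereference mtk_soc_wed_ops. A hypothetical
WLAN-side fragment (function and variable names are placeholders, not part of
this patch):

	static int example_wed_attach(struct mtk_wed_device *wed_dev,
				      void __iomem *pdma_tx_regs)
	{
		const struct mtk_wed_ops *ops;
		int ret;

		rcu_read_lock();
		ops = rcu_dereference(mtk_soc_wed_ops);
		if (!ops) {
			rcu_read_unlock();
			return -ENODEV;
		}

		ret = ops->attach(wed_dev);	/* releases the RCU read lock */
		if (ret)
			return ret;

		return ops->tx_ring_setup(wed_dev, 0, pdma_tx_regs);
	}
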
+
+static int
+mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs)
+{
+ struct mtk_wed_ring *ring = &dev->txfree_ring;
+ int i;
+
+ /*
+ * For txfree event handling, the same DMA ring is shared between WED
+ * and WLAN. The WLAN driver accesses the ring index registers through
+ * WED.
+ */
+ ring->reg_base = MTK_WED_RING_RX(1);
+ ring->wpdma = regs;
+
+ for (i = 0; i < 12; i += 4) {
+ u32 val = readl(regs + i);
+
+ wed_w32(dev, MTK_WED_RING_RX(1) + i, val);
+ wed_w32(dev, MTK_WED_WPDMA_RING_RX(1) + i, val);
+ }
+
+ return 0;
+}
+
+static u32
+mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
+{
+ u32 val;
+
+ val = wed_r32(dev, MTK_WED_EXT_INT_STATUS);
+ wed_w32(dev, MTK_WED_EXT_INT_STATUS, val);
+ val &= MTK_WED_EXT_INT_STATUS_ERROR_MASK;
+ if (!dev->hw->num_flows)
+ val &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
+ if (val && net_ratelimit())
+ pr_err("mtk_wed%d: error status=%08x\n", dev->hw->index, val);
+
+ val = wed_r32(dev, MTK_WED_INT_STATUS);
+ val &= mask;
+ wed_w32(dev, MTK_WED_INT_STATUS, val); /* ACK */
+
+ return val;
+}
+
+static void
+mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask)
+{
+ if (!dev->running)
+ return;
+
+ mtk_wed_set_ext_int(dev, !!mask);
+ wed_w32(dev, MTK_WED_INT_MASK, mask);
+}
+
+int mtk_wed_flow_add(int index)
+{
+ struct mtk_wed_hw *hw = hw_list[index];
+ int ret;
+
+ if (!hw || !hw->wed_dev)
+ return -ENODEV;
+
+ if (hw->num_flows) {
+ hw->num_flows++;
+ return 0;
+ }
+
+ mutex_lock(&hw_lock);
+ if (!hw->wed_dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ ret = hw->wed_dev->wlan.offload_enable(hw->wed_dev);
+ if (!ret)
+ hw->num_flows++;
+ mtk_wed_set_ext_int(hw->wed_dev, true);
+
+out:
+ mutex_unlock(&hw_lock);
+
+ return ret;
+}
+
+void mtk_wed_flow_remove(int index)
+{
+ struct mtk_wed_hw *hw = hw_list[index];
+
+ if (!hw)
+ return;
+
+ if (--hw->num_flows)
+ return;
+
+ mutex_lock(&hw_lock);
+ if (!hw->wed_dev)
+ goto out;
+
+ hw->wed_dev->wlan.offload_disable(hw->wed_dev);
+ mtk_wed_set_ext_int(hw->wed_dev, true);
+
+out:
+ mutex_unlock(&hw_lock);
+}
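
mtk_wed_flow_add()/mtk_wed_flow_remove() refcount offloaded flows per WED
instance: the first add invokes the WLAN driver's offload_enable() hook, the
last remove invokes offload_disable(). The PPE offload path above pairs them
around the FOE entry lifetime; an illustrative sketch of that pairing as small
helpers (not part of this patch):

	static int example_take_wed_ref(int wed_index)
	{
		if (wed_index < 0)	/* flow does not go through WED */
			return 0;

		return mtk_wed_flow_add(wed_index);
	}

	static void example_drop_wed_ref(int wed_index)
	{
		if (wed_index >= 0)
			mtk_wed_flow_remove(wed_index);
	}
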
+
+void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
+ void __iomem *wdma, int index)
+{
+ static const struct mtk_wed_ops wed_ops = {
+ .attach = mtk_wed_attach,
+ .tx_ring_setup = mtk_wed_tx_ring_setup,
+ .txfree_ring_setup = mtk_wed_txfree_ring_setup,
+ .start = mtk_wed_start,
+ .stop = mtk_wed_stop,
+ .reset_dma = mtk_wed_reset_dma,
+ .reg_read = wed_r32,
+ .reg_write = wed_w32,
+ .irq_get = mtk_wed_irq_get,
+ .irq_set_mask = mtk_wed_irq_set_mask,
+ .detach = mtk_wed_detach,
+ };
+ struct device_node *eth_np = eth->dev->of_node;
+ struct platform_device *pdev;
+ struct mtk_wed_hw *hw;
+ struct regmap *regs;
+ int irq;
+
+ if (!np)
+ return;
+
+ pdev = of_find_device_by_node(np);
+ if (!pdev)
+ return;
+
+ get_device(&pdev->dev);
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return;
+
+ regs = syscon_regmap_lookup_by_phandle(np, NULL);
+ if (IS_ERR(regs))
+ return;
+
+ rcu_assign_pointer(mtk_soc_wed_ops, &wed_ops);
+
+ mutex_lock(&hw_lock);
+
+ if (WARN_ON(hw_list[index]))
+ goto unlock;
+
+ hw = kzalloc(sizeof(*hw), GFP_KERNEL);
+ if (!hw)
+ goto unlock;
+ hw->node = np;
+ hw->regs = regs;
+ hw->eth = eth;
+ hw->dev = &pdev->dev;
+ hw->wdma = wdma;
+ hw->index = index;
+ hw->irq = irq;
+ hw->mirror = syscon_regmap_lookup_by_phandle(eth_np,
+ "mediatek,pcie-mirror");
+ hw->hifsys = syscon_regmap_lookup_by_phandle(eth_np,
+ "mediatek,hifsys");
+ if (IS_ERR(hw->mirror) || IS_ERR(hw->hifsys)) {
+ kfree(hw);
+ goto unlock;
+ }
+
+ if (!index) {
+ regmap_write(hw->mirror, 0, 0);
+ regmap_write(hw->mirror, 4, 0);
+ }
+ mtk_wed_hw_add_debugfs(hw);
+
+ hw_list[index] = hw;
+
+unlock:
+ mutex_unlock(&hw_lock);
+}
+
+void mtk_wed_exit(void)
+{
+ int i;
+
+ rcu_assign_pointer(mtk_soc_wed_ops, NULL);
+
+ synchronize_rcu();
+
+ for (i = 0; i < ARRAY_SIZE(hw_list); i++) {
+ struct mtk_wed_hw *hw;
+
+ hw = hw_list[i];
+ if (!hw)
+ continue;
+
+ hw_list[i] = NULL;
+ debugfs_remove(hw->debugfs_dir);
+ put_device(hw->dev);
+ kfree(hw);
+ }
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h
new file mode 100644
index 000000000000..981ec613f4b0
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_wed.h
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */
+
+#ifndef __MTK_WED_PRIV_H
+#define __MTK_WED_PRIV_H
+
+#include <linux/soc/mediatek/mtk_wed.h>
+#include <linux/debugfs.h>
+#include <linux/regmap.h>
+#include <linux/netdevice.h>
+
+struct mtk_eth;
+
+struct mtk_wed_hw {
+ struct device_node *node;
+ struct mtk_eth *eth;
+ struct regmap *regs;
+ struct regmap *hifsys;
+ struct device *dev;
+ void __iomem *wdma;
+ struct regmap *mirror;
+ struct dentry *debugfs_dir;
+ struct mtk_wed_device *wed_dev;
+ u32 debugfs_reg;
+ u32 num_flows;
+ char dirname[5];
+ int irq;
+ int index;
+};
+
+struct mtk_wdma_info {
+ u8 wdma_idx;
+ u8 queue;
+ u16 wcid;
+ u8 bss;
+};
+
+#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+static inline void
+wed_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
+{
+ regmap_write(dev->hw->regs, reg, val);
+}
+
+static inline u32
+wed_r32(struct mtk_wed_device *dev, u32 reg)
+{
+ unsigned int val;
+
+ regmap_read(dev->hw->regs, reg, &val);
+
+ return val;
+}
+
+static inline void
+wdma_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
+{
+ writel(val, dev->hw->wdma + reg);
+}
+
+static inline u32
+wdma_r32(struct mtk_wed_device *dev, u32 reg)
+{
+ return readl(dev->hw->wdma + reg);
+}
+
+static inline u32
+wpdma_tx_r32(struct mtk_wed_device *dev, int ring, u32 reg)
+{
+ if (!dev->tx_ring[ring].wpdma)
+ return 0;
+
+ return readl(dev->tx_ring[ring].wpdma + reg);
+}
+
+static inline void
+wpdma_tx_w32(struct mtk_wed_device *dev, int ring, u32 reg, u32 val)
+{
+ if (!dev->tx_ring[ring].wpdma)
+ return;
+
+ writel(val, dev->tx_ring[ring].wpdma + reg);
+}
+
+static inline u32
+wpdma_txfree_r32(struct mtk_wed_device *dev, u32 reg)
+{
+ if (!dev->txfree_ring.wpdma)
+ return 0;
+
+ return readl(dev->txfree_ring.wpdma + reg);
+}
+
+static inline void
+wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
+{
+ if (!dev->txfree_ring.wpdma)
+ return;
+
+ writel(val, dev->txfree_ring.wpdma + reg);
+}
+
+void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
+ void __iomem *wdma, int index);
+void mtk_wed_exit(void);
+int mtk_wed_flow_add(int index);
+void mtk_wed_flow_remove(int index);
+#else
+static inline void
+mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
+ void __iomem *wdma, int index)
+{
+}
+static inline void
+mtk_wed_exit(void)
+{
+}
+static inline int mtk_wed_flow_add(int index)
+{
+ return -EINVAL;
+}
+static inline void mtk_wed_flow_remove(int index)
+{
+}
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw);
+#else
+static inline void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
new file mode 100644
index 000000000000..a81d3fd1a439
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/seq_file.h>
+#include "mtk_wed.h"
+#include "mtk_wed_regs.h"
+
+struct reg_dump {
+ const char *name;
+ u16 offset;
+ u8 type;
+ u8 base;
+};
+
+enum {
+ DUMP_TYPE_STRING,
+ DUMP_TYPE_WED,
+ DUMP_TYPE_WDMA,
+ DUMP_TYPE_WPDMA_TX,
+ DUMP_TYPE_WPDMA_TXFREE,
+};
+
+#define DUMP_STR(_str) { _str, 0, DUMP_TYPE_STRING }
+#define DUMP_REG(_reg, ...) { #_reg, MTK_##_reg, __VA_ARGS__ }
+#define DUMP_RING(_prefix, _base, ...) \
+ { _prefix " BASE", _base, __VA_ARGS__ }, \
+ { _prefix " CNT", _base + 0x4, __VA_ARGS__ }, \
+ { _prefix " CIDX", _base + 0x8, __VA_ARGS__ }, \
+ { _prefix " DIDX", _base + 0xc, __VA_ARGS__ }
+
+#define DUMP_WED(_reg) DUMP_REG(_reg, DUMP_TYPE_WED)
+#define DUMP_WED_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WED)
+
+#define DUMP_WDMA(_reg) DUMP_REG(_reg, DUMP_TYPE_WDMA)
+#define DUMP_WDMA_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WDMA)
+
+#define DUMP_WPDMA_TX_RING(_n) DUMP_RING("WPDMA_TX" #_n, 0, DUMP_TYPE_WPDMA_TX, _n)
+#define DUMP_WPDMA_TXFREE_RING DUMP_RING("WPDMA_RX1", 0, DUMP_TYPE_WPDMA_TXFREE)
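
DUMP_RING() stringizes its prefix and emits four reg_dump entries covering the
BASE/CNT/CIDX/DIDX words of a ring; the MTK_## token paste in DUMP_WED_RING()
turns the register name into its offset macro. As an illustration,
DUMP_WED_RING(WED_RING_TX(0)) expands to:

	{ "WED_RING_TX(0) BASE", MTK_WED_RING_TX(0), DUMP_TYPE_WED },
	{ "WED_RING_TX(0) CNT",  MTK_WED_RING_TX(0) + 0x4, DUMP_TYPE_WED },
	{ "WED_RING_TX(0) CIDX", MTK_WED_RING_TX(0) + 0x8, DUMP_TYPE_WED },
	{ "WED_RING_TX(0) DIDX", MTK_WED_RING_TX(0) + 0xc, DUMP_TYPE_WED },
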
+
+static void
+print_reg_val(struct seq_file *s, const char *name, u32 val)
+{
+ seq_printf(s, "%-32s %08x\n", name, val);
+}
+
+static void
+dump_wed_regs(struct seq_file *s, struct mtk_wed_device *dev,
+ const struct reg_dump *regs, int n_regs)
+{
+ const struct reg_dump *cur;
+ u32 val;
+
+ for (cur = regs; cur < &regs[n_regs]; cur++) {
+ switch (cur->type) {
+ case DUMP_TYPE_STRING:
+ seq_printf(s, "%s======== %s:\n",
+ cur > regs ? "\n" : "",
+ cur->name);
+ continue;
+ case DUMP_TYPE_WED:
+ val = wed_r32(dev, cur->offset);
+ break;
+ case DUMP_TYPE_WDMA:
+ val = wdma_r32(dev, cur->offset);
+ break;
+ case DUMP_TYPE_WPDMA_TX:
+ val = wpdma_tx_r32(dev, cur->base, cur->offset);
+ break;
+ case DUMP_TYPE_WPDMA_TXFREE:
+ val = wpdma_txfree_r32(dev, cur->offset);
+ break;
+ }
+ print_reg_val(s, cur->name, val);
+ }
+}
+
+
+static int
+wed_txinfo_show(struct seq_file *s, void *data)
+{
+ static const struct reg_dump regs[] = {
+ DUMP_STR("WED TX"),
+ DUMP_WED(WED_TX_MIB(0)),
+ DUMP_WED_RING(WED_RING_TX(0)),
+
+ DUMP_WED(WED_TX_MIB(1)),
+ DUMP_WED_RING(WED_RING_TX(1)),
+
+ DUMP_STR("WPDMA TX"),
+ DUMP_WED(WED_WPDMA_TX_MIB(0)),
+ DUMP_WED_RING(WED_WPDMA_RING_TX(0)),
+ DUMP_WED(WED_WPDMA_TX_COHERENT_MIB(0)),
+
+ DUMP_WED(WED_WPDMA_TX_MIB(1)),
+ DUMP_WED_RING(WED_WPDMA_RING_TX(1)),
+ DUMP_WED(WED_WPDMA_TX_COHERENT_MIB(1)),
+
+ DUMP_STR("WPDMA TX"),
+ DUMP_WPDMA_TX_RING(0),
+ DUMP_WPDMA_TX_RING(1),
+
+ DUMP_STR("WED WDMA RX"),
+ DUMP_WED(WED_WDMA_RX_MIB(0)),
+ DUMP_WED_RING(WED_WDMA_RING_RX(0)),
+ DUMP_WED(WED_WDMA_RX_THRES(0)),
+ DUMP_WED(WED_WDMA_RX_RECYCLE_MIB(0)),
+ DUMP_WED(WED_WDMA_RX_PROCESSED_MIB(0)),
+
+ DUMP_WED(WED_WDMA_RX_MIB(1)),
+ DUMP_WED_RING(WED_WDMA_RING_RX(1)),
+ DUMP_WED(WED_WDMA_RX_THRES(1)),
+ DUMP_WED(WED_WDMA_RX_RECYCLE_MIB(1)),
+ DUMP_WED(WED_WDMA_RX_PROCESSED_MIB(1)),
+
+ DUMP_STR("WDMA RX"),
+ DUMP_WDMA(WDMA_GLO_CFG),
+ DUMP_WDMA_RING(WDMA_RING_RX(0)),
+ DUMP_WDMA_RING(WDMA_RING_RX(1)),
+ };
+ struct mtk_wed_hw *hw = s->private;
+ struct mtk_wed_device *dev = hw->wed_dev;
+
+ if (!dev)
+ return 0;
+
+ dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wed_txinfo);
+
+
+static int
+mtk_wed_reg_set(void *data, u64 val)
+{
+ struct mtk_wed_hw *hw = data;
+
+ regmap_write(hw->regs, hw->debugfs_reg, val);
+
+ return 0;
+}
+
+static int
+mtk_wed_reg_get(void *data, u64 *val)
+{
+ struct mtk_wed_hw *hw = data;
+ unsigned int regval;
+ int ret;
+
+ ret = regmap_read(hw->regs, hw->debugfs_reg, &regval);
+ if (ret)
+ return ret;
+
+ *val = regval;
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_regval, mtk_wed_reg_get, mtk_wed_reg_set,
+ "0x%08llx\n");
+
+void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw)
+{
+ struct dentry *dir;
+
+ snprintf(hw->dirname, sizeof(hw->dirname), "wed%d", hw->index);
+ dir = debugfs_create_dir(hw->dirname, NULL);
+ if (!dir)
+ return;
+
+ hw->debugfs_dir = dir;
+ debugfs_create_u32("regidx", 0600, dir, &hw->debugfs_reg);
+ debugfs_create_file_unsafe("regval", 0600, dir, hw, &fops_regval);
+ debugfs_create_file_unsafe("txinfo", 0400, dir, hw, &wed_txinfo_fops);
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_ops.c b/drivers/net/ethernet/mediatek/mtk_wed_ops.c
new file mode 100644
index 000000000000..a5d9d8a5bce2
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_wed_ops.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/soc/mediatek/mtk_wed.h>
+
+const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
+EXPORT_SYMBOL_GPL(mtk_soc_wed_ops);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
new file mode 100644
index 000000000000..0a0465ea58b4
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#ifndef __MTK_WED_REGS_H
+#define __MTK_WED_REGS_H
+
+#define MTK_WDMA_DESC_CTRL_LEN1 GENMASK(14, 0)
+#define MTK_WDMA_DESC_CTRL_LAST_SEG1 BIT(15)
+#define MTK_WDMA_DESC_CTRL_BURST BIT(16)
+#define MTK_WDMA_DESC_CTRL_LEN0 GENMASK(29, 16)
+#define MTK_WDMA_DESC_CTRL_LAST_SEG0 BIT(30)
+#define MTK_WDMA_DESC_CTRL_DMA_DONE BIT(31)
+
+struct mtk_wdma_desc {
+ __le32 buf0;
+ __le32 ctrl;
+ __le32 buf1;
+ __le32 info;
+} __packed __aligned(4);
+
+#define MTK_WED_RESET 0x008
+#define MTK_WED_RESET_TX_BM BIT(0)
+#define MTK_WED_RESET_TX_FREE_AGENT BIT(4)
+#define MTK_WED_RESET_WPDMA_TX_DRV BIT(8)
+#define MTK_WED_RESET_WPDMA_RX_DRV BIT(9)
+#define MTK_WED_RESET_WPDMA_INT_AGENT BIT(11)
+#define MTK_WED_RESET_WED_TX_DMA BIT(12)
+#define MTK_WED_RESET_WDMA_RX_DRV BIT(17)
+#define MTK_WED_RESET_WDMA_INT_AGENT BIT(19)
+#define MTK_WED_RESET_WED BIT(31)
+
+#define MTK_WED_CTRL 0x00c
+#define MTK_WED_CTRL_WPDMA_INT_AGENT_EN BIT(0)
+#define MTK_WED_CTRL_WPDMA_INT_AGENT_BUSY BIT(1)
+#define MTK_WED_CTRL_WDMA_INT_AGENT_EN BIT(2)
+#define MTK_WED_CTRL_WDMA_INT_AGENT_BUSY BIT(3)
+#define MTK_WED_CTRL_WED_TX_BM_EN BIT(8)
+#define MTK_WED_CTRL_WED_TX_BM_BUSY BIT(9)
+#define MTK_WED_CTRL_WED_TX_FREE_AGENT_EN BIT(10)
+#define MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY BIT(11)
+#define MTK_WED_CTRL_RESERVE_EN BIT(12)
+#define MTK_WED_CTRL_RESERVE_BUSY BIT(13)
+#define MTK_WED_CTRL_FINAL_DIDX_READ BIT(24)
+#define MTK_WED_CTRL_MIB_READ_CLEAR BIT(28)
+
+#define MTK_WED_EXT_INT_STATUS 0x020
+#define MTK_WED_EXT_INT_STATUS_TF_LEN_ERR BIT(0)
+#define MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD BIT(1)
+#define MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID BIT(4)
+#define MTK_WED_EXT_INT_STATUS_TX_FBUF_LO_TH BIT(8)
+#define MTK_WED_EXT_INT_STATUS_TX_FBUF_HI_TH BIT(9)
+#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH BIT(12)
+#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH BIT(13)
+#define MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR BIT(16)
+#define MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR BIT(17)
+#define MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT BIT(18)
+#define MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN BIT(19)
+#define MTK_WED_EXT_INT_STATUS_RX_DRV_BM_DMAD_COHERENT BIT(20)
+#define MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR BIT(21)
+#define MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR BIT(22)
+#define MTK_WED_EXT_INT_STATUS_RX_DRV_DMA_RECYCLE BIT(24)
+#define MTK_WED_EXT_INT_STATUS_ERROR_MASK (MTK_WED_EXT_INT_STATUS_TF_LEN_ERR | \
+ MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD | \
+ MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID | \
+ MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR | \
+ MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR | \
+ MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN | \
+ MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR | \
+ MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR)
+
+#define MTK_WED_EXT_INT_MASK 0x028
+
+#define MTK_WED_STATUS 0x060
+#define MTK_WED_STATUS_TX GENMASK(15, 8)
+
+#define MTK_WED_TX_BM_CTRL 0x080
+#define MTK_WED_TX_BM_CTRL_VLD_GRP_NUM GENMASK(6, 0)
+#define MTK_WED_TX_BM_CTRL_RSV_GRP_NUM GENMASK(22, 16)
+#define MTK_WED_TX_BM_CTRL_PAUSE BIT(28)
+
+#define MTK_WED_TX_BM_BASE 0x084
+
+#define MTK_WED_TX_BM_TKID 0x088
+#define MTK_WED_TX_BM_TKID_START GENMASK(15, 0)
+#define MTK_WED_TX_BM_TKID_END GENMASK(31, 16)
+
+#define MTK_WED_TX_BM_BUF_LEN 0x08c
+
+#define MTK_WED_TX_BM_INTF 0x09c
+#define MTK_WED_TX_BM_INTF_TKID GENMASK(15, 0)
+#define MTK_WED_TX_BM_INTF_TKFIFO_FDEP GENMASK(23, 16)
+#define MTK_WED_TX_BM_INTF_TKID_VALID BIT(28)
+#define MTK_WED_TX_BM_INTF_TKID_READ BIT(29)
+
+#define MTK_WED_TX_BM_DYN_THR 0x0a0
+#define MTK_WED_TX_BM_DYN_THR_LO GENMASK(6, 0)
+#define MTK_WED_TX_BM_DYN_THR_HI GENMASK(22, 16)
+
+#define MTK_WED_INT_STATUS 0x200
+#define MTK_WED_INT_MASK 0x204
+
+#define MTK_WED_GLO_CFG 0x208
+#define MTK_WED_GLO_CFG_TX_DMA_EN BIT(0)
+#define MTK_WED_GLO_CFG_TX_DMA_BUSY BIT(1)
+#define MTK_WED_GLO_CFG_RX_DMA_EN BIT(2)
+#define MTK_WED_GLO_CFG_RX_DMA_BUSY BIT(3)
+#define MTK_WED_GLO_CFG_RX_BT_SIZE GENMASK(5, 4)
+#define MTK_WED_GLO_CFG_TX_WB_DDONE BIT(6)
+#define MTK_WED_GLO_CFG_BIG_ENDIAN BIT(7)
+#define MTK_WED_GLO_CFG_DIS_BT_SIZE_ALIGN BIT(8)
+#define MTK_WED_GLO_CFG_TX_BT_SIZE_LO BIT(9)
+#define MTK_WED_GLO_CFG_MULTI_DMA_EN GENMASK(11, 10)
+#define MTK_WED_GLO_CFG_FIFO_LITTLE_ENDIAN BIT(12)
+#define MTK_WED_GLO_CFG_MI_DEPTH_RD GENMASK(21, 13)
+#define MTK_WED_GLO_CFG_TX_BT_SIZE_HI GENMASK(23, 22)
+#define MTK_WED_GLO_CFG_SW_RESET BIT(24)
+#define MTK_WED_GLO_CFG_FIRST_TOKEN_ONLY BIT(26)
+#define MTK_WED_GLO_CFG_OMIT_RX_INFO BIT(27)
+#define MTK_WED_GLO_CFG_OMIT_TX_INFO BIT(28)
+#define MTK_WED_GLO_CFG_BYTE_SWAP BIT(29)
+#define MTK_WED_GLO_CFG_RX_2B_OFFSET BIT(31)
+
+#define MTK_WED_RESET_IDX 0x20c
+#define MTK_WED_RESET_IDX_TX GENMASK(3, 0)
+#define MTK_WED_RESET_IDX_RX GENMASK(17, 16)
+
+#define MTK_WED_TX_MIB(_n) (0x2a0 + (_n) * 4)
+
+#define MTK_WED_RING_TX(_n) (0x300 + (_n) * 0x10)
+
+#define MTK_WED_RING_RX(_n) (0x400 + (_n) * 0x10)
+
+#define MTK_WED_WPDMA_INT_TRIGGER 0x504
+#define MTK_WED_WPDMA_INT_TRIGGER_RX_DONE BIT(1)
+#define MTK_WED_WPDMA_INT_TRIGGER_TX_DONE GENMASK(5, 4)
+
+#define MTK_WED_WPDMA_GLO_CFG 0x508
+#define MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN BIT(0)
+#define MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY BIT(1)
+#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN BIT(2)
+#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_BUSY BIT(3)
+#define MTK_WED_WPDMA_GLO_CFG_RX_BT_SIZE GENMASK(5, 4)
+#define MTK_WED_WPDMA_GLO_CFG_TX_WB_DDONE BIT(6)
+#define MTK_WED_WPDMA_GLO_CFG_BIG_ENDIAN BIT(7)
+#define MTK_WED_WPDMA_GLO_CFG_DIS_BT_SIZE_ALIGN BIT(8)
+#define MTK_WED_WPDMA_GLO_CFG_TX_BT_SIZE_LO BIT(9)
+#define MTK_WED_WPDMA_GLO_CFG_MULTI_DMA_EN GENMASK(11, 10)
+#define MTK_WED_WPDMA_GLO_CFG_FIFO_LITTLE_ENDIAN BIT(12)
+#define MTK_WED_WPDMA_GLO_CFG_MI_DEPTH_RD GENMASK(21, 13)
+#define MTK_WED_WPDMA_GLO_CFG_TX_BT_SIZE_HI GENMASK(23, 22)
+#define MTK_WED_WPDMA_GLO_CFG_SW_RESET BIT(24)
+#define MTK_WED_WPDMA_GLO_CFG_FIRST_TOKEN_ONLY BIT(26)
+#define MTK_WED_WPDMA_GLO_CFG_OMIT_RX_INFO BIT(27)
+#define MTK_WED_WPDMA_GLO_CFG_OMIT_TX_INFO BIT(28)
+#define MTK_WED_WPDMA_GLO_CFG_BYTE_SWAP BIT(29)
+#define MTK_WED_WPDMA_GLO_CFG_RX_2B_OFFSET BIT(31)
+
+#define MTK_WED_WPDMA_RESET_IDX 0x50c
+#define MTK_WED_WPDMA_RESET_IDX_TX GENMASK(3, 0)
+#define MTK_WED_WPDMA_RESET_IDX_RX GENMASK(17, 16)
+
+#define MTK_WED_WPDMA_INT_CTRL 0x520
+#define MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV BIT(21)
+
+#define MTK_WED_WPDMA_INT_MASK 0x524
+
+#define MTK_WED_PCIE_CFG_BASE 0x560
+
+#define MTK_WED_PCIE_INT_TRIGGER 0x570
+#define MTK_WED_PCIE_INT_TRIGGER_STATUS BIT(16)
+
+#define MTK_WED_WPDMA_CFG_BASE 0x580
+
+#define MTK_WED_WPDMA_TX_MIB(_n) (0x5a0 + (_n) * 4)
+#define MTK_WED_WPDMA_TX_COHERENT_MIB(_n) (0x5d0 + (_n) * 4)
+
+#define MTK_WED_WPDMA_RING_TX(_n) (0x600 + (_n) * 0x10)
+#define MTK_WED_WPDMA_RING_RX(_n) (0x700 + (_n) * 0x10)
+#define MTK_WED_WDMA_RING_RX(_n) (0x900 + (_n) * 0x10)
+#define MTK_WED_WDMA_RX_THRES(_n) (0x940 + (_n) * 0x4)
+
+#define MTK_WED_WDMA_GLO_CFG 0xa04
+#define MTK_WED_WDMA_GLO_CFG_TX_DRV_EN BIT(0)
+#define MTK_WED_WDMA_GLO_CFG_RX_DRV_EN BIT(2)
+#define MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY BIT(3)
+#define MTK_WED_WDMA_GLO_CFG_BT_SIZE GENMASK(5, 4)
+#define MTK_WED_WDMA_GLO_CFG_TX_WB_DDONE BIT(6)
+#define MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE BIT(13)
+#define MTK_WED_WDMA_GLO_CFG_WCOMPLETE_SEL BIT(16)
+#define MTK_WED_WDMA_GLO_CFG_INIT_PHASE_RXDMA_BYPASS BIT(17)
+#define MTK_WED_WDMA_GLO_CFG_INIT_PHASE_BYPASS BIT(18)
+#define MTK_WED_WDMA_GLO_CFG_FSM_RETURN_IDLE BIT(19)
+#define MTK_WED_WDMA_GLO_CFG_WAIT_COHERENT BIT(20)
+#define MTK_WED_WDMA_GLO_CFG_AXI_W_AFTER_AW BIT(21)
+#define MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY_SINGLE_W BIT(22)
+#define MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY BIT(23)
+#define MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP BIT(24)
+#define MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE BIT(25)
+#define MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE BIT(26)
+#define MTK_WED_WDMA_GLO_CFG_RXDRV_CLKGATE_BYPASS BIT(30)
+
+#define MTK_WED_WDMA_RESET_IDX 0xa08
+#define MTK_WED_WDMA_RESET_IDX_RX GENMASK(17, 16)
+#define MTK_WED_WDMA_RESET_IDX_DRV GENMASK(25, 24)
+
+#define MTK_WED_WDMA_INT_TRIGGER 0xa28
+#define MTK_WED_WDMA_INT_TRIGGER_RX_DONE GENMASK(17, 16)
+
+#define MTK_WED_WDMA_INT_CTRL 0xa2c
+#define MTK_WED_WDMA_INT_CTRL_POLL_SRC_SEL GENMASK(17, 16)
+
+#define MTK_WED_WDMA_OFFSET0 0xaa4
+#define MTK_WED_WDMA_OFFSET1 0xaa8
+
+#define MTK_WED_WDMA_RX_MIB(_n) (0xae0 + (_n) * 4)
+#define MTK_WED_WDMA_RX_RECYCLE_MIB(_n) (0xae8 + (_n) * 4)
+#define MTK_WED_WDMA_RX_PROCESSED_MIB(_n) (0xaf0 + (_n) * 4)
+
+#define MTK_WED_RING_OFS_BASE 0x00
+#define MTK_WED_RING_OFS_COUNT 0x04
+#define MTK_WED_RING_OFS_CPU_IDX 0x08
+#define MTK_WED_RING_OFS_DMA_IDX 0x0c
+
+#define MTK_WDMA_RING_RX(_n) (0x100 + (_n) * 0x10)
+
+#define MTK_WDMA_GLO_CFG 0x204
+#define MTK_WDMA_GLO_CFG_RX_INFO_PRERES GENMASK(28, 26)
+
+#define MTK_WDMA_RESET_IDX 0x208
+#define MTK_WDMA_RESET_IDX_TX GENMASK(3, 0)
+#define MTK_WDMA_RESET_IDX_RX GENMASK(17, 16)
+
+#define MTK_WDMA_INT_MASK 0x228
+#define MTK_WDMA_INT_MASK_TX_DONE GENMASK(3, 0)
+#define MTK_WDMA_INT_MASK_RX_DONE GENMASK(17, 16)
+#define MTK_WDMA_INT_MASK_TX_DELAY BIT(28)
+#define MTK_WDMA_INT_MASK_TX_COHERENT BIT(29)
+#define MTK_WDMA_INT_MASK_RX_DELAY BIT(30)
+#define MTK_WDMA_INT_MASK_RX_COHERENT BIT(31)
+
+#define MTK_WDMA_INT_GRP1 0x250
+#define MTK_WDMA_INT_GRP2 0x254
+
+#define MTK_PCIE_MIRROR_MAP(n) ((n) ? 0x4 : 0x0)
+#define MTK_PCIE_MIRROR_MAP_EN BIT(0)
+#define MTK_PCIE_MIRROR_MAP_WED_ID BIT(1)
+
+/* DMA channel mapping */
+#define HIFSYS_DMA_AG_MAP 0x008
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 4ba1a78c6515..bfc0cd5ec423 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -16,13 +16,9 @@ config MLX5_CORE
Core driver for low level functionality of the ConnectX-4 and
Connect-IB cards by Mellanox Technologies.
-config MLX5_ACCEL
- bool
-
config MLX5_FPGA
bool "Mellanox Technologies Innova support"
depends on MLX5_CORE
- select MLX5_ACCEL
help
Build support for the Innova family of network cards by Mellanox
Technologies. Innova network cards are comprised of a ConnectX chip
@@ -143,71 +139,21 @@ config MLX5_CORE_IPOIB
help
MLX5 IPoIB offloads & acceleration support.
-config MLX5_FPGA_IPSEC
- bool "Mellanox Technologies IPsec Innova support"
- depends on MLX5_CORE
- depends on MLX5_FPGA
- help
- Build IPsec support for the Innova family of network cards by Mellanox
- Technologies. Innova network cards are comprised of a ConnectX chip
- and an FPGA chip on one board. If you select this option, the
- mlx5_core driver will include the Innova FPGA core and allow building
- sandbox-specific client drivers.
-
-config MLX5_IPSEC
- bool "Mellanox Technologies IPsec Connect-X support"
- depends on MLX5_CORE_EN
- depends on XFRM_OFFLOAD
- depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
- select MLX5_ACCEL
- help
- Build IPsec support for the Connect-X family of network cards by Mellanox
- Technologies.
- Note: If you select this option, the mlx5_core driver will include
- IPsec support for the Connect-X family.
-
config MLX5_EN_IPSEC
- bool "IPSec XFRM cryptography-offload acceleration"
+ bool "Mellanox Technologies IPsec Connect-X support"
depends on MLX5_CORE_EN
depends on XFRM_OFFLOAD
depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
- depends on MLX5_FPGA_IPSEC || MLX5_IPSEC
help
Build support for IPsec cryptography-offload acceleration in the NIC.
- Note: Support for hardware with this capability needs to be selected
- for this option to become available.
-
-config MLX5_FPGA_TLS
- bool "Mellanox Technologies TLS Innova support"
- depends on TLS_DEVICE
- depends on TLS=y || MLX5_CORE=m
- depends on MLX5_CORE_EN
- depends on MLX5_FPGA
- select MLX5_EN_TLS
- help
- Build TLS support for the Innova family of network cards by Mellanox
- Technologies. Innova network cards are comprised of a ConnectX chip
- and an FPGA chip on one board. If you select this option, the
- mlx5_core driver will include the Innova FPGA core and allow building
- sandbox-specific client drivers.
-config MLX5_TLS
+config MLX5_EN_TLS
bool "Mellanox Technologies TLS Connect-X support"
depends on TLS_DEVICE
depends on TLS=y || MLX5_CORE=m
depends on MLX5_CORE_EN
- select MLX5_ACCEL
- select MLX5_EN_TLS
- help
- Build TLS support for the Connect-X family of network cards by Mellanox
- Technologies.
-
-config MLX5_EN_TLS
- bool
help
Build support for TLS cryptography-offload acceleration in the NIC.
- Note: Support for hardware with this capability needs to be selected
- for this option to become available.
config MLX5_SW_STEERING
bool "Mellanox Technologies software-managed steering"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 4bc666714a35..81620c25c77e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -28,7 +28,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
en_selftest.o en/port.o en/monitor_stats.o en/health.o \
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
- en/qos.o en/trap.o en/fs_tt_redirect.o en/selq.o
+ en/qos.o en/trap.o en/fs_tt_redirect.o en/selq.o lib/crypto.o
#
# Netdev extra
@@ -88,17 +88,13 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib
#
# Accelerations & FPGA
#
-mlx5_core-$(CONFIG_MLX5_IPSEC) += accel/ipsec_offload.o
-mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o
-mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o
-mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o
-
mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
- en_accel/ipsec_stats.o en_accel/ipsec_fs.o
+ en_accel/ipsec_stats.o en_accel/ipsec_fs.o \
+ en_accel/ipsec_offload.o
-mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \
+mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \
en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \
en_accel/ktls_tx.o en_accel/ktls_rx.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
deleted file mode 100644
index 82b185121edb..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef __MLX5E_ACCEL_H__
-#define __MLX5E_ACCEL_H__
-
-#ifdef CONFIG_MLX5_ACCEL
-
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-
-static inline bool is_metadata_hdr_valid(struct sk_buff *skb)
-{
- __be16 *ethtype;
-
- if (unlikely(skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN))
- return false;
- ethtype = (__be16 *)(skb->data + ETH_ALEN * 2);
- if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE))
- return false;
- return true;
-}
-
-static inline void remove_metadata_hdr(struct sk_buff *skb)
-{
- struct ethhdr *old_eth;
- struct ethhdr *new_eth;
-
- /* Remove the metadata from the buffer */
- old_eth = (struct ethhdr *)skb->data;
- new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN);
- memmove(new_eth, old_eth, 2 * ETH_ALEN);
- /* Ethertype is already in its new place */
- skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
-}
-
-#endif /* CONFIG_MLX5_ACCEL */
-
-#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
deleted file mode 100644
index 09f5ce97af46..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/mlx5/device.h>
-
-#include "accel/ipsec.h"
-#include "mlx5_core.h"
-#include "fpga/ipsec.h"
-#include "accel/ipsec_offload.h"
-
-void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops;
- int err = 0;
-
- ipsec_ops = (mlx5_ipsec_offload_ops(mdev)) ?
- mlx5_ipsec_offload_ops(mdev) :
- mlx5_fpga_ipsec_ops(mdev);
-
- if (!ipsec_ops || !ipsec_ops->init) {
- mlx5_core_dbg(mdev, "IPsec ops is not supported\n");
- return;
- }
-
- err = ipsec_ops->init(mdev);
- if (err) {
- mlx5_core_warn_once(mdev, "Failed to start IPsec device, err = %d\n", err);
- return;
- }
-
- mdev->ipsec_ops = ipsec_ops;
-}
-
-void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->cleanup)
- return;
-
- ipsec_ops->cleanup(mdev);
-}
-
-u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->device_caps)
- return 0;
-
- return ipsec_ops->device_caps(mdev);
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps);
-
-unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->counters_count)
- return -EOPNOTSUPP;
-
- return ipsec_ops->counters_count(mdev);
-}
-
-int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
- unsigned int count)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->counters_read)
- return -EOPNOTSUPP;
-
- return ipsec_ops->counters_read(mdev, counters, count);
-}
-
-void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- u32 *sa_handle)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
- __be32 saddr[4] = {}, daddr[4] = {};
-
- if (!ipsec_ops || !ipsec_ops->create_hw_context)
- return ERR_PTR(-EOPNOTSUPP);
-
- if (!xfrm->attrs.is_ipv6) {
- saddr[3] = xfrm->attrs.saddr.a4;
- daddr[3] = xfrm->attrs.daddr.a4;
- } else {
- memcpy(saddr, xfrm->attrs.saddr.a6, sizeof(saddr));
- memcpy(daddr, xfrm->attrs.daddr.a6, sizeof(daddr));
- }
-
- return ipsec_ops->create_hw_context(mdev, xfrm, saddr, daddr, xfrm->attrs.spi,
- xfrm->attrs.is_ipv6, sa_handle);
-}
-
-void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->free_hw_context)
- return;
-
- ipsec_ops->free_hw_context(context);
-}
-
-struct mlx5_accel_esp_xfrm *
-mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = mdev->ipsec_ops;
- struct mlx5_accel_esp_xfrm *xfrm;
-
- if (!ipsec_ops || !ipsec_ops->esp_create_xfrm)
- return ERR_PTR(-EOPNOTSUPP);
-
- xfrm = ipsec_ops->esp_create_xfrm(mdev, attrs, flags);
- if (IS_ERR(xfrm))
- return xfrm;
-
- xfrm->mdev = mdev;
- return xfrm;
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm);
-
-void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->esp_destroy_xfrm)
- return;
-
- ipsec_ops->esp_destroy_xfrm(xfrm);
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm);
-
-int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- const struct mlx5_accel_ipsec_ops *ipsec_ops = xfrm->mdev->ipsec_ops;
-
- if (!ipsec_ops || !ipsec_ops->esp_modify_xfrm)
- return -EOPNOTSUPP;
-
- return ipsec_ops->esp_modify_xfrm(xfrm, attrs);
-}
-EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
deleted file mode 100644
index fbb9c5415d53..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_ACCEL_IPSEC_H__
-#define __MLX5_ACCEL_IPSEC_H__
-
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/accel.h>
-
-#ifdef CONFIG_MLX5_ACCEL
-
-#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
-
-unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
-int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
- unsigned int count);
-
-void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- u32 *sa_handle);
-void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context);
-
-void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
-void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
-
-struct mlx5_accel_ipsec_ops {
- u32 (*device_caps)(struct mlx5_core_dev *mdev);
- unsigned int (*counters_count)(struct mlx5_core_dev *mdev);
- int (*counters_read)(struct mlx5_core_dev *mdev, u64 *counters, unsigned int count);
- void* (*create_hw_context)(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- const __be32 saddr[4], const __be32 daddr[4],
- const __be32 spi, bool is_ipv6, u32 *sa_handle);
- void (*free_hw_context)(void *context);
- int (*init)(struct mlx5_core_dev *mdev);
- void (*cleanup)(struct mlx5_core_dev *mdev);
- struct mlx5_accel_esp_xfrm* (*esp_create_xfrm)(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags);
- int (*esp_modify_xfrm)(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs);
- void (*esp_destroy_xfrm)(struct mlx5_accel_esp_xfrm *xfrm);
-};
-
-#else
-
-#define MLX5_IPSEC_DEV(mdev) false
-
-static inline void *
-mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *xfrm,
- u32 *sa_handle)
-{
- return NULL;
-}
-
-static inline void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context) {}
-
-static inline void mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) {}
-
-static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) {}
-
-#endif /* CONFIG_MLX5_ACCEL */
-
-#endif /* __MLX5_ACCEL_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h
deleted file mode 100644
index 970c66d19c1d..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
-
-#ifndef __MLX5_IPSEC_OFFLOAD_H__
-#define __MLX5_IPSEC_OFFLOAD_H__
-
-#include <linux/mlx5/driver.h>
-#include "accel/ipsec.h"
-
-#ifdef CONFIG_MLX5_IPSEC
-
-const struct mlx5_accel_ipsec_ops *mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev);
-static inline bool mlx5_is_ipsec_device(struct mlx5_core_dev *mdev)
-{
- if (!MLX5_CAP_GEN(mdev, ipsec_offload))
- return false;
-
- if (!MLX5_CAP_GEN(mdev, log_max_dek))
- return false;
-
- if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
- MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
- return false;
-
- return MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) &&
- MLX5_CAP_ETH(mdev, insert_trailer);
-}
-
-#else
-static inline const struct mlx5_accel_ipsec_ops *
-mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev) { return NULL; }
-static inline bool mlx5_is_ipsec_device(struct mlx5_core_dev *mdev)
-{
- return false;
-}
-
-#endif /* CONFIG_MLX5_IPSEC */
-#endif /* __MLX5_IPSEC_OFFLOAD_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
deleted file mode 100644
index 6c2b86a26863..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/mlx5/device.h>
-
-#include "accel/tls.h"
-#include "mlx5_core.h"
-#include "lib/mlx5.h"
-
-#ifdef CONFIG_MLX5_FPGA_TLS
-#include "fpga/tls.h"
-
-int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx)
-{
- return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info,
- start_offload_tcp_sn, p_swid,
- direction_sx);
-}
-
-void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- bool direction_sx)
-{
- mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx);
-}
-
-int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
- u32 seq, __be64 rcd_sn)
-{
- return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn);
-}
-
-bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_is_tls_device(mdev) ||
- mlx5_accel_is_ktls_device(mdev);
-}
-
-u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_tls_device_caps(mdev);
-}
-
-int mlx5_accel_tls_init(struct mlx5_core_dev *mdev)
-{
- return mlx5_fpga_tls_init(mdev);
-}
-
-void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev)
-{
- mlx5_fpga_tls_cleanup(mdev);
-}
-#endif
-
-#ifdef CONFIG_MLX5_TLS
-int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info,
- u32 *p_key_id)
-{
- u32 sz_bytes;
- void *key;
-
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128: {
- struct tls12_crypto_info_aes_gcm_128 *info =
- (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
-
- key = info->key;
- sz_bytes = sizeof(info->key);
- break;
- }
- case TLS_CIPHER_AES_GCM_256: {
- struct tls12_crypto_info_aes_gcm_256 *info =
- (struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
-
- key = info->key;
- sz_bytes = sizeof(info->key);
- break;
- }
- default:
- return -EINVAL;
- }
-
- return mlx5_create_encryption_key(mdev, key, sz_bytes,
- MLX5_ACCEL_OBJ_TLS_KEY,
- p_key_id);
-}
-
-void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id)
-{
- mlx5_destroy_encryption_key(mdev, key_id);
-}
-#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
deleted file mode 100644
index fd874f0c380a..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_ACCEL_TLS_H__
-#define __MLX5_ACCEL_TLS_H__
-
-#include <linux/mlx5/driver.h>
-#include <linux/tls.h>
-
-#ifdef CONFIG_MLX5_TLS
-int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info,
- u32 *p_key_id);
-void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);
-
-static inline bool mlx5_accel_is_ktls_tx(struct mlx5_core_dev *mdev)
-{
- return MLX5_CAP_GEN(mdev, tls_tx);
-}
-
-static inline bool mlx5_accel_is_ktls_rx(struct mlx5_core_dev *mdev)
-{
- return MLX5_CAP_GEN(mdev, tls_rx);
-}
-
-static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev)
-{
- if (!mlx5_accel_is_ktls_tx(mdev) &&
- !mlx5_accel_is_ktls_rx(mdev))
- return false;
-
- if (!MLX5_CAP_GEN(mdev, log_max_dek))
- return false;
-
- return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
-}
-
-static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info)
-{
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- if (crypto_info->version == TLS_1_2_VERSION)
- return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
- break;
- }
-
- return false;
-}
-#else
-static inline bool mlx5_accel_is_ktls_tx(struct mlx5_core_dev *mdev)
-{ return false; }
-
-static inline bool mlx5_accel_is_ktls_rx(struct mlx5_core_dev *mdev)
-{ return false; }
-
-static inline int
-mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info,
- u32 *p_key_id) { return -ENOTSUPP; }
-static inline void
-mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {}
-
-static inline bool
-mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; }
-static inline bool
-mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
- struct tls_crypto_info *crypto_info) { return false; }
-#endif
-
-enum {
- MLX5_ACCEL_TLS_TX = BIT(0),
- MLX5_ACCEL_TLS_RX = BIT(1),
- MLX5_ACCEL_TLS_V12 = BIT(2),
- MLX5_ACCEL_TLS_V13 = BIT(3),
- MLX5_ACCEL_TLS_LRO = BIT(4),
- MLX5_ACCEL_TLS_IPV6 = BIT(5),
- MLX5_ACCEL_TLS_AES_GCM128 = BIT(30),
- MLX5_ACCEL_TLS_AES_GCM256 = BIT(31),
-};
-
-struct mlx5_ifc_tls_flow_bits {
- u8 src_port[0x10];
- u8 dst_port[0x10];
- union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
- union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
- u8 ipv6[0x1];
- u8 direction_sx[0x1];
- u8 reserved_at_2[0x1e];
-};
-
-#ifdef CONFIG_MLX5_FPGA_TLS
-int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx);
-void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- bool direction_sx);
-int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
- u32 seq, __be64 rcd_sn);
-bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
-u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
-int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
-void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev);
-
-#else
-
-static inline int
-mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx) { return -ENOTSUPP; }
-static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- bool direction_sx) { }
-static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
- u32 seq, __be64 rcd_sn) { return 0; }
-static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
-{
- return mlx5_accel_is_ktls_device(mdev);
-}
-static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
-static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
-static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { }
-#endif
-
-#endif /* __MLX5_ACCEL_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8653ac0fd865..50818081bdc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -354,7 +354,6 @@ enum {
MLX5E_RQ_STATE_AM,
MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
- MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */
MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 08fd1370a8b0..1e8700957280 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -5,8 +5,7 @@
#include "en/txrx.h"
#include "en/port.h"
#include "en_accel/en_accel.h"
-#include "accel/ipsec.h"
-#include "fpga/ipsec.h"
+#include "en_accel/ipsec_offload.h"
static bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
@@ -207,7 +206,7 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par
bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
u16 stop_room;
- stop_room = mlx5e_tls_get_stop_room(mdev, params);
+ stop_room = mlx5e_ktls_get_stop_room(mdev, params);
stop_room += mlx5e_stop_room_for_max_wqe(mdev);
if (is_mpwqe)
/* A MPWQE can take up to the maximum-sized WQE + all the normal
@@ -327,9 +326,6 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
return false;
- if (mlx5_fpga_is_ipsec_device(mdev))
- return false;
-
if (params->xdp_prog) {
/* XSK params are not considered here. If striding RQ is in use,
* and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
@@ -423,9 +419,6 @@ static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
int max_mtu;
int i;
- if (mlx5_fpga_is_ipsec_device(mdev))
- byte_count += MLX5E_METADATA_ETHER_LEN;
-
if (mlx5e_rx_is_linear_skb(params, xsk)) {
int frag_stride;
@@ -696,8 +689,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
bool allow_swp;
- allow_swp = mlx5_geneve_tx_allowed(mdev) ||
- !!MLX5_IPSEC_DEV(mdev);
+ allow_swp =
+ mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev);
mlx5e_build_sq_param_common(mdev, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
MLX5_SET(sqc, sqc, allow_swp, allow_swp);
@@ -804,7 +797,7 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
{
- if (mlx5e_accel_is_ktls_rx(mdev))
+ if (mlx5e_is_ktls_rx(mdev))
return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
@@ -833,7 +826,7 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
mlx5e_build_sq_param_common(mdev, param);
param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */
- param->is_tls = mlx5e_accel_is_ktls_rx(mdev);
+ param->is_tls = mlx5e_is_ktls_rx(mdev);
if (param->is_tls)
param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */
MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 62cde3e87c2e..04c0a5e1c89a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -37,8 +37,8 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include "en_accel/ipsec_rxtx.h"
-#include "en_accel/tls.h"
-#include "en_accel/tls_rxtx.h"
+#include "en_accel/ktls.h"
+#include "en_accel/ktls_txrx.h"
#include "en.h"
#include "en/txrx.h"
@@ -124,8 +124,9 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
#ifdef CONFIG_MLX5_EN_TLS
/* May send SKBs and WQEs. */
- if (mlx5e_tls_skb_offloaded(skb))
- if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls)))
+ if (mlx5e_ktls_skb_offloaded(skb))
+ if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb,
+ &state->tls)))
return false;
#endif
@@ -174,7 +175,7 @@ static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
struct mlx5_wqe_inline_seg *inlseg)
{
#ifdef CONFIG_MLX5_EN_TLS
- mlx5e_tls_handle_tx_wqe(&wqe->ctrl, &state->tls);
+ mlx5e_ktls_handle_tx_wqe(&wqe->ctrl, &state->tls);
#endif
#ifdef CONFIG_MLX5_EN_IPSEC
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 299e3f0fcb5c..c280a18ff002 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -226,8 +226,7 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
return -EINVAL;
}
if (x->props.flags & XFRM_STATE_ESN &&
- !(mlx5_accel_ipsec_device_caps(priv->mdev) &
- MLX5_ACCEL_IPSEC_CAP_ESN)) {
+ !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_CAP_ESN)) {
netdev_info(netdev, "Cannot offload ESN xfrm states\n");
return -EINVAL;
}
@@ -275,8 +274,7 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
return -EINVAL;
}
if (x->props.family == AF_INET6 &&
- !(mlx5_accel_ipsec_device_caps(priv->mdev) &
- MLX5_ACCEL_IPSEC_CAP_IPV6)) {
+ !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_CAP_IPV6)) {
netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
return -EINVAL;
}
@@ -286,9 +284,6 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
static int mlx5e_xfrm_fs_add_rule(struct mlx5e_priv *priv,
struct mlx5e_ipsec_sa_entry *sa_entry)
{
- if (!mlx5_is_ipsec_device(priv->mdev))
- return 0;
-
return mlx5e_accel_ipsec_fs_add_rule(priv, &sa_entry->xfrm->attrs,
sa_entry->ipsec_obj_id,
&sa_entry->ipsec_rule);
@@ -297,9 +292,6 @@ static int mlx5e_xfrm_fs_add_rule(struct mlx5e_priv *priv,
static void mlx5e_xfrm_fs_del_rule(struct mlx5e_priv *priv,
struct mlx5e_ipsec_sa_entry *sa_entry)
{
- if (!mlx5_is_ipsec_device(priv->mdev))
- return;
-
mlx5e_accel_ipsec_fs_del_rule(priv, &sa_entry->xfrm->attrs,
&sa_entry->ipsec_rule);
}
@@ -333,9 +325,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
/* create xfrm */
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
- sa_entry->xfrm =
- mlx5_accel_esp_create_xfrm(priv->mdev, &attrs,
- MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
+ sa_entry->xfrm = mlx5_accel_esp_create_xfrm(priv->mdev, &attrs);
if (IS_ERR(sa_entry->xfrm)) {
err = PTR_ERR(sa_entry->xfrm);
goto err_sa_entry;
@@ -414,7 +404,7 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
struct mlx5e_ipsec *ipsec = NULL;
- if (!MLX5_IPSEC_DEV(priv->mdev)) {
+ if (!mlx5_ipsec_device_caps(priv->mdev)) {
netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
return 0;
}
@@ -425,10 +415,7 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
hash_init(ipsec->sadb_rx);
spin_lock_init(&ipsec->sadb_rx_lock);
- ida_init(&ipsec->halloc);
ipsec->en_priv = priv;
- ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
- MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
priv->netdev->name);
if (!ipsec->wq) {
@@ -452,7 +439,6 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
mlx5e_accel_ipsec_fs_cleanup(priv);
destroy_workqueue(ipsec->wq);
- ida_destroy(&ipsec->halloc);
kfree(ipsec);
priv->ipsec = NULL;
}
@@ -531,7 +517,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
struct mlx5_core_dev *mdev = priv->mdev;
struct net_device *netdev = priv->netdev;
- if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
+ if (!(mlx5_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
!MLX5_CAP_ETH(mdev, swp)) {
mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
return;
@@ -550,15 +536,13 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
- if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) ||
+ if (!(mlx5_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) ||
!MLX5_CAP_ETH(mdev, swp_lso)) {
mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
return;
}
- if (mlx5_is_ipsec_device(mdev))
- netdev->gso_partial_features |= NETIF_F_GSO_ESP;
-
+ netdev->gso_partial_features |= NETIF_F_GSO_ESP;
mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
netdev->features |= NETIF_F_GSO_ESP;
netdev->hw_features |= NETIF_F_GSO_ESP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 6164c7f59efb..a0e9dade09e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -40,7 +40,7 @@
#include <net/xfrm.h>
#include <linux/idr.h>
-#include "accel/ipsec.h"
+#include "ipsec_offload.h"
#define MLX5E_IPSEC_SADB_RX_BITS 10
#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
@@ -55,24 +55,6 @@ struct mlx5e_ipsec_sw_stats {
atomic64_t ipsec_tx_drop_no_state;
atomic64_t ipsec_tx_drop_not_ip;
atomic64_t ipsec_tx_drop_trailer;
- atomic64_t ipsec_tx_drop_metadata;
-};
-
-struct mlx5e_ipsec_stats {
- u64 ipsec_dec_in_packets;
- u64 ipsec_dec_out_packets;
- u64 ipsec_dec_bypass_packets;
- u64 ipsec_enc_in_packets;
- u64 ipsec_enc_out_packets;
- u64 ipsec_enc_bypass_packets;
- u64 ipsec_dec_drop_packets;
- u64 ipsec_dec_auth_fail_packets;
- u64 ipsec_enc_drop_packets;
- u64 ipsec_add_sa_success;
- u64 ipsec_add_sa_fail;
- u64 ipsec_del_sa_success;
- u64 ipsec_del_sa_fail;
- u64 ipsec_cmd_drop;
};
struct mlx5e_accel_fs_esp;
@@ -81,11 +63,8 @@ struct mlx5e_ipsec_tx;
struct mlx5e_ipsec {
struct mlx5e_priv *en_priv;
DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
- bool no_trailer;
- spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
- struct ida halloc;
+ spinlock_t sadb_rx_lock; /* Protects sadb_rx */
struct mlx5e_ipsec_sw_stats sw_stats;
- struct mlx5e_ipsec_stats stats;
struct workqueue_struct *wq;
struct mlx5e_accel_fs_esp *rx_fs;
struct mlx5e_ipsec_tx *tx_fs;
@@ -116,7 +95,6 @@ struct mlx5e_ipsec_sa_entry {
struct mlx5e_ipsec_rule ipsec_rule;
};
-void mlx5e_ipsec_build_inverse_table(void);
int mlx5e_ipsec_init(struct mlx5e_priv *priv);
void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
@@ -125,11 +103,6 @@ struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
unsigned int handle);
#else
-
-static inline void mlx5e_ipsec_build_inverse_table(void)
-{
-}
-
static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 17da23dff0ed..66b529e36ea1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
#include <linux/netdevice.h>
-#include "accel/ipsec_offload.h"
+#include "ipsec_offload.h"
#include "ipsec_fs.h"
#include "fs_core.h"
@@ -700,9 +700,6 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv)
{
int err;
- if (!mlx5_is_ipsec_device(priv->mdev) || !priv->ipsec)
- return -EOPNOTSUPP;
-
err = fs_init_tx(priv);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h
index 3389b3bb3ef8..b70953979709 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.h
@@ -6,10 +6,9 @@
#include "en.h"
#include "ipsec.h"
-#include "accel/ipsec_offload.h"
+#include "ipsec_offload.h"
#include "en/fs.h"
-#ifdef CONFIG_MLX5_EN_IPSEC
void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv);
int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv);
int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
@@ -19,8 +18,4 @@ int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_priv *priv,
void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_priv *priv,
struct mlx5_accel_esp_xfrm_attrs *attrs,
struct mlx5e_ipsec_rule *ipsec_rule);
-#else
-static inline void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_priv *priv) {}
-static inline int mlx5e_accel_ipsec_fs_init(struct mlx5e_priv *priv) { return 0; }
-#endif
#endif /* __MLX5_IPSEC_STEERING_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index d6667d38e1de..37c9880719cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -1,14 +1,11 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+/* Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */
#include "mlx5_core.h"
#include "ipsec_offload.h"
#include "lib/mlx5.h"
#include "en_accel/ipsec_fs.h"
-#define MLX5_IPSEC_DEV_BASIC_CAPS (MLX5_ACCEL_IPSEC_CAP_DEVICE | MLX5_ACCEL_IPSEC_CAP_IPV6 | \
- MLX5_ACCEL_IPSEC_CAP_LSO)
-
struct mlx5_ipsec_sa_ctx {
struct rhash_head hash;
u32 enc_key_id;
@@ -25,25 +22,37 @@ struct mlx5_ipsec_esp_xfrm {
struct mlx5_accel_esp_xfrm accel_xfrm;
};
-static u32 mlx5_ipsec_offload_device_caps(struct mlx5_core_dev *mdev)
+u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
{
- u32 caps = MLX5_IPSEC_DEV_BASIC_CAPS;
+ u32 caps;
+
+ if (!MLX5_CAP_GEN(mdev, ipsec_offload))
+ return 0;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return 0;
+
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC))
+ return 0;

- if (!mlx5_is_ipsec_device(mdev))
+ if (!MLX5_CAP_IPSEC(mdev, ipsec_crypto_offload) ||
+ !MLX5_CAP_ETH(mdev, insert_trailer))
return 0;
if (!MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ipsec_encrypt) ||
!MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ipsec_decrypt))
return 0;
+ caps = MLX5_ACCEL_IPSEC_CAP_DEVICE | MLX5_ACCEL_IPSEC_CAP_IPV6 |
+ MLX5_ACCEL_IPSEC_CAP_LSO;
+
if (MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_encrypt) &&
MLX5_CAP_IPSEC(mdev, ipsec_crypto_esp_aes_gcm_128_decrypt))
caps |= MLX5_ACCEL_IPSEC_CAP_ESP;
- if (MLX5_CAP_IPSEC(mdev, ipsec_esn)) {
+ if (MLX5_CAP_IPSEC(mdev, ipsec_esn))
caps |= MLX5_ACCEL_IPSEC_CAP_ESN;
- caps |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
- }
/* We can accommodate up to 2^24 different IPsec objects
* because we use up to 24 bit in flow table metadata
@@ -52,6 +61,7 @@ static u32 mlx5_ipsec_offload_device_caps(struct mlx5_core_dev *mdev)
WARN_ON_ONCE(MLX5_CAP_IPSEC(mdev, log_max_ipsec_offload) > 24);
return caps;
}
+EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps);
static int
mlx5_ipsec_offload_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
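
The rewritten mlx5_ipsec_device_caps() warns when log_max_ipsec_offload exceeds 24 because the flow-table metadata carrying the IPsec object handle is only 24 bits wide. A standalone sketch of that budget; the sample handle value is made up:

/*
 * Standalone sketch of the 24-bit limit behind the WARN_ON_ONCE() above.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t max_objs = 1u << 24;	/* 16,777,216 distinct handles */
	uint32_t handle = 0xABCDEF5;	/* 28-bit value for the demo */
	uint32_t metadata = handle & (max_objs - 1);

	/* anything above bit 23 is silently lost in the metadata field */
	printf("max objects: %u\n", (unsigned int)max_objs);
	printf("handle 0x%x -> metadata 0x%x\n",
	       (unsigned int)handle, (unsigned int)metadata);
	return 0;
}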
@@ -94,8 +104,7 @@ mlx5_ipsec_offload_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
static struct mlx5_accel_esp_xfrm *
mlx5_ipsec_offload_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags)
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
{
struct mlx5_ipsec_esp_xfrm *mxfrm;
int err = 0;
@@ -274,11 +283,6 @@ static void mlx5_ipsec_offload_delete_sa_ctx(void *context)
mutex_unlock(&mxfrm->lock);
}
-static int mlx5_ipsec_offload_init(struct mlx5_core_dev *mdev)
-{
- return 0;
-}
-
static int mlx5_modify_ipsec_obj(struct mlx5_core_dev *mdev,
struct mlx5_ipsec_obj_attrs *attrs,
u32 ipsec_id)
@@ -366,20 +370,51 @@ change_sw_xfrm_attrs:
return err;
}
-static const struct mlx5_accel_ipsec_ops ipsec_offload_ops = {
- .device_caps = mlx5_ipsec_offload_device_caps,
- .create_hw_context = mlx5_ipsec_offload_create_sa_ctx,
- .free_hw_context = mlx5_ipsec_offload_delete_sa_ctx,
- .init = mlx5_ipsec_offload_init,
- .esp_create_xfrm = mlx5_ipsec_offload_esp_create_xfrm,
- .esp_destroy_xfrm = mlx5_ipsec_offload_esp_destroy_xfrm,
- .esp_modify_xfrm = mlx5_ipsec_offload_esp_modify_xfrm,
-};
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ u32 *sa_handle)
+{
+ __be32 saddr[4] = {}, daddr[4] = {};
+
+ if (!xfrm->attrs.is_ipv6) {
+ saddr[3] = xfrm->attrs.saddr.a4;
+ daddr[3] = xfrm->attrs.daddr.a4;
+ } else {
+ memcpy(saddr, xfrm->attrs.saddr.a6, sizeof(saddr));
+ memcpy(daddr, xfrm->attrs.daddr.a6, sizeof(daddr));
+ }
+
+ return mlx5_ipsec_offload_create_sa_ctx(mdev, xfrm, saddr, daddr,
+ xfrm->attrs.spi,
+ xfrm->attrs.is_ipv6, sa_handle);
+}
+
+void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context)
+{
+ mlx5_ipsec_offload_delete_sa_ctx(context);
+}

-const struct mlx5_accel_ipsec_ops *mlx5_ipsec_offload_ops(struct mlx5_core_dev *mdev)
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
{
- if (!mlx5_ipsec_offload_device_caps(mdev))
- return NULL;
+ struct mlx5_accel_esp_xfrm *xfrm;

- return &ipsec_offload_ops;
+ xfrm = mlx5_ipsec_offload_esp_create_xfrm(mdev, attrs);
+ if (IS_ERR(xfrm))
+ return xfrm;
+
+ xfrm->mdev = mdev;
+ return xfrm;
+}
+
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+ mlx5_ipsec_offload_esp_destroy_xfrm(xfrm);
+}
+
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ return mlx5_ipsec_offload_esp_modify_xfrm(xfrm, attrs);
}
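
The mlx5_accel_esp_create_hw_context() wrapper added above packs a 4-byte IPv4 address into the last word of the 16-byte address field the device expects, leaving the first twelve bytes zero. A standalone illustration with a made-up address:

/*
 * Standalone sketch of the address packing above; not driver code.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t saddr[4] = { 0 };	/* 16-byte device-facing field */

	saddr[3] = htonl(0xC0A80001);	/* 192.168.0.1 in network order */
	printf("%08x %08x %08x %08x\n",
	       (unsigned int)ntohl(saddr[0]), (unsigned int)ntohl(saddr[1]),
	       (unsigned int)ntohl(saddr[2]), (unsigned int)ntohl(saddr[3]));
	return 0;
}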
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h
new file mode 100644
index 000000000000..7dac104e6ef1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_IPSEC_OFFLOAD_H__
+#define __MLX5_IPSEC_OFFLOAD_H__
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/accel.h>
+
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ u32 *sa_handle);
+void mlx5_accel_esp_free_hw_context(struct mlx5_core_dev *mdev, void *context);
+#endif /* __MLX5_IPSEC_OFFLOAD_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index b56fea142c24..9b65c765cbd9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -34,78 +34,16 @@
#include <crypto/aead.h>
#include <net/xfrm.h>
#include <net/esp.h>
-#include "accel/ipsec_offload.h"
+#include "ipsec_offload.h"
#include "en_accel/ipsec_rxtx.h"
#include "en_accel/ipsec.h"
-#include "accel/accel.h"
#include "en.h"
enum {
- MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
- MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
- MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17,
-};
-
-struct mlx5e_ipsec_rx_metadata {
- unsigned char nexthdr;
- __be32 sa_handle;
-} __packed;
-
-enum {
MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
};
-struct mlx5e_ipsec_tx_metadata {
- __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */
- __be16 seq; /* LSBs of the first TCP seq, only for LSO */
- u8 esp_next_proto; /* Next protocol of ESP */
-} __packed;
-
-struct mlx5e_ipsec_metadata {
- unsigned char syndrome;
- union {
- unsigned char raw[5];
- /* from FPGA to host, on successful decrypt */
- struct mlx5e_ipsec_rx_metadata rx;
- /* from host to FPGA */
- struct mlx5e_ipsec_tx_metadata tx;
- } __packed content;
- /* packet type ID field */
- __be16 ethertype;
-} __packed;
-
-#define MAX_LSO_MSS 2048
-
-/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */
-static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS];
-
-static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb)
-{
- return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size];
-}
-
-static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb)
-{
- struct mlx5e_ipsec_metadata *mdata;
- struct ethhdr *eth;
-
- if (unlikely(skb_cow_head(skb, sizeof(*mdata))))
- return ERR_PTR(-ENOMEM);
-
- eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata));
- skb->mac_header -= sizeof(*mdata);
- mdata = (struct mlx5e_ipsec_metadata *)(eth + 1);
-
- memmove(skb->data, skb->data + sizeof(*mdata),
- 2 * ETH_ALEN);
-
- eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
-
- memset(mdata->content.raw, 0, sizeof(mdata->content.raw));
- return mdata;
-}
-
static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
{
unsigned int alen = crypto_aead_authsize(x->data);
@@ -244,40 +182,6 @@ void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
skb_store_bits(skb, iv_offset, &seqno, 8);
}
-static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
- struct mlx5e_ipsec_metadata *mdata,
- struct xfrm_offload *xo)
-{
- struct ip_esp_hdr *esph;
- struct tcphdr *tcph;
-
- if (skb_is_gso(skb)) {
- /* Add LSO metadata indication */
- esph = ip_esp_hdr(skb);
- tcph = inner_tcp_hdr(skb);
- netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n",
- skb->network_header,
- skb->transport_header,
- skb->inner_network_header,
- skb->inner_transport_header);
- netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n",
- skb->len, skb_shinfo(skb)->gso_size,
- ntohs(tcph->source), ntohs(tcph->dest),
- ntohl(tcph->seq), ntohl(esph->seq_no));
- mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP;
- mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb);
- mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF);
- } else {
- mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD;
- }
- mdata->content.tx.esp_next_proto = xo->proto;
-
- netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n",
- mdata->syndrome, mdata->content.tx.esp_next_proto,
- ntohs(mdata->content.tx.mss_inv),
- ntohs(mdata->content.tx.seq));
-}
-
void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
struct mlx5e_accel_tx_ipsec_state *ipsec_st,
struct mlx5_wqe_inline_seg *inlseg)
@@ -298,16 +202,14 @@ static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv,
ipsec_st->x = x;
ipsec_st->xo = xo;
- if (mlx5_is_ipsec_device(priv->mdev)) {
- aead = x->data;
- alen = crypto_aead_authsize(aead);
- blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- clen = ALIGN(skb->len + 2, blksize);
- plen = max_t(u32, clen - skb->len, 4);
- tailen = plen + alen;
- ipsec_st->plen = plen;
- ipsec_st->tailen = tailen;
- }
+ aead = x->data;
+ alen = crypto_aead_authsize(aead);
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ clen = ALIGN(skb->len + 2, blksize);
+ plen = max_t(u32, clen - skb->len, 4);
+ tailen = plen + alen;
+ ipsec_st->plen = plen;
+ ipsec_st->tailen = tailen;
return 0;
}
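
With the FPGA path gone, mlx5e_ipsec_set_state() above computes the ESP trailer unconditionally: clen rounds the payload plus the two pad-length/next-header bytes up to the 4-byte-aligned cipher block size, the pad length is at least 4, and the ICV length is added on top. A standalone rework of that arithmetic, with AES-GCM-style parameters assumed (block size 1, 16-byte ICV):

/*
 * Standalone rework of the trailer math above; parameters are assumptions
 * for the example, not values read from hardware.
 */
#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

int main(void)
{
	unsigned int skb_len = 1514;		/* payload bytes so far */
	unsigned int alen = 16;			/* ICV / auth tag length */
	unsigned int blksize = ALIGN_UP(1, 4);	/* GCM block size is 1 -> 4 */
	unsigned int clen = ALIGN_UP(skb_len + 2, blksize); /* +2: pad len, proto */
	unsigned int plen = MAX(clen - skb_len, 4u);
	unsigned int tailen = plen + alen;

	/* prints: plen=4 tailen=20 for a 1514-byte frame */
	printf("plen=%u tailen=%u\n", plen, tailen);
	return 0;
}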
@@ -340,19 +242,17 @@ void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
((struct iphdr *)skb_network_header(skb))->protocol :
((struct ipv6hdr *)skb_network_header(skb))->nexthdr;
- if (mlx5_is_ipsec_device(priv->mdev)) {
- eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
- eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER);
- encap = x->encap;
- if (!encap) {
- eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
- cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) :
- cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC);
- } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) {
- eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
- cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) :
- cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC);
- }
+ eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
+ eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER);
+ encap = x->encap;
+ if (!encap) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC);
+ } else if (encap->encap_type == UDP_ENCAP_ESPINUDP) {
+ eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) :
+ cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC);
}
}
@@ -363,7 +263,6 @@ bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct mlx5e_priv *priv = netdev_priv(netdev);
struct xfrm_offload *xo = xfrm_offload(skb);
struct mlx5e_ipsec_sa_entry *sa_entry;
- struct mlx5e_ipsec_metadata *mdata;
struct xfrm_state *x;
struct sec_path *sp;
@@ -392,19 +291,8 @@ bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
goto drop;
}
- if (MLX5_CAP_GEN(priv->mdev, fpga)) {
- mdata = mlx5e_ipsec_add_metadata(skb);
- if (IS_ERR(mdata)) {
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
- goto drop;
- }
- }
-
sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
sa_entry->set_iv_op(skb, x, xo);
- if (MLX5_CAP_GEN(priv->mdev, fpga))
- mlx5e_ipsec_set_metadata(skb, mdata, xo);
-
mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st);
return true;
@@ -414,79 +302,6 @@ drop:
return false;
}
-static inline struct xfrm_state *
-mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
- struct mlx5e_ipsec_metadata *mdata)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct xfrm_offload *xo;
- struct xfrm_state *xs;
- struct sec_path *sp;
- u32 sa_handle;
-
- sp = secpath_set(skb);
- if (unlikely(!sp)) {
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
- return NULL;
- }
-
- sa_handle = be32_to_cpu(mdata->content.rx.sa_handle);
- xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle);
- if (unlikely(!xs)) {
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
- return NULL;
- }
-
- sp = skb_sec_path(skb);
- sp->xvec[sp->len++] = xs;
- sp->olen++;
-
- xo = xfrm_offload(skb);
- xo->flags = CRYPTO_DONE;
- switch (mdata->syndrome) {
- case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
- xo->status = CRYPTO_SUCCESS;
- if (likely(priv->ipsec->no_trailer)) {
- xo->flags |= XFRM_ESP_NO_TRAILER;
- xo->proto = mdata->content.rx.nexthdr;
- }
- break;
- case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
- xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
- break;
- case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO:
- xo->status = CRYPTO_INVALID_PROTOCOL;
- break;
- default:
- atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
- return NULL;
- }
- return xs;
-}
-
-struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
- struct sk_buff *skb, u32 *cqe_bcnt)
-{
- struct mlx5e_ipsec_metadata *mdata;
- struct xfrm_state *xs;
-
- if (!is_metadata_hdr_valid(skb))
- return skb;
-
- /* Use the metadata */
- mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN);
- xs = mlx5e_ipsec_build_sp(netdev, skb, mdata);
- if (unlikely(!xs)) {
- kfree_skb(skb);
- return NULL;
- }
-
- remove_metadata_hdr(skb);
- *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
-
- return skb;
-}
-
enum {
MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED,
MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED,
@@ -528,8 +343,6 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) {
case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED:
xo->status = CRYPTO_SUCCESS;
- if (WARN_ON_ONCE(priv->ipsec->no_trailer))
- xo->flags |= XFRM_ESP_NO_TRAILER;
break;
case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_AUTH_FAILED:
xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
@@ -541,21 +354,3 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
}
}
-
-void mlx5e_ipsec_build_inverse_table(void)
-{
- u16 mss_inv;
- u32 mss;
-
- /* Calculate 1/x inverse table for use in GSO data path.
- * Using this table, we provide the IPSec accelerator with the value of
- * 1/gso_size so that it can infer the position of each segment inside
- * the GSO, and increment the ESP sequence number, and generate the IV.
- * The HW needs this value in Q0.16 fixed-point number format
- */
- mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
- for (mss = 2; mss < MAX_LSO_MSS; mss++) {
- mss_inv = div_u64(1ULL << 32, mss) >> 16;
- mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
- }
-}
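
The deleted mlx5e_ipsec_build_inverse_table() precomputed 1/MSS in Q0.16 fixed point for the FPGA's GSO handling. A standalone recomputation of one entry, showing how close the truncated value sits to the true reciprocal (the driver stored the result big-endian for the device):

/*
 * Standalone recomputation of a Q0.16 1/MSS table entry, mirroring the
 * deleted div_u64(1ULL << 32, mss) >> 16 expression.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mss = 1460;
	uint16_t mss_inv = (uint16_t)(((1ULL << 32) / mss) >> 16);

	/* 44 / 65536 ~= 0.000671, close to 1/1460 ~= 0.000685 */
	printf("mss=%u -> Q0.16 inverse = %u (%.6f)\n",
	       mss, mss_inv, mss_inv / 65536.0);
	return 0;
}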
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index 428881e0adcb..0ae4e12ce528 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -53,9 +53,6 @@ struct mlx5e_accel_tx_ipsec_state {
#ifdef CONFIG_MLX5_EN_IPSEC
-struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
- struct sk_buff *skb, u32 *cqe_bcnt);
-
void mlx5e_ipsec_inverse_table_init(void);
void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
index 5cb936541b9e..3aace1c2a763 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -35,27 +35,9 @@
#include <net/sock.h>
#include "en.h"
-#include "accel/ipsec.h"
+#include "ipsec_offload.h"
#include "fpga/sdk.h"
#include "en_accel/ipsec.h"
-#include "fpga/ipsec.h"
-
-static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) },
-};
static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
@@ -65,13 +47,11 @@ static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) },
- { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) },
};
#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
-#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc)
#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw)
@@ -103,45 +83,4 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw)
return idx;
}
-static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_hw)
-{
- return (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) ? NUM_IPSEC_HW_COUNTERS : 0;
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw)
-{
- int ret = 0;
-
- if (priv->ipsec)
- ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats,
- NUM_IPSEC_HW_COUNTERS);
- if (ret)
- memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats));
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_hw)
-{
- unsigned int i;
-
- if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev))
- for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
- strcpy(data + (idx++) * ETH_GSTRING_LEN,
- mlx5e_ipsec_hw_stats_desc[i].format);
-
- return idx;
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_hw)
-{
- int i;
-
- if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev))
- for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
- data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats,
- mlx5e_ipsec_hw_stats_desc,
- i);
- return idx;
-}
-
MLX5E_DEFINE_STATS_GRP(ipsec_sw, 0);
-MLX5E_DEFINE_STATS_GRP(ipsec_hw, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index d93aadbf10da..814f2a56f633 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -2,11 +2,49 @@
// Copyright (c) 2019 Mellanox Technologies.
#include "en.h"
-#include "en_accel/tls.h"
+#include "lib/mlx5.h"
#include "en_accel/ktls.h"
#include "en_accel/ktls_utils.h"
#include "en_accel/fs_tcp.h"
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id)
+{
+ u32 sz_bytes;
+ void *key;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128: {
+ struct tls12_crypto_info_aes_gcm_128 *info =
+ (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ case TLS_CIPHER_AES_GCM_256: {
+ struct tls12_crypto_info_aes_gcm_256 *info =
+ (struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+
+ key = info->key;
+ sz_bytes = sizeof(info->key);
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ return mlx5_create_encryption_key(mdev, key, sz_bytes,
+ MLX5_ACCEL_OBJ_TLS_KEY,
+ p_key_id);
+}
+
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id)
+{
+ mlx5_destroy_encryption_key(mdev, key_id);
+}
+
static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
enum tls_offload_ctx_dir direction,
struct tls_crypto_info *crypto_info,
@@ -59,15 +97,15 @@ void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
- if (!mlx5e_accel_is_ktls_tx(mdev) && !mlx5e_accel_is_ktls_rx(mdev))
+ if (!mlx5e_is_ktls_tx(mdev) && !mlx5e_is_ktls_rx(mdev))
return;
- if (mlx5e_accel_is_ktls_tx(mdev)) {
+ if (mlx5e_is_ktls_tx(mdev)) {
netdev->hw_features |= NETIF_F_HW_TLS_TX;
netdev->features |= NETIF_F_HW_TLS_TX;
}
- if (mlx5e_accel_is_ktls_rx(mdev))
+ if (mlx5e_is_ktls_rx(mdev))
netdev->hw_features |= NETIF_F_HW_TLS_RX;
netdev->tlsdev_ops = &mlx5e_ktls_ops;
@@ -92,7 +130,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
{
int err;
- if (!mlx5e_accel_is_ktls_rx(priv->mdev))
+ if (!mlx5e_is_ktls_rx(priv->mdev))
return 0;
priv->tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx");
@@ -112,7 +150,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv)
{
- if (!mlx5e_accel_is_ktls_rx(priv->mdev))
+ if (!mlx5e_is_ktls_rx(priv->mdev))
return;
if (priv->netdev->features & NETIF_F_HW_TLS_RX)
@@ -120,3 +158,24 @@ void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv)
destroy_workqueue(priv->tls->rx_wq);
}
+
+int mlx5e_ktls_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tls *tls;
+
+ if (!mlx5e_is_ktls_device(priv->mdev))
+ return 0;
+
+ tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+ if (!tls)
+ return -ENOMEM;
+
+ priv->tls = tls;
+ return 0;
+}
+
+void mlx5e_ktls_cleanup(struct mlx5e_priv *priv)
+{
+ kfree(priv->tls);
+ priv->tls = NULL;
+}
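
The key helpers now live in ktls.c and wrap mlx5_create_encryption_key()/mlx5_destroy_encryption_key() with cipher-specific key sizing. A hypothetical caller fragment (kernel context assumed, not part of the patch) showing the intended create/destroy pairing:

/*
 * Hypothetical caller fragment: demo_load_tls_key() is a made-up name
 * illustrating the flow of the helpers added above.
 */
static int demo_load_tls_key(struct mlx5_core_dev *mdev,
			     struct tls12_crypto_info_aes_gcm_128 *info,
			     u32 *p_key_id)
{
	int err;

	/* hands the AES-GCM-128 key to firmware as an encryption key object */
	err = mlx5_ktls_create_key(mdev, &info->info, p_key_id);
	if (err)
		return err;	/* -EINVAL for unsupported ciphers */

	/* ... *p_key_id would be programmed into the TIS/TIR here ... */

	mlx5_ktls_destroy_key(mdev, *p_key_id);	/* release the key object */
	return 0;
}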
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index 5833deb2354c..d016624fbc9d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -4,9 +4,42 @@
#ifndef __MLX5E_KTLS_H__
#define __MLX5E_KTLS_H__
+#include <linux/tls.h>
+#include <net/tls.h>
#include "en.h"
#ifdef CONFIG_MLX5_EN_TLS
+int mlx5_ktls_create_key(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info,
+ u32 *p_key_id);
+void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id);
+
+static inline bool mlx5e_is_ktls_device(struct mlx5_core_dev *mdev)
+{
+ if (is_kdump_kernel())
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, tls_tx) && !MLX5_CAP_GEN(mdev, tls_rx))
+ return false;
+
+ if (!MLX5_CAP_GEN(mdev, log_max_dek))
+ return false;
+
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+}
+
+static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev,
+ struct tls_crypto_info *crypto_info)
+{
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ if (crypto_info->version == TLS_1_2_VERSION)
+ return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128);
+ break;
+ }
+
+ return false;
+}
void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
int mlx5e_ktls_init_rx(struct mlx5e_priv *priv);
@@ -16,26 +49,36 @@ struct mlx5e_ktls_resync_resp *
mlx5e_ktls_rx_resync_create_resp_list(void);
void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list);
-static inline bool mlx5e_accel_is_ktls_tx(struct mlx5_core_dev *mdev)
+static inline bool mlx5e_is_ktls_tx(struct mlx5_core_dev *mdev)
{
- return !is_kdump_kernel() &&
- mlx5_accel_is_ktls_tx(mdev);
+ return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_tx);
}
-static inline bool mlx5e_accel_is_ktls_rx(struct mlx5_core_dev *mdev)
+static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev)
{
- return !is_kdump_kernel() &&
- mlx5_accel_is_ktls_rx(mdev);
+ return !is_kdump_kernel() && MLX5_CAP_GEN(mdev, tls_rx);
}
-static inline bool mlx5e_accel_is_ktls_device(struct mlx5_core_dev *mdev)
-{
- return !is_kdump_kernel() &&
- mlx5_accel_is_ktls_device(mdev);
-}
+struct mlx5e_tls_sw_stats {
+ atomic64_t tx_tls_ctx;
+ atomic64_t tx_tls_del;
+ atomic64_t rx_tls_ctx;
+ atomic64_t rx_tls_del;
+};
-#else
+struct mlx5e_tls {
+ struct mlx5e_tls_sw_stats sw_stats;
+ struct workqueue_struct *rx_wq;
+};
+int mlx5e_ktls_init(struct mlx5e_priv *priv);
+void mlx5e_ktls_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_ktls_get_count(struct mlx5e_priv *priv);
+int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+#else
static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
{
}
@@ -64,10 +107,23 @@ mlx5e_ktls_rx_resync_create_resp_list(void)
static inline void
mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {}
-static inline bool mlx5e_accel_is_ktls_tx(struct mlx5_core_dev *mdev) { return false; }
-static inline bool mlx5e_accel_is_ktls_rx(struct mlx5_core_dev *mdev) { return false; }
-static inline bool mlx5e_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev)
+{
+ return false;
+}
+
+static inline int mlx5e_ktls_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { }
+static inline int mlx5e_ktls_get_count(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+ return 0;
+}
+static inline int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ return 0;
+}
#endif
#endif /* __MLX5E_KTLS_H__ */
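
Note how mlx5e_is_ktls_device() above orders its bail-outs from broadest to most specific: kdump kernels never offload, then the generic tls_tx/tls_rx capability bits, then DEK (key object) support, and finally at least one usable cipher. A self-contained sketch of the same short-circuit shape, with hypothetical cap_* stubs standing in for MLX5_CAP_GEN()/MLX5_CAP_TLS():

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical capability stubs; the real reads are MLX5_CAP_* macros. */
static bool cap_tls_tx = true, cap_tls_rx = true;
static bool cap_log_max_dek = true, cap_tls12_gcm128 = true;
static bool in_kdump;	/* is_kdump_kernel() stand-in */

static bool is_ktls_device(void)
{
	if (in_kdump)
		return false;		/* no offloads in a crash kernel */
	if (!cap_tls_tx && !cap_tls_rx)
		return false;		/* neither direction supported */
	if (!cap_log_max_dek)
		return false;		/* no key (DEK) objects */
	return cap_tls12_gcm128;	/* need at least one usable cipher */
}

int main(void) { printf("%d\n", is_ktls_device()); }
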
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 96064a2033f7..0bb0633b7542 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -3,7 +3,7 @@
#include <net/inet6_hashtables.h>
#include "en_accel/en_accel.h"
-#include "en_accel/tls.h"
+#include "en_accel/ktls.h"
#include "en_accel/ktls_txrx.h"
#include "en_accel/ktls_utils.h"
#include "en_accel/fs_tcp.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c
index 56e7b2aee85f..2ab46c4247ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c
@@ -36,14 +36,7 @@
#include "en.h"
#include "fpga/sdk.h"
-#include "en_accel/tls.h"
-
-static const struct counter_desc mlx5e_tls_sw_stats_desc[] = {
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) },
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) },
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) },
- { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) },
-};
+#include "en_accel/ktls.h"
static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) },
@@ -55,51 +48,43 @@ static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = {
#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
-static const struct counter_desc *get_tls_atomic_stats(struct mlx5e_priv *priv)
-{
- if (!priv->tls)
- return NULL;
- if (mlx5e_accel_is_ktls_device(priv->mdev))
- return mlx5e_ktls_sw_stats_desc;
- return mlx5e_tls_sw_stats_desc;
-}
-
-int mlx5e_tls_get_count(struct mlx5e_priv *priv)
+int mlx5e_ktls_get_count(struct mlx5e_priv *priv)
{
if (!priv->tls)
return 0;
- if (mlx5e_accel_is_ktls_device(priv->mdev))
- return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc);
- return ARRAY_SIZE(mlx5e_tls_sw_stats_desc);
+
+ return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc);
}
-int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+int mlx5e_ktls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
{
- const struct counter_desc *stats_desc;
unsigned int i, n, idx = 0;
- stats_desc = get_tls_atomic_stats(priv);
- n = mlx5e_tls_get_count(priv);
+ if (!priv->tls)
+ return 0;
+
+ n = mlx5e_ktls_get_count(priv);
for (i = 0; i < n; i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
- stats_desc[i].format);
+ mlx5e_ktls_sw_stats_desc[i].format);
return n;
}
-int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data)
+int mlx5e_ktls_get_stats(struct mlx5e_priv *priv, u64 *data)
{
- const struct counter_desc *stats_desc;
unsigned int i, n, idx = 0;
- stats_desc = get_tls_atomic_stats(priv);
- n = mlx5e_tls_get_count(priv);
+ if (!priv->tls)
+ return 0;
+
+ n = mlx5e_ktls_get_count(priv);
for (i = 0; i < n; i++)
- data[idx++] =
- MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
- stats_desc, i);
+ data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
+ mlx5e_ktls_sw_stats_desc,
+ i);
return n;
}
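
All three accessors above now walk a single descriptor table: each entry pairs an ethtool string with the byte offset of an atomic64 counter inside struct mlx5e_tls_sw_stats, and MLX5E_READ_CTR_ATOMIC64() dereferences through that offset. A self-contained sketch of the offset-table idiom (plain long long in place of atomic64_t):

#include <stddef.h>
#include <stdio.h>

struct sw_stats { long long tx_tls_ctx, tx_tls_del, rx_tls_ctx, rx_tls_del; };

struct counter_desc { const char *name; size_t offset; };

#define DECLARE_STAT(type, fld) { #fld, offsetof(type, fld) }

static const struct counter_desc desc[] = {
	DECLARE_STAT(struct sw_stats, tx_tls_ctx),
	DECLARE_STAT(struct sw_stats, rx_tls_ctx),
};

/* Equivalent of MLX5E_READ_CTR_ATOMIC64: read a field through its offset. */
static long long read_ctr(const void *base, const struct counter_desc *d)
{
	return *(const long long *)((const char *)base + d->offset);
}

int main(void)
{
	struct sw_stats s = { .tx_tls_ctx = 3, .rx_tls_ctx = 7 };
	for (size_t i = 0; i < sizeof(desc) / sizeof(desc[0]); i++)
		printf("%s = %lld\n", desc[i].name, read_ctr(&s, &desc[i]));
}
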
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index aaf11c66bf4c..4b6f0d1ea59a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2019 Mellanox Technologies.
-#include "en_accel/tls.h"
+#include "en_accel/ktls.h"
#include "en_accel/ktls_txrx.h"
#include "en_accel/ktls_utils.h"
@@ -27,7 +27,7 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *pa
{
u16 num_dumps, stop_room = 0;
- if (!mlx5e_accel_is_ktls_tx(mdev))
+ if (!mlx5e_is_ktls_tx(mdev))
return 0;
num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
@@ -448,14 +448,26 @@ err_out:
return MLX5E_KTLS_SYNC_FAIL;
}
-bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
- struct sk_buff *skb, int datalen,
+bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
struct mlx5e_accel_tx_tls_state *state)
{
struct mlx5e_ktls_offload_context_tx *priv_tx;
struct mlx5e_sq_stats *stats = sq->stats;
+ struct tls_context *tls_ctx;
+ int datalen;
u32 seq;
+ datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+ if (!datalen)
+ return true;
+
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+
+ tls_ctx = tls_get_ctx(skb->sk);
+ if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
+ goto err_out;
+
priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) {
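
The new hunk above moves the payload-length computation into the handler itself: everything past the TCP header is TLS record data, so datalen = skb->len - (transport offset + TCP header length), and a zero result (e.g. a pure ACK) needs no crypto work at all. The arithmetic in isolation, with hypothetical header sizes:

#include <stdio.h>

int main(void)
{
	/* Hypothetical frame: 14B eth + 20B IPv4 + 32B TCP (doff=8) + 100B payload. */
	unsigned int skb_len = 14 + 20 + 32 + 100;
	unsigned int transport_offset = 14 + 20;	/* skb_transport_offset() */
	unsigned int tcp_hdrlen = 8 * 4;		/* tcp_hdr(skb)->doff * 4 */

	unsigned int datalen = skb_len - (transport_offset + tcp_hdrlen);
	printf("datalen = %u\n", datalen);	/* 100: bytes the HW must encrypt */
}
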
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
index 08c9d5134479..2dd78dd4ad65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -16,8 +16,8 @@ struct mlx5e_accel_tx_tls_state {
u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
-bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
- struct sk_buff *skb, int datalen,
+bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
struct mlx5e_accel_tx_tls_state *state);
void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
struct mlx5_cqe64 *cqe, u32 *cqe_bcnt);
@@ -48,6 +48,18 @@ mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
{
return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state);
}
+
+static inline bool mlx5e_ktls_skb_offloaded(struct sk_buff *skb)
+{
+ return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
+}
+
+static inline void
+mlx5e_ktls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg,
+ struct mlx5e_accel_tx_tls_state *state)
+{
+ cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8);
+}
#else
static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
@@ -69,6 +81,18 @@ mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
return false;
}
+static inline u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return 0;
+}
+
+static inline void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq,
+ struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe,
+ u32 *cqe_bcnt)
+{
+}
#endif /* CONFIG_MLX5_EN_TLS */
#endif /* __MLX5E_TLS_TXRX_H__ */
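
mlx5e_ktls_handle_tx_wqe() above encodes the connection's TIS number into the WQE control segment: the 24-bit TIS number occupies the upper three bytes of tis_tir_num, hence the << 8 before the CPU-to-big-endian conversion. A small sketch of the encoding:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t tisn = 0x00abcdef;		/* hypothetical 24-bit TIS number */
	uint32_t tis_tir_num = htonl(tisn << 8);	/* low byte left clear */

	/* On the wire the three TIS bytes come first, then the spare byte. */
	const uint8_t *b = (const uint8_t *)&tis_tir_num;
	printf("%02x %02x %02x %02x\n", b[0], b[1], b[2], b[3]);	/* ab cd ef 00 */
}
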
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h
index e5c180f2403b..0dc715c4c10d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_utils.h
@@ -6,7 +6,6 @@
#include <net/tls.h>
#include "en.h"
-#include "accel/tls.h"
enum {
MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
deleted file mode 100644
index b8fc863aa68d..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/netdevice.h>
-#include <net/ipv6.h>
-#include "en_accel/tls.h"
-#include "accel/tls.h"
-
-static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
-
- MLX5_SET(tls_flow, flow, ipv6, 0);
- memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
- memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
-
- MLX5_SET(tls_flow, flow, ipv6, 1);
- memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
- memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
-}
-#endif
-
-static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk)
-{
- struct inet_sock *inet = inet_sk(sk);
-
- memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport,
- MLX5_FLD_SZ_BYTES(tls_flow, src_port));
- memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport,
- MLX5_FLD_SZ_BYTES(tls_flow, dst_port));
-}
-
-static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps)
-{
- switch (sk->sk_family) {
- case AF_INET:
- mlx5e_tls_set_ipv4_flow(flow, sk);
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- if (!sk->sk_ipv6only &&
- ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
- mlx5e_tls_set_ipv4_flow(flow, sk);
- break;
- }
- if (!(caps & MLX5_ACCEL_TLS_IPV6))
- goto error_out;
-
- mlx5e_tls_set_ipv6_flow(flow, sk);
- break;
-#endif
- default:
- goto error_out;
- }
-
- mlx5e_tls_set_flow_tcp_ports(flow, sk);
- return 0;
-error_out:
- return -EINVAL;
-}
-
-static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk,
- enum tls_offload_ctx_dir direction,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct mlx5_core_dev *mdev = priv->mdev;
- u32 caps = mlx5_accel_tls_device_caps(mdev);
- int ret = -ENOMEM;
- void *flow;
- u32 swid;
-
- flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL);
- if (!flow)
- return ret;
-
- ret = mlx5e_tls_set_flow(flow, sk, caps);
- if (ret)
- goto free_flow;
-
- ret = mlx5_accel_tls_add_flow(mdev, flow, crypto_info,
- start_offload_tcp_sn, &swid,
- direction == TLS_OFFLOAD_CTX_DIR_TX);
- if (ret < 0)
- goto free_flow;
-
- if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
- struct mlx5e_tls_offload_context_tx *tx_ctx =
- mlx5e_get_tls_tx_context(tls_ctx);
-
- tx_ctx->swid = htonl(swid);
- tx_ctx->expected_seq = start_offload_tcp_sn;
- } else {
- struct mlx5e_tls_offload_context_rx *rx_ctx =
- mlx5e_get_tls_rx_context(tls_ctx);
-
- rx_ctx->handle = htonl(swid);
- }
-
- return 0;
-free_flow:
- kfree(flow);
- return ret;
-}
-
-static void mlx5e_tls_del(struct net_device *netdev,
- struct tls_context *tls_ctx,
- enum tls_offload_ctx_dir direction)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- unsigned int handle;
-
- handle = ntohl((direction == TLS_OFFLOAD_CTX_DIR_TX) ?
- mlx5e_get_tls_tx_context(tls_ctx)->swid :
- mlx5e_get_tls_rx_context(tls_ctx)->handle);
-
- mlx5_accel_tls_del_flow(priv->mdev, handle,
- direction == TLS_OFFLOAD_CTX_DIR_TX);
-}
-
-static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk,
- u32 seq, u8 *rcd_sn_data,
- enum tls_offload_ctx_dir direction)
-{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_tls_offload_context_rx *rx_ctx;
- __be64 rcd_sn = *(__be64 *)rcd_sn_data;
-
- if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX))
- return -EINVAL;
- rx_ctx = mlx5e_get_tls_rx_context(tls_ctx);
-
- netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq,
- be64_to_cpu(rcd_sn));
- mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn);
- atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply);
-
- return 0;
-}
-
-static const struct tlsdev_ops mlx5e_tls_ops = {
- .tls_dev_add = mlx5e_tls_add,
- .tls_dev_del = mlx5e_tls_del,
- .tls_dev_resync = mlx5e_tls_resync,
-};
-
-void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
-{
- struct net_device *netdev = priv->netdev;
- u32 caps;
-
- if (mlx5e_accel_is_ktls_device(priv->mdev)) {
- mlx5e_ktls_build_netdev(priv);
- return;
- }
-
- /* FPGA */
- if (!mlx5e_accel_is_tls_device(priv->mdev))
- return;
-
- caps = mlx5_accel_tls_device_caps(priv->mdev);
- if (caps & MLX5_ACCEL_TLS_TX) {
- netdev->features |= NETIF_F_HW_TLS_TX;
- netdev->hw_features |= NETIF_F_HW_TLS_TX;
- }
-
- if (caps & MLX5_ACCEL_TLS_RX) {
- netdev->features |= NETIF_F_HW_TLS_RX;
- netdev->hw_features |= NETIF_F_HW_TLS_RX;
- }
-
- if (!(caps & MLX5_ACCEL_TLS_LRO)) {
- netdev->features &= ~NETIF_F_LRO;
- netdev->hw_features &= ~NETIF_F_LRO;
- }
-
- netdev->tlsdev_ops = &mlx5e_tls_ops;
-}
-
-int mlx5e_tls_init(struct mlx5e_priv *priv)
-{
- struct mlx5e_tls *tls;
-
- if (!mlx5e_accel_is_tls_device(priv->mdev))
- return 0;
-
- tls = kzalloc(sizeof(*tls), GFP_KERNEL);
- if (!tls)
- return -ENOMEM;
-
- priv->tls = tls;
- return 0;
-}
-
-void mlx5e_tls_cleanup(struct mlx5e_priv *priv)
-{
- struct mlx5e_tls *tls = priv->tls;
-
- if (!tls)
- return;
-
- kfree(tls);
- priv->tls = NULL;
-}
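
For reference, the deleted mlx5e_tls_set_*_flow() helpers filled a firmware command buffer field by field: MLX5_SET() writes a named bit-field and MLX5_ADDR_OF() returns a pointer to a named byte range, into which addresses and ports are copied already in network byte order. A simplified stand-alone model of that pattern (the offsets are hypothetical, not the real tls_flow layout):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Hypothetical layout standing in for MLX5_ADDR_OF(tls_flow, ...) offsets. */
enum { OFF_IPV6_FLAG = 0, OFF_DADDR = 4, OFF_SADDR = 8,
       OFF_SPORT = 12, OFF_DPORT = 14, FLOW_SZ = 16 };

static void set_ipv4_flow(uint8_t *flow, uint32_t daddr_be, uint32_t saddr_be,
			  uint16_t sport_be, uint16_t dport_be)
{
	flow[OFF_IPV6_FLAG] = 0;			/* MLX5_SET(..., ipv6, 0) */
	memcpy(flow + OFF_DADDR, &daddr_be, 4);	/* already big-endian */
	memcpy(flow + OFF_SADDR, &saddr_be, 4);
	memcpy(flow + OFF_SPORT, &sport_be, 2);
	memcpy(flow + OFF_DPORT, &dport_be, 2);
}

int main(void)
{
	uint8_t flow[FLOW_SZ] = {0};
	set_ipv4_flow(flow, 0x0100007f, 0x0200007f, 0xbb01, 0x5000);
	for (int i = 0; i < FLOW_SZ; i++)
		printf("%02x", flow[i]);
	printf("\n");
}
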
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
deleted file mode 100644
index 62ecf14bf86a..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-#ifndef __MLX5E_TLS_H__
-#define __MLX5E_TLS_H__
-
-#include "accel/tls.h"
-#include "en_accel/ktls.h"
-
-#ifdef CONFIG_MLX5_EN_TLS
-#include <net/tls.h>
-#include "en.h"
-
-struct mlx5e_tls_sw_stats {
- atomic64_t tx_tls_ctx;
- atomic64_t tx_tls_del;
- atomic64_t tx_tls_drop_metadata;
- atomic64_t tx_tls_drop_resync_alloc;
- atomic64_t tx_tls_drop_no_sync_data;
- atomic64_t tx_tls_drop_bypass_required;
- atomic64_t rx_tls_ctx;
- atomic64_t rx_tls_del;
- atomic64_t rx_tls_drop_resync_request;
- atomic64_t rx_tls_resync_request;
- atomic64_t rx_tls_resync_reply;
- atomic64_t rx_tls_auth_fail;
-};
-
-struct mlx5e_tls {
- struct mlx5e_tls_sw_stats sw_stats;
- struct workqueue_struct *rx_wq;
-};
-
-struct mlx5e_tls_offload_context_tx {
- struct tls_offload_context_tx base;
- u32 expected_seq;
- __be32 swid;
-};
-
-static inline struct mlx5e_tls_offload_context_tx *
-mlx5e_get_tls_tx_context(struct tls_context *tls_ctx)
-{
- BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_tx) >
- TLS_OFFLOAD_CONTEXT_SIZE_TX);
- return container_of(tls_offload_ctx_tx(tls_ctx),
- struct mlx5e_tls_offload_context_tx,
- base);
-}
-
-struct mlx5e_tls_offload_context_rx {
- struct tls_offload_context_rx base;
- __be32 handle;
-};
-
-static inline struct mlx5e_tls_offload_context_rx *
-mlx5e_get_tls_rx_context(struct tls_context *tls_ctx)
-{
- BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_rx) >
- TLS_OFFLOAD_CONTEXT_SIZE_RX);
- return container_of(tls_offload_ctx_rx(tls_ctx),
- struct mlx5e_tls_offload_context_rx,
- base);
-}
-
-static inline bool mlx5e_is_tls_on(struct mlx5e_priv *priv)
-{
- return priv->tls;
-}
-
-void mlx5e_tls_build_netdev(struct mlx5e_priv *priv);
-int mlx5e_tls_init(struct mlx5e_priv *priv);
-void mlx5e_tls_cleanup(struct mlx5e_priv *priv);
-
-int mlx5e_tls_get_count(struct mlx5e_priv *priv);
-int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
-int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
-
-static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev)
-{
- return !is_kdump_kernel() &&
- mlx5_accel_is_tls_device(mdev);
-}
-
-#else
-
-static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
-{
- if (!is_kdump_kernel() &&
- mlx5_accel_is_ktls_device(priv->mdev))
- mlx5e_ktls_build_netdev(priv);
-}
-
-static inline bool mlx5e_is_tls_on(struct mlx5e_priv *priv) { return false; }
-static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
-static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
-static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
-static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
-static inline bool mlx5e_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
-
-#endif
-
-#endif /* __MLX5E_TLS_H__ */
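
The deleted mlx5e_get_tls_tx_context() illustrates the driver-private-context idiom that the kTLS code still uses: the TLS core preallocates TLS_OFFLOAD_CONTEXT_SIZE_TX bytes, the driver embeds the core struct as the first member of its own, and a BUILD_BUG_ON proves the wrapper still fits. A stand-alone equivalent with _Static_assert and a local container_of (sizes hypothetical):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

#define CTX_RESERVED_SIZE 64	/* stands in for TLS_OFFLOAD_CONTEXT_SIZE_TX */

struct tls_offload_ctx { int state; };	/* generic core context */

struct drv_ctx {			/* driver wrapper, base comes first */
	struct tls_offload_ctx base;
	unsigned int expected_seq;
	unsigned int swid;
};

/* Compile-time proof the wrapper fits in the core's preallocated slab. */
_Static_assert(sizeof(struct drv_ctx) <= CTX_RESERVED_SIZE, "ctx too big");

int main(void)
{
	struct drv_ctx d = { .swid = 42 };
	struct tls_offload_ctx *base = &d.base;	/* what the core hands back */
	printf("%u\n", container_of(base, struct drv_ctx, base)->swid);
}
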
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
deleted file mode 100644
index a05580cea481..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include "en_accel/tls.h"
-#include "en_accel/tls_rxtx.h"
-#include "accel/accel.h"
-
-#include <net/inet6_hashtables.h>
-#include <linux/ipv6.h>
-
-#define SYNDROM_DECRYPTED 0x30
-#define SYNDROM_RESYNC_REQUEST 0x31
-#define SYNDROM_AUTH_FAILED 0x32
-
-#define SYNDROME_OFFLOAD_REQUIRED 32
-#define SYNDROME_SYNC 33
-
-struct sync_info {
- u64 rcd_sn;
- s32 sync_len;
- int nr_frags;
- skb_frag_t frags[MAX_SKB_FRAGS];
-};
-
-struct recv_metadata_content {
- u8 syndrome;
- u8 reserved;
- __be32 sync_seq;
-} __packed;
-
-struct send_metadata_content {
- /* One byte of syndrome followed by 3 bytes of swid */
- __be32 syndrome_swid;
- __be16 first_seq;
-} __packed;
-
-struct mlx5e_tls_metadata {
- union {
- /* from fpga to host */
- struct recv_metadata_content recv;
- /* from host to fpga */
- struct send_metadata_content send;
- unsigned char raw[6];
- } __packed content;
- /* packet type ID field */
- __be16 ethertype;
-} __packed;
-
-static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid)
-{
- struct mlx5e_tls_metadata *pet;
- struct ethhdr *eth;
-
- if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata)))
- return -ENOMEM;
-
- eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata));
- skb->mac_header -= sizeof(struct mlx5e_tls_metadata);
- pet = (struct mlx5e_tls_metadata *)(eth + 1);
-
- memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata),
- 2 * ETH_ALEN);
-
- eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
- pet->content.send.syndrome_swid =
- htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid;
-
- return 0;
-}
-
-static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context_tx *context,
- u32 tcp_seq, struct sync_info *info)
-{
- int remaining, i = 0, ret = -EINVAL;
- struct tls_record_info *record;
- unsigned long flags;
- s32 sync_size;
-
- spin_lock_irqsave(&context->base.lock, flags);
- record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn);
-
- if (unlikely(!record))
- goto out;
-
- sync_size = tcp_seq - tls_record_start_seq(record);
- info->sync_len = sync_size;
- if (unlikely(sync_size < 0)) {
- if (tls_record_is_start_marker(record))
- goto done;
-
- goto out;
- }
-
- remaining = sync_size;
- while (remaining > 0) {
- info->frags[i] = record->frags[i];
- __skb_frag_ref(&info->frags[i]);
- remaining -= skb_frag_size(&info->frags[i]);
-
- if (remaining < 0)
- skb_frag_size_add(&info->frags[i], remaining);
-
- i++;
- }
- info->nr_frags = i;
-done:
- ret = 0;
-out:
- spin_unlock_irqrestore(&context->base.lock, flags);
- return ret;
-}
-
-static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
- struct sk_buff *nskb, u32 tcp_seq,
- int headln, __be64 rcd_sn)
-{
- struct mlx5e_tls_metadata *pet;
- u8 syndrome = SYNDROME_SYNC;
- struct iphdr *iph;
- struct tcphdr *th;
- int data_len, mss;
-
- nskb->dev = skb->dev;
- skb_reset_mac_header(nskb);
- skb_set_network_header(nskb, skb_network_offset(skb));
- skb_set_transport_header(nskb, skb_transport_offset(skb));
- memcpy(nskb->data, skb->data, headln);
- memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn));
-
- iph = ip_hdr(nskb);
- iph->tot_len = htons(nskb->len - skb_network_offset(nskb));
- th = tcp_hdr(nskb);
- data_len = nskb->len - headln;
- tcp_seq -= data_len;
- th->seq = htonl(tcp_seq);
-
- mss = nskb->dev->mtu - (headln - skb_network_offset(nskb));
- skb_shinfo(nskb)->gso_size = 0;
- if (data_len > mss) {
- skb_shinfo(nskb)->gso_size = mss;
- skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss);
- }
- skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
-
- pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr));
- memcpy(pet, &syndrome, sizeof(syndrome));
- pet->content.send.first_seq = htons(tcp_seq);
-
- /* MLX5 devices don't care about the checksum partial start, offset
- * and pseudo header
- */
- nskb->ip_summed = CHECKSUM_PARTIAL;
-
- nskb->queue_mapping = skb->queue_mapping;
-}
-
-static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
- struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5e_tls *tls)
-{
- u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
- struct sync_info info;
- struct sk_buff *nskb;
- int linear_len = 0;
- int headln;
- int i;
-
- sq->stats->tls_ooo++;
-
- if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) {
- /* We might get here if a retransmission reaches the driver
- * after the relevant record is acked.
- * It should be safe to drop the packet in this case
- */
- atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data);
- goto err_out;
- }
-
- if (unlikely(info.sync_len < 0)) {
- u32 payload;
-
- headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
- payload = skb->len - headln;
- if (likely(payload <= -info.sync_len))
- /* SKB payload doesn't require offload
- */
- return true;
-
- atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required);
- goto err_out;
- }
-
- if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
- atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata);
- goto err_out;
- }
-
- headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
- linear_len += headln + sizeof(info.rcd_sn);
- nskb = alloc_skb(linear_len, GFP_ATOMIC);
- if (unlikely(!nskb)) {
- atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc);
- goto err_out;
- }
-
- context->expected_seq = tcp_seq + skb->len - headln;
- skb_put(nskb, linear_len);
- for (i = 0; i < info.nr_frags; i++)
- skb_shinfo(nskb)->frags[i] = info.frags[i];
-
- skb_shinfo(nskb)->nr_frags = info.nr_frags;
- nskb->data_len = info.sync_len;
- nskb->len += info.sync_len;
- sq->stats->tls_resync_bytes += nskb->len;
- mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
- cpu_to_be64(info.rcd_sn));
- mlx5e_sq_xmit_simple(sq, nskb, true);
-
- return true;
-
-err_out:
- dev_kfree_skb_any(skb);
- return false;
-}
-
-bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
- struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_tls_offload_context_tx *context;
- struct tls_context *tls_ctx;
- u32 expected_seq;
- int datalen;
- u32 skb_seq;
-
- datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
- if (!datalen)
- return true;
-
- mlx5e_tx_mpwqe_ensure_complete(sq);
-
- tls_ctx = tls_get_ctx(skb->sk);
- if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
- goto err_out;
-
- if (mlx5e_accel_is_ktls_tx(sq->mdev))
- return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, datalen, state);
-
- /* FPGA */
- skb_seq = ntohl(tcp_hdr(skb)->seq);
- context = mlx5e_get_tls_tx_context(tls_ctx);
- expected_seq = context->expected_seq;
-
- if (unlikely(expected_seq != skb_seq))
- return mlx5e_tls_handle_ooo(context, sq, skb, priv->tls);
-
- if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
- atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata);
- dev_kfree_skb_any(skb);
- return false;
- }
-
- context->expected_seq = skb_seq + datalen;
- return true;
-
-err_out:
- dev_kfree_skb_any(skb);
- return false;
-}
-
-static int tls_update_resync_sn(struct net_device *netdev,
- struct sk_buff *skb,
- struct mlx5e_tls_metadata *mdata)
-{
- struct sock *sk = NULL;
- struct iphdr *iph;
- struct tcphdr *th;
- __be32 seq;
-
- if (mdata->ethertype != htons(ETH_P_IP))
- return -EINVAL;
-
- iph = (struct iphdr *)(mdata + 1);
-
- th = ((void *)iph) + iph->ihl * 4;
-
- if (iph->version == 4) {
- sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo,
- iph->saddr, th->source, iph->daddr,
- th->dest, netdev->ifindex);
-#if IS_ENABLED(CONFIG_IPV6)
- } else {
- struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph;
-
- sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo,
- &ipv6h->saddr, th->source,
- &ipv6h->daddr, ntohs(th->dest),
- netdev->ifindex, 0);
-#endif
- }
- if (!sk || sk->sk_state == TCP_TIME_WAIT) {
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- atomic64_inc(&priv->tls->sw_stats.rx_tls_drop_resync_request);
- goto out;
- }
-
- skb->sk = sk;
- skb->destructor = sock_edemux;
-
- memcpy(&seq, &mdata->content.recv.sync_seq, sizeof(seq));
- tls_offload_rx_resync_request(sk, seq);
-out:
- return 0;
-}
-
-/* FPGA tls rx handler */
-void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb,
- u32 *cqe_bcnt)
-{
- struct mlx5e_tls_metadata *mdata;
- struct mlx5e_priv *priv;
-
- /* Use the metadata */
- mdata = (struct mlx5e_tls_metadata *)(skb->data + ETH_HLEN);
- switch (mdata->content.recv.syndrome) {
- case SYNDROM_DECRYPTED:
- skb->decrypted = 1;
- break;
- case SYNDROM_RESYNC_REQUEST:
- tls_update_resync_sn(rq->netdev, skb, mdata);
- priv = netdev_priv(rq->netdev);
- atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_request);
- break;
- case SYNDROM_AUTH_FAILED:
- /* Authentication failure will be observed and verified by kTLS */
- priv = netdev_priv(rq->netdev);
- atomic64_inc(&priv->tls->sw_stats.rx_tls_auth_fail);
- break;
- default:
- /* Not a known TLS syndrome: leave the metadata header for other consumers */
- return;
- }
-
- remove_metadata_hdr(skb);
- *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
-}
-
-u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
-{
- if (!mlx5e_accel_is_tls_device(mdev))
- return 0;
-
- if (mlx5e_accel_is_ktls_device(mdev))
- return mlx5e_ktls_get_stop_room(mdev, params);
-
- /* FPGA */
- /* Resync SKB. */
- return mlx5e_stop_room_for_max_wqe(mdev);
-}
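
The removed mlx5e_tls_add_metadata() spliced an 8-byte FPGA metadata header into the frame by pushing headroom and sliding only the two MAC addresses forward; the metadata's trailing ethertype member then overlays the frame's original ethertype. A byte-array sketch of that splice (the metadata ethertype value is illustrative):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define META_LEN 8	/* sizeof(struct mlx5e_tls_metadata) */

int main(void)
{
	/* Frame with META_LEN bytes of headroom: dmac(6) smac(6) ethertype(2)... */
	uint8_t frame[64] = {0};
	memset(frame + META_LEN, 0xaa, 6);	/* dmac */
	memset(frame + META_LEN + 6, 0xbb, 6);	/* smac */
	frame[META_LEN + 12] = 0x08;		/* original ETH_P_IP */
	frame[META_LEN + 13] = 0x00;

	/* skb_push() exposed frame[0..7]; slide only the two MAC addresses. */
	memmove(frame, frame + META_LEN, 12);

	frame[12] = 0x8c; frame[13] = 0xe4;	/* metadata ethertype (illustrative) */
	/* frame[14..19] would get the 6 bytes of syndrome/swid/seq content;
	 * frame[20..21] still holds the original 0x0800 ethertype. */
	printf("%02x%02x ... %02x%02x\n", frame[12], frame[13], frame[20], frame[21]);
}
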
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
deleted file mode 100644
index 0ca0a023fb8d..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5E_TLS_RXTX_H__
-#define __MLX5E_TLS_RXTX_H__
-
-#include "accel/accel.h"
-#include "en_accel/ktls_txrx.h"
-
-#ifdef CONFIG_MLX5_EN_TLS
-
-#include <linux/skbuff.h>
-#include "en.h"
-#include "en/txrx.h"
-
-u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
-
-bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
- struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state);
-
-static inline bool mlx5e_tls_skb_offloaded(struct sk_buff *skb)
-{
- return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
-}
-
-static inline void
-mlx5e_tls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg,
- struct mlx5e_accel_tx_tls_state *state)
-{
- cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8);
-}
-
-void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb,
- u32 *cqe_bcnt);
-
-static inline void
-mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
- struct mlx5_cqe64 *cqe, u32 *cqe_bcnt)
-{
- if (unlikely(get_cqe_tls_offload(cqe))) /* cqe bit indicates a TLS device */
- return mlx5e_ktls_handle_rx_skb(rq, skb, cqe, cqe_bcnt);
-
- if (unlikely(test_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state) && is_metadata_hdr_valid(skb)))
- return mlx5e_tls_handle_rx_skb_metadata(rq, skb, cqe_bcnt);
-}
-
-#else
-
-static inline bool
-mlx5e_accel_is_tls(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { return false; }
-static inline void
-mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
- struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) {}
-static inline u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
-{
- return 0;
-}
-
-#endif /* CONFIG_MLX5_EN_TLS */
-
-#endif /* __MLX5E_TLS_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2f1dedc721d1..12b72a0bcb1a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -47,9 +47,8 @@
#include "en_rep.h"
#include "en_accel/ipsec.h"
#include "en_accel/en_accel.h"
-#include "en_accel/tls.h"
-#include "accel/ipsec.h"
-#include "accel/tls.h"
+#include "en_accel/ktls.h"
+#include "en_accel/ipsec_offload.h"
#include "lib/vxlan.h"
#include "lib/clock.h"
#include "en/port.h"
@@ -68,7 +67,6 @@
#include "en/ptp.h"
#include "qos.h"
#include "en/trap.h"
-#include "fpga/ipsec.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
@@ -1036,9 +1034,6 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
if (err)
goto err_destroy_rq;
- if (mlx5e_is_tls_on(rq->priv) && !mlx5e_accel_is_ktls_device(mdev))
- __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
-
if (MLX5_CAP_ETH(mdev, cqe_checksum_full))
__set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
@@ -1334,7 +1329,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
- if (MLX5_IPSEC_DEV(c->priv->mdev))
+ if (mlx5_ipsec_device_caps(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
if (param->is_mpw)
set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
@@ -4471,12 +4466,6 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
return -EINVAL;
}
- if (mlx5_fpga_is_ipsec_device(priv->mdev)) {
- netdev_warn(netdev,
- "XDP is not available on Innova cards with IPsec support\n");
- return -EINVAL;
- }
-
new_params = priv->channels.params;
new_params.xdp_prog = prog;
@@ -4934,7 +4923,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
mlx5e_set_netdev_dev_addr(netdev);
mlx5e_ipsec_build_netdev(priv);
- mlx5e_tls_build_netdev(priv);
+ mlx5e_ktls_build_netdev(priv);
}
void mlx5e_create_q_counters(struct mlx5e_priv *priv)
@@ -4996,7 +4985,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
if (err)
mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
- err = mlx5e_tls_init(priv);
+ err = mlx5e_ktls_init(priv);
if (err)
mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
@@ -5007,7 +4996,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
{
mlx5e_health_destroy_reporters(priv);
- mlx5e_tls_cleanup(priv);
+ mlx5e_ktls_cleanup(priv);
mlx5e_ipsec_cleanup(priv);
mlx5e_fs_cleanup(priv);
}
@@ -5704,7 +5693,6 @@ int mlx5e_init(void)
{
int ret;
- mlx5e_ipsec_build_inverse_table();
mlx5e_build_ptys2ethtool_map();
ret = auxiliary_driver_register(&mlx5e_driver);
if (ret)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 6b7e7ea6ded2..47f7b4c034cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1112,7 +1112,6 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = {
&MLX5E_STATS_GRP(per_port_buff_congest),
#ifdef CONFIG_MLX5_EN_IPSEC
&MLX5E_STATS_GRP(ipsec_sw),
- &MLX5E_STATS_GRP(ipsec_hw),
#endif
&MLX5E_STATS_GRP(ptp),
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 56bb58704bf9..a5f6fd16b665 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -48,10 +48,9 @@
#include "en_rep.h"
#include "en/rep/tc.h"
#include "ipoib/ipoib.h"
-#include "accel/ipsec.h"
-#include "fpga/ipsec.h"
+#include "en_accel/ipsec_offload.h"
#include "en_accel/ipsec_rxtx.h"
-#include "en_accel/tls_rxtx.h"
+#include "en_accel/ktls_txrx.h"
#include "en/xdp.h"
#include "en/xsk/rx.h"
#include "en/health.h"
@@ -1416,7 +1415,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
skb->mac_len = ETH_HLEN;
- mlx5e_tls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt);
+ if (unlikely(get_cqe_tls_offload(cqe)))
+ mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt);
if (unlikely(mlx5_ipsec_is_rx_flow(cqe)))
mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe);
@@ -2383,46 +2383,6 @@ const struct mlx5e_rx_handlers mlx5i_rx_handlers = {
};
#endif /* CONFIG_MLX5_CORE_IPOIB */
-#ifdef CONFIG_MLX5_EN_IPSEC
-
-static void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
-{
- struct mlx5_wq_cyc *wq = &rq->wqe.wq;
- struct mlx5e_wqe_frag_info *wi;
- struct sk_buff *skb;
- u32 cqe_bcnt;
- u16 ci;
-
- ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
- wi = get_frag(rq, ci);
- cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
-
- if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
- rq->stats->wqe_err++;
- goto wq_free_wqe;
- }
-
- skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
- mlx5e_skb_from_cqe_linear,
- mlx5e_skb_from_cqe_nonlinear,
- rq, cqe, wi, cqe_bcnt);
- if (unlikely(!skb)) /* a DROP, save the page-reuse checks */
- goto wq_free_wqe;
-
- skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt);
- if (unlikely(!skb))
- goto wq_free_wqe;
-
- mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
- napi_gro_receive(rq->cq.napi, skb);
-
-wq_free_wqe:
- mlx5e_free_rx_wqe(rq, wi, true);
- mlx5_wq_cyc_pop(wq);
-}
-
-#endif /* CONFIG_MLX5_EN_IPSEC */
-
int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk)
{
struct net_device *netdev = rq->netdev;
@@ -2439,10 +2399,6 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool
rq->post_wqes = mlx5e_post_rx_mpwqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
- if (mlx5_fpga_is_ipsec_device(mdev)) {
- netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n");
- return -EINVAL;
- }
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo;
if (!rq->handle_rx_cqe) {
@@ -2466,14 +2422,7 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool
mlx5e_skb_from_cqe_nonlinear;
rq->post_wqes = mlx5e_post_rx_wqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
-
-#ifdef CONFIG_MLX5_EN_IPSEC
- if ((mlx5_fpga_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) &&
- priv->ipsec)
- rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
- else
-#endif
- rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe;
+ rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe;
if (!rq->handle_rx_cqe) {
netdev_err(netdev, "RX handler of RQ is not set\n");
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index bdc870f9c2f3..57fa0489eeb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -32,7 +32,7 @@
#include "lib/mlx5.h"
#include "en.h"
-#include "en_accel/tls.h"
+#include "en_accel/ktls.h"
#include "en_accel/en_accel.h"
#include "en/ptp.h"
#include "en/port.h"
@@ -1900,17 +1900,17 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; }
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls)
{
- return mlx5e_tls_get_count(priv);
+ return mlx5e_ktls_get_count(priv);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls)
{
- return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+ return idx + mlx5e_ktls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls)
{
- return idx + mlx5e_tls_get_stats(priv, data + idx);
+ return idx + mlx5e_ktls_get_stats(priv, data + idx);
}
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; }
@@ -2443,7 +2443,6 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
&MLX5E_STATS_GRP(pme),
#ifdef CONFIG_MLX5_EN_IPSEC
&MLX5E_STATS_GRP(ipsec_sw),
- &MLX5E_STATS_GRP(ipsec_hw),
#endif
&MLX5E_STATS_GRP(tls),
&MLX5E_STATS_GRP(channels),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index a7a025d15c14..e48b15b55b6f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -482,7 +482,6 @@ extern MLX5E_DECLARE_STATS_GRP(per_prio);
extern MLX5E_DECLARE_STATS_GRP(pme);
extern MLX5E_DECLARE_STATS_GRP(channels);
extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest);
-extern MLX5E_DECLARE_STATS_GRP(ipsec_hw);
extern MLX5E_DECLARE_STATS_GRP(ipsec_sw);
extern MLX5E_DECLARE_STATS_GRP(ptp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 2a984e82ae16..750c32050165 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -57,9 +57,6 @@ struct mlx5_fpga_device {
u32 mkey;
struct mlx5_uars_page *uar;
} conn_res;
-
- struct mlx5_fpga_ipsec *ipsec;
- struct mlx5_fpga_tls *tls;
};
#define mlx5_fpga_dbg(__adev, format, ...) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
deleted file mode 100644
index 8ec148010d62..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ /dev/null
@@ -1,1582 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/rhashtable.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/fs_helpers.h>
-#include <linux/mlx5/fs.h>
-#include <linux/rbtree.h>
-
-#include "mlx5_core.h"
-#include "fs_cmd.h"
-#include "fpga/ipsec.h"
-#include "fpga/sdk.h"
-#include "fpga/core.h"
-
-enum mlx5_fpga_ipsec_cmd_status {
- MLX5_FPGA_IPSEC_CMD_PENDING,
- MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
- MLX5_FPGA_IPSEC_CMD_COMPLETE,
-};
-
-struct mlx5_fpga_ipsec_cmd_context {
- struct mlx5_fpga_dma_buf buf;
- enum mlx5_fpga_ipsec_cmd_status status;
- struct mlx5_ifc_fpga_ipsec_cmd_resp resp;
- int status_code;
- struct completion complete;
- struct mlx5_fpga_device *dev;
- struct list_head list; /* Item in pending_cmds */
- u8 command[];
-};
-
-struct mlx5_fpga_esp_xfrm;
-
-struct mlx5_fpga_ipsec_sa_ctx {
- struct rhash_head hash;
- struct mlx5_ifc_fpga_ipsec_sa hw_sa;
- u32 sa_handle;
- struct mlx5_core_dev *dev;
- struct mlx5_fpga_esp_xfrm *fpga_xfrm;
-};
-
-struct mlx5_fpga_esp_xfrm {
- unsigned int num_rules;
- struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
- struct mutex lock; /* xfrm lock */
- struct mlx5_accel_esp_xfrm accel_xfrm;
-};
-
-struct mlx5_fpga_ipsec_rule {
- struct rb_node node;
- struct fs_fte *fte;
- struct mlx5_fpga_ipsec_sa_ctx *ctx;
-};
-
-static const struct rhashtable_params rhash_sa = {
-	/* Keep the "cmd" field out of the key, as its
- * value is not constant during the lifetime
- * of the key object.
- */
- .key_len = sizeof_field(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) -
- sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
- .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) +
- sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
- .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
- .automatic_shrinking = true,
- .min_size = 1,
-};
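
rhash_sa above implements "hash everything except the leading field": cmd is the first member of the hardware SA and changes between commands, so both key_offset and key_len are shifted by its size and lookups compare only the stable remainder. The offset arithmetic in isolation (struct sizes hypothetical):

#include <stddef.h>
#include <stdio.h>

struct hw_sa { unsigned int cmd; unsigned char stable[44]; };

struct sa_ctx { struct hw_sa hw_sa; /* hash head, backrefs, ... */ };

int main(void)
{
	/* Key starts just past the volatile cmd word... */
	size_t key_offset = offsetof(struct sa_ctx, hw_sa) + sizeof(unsigned int);
	/* ...and covers the rest of the hardware SA. */
	size_t key_len = sizeof(struct hw_sa) - sizeof(unsigned int);

	printf("key at +%zu, %zu bytes\n", key_offset, key_len);
}
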
-
-struct mlx5_fpga_ipsec {
- struct mlx5_fpga_device *fdev;
- struct list_head pending_cmds;
- spinlock_t pending_cmds_lock; /* Protects pending_cmds */
- u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
- struct mlx5_fpga_conn *conn;
-
- struct notifier_block fs_notifier_ingress_bypass;
- struct notifier_block fs_notifier_egress;
-
- /* Map hardware SA --> SA context
- * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx)
-	 * We use this hash to avoid duplicate SAs in the FPGA,
-	 * which are not allowed.
- */
- struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */
- struct mutex sa_hash_lock;
-
- /* Tree holding all rules for this fpga device
- * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id)
- */
- struct rb_root rules_rb;
- struct mutex rules_rb_lock; /* rules lock */
-
- struct ida halloc;
-};
-
-bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
-{
- if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
- return false;
-
- if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
- MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
- return false;
-
- if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
- MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC)
- return false;
-
- return true;
-}
-
-static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *buf,
- u8 status)
-{
- struct mlx5_fpga_ipsec_cmd_context *context;
-
- if (status) {
- context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context,
- buf);
- mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
- status);
- context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL;
- complete(&context->complete);
- }
-}
-
-static inline
-int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome)
-{
- switch (syndrome) {
- case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS:
- return 0;
- case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE:
- return -EEXIST;
- case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST:
- return -EINVAL;
- case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
- return -EIO;
- }
- return -EIO;
-}
-
-static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
-{
- struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data;
- struct mlx5_fpga_ipsec_cmd_context *context;
- enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome;
- struct mlx5_fpga_device *fdev = cb_arg;
- unsigned long flags;
-
- if (buf->sg[0].size < sizeof(*resp)) {
- mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n",
- buf->sg[0].size, sizeof(*resp));
- return;
- }
-
- mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n",
- ntohl(resp->syndrome));
-
- spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
- context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
- struct mlx5_fpga_ipsec_cmd_context,
- list);
- if (context)
- list_del(&context->list);
- spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
-
- if (!context) {
- mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n");
- return;
- }
- mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
-
- syndrome = ntohl(resp->syndrome);
- context->status_code = syndrome_to_errno(syndrome);
- context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE;
- memcpy(&context->resp, resp, sizeof(*resp));
-
- if (context->status_code)
- mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n",
- syndrome);
-
- complete(&context->complete);
-}
-
-static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
- const void *cmd, int cmd_size)
-{
- struct mlx5_fpga_ipsec_cmd_context *context;
- struct mlx5_fpga_device *fdev = mdev->fpga;
- unsigned long flags;
- int res;
-
- if (!fdev || !fdev->ipsec)
- return ERR_PTR(-EOPNOTSUPP);
-
- if (cmd_size & 3)
- return ERR_PTR(-EINVAL);
-
- context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC);
- if (!context)
- return ERR_PTR(-ENOMEM);
-
- context->status = MLX5_FPGA_IPSEC_CMD_PENDING;
- context->dev = fdev;
- context->buf.complete = mlx5_fpga_ipsec_send_complete;
- init_completion(&context->complete);
- memcpy(&context->command, cmd, cmd_size);
- context->buf.sg[0].size = cmd_size;
- context->buf.sg[0].data = &context->command;
-
- spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
- res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
- if (!res)
- list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
- spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
-
- if (res) {
- mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res);
- kfree(context);
- return ERR_PTR(res);
- }
-
- /* Context should be freed by the caller after completion. */
- return context;
-}
-
-static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
-{
- struct mlx5_fpga_ipsec_cmd_context *context = ctx;
- unsigned long timeout =
- msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC);
- int res;
-
- res = wait_for_completion_timeout(&context->complete, timeout);
- if (!res) {
- mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
- return -ETIMEDOUT;
- }
-
- if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE)
- res = context->status_code;
- else
- res = -EIO;
-
- return res;
-}
-
-static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec)
-{
- if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command))
- return true;
- return false;
-}
-
-static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev,
- struct mlx5_ifc_fpga_ipsec_sa *hw_sa,
- int opcode)
-{
- struct mlx5_core_dev *dev = fdev->mdev;
- struct mlx5_ifc_fpga_ipsec_sa *sa;
- struct mlx5_fpga_ipsec_cmd_context *cmd_context;
- size_t sa_cmd_size;
- int err;
-
- hw_sa->ipsec_sa_v1.cmd = htonl(opcode);
- if (is_v2_sadb_supported(fdev->ipsec))
- sa_cmd_size = sizeof(*hw_sa);
- else
- sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1);
-
- cmd_context = (struct mlx5_fpga_ipsec_cmd_context *)
- mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size);
- if (IS_ERR(cmd_context))
- return PTR_ERR(cmd_context);
-
- err = mlx5_fpga_ipsec_cmd_wait(cmd_context);
- if (err)
- goto out;
-
- sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command;
- if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) {
- mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
- ntohl(sa->ipsec_sa_v1.sw_sa_handle),
- ntohl(cmd_context->resp.sw_sa_handle));
- err = -EIO;
- }
-
-out:
- kfree(cmd_context);
- return err;
-}
-
-u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
- u32 ret = 0;
-
- if (mlx5_fpga_is_ipsec_device(mdev)) {
- ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE;
- ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA;
- } else {
- return ret;
- }
-
- if (!fdev->ipsec)
- return ret;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
- ret |= MLX5_ACCEL_IPSEC_CAP_ESP;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
- ret |= MLX5_ACCEL_IPSEC_CAP_IPV6;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
- ret |= MLX5_ACCEL_IPSEC_CAP_LSO;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
- ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
-
- if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) {
- ret |= MLX5_ACCEL_IPSEC_CAP_ESN;
- ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
- }
-
- return ret;
-}
-
-static unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
-
- if (!fdev || !fdev->ipsec)
- return 0;
-
- return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
- number_of_ipsec_counters);
-}
-
-static int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
- unsigned int counters_count)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
- unsigned int i;
- __be32 *data;
- u32 count;
- u64 addr;
- int ret;
-
- if (!fdev || !fdev->ipsec)
- return 0;
-
- addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
- ipsec_counters_addr_low) +
- ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
- ipsec_counters_addr_high) << 32);
-
- count = mlx5_fpga_ipsec_counters_count(mdev);
-
- data = kzalloc(array3_size(sizeof(*data), count, 2), GFP_KERNEL);
- if (!data) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data,
- MLX5_FPGA_ACCESS_TYPE_DONTCARE);
- if (ret < 0) {
- mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n",
- ret);
- goto out;
- }
- ret = 0;
-
- if (count > counters_count)
- count = counters_count;
-
- /* Each counter is low word, then high. But each word is big-endian */
- for (i = 0; i < count; i++)
- counters[i] = (u64)ntohl(data[i * 2]) |
- ((u64)ntohl(data[i * 2 + 1]) << 32);
-
-out:
- kfree(data);
- return ret;
-}
-
-static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
-{
- struct mlx5_fpga_ipsec_cmd_context *context;
- struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0};
- int err;
-
- cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
- cmd.flags = htonl(flags);
- context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
- if (IS_ERR(context))
- return PTR_ERR(context);
-
- err = mlx5_fpga_ipsec_cmd_wait(context);
- if (err)
- goto out;
-
- if ((context->resp.flags & cmd.flags) != cmd.flags) {
- mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n",
- ntohl(cmd.flags),
- ntohl(context->resp.flags));
- err = -EIO;
- }
-
-out:
- kfree(context);
- return err;
-}
-
-static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
-{
- u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev);
- u32 flags = 0;
-
- if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER)
- flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER;
-
- return mlx5_fpga_ipsec_set_caps(mdev, flags);
-}
-
-static void
-mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
- struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
-{
- const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm;
-
- /* key */
- memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key,
- aes_gcm->key_len / 8);
- /* A 128 bit key is written twice to fill the wider HW key layout */
- if (aes_gcm->key_len == 128)
- memcpy(&hw_sa->ipsec_sa_v1.key_enc[16],
- aes_gcm->aes_key, aes_gcm->key_len / 8);
-
- /* salt and seq_iv */
- memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv,
- sizeof(aes_gcm->seq_iv));
- memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt,
- sizeof(aes_gcm->salt));
-
- /* esn */
- if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN;
- hw_sa->ipsec_sa_v1.flags |=
- (xfrm_attrs->flags &
- MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
- MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
- hw_sa->esn = htonl(xfrm_attrs->esn);
- } else {
- hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN;
- hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_OVERLAP;
- hw_sa->esn = 0;
- }
-
- /* rx handle */
- hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle);
-
- /* enc mode */
- switch (aes_gcm->key_len) {
- case 128:
- hw_sa->ipsec_sa_v1.enc_mode =
- MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128;
- break;
- case 256:
- hw_sa->ipsec_sa_v1.enc_mode =
- MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128;
- break;
- }
-
- /* flags */
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID |
- MLX5_FPGA_IPSEC_SA_SPI_EN |
- MLX5_FPGA_IPSEC_SA_IP_ESP;
-
- if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT)
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX;
- else
- hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX;
-}
-
-static void
-mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
- const __be32 saddr[4],
- const __be32 daddr[4],
- const __be32 spi, bool is_ipv6,
- struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
-{
- mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa);
-
- /* IPs */
- memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip));
- memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip));
-
- /* SPI */
- hw_sa->ipsec_sa_v1.spi = spi;
-
- /* flags */
- if (is_ipv6)
- hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6;
-}
-
-static bool is_full_mask(const void *p, size_t len)
-{
- WARN_ON(len % 4);
-
- return !memchr_inv(p, 0xff, len);
-}
-
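- /* The FPGA SADB matches SAs by exact source/destination address and
- * SPI, so a rule may only be offloaded when the steering mask for
- * these fields is all-ones.
- */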
-static bool validate_fpga_full_mask(struct mlx5_core_dev *dev,
- const u32 *match_c,
- const u32 *match_v)
-{
- const void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
- match_c,
- misc_parameters);
- const void *headers_c = MLX5_ADDR_OF(fte_match_param,
- match_c,
- outer_headers);
- const void *headers_v = MLX5_ADDR_OF(fte_match_param,
- match_v,
- outer_headers);
-
- if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) {
- const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4);
- const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
- ipv4)) ||
- !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
- ipv4)))
- return false;
- } else {
- const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6);
- const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
-
- if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
- ipv6)) ||
- !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
- ipv6)))
- return false;
- }
-
- if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
- outer_esp_spi),
- MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi)))
- return false;
-
- return true;
-}
-
-static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev,
- u8 match_criteria_enable,
- const u32 *match_c,
- const u32 *match_v)
-{
- u32 ipsec_dev_caps = mlx5_fpga_ipsec_device_caps(dev);
- bool ipv6_flow;
-
- ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v);
-
- if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) ||
- mlx5_fs_is_outer_udp_flow(match_c, match_v) ||
- mlx5_fs_is_outer_tcp_flow(match_c, match_v) ||
- mlx5_fs_is_vxlan_flow(match_c) ||
- !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) ||
- ipv6_flow))
- return false;
-
- if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE))
- return false;
-
- if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) &&
- mlx5_fs_is_outer_ipsec_flow(match_c))
- return false;
-
- if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) &&
- ipv6_flow)
- return false;
-
- if (!validate_fpga_full_mask(dev, match_c, match_v))
- return false;
-
- return true;
-}
-
-static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
- u8 match_criteria_enable,
- const u32 *match_c,
- const u32 *match_v,
- struct mlx5_flow_act *flow_act,
- struct mlx5_flow_context *flow_context)
-{
- const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) ||
- MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0);
- bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) ||
- MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0);
- int ret;
-
- ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c,
- match_v);
- if (!ret)
- return ret;
-
- if (is_dmac || is_smac ||
- (match_criteria_enable &
- ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
- (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
- (flow_context->flags & FLOW_CONTEXT_HAS_TAG))
- return false;
-
- return true;
-}
-
-static void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
- struct mlx5_accel_esp_xfrm *accel_xfrm,
- const __be32 saddr[4], const __be32 daddr[4],
- const __be32 spi, bool is_ipv6, u32 *sa_handle)
-{
- struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
- struct mlx5_fpga_esp_xfrm *fpga_xfrm =
- container_of(accel_xfrm, typeof(*fpga_xfrm),
- accel_xfrm);
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- int opcode, err;
- void *context;
-
- /* alloc SA */
- sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL);
- if (!sa_ctx)
- return ERR_PTR(-ENOMEM);
-
- sa_ctx->dev = mdev;
-
- /* build candidate SA */
- mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs,
- saddr, daddr, spi, is_ipv6,
- &sa_ctx->hw_sa);
-
- mutex_lock(&fpga_xfrm->lock);
-
- if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */
- /* all rules must be with same IPs and SPI */
- if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa,
- sizeof(sa_ctx->hw_sa))) {
- context = ERR_PTR(-EINVAL);
- goto exists;
- }
-
- ++fpga_xfrm->num_rules;
- context = fpga_xfrm->sa_ctx;
- goto exists;
- }
-
- if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) {
- err = ida_alloc_min(&fipsec->halloc, 1, GFP_KERNEL);
- if (err < 0) {
- context = ERR_PTR(err);
- goto exists;
- }
-
- sa_ctx->sa_handle = err;
- if (sa_handle)
- *sa_handle = sa_ctx->sa_handle;
- }
- /* This is an unbound fpga_xfrm; try to add it to the hash */
- mutex_lock(&fipsec->sa_hash_lock);
-
- err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash,
- rhash_sa);
- if (err) {
- /* Can't bind a different accel_xfrm to an already existing
- * sa_ctx, since multiple keymats for the same IPs and SPI
- * are not supported.
- */
- context = ERR_PTR(-EEXIST);
- goto unlock_hash;
- }
-
- /* Bind accel_xfrm to sa_ctx */
- opcode = is_v2_sadb_supported(fdev->ipsec) ?
- MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 :
- MLX5_FPGA_IPSEC_CMD_OP_ADD_SA;
- err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
- sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
- if (err) {
- context = ERR_PTR(err);
- goto delete_hash;
- }
-
- mutex_unlock(&fipsec->sa_hash_lock);
-
- ++fpga_xfrm->num_rules;
- fpga_xfrm->sa_ctx = sa_ctx;
- sa_ctx->fpga_xfrm = fpga_xfrm;
-
- mutex_unlock(&fpga_xfrm->lock);
-
- return sa_ctx;
-
-delete_hash:
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
- rhash_sa));
-unlock_hash:
- mutex_unlock(&fipsec->sa_hash_lock);
- if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT)
- ida_free(&fipsec->halloc, sa_ctx->sa_handle);
-exists:
- mutex_unlock(&fpga_xfrm->lock);
- kfree(sa_ctx);
- return context;
-}
-
-static void *
-mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
- struct fs_fte *fte,
- bool is_egress)
-{
- struct mlx5_accel_esp_xfrm *accel_xfrm;
- __be32 saddr[4], daddr[4], spi;
- struct mlx5_flow_group *fg;
- bool is_ipv6 = false;
-
- fs_get_obj(fg, fte->node.parent);
- /* validate */
- if (is_egress &&
- !mlx5_is_fpga_egress_ipsec_rule(mdev,
- fg->mask.match_criteria_enable,
- fg->mask.match_criteria,
- fte->val,
- &fte->action,
- &fte->flow_context))
- return ERR_PTR(-EINVAL);
- else if (!mlx5_is_fpga_ipsec_rule(mdev,
- fg->mask.match_criteria_enable,
- fg->mask.match_criteria,
- fte->val))
- return ERR_PTR(-EINVAL);
-
- /* get xfrm context */
- accel_xfrm =
- (struct mlx5_accel_esp_xfrm *)fte->action.esp_id;
-
- /* IPs */
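- /* Only the last 32-bit word of the 4-word address buffers is
- * used for IPv4 flows; IPv6 flows fill all four words.
- */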
- if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria,
- fte->val)) {
- memcpy(&saddr[3],
- MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- fte->val,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- sizeof(saddr[3]));
- memcpy(&daddr[3],
- MLX5_ADDR_OF(fte_match_set_lyr_2_4,
- fte->val,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- sizeof(daddr[3]));
- } else {
- memcpy(saddr,
- MLX5_ADDR_OF(fte_match_param,
- fte->val,
- outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
- sizeof(saddr));
- memcpy(daddr,
- MLX5_ADDR_OF(fte_match_param,
- fte->val,
- outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- sizeof(daddr));
- is_ipv6 = true;
- }
-
- /* SPI */
- spi = MLX5_GET_BE(typeof(spi),
- fte_match_param, fte->val,
- misc_parameters.outer_esp_spi);
-
- /* create */
- return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm,
- saddr, daddr,
- spi, is_ipv6, NULL);
-}
-
-static void
-mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
-{
- struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- int opcode = is_v2_sadb_supported(fdev->ipsec) ?
- MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 :
- MLX5_FPGA_IPSEC_CMD_OP_DEL_SA;
- int err;
-
- err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
- sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
- if (WARN_ON(err))
- return;
-
- if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action ==
- MLX5_ACCEL_ESP_ACTION_DECRYPT)
- ida_free(&fipsec->halloc, sa_ctx->sa_handle);
-
- mutex_lock(&fipsec->sa_hash_lock);
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
- rhash_sa));
- mutex_unlock(&fipsec->sa_hash_lock);
-}
-
-static void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
-{
- struct mlx5_fpga_esp_xfrm *fpga_xfrm =
- ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm;
-
- mutex_lock(&fpga_xfrm->lock);
- if (!--fpga_xfrm->num_rules) {
- mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx);
- kfree(fpga_xfrm->sa_ctx);
- fpga_xfrm->sa_ctx = NULL;
- }
- mutex_unlock(&fpga_xfrm->lock);
-}
-
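- /* Offloaded rules are kept in a per-device red-black tree keyed by
- * their fs_fte pointer, so delete_fte can recover the SA context
- * that create_fte attached to the rule.
- */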
-static inline struct mlx5_fpga_ipsec_rule *
-_rule_search(struct rb_root *root, struct fs_fte *fte)
-{
- struct rb_node *node = root->rb_node;
-
- while (node) {
- struct mlx5_fpga_ipsec_rule *rule =
- container_of(node, struct mlx5_fpga_ipsec_rule,
- node);
-
- if (fte < rule->fte)
- node = node->rb_left;
- else if (fte > rule->fte)
- node = node->rb_right;
- else
- return rule;
- }
- return NULL;
-}
-
-static struct mlx5_fpga_ipsec_rule *
-rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte)
-{
- struct mlx5_fpga_ipsec_rule *rule;
-
- mutex_lock(&ipsec_dev->rules_rb_lock);
- rule = _rule_search(&ipsec_dev->rules_rb, fte);
- mutex_unlock(&ipsec_dev->rules_rb_lock);
-
- return rule;
-}
-
-static inline int _rule_insert(struct rb_root *root,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- struct rb_node **new = &root->rb_node, *parent = NULL;
-
- /* Figure out where to put new node */
- while (*new) {
- struct mlx5_fpga_ipsec_rule *this =
- container_of(*new, struct mlx5_fpga_ipsec_rule,
- node);
-
- parent = *new;
- if (rule->fte < this->fte)
- new = &((*new)->rb_left);
- else if (rule->fte > this->fte)
- new = &((*new)->rb_right);
- else
- return -EEXIST;
- }
-
- /* Add new node and rebalance tree. */
- rb_link_node(&rule->node, parent, new);
- rb_insert_color(&rule->node, root);
-
- return 0;
-}
-
-static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- int ret;
-
- mutex_lock(&ipsec_dev->rules_rb_lock);
- ret = _rule_insert(&ipsec_dev->rules_rb, rule);
- mutex_unlock(&ipsec_dev->rules_rb_lock);
-
- return ret;
-}
-
-static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- struct rb_root *root = &ipsec_dev->rules_rb;
-
- mutex_lock(&ipsec_dev->rules_rb_lock);
- rb_erase(&rule->node, root);
- mutex_unlock(&ipsec_dev->rules_rb_lock);
-}
-
-static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
- struct mlx5_fpga_ipsec_rule *rule)
-{
- _rule_delete(ipsec_dev, rule);
- kfree(rule);
-}
-
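- /* Firmware cannot parse the FPGA-specific parts of an FTE: the
- * ENCRYPT/DECRYPT actions, the esp_id cookie and, on devices without
- * outer_esp_spi match support, the ESP SPI value. These are saved
- * and stripped before the FTE is handed to the default FS commands,
- * then restored so the software copy stays intact.
- */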
-struct mailbox_mod {
- uintptr_t saved_esp_id;
- u32 saved_action;
- u32 saved_outer_esp_spi_value;
-};
-
-static void restore_spec_mailbox(struct fs_fte *fte,
- struct mailbox_mod *mbox_mod)
-{
- char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
- fte->val,
- misc_parameters);
-
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
- mbox_mod->saved_outer_esp_spi_value);
- fte->action.action |= mbox_mod->saved_action;
- fte->action.esp_id = mbox_mod->saved_esp_id;
-}
-
-static void modify_spec_mailbox(struct mlx5_core_dev *mdev,
- struct fs_fte *fte,
- struct mailbox_mod *mbox_mod)
-{
- char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
- fte->val,
- misc_parameters);
-
- mbox_mod->saved_esp_id = fte->action.esp_id;
- mbox_mod->saved_action = fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
- mbox_mod->saved_outer_esp_spi_value =
- MLX5_GET(fte_match_set_misc, misc_params_v,
- outer_esp_spi);
-
- fte->action.esp_id = 0;
- fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
- if (!MLX5_CAP_FLOWTABLE(mdev,
- flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0);
-}
-
-static enum fs_flow_table_type egress_to_fs_ft(bool egress)
-{
- return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX;
-}
-
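- /* When the device cannot match on outer_esp_spi, clear that field
- * from the group mask (and drop the misc-parameters criteria bit if
- * nothing else in it is set) around the default create_flow_group
- * call, restoring the caller's mask afterwards.
- */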
-static int fpga_ipsec_fs_create_flow_group(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- u32 *in,
- struct mlx5_flow_group *fg,
- bool is_egress)
-{
- int (*create_flow_group)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft, u32 *in,
- struct mlx5_flow_group *fg) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group;
- char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in,
- match_criteria.misc_parameters);
- struct mlx5_core_dev *dev = ns->dev;
- u32 saved_outer_esp_spi_mask;
- u8 match_criteria_enable;
- int ret;
-
- if (MLX5_CAP_FLOWTABLE(dev,
- flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- return create_flow_group(ns, ft, in, fg);
-
- match_criteria_enable =
- MLX5_GET(create_flow_group_in, in, match_criteria_enable);
- saved_outer_esp_spi_mask =
- MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
- if (!match_criteria_enable || !saved_outer_esp_spi_mask)
- return create_flow_group(ns, ft, in, fg);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0);
-
- if (!(*misc_params_c) &&
- !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
- MLX5_SET(create_flow_group_in, in, match_criteria_enable,
- match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS);
-
- ret = create_flow_group(ns, ft, in, fg);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask);
- MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable);
-
- return ret;
-}
-
-static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte,
- bool is_egress)
-{
- int (*create_fte)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte;
- struct mlx5_core_dev *dev = ns->dev;
- struct mlx5_fpga_device *fdev = dev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- struct mlx5_fpga_ipsec_rule *rule;
- bool is_esp = fte->action.esp_id;
- struct mailbox_mod mbox_mod;
- int ret;
-
- if (!is_esp ||
- !(fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
- return create_fte(ns, ft, fg, fte);
-
- rule = kzalloc(sizeof(*rule), GFP_KERNEL);
- if (!rule)
- return -ENOMEM;
-
- rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
- if (IS_ERR(rule->ctx)) {
- int err = PTR_ERR(rule->ctx);
-
- kfree(rule);
- return err;
- }
-
- rule->fte = fte;
- WARN_ON(rule_insert(fipsec, rule));
-
- modify_spec_mailbox(dev, fte, &mbox_mod);
- ret = create_fte(ns, ft, fg, fte);
- restore_spec_mailbox(fte, &mbox_mod);
- if (ret) {
- _rule_delete(fipsec, rule);
- mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
- kfree(rule);
- }
-
- return ret;
-}
-
-static int fpga_ipsec_fs_update_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte,
- bool is_egress)
-{
- int (*update_fte)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte;
- struct mlx5_core_dev *dev = ns->dev;
- bool is_esp = fte->action.esp_id;
- struct mailbox_mod mbox_mod;
- int ret;
-
- if (!is_esp ||
- !(fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
- return update_fte(ns, ft, fg, modify_mask, fte);
-
- modify_spec_mailbox(dev, fte, &mbox_mod);
- ret = update_fte(ns, ft, fg, modify_mask, fte);
- restore_spec_mailbox(fte, &mbox_mod);
-
- return ret;
-}
-
-static int fpga_ipsec_fs_delete_fte(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte,
- bool is_egress)
-{
- int (*delete_fte)(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte) =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte;
- struct mlx5_core_dev *dev = ns->dev;
- struct mlx5_fpga_device *fdev = dev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- struct mlx5_fpga_ipsec_rule *rule;
- bool is_esp = fte->action.esp_id;
- struct mailbox_mod mbox_mod;
- int ret;
-
- if (!is_esp ||
- !(fte->action.action &
- (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
- return delete_fte(ns, ft, fte);
-
- rule = rule_search(fipsec, fte);
- if (!rule)
- return -ENOENT;
-
- mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
- rule_delete(fipsec, rule);
-
- modify_spec_mailbox(dev, fte, &mbox_mod);
- ret = delete_fte(ns, ft, fte);
- restore_spec_mailbox(fte, &mbox_mod);
-
- return ret;
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- u32 *in,
- struct mlx5_flow_group *fg)
-{
- return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte,
- true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_delete_fte(ns, ft, fte, true);
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- u32 *in,
- struct mlx5_flow_group *fg)
-{
- return fpga_ipsec_fs_create_flow_group(ns, ft, in, fg, false);
-}
-
-static int
-mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_create_fte(ns, ft, fg, fte, false);
-}
-
-static int
-mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_group *fg,
- int modify_mask,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_update_fte(ns, ft, fg, modify_mask, fte,
- false);
-}
-
-static int
-mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_flow_root_namespace *ns,
- struct mlx5_flow_table *ft,
- struct fs_fte *fte)
-{
- return fpga_ipsec_fs_delete_fte(ns, ft, fte, false);
-}
-
-static struct mlx5_flow_cmds fpga_ipsec_ingress;
-static struct mlx5_flow_cmds fpga_ipsec_egress;
-
-const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
-{
- switch (type) {
- case FS_FT_NIC_RX:
- return &fpga_ipsec_ingress;
- case FS_FT_NIC_TX:
- return &fpga_ipsec_egress;
- default:
- WARN_ON(true);
- return NULL;
- }
-}
-
-static int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_conn_attr init_attr = {0};
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_conn *conn;
- int err;
-
- if (!mlx5_fpga_is_ipsec_device(mdev))
- return 0;
-
- fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL);
- if (!fdev->ipsec)
- return -ENOMEM;
-
- fdev->ipsec->fdev = fdev;
-
- err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
- fdev->ipsec->caps);
- if (err) {
- mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n",
- err);
- goto error;
- }
-
- INIT_LIST_HEAD(&fdev->ipsec->pending_cmds);
- spin_lock_init(&fdev->ipsec->pending_cmds_lock);
-
- init_attr.rx_size = SBU_QP_QUEUE_SIZE;
- init_attr.tx_size = SBU_QP_QUEUE_SIZE;
- init_attr.recv_cb = mlx5_fpga_ipsec_recv;
- init_attr.cb_arg = fdev;
- conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
- if (IS_ERR(conn)) {
- err = PTR_ERR(conn);
- mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n",
- err);
- goto error;
- }
- fdev->ipsec->conn = conn;
-
- err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa);
- if (err)
- goto err_destroy_conn;
- mutex_init(&fdev->ipsec->sa_hash_lock);
-
- fdev->ipsec->rules_rb = RB_ROOT;
- mutex_init(&fdev->ipsec->rules_rb_lock);
-
- err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
- if (err) {
- mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
- err);
- goto err_destroy_hash;
- }
-
- ida_init(&fdev->ipsec->halloc);
-
- return 0;
-
-err_destroy_hash:
- rhashtable_destroy(&fdev->ipsec->sa_hash);
-
-err_destroy_conn:
- mlx5_fpga_sbu_conn_destroy(conn);
-
-error:
- kfree(fdev->ipsec);
- fdev->ipsec = NULL;
- return err;
-}
-
-static void destroy_rules_rb(struct rb_root *root)
-{
- struct mlx5_fpga_ipsec_rule *r, *tmp;
-
- rbtree_postorder_for_each_entry_safe(r, tmp, root, node) {
- rb_erase(&r->node, root);
- mlx5_fpga_ipsec_delete_sa_ctx(r->ctx);
- kfree(r);
- }
-}
-
-static void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
-
- if (!mlx5_fpga_is_ipsec_device(mdev))
- return;
-
- ida_destroy(&fdev->ipsec->halloc);
- destroy_rules_rb(&fdev->ipsec->rules_rb);
- rhashtable_destroy(&fdev->ipsec->sa_hash);
-
- mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
- kfree(fdev->ipsec);
- fdev->ipsec = NULL;
-}
-
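- /* Start from the default NIC RX/TX flow commands and override only
- * the flow-group and FTE hooks with the IPSec-aware wrappers above.
- */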
-void mlx5_fpga_ipsec_build_fs_cmds(void)
-{
- /* ingress */
- fpga_ipsec_ingress.create_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table;
- fpga_ipsec_ingress.destroy_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table;
- fpga_ipsec_ingress.modify_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table;
- fpga_ipsec_ingress.create_flow_group =
- mlx5_fpga_ipsec_fs_create_flow_group_ingress;
- fpga_ipsec_ingress.destroy_flow_group =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group;
- fpga_ipsec_ingress.create_fte =
- mlx5_fpga_ipsec_fs_create_fte_ingress;
- fpga_ipsec_ingress.update_fte =
- mlx5_fpga_ipsec_fs_update_fte_ingress;
- fpga_ipsec_ingress.delete_fte =
- mlx5_fpga_ipsec_fs_delete_fte_ingress;
- fpga_ipsec_ingress.update_root_ft =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft;
-
- /* egress */
- fpga_ipsec_egress.create_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table;
- fpga_ipsec_egress.destroy_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table;
- fpga_ipsec_egress.modify_flow_table =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table;
- fpga_ipsec_egress.create_flow_group =
- mlx5_fpga_ipsec_fs_create_flow_group_egress;
- fpga_ipsec_egress.destroy_flow_group =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group;
- fpga_ipsec_egress.create_fte =
- mlx5_fpga_ipsec_fs_create_fte_egress;
- fpga_ipsec_egress.update_fte =
- mlx5_fpga_ipsec_fs_update_fte_egress;
- fpga_ipsec_egress.delete_fte =
- mlx5_fpga_ipsec_fs_delete_fte_egress;
- fpga_ipsec_egress.update_root_ft =
- mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft;
-}
-
-static int
-mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- if (attrs->tfc_pad) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) {
- mlx5_core_err(mdev, "Only aes gcm keymat is supported\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.iv_algo !=
- MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) {
- mlx5_core_err(mdev, "Only iv sequence algo is supported\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.icv_len != 128) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
- return -EOPNOTSUPP;
- }
-
- if (attrs->keymat.aes_gcm.key_len != 128 &&
- attrs->keymat.aes_gcm.key_len != 256) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
- return -EOPNOTSUPP;
- }
-
- if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) &&
- (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps,
- v2_command))) {
- mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static struct mlx5_accel_esp_xfrm *
-mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags)
-{
- struct mlx5_fpga_esp_xfrm *fpga_xfrm;
-
- if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) {
- mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n");
- return ERR_PTR(-EINVAL);
- }
-
- if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
- mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
- return ERR_PTR(-EOPNOTSUPP);
- }
-
- fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL);
- if (!fpga_xfrm)
- return ERR_PTR(-ENOMEM);
-
- mutex_init(&fpga_xfrm->lock);
- memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs,
- sizeof(fpga_xfrm->accel_xfrm.attrs));
-
- return &fpga_xfrm->accel_xfrm;
-}
-
-static void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
-{
- struct mlx5_fpga_esp_xfrm *fpga_xfrm =
- container_of(xfrm, struct mlx5_fpga_esp_xfrm,
- accel_xfrm);
- /* assuming no sa_ctx is connected to this xfrm_ctx */
- kfree(fpga_xfrm);
-}
-
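- /* Modifying a bound xfrm swaps its hw_sa under sa_hash_lock: the old
- * hash entry is removed, the candidate inserted, MOD_SA_V2 pushed to
- * the device, and the original restored if any step fails.
- */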
-static int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
- const struct mlx5_accel_esp_xfrm_attrs *attrs)
-{
- struct mlx5_core_dev *mdev = xfrm->mdev;
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
- struct mlx5_fpga_esp_xfrm *fpga_xfrm;
- struct mlx5_ifc_fpga_ipsec_sa org_hw_sa;
-
- int err = 0;
-
- if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
- return 0;
-
- if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
- mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
- return -EOPNOTSUPP;
- }
-
- if (is_v2_sadb_supported(fipsec)) {
- mlx5_core_warn(mdev, "Modify esp is not supported\n");
- return -EOPNOTSUPP;
- }
-
- fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm);
-
- mutex_lock(&fpga_xfrm->lock);
-
- if (!fpga_xfrm->sa_ctx)
- /* Unbound xfrm, change only sw attrs */
- goto change_sw_xfrm_attrs;
-
- /* copy original hw sa */
- memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa));
- mutex_lock(&fipsec->sa_hash_lock);
- /* remove original hw sa from hash */
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash, rhash_sa));
- /* update hw_sa with new xfrm attrs */
- mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs,
- &fpga_xfrm->sa_ctx->hw_sa);
- /* try to insert new hw_sa to hash */
- err = rhashtable_insert_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash, rhash_sa);
- if (err)
- goto rollback_sa;
-
- /* modify device with new hw_sa */
- err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa,
- MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2);
- fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
- if (err)
- WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash,
- rhash_sa));
-rollback_sa:
- if (err) {
- /* return original hw_sa to hash */
- memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa,
- sizeof(org_hw_sa));
- WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash,
- &fpga_xfrm->sa_ctx->hash,
- rhash_sa));
- }
- mutex_unlock(&fipsec->sa_hash_lock);
-
-change_sw_xfrm_attrs:
- if (!err)
- memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs));
- mutex_unlock(&fpga_xfrm->lock);
- return err;
-}
-
-static const struct mlx5_accel_ipsec_ops fpga_ipsec_ops = {
- .device_caps = mlx5_fpga_ipsec_device_caps,
- .counters_count = mlx5_fpga_ipsec_counters_count,
- .counters_read = mlx5_fpga_ipsec_counters_read,
- .create_hw_context = mlx5_fpga_ipsec_create_sa_ctx,
- .free_hw_context = mlx5_fpga_ipsec_delete_sa_ctx,
- .init = mlx5_fpga_ipsec_init,
- .cleanup = mlx5_fpga_ipsec_cleanup,
- .esp_create_xfrm = mlx5_fpga_esp_create_xfrm,
- .esp_modify_xfrm = mlx5_fpga_esp_modify_xfrm,
- .esp_destroy_xfrm = mlx5_fpga_esp_destroy_xfrm,
-};
-
-const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev)
-{
- if (!mlx5_fpga_is_ipsec_device(mdev))
- return NULL;
-
- return &fpga_ipsec_ops;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
deleted file mode 100644
index 8931b5584477..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_FPGA_IPSEC_H__
-#define __MLX5_FPGA_IPSEC_H__
-
-#include "accel/ipsec.h"
-#include "fs_cmd.h"
-
-#ifdef CONFIG_MLX5_FPGA_IPSEC
-const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev);
-u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
-const struct mlx5_flow_cmds *
-mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
-void mlx5_fpga_ipsec_build_fs_cmds(void);
-bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev);
-#else
-static inline
-const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev)
-{ return NULL; }
-static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; }
-static inline const struct mlx5_flow_cmds *
-mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
-{
- return mlx5_fs_cmd_get_default(type);
-}
-
-static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {}
-static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; }
-
-#endif /* CONFIG_MLX5_FPGA_IPSEC */
-#endif /* __MLX5_FPGA_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
deleted file mode 100644
index 29b7339ebfa3..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/mlx5/device.h>
-#include "fpga/tls.h"
-#include "fpga/cmd.h"
-#include "fpga/sdk.h"
-#include "fpga/core.h"
-#include "accel/tls.h"
-
-struct mlx5_fpga_tls_command_context;
-
-typedef void (*mlx5_fpga_tls_command_complete)
- (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *ctx,
- struct mlx5_fpga_dma_buf *resp);
-
-struct mlx5_fpga_tls_command_context {
- struct list_head list;
- /* There is no guarantee on the order between the TX completion
- * and the command response.
- * The TX completion is going to touch cmd->buf even in
- * the case of successful transmission.
- * So instead of requiring separate allocations for cmd
- * and cmd->buf we've decided to use a reference counter
- */
- refcount_t ref;
- struct mlx5_fpga_dma_buf buf;
- mlx5_fpga_tls_command_complete complete;
-};
-
-static void
-mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx)
-{
- if (refcount_dec_and_test(&ctx->ref))
- kfree(ctx);
-}
-
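- /* The driver assumes responses arrive in the order the commands were
- * posted (they are queued under pending_cmds_lock), so the head of
- * pending_cmds is always the context the incoming response belongs to.
- */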
-static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *resp)
-{
- struct mlx5_fpga_conn *conn = fdev->tls->conn;
- struct mlx5_fpga_tls_command_context *ctx;
- struct mlx5_fpga_tls *tls = fdev->tls;
- unsigned long flags;
-
- spin_lock_irqsave(&tls->pending_cmds_lock, flags);
- ctx = list_first_entry(&tls->pending_cmds,
- struct mlx5_fpga_tls_command_context, list);
- list_del(&ctx->list);
- spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
- ctx->complete(conn, fdev, ctx, resp);
-}
-
-static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *buf,
- u8 status)
-{
- struct mlx5_fpga_tls_command_context *ctx =
- container_of(buf, struct mlx5_fpga_tls_command_context, buf);
-
- mlx5_fpga_tls_put_command_ctx(ctx);
-
- if (unlikely(status))
- mlx5_fpga_tls_cmd_complete(fdev, NULL);
-}
-
-static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *cmd,
- mlx5_fpga_tls_command_complete complete)
-{
- struct mlx5_fpga_tls *tls = fdev->tls;
- unsigned long flags;
- int ret;
-
- refcount_set(&cmd->ref, 2);
- cmd->complete = complete;
- cmd->buf.complete = mlx5_fpga_cmd_send_complete;
-
- spin_lock_irqsave(&tls->pending_cmds_lock, flags);
- /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock
- * to make sure commands are inserted to the tls->pending_cmds list
- * and the command QP in the same order.
- */
- ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf);
- if (likely(!ret))
- list_add_tail(&cmd->list, &tls->pending_cmds);
- else
- complete(tls->conn, fdev, cmd, NULL);
- spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
-}
-
-/* Start of context identifiers range (inclusive) */
-#define SWID_START 0
-/* End of context identifiers range (exclusive) */
-#define SWID_END BIT(24)
-
-static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
- void *ptr)
-{
- unsigned long flags;
- int ret;
-
- /* The TLS metadata format is 1 byte of syndrome followed by
- * 3 bytes of swid (software ID), so a swid must not exceed
- * 3 bytes. See tls_rxtx.c:insert_pet() for details.
- */
- BUILD_BUG_ON((SWID_END - 1) & 0xFF000000);
-
- idr_preload(GFP_KERNEL);
- spin_lock_irqsave(idr_spinlock, flags);
- ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC);
- spin_unlock_irqrestore(idr_spinlock, flags);
- idr_preload_end();
-
- return ret;
-}
-
-static void *mlx5_fpga_tls_release_swid(struct idr *idr,
- spinlock_t *idr_spinlock, u32 swid)
-{
- unsigned long flags;
- void *ptr;
-
- spin_lock_irqsave(idr_spinlock, flags);
- ptr = idr_remove(idr, swid);
- spin_unlock_irqrestore(idr_spinlock, flags);
- return ptr;
-}
-
-static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_dma_buf *buf, u8 status)
-{
- kfree(buf);
-}
-
-static void
-mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *cmd,
- struct mlx5_fpga_dma_buf *resp)
-{
- if (resp) {
- u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
-
- if (syndrome)
- mlx5_fpga_err(fdev,
- "Teardown stream failed with syndrome = %d",
- syndrome);
- }
- mlx5_fpga_tls_put_command_ctx(cmd);
-}
-
-static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
-{
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow,
- MLX5_BYTE_OFF(tls_flow, ipv6));
-
- MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6));
- MLX5_SET(tls_cmd, cmd, direction_sx,
- MLX5_GET(tls_flow, flow, direction_sx));
-}
-
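- /* RX flow pointers are looked up under RCU here; the teardown path
- * removes them from the IDR and calls synchronize_rcu() before the
- * flow memory is freed.
- */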
-int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
- u32 seq, __be64 rcd_sn)
-{
- struct mlx5_fpga_dma_buf *buf;
- int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE;
- void *flow;
- void *cmd;
- int ret;
-
- buf = kzalloc(size, GFP_ATOMIC);
- if (!buf)
- return -ENOMEM;
-
- cmd = (buf + 1);
-
- rcu_read_lock();
- flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
- if (unlikely(!flow)) {
- rcu_read_unlock();
- WARN_ONCE(1, "Received NULL pointer for handle\n");
- kfree(buf);
- return -EINVAL;
- }
- mlx5_fpga_tls_flow_to_cmd(flow, cmd);
- rcu_read_unlock();
-
- MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
- MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
- MLX5_SET(tls_cmd, cmd, tcp_sn, seq);
- MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX);
-
- buf->sg[0].data = cmd;
- buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
- buf->complete = mlx_tls_kfree_complete;
-
- ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
- if (ret < 0)
- kfree(buf);
-
- return ret;
-}
-
-static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
- void *flow, u32 swid, gfp_t flags)
-{
- struct mlx5_fpga_tls_command_context *ctx;
- struct mlx5_fpga_dma_buf *buf;
- void *cmd;
-
- ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags);
- if (!ctx)
- return;
-
- buf = &ctx->buf;
- cmd = (ctx + 1);
- MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
- MLX5_SET(tls_cmd, cmd, swid, swid);
-
- mlx5_fpga_tls_flow_to_cmd(flow, cmd);
- kfree(flow);
-
- buf->sg[0].data = cmd;
- buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
-
- mlx5_fpga_tls_cmd_send(mdev->fpga, ctx,
- mlx5_fpga_tls_teardown_completion);
-}
-
-void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- gfp_t flags, bool direction_sx)
-{
- struct mlx5_fpga_tls *tls = mdev->fpga->tls;
- void *flow;
-
- if (direction_sx)
- flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
- &tls->tx_idr_spinlock,
- swid);
- else
- flow = mlx5_fpga_tls_release_swid(&tls->rx_idr,
- &tls->rx_idr_spinlock,
- swid);
-
- if (!flow) {
- mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
- swid);
- return;
- }
-
- synchronize_rcu(); /* before kfree(flow) */
- mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
-}
-
-enum mlx5_fpga_setup_stream_status {
- MLX5_FPGA_CMD_PENDING,
- MLX5_FPGA_CMD_SEND_FAILED,
- MLX5_FPGA_CMD_RESPONSE_RECEIVED,
- MLX5_FPGA_CMD_ABANDONED,
-};
-
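- /* Stream setup is synchronous: the submitter waits killably on
- * ->comp while the completion handler records the syndrome. The
- * atomic_xchg on ->status decides, on either side, whether the
- * context was abandoned and who must release it.
- */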
-struct mlx5_setup_stream_context {
- struct mlx5_fpga_tls_command_context cmd;
- atomic_t status;
- u32 syndrome;
- struct completion comp;
-};
-
-static void
-mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn,
- struct mlx5_fpga_device *fdev,
- struct mlx5_fpga_tls_command_context *cmd,
- struct mlx5_fpga_dma_buf *resp)
-{
- struct mlx5_setup_stream_context *ctx =
- container_of(cmd, struct mlx5_setup_stream_context, cmd);
- int status = MLX5_FPGA_CMD_SEND_FAILED;
- void *tls_cmd = ctx + 1;
-
- /* If we failed to send the command, resp == NULL */
- if (resp) {
- ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
- status = MLX5_FPGA_CMD_RESPONSE_RECEIVED;
- }
-
- status = atomic_xchg_release(&ctx->status, status);
- if (likely(status != MLX5_FPGA_CMD_ABANDONED)) {
- complete(&ctx->comp);
- return;
- }
-
- mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n",
- ctx->syndrome);
-
- if (!ctx->syndrome) {
- /* The process was killed while waiting for the context to be
- * added, and the add completed successfully.
- * We need to destroy the HW context, and we can't reuse
- * the command context because we might not have received
- * the tx completion yet.
- */
- mlx5_fpga_tls_del_flow(fdev->mdev,
- MLX5_GET(tls_cmd, tls_cmd, swid),
- GFP_ATOMIC,
- MLX5_GET(tls_cmd, tls_cmd,
- direction_sx));
- }
-
- mlx5_fpga_tls_put_command_ctx(cmd);
-}
-
-static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev,
- struct mlx5_setup_stream_context *ctx)
-{
- struct mlx5_fpga_dma_buf *buf;
- void *cmd = ctx + 1;
- int status, ret = 0;
-
- buf = &ctx->cmd.buf;
- buf->sg[0].data = cmd;
- buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
- MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM);
-
- init_completion(&ctx->comp);
- atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING);
- ctx->syndrome = -1;
-
- mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
- mlx5_fpga_tls_setup_completion);
- wait_for_completion_killable(&ctx->comp);
-
- status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED);
- if (unlikely(status == MLX5_FPGA_CMD_PENDING))
- /* ctx is going to be released in mlx5_fpga_tls_setup_completion */
- return -EINTR;
-
- if (unlikely(ctx->syndrome))
- ret = -ENOMEM;
-
- mlx5_fpga_tls_put_command_ctx(&ctx->cmd);
- return ret;
-}
-
-static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg,
- struct mlx5_fpga_dma_buf *buf)
-{
- struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg;
-
- mlx5_fpga_tls_cmd_complete(fdev, buf);
-}
-
-bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev)
-{
- if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
- return false;
-
- if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
- MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
- return false;
-
- if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
- MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS)
- return false;
-
- if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0)
- return false;
-
- return true;
-}
-
-static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev,
- u32 *p_caps)
-{
- int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap);
- u32 caps = 0;
- void *buf;
-
- buf = kzalloc(cap_size, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf);
- if (err)
- goto out;
-
- if (MLX5_GET(tls_extended_cap, buf, tx))
- caps |= MLX5_ACCEL_TLS_TX;
- if (MLX5_GET(tls_extended_cap, buf, rx))
- caps |= MLX5_ACCEL_TLS_RX;
- if (MLX5_GET(tls_extended_cap, buf, tls_v12))
- caps |= MLX5_ACCEL_TLS_V12;
- if (MLX5_GET(tls_extended_cap, buf, tls_v13))
- caps |= MLX5_ACCEL_TLS_V13;
- if (MLX5_GET(tls_extended_cap, buf, lro))
- caps |= MLX5_ACCEL_TLS_LRO;
- if (MLX5_GET(tls_extended_cap, buf, ipv6))
- caps |= MLX5_ACCEL_TLS_IPV6;
-
- if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128))
- caps |= MLX5_ACCEL_TLS_AES_GCM128;
- if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256))
- caps |= MLX5_ACCEL_TLS_AES_GCM256;
-
- *p_caps = caps;
- err = 0;
-out:
- kfree(buf);
- return err;
-}
-
-int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
- struct mlx5_fpga_conn_attr init_attr = {0};
- struct mlx5_fpga_conn *conn;
- struct mlx5_fpga_tls *tls;
- int err = 0;
-
- if (!mlx5_fpga_is_tls_device(mdev) || !fdev)
- return 0;
-
- tls = kzalloc(sizeof(*tls), GFP_KERNEL);
- if (!tls)
- return -ENOMEM;
-
- err = mlx5_fpga_tls_get_caps(fdev, &tls->caps);
- if (err)
- goto error;
-
- if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) {
- err = -ENOTSUPP;
- goto error;
- }
-
- init_attr.rx_size = SBU_QP_QUEUE_SIZE;
- init_attr.tx_size = SBU_QP_QUEUE_SIZE;
- init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb;
- init_attr.cb_arg = fdev;
- conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
- if (IS_ERR(conn)) {
- err = PTR_ERR(conn);
- mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n",
- err);
- goto error;
- }
-
- tls->conn = conn;
- spin_lock_init(&tls->pending_cmds_lock);
- INIT_LIST_HEAD(&tls->pending_cmds);
-
- idr_init(&tls->tx_idr);
- idr_init(&tls->rx_idr);
- spin_lock_init(&tls->tx_idr_spinlock);
- spin_lock_init(&tls->rx_idr_spinlock);
- fdev->tls = tls;
- return 0;
-
-error:
- kfree(tls);
- return err;
-}
-
-void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev)
-{
- struct mlx5_fpga_device *fdev = mdev->fpga;
-
- if (!fdev || !fdev->tls)
- return;
-
- mlx5_fpga_sbu_conn_destroy(fdev->tls->conn);
- kfree(fdev->tls);
- fdev->tls = NULL;
-}
-
-static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd,
- struct tls_crypto_info *info,
- __be64 *rcd_sn)
-{
- struct tls12_crypto_info_aes_gcm_128 *crypto_info =
- (struct tls12_crypto_info_aes_gcm_128 *)info;
-
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq,
- TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
-
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv),
- crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key),
- crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
-
- /* in AES-GCM 128 we need to write the key twice */
- memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) +
- TLS_CIPHER_AES_GCM_128_KEY_SIZE,
- crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
-
- MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128);
-}
-
-static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps,
- struct tls_crypto_info *crypto_info)
-{
- __be64 rcd_sn;
-
- switch (crypto_info->cipher_type) {
- case TLS_CIPHER_AES_GCM_128:
- if (!(caps & MLX5_ACCEL_TLS_AES_GCM128))
- return -EINVAL;
- mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 swid, u32 tcp_sn)
-{
- u32 caps = mlx5_fpga_tls_device_caps(mdev);
- struct mlx5_setup_stream_context *ctx;
- int ret = -ENOMEM;
- size_t cmd_size;
- void *cmd;
-
- cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx);
- ctx = kzalloc(cmd_size, GFP_KERNEL);
- if (!ctx)
- goto out;
-
- cmd = ctx + 1;
- ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info);
- if (ret)
- goto free_ctx;
-
- mlx5_fpga_tls_flow_to_cmd(flow, cmd);
-
- MLX5_SET(tls_cmd, cmd, swid, swid);
- MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn);
-
- return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx);
-
-free_ctx:
- kfree(ctx);
-out:
- return ret;
-}
-
-int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx)
-{
- struct mlx5_fpga_tls *tls = mdev->fpga->tls;
- int ret = -ENOMEM;
- u32 swid;
-
- if (direction_sx)
- ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr,
- &tls->tx_idr_spinlock, flow);
- else
- ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr,
- &tls->rx_idr_spinlock, flow);
-
- if (ret < 0)
- return ret;
-
- swid = ret;
- MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0);
-
- ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
- start_offload_tcp_sn);
- if (ret && ret != -EINTR)
- goto free_swid;
-
- *p_swid = swid;
- return 0;
-free_swid:
- if (direction_sx)
- mlx5_fpga_tls_release_swid(&tls->tx_idr,
- &tls->tx_idr_spinlock, swid);
- else
- mlx5_fpga_tls_release_swid(&tls->rx_idr,
- &tls->rx_idr_spinlock, swid);
-
- return ret;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
deleted file mode 100644
index 5714cf391d1b..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef __MLX5_FPGA_TLS_H__
-#define __MLX5_FPGA_TLS_H__
-
-#include <linux/mlx5/driver.h>
-
-#include <net/tls.h>
-#include "fpga/core.h"
-
-struct mlx5_fpga_tls {
- struct list_head pending_cmds;
- spinlock_t pending_cmds_lock; /* Protects pending_cmds */
- u32 caps;
- struct mlx5_fpga_conn *conn;
-
- struct idr tx_idr;
- struct idr rx_idr;
- spinlock_t tx_idr_spinlock; /* protects the IDR */
- spinlock_t rx_idr_spinlock; /* protects the IDR */
-};
-
-int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
- struct tls_crypto_info *crypto_info,
- u32 start_offload_tcp_sn, u32 *p_swid,
- bool direction_sx);
-
-void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
- gfp_t flags, bool direction_sx);
-
-bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev);
-int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev);
-void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev);
-
-static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
-{
- return mdev->fpga->tls->caps;
-}
-
-int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
- u32 seq, __be64 rcd_sn);
-
-#endif /* __MLX5_FPGA_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index a0ac17c3f12f..33e9f86cf7d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -878,9 +878,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
table_type = FS_FT_NIC_RX;
break;
case MLX5_FLOW_NAMESPACE_EGRESS:
-#ifdef CONFIG_MLX5_IPSEC
case MLX5_FLOW_NAMESPACE_EGRESS_KERNEL:
-#endif
max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_TX;
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 816d991f7621..297e6a468a3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -40,8 +40,6 @@
#include "fs_cmd.h"
#include "fs_ft_pool.h"
#include "diag/fs_tracepoint.h"
-#include "accel/ipsec.h"
-#include "fpga/ipsec.h"
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
@@ -188,24 +186,18 @@ static struct init_tree_node {
static struct init_tree_node egress_root_fs = {
.type = FS_TYPE_NAMESPACE,
-#ifdef CONFIG_MLX5_IPSEC
.ar_size = 2,
-#else
- .ar_size = 1,
-#endif
.children = (struct init_tree_node[]) {
ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
FS_CHAINING_CAPS_EGRESS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
BY_PASS_PRIO_NUM_LEVELS))),
-#ifdef CONFIG_MLX5_IPSEC
ADD_PRIO(0, KERNEL_TX_MIN_LEVEL, 0,
FS_CHAINING_CAPS_EGRESS,
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS,
KERNEL_TX_IPSEC_NUM_LEVELS))),
-#endif
}
};
@@ -2519,10 +2511,6 @@ static struct mlx5_flow_root_namespace
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_namespace *ns;
- if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
- (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
- cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
-
/* Create the root namespace */
root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
if (!root_ns)
@@ -3172,8 +3160,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
goto err;
}
- if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
err = init_egress_root_ns(steering);
if (err)
goto err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 614687e0e3d9..cfb8bedba512 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -35,7 +35,6 @@
#include "mlx5_core.h"
#include "../../mlxfw/mlxfw.h"
#include "lib/tout.h"
-#include "accel/tls.h"
enum {
MCQS_IDENTIFIER_BOOT_IMG = 0x1,
@@ -249,7 +248,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
- if (mlx5_accel_is_ktls_tx(dev) || mlx5_accel_is_ktls_rx(dev)) {
+ if (MLX5_CAP_GEN(dev, tls_tx) || MLX5_CAP_GEN(dev, tls_rx)) {
err = mlx5_core_get_caps(dev, MLX5_CAP_TLS);
if (err)
return err;
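
The fw.c hunk above drops the accel-layer indirection: instead of the removed mlx5_accel_is_ktls_tx/rx() helpers, the general capability bits are tested directly before the TLS capability group is fetched. A minimal sketch of the resulting pattern, using only the accessors visible in the hunk:

	/* Sketch: fetch an optional cap group only when the general
	 * caps advertise the feature (tls_tx/tls_rx here). */
	if (MLX5_CAP_GEN(dev, tls_tx) || MLX5_CAP_GEN(dev, tls_rx)) {
		err = mlx5_core_get_caps(dev, MLX5_CAP_TLS);
		if (err)
			return err;
	}
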
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 2589e39eb9c7..d504c8cb8f96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -62,9 +62,7 @@
#include "lib/mlx5.h"
#include "lib/tout.h"
#include "fpga/core.h"
-#include "fpga/ipsec.h"
-#include "accel/ipsec.h"
-#include "accel/tls.h"
+#include "en_accel/ipsec_offload.h"
#include "lib/clock.h"
#include "lib/vxlan.h"
#include "lib/geneve.h"
@@ -1183,14 +1181,6 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_fpga_start;
}
- mlx5_accel_ipsec_init(dev);
-
- err = mlx5_accel_tls_init(dev);
- if (err) {
- mlx5_core_err(dev, "TLS device start failed %d\n", err);
- goto err_tls_start;
- }
-
err = mlx5_init_fs(dev);
if (err) {
mlx5_core_err(dev, "Failed to init flow steering\n");
@@ -1238,9 +1228,6 @@ err_vhca:
err_set_hca:
mlx5_cleanup_fs(dev);
err_fs:
- mlx5_accel_tls_cleanup(dev);
-err_tls_start:
- mlx5_accel_ipsec_cleanup(dev);
mlx5_fpga_device_stop(dev);
err_fpga_start:
mlx5_rsc_dump_cleanup(dev);
@@ -1266,8 +1253,6 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_sf_hw_table_destroy(dev);
mlx5_vhca_event_stop(dev);
mlx5_cleanup_fs(dev);
- mlx5_accel_ipsec_cleanup(dev);
- mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
mlx5_rsc_dump_cleanup(dev);
mlx5_hv_vhca_cleanup(dev->hv_vhca);
@@ -1947,7 +1932,6 @@ static int __init init(void)
get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
mlx5_core_verify_params();
- mlx5_fpga_ipsec_build_fs_cmds();
mlx5_register_debugfs();
err = pci_register_driver(&mlx5_core_driver);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 29a74b8bd5b5..f1bb243dfb8c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -69,8 +69,8 @@ mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, bool *qsfp,
if (err)
return err;
- mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1,
- MLXSW_REG_MCIA_I2C_ADDR_LOW);
+ mlxsw_reg_mcia_pack(mcia_pl, 0, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0,
+ 1, MLXSW_REG_MCIA_I2C_ADDR_LOW);
err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl);
if (err)
return err;
@@ -145,7 +145,8 @@ mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module,
}
}
- mlxsw_reg_mcia_pack(mcia_pl, module, 0, page, offset, size, i2c_addr);
+ mlxsw_reg_mcia_pack(mcia_pl, 0, module, 0, page, offset, size,
+ i2c_addr);
err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl);
if (err)
@@ -177,7 +178,7 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
int page;
int err;
- mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
+ mlxsw_reg_mtmp_pack(mtmp_pl, 0, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
false, false);
err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
if (err)
@@ -219,12 +220,12 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
page = MLXSW_REG_MCIA_TH_PAGE_CMIS_NUM;
else
page = MLXSW_REG_MCIA_TH_PAGE_NUM;
- mlxsw_reg_mcia_pack(mcia_pl, module, 0, page,
+ mlxsw_reg_mcia_pack(mcia_pl, 0, module, 0, page,
MLXSW_REG_MCIA_TH_PAGE_OFF + off,
MLXSW_REG_MCIA_TH_ITEM_SIZE,
MLXSW_REG_MCIA_I2C_ADDR_LOW);
} else {
- mlxsw_reg_mcia_pack(mcia_pl, module, 0,
+ mlxsw_reg_mcia_pack(mcia_pl, 0, module, 0,
MLXSW_REG_MCIA_PAGE0_LO,
off, MLXSW_REG_MCIA_TH_ITEM_SIZE,
MLXSW_REG_MCIA_I2C_ADDR_HIGH);
@@ -419,7 +420,7 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, u8 module,
size = min_t(u8, page->length - bytes_read,
MLXSW_REG_MCIA_EEPROM_SIZE);
- mlxsw_reg_mcia_pack(mcia_pl, module, 0, page->page,
+ mlxsw_reg_mcia_pack(mcia_pl, 0, module, 0, page->page,
device_addr + bytes_read, size,
page->i2c_address);
mlxsw_reg_mcia_bank_number_set(mcia_pl, page->bank);
@@ -447,7 +448,7 @@ static int mlxsw_env_module_reset(struct mlxsw_core *mlxsw_core, u8 module)
{
char pmaos_pl[MLXSW_REG_PMAOS_LEN];
- mlxsw_reg_pmaos_pack(pmaos_pl, module);
+ mlxsw_reg_pmaos_pack(pmaos_pl, 0, module);
mlxsw_reg_pmaos_rst_set(pmaos_pl, true);
return mlxsw_reg_write(mlxsw_core, MLXSW_REG(pmaos), pmaos_pl);
@@ -519,7 +520,7 @@ mlxsw_env_get_module_power_mode(struct mlxsw_core *mlxsw_core, u8 module,
params->policy = mlxsw_env->module_info[module].power_mode_policy;
- mlxsw_reg_mcion_pack(mcion_pl, module);
+ mlxsw_reg_mcion_pack(mcion_pl, 0, module);
err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcion), mcion_pl);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to retrieve module's power mode");
@@ -547,7 +548,7 @@ static int mlxsw_env_module_enable_set(struct mlxsw_core *mlxsw_core,
enum mlxsw_reg_pmaos_admin_status admin_status;
char pmaos_pl[MLXSW_REG_PMAOS_LEN];
- mlxsw_reg_pmaos_pack(pmaos_pl, module);
+ mlxsw_reg_pmaos_pack(pmaos_pl, 0, module);
admin_status = enable ? MLXSW_REG_PMAOS_ADMIN_STATUS_ENABLED :
MLXSW_REG_PMAOS_ADMIN_STATUS_DISABLED;
mlxsw_reg_pmaos_admin_status_set(pmaos_pl, admin_status);
@@ -562,7 +563,7 @@ static int mlxsw_env_module_low_power_set(struct mlxsw_core *mlxsw_core,
u16 eeprom_override_mask, eeprom_override;
char pmmp_pl[MLXSW_REG_PMMP_LEN];
- mlxsw_reg_pmmp_pack(pmmp_pl, module);
+ mlxsw_reg_pmmp_pack(pmmp_pl, 0, module);
mlxsw_reg_pmmp_sticky_set(pmmp_pl, true);
/* Mask all the bits except low power mode. */
eeprom_override_mask = ~MLXSW_REG_PMMP_EEPROM_OVERRIDE_LOW_POWER_MASK;
@@ -660,8 +661,8 @@ static int mlxsw_env_module_has_temp_sensor(struct mlxsw_core *mlxsw_core,
u16 temp;
int err;
- mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
- 1);
+ mlxsw_reg_mtbr_pack(mtbr_pl, 0,
+ MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, 1);
err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mtbr), mtbr_pl);
if (err)
return err;
@@ -930,7 +931,7 @@ mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core)
for (i = 0; i < mlxsw_core_env(mlxsw_core)->module_count; i++) {
char pmaos_pl[MLXSW_REG_PMAOS_LEN];
- mlxsw_reg_pmaos_pack(pmaos_pl, i);
+ mlxsw_reg_pmaos_pack(pmaos_pl, 0, i);
mlxsw_reg_pmaos_e_set(pmaos_pl,
MLXSW_REG_PMAOS_E_GENERATE_EVENT);
mlxsw_reg_pmaos_ee_set(pmaos_pl, true);
@@ -1059,12 +1060,12 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
u8 module_count;
int i, err;
- mlxsw_reg_mgpir_pack(mgpir_pl);
+ mlxsw_reg_mgpir_pack(mgpir_pl, 0);
err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl);
if (err)
return err;
- mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &module_count);
+ mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &module_count, NULL);
env = kzalloc(struct_size(env, module_info, module_count), GFP_KERNEL);
if (!env)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 8b170ad92302..2bc4c4556895 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -66,7 +66,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index,
mlxsw_hwmon->module_sensor_max);
- mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
+ mlxsw_reg_mtmp_pack(mtmp_pl, 0, index, false, false);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
@@ -89,7 +89,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
index = mlxsw_hwmon_get_attr_index(mlxsw_hwmon_attr->type_index,
mlxsw_hwmon->module_sensor_max);
- mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
+ mlxsw_reg_mtmp_pack(mtmp_pl, 0, index, false, false);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
@@ -232,8 +232,9 @@ static int mlxsw_hwmon_module_temp_get(struct device *dev,
int err;
module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
- mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
- false, false);
+ mlxsw_reg_mtmp_pack(mtmp_pl, 0,
+ MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, false,
+ false);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
dev_err(dev, "Failed to query module temperature\n");
@@ -270,8 +271,8 @@ static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev,
int err;
module = mlxsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
- mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
- 1);
+ mlxsw_reg_mtbr_pack(mtbr_pl, 0,
+ MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, 1);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl);
if (err) {
dev_err(dev, "Failed to query module temperature sensor\n");
@@ -655,13 +656,13 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon)
u8 module_sensor_max;
int i, err;
- mlxsw_reg_mgpir_pack(mgpir_pl);
+ mlxsw_reg_mgpir_pack(mgpir_pl, 0);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl);
if (err)
return err;
mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
- &module_sensor_max);
+ &module_sensor_max, NULL);
/* Add extra attributes for module temperature. Sensor index is
* assigned to sensor_count value, while all indexed before
@@ -706,12 +707,13 @@ static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon)
u8 gbox_num;
int err;
- mlxsw_reg_mgpir_pack(mgpir_pl);
+ mlxsw_reg_mgpir_pack(mgpir_pl, 0);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl);
if (err)
return err;
- mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, NULL);
+ mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, NULL,
+ NULL);
if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE ||
!gbox_num)
return 0;
@@ -721,7 +723,7 @@ static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon)
while (index < max_index) {
sensor_index = index % mlxsw_hwmon->module_sensor_max +
MLXSW_REG_MTMP_GBOX_INDEX_MIN;
- mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true);
+ mlxsw_reg_mtmp_pack(mtmp_pl, 0, sensor_index, true, true);
err = mlxsw_reg_write(mlxsw_hwmon->core,
MLXSW_REG(mtmp), mtmp_pl);
if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 05f54bd982c0..adb2820430b1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -271,7 +271,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
int temp;
int err;
- mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
+ mlxsw_reg_mtmp_pack(mtmp_pl, 0, 0, false, false);
err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
@@ -431,7 +431,7 @@ mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core,
int err;
/* Read module temperature and thresholds. */
- mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, false, false);
+ mlxsw_reg_mtmp_pack(mtmp_pl, 0, sensor_index, false, false);
err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
/* Set temperature and thresholds to zero to avoid passing
@@ -576,7 +576,7 @@ static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
int err;
index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
- mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
+ mlxsw_reg_mtmp_pack(mtmp_pl, 0, index, false, false);
err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
if (err)
@@ -746,13 +746,13 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
char mgpir_pl[MLXSW_REG_MGPIR_LEN];
int i, err;
- mlxsw_reg_mgpir_pack(mgpir_pl);
+ mlxsw_reg_mgpir_pack(mgpir_pl, 0);
err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
if (err)
return err;
mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
- &thermal->tz_module_num);
+ &thermal->tz_module_num, NULL);
thermal->tz_module_arr = kcalloc(thermal->tz_module_num,
sizeof(*thermal->tz_module_arr),
@@ -837,13 +837,13 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
int i;
int err;
- mlxsw_reg_mgpir_pack(mgpir_pl);
+ mlxsw_reg_mgpir_pack(mgpir_pl, 0);
err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
if (err)
return err;
mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL,
- NULL);
+ NULL, NULL);
if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE ||
!gbox_num)
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 67b1a2f8397f..b8a236872fea 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5769,9 +5769,10 @@ enum mlxsw_reg_pmaos_e {
*/
MLXSW_ITEM32(reg, pmaos, e, 0x04, 0, 2);
-static inline void mlxsw_reg_pmaos_pack(char *payload, u8 module)
+static inline void mlxsw_reg_pmaos_pack(char *payload, u8 slot_index, u8 module)
{
MLXSW_REG_ZERO(pmaos, payload);
+ mlxsw_reg_pmaos_slot_index_set(payload, slot_index);
mlxsw_reg_pmaos_module_set(payload, module);
}
@@ -5984,6 +5985,12 @@ MLXSW_REG_DEFINE(pmmp, MLXSW_REG_PMMP_ID, MLXSW_REG_PMMP_LEN);
*/
MLXSW_ITEM32(reg, pmmp, module, 0x00, 16, 8);
+/* reg_pmmp_slot_index
+ * Slot index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmmp, slot_index, 0x00, 24, 4);
+
/* reg_pmmp_sticky
* When set, will keep eeprom_override values after plug-out event.
* Access: OP
@@ -6011,9 +6018,10 @@ enum {
*/
MLXSW_ITEM32(reg, pmmp, eeprom_override, 0x04, 0, 16);
-static inline void mlxsw_reg_pmmp_pack(char *payload, u8 module)
+static inline void mlxsw_reg_pmmp_pack(char *payload, u8 slot_index, u8 module)
{
MLXSW_REG_ZERO(pmmp, payload);
+ mlxsw_reg_pmmp_slot_index_set(payload, slot_index);
mlxsw_reg_pmmp_module_set(payload, module);
}
@@ -9721,6 +9729,12 @@ MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7);
MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN);
+/* reg_mtmp_slot_index
+ * Slot index (0: Main board).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtmp, slot_index, 0x00, 16, 4);
+
#define MLXSW_REG_MTMP_MODULE_INDEX_MIN 64
#define MLXSW_REG_MTMP_GBOX_INDEX_MIN 256
/* reg_mtmp_sensor_index
@@ -9810,11 +9824,12 @@ MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16);
*/
MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE);
-static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index,
- bool max_temp_enable,
+static inline void mlxsw_reg_mtmp_pack(char *payload, u8 slot_index,
+ u16 sensor_index, bool max_temp_enable,
bool max_temp_reset)
{
MLXSW_REG_ZERO(mtmp, payload);
+ mlxsw_reg_mtmp_slot_index_set(payload, slot_index);
mlxsw_reg_mtmp_sensor_index_set(payload, sensor_index);
mlxsw_reg_mtmp_mte_set(payload, max_temp_enable);
mlxsw_reg_mtmp_mtr_set(payload, max_temp_reset);
@@ -9880,6 +9895,12 @@ MLXSW_ITEM_BIT_ARRAY(reg, mtwe, sensor_warning, 0x0, 0x10, 1);
MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN);
+/* reg_mtbr_slot_index
+ * Slot index (0: Main board).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtbr, slot_index, 0x00, 16, 4);
+
/* reg_mtbr_base_sensor_index
* Base sensors index to access (0 - ASIC sensor, 1-63 - ambient sensors,
* 64-127 are mapped to the SFP+/QSFP modules sequentially).
@@ -9912,10 +9933,11 @@ MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16,
MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16,
MLXSW_REG_MTBR_REC_LEN, 0x00, false);
-static inline void mlxsw_reg_mtbr_pack(char *payload, u16 base_sensor_index,
- u8 num_rec)
+static inline void mlxsw_reg_mtbr_pack(char *payload, u8 slot_index,
+ u16 base_sensor_index, u8 num_rec)
{
MLXSW_REG_ZERO(mtbr, payload);
+ mlxsw_reg_mtbr_slot_index_set(payload, slot_index);
mlxsw_reg_mtbr_base_sensor_index_set(payload, base_sensor_index);
mlxsw_reg_mtbr_num_rec_set(payload, num_rec);
}
@@ -9964,6 +9986,12 @@ MLXSW_ITEM32(reg, mcia, l, 0x00, 31, 1);
*/
MLXSW_ITEM32(reg, mcia, module, 0x00, 16, 8);
+/* reg_mcia_slot_index
+ * Slot index (0: Main board)
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mcia, slot, 0x00, 12, 4);
+
enum {
MLXSW_REG_MCIA_STATUS_GOOD = 0,
/* No response from module's EEPROM. */
@@ -10063,11 +10091,13 @@ MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE);
MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) / \
MLXSW_REG_MCIA_EEPROM_UP_PAGE_LENGTH + 1)
-static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock,
- u8 page_number, u16 device_addr,
- u8 size, u8 i2c_device_addr)
+static inline void mlxsw_reg_mcia_pack(char *payload, u8 slot_index, u8 module,
+ u8 lock, u8 page_number,
+ u16 device_addr, u8 size,
+ u8 i2c_device_addr)
{
MLXSW_REG_ZERO(mcia, payload);
+ mlxsw_reg_mcia_slot_set(payload, slot_index);
mlxsw_reg_mcia_module_set(payload, module);
mlxsw_reg_mcia_l_set(payload, lock);
mlxsw_reg_mcia_page_number_set(payload, page_number);
@@ -10499,6 +10529,12 @@ MLXSW_REG_DEFINE(mcion, MLXSW_REG_MCION_ID, MLXSW_REG_MCION_LEN);
*/
MLXSW_ITEM32(reg, mcion, module, 0x00, 16, 8);
+/* reg_mcion_slot_index
+ * Slot index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mcion, slot_index, 0x00, 12, 4);
+
enum {
MLXSW_REG_MCION_MODULE_STATUS_BITS_PRESENT_MASK = BIT(0),
MLXSW_REG_MCION_MODULE_STATUS_BITS_LOW_POWER_MASK = BIT(8),
@@ -10510,9 +10546,10 @@ enum {
*/
MLXSW_ITEM32(reg, mcion, module_status_bits, 0x04, 0, 16);
-static inline void mlxsw_reg_mcion_pack(char *payload, u8 module)
+static inline void mlxsw_reg_mcion_pack(char *payload, u8 slot_index, u8 module)
{
MLXSW_REG_ZERO(mcion, payload);
+ mlxsw_reg_mcion_slot_index_set(payload, slot_index);
mlxsw_reg_mcion_module_set(payload, module);
}
@@ -11326,6 +11363,12 @@ enum mlxsw_reg_mgpir_device_type {
MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE,
};
+/* mgpir_slot_index
+ * Slot index (0: Main board).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mgpir, slot_index, 0x00, 28, 4);
+
/* mgpir_device_type
* Access: RO
*/
@@ -11343,21 +11386,35 @@ MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8);
*/
MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8);
+/* mgpir_max_modules_per_slot
+ * Maximum number of modules that can be connected per slot.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, max_modules_per_slot, 0x04, 16, 8);
+
+/* mgpir_num_of_slots
+ * Number of slots in the system.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, num_of_slots, 0x04, 8, 8);
+
/* mgpir_num_of_modules
* Number of modules.
* Access: RO
*/
MLXSW_ITEM32(reg, mgpir, num_of_modules, 0x04, 0, 8);
-static inline void mlxsw_reg_mgpir_pack(char *payload)
+static inline void mlxsw_reg_mgpir_pack(char *payload, u8 slot_index)
{
MLXSW_REG_ZERO(mgpir, payload);
+ mlxsw_reg_mgpir_slot_index_set(payload, slot_index);
}
static inline void
mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
enum mlxsw_reg_mgpir_device_type *device_type,
- u8 *devices_per_flash, u8 *num_of_modules)
+ u8 *devices_per_flash, u8 *num_of_modules,
+ u8 *num_of_slots)
{
if (num_of_devices)
*num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload);
@@ -11368,6 +11425,8 @@ mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
mlxsw_reg_mgpir_devices_per_flash_get(payload);
if (num_of_modules)
*num_of_modules = mlxsw_reg_mgpir_num_of_modules_get(payload);
+ if (num_of_slots)
+ *num_of_slots = mlxsw_reg_mgpir_num_of_slots_get(payload);
}
/* MFDE - Monitoring FW Debug Register
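
Every pack() helper in the reg.h hunk above gains a slot_index as its second argument (0 addresses the main board), and mlxsw_reg_mgpir_unpack() grows a num_of_slots out-parameter. A minimal caller sketch, assuming only the helpers defined above and a successful mlxsw_reg_query() in between:

	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
	u8 module_count, num_of_slots;

	mlxsw_reg_mgpir_pack(mgpir_pl, 0);	/* slot 0: main board */
	/* err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl); */
	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
			       &module_count, &num_of_slots);
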
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 79deb19e3a19..dc820d9f2696 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -21,6 +21,7 @@
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
+#include <net/inet_dscp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/nexthop.h>
@@ -507,7 +508,7 @@ struct mlxsw_sp_fib4_entry {
struct mlxsw_sp_fib_entry common;
struct fib_info *fi;
u32 tb_id;
- u8 tos;
+ dscp_t dscp;
u8 type;
};
@@ -5559,7 +5560,7 @@ mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
common);
- return !fib4_entry->tos;
+ return !fib4_entry->dscp;
}
static bool
@@ -5620,7 +5621,7 @@ mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp,
fri.tb_id = fen_info->tb_id;
fri.dst = cpu_to_be32(*p_dst);
fri.dst_len = fen_info->dst_len;
- fri.tos = fen_info->tos;
+ fri.dscp = fen_info->dscp;
fri.type = fen_info->type;
fri.offload = false;
fri.trap = false;
@@ -5645,7 +5646,7 @@ mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
fri.tb_id = fib4_entry->tb_id;
fri.dst = cpu_to_be32(*p_dst);
fri.dst_len = dst_len;
- fri.tos = fib4_entry->tos;
+ fri.dscp = fib4_entry->dscp;
fri.type = fib4_entry->type;
fri.offload = should_offload;
fri.trap = !should_offload;
@@ -5668,7 +5669,7 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
fri.tb_id = fib4_entry->tb_id;
fri.dst = cpu_to_be32(*p_dst);
fri.dst_len = dst_len;
- fri.tos = fib4_entry->tos;
+ fri.dscp = fib4_entry->dscp;
fri.type = fib4_entry->type;
fri.offload = false;
fri.trap = false;
@@ -6250,7 +6251,7 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
fib_info_hold(fib4_entry->fi);
fib4_entry->tb_id = fen_info->tb_id;
fib4_entry->type = fen_info->type;
- fib4_entry->tos = fen_info->tos;
+ fib4_entry->dscp = fen_info->dscp;
fib_entry->fib_node = fib_node;
@@ -6304,7 +6305,7 @@ mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
fib4_entry = container_of(fib_node->fib_entry,
struct mlxsw_sp_fib4_entry, common);
if (fib4_entry->tb_id == fen_info->tb_id &&
- fib4_entry->tos == fen_info->tos &&
+ fib4_entry->dscp == fen_info->dscp &&
fib4_entry->type == fen_info->type &&
fib4_entry->fi == fen_info->fi)
return fib4_entry;
@@ -7010,7 +7011,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
if (IS_ERR(mlxsw_sp_rt6)) {
err = PTR_ERR(mlxsw_sp_rt6);
- goto err_rt6_create;
+ goto err_rt6_unwind;
}
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
@@ -7019,14 +7020,12 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry);
if (err)
- goto err_nexthop6_group_update;
+ goto err_rt6_unwind;
return 0;
-err_nexthop6_group_update:
- i = nrt6;
-err_rt6_create:
- for (i--; i >= 0; i--) {
+err_rt6_unwind:
+ for (; i > 0; i--) {
fib6_entry->nrt6--;
mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
struct mlxsw_sp_rt6, list);
@@ -7154,7 +7153,7 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
if (IS_ERR(mlxsw_sp_rt6)) {
err = PTR_ERR(mlxsw_sp_rt6);
- goto err_rt6_create;
+ goto err_rt6_unwind;
}
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
fib6_entry->nrt6++;
@@ -7162,7 +7161,7 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
if (err)
- goto err_nexthop6_group_get;
+ goto err_rt6_unwind;
err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
fib_node->fib);
@@ -7181,10 +7180,8 @@ err_fib6_entry_type_set:
mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
err_nexthop_group_vr_link:
mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
-err_nexthop6_group_get:
- i = nrt6;
-err_rt6_create:
- for (i--; i >= 0; i--) {
+err_rt6_unwind:
+ for (; i > 0; i--) {
fib6_entry->nrt6--;
mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
struct mlxsw_sp_rt6, list);
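
Both spectrum_router.c hunks above collapse a pair of error labels into a single err_rt6_unwind label: because i already counts the rt6 entries created so far, one downward loop releases exactly those and nothing else. A schematic of the pattern, with the release step abstracted away:

	for (i = 0; i < nrt6; i++) {
		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
		if (IS_ERR(mlxsw_sp_rt6))
			goto err_rt6_unwind;	/* exactly i entries exist */
		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
		fib6_entry->nrt6++;
	}
	return 0;

err_rt6_unwind:
	for (; i > 0; i--) {
		fib6_entry->nrt6--;
		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
					       struct mlxsw_sp_rt6, list);
		/* ... unlink and free the entry, as in the diff ... */
	}
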
diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile
index a9ffc719aa0e..fd2e0ebb2427 100644
--- a/drivers/net/ethernet/microchip/lan966x/Makefile
+++ b/drivers/net/ethernet/microchip/lan966x/Makefile
@@ -8,4 +8,4 @@ obj-$(CONFIG_LAN966X_SWITCH) += lan966x-switch.o
lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \
lan966x_mac.o lan966x_ethtool.o lan966x_switchdev.o \
lan966x_vlan.o lan966x_fdb.o lan966x_mdb.o \
- lan966x_ptp.o
+ lan966x_ptp.o lan966x_fdma.o
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
new file mode 100644
index 000000000000..9e2a7323eaf0
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -0,0 +1,842 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include "lan966x_main.h"
+
+static int lan966x_fdma_channel_active(struct lan966x *lan966x)
+{
+ return lan_rd(lan966x, FDMA_CH_ACTIVE);
+}
+
+static struct page *lan966x_fdma_rx_alloc_page(struct lan966x_rx *rx,
+ struct lan966x_db *db)
+{
+ struct lan966x *lan966x = rx->lan966x;
+ dma_addr_t dma_addr;
+ struct page *page;
+
+ page = dev_alloc_pages(rx->page_order);
+ if (unlikely(!page))
+ return NULL;
+
+ dma_addr = dma_map_page(lan966x->dev, page, 0,
+ PAGE_SIZE << rx->page_order,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(lan966x->dev, dma_addr)))
+ goto free_page;
+
+ db->dataptr = dma_addr;
+
+ return page;
+
+free_page:
+ __free_pages(page, rx->page_order);
+ return NULL;
+}
+
+static void lan966x_fdma_rx_free_pages(struct lan966x_rx *rx)
+{
+ struct lan966x *lan966x = rx->lan966x;
+ struct lan966x_rx_dcb *dcb;
+ struct lan966x_db *db;
+ int i, j;
+
+ for (i = 0; i < FDMA_DCB_MAX; ++i) {
+ dcb = &rx->dcbs[i];
+
+ for (j = 0; j < FDMA_RX_DCB_MAX_DBS; ++j) {
+ db = &dcb->db[j];
+ dma_unmap_single(lan966x->dev,
+ (dma_addr_t)db->dataptr,
+ PAGE_SIZE << rx->page_order,
+ DMA_FROM_DEVICE);
+ __free_pages(rx->page[i][j], rx->page_order);
+ }
+ }
+}
+
+static void lan966x_fdma_rx_add_dcb(struct lan966x_rx *rx,
+ struct lan966x_rx_dcb *dcb,
+ u64 nextptr)
+{
+ struct lan966x_db *db;
+ int i;
+
+ for (i = 0; i < FDMA_RX_DCB_MAX_DBS; ++i) {
+ db = &dcb->db[i];
+ db->status = FDMA_DCB_STATUS_INTR;
+ }
+
+ dcb->nextptr = FDMA_DCB_INVALID_DATA;
+ dcb->info = FDMA_DCB_INFO_DATAL(PAGE_SIZE << rx->page_order);
+
+ rx->last_entry->nextptr = nextptr;
+ rx->last_entry = dcb;
+}
+
+static int lan966x_fdma_rx_alloc(struct lan966x_rx *rx)
+{
+ struct lan966x *lan966x = rx->lan966x;
+ struct lan966x_rx_dcb *dcb;
+ struct lan966x_db *db;
+ struct page *page;
+ int i, j;
+ int size;
+
+ /* calculate how many pages are needed to allocate the dcbs */
+ size = sizeof(struct lan966x_rx_dcb) * FDMA_DCB_MAX;
+ size = ALIGN(size, PAGE_SIZE);
+
+ rx->dcbs = dma_alloc_coherent(lan966x->dev, size, &rx->dma, GFP_KERNEL);
+ if (!rx->dcbs)
+ return -ENOMEM;
+
+ rx->last_entry = rx->dcbs;
+ rx->db_index = 0;
+ rx->dcb_index = 0;
+
+ /* Now for each dcb allocate the dbs */
+ for (i = 0; i < FDMA_DCB_MAX; ++i) {
+ dcb = &rx->dcbs[i];
+ dcb->info = 0;
+
+ /* For each db allocate a page and map it to the DB dataptr. */
+ for (j = 0; j < FDMA_RX_DCB_MAX_DBS; ++j) {
+ db = &dcb->db[j];
+ page = lan966x_fdma_rx_alloc_page(rx, db);
+ if (!page)
+ return -ENOMEM;
+
+ db->status = 0;
+ rx->page[i][j] = page;
+ }
+
+ lan966x_fdma_rx_add_dcb(rx, dcb, rx->dma + sizeof(*dcb) * i);
+ }
+
+ return 0;
+}
+
+static void lan966x_fdma_rx_free(struct lan966x_rx *rx)
+{
+ struct lan966x *lan966x = rx->lan966x;
+ u32 size;
+
+ /* Now it is possible to free the memory holding the dcbs */
+ size = sizeof(struct lan966x_rx_dcb) * FDMA_DCB_MAX;
+ size = ALIGN(size, PAGE_SIZE);
+ dma_free_coherent(lan966x->dev, size, rx->dcbs, rx->dma);
+}
+
+static void lan966x_fdma_rx_start(struct lan966x_rx *rx)
+{
+ struct lan966x *lan966x = rx->lan966x;
+ u32 mask;
+
+ /* When activating a channel, the first DCB address must be written
+ * before the channel is activated
+ */
+ lan_wr(lower_32_bits((u64)rx->dma), lan966x,
+ FDMA_DCB_LLP(rx->channel_id));
+ lan_wr(upper_32_bits((u64)rx->dma), lan966x,
+ FDMA_DCB_LLP1(rx->channel_id));
+
+ lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_RX_DCB_MAX_DBS) |
+ FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) |
+ FDMA_CH_CFG_CH_INJ_PORT_SET(0) |
+ FDMA_CH_CFG_CH_MEM_SET(1),
+ lan966x, FDMA_CH_CFG(rx->channel_id));
+
+ /* Start fdma */
+ lan_rmw(FDMA_PORT_CTRL_XTR_STOP_SET(0),
+ FDMA_PORT_CTRL_XTR_STOP,
+ lan966x, FDMA_PORT_CTRL(0));
+
+ /* Enable interrupts */
+ mask = lan_rd(lan966x, FDMA_INTR_DB_ENA);
+ mask = FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(mask);
+ mask |= BIT(rx->channel_id);
+ lan_rmw(FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(mask),
+ FDMA_INTR_DB_ENA_INTR_DB_ENA,
+ lan966x, FDMA_INTR_DB_ENA);
+
+ /* Activate the channel */
+ lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(rx->channel_id)),
+ FDMA_CH_ACTIVATE_CH_ACTIVATE,
+ lan966x, FDMA_CH_ACTIVATE);
+}
+
+static void lan966x_fdma_rx_disable(struct lan966x_rx *rx)
+{
+ struct lan966x *lan966x = rx->lan966x;
+ u32 val;
+
+ /* Disable the channel */
+ lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(rx->channel_id)),
+ FDMA_CH_DISABLE_CH_DISABLE,
+ lan966x, FDMA_CH_DISABLE);
+
+ readx_poll_timeout_atomic(lan966x_fdma_channel_active, lan966x,
+ val, !(val & BIT(rx->channel_id)),
+ READL_SLEEP_US, READL_TIMEOUT_US);
+
+ lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(rx->channel_id)),
+ FDMA_CH_DB_DISCARD_DB_DISCARD,
+ lan966x, FDMA_CH_DB_DISCARD);
+}
+
+static void lan966x_fdma_rx_reload(struct lan966x_rx *rx)
+{
+ struct lan966x *lan966x = rx->lan966x;
+
+ lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(rx->channel_id)),
+ FDMA_CH_RELOAD_CH_RELOAD,
+ lan966x, FDMA_CH_RELOAD);
+}
+
+static void lan966x_fdma_tx_add_dcb(struct lan966x_tx *tx,
+ struct lan966x_tx_dcb *dcb)
+{
+ dcb->nextptr = FDMA_DCB_INVALID_DATA;
+ dcb->info = 0;
+}
+
+static int lan966x_fdma_tx_alloc(struct lan966x_tx *tx)
+{
+ struct lan966x *lan966x = tx->lan966x;
+ struct lan966x_tx_dcb *dcb;
+ struct lan966x_db *db;
+ int size;
+ int i, j;
+
+ tx->dcbs_buf = kcalloc(FDMA_DCB_MAX, sizeof(struct lan966x_tx_dcb_buf),
+ GFP_KERNEL);
+ if (!tx->dcbs_buf)
+ return -ENOMEM;
+
+ /* calculate how many pages are needed to allocate the dcbs */
+ size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX;
+ size = ALIGN(size, PAGE_SIZE);
+ tx->dcbs = dma_alloc_coherent(lan966x->dev, size, &tx->dma, GFP_KERNEL);
+ if (!tx->dcbs)
+ goto out;
+
+ /* Now for each dcb allocate the db */
+ for (i = 0; i < FDMA_DCB_MAX; ++i) {
+ dcb = &tx->dcbs[i];
+
+ for (j = 0; j < FDMA_TX_DCB_MAX_DBS; ++j) {
+ db = &dcb->db[j];
+ db->dataptr = 0;
+ db->status = 0;
+ }
+
+ lan966x_fdma_tx_add_dcb(tx, dcb);
+ }
+
+ return 0;
+
+out:
+ kfree(tx->dcbs_buf);
+ return -ENOMEM;
+}
+
+static void lan966x_fdma_tx_free(struct lan966x_tx *tx)
+{
+ struct lan966x *lan966x = tx->lan966x;
+ int size;
+
+ kfree(tx->dcbs_buf);
+
+ size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX;
+ size = ALIGN(size, PAGE_SIZE);
+ dma_free_coherent(lan966x->dev, size, tx->dcbs, tx->dma);
+}
+
+static void lan966x_fdma_tx_activate(struct lan966x_tx *tx)
+{
+ struct lan966x *lan966x = tx->lan966x;
+ u32 mask;
+
+ /* When activating a channel, the first DCB address must be written
+ * before the channel is activated
+ */
+ lan_wr(lower_32_bits((u64)tx->dma), lan966x,
+ FDMA_DCB_LLP(tx->channel_id));
+ lan_wr(upper_32_bits((u64)tx->dma), lan966x,
+ FDMA_DCB_LLP1(tx->channel_id));
+
+ lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_TX_DCB_MAX_DBS) |
+ FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) |
+ FDMA_CH_CFG_CH_INJ_PORT_SET(0) |
+ FDMA_CH_CFG_CH_MEM_SET(1),
+ lan966x, FDMA_CH_CFG(tx->channel_id));
+
+ /* Start fdma */
+ lan_rmw(FDMA_PORT_CTRL_INJ_STOP_SET(0),
+ FDMA_PORT_CTRL_INJ_STOP,
+ lan966x, FDMA_PORT_CTRL(0));
+
+ /* Enable interrupts */
+ mask = lan_rd(lan966x, FDMA_INTR_DB_ENA);
+ mask = FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(mask);
+ mask |= BIT(tx->channel_id);
+ lan_rmw(FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(mask),
+ FDMA_INTR_DB_ENA_INTR_DB_ENA,
+ lan966x, FDMA_INTR_DB_ENA);
+
+ /* Activate the channel */
+ lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(tx->channel_id)),
+ FDMA_CH_ACTIVATE_CH_ACTIVATE,
+ lan966x, FDMA_CH_ACTIVATE);
+}
+
+static void lan966x_fdma_tx_disable(struct lan966x_tx *tx)
+{
+ struct lan966x *lan966x = tx->lan966x;
+ u32 val;
+
+ /* Disable the channel */
+ lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(tx->channel_id)),
+ FDMA_CH_DISABLE_CH_DISABLE,
+ lan966x, FDMA_CH_DISABLE);
+
+ readx_poll_timeout_atomic(lan966x_fdma_channel_active, lan966x,
+ val, !(val & BIT(tx->channel_id)),
+ READL_SLEEP_US, READL_TIMEOUT_US);
+
+ lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(tx->channel_id)),
+ FDMA_CH_DB_DISCARD_DB_DISCARD,
+ lan966x, FDMA_CH_DB_DISCARD);
+
+ tx->activated = false;
+}
+
+static void lan966x_fdma_tx_reload(struct lan966x_tx *tx)
+{
+ struct lan966x *lan966x = tx->lan966x;
+
+ /* Write the registers to reload the channel */
+ lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(tx->channel_id)),
+ FDMA_CH_RELOAD_CH_RELOAD,
+ lan966x, FDMA_CH_RELOAD);
+}
+
+static void lan966x_fdma_wakeup_netdev(struct lan966x *lan966x)
+{
+ struct lan966x_port *port;
+ int i;
+
+ for (i = 0; i < lan966x->num_phys_ports; ++i) {
+ port = lan966x->ports[i];
+ if (!port)
+ continue;
+
+ if (netif_queue_stopped(port->dev))
+ netif_wake_queue(port->dev);
+ }
+}
+
+static void lan966x_fdma_stop_netdev(struct lan966x *lan966x)
+{
+ struct lan966x_port *port;
+ int i;
+
+ for (i = 0; i < lan966x->num_phys_ports; ++i) {
+ port = lan966x->ports[i];
+ if (!port)
+ continue;
+
+ netif_stop_queue(port->dev);
+ }
+}
+
+static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight)
+{
+ struct lan966x_tx *tx = &lan966x->tx;
+ struct lan966x_tx_dcb_buf *dcb_buf;
+ struct lan966x_db *db;
+ unsigned long flags;
+ bool clear = false;
+ int i;
+
+ spin_lock_irqsave(&lan966x->tx_lock, flags);
+ for (i = 0; i < FDMA_DCB_MAX; ++i) {
+ dcb_buf = &tx->dcbs_buf[i];
+
+ if (!dcb_buf->used)
+ continue;
+
+ db = &tx->dcbs[i].db[0];
+ if (!(db->status & FDMA_DCB_STATUS_DONE))
+ continue;
+
+ dcb_buf->dev->stats.tx_packets++;
+ dcb_buf->dev->stats.tx_bytes += dcb_buf->skb->len;
+
+ dcb_buf->used = false;
+ dma_unmap_single(lan966x->dev,
+ dcb_buf->dma_addr,
+ dcb_buf->skb->len,
+ DMA_TO_DEVICE);
+ if (!dcb_buf->ptp)
+ dev_kfree_skb_any(dcb_buf->skb);
+
+ clear = true;
+ }
+
+ if (clear)
+ lan966x_fdma_wakeup_netdev(lan966x);
+
+ spin_unlock_irqrestore(&lan966x->tx_lock, flags);
+}
+
+static bool lan966x_fdma_rx_more_frames(struct lan966x_rx *rx)
+{
+ struct lan966x_db *db;
+
+ /* Check if there is any data */
+ db = &rx->dcbs[rx->dcb_index].db[rx->db_index];
+ if (unlikely(!(db->status & FDMA_DCB_STATUS_DONE)))
+ return false;
+
+ return true;
+}
+
+static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx)
+{
+ struct lan966x *lan966x = rx->lan966x;
+ u64 src_port, timestamp;
+ struct lan966x_db *db;
+ struct sk_buff *skb;
+ struct page *page;
+
+ /* Get the received frame and unmap it */
+ db = &rx->dcbs[rx->dcb_index].db[rx->db_index];
+ page = rx->page[rx->dcb_index][rx->db_index];
+ skb = build_skb(page_address(page), PAGE_SIZE << rx->page_order);
+ if (unlikely(!skb))
+ goto unmap_page;
+
+ dma_unmap_single(lan966x->dev, (dma_addr_t)db->dataptr,
+ FDMA_DCB_STATUS_BLOCKL(db->status),
+ DMA_FROM_DEVICE);
+ skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status));
+
+ lan966x_ifh_get_src_port(skb->data, &src_port);
+ lan966x_ifh_get_timestamp(skb->data, &timestamp);
+
+ WARN_ON(src_port >= lan966x->num_phys_ports);
+
+ skb->dev = lan966x->ports[src_port]->dev;
+ skb_pull(skb, IFH_LEN * sizeof(u32));
+
+ if (likely(!(skb->dev->features & NETIF_F_RXFCS)))
+ skb_trim(skb, skb->len - ETH_FCS_LEN);
+
+ lan966x_ptp_rxtstamp(lan966x, skb, timestamp);
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ if (lan966x->bridge_mask & BIT(src_port)) {
+ skb->offload_fwd_mark = 1;
+
+ skb_reset_network_header(skb);
+ if (!lan966x_hw_offload(lan966x, src_port, skb))
+ skb->offload_fwd_mark = 0;
+ }
+
+ skb->dev->stats.rx_bytes += skb->len;
+ skb->dev->stats.rx_packets++;
+
+ return skb;
+
+unmap_page:
+ dma_unmap_page(lan966x->dev, (dma_addr_t)db->dataptr,
+ FDMA_DCB_STATUS_BLOCKL(db->status),
+ DMA_FROM_DEVICE);
+ __free_pages(page, rx->page_order);
+
+ return NULL;
+}
+
+static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
+{
+ struct lan966x *lan966x = container_of(napi, struct lan966x, napi);
+ struct lan966x_rx *rx = &lan966x->rx;
+ int dcb_reload = rx->dcb_index;
+ struct lan966x_rx_dcb *old_dcb;
+ struct lan966x_db *db;
+ struct sk_buff *skb;
+ struct page *page;
+ int counter = 0;
+ u64 nextptr;
+
+ lan966x_fdma_tx_clear_buf(lan966x, weight);
+
+ /* Get all received skbs */
+ while (counter < weight) {
+ if (!lan966x_fdma_rx_more_frames(rx))
+ break;
+
+ skb = lan966x_fdma_rx_get_frame(rx);
+
+ rx->page[rx->dcb_index][rx->db_index] = NULL;
+ rx->dcb_index++;
+ rx->dcb_index &= FDMA_DCB_MAX - 1;
+
+ if (!skb)
+ break;
+
+ napi_gro_receive(&lan966x->napi, skb);
+ counter++;
+ }
+
+ /* Allocate new pages and map them */
+ while (dcb_reload != rx->dcb_index) {
+ db = &rx->dcbs[dcb_reload].db[rx->db_index];
+ page = lan966x_fdma_rx_alloc_page(rx, db);
+ if (unlikely(!page))
+ break;
+ rx->page[dcb_reload][rx->db_index] = page;
+
+ old_dcb = &rx->dcbs[dcb_reload];
+ dcb_reload++;
+ dcb_reload &= FDMA_DCB_MAX - 1;
+
+ nextptr = rx->dma + ((unsigned long)old_dcb -
+ (unsigned long)rx->dcbs);
+ lan966x_fdma_rx_add_dcb(rx, old_dcb, nextptr);
+ lan966x_fdma_rx_reload(rx);
+ }
+
+ if (counter < weight && napi_complete_done(napi, counter))
+ lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA);
+
+ return counter;
+}
+
+irqreturn_t lan966x_fdma_irq_handler(int irq, void *args)
+{
+ struct lan966x *lan966x = args;
+ u32 db, err, err_type;
+
+ db = lan_rd(lan966x, FDMA_INTR_DB);
+ err = lan_rd(lan966x, FDMA_INTR_ERR);
+
+ if (db) {
+ lan_wr(0, lan966x, FDMA_INTR_DB_ENA);
+ lan_wr(db, lan966x, FDMA_INTR_DB);
+
+ napi_schedule(&lan966x->napi);
+ }
+
+ if (err) {
+ err_type = lan_rd(lan966x, FDMA_ERRORS);
+
+ WARN(1, "Unexpected error: %d, error_type: %d\n", err, err_type);
+
+ lan_wr(err, lan966x, FDMA_INTR_ERR);
+ lan_wr(err_type, lan966x, FDMA_ERRORS);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int lan966x_fdma_get_next_dcb(struct lan966x_tx *tx)
+{
+ struct lan966x_tx_dcb_buf *dcb_buf;
+ int i;
+
+ for (i = 0; i < FDMA_DCB_MAX; ++i) {
+ dcb_buf = &tx->dcbs_buf[i];
+ if (!dcb_buf->used && i != tx->last_in_use)
+ return i;
+ }
+
+ return -1;
+}
+
+int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
+{
+ struct lan966x_port *port = netdev_priv(dev);
+ struct lan966x *lan966x = port->lan966x;
+ struct lan966x_tx_dcb_buf *next_dcb_buf;
+ struct lan966x_tx_dcb *next_dcb, *dcb;
+ struct lan966x_tx *tx = &lan966x->tx;
+ struct lan966x_db *next_db;
+ int needed_headroom;
+ int needed_tailroom;
+ dma_addr_t dma_addr;
+ int next_to_use;
+ int err;
+
+ /* Get next index */
+ next_to_use = lan966x_fdma_get_next_dcb(tx);
+ if (next_to_use < 0) {
+ netif_stop_queue(dev);
+ return NETDEV_TX_BUSY;
+ }
+
+ if (skb_put_padto(skb, ETH_ZLEN)) {
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
+
+ /* skb processing */
+ needed_headroom = max_t(int, IFH_LEN * sizeof(u32) - skb_headroom(skb), 0);
+ needed_tailroom = max_t(int, ETH_FCS_LEN - skb_tailroom(skb), 0);
+ if (needed_headroom || needed_tailroom || skb_header_cloned(skb)) {
+ err = pskb_expand_head(skb, needed_headroom, needed_tailroom,
+ GFP_ATOMIC);
+ if (unlikely(err)) {
+ dev->stats.tx_dropped++;
+ err = NETDEV_TX_OK;
+ goto release;
+ }
+ }
+
+ skb_tx_timestamp(skb);
+ skb_push(skb, IFH_LEN * sizeof(u32));
+ memcpy(skb->data, ifh, IFH_LEN * sizeof(u32));
+ skb_put(skb, 4);
+
+ dma_addr = dma_map_single(lan966x->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(lan966x->dev, dma_addr)) {
+ dev->stats.tx_dropped++;
+ err = NETDEV_TX_OK;
+ goto release;
+ }
+
+ /* Setup next dcb */
+ next_dcb = &tx->dcbs[next_to_use];
+ next_dcb->nextptr = FDMA_DCB_INVALID_DATA;
+
+ next_db = &next_dcb->db[0];
+ next_db->dataptr = dma_addr;
+ next_db->status = FDMA_DCB_STATUS_SOF |
+ FDMA_DCB_STATUS_EOF |
+ FDMA_DCB_STATUS_INTR |
+ FDMA_DCB_STATUS_BLOCKO(0) |
+ FDMA_DCB_STATUS_BLOCKL(skb->len);
+
+ /* Fill up the buffer */
+ next_dcb_buf = &tx->dcbs_buf[next_to_use];
+ next_dcb_buf->skb = skb;
+ next_dcb_buf->dma_addr = dma_addr;
+ next_dcb_buf->used = true;
+ next_dcb_buf->ptp = false;
+ next_dcb_buf->dev = dev;
+
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+ LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
+ next_dcb_buf->ptp = true;
+
+ if (likely(lan966x->tx.activated)) {
+ /* Connect current dcb to the next db */
+ dcb = &tx->dcbs[tx->last_in_use];
+ dcb->nextptr = tx->dma + (next_to_use *
+ sizeof(struct lan966x_tx_dcb));
+
+ lan966x_fdma_tx_reload(tx);
+ } else {
+ /* This is the first frame on the channel, so just activate it */
+ lan966x->tx.activated = true;
+ lan966x_fdma_tx_activate(tx);
+ }
+
+ /* Remember this dcb as the last one in use */
+ tx->last_in_use = next_to_use;
+
+ return NETDEV_TX_OK;
+
+release:
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+ LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
+ lan966x_ptp_txtstamp_release(port, skb);
+
+ dev_kfree_skb_any(skb);
+ return err;
+}
+
+static int lan966x_fdma_get_max_mtu(struct lan966x *lan966x)
+{
+ int max_mtu = 0;
+ int i;
+
+ for (i = 0; i < lan966x->num_phys_ports; ++i) {
+ int mtu;
+
+ if (!lan966x->ports[i])
+ continue;
+
+ mtu = lan966x->ports[i]->dev->mtu;
+ if (mtu > max_mtu)
+ max_mtu = mtu;
+ }
+
+ return max_mtu;
+}
+
+static int lan966x_qsys_sw_status(struct lan966x *lan966x)
+{
+ return lan_rd(lan966x, QSYS_SW_STATUS(CPU_PORT));
+}
+
+static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu)
+{
+ void *rx_dcbs, *tx_dcbs, *tx_dcbs_buf;
+ dma_addr_t rx_dma, tx_dma;
+ u32 size;
+ int err;
+
+ /* Store these for later to free them */
+ rx_dma = lan966x->rx.dma;
+ tx_dma = lan966x->tx.dma;
+ rx_dcbs = lan966x->rx.dcbs;
+ tx_dcbs = lan966x->tx.dcbs;
+ tx_dcbs_buf = lan966x->tx.dcbs_buf;
+
+ napi_synchronize(&lan966x->napi);
+ napi_disable(&lan966x->napi);
+ lan966x_fdma_stop_netdev(lan966x);
+
+ lan966x_fdma_rx_disable(&lan966x->rx);
+ lan966x_fdma_rx_free_pages(&lan966x->rx);
+ lan966x->rx.page_order = round_up(new_mtu, PAGE_SIZE) / PAGE_SIZE - 1;
+ err = lan966x_fdma_rx_alloc(&lan966x->rx);
+ if (err)
+ goto restore;
+ lan966x_fdma_rx_start(&lan966x->rx);
+
+ size = sizeof(struct lan966x_rx_dcb) * FDMA_DCB_MAX;
+ size = ALIGN(size, PAGE_SIZE);
+ dma_free_coherent(lan966x->dev, size, rx_dcbs, rx_dma);
+
+ lan966x_fdma_tx_disable(&lan966x->tx);
+ err = lan966x_fdma_tx_alloc(&lan966x->tx);
+ if (err)
+ goto restore_tx;
+
+ size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX;
+ size = ALIGN(size, PAGE_SIZE);
+ dma_free_coherent(lan966x->dev, size, tx_dcbs, tx_dma);
+
+ kfree(tx_dcbs_buf);
+
+ lan966x_fdma_wakeup_netdev(lan966x);
+ napi_enable(&lan966x->napi);
+
+ return err;
+restore:
+ lan966x->rx.dma = rx_dma;
+ lan966x->tx.dma = tx_dma;
+ lan966x_fdma_rx_start(&lan966x->rx);
+
+restore_tx:
+ lan966x->rx.dcbs = rx_dcbs;
+ lan966x->tx.dcbs = tx_dcbs;
+ lan966x->tx.dcbs_buf = tx_dcbs_buf;
+
+ return err;
+}
+
+int lan966x_fdma_change_mtu(struct lan966x *lan966x)
+{
+ int max_mtu;
+ int err;
+ u32 val;
+
+ max_mtu = lan966x_fdma_get_max_mtu(lan966x);
+ max_mtu += IFH_LEN * sizeof(u32);
+
+ if (round_up(max_mtu, PAGE_SIZE) / PAGE_SIZE - 1 ==
+ lan966x->rx.page_order)
+ return 0;
+
+ /* Disable the CPU port */
+ lan_rmw(QSYS_SW_PORT_MODE_PORT_ENA_SET(0),
+ QSYS_SW_PORT_MODE_PORT_ENA,
+ lan966x, QSYS_SW_PORT_MODE(CPU_PORT));
+
+ /* Flush the CPU queues */
+ readx_poll_timeout(lan966x_qsys_sw_status, lan966x,
+ val, !(QSYS_SW_STATUS_EQ_AVAIL_GET(val)),
+ READL_SLEEP_US, READL_TIMEOUT_US);
+
+ /* Add a sleep in case there are frames between the queues and the CPU
+ * port
+ */
+ usleep_range(1000, 2000);
+
+ err = lan966x_fdma_reload(lan966x, max_mtu);
+
+ /* Enable back the CPU port */
+ lan_rmw(QSYS_SW_PORT_MODE_PORT_ENA_SET(1),
+ QSYS_SW_PORT_MODE_PORT_ENA,
+ lan966x, QSYS_SW_PORT_MODE(CPU_PORT));
+
+ return err;
+}
+
+void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev)
+{
+ if (lan966x->fdma_ndev)
+ return;
+
+ lan966x->fdma_ndev = dev;
+ netif_napi_add(dev, &lan966x->napi, lan966x_fdma_napi_poll,
+ NAPI_POLL_WEIGHT);
+ napi_enable(&lan966x->napi);
+}
+
+void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev)
+{
+ if (lan966x->fdma_ndev == dev) {
+ netif_napi_del(&lan966x->napi);
+ lan966x->fdma_ndev = NULL;
+ }
+}
+
+int lan966x_fdma_init(struct lan966x *lan966x)
+{
+ int err;
+
+ if (!lan966x->fdma)
+ return 0;
+
+ lan966x->rx.lan966x = lan966x;
+ lan966x->rx.channel_id = FDMA_XTR_CHANNEL;
+ lan966x->tx.lan966x = lan966x;
+ lan966x->tx.channel_id = FDMA_INJ_CHANNEL;
+ lan966x->tx.last_in_use = -1;
+
+ err = lan966x_fdma_rx_alloc(&lan966x->rx);
+ if (err)
+ return err;
+
+ err = lan966x_fdma_tx_alloc(&lan966x->tx);
+ if (err) {
+ lan966x_fdma_rx_free(&lan966x->rx);
+ return err;
+ }
+
+ lan966x_fdma_rx_start(&lan966x->rx);
+
+ return 0;
+}
+
+void lan966x_fdma_deinit(struct lan966x *lan966x)
+{
+ if (!lan966x->fdma)
+ return;
+
+ lan966x_fdma_rx_disable(&lan966x->rx);
+ lan966x_fdma_tx_disable(&lan966x->tx);
+
+ napi_synchronize(&lan966x->napi);
+ napi_disable(&lan966x->napi);
+
+ lan966x_fdma_rx_free_pages(&lan966x->rx);
+ lan966x_fdma_rx_free(&lan966x->rx);
+ lan966x_fdma_tx_free(&lan966x->tx);
+}
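
The new RX ring above keeps all DCBs in one dma_alloc_coherent() region, so linking a recycled DCB back onto the ring is pure pointer arithmetic: the bus address of DCB i is the base rx->dma plus its byte offset in the array. A condensed sketch of the reload step from lan966x_fdma_napi_poll():

	/* Sketch: hand DCB 'dcb_reload' back to the hardware. The nextptr
	 * written into the previous tail is rx->dma plus the DCB's byte
	 * offset inside the coherent dcbs array. */
	old_dcb = &rx->dcbs[dcb_reload];
	nextptr = rx->dma + ((unsigned long)old_dcb -
			     (unsigned long)rx->dcbs);
	lan966x_fdma_rx_add_dcb(rx, old_dcb, nextptr);
	lan966x_fdma_rx_reload(rx);
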
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 958e55596b82..106d8c83544d 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -24,9 +24,6 @@
#define XTR_NOT_READY 0x07000080U
#define XTR_VALID_BYTES(x) (4 - (((x) >> 24) & 3))
-#define READL_SLEEP_US 10
-#define READL_TIMEOUT_US 100000000
-
#define IO_RANGES 2
static const struct of_device_id lan966x_match[] = {
@@ -43,6 +40,7 @@ struct lan966x_main_io_resource {
static const struct lan966x_main_io_resource lan966x_main_iomap[] = {
{ TARGET_CPU, 0xc0000, 0 }, /* 0xe00c0000 */
+ { TARGET_FDMA, 0xc0400, 0 }, /* 0xe00c0400 */
{ TARGET_ORG, 0, 1 }, /* 0xe2000000 */
{ TARGET_GCB, 0x4000, 1 }, /* 0xe2004000 */
{ TARGET_QS, 0x8000, 1 }, /* 0xe2008000 */
@@ -343,7 +341,10 @@ static int lan966x_port_xmit(struct sk_buff *skb, struct net_device *dev)
}
spin_lock(&lan966x->tx_lock);
- err = lan966x_port_ifh_xmit(skb, ifh, dev);
+ if (port->lan966x->fdma)
+ err = lan966x_fdma_xmit(skb, ifh, dev);
+ else
+ err = lan966x_port_ifh_xmit(skb, ifh, dev);
spin_unlock(&lan966x->tx_lock);
return err;
@@ -353,12 +354,24 @@ static int lan966x_port_change_mtu(struct net_device *dev, int new_mtu)
{
struct lan966x_port *port = netdev_priv(dev);
struct lan966x *lan966x = port->lan966x;
+ int old_mtu = dev->mtu;
+ int err;
lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(new_mtu),
lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port));
dev->mtu = new_mtu;
- return 0;
+ if (!lan966x->fdma)
+ return 0;
+
+ err = lan966x_fdma_change_mtu(lan966x);
+ if (err) {
+ lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(old_mtu),
+ lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port));
+ dev->mtu = old_mtu;
+ }
+
+ return err;
}
static int lan966x_mc_unsync(struct net_device *dev, const unsigned char *addr)
@@ -432,8 +445,7 @@ bool lan966x_netdevice_check(const struct net_device *dev)
return dev->netdev_ops == &lan966x_port_netdev_ops;
}
-static bool lan966x_hw_offload(struct lan966x *lan966x, u32 port,
- struct sk_buff *skb)
+bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, struct sk_buff *skb)
{
u32 val;
@@ -520,7 +532,7 @@ static int lan966x_rx_frame_word(struct lan966x *lan966x, u8 grp, u32 *rval)
}
}
-static void lan966x_ifh_get_src_port(void *ifh, u64 *src_port)
+void lan966x_ifh_get_src_port(void *ifh, u64 *src_port)
{
packing(ifh, src_port, IFH_POS_SRCPORT + IFH_WID_SRCPORT - 1,
IFH_POS_SRCPORT, IFH_LEN * 4, UNPACK, 0);
@@ -532,7 +544,7 @@ static void lan966x_ifh_get_len(void *ifh, u64 *len)
IFH_POS_LEN, IFH_LEN * 4, UNPACK, 0);
}
-static void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp)
+void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp)
{
packing(ifh, timestamp, IFH_POS_TIMESTAMP + IFH_WID_TIMESTAMP - 1,
IFH_POS_TIMESTAMP, IFH_LEN * 4, UNPACK, 0);
@@ -652,6 +664,9 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x)
if (port->dev)
unregister_netdev(port->dev);
+ if (lan966x->fdma && lan966x->fdma_ndev == port->dev)
+ lan966x_fdma_netdev_deinit(lan966x, port->dev);
+
if (port->phylink) {
rtnl_lock();
lan966x_port_stop(port->dev);
@@ -671,6 +686,9 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x)
disable_irq(lan966x->ana_irq);
lan966x->ana_irq = -ENXIO;
}
+
+ if (lan966x->fdma)
+ devm_free_irq(lan966x->dev, lan966x->fdma_irq, lan966x);
}
static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
@@ -799,12 +817,12 @@ static void lan966x_init(struct lan966x *lan966x)
/* Do byte-swap and expect status after last data word
* Extraction: Mode: manual extraction | Byte_swap
*/
- lan_wr(QS_XTR_GRP_CFG_MODE_SET(1) |
+ lan_wr(QS_XTR_GRP_CFG_MODE_SET(lan966x->fdma ? 2 : 1) |
QS_XTR_GRP_CFG_BYTE_SWAP_SET(1),
lan966x, QS_XTR_GRP_CFG(0));
/* Injection: Mode: manual injection | Byte_swap */
- lan_wr(QS_INJ_GRP_CFG_MODE_SET(1) |
+ lan_wr(QS_INJ_GRP_CFG_MODE_SET(lan966x->fdma ? 2 : 1) |
QS_INJ_GRP_CFG_BYTE_SWAP_SET(1),
lan966x, QS_INJ_GRP_CFG(0));
@@ -1026,6 +1044,17 @@ static int lan966x_probe(struct platform_device *pdev)
lan966x->ptp = 1;
}
+ lan966x->fdma_irq = platform_get_irq_byname(pdev, "fdma");
+ if (lan966x->fdma_irq > 0) {
+ err = devm_request_irq(&pdev->dev, lan966x->fdma_irq,
+ lan966x_fdma_irq_handler, 0,
+ "fdma irq", lan966x);
+ if (err)
+ return dev_err_probe(&pdev->dev, err, "Unable to use fdma irq");
+
+ lan966x->fdma = true;
+ }
+
/* init switch */
lan966x_init(lan966x);
lan966x_stats_init(lan966x);
@@ -1064,8 +1093,15 @@ static int lan966x_probe(struct platform_device *pdev)
if (err)
goto cleanup_fdb;
+ err = lan966x_fdma_init(lan966x);
+ if (err)
+ goto cleanup_ptp;
+
return 0;
+cleanup_ptp:
+ lan966x_ptp_deinit(lan966x);
+
cleanup_fdb:
lan966x_fdb_deinit(lan966x);
@@ -1085,6 +1121,7 @@ static int lan966x_remove(struct platform_device *pdev)
{
struct lan966x *lan966x = platform_get_drvdata(pdev);
+ lan966x_fdma_deinit(lan966x);
lan966x_cleanup_ports(lan966x);
cancel_delayed_work_sync(&lan966x->stats_work);
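
lan966x_port_change_mtu() above now commits the new MTU to the MAC register first and only then reloads the FDMA buffers; if the reload fails, both the register and dev->mtu are rolled back so hardware and software state stay consistent. The shape of that rollback, using only calls from the hunk:

	dev->mtu = new_mtu;
	err = lan966x_fdma_change_mtu(lan966x);
	if (err) {
		/* Undo both sides on failure. */
		lan_wr(DEV_MAC_MAXLEN_CFG_MAX_LEN_SET(old_mtu),
		       lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port));
		dev->mtu = old_mtu;
	}
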
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index ae282da1da74..5213263c4e87 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -17,6 +17,9 @@
#define TABLE_UPDATE_SLEEP_US 10
#define TABLE_UPDATE_TIMEOUT_US 100000
+#define READL_SLEEP_US 10
+#define READL_TIMEOUT_US 100000000
+
#define LAN966X_BUFFER_CELL_SZ 64
#define LAN966X_BUFFER_MEMORY (160 * 1024)
#define LAN966X_BUFFER_MIN_SZ 60
@@ -58,6 +61,22 @@
#define IFH_REW_OP_ONE_STEP_PTP 0x3
#define IFH_REW_OP_TWO_STEP_PTP 0x4
+#define FDMA_RX_DCB_MAX_DBS 1
+#define FDMA_TX_DCB_MAX_DBS 1
+#define FDMA_DCB_INFO_DATAL(x) ((x) & GENMASK(15, 0))
+
+#define FDMA_DCB_STATUS_BLOCKL(x) ((x) & GENMASK(15, 0))
+#define FDMA_DCB_STATUS_SOF BIT(16)
+#define FDMA_DCB_STATUS_EOF BIT(17)
+#define FDMA_DCB_STATUS_INTR BIT(18)
+#define FDMA_DCB_STATUS_DONE BIT(19)
+#define FDMA_DCB_STATUS_BLOCKO(x) (((x) << 20) & GENMASK(31, 20))
+#define FDMA_DCB_INVALID_DATA 0x1
+
+#define FDMA_XTR_CHANNEL 6
+#define FDMA_INJ_CHANNEL 0
+#define FDMA_DCB_MAX 512
+
/* MAC table entry types.
* ENTRYTYPE_NORMAL is subject to aging.
* ENTRYTYPE_LOCKED is not subject to aging.
@@ -73,6 +92,83 @@ enum macaccess_entry_type {
struct lan966x_port;
+struct lan966x_db {
+ u64 dataptr;
+ u64 status;
+};
+
+struct lan966x_rx_dcb {
+ u64 nextptr;
+ u64 info;
+ struct lan966x_db db[FDMA_RX_DCB_MAX_DBS];
+};
+
+struct lan966x_tx_dcb {
+ u64 nextptr;
+ u64 info;
+ struct lan966x_db db[FDMA_TX_DCB_MAX_DBS];
+};
+
+struct lan966x_rx {
+ struct lan966x *lan966x;
+
+ /* Pointer to the array of hardware dcbs. */
+ struct lan966x_rx_dcb *dcbs;
+
+ /* Pointer to the last address in the dcbs. */
+ struct lan966x_rx_dcb *last_entry;
+
+ /* For each DB, there is a page */
+ struct page *page[FDMA_DCB_MAX][FDMA_RX_DCB_MAX_DBS];
+
+ /* Represents the db_index, it can have a value between 0 and
+ * FDMA_RX_DCB_MAX_DBS, once it reaches the value of FDMA_RX_DCB_MAX_DBS
+ * it means that the DCB can be reused.
+ */
+ int db_index;
+
+ /* Represents the index in the dcbs. It has a value between 0 and
+ * FDMA_DCB_MAX
+ */
+ int dcb_index;
+
+ /* Represents the dma address to the dcbs array */
+ dma_addr_t dma;
+
+ /* Represents the page order that is used to allocate the pages for the
+ * RX buffers. This value is calculated based on max MTU of the devices.
+ */
+ u8 page_order;
+
+ u8 channel_id;
+};
+
+struct lan966x_tx_dcb_buf {
+ struct net_device *dev;
+ struct sk_buff *skb;
+ dma_addr_t dma_addr;
+ bool used;
+ bool ptp;
+};
+
+struct lan966x_tx {
+ struct lan966x *lan966x;
+
+ /* Pointer to the dcb list */
+ struct lan966x_tx_dcb *dcbs;
+ u16 last_in_use;
+
+ /* Represents the DMA address to the first entry of the dcb entries. */
+ dma_addr_t dma;
+
+ /* Array of dcbs that are given to the HW */
+ struct lan966x_tx_dcb_buf *dcbs_buf;
+
+ u8 channel_id;
+
+ bool activated;
+};
+
struct lan966x_stat_layout {
u32 offset;
char name[ETH_GSTRING_LEN];
@@ -134,6 +230,7 @@ struct lan966x {
int xtr_irq;
int ana_irq;
int ptp_irq;
+ int fdma_irq;
/* workqueue for fdb */
struct workqueue_struct *fdb_work;
@@ -150,6 +247,13 @@ struct lan966x {
spinlock_t ptp_ts_id_lock; /* lock for ts_id */
struct mutex ptp_lock; /* lock for ptp interface state */
u16 ptp_skbs;
+
+ /* fdma */
+ bool fdma;
+ struct net_device *fdma_ndev;
+ struct lan966x_rx rx;
+ struct lan966x_tx tx;
+ struct napi_struct napi;
};
struct lan966x_port_config {
@@ -195,6 +299,11 @@ bool lan966x_netdevice_check(const struct net_device *dev);
void lan966x_register_notifier_blocks(void);
void lan966x_unregister_notifier_blocks(void);
+bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, struct sk_buff *skb);
+
+void lan966x_ifh_get_src_port(void *ifh, u64 *src_port);
+void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp);
+
void lan966x_stats_get(struct net_device *dev,
struct rtnl_link_stats64 *stats);
int lan966x_stats_init(struct lan966x *lan966x);
@@ -284,6 +393,14 @@ void lan966x_ptp_txtstamp_release(struct lan966x_port *port,
struct sk_buff *skb);
irqreturn_t lan966x_ptp_irq_handler(int irq, void *args);
+int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev);
+int lan966x_fdma_change_mtu(struct lan966x *lan966x);
+void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev);
+void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev);
+int lan966x_fdma_init(struct lan966x *lan966x);
+void lan966x_fdma_deinit(struct lan966x *lan966x);
+irqreturn_t lan966x_fdma_irq_handler(int irq, void *args);
+
static inline void __iomem *lan_addr(void __iomem *base[],
int id, int tinst, int tcnt,
int gbase, int ginst,
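
The FDMA_DCB_STATUS_* bits declared in the lan966x_main.h hunk above describe one data block to the hardware; the TX path always emits a single-block frame, so start-of-frame and end-of-frame are set together. How lan966x_fdma_xmit() composes them, with the skb already padded and DMA-mapped:

	/* Whole frame in one DB: SOF+EOF, interrupt on completion,
	 * zero block offset, block length = frame length. */
	next_db->status = FDMA_DCB_STATUS_SOF |
			  FDMA_DCB_STATUS_EOF |
			  FDMA_DCB_STATUS_INTR |
			  FDMA_DCB_STATUS_BLOCKO(0) |
			  FDMA_DCB_STATUS_BLOCKL(skb->len);
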
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
index 237555845a52..f141644e4372 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c
@@ -393,6 +393,9 @@ void lan966x_port_init(struct lan966x_port *port)
lan966x_port_config_down(port);
+ if (lan966x->fdma)
+ lan966x_fdma_netdev_init(lan966x, port->dev);
+
if (config->portmode != PHY_INTERFACE_MODE_QSGMII)
return;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
index 0c0b3e173d53..2f59285bef29 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
@@ -17,6 +17,7 @@ enum lan966x_target {
TARGET_CHIP_TOP = 5,
TARGET_CPU = 6,
TARGET_DEV = 13,
+ TARGET_FDMA = 21,
TARGET_GCB = 27,
TARGET_ORG = 36,
TARGET_PTP = 41,
@@ -578,6 +579,111 @@ enum lan966x_target {
#define DEV_PCS1G_STICKY_LINK_DOWN_STICKY_GET(x)\
FIELD_GET(DEV_PCS1G_STICKY_LINK_DOWN_STICKY, x)
+/* FDMA:FDMA:FDMA_CH_ACTIVATE */
+#define FDMA_CH_ACTIVATE __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 0, 0, 1, 4)
+
+#define FDMA_CH_ACTIVATE_CH_ACTIVATE GENMASK(7, 0)
+#define FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(x)\
+ FIELD_PREP(FDMA_CH_ACTIVATE_CH_ACTIVATE, x)
+#define FDMA_CH_ACTIVATE_CH_ACTIVATE_GET(x)\
+ FIELD_GET(FDMA_CH_ACTIVATE_CH_ACTIVATE, x)
+
+/* FDMA:FDMA:FDMA_CH_RELOAD */
+#define FDMA_CH_RELOAD __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 4, 0, 1, 4)
+
+#define FDMA_CH_RELOAD_CH_RELOAD GENMASK(7, 0)
+#define FDMA_CH_RELOAD_CH_RELOAD_SET(x)\
+ FIELD_PREP(FDMA_CH_RELOAD_CH_RELOAD, x)
+#define FDMA_CH_RELOAD_CH_RELOAD_GET(x)\
+ FIELD_GET(FDMA_CH_RELOAD_CH_RELOAD, x)
+
+/* FDMA:FDMA:FDMA_CH_DISABLE */
+#define FDMA_CH_DISABLE __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 8, 0, 1, 4)
+
+#define FDMA_CH_DISABLE_CH_DISABLE GENMASK(7, 0)
+#define FDMA_CH_DISABLE_CH_DISABLE_SET(x)\
+ FIELD_PREP(FDMA_CH_DISABLE_CH_DISABLE, x)
+#define FDMA_CH_DISABLE_CH_DISABLE_GET(x)\
+ FIELD_GET(FDMA_CH_DISABLE_CH_DISABLE, x)
+
+/* FDMA:FDMA:FDMA_CH_DB_DISCARD */
+#define FDMA_CH_DB_DISCARD __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 16, 0, 1, 4)
+
+#define FDMA_CH_DB_DISCARD_DB_DISCARD GENMASK(7, 0)
+#define FDMA_CH_DB_DISCARD_DB_DISCARD_SET(x)\
+ FIELD_PREP(FDMA_CH_DB_DISCARD_DB_DISCARD, x)
+#define FDMA_CH_DB_DISCARD_DB_DISCARD_GET(x)\
+ FIELD_GET(FDMA_CH_DB_DISCARD_DB_DISCARD, x)
+
+/* FDMA:FDMA:FDMA_DCB_LLP */
+#define FDMA_DCB_LLP(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 52, r, 8, 4)
+
+/* FDMA:FDMA:FDMA_DCB_LLP1 */
+#define FDMA_DCB_LLP1(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 84, r, 8, 4)
+
+/* FDMA:FDMA:FDMA_CH_ACTIVE */
+#define FDMA_CH_ACTIVE __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 180, 0, 1, 4)
+
+/* FDMA:FDMA:FDMA_CH_CFG */
+#define FDMA_CH_CFG(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 224, r, 8, 4)
+
+#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY BIT(4)
+#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(x)\
+ FIELD_PREP(FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY, x)
+#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_GET(x)\
+ FIELD_GET(FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY, x)
+
+#define FDMA_CH_CFG_CH_INJ_PORT BIT(3)
+#define FDMA_CH_CFG_CH_INJ_PORT_SET(x)\
+ FIELD_PREP(FDMA_CH_CFG_CH_INJ_PORT, x)
+#define FDMA_CH_CFG_CH_INJ_PORT_GET(x)\
+ FIELD_GET(FDMA_CH_CFG_CH_INJ_PORT, x)
+
+#define FDMA_CH_CFG_CH_DCB_DB_CNT GENMASK(2, 1)
+#define FDMA_CH_CFG_CH_DCB_DB_CNT_SET(x)\
+ FIELD_PREP(FDMA_CH_CFG_CH_DCB_DB_CNT, x)
+#define FDMA_CH_CFG_CH_DCB_DB_CNT_GET(x)\
+ FIELD_GET(FDMA_CH_CFG_CH_DCB_DB_CNT, x)
+
+#define FDMA_CH_CFG_CH_MEM BIT(0)
+#define FDMA_CH_CFG_CH_MEM_SET(x)\
+ FIELD_PREP(FDMA_CH_CFG_CH_MEM, x)
+#define FDMA_CH_CFG_CH_MEM_GET(x)\
+ FIELD_GET(FDMA_CH_CFG_CH_MEM, x)
+
+/* FDMA:FDMA:FDMA_PORT_CTRL */
+#define FDMA_PORT_CTRL(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 376, r, 2, 4)
+
+#define FDMA_PORT_CTRL_INJ_STOP BIT(4)
+#define FDMA_PORT_CTRL_INJ_STOP_SET(x)\
+ FIELD_PREP(FDMA_PORT_CTRL_INJ_STOP, x)
+#define FDMA_PORT_CTRL_INJ_STOP_GET(x)\
+ FIELD_GET(FDMA_PORT_CTRL_INJ_STOP, x)
+
+#define FDMA_PORT_CTRL_XTR_STOP BIT(2)
+#define FDMA_PORT_CTRL_XTR_STOP_SET(x)\
+ FIELD_PREP(FDMA_PORT_CTRL_XTR_STOP, x)
+#define FDMA_PORT_CTRL_XTR_STOP_GET(x)\
+ FIELD_GET(FDMA_PORT_CTRL_XTR_STOP, x)
+
+/* FDMA:FDMA:FDMA_INTR_DB */
+#define FDMA_INTR_DB __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 392, 0, 1, 4)
+
+/* FDMA:FDMA:FDMA_INTR_DB_ENA */
+#define FDMA_INTR_DB_ENA __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 396, 0, 1, 4)
+
+#define FDMA_INTR_DB_ENA_INTR_DB_ENA GENMASK(7, 0)
+#define FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(x)\
+ FIELD_PREP(FDMA_INTR_DB_ENA_INTR_DB_ENA, x)
+#define FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(x)\
+ FIELD_GET(FDMA_INTR_DB_ENA_INTR_DB_ENA, x)
+
+/* FDMA:FDMA:FDMA_INTR_ERR */
+#define FDMA_INTR_ERR __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 400, 0, 1, 4)
+
+/* FDMA:FDMA:FDMA_ERRORS */
+#define FDMA_ERRORS __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 412, 0, 1, 4)
+
/* PTP:PTP_CFG:PTP_DOM_CFG */
#define PTP_DOM_CFG __REG(TARGET_PTP, 0, 1, 512, 0, 1, 16, 12, 0, 1, 4)
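
All of the FDMA registers above follow the driver's existing __REG()/FIELD_PREP() pattern. A minimal sketch of their use (assuming the driver's existing lan_rd()/lan_wr()/lan_rmw() helpers), activating channel ch and enabling its doorbell interrupt:

	u32 mask;

	lan_wr(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(ch)),
	       lan966x, FDMA_CH_ACTIVATE);

	mask = lan_rd(lan966x, FDMA_INTR_DB_ENA);
	mask = FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(mask);
	lan_rmw(FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(mask | BIT(ch)),
		FDMA_INTR_DB_ENA_INTR_DB_ENA,
		lan966x, FDMA_INTR_DB_ENA);
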
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
index 3d379e937184..ddb34bfb9bef 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -13,22 +13,36 @@
#include <linux/ctype.h>
#include <linux/types.h>
#include <linux/sizes.h>
+#include <linux/stringify.h>
#ifndef NFP_SUBSYS
#define NFP_SUBSYS "nfp"
#endif
-#define nfp_err(cpp, fmt, args...) \
+#define string_format(x) __FILE__ ":" __stringify(__LINE__) ": " x
+
+#define __nfp_err(cpp, fmt, args...) \
dev_err(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
-#define nfp_warn(cpp, fmt, args...) \
+#define __nfp_warn(cpp, fmt, args...) \
dev_warn(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
-#define nfp_info(cpp, fmt, args...) \
+#define __nfp_info(cpp, fmt, args...) \
dev_info(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
-#define nfp_dbg(cpp, fmt, args...) \
+#define __nfp_dbg(cpp, fmt, args...) \
dev_dbg(nfp_cpp_device(cpp)->parent, NFP_SUBSYS ": " fmt, ## args)
+#define __nfp_printk(level, cpp, fmt, args...) \
+ dev_printk(level, nfp_cpp_device(cpp)->parent, \
+ NFP_SUBSYS ": " fmt, ## args)
+
+#define nfp_err(cpp, fmt, args...) \
+ __nfp_err(cpp, string_format(fmt), ## args)
+#define nfp_warn(cpp, fmt, args...) \
+ __nfp_warn(cpp, string_format(fmt), ## args)
+#define nfp_info(cpp, fmt, args...) \
+ __nfp_info(cpp, string_format(fmt), ## args)
+#define nfp_dbg(cpp, fmt, args...) \
+ __nfp_dbg(cpp, string_format(fmt), ## args)
#define nfp_printk(level, cpp, fmt, args...) \
- dev_printk(level, nfp_cpp_device(cpp)->parent, \
- NFP_SUBSYS ": " fmt, ## args)
+ __nfp_printk(level, cpp, string_format(fmt), ## args)
#define PCI_64BIT_BAR_COUNT 3
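
The wrapper layer works purely by compile-time string pasting, so the file/line prefix costs nothing at runtime. A worked expansion (line number hypothetical):

	/* string_format("area bad\n") on line 42 expands as
	 *
	 *   __FILE__ ":" __stringify(__LINE__) ": " "area bad\n"
	 *   -> "nfp_cpp.h" ":" "42" ": " "area bad\n"
	 *   -> "nfp_cpp.h:42: area bad\n"
	 *
	 * so every nfp_err()/nfp_warn()/nfp_info()/nfp_dbg() message now
	 * carries its source location.
	 */
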
diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c
index 96a2077fd315..7e286cddbedb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_ip_services.c
@@ -161,11 +161,11 @@ EXPORT_SYMBOL(qed_vlan_get_ndev);
struct pci_dev *qed_validate_ndev(struct net_device *ndev)
{
- struct pci_dev *pdev = NULL;
struct net_device *upper;
+ struct pci_dev *pdev;
for_each_pci_dev(pdev) {
- if (pdev && pdev->driver &&
+ if (pdev->driver &&
!strcmp(pdev->driver->name, "qede")) {
upper = pci_get_drvdata(pdev);
if (upper->ifindex == ndev->ifindex)
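
The dropped NULL test was dead code: for_each_pci_dev() is defined in <linux/pci.h> roughly as

	#define for_each_pci_dev(d) \
		while ((d = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, d)) != NULL)

so pdev can never be NULL inside the loop body.
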
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 50d535981a35..c9ee5011803f 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2256,7 +2256,7 @@ int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
* guaranteed to satisfy the second as we only attempt TSO if
* inner_network_header <= 208.
*/
- ip_tot_len = -EFX_TSO2_MAX_HDRLEN;
+ ip_tot_len = 0x10000 - EFX_TSO2_MAX_HDRLEN;
EFX_WARN_ON_ONCE_PARANOID(mss + EFX_TSO2_MAX_HDRLEN +
(tcp->doff << 2u) > ip_tot_len);
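
A short worked example of why the two initialisers store the same value (assuming EFX_TSO2_MAX_HDRLEN == 208, per the comment above, and a 16-bit ip_tot_len):

	/* -208 taken modulo 2^16:
	 *   65536 - 208 = 65328 = 0x10000 - 208
	 * The old "-EFX_TSO2_MAX_HDRLEN" relied on this wraparound; the
	 * new "0x10000 - EFX_TSO2_MAX_HDRLEN" computes it explicitly and
	 * no longer depends on the variable being exactly 16 bits wide.
	 */
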
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index daf0c00c1242..c05a83da9e44 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -28,7 +28,6 @@ static inline netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct
ef100_enqueue_skb, __efx_enqueue_skb,
tx_queue, skb);
}
-void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
void efx_xmit_done_single(struct efx_tx_queue *tx_queue);
int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
void *type_data);
diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
index 377df8b7f015..eec80b024195 100644
--- a/drivers/net/ethernet/sfc/efx_channels.c
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -51,28 +51,7 @@ MODULE_PARM_DESC(irq_adapt_high_thresh,
*/
static int napi_weight = 64;
-/***************
- * Housekeeping
- ***************/
-
-int efx_channel_dummy_op_int(struct efx_channel *channel)
-{
- return 0;
-}
-
-void efx_channel_dummy_op_void(struct efx_channel *channel)
-{
-}
-
-static const struct efx_channel_type efx_default_channel_type = {
- .pre_probe = efx_channel_dummy_op_int,
- .post_remove = efx_channel_dummy_op_void,
- .get_name = efx_get_channel_name,
- .copy = efx_copy_channel,
- .want_txqs = efx_default_channel_want_txqs,
- .keep_eventq = false,
- .want_pio = true,
-};
+static const struct efx_channel_type efx_default_channel_type;
/*************
* INTERRUPTS
@@ -619,6 +598,7 @@ void efx_fini_channels(struct efx_nic *efx)
/* Allocate and initialise a channel structure, copying parameters
* (but not resources) from an old channel structure.
*/
+static
struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
{
struct efx_rx_queue *rx_queue;
@@ -696,7 +676,8 @@ fail:
return rc;
}
-void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
+static void efx_get_channel_name(struct efx_channel *channel, char *buf,
+ size_t len)
{
struct efx_nic *efx = channel->efx;
const char *type;
@@ -1004,7 +985,7 @@ int efx_set_channels(struct efx_nic *efx)
return netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
}
-bool efx_default_channel_want_txqs(struct efx_channel *channel)
+static bool efx_default_channel_want_txqs(struct efx_channel *channel)
{
return channel->channel - channel->efx->tx_channel_offset <
channel->efx->n_tx_channels;
@@ -1362,3 +1343,26 @@ void efx_fini_napi(struct efx_nic *efx)
efx_for_each_channel(channel, efx)
efx_fini_napi_channel(channel);
}
+
+/***************
+ * Housekeeping
+ ***************/
+
+static int efx_channel_dummy_op_int(struct efx_channel *channel)
+{
+ return 0;
+}
+
+void efx_channel_dummy_op_void(struct efx_channel *channel)
+{
+}
+
+static const struct efx_channel_type efx_default_channel_type = {
+ .pre_probe = efx_channel_dummy_op_int,
+ .post_remove = efx_channel_dummy_op_void,
+ .get_name = efx_get_channel_name,
+ .copy = efx_copy_channel,
+ .want_txqs = efx_default_channel_want_txqs,
+ .keep_eventq = false,
+ .want_pio = true,
+};
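
Moving the table below its callbacks works because C permits a file-scope tentative definition to be completed later in the same translation unit, which is what lets every callback become static without forward declarations. A standalone illustration (struct and names hypothetical):

	struct ops { int (*op)(void); };

	static const struct ops default_ops;		/* tentative definition */

	static int impl(void)				/* may now be static */
	{
		return 0;
	}

	static const struct ops default_ops = {		/* completing definition */
		.op = impl,
	};
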
diff --git a/drivers/net/ethernet/sfc/efx_channels.h b/drivers/net/ethernet/sfc/efx_channels.h
index d77ec1f77fb1..64abb99a56b8 100644
--- a/drivers/net/ethernet/sfc/efx_channels.h
+++ b/drivers/net/ethernet/sfc/efx_channels.h
@@ -32,16 +32,13 @@ void efx_fini_eventq(struct efx_channel *channel);
void efx_remove_eventq(struct efx_channel *channel);
int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries);
-void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len);
void efx_set_channel_names(struct efx_nic *efx);
int efx_init_channels(struct efx_nic *efx);
int efx_probe_channels(struct efx_nic *efx);
int efx_set_channels(struct efx_nic *efx);
-bool efx_default_channel_want_txqs(struct efx_channel *channel);
void efx_remove_channel(struct efx_channel *channel);
void efx_remove_channels(struct efx_nic *efx);
void efx_fini_channels(struct efx_nic *efx);
-struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel);
void efx_start_channels(struct efx_nic *efx);
void efx_stop_channels(struct efx_nic *efx);
@@ -50,7 +47,6 @@ void efx_init_napi(struct efx_nic *efx);
void efx_fini_napi_channel(struct efx_channel *channel);
void efx_fini_napi(struct efx_nic *efx);
-int efx_channel_dummy_op_int(struct efx_channel *channel);
void efx_channel_dummy_op_void(struct efx_channel *channel);
#endif
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
index af37c990217e..f6577e74d6e6 100644
--- a/drivers/net/ethernet/sfc/efx_common.c
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -51,8 +51,8 @@ static unsigned int efx_monitor_interval = 1 * HZ;
/* Default stats update time */
#define STATS_PERIOD_MS_DEFAULT 1000
-const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
-const char *const efx_reset_type_names[] = {
+static const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
+static const char *const efx_reset_type_names[] = {
[RESET_TYPE_INVISIBLE] = "INVISIBLE",
[RESET_TYPE_ALL] = "ALL",
[RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c
index 0c6cc2191369..6bbdb5d2eebf 100644
--- a/drivers/net/ethernet/sfc/falcon/rx.c
+++ b/drivers/net/ethernet/sfc/falcon/rx.c
@@ -718,12 +718,14 @@ static void ef4_init_rx_recycle_ring(struct ef4_nic *efx,
struct ef4_rx_queue *rx_queue)
{
unsigned int bufs_in_recycle_ring, page_ring_size;
+ struct iommu_domain __maybe_unused *domain;
/* Set the RX recycle ring size */
#ifdef CONFIG_PPC64
bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_IOMMU;
#else
- if (iommu_present(&pci_bus_type))
+ domain = iommu_get_domain_for_dev(&efx->pci_dev->dev);
+ if (domain && domain->type != IOMMU_DOMAIN_IDENTITY)
bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_IOMMU;
else
bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_NOIOMMU;
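
The new check asks a more precise question than iommu_present(): not "does this bus have an IOMMU at all?" but "is this device's DMA actually translated?". The general shape (a sketch):

	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	/* An identity (passthrough) domain means untranslated DMA, so the
	 * larger IOMMU recycle ring would buy nothing.
	 */
	bool translated = domain && domain->type != IOMMU_DOMAIN_IDENTITY;
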
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 148dcd48b58d..9599123bc28d 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -16,6 +16,7 @@
#include "bitfield.h"
#include "efx.h"
#include "rx_common.h"
+#include "tx_common.h"
#include "nic.h"
#include "farch_regs.h"
#include "sriov.h"
diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h
index d3fcbf930dba..ff617b1b38d3 100644
--- a/drivers/net/ethernet/sfc/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/mcdi_pcol.h
@@ -73,8 +73,8 @@
* \------------------------------ Resync (always set)
*
* The client writes its request into MC shared memory, and rings the
- * doorbell. Each request is completed by either by the MC writting
- * back into shared memory, or by writting out an event.
+ * doorbell. Each request is completed either by the MC writing
+ * back into shared memory, or by writing out an event.
*
* All MCDI commands support completion by shared memory response. Each
* request may also contain additional data (accounted for by HEADER.LEN),
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index c75dc75e2857..318db906a154 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -612,11 +612,6 @@ extern const unsigned int efx_loopback_mode_max;
#define LOOPBACK_MODE(efx) \
STRING_TABLE_LOOKUP((efx)->loopback_mode, efx_loopback_mode)
-extern const char *const efx_reset_type_names[];
-extern const unsigned int efx_reset_type_max;
-#define RESET_TYPE(type) \
- STRING_TABLE_LOOKUP(type, efx_reset_type)
-
enum efx_int_mode {
/* Be careful if altering to correct macro below */
EFX_INT_MODE_MSIX = 0,
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 6983799e1c05..138bca611341 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -527,7 +527,8 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
/* PTP "event" packet */
if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
- unlikely(efx_ptp_is_ptp_tx(efx, skb))) {
+ ((efx_ptp_use_mac_tx_timestamps(efx) && efx->ptp_data) ||
+ unlikely(efx_ptp_is_ptp_tx(efx, skb)))) {
/* There may be existing transmits on the channel that are
* waiting for this packet to trigger the doorbell write.
* We need to send the packets at this point.
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index a5d150c5f3d8..9bc625fccca0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -88,11 +88,9 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
u32 tmp, addr, value = MII_XGMAC_BUSY;
int ret;
- ret = pm_runtime_get_sync(priv->device);
- if (ret < 0) {
- pm_runtime_put_noidle(priv->device);
+ ret = pm_runtime_resume_and_get(priv->device);
+ if (ret < 0)
return ret;
- }
/* Wait until any existing MII operation is complete */
if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
@@ -156,11 +154,9 @@ static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr,
u32 addr, tmp, value = MII_XGMAC_BUSY;
int ret;
- ret = pm_runtime_get_sync(priv->device);
- if (ret < 0) {
- pm_runtime_put_noidle(priv->device);
+ ret = pm_runtime_resume_and_get(priv->device);
+ if (ret < 0)
return ret;
- }
/* Wait until any existing MII operation is complete */
if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
@@ -229,11 +225,9 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
int data = 0;
u32 v;
- data = pm_runtime_get_sync(priv->device);
- if (data < 0) {
- pm_runtime_put_noidle(priv->device);
+ data = pm_runtime_resume_and_get(priv->device);
+ if (data < 0)
return data;
- }
value |= (phyaddr << priv->hw->mii.addr_shift)
& priv->hw->mii.addr_mask;
@@ -297,11 +291,9 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
u32 value = MII_BUSY;
u32 v;
- ret = pm_runtime_get_sync(priv->device);
- if (ret < 0) {
- pm_runtime_put_noidle(priv->device);
+ ret = pm_runtime_resume_and_get(priv->device);
+ if (ret < 0)
return ret;
- }
value |= (phyaddr << priv->hw->mii.addr_shift)
& priv->hw->mii.addr_mask;
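
This conversion, repeated across the TI drivers below, is mechanical: pm_runtime_resume_and_get() folds the error-path put_noidle into the helper. For reference, it is implemented in <linux/pm_runtime.h> roughly as:

	static inline int pm_runtime_resume_and_get(struct device *dev)
	{
		int ret;

		ret = __pm_runtime_resume(dev, RPM_GET_PUT);
		if (ret < 0) {
			pm_runtime_put_noidle(dev);
			return ret;
		}

		return 0;
	}
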
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index affcf92cd3aa..fb30bc5d56cb 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -94,6 +94,7 @@ config TI_K3_AM65_CPSW_NUSS
depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER
select NET_DEVLINK
select TI_DAVINCI_MDIO
+ select PHYLINK
imply PHY_TI_GMII_SEL
depends on TI_K3_AM65_CPTS || !TI_K3_AM65_CPTS
help
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index d2747e9db286..b7ebd741f284 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -173,11 +173,9 @@ static int am65_cpsw_nuss_ndo_slave_add_vid(struct net_device *ndev,
if (!netif_running(ndev) || !vid)
return 0;
- ret = pm_runtime_get_sync(common->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(common->dev);
+ ret = pm_runtime_resume_and_get(common->dev);
+ if (ret < 0)
return ret;
- }
port_mask = BIT(port->port_id) | ALE_PORT_HOST;
if (!vid)
@@ -203,11 +201,9 @@ static int am65_cpsw_nuss_ndo_slave_kill_vid(struct net_device *ndev,
if (!netif_running(ndev) || !vid)
return 0;
- ret = pm_runtime_get_sync(common->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(common->dev);
+ ret = pm_runtime_resume_and_get(common->dev);
+ if (ret < 0)
return ret;
- }
dev_info(common->dev, "Removing vlan %d from vlan filter\n", vid);
ret = cpsw_ale_del_vlan(common->ale, vid,
@@ -557,11 +553,9 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev)
struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
int ret, i;
- ret = pm_runtime_get_sync(common->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(common->dev);
+ ret = pm_runtime_resume_and_get(common->dev);
+ if (ret < 0)
return ret;
- }
/* Notify the stack of the actual queue counts. */
ret = netif_set_real_num_tx_queues(ndev, common->tx_ch_num);
@@ -1214,11 +1208,9 @@ static int am65_cpsw_nuss_ndo_slave_set_mac_address(struct net_device *ndev,
if (ret < 0)
return ret;
- ret = pm_runtime_get_sync(common->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(common->dev);
+ ret = pm_runtime_resume_and_get(common->dev);
+ if (ret < 0)
return ret;
- }
cpsw_ale_del_ucast(common->ale, ndev->dev_addr,
HOST_PORT_NUM, 0, 0);
@@ -2692,9 +2684,8 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
common->bus_freq = clk_get_rate(clk);
pm_runtime_enable(dev);
- ret = pm_runtime_get_sync(dev);
+ ret = pm_runtime_resume_and_get(dev);
if (ret < 0) {
- pm_runtime_put_noidle(dev);
pm_runtime_disable(dev);
return ret;
}
@@ -2789,11 +2780,9 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev)
common = dev_get_drvdata(dev);
- ret = pm_runtime_get_sync(&pdev->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret < 0)
return ret;
- }
am65_cpsw_nuss_phylink_cleanup(common);
am65_cpsw_unregister_devlink(common);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c
index ebcc6386cc34..aa32dd905e2b 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-qos.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c
@@ -8,10 +8,12 @@
#include <linux/pm_runtime.h>
#include <linux/time.h>
+#include <net/pkt_cls.h>
#include "am65-cpsw-nuss.h"
#include "am65-cpsw-qos.h"
#include "am65-cpts.h"
+#include "cpsw_ale.h"
#define AM65_CPSW_REG_CTL 0x004
#define AM65_CPSW_PN_REG_CTL 0x004
@@ -588,12 +590,190 @@ static int am65_cpsw_setup_taprio(struct net_device *ndev, void *type_data)
return am65_cpsw_set_taprio(ndev, type_data);
}
+static int am65_cpsw_qos_clsflower_add_policer(struct am65_cpsw_port *port,
+ struct netlink_ext_ack *extack,
+ struct flow_cls_offload *cls,
+ u64 rate_pkt_ps)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct flow_dissector *dissector = rule->match.dissector;
+ static const u8 mc_mac[] = {0x01, 0x00, 0x00, 0x00, 0x00, 0x00};
+ struct am65_cpsw_qos *qos = &port->qos;
+ struct flow_match_eth_addrs match;
+ int ret;
+
+ if (dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported keys used");
+ return -EOPNOTSUPP;
+ }
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ NL_SET_ERR_MSG_MOD(extack, "Not matching on eth address");
+ return -EOPNOTSUPP;
+ }
+
+ flow_rule_match_eth_addrs(rule, &match);
+
+ if (!is_zero_ether_addr(match.mask->src)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on source MAC not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (is_broadcast_ether_addr(match.key->dst) &&
+ is_broadcast_ether_addr(match.mask->dst)) {
+ ret = cpsw_ale_rx_ratelimit_bc(port->common->ale, port->port_id, rate_pkt_ps);
+ if (ret)
+ return ret;
+
+ qos->ale_bc_ratelimit.cookie = cls->cookie;
+ qos->ale_bc_ratelimit.rate_packet_ps = rate_pkt_ps;
+ } else if (ether_addr_equal_unaligned(match.key->dst, mc_mac) &&
+ ether_addr_equal_unaligned(match.mask->dst, mc_mac)) {
+ ret = cpsw_ale_rx_ratelimit_mc(port->common->ale, port->port_id, rate_pkt_ps);
+ if (ret)
+ return ret;
+
+ qos->ale_mc_ratelimit.cookie = cls->cookie;
+ qos->ale_mc_ratelimit.rate_packet_ps = rate_pkt_ps;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Not supported matching key");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int am65_cpsw_qos_clsflower_policer_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when exceed action is not drop");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
+ act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is not pipe or ok");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
+ !flow_action_is_last_entry(action, act)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is ok, but action is not last");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.rate_bytes_ps || act->police.peakrate_bytes_ps ||
+ act->police.avrate || act->police.overhead) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when bytes per second/peakrate/avrate/overhead is configured");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int am65_cpsw_qos_configure_clsflower(struct am65_cpsw_port *port,
+ struct flow_cls_offload *cls)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct netlink_ext_ack *extack = cls->common.extack;
+ const struct flow_action_entry *act;
+ int i, ret;
+
+ flow_action_for_each(i, act, &rule->action) {
+ switch (act->id) {
+ case FLOW_ACTION_POLICE:
+ ret = am65_cpsw_qos_clsflower_policer_validate(&rule->action, act, extack);
+ if (ret)
+ return ret;
+
+ return am65_cpsw_qos_clsflower_add_policer(port, extack, cls,
+ act->police.rate_pkt_ps);
+ default:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Action not supported");
+ return -EOPNOTSUPP;
+ }
+ }
+ return -EOPNOTSUPP;
+}
+
+static int am65_cpsw_qos_delete_clsflower(struct am65_cpsw_port *port, struct flow_cls_offload *cls)
+{
+ struct am65_cpsw_qos *qos = &port->qos;
+
+ if (cls->cookie == qos->ale_bc_ratelimit.cookie) {
+ qos->ale_bc_ratelimit.cookie = 0;
+ qos->ale_bc_ratelimit.rate_packet_ps = 0;
+ cpsw_ale_rx_ratelimit_bc(port->common->ale, port->port_id, 0);
+ }
+
+ if (cls->cookie == qos->ale_mc_ratelimit.cookie) {
+ qos->ale_mc_ratelimit.cookie = 0;
+ qos->ale_mc_ratelimit.rate_packet_ps = 0;
+ cpsw_ale_rx_ratelimit_mc(port->common->ale, port->port_id, 0);
+ }
+
+ return 0;
+}
+
+static int am65_cpsw_qos_setup_tc_clsflower(struct am65_cpsw_port *port,
+ struct flow_cls_offload *cls_flower)
+{
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return am65_cpsw_qos_configure_clsflower(port, cls_flower);
+ case FLOW_CLS_DESTROY:
+ return am65_cpsw_qos_delete_clsflower(port, cls_flower);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int am65_cpsw_qos_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+ struct am65_cpsw_port *port = cb_priv;
+
+ if (!tc_cls_can_offload_and_chain0(port->ndev, type_data))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return am65_cpsw_qos_setup_tc_clsflower(port, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static LIST_HEAD(am65_cpsw_qos_block_cb_list);
+
+static int am65_cpsw_qos_setup_tc_block(struct net_device *ndev, struct flow_block_offload *f)
+{
+ struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+
+ return flow_block_cb_setup_simple(f, &am65_cpsw_qos_block_cb_list,
+ am65_cpsw_qos_setup_tc_block_cb,
+ port, port, true);
+}
+
int am65_cpsw_qos_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
void *type_data)
{
switch (type) {
case TC_SETUP_QDISC_TAPRIO:
return am65_cpsw_setup_taprio(ndev, type_data);
+ case TC_SETUP_BLOCK:
+ return am65_cpsw_qos_setup_tc_block(ndev, type_data);
default:
return -EOPNOTSUPP;
}
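
One detail worth spelling out: the mc_mac key/mask pair compares only the Ethernet group bit, so the policer matches "any multicast" rather than a single address. A worked example:

	/* mask 01:00:00:00:00:00 keeps only the group (multicast) bit:
	 *
	 *   01:00:5e:00:00:fb & mask = 01:00:00:00:00:00  -> matched
	 *   00:1b:21:aa:bb:cc & mask = 00:00:00:00:00:00  -> not matched
	 *
	 * Broadcast is handled separately by the all-ones key/mask case.
	 */
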
diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.h b/drivers/net/ethernet/ti/am65-cpsw-qos.h
index e8f1b6b59e93..fb223b43b196 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-qos.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-qos.h
@@ -14,11 +14,19 @@ struct am65_cpsw_est {
struct tc_taprio_qopt_offload taprio;
};
+struct am65_cpsw_ale_ratelimit {
+ unsigned long cookie;
+ u64 rate_packet_ps;
+};
+
struct am65_cpsw_qos {
struct am65_cpsw_est *est_admin;
struct am65_cpsw_est *est_oper;
ktime_t link_down_time;
int link_speed;
+
+ struct am65_cpsw_ale_ratelimit ale_bc_ratelimit;
+ struct am65_cpsw_ale_ratelimit ale_mc_ratelimit;
};
int am65_cpsw_qos_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 03575c017500..e6ad2e53f1cd 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -335,7 +335,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
static unsigned int cpsw_rxbuf_total_len(unsigned int len)
{
- len += CPSW_HEADROOM;
+ len += CPSW_HEADROOM_NA;
len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
return SKB_DATA_ALIGN(len);
@@ -756,11 +756,9 @@ static int cpsw_ndo_open(struct net_device *ndev)
int ret;
u32 reg;
- ret = pm_runtime_get_sync(cpsw->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(cpsw->dev);
+ ret = pm_runtime_resume_and_get(cpsw->dev);
+ if (ret < 0)
return ret;
- }
netif_carrier_off(ndev);
@@ -968,11 +966,9 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
- ret = pm_runtime_get_sync(cpsw->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(cpsw->dev);
+ ret = pm_runtime_resume_and_get(cpsw->dev);
+ if (ret < 0)
return ret;
- }
if (cpsw->data.dual_emac) {
vid = cpsw->slaves[priv->emac_port].port_vlan;
@@ -1052,11 +1048,9 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
if (vid == cpsw->data.default_vlan)
return 0;
- ret = pm_runtime_get_sync(cpsw->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(cpsw->dev);
+ ret = pm_runtime_resume_and_get(cpsw->dev);
+ if (ret < 0)
return ret;
- }
if (cpsw->data.dual_emac) {
/* In dual EMAC, reserved VLAN id should not be used for
@@ -1090,11 +1084,9 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
if (vid == cpsw->data.default_vlan)
return 0;
- ret = pm_runtime_get_sync(cpsw->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(cpsw->dev);
+ ret = pm_runtime_resume_and_get(cpsw->dev);
+ if (ret < 0)
return ret;
- }
if (cpsw->data.dual_emac) {
int i;
@@ -1567,11 +1559,9 @@ static int cpsw_probe(struct platform_device *pdev)
/* Need to enable clocks with runtime PM api to access module
* registers
*/
- ret = pm_runtime_get_sync(dev);
- if (ret < 0) {
- pm_runtime_put_noidle(dev);
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
goto clean_runtime_disable_ret;
- }
ret = cpsw_probe_dt(&cpsw->data, pdev);
if (ret)
@@ -1734,11 +1724,9 @@ static int cpsw_remove(struct platform_device *pdev)
struct cpsw_common *cpsw = platform_get_drvdata(pdev);
int i, ret;
- ret = pm_runtime_get_sync(&pdev->dev);
- if (ret < 0) {
- pm_runtime_put_noidle(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret < 0)
return ret;
- }
for (i = 0; i < cpsw->data.slaves; i++)
if (cpsw->slaves[i].ndev)
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index 1ef0aaef5c61..231370e9a801 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -50,6 +50,8 @@
/* ALE_AGING_TIMER */
#define ALE_AGING_TIMER_MASK GENMASK(23, 0)
+#define ALE_RATE_LIMIT_MIN_PPS 1000
+
/**
* struct ale_entry_fld - The ALE tbl entry field description
* @start_bit: field start bit
@@ -1136,6 +1138,50 @@ int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control)
return tmp & BITMASK(info->bits);
}
+int cpsw_ale_rx_ratelimit_mc(struct cpsw_ale *ale, int port, unsigned int ratelimit_pps)
+{
+ int val = ratelimit_pps / ALE_RATE_LIMIT_MIN_PPS;
+ u32 remainder = ratelimit_pps % ALE_RATE_LIMIT_MIN_PPS;
+
+ if (ratelimit_pps && !val) {
+ dev_err(ale->params.dev, "ALE MC port:%d ratelimit min value 1000pps\n", port);
+ return -EINVAL;
+ }
+
+ if (remainder)
+ dev_info(ale->params.dev, "ALE port:%d MC ratelimit set to %dpps (requested %d)\n",
+ port, ratelimit_pps - remainder, ratelimit_pps);
+
+ cpsw_ale_control_set(ale, port, ALE_PORT_MCAST_LIMIT, val);
+
+ dev_dbg(ale->params.dev, "ALE port:%d MC ratelimit set %d\n",
+ port, val * ALE_RATE_LIMIT_MIN_PPS);
+ return 0;
+}
+
+int cpsw_ale_rx_ratelimit_bc(struct cpsw_ale *ale, int port, unsigned int ratelimit_pps)
+{
+ int val = ratelimit_pps / ALE_RATE_LIMIT_MIN_PPS;
+ u32 remainder = ratelimit_pps % ALE_RATE_LIMIT_MIN_PPS;
+
+ if (ratelimit_pps && !val) {
+ dev_err(ale->params.dev, "ALE port:%d BC ratelimit min value 1000pps\n", port);
+ return -EINVAL;
+ }
+
+ if (remainder)
+ dev_info(ale->params.dev, "ALE port:%d BC ratelimit set to %dpps (requested %d)\n",
+ port, ratelimit_pps - remainder, ratelimit_pps);
+
+ cpsw_ale_control_set(ale, port, ALE_PORT_BCAST_LIMIT, val);
+
+ dev_dbg(ale->params.dev, "ALE port:%d BC ratelimit set %d\n",
+ port, val * ALE_RATE_LIMIT_MIN_PPS);
+ return 0;
+}
+
static void cpsw_ale_timer(struct timer_list *t)
{
struct cpsw_ale *ale = from_timer(ale, t, timer);
@@ -1199,6 +1245,26 @@ static void cpsw_ale_aging_stop(struct cpsw_ale *ale)
void cpsw_ale_start(struct cpsw_ale *ale)
{
+ unsigned long ale_prescale;
+
+ /* Configure the Broadcast and Multicast rate limit:
+ * number_of_packets = (Fclk / ALE_PRESCALE) * port.BCAST/MCAST_LIMIT
+ * ALE_PRESCALE is 19 bits wide, minimum value 0x10;
+ * port.BCAST/MCAST_LIMIT is 8 bits wide.
+ *
+ * To support multi-port configurations, ALE_PRESCALE is set to a 1ms
+ * interval, which allows port.BCAST/MCAST_LIMIT to be configured per
+ * port and achieves:
+ * min number_of_packets = 1000 when port.BCAST/MCAST_LIMIT = 1
+ * max number_of_packets = 1000 * 255 = 255000 when port.BCAST/MCAST_LIMIT = 0xFF
+ */
+ ale_prescale = ale->params.bus_freq / ALE_RATE_LIMIT_MIN_PPS;
+ writel((u32)ale_prescale, ale->params.ale_regs + ALE_PRESCALE);
+
+ /* Allow MC/BC rate limiting globally.
+ * The actual rate limit is enabled per port via port.BCAST/MCAST_LIMIT.
+ */
+ cpsw_ale_control_set(ale, 0, ALE_RATE_LIMIT, 1);
+
cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1);
cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1);
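
A worked example of the resulting quantisation (assuming a 125 MHz ALE bus clock):

	/* ale_prescale = 125000000 / 1000 = 125000  (one tick per 1 ms)
	 *
	 * For a requested ratelimit_pps of 2500:
	 *   val       = 2500 / 1000 = 2    -> programmed BCAST/MCAST_LIMIT
	 *   remainder = 2500 % 1000 = 500  -> effective rate 2000 pps,
	 *                                     reported via dev_info()
	 */
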
diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h
index 13fe47687fde..aba4572cfa3b 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.h
+++ b/drivers/net/ethernet/ti/cpsw_ale.h
@@ -120,6 +120,8 @@ int cpsw_ale_add_vlan(struct cpsw_ale *ale, u16 vid, int port, int untag,
int reg_mcast, int unreg_mcast);
int cpsw_ale_del_vlan(struct cpsw_ale *ale, u16 vid, int port);
void cpsw_ale_set_allmulti(struct cpsw_ale *ale, int allmulti, int port);
+int cpsw_ale_rx_ratelimit_bc(struct cpsw_ale *ale, int port, unsigned int ratelimit_pps);
+int cpsw_ale_rx_ratelimit_mc(struct cpsw_ale *ale, int port, unsigned int ratelimit_pps);
int cpsw_ale_control_get(struct cpsw_ale *ale, int port, int control);
int cpsw_ale_control_set(struct cpsw_ale *ale, int port,
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index bd4b1528cf99..dfa0a9cf9d89 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -273,7 +273,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
static unsigned int cpsw_rxbuf_total_len(unsigned int len)
{
- len += CPSW_HEADROOM;
+ len += CPSW_HEADROOM_NA;
len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
return SKB_DATA_ALIGN(len);
@@ -498,6 +498,8 @@ static void cpsw_restore(struct cpsw_priv *priv)
/* restore CBS offload */
cpsw_cbs_resume(&cpsw->slaves[priv->emac_port - 1], priv);
+
+ cpsw_qos_clsflower_resume(priv);
}
static void cpsw_init_stp_ale_entry(struct cpsw_common *cpsw)
@@ -1407,7 +1409,7 @@ static int cpsw_create_ports(struct cpsw_common *cpsw)
cpsw->slaves[i].ndev = ndev;
ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
- NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_NETNS_LOCAL;
+ NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_NETNS_LOCAL | NETIF_F_HW_TC;
ndev->netdev_ops = &cpsw_netdev_ops;
ndev->ethtool_ops = &cpsw_ethtool_ops;
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index 8f6817f346ba..4dbd327c6a4d 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -502,6 +502,7 @@ int cpsw_init_common(struct cpsw_common *cpsw, void __iomem *ss_regs,
ale_params.ale_ageout = ale_ageout;
ale_params.ale_ports = CPSW_ALE_PORTS_NUM;
ale_params.dev_id = "cpsw";
+ ale_params.bus_freq = cpsw->bus_freq_mhz * 1000000;
cpsw->ale = cpsw_ale_create(&ale_params);
if (IS_ERR(cpsw->ale)) {
@@ -1048,6 +1049,8 @@ static int cpsw_set_mqprio(struct net_device *ndev, void *type_data)
return 0;
}
+static int cpsw_qos_setup_tc_block(struct net_device *ndev, struct flow_block_offload *f);
+
int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
void *type_data)
{
@@ -1058,6 +1061,9 @@ int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
case TC_SETUP_QDISC_MQPRIO:
return cpsw_set_mqprio(ndev, type_data);
+ case TC_SETUP_BLOCK:
+ return cpsw_qos_setup_tc_block(ndev, type_data);
+
default:
return -EOPNOTSUPP;
}
@@ -1381,3 +1387,202 @@ drop:
page_pool_recycle_direct(cpsw->page_pool[ch], page);
return ret;
}
+
+static int cpsw_qos_clsflower_add_policer(struct cpsw_priv *priv,
+ struct netlink_ext_ack *extack,
+ struct flow_cls_offload *cls,
+ u64 rate_pkt_ps)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct flow_dissector *dissector = rule->match.dissector;
+ static const u8 mc_mac[] = {0x01, 0x00, 0x00, 0x00, 0x00, 0x00};
+ struct flow_match_eth_addrs match;
+ u32 port_id;
+ int ret;
+
+ if (dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported keys used");
+ return -EOPNOTSUPP;
+ }
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ NL_SET_ERR_MSG_MOD(extack, "Not matching on eth address");
+ return -EOPNOTSUPP;
+ }
+
+ flow_rule_match_eth_addrs(rule, &match);
+
+ if (!is_zero_ether_addr(match.mask->src)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on source MAC not supported");
+ return -EOPNOTSUPP;
+ }
+
+ port_id = cpsw_slave_index(priv->cpsw, priv) + 1;
+
+ if (is_broadcast_ether_addr(match.key->dst) &&
+ is_broadcast_ether_addr(match.mask->dst)) {
+ ret = cpsw_ale_rx_ratelimit_bc(priv->cpsw->ale, port_id, rate_pkt_ps);
+ if (ret)
+ return ret;
+
+ priv->ale_bc_ratelimit.cookie = cls->cookie;
+ priv->ale_bc_ratelimit.rate_packet_ps = rate_pkt_ps;
+ } else if (ether_addr_equal_unaligned(match.key->dst, mc_mac) &&
+ ether_addr_equal_unaligned(match.mask->dst, mc_mac)) {
+ ret = cpsw_ale_rx_ratelimit_mc(priv->cpsw->ale, port_id, rate_pkt_ps);
+ if (ret)
+ return ret;
+
+ priv->ale_mc_ratelimit.cookie = cls->cookie;
+ priv->ale_mc_ratelimit.rate_packet_ps = rate_pkt_ps;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Not supported matching key");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int cpsw_qos_clsflower_policer_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when exceed action is not drop");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
+ act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is not pipe or ok");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
+ !flow_action_is_last_entry(action, act)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is ok, but action is not last");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.rate_bytes_ps || act->police.peakrate_bytes_ps ||
+ act->police.avrate || act->police.overhead) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when bytes per second/peakrate/avrate/overhead is configured");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int cpsw_qos_configure_clsflower(struct cpsw_priv *priv, struct flow_cls_offload *cls)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+ struct netlink_ext_ack *extack = cls->common.extack;
+ const struct flow_action_entry *act;
+ int i, ret;
+
+ flow_action_for_each(i, act, &rule->action) {
+ switch (act->id) {
+ case FLOW_ACTION_POLICE:
+ ret = cpsw_qos_clsflower_policer_validate(&rule->action, act, extack);
+ if (ret)
+ return ret;
+
+ return cpsw_qos_clsflower_add_policer(priv, extack, cls,
+ act->police.rate_pkt_ps);
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Action not supported");
+ return -EOPNOTSUPP;
+ }
+ }
+ return -EOPNOTSUPP;
+}
+
+static int cpsw_qos_delete_clsflower(struct cpsw_priv *priv, struct flow_cls_offload *cls)
+{
+ u32 port_id = cpsw_slave_index(priv->cpsw, priv) + 1;
+
+ if (cls->cookie == priv->ale_bc_ratelimit.cookie) {
+ priv->ale_bc_ratelimit.cookie = 0;
+ priv->ale_bc_ratelimit.rate_packet_ps = 0;
+ cpsw_ale_rx_ratelimit_bc(priv->cpsw->ale, port_id, 0);
+ }
+
+ if (cls->cookie == priv->ale_mc_ratelimit.cookie) {
+ priv->ale_mc_ratelimit.cookie = 0;
+ priv->ale_mc_ratelimit.rate_packet_ps = 0;
+ cpsw_ale_rx_ratelimit_mc(priv->cpsw->ale, port_id, 0);
+ }
+
+ return 0;
+}
+
+static int cpsw_qos_setup_tc_clsflower(struct cpsw_priv *priv, struct flow_cls_offload *cls_flower)
+{
+ switch (cls_flower->command) {
+ case FLOW_CLS_REPLACE:
+ return cpsw_qos_configure_clsflower(priv, cls_flower);
+ case FLOW_CLS_DESTROY:
+ return cpsw_qos_delete_clsflower(priv, cls_flower);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int cpsw_qos_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+ struct cpsw_priv *priv = cb_priv;
+ int ret;
+
+ if (!tc_cls_can_offload_and_chain0(priv->ndev, type_data))
+ return -EOPNOTSUPP;
+
+ ret = pm_runtime_get_sync(priv->dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(priv->dev);
+ return ret;
+ }
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ ret = cpsw_qos_setup_tc_clsflower(priv, type_data);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ pm_runtime_put(priv->dev);
+ return ret;
+}
+
+static LIST_HEAD(cpsw_qos_block_cb_list);
+
+static int cpsw_qos_setup_tc_block(struct net_device *ndev, struct flow_block_offload *f)
+{
+ struct cpsw_priv *priv = netdev_priv(ndev);
+
+ return flow_block_cb_setup_simple(f, &cpsw_qos_block_cb_list,
+ cpsw_qos_setup_tc_block_cb,
+ priv, priv, true);
+}
+
+void cpsw_qos_clsflower_resume(struct cpsw_priv *priv)
+{
+ u32 port_id = cpsw_slave_index(priv->cpsw, priv) + 1;
+
+ if (priv->ale_bc_ratelimit.cookie)
+ cpsw_ale_rx_ratelimit_bc(priv->cpsw->ale, port_id,
+ priv->ale_bc_ratelimit.rate_packet_ps);
+
+ if (priv->ale_mc_ratelimit.cookie)
+ cpsw_ale_rx_ratelimit_mc(priv->cpsw->ale, port_id,
+ priv->ale_mc_ratelimit.rate_packet_ps);
+}
diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h
index 74555970730c..fc591f5ebe18 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.h
+++ b/drivers/net/ethernet/ti/cpsw_priv.h
@@ -364,6 +364,11 @@ struct cpsw_common {
u8 base_mac[ETH_ALEN];
};
+struct cpsw_ale_ratelimit {
+ unsigned long cookie;
+ u64 rate_packet_ps;
+};
+
struct cpsw_priv {
struct net_device *ndev;
struct device *dev;
@@ -384,6 +389,8 @@ struct cpsw_priv {
struct cpsw_common *cpsw;
int offload_fwd_mark;
u32 tx_packet_min;
+ struct cpsw_ale_ratelimit ale_bc_ratelimit;
+ struct cpsw_ale_ratelimit ale_mc_ratelimit;
};
#define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw)
@@ -411,7 +418,6 @@ struct __aligned(sizeof(long)) cpsw_meta_xdp {
/* The buf includes headroom compatible with both skb and xdpf */
#define CPSW_HEADROOM_NA (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + NET_IP_ALIGN)
-#define CPSW_HEADROOM ALIGN(CPSW_HEADROOM_NA, sizeof(long))
static inline int cpsw_is_xdpf_handle(void *handle)
{
@@ -462,6 +468,7 @@ int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
bool cpsw_shp_is_off(struct cpsw_priv *priv);
void cpsw_cbs_resume(struct cpsw_slave *slave, struct cpsw_priv *priv);
void cpsw_mqprio_resume(struct cpsw_slave *slave, struct cpsw_priv *priv);
+void cpsw_qos_clsflower_resume(struct cpsw_priv *priv);
/* ethtool */
u32 cpsw_get_msglevel(struct net_device *ndev);
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 869e362e09c1..3f6b9dfca095 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1515,7 +1515,7 @@ static int temac_probe(struct platform_device *pdev)
of_node_put(dma_np);
return PTR_ERR(lp->sdma_regs);
}
- if (of_get_property(dma_np, "little-endian", NULL)) {
+ if (of_property_read_bool(dma_np, "little-endian")) {
lp->dma_in = temac_dma_in32_le;
lp->dma_out = temac_dma_out32_le;
} else {
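
of_property_read_bool() is the idiomatic accessor for presence-only (boolean) device-tree properties; it is roughly:

	static inline bool of_property_read_bool(const struct device_node *np,
						 const char *propname)
	{
		struct property *prop = of_find_property(np, propname, NULL);

		return prop ? true : false;
	}
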
diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c
index 1f382777aa5a..9abbdb71e629 100644
--- a/drivers/net/ethernet/xscale/ptp_ixp46x.c
+++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c
@@ -271,7 +271,7 @@ static int ptp_ixp_probe(struct platform_device *pdev)
ixp_clock.master_irq = platform_get_irq(pdev, 0);
ixp_clock.slave_irq = platform_get_irq(pdev, 1);
if (IS_ERR(ixp_clock.regs) ||
- !ixp_clock.master_irq || !ixp_clock.slave_irq)
+ ixp_clock.master_irq < 0 || ixp_clock.slave_irq < 0)
return -ENXIO;
ixp_clock.caps = ptp_ixp_caps;
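
platform_get_irq() returns a negative errno on failure and never returns 0, so the "< 0" comparisons are the correct failure tests; the old "!irq" checks could never fire.
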
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index cf69da0e296c..25b38a374e3c 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -15,6 +15,7 @@
#include <linux/list.h>
#include <linux/hyperv.h>
#include <linux/rndis.h>
+#include <linux/jhash.h>
/* RSS related */
#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */
@@ -237,6 +238,7 @@ int netvsc_recv_callback(struct net_device *net,
void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);
+void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev);
u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
struct xdp_buff *xdp);
unsigned int netvsc_xdp_fraglen(unsigned int len);
@@ -246,6 +248,8 @@ int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
struct netvsc_device *nvdev);
int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog);
int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf);
+int netvsc_ndoxdp_xmit(struct net_device *ndev, int n,
+ struct xdp_frame **frames, u32 flags);
int rndis_set_subchannel(struct net_device *ndev,
struct netvsc_device *nvdev,
@@ -942,12 +946,21 @@ struct nvsc_rsc {
#define NVSC_RSC_CSUM_INFO BIT(1) /* valid/present bit for 'csum_info' */
#define NVSC_RSC_HASH_INFO BIT(2) /* valid/present bit for 'hash_info' */
-struct netvsc_stats {
+struct netvsc_stats_tx {
+ u64 packets;
+ u64 bytes;
+ u64 xdp_xmit;
+ struct u64_stats_sync syncp;
+};
+
+struct netvsc_stats_rx {
u64 packets;
u64 bytes;
u64 broadcast;
u64 multicast;
u64 xdp_drop;
+ u64 xdp_redirect;
+ u64 xdp_tx;
struct u64_stats_sync syncp;
};
@@ -1046,6 +1059,55 @@ struct net_device_context {
struct netvsc_device_info *saved_netvsc_dev_info;
};
+/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
+ * packets. We can use ethtool to change UDP hash level when necessary.
+ */
+static inline u32 netvsc_get_hash(struct sk_buff *skb,
+ const struct net_device_context *ndc)
+{
+ struct flow_keys flow;
+ u32 hash, pkt_proto = 0;
+ static u32 hashrnd __read_mostly;
+
+ net_get_random_once(&hashrnd, sizeof(hashrnd));
+
+ if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
+ return 0;
+
+ switch (flow.basic.ip_proto) {
+ case IPPROTO_TCP:
+ if (flow.basic.n_proto == htons(ETH_P_IP))
+ pkt_proto = HV_TCP4_L4HASH;
+ else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+ pkt_proto = HV_TCP6_L4HASH;
+
+ break;
+
+ case IPPROTO_UDP:
+ if (flow.basic.n_proto == htons(ETH_P_IP))
+ pkt_proto = HV_UDP4_L4HASH;
+ else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+ pkt_proto = HV_UDP6_L4HASH;
+
+ break;
+ }
+
+ if (pkt_proto & ndc->l4_hash) {
+ return skb_get_hash(skb);
+ } else {
+ if (flow.basic.n_proto == htons(ETH_P_IP))
+ hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd);
+ else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+ hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
+ else
+ return 0;
+
+ __skb_set_sw_hash(skb, hash, false);
+ }
+
+ return hash;
+}
+
/* Per channel data */
struct netvsc_channel {
struct vmbus_channel *channel;
@@ -1060,9 +1122,10 @@ struct netvsc_channel {
struct bpf_prog __rcu *bpf_prog;
struct xdp_rxq_info xdp_rxq;
+ bool xdp_flush;
- struct netvsc_stats tx_stats;
- struct netvsc_stats rx_stats;
+ struct netvsc_stats_tx tx_stats;
+ struct netvsc_stats_rx rx_stats;
};
/* Per netvsc device */
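
netvsc_get_hash() moves here unchanged from netvsc_drv.c (the matching deletion appears further down) so that the new XDP transmit path in netvsc_bpf.c can reuse it when building skbs from redirected frames.
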
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 9442f751ad3a..6e42cb03e226 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -20,6 +20,7 @@
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
+#include <linux/filter.h>
#include <asm/sync_bitops.h>
#include <asm/mshyperv.h>
@@ -792,9 +793,9 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
int queue_sends;
u64 cmd_rqst;
- cmd_rqst = channel->request_addr_callback(channel, (u64)desc->trans_id);
+ cmd_rqst = channel->request_addr_callback(channel, desc->trans_id);
if (cmd_rqst == VMBUS_RQST_ERROR) {
- netdev_err(ndev, "Incorrect transaction id\n");
+ netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
return;
}
@@ -805,7 +806,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
struct hv_netvsc_packet *packet
= (struct hv_netvsc_packet *)skb->cb;
u32 send_index = packet->send_buf_index;
- struct netvsc_stats *tx_stats;
+ struct netvsc_stats_tx *tx_stats;
if (send_index != NETVSC_INVALID_INDEX)
netvsc_free_send_slot(net_device, send_index);
@@ -854,9 +855,9 @@ static void netvsc_send_completion(struct net_device *ndev,
/* First check if this is a VMBUS completion without data payload */
if (!msglen) {
cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
- (u64)desc->trans_id);
+ desc->trans_id);
if (cmd_rqst == VMBUS_RQST_ERROR) {
- netdev_err(ndev, "Invalid transaction id\n");
+ netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
return;
}
@@ -1670,12 +1671,17 @@ int netvsc_poll(struct napi_struct *napi, int budget)
if (!nvchan->desc)
nvchan->desc = hv_pkt_iter_first(channel);
+ nvchan->xdp_flush = false;
+
while (nvchan->desc && work_done < budget) {
work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
ndev, nvchan->desc, budget);
nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
}
+ if (nvchan->xdp_flush)
+ xdp_do_flush();
+
/* Send any pending receive completions */
ret = send_recv_completions(ndev, net_device, nvchan);
diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index 7856905414eb..4a9522689fa4 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -10,6 +10,7 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <linux/netpoll.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/kernel.h>
@@ -23,11 +24,13 @@
u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
struct xdp_buff *xdp)
{
+ struct netvsc_stats_rx *rx_stats = &nvchan->rx_stats;
void *data = nvchan->rsc.data[0];
u32 len = nvchan->rsc.len[0];
struct page *page = NULL;
struct bpf_prog *prog;
u32 act = XDP_PASS;
+ bool drop = true;
xdp->data_hard_start = NULL;
@@ -60,9 +63,34 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
switch (act) {
case XDP_PASS:
case XDP_TX:
+ drop = false;
+ break;
+
case XDP_DROP:
break;
+ case XDP_REDIRECT:
+ if (!xdp_do_redirect(ndev, xdp, prog)) {
+ nvchan->xdp_flush = true;
+ drop = false;
+
+ u64_stats_update_begin(&rx_stats->syncp);
+
+ rx_stats->xdp_redirect++;
+ rx_stats->packets++;
+ rx_stats->bytes += nvchan->rsc.pktlen;
+
+ u64_stats_update_end(&rx_stats->syncp);
+
+ break;
+ } else {
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->xdp_drop++;
+ u64_stats_update_end(&rx_stats->syncp);
+ }
+
+ fallthrough;
+
case XDP_ABORTED:
trace_xdp_exception(ndev, prog, act);
break;
@@ -74,7 +102,7 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
out:
rcu_read_unlock();
- if (page && act != XDP_PASS && act != XDP_TX) {
+ if (page && drop) {
__free_page(page);
xdp->data_hard_start = NULL;
}
@@ -137,7 +165,6 @@ int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
{
struct netdev_bpf xdp;
- bpf_op_t ndo_bpf;
int ret;
ASSERT_RTNL();
@@ -145,8 +172,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
if (!vf_netdev)
return 0;
- ndo_bpf = vf_netdev->netdev_ops->ndo_bpf;
- if (!ndo_bpf)
+ if (!vf_netdev->netdev_ops->ndo_bpf)
return 0;
memset(&xdp, 0, sizeof(xdp));
@@ -157,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
xdp.command = XDP_SETUP_PROG;
xdp.prog = prog;
- ret = ndo_bpf(vf_netdev, &xdp);
+ ret = vf_netdev->netdev_ops->ndo_bpf(vf_netdev, &xdp);
if (ret && prog)
bpf_prog_put(prog);
@@ -199,3 +225,68 @@ int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
return -EINVAL;
}
}
+
+static int netvsc_ndoxdp_xmit_fm(struct net_device *ndev,
+ struct xdp_frame *frame, u16 q_idx)
+{
+ struct sk_buff *skb;
+
+ skb = xdp_build_skb_from_frame(frame, ndev);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ netvsc_get_hash(skb, netdev_priv(ndev));
+
+ skb_record_rx_queue(skb, q_idx);
+
+ netvsc_xdp_xmit(skb, ndev);
+
+ return 0;
+}
+
+int netvsc_ndoxdp_xmit(struct net_device *ndev, int n,
+ struct xdp_frame **frames, u32 flags)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ const struct net_device_ops *vf_ops;
+ struct netvsc_stats_tx *tx_stats;
+ struct netvsc_device *nvsc_dev;
+ struct net_device *vf_netdev;
+ int i, count = 0;
+ u16 q_idx;
+
+ /* Don't transmit if netvsc_device is gone */
+ nvsc_dev = rcu_dereference_bh(ndev_ctx->nvdev);
+ if (unlikely(!nvsc_dev || nvsc_dev->destroy))
+ return 0;
+
+ /* If a VF is present and up, redirect packets to it.
+ * Skip the VF if it is marked down or has no carrier.
+ * If netpoll is in use, the VF cannot be used either.
+ */
+ vf_netdev = rcu_dereference_bh(ndev_ctx->vf_netdev);
+ if (vf_netdev && netif_running(vf_netdev) &&
+ netif_carrier_ok(vf_netdev) && !netpoll_tx_running(ndev) &&
+ vf_netdev->netdev_ops->ndo_xdp_xmit &&
+ ndev_ctx->data_path_is_vf) {
+ vf_ops = vf_netdev->netdev_ops;
+ return vf_ops->ndo_xdp_xmit(vf_netdev, n, frames, flags);
+ }
+
+ q_idx = smp_processor_id() % ndev->real_num_tx_queues;
+
+ for (i = 0; i < n; i++) {
+ if (netvsc_ndoxdp_xmit_fm(ndev, frames[i], q_idx))
+ break;
+
+ count++;
+ }
+
+ tx_stats = &nvsc_dev->chan_table[q_idx].tx_stats;
+
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->xdp_xmit += count;
+ u64_stats_update_end(&tx_stats->syncp);
+
+ return count;
+}
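
The hookup of netvsc_ndoxdp_xmit() into the device's net_device_ops is not part of this hunk; a sketch of the expected wiring (placement assumed):

	static const struct net_device_ops device_ops = {
		/* ... existing netvsc callbacks ... */
		.ndo_xdp_xmit		= netvsc_ndoxdp_xmit,
	};
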
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index fde1c492ca02..27f6bbca6619 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -242,56 +242,6 @@ static inline void *init_ppi_data(struct rndis_message *msg,
return ppi + 1;
}
-/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
- * packets. We can use ethtool to change UDP hash level when necessary.
- */
-static inline u32 netvsc_get_hash(
- struct sk_buff *skb,
- const struct net_device_context *ndc)
-{
- struct flow_keys flow;
- u32 hash, pkt_proto = 0;
- static u32 hashrnd __read_mostly;
-
- net_get_random_once(&hashrnd, sizeof(hashrnd));
-
- if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
- return 0;
-
- switch (flow.basic.ip_proto) {
- case IPPROTO_TCP:
- if (flow.basic.n_proto == htons(ETH_P_IP))
- pkt_proto = HV_TCP4_L4HASH;
- else if (flow.basic.n_proto == htons(ETH_P_IPV6))
- pkt_proto = HV_TCP6_L4HASH;
-
- break;
-
- case IPPROTO_UDP:
- if (flow.basic.n_proto == htons(ETH_P_IP))
- pkt_proto = HV_UDP4_L4HASH;
- else if (flow.basic.n_proto == htons(ETH_P_IPV6))
- pkt_proto = HV_UDP6_L4HASH;
-
- break;
- }
-
- if (pkt_proto & ndc->l4_hash) {
- return skb_get_hash(skb);
- } else {
- if (flow.basic.n_proto == htons(ETH_P_IP))
- hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd);
- else if (flow.basic.n_proto == htons(ETH_P_IPV6))
- hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
- else
- return 0;
-
- __skb_set_sw_hash(skb, hash, false);
- }
-
- return hash;
-}
-
static inline int netvsc_get_tx_queue(struct net_device *ndev,
struct sk_buff *skb, int old_idx)
{
@@ -804,7 +754,7 @@ void netvsc_linkstatus_callback(struct net_device *net,
}
/* This function should only be called after skb_record_rx_queue() */
-static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
+void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
{
int rc;
@@ -925,7 +875,7 @@ int netvsc_recv_callback(struct net_device *net,
struct vmbus_channel *channel = nvchan->channel;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct sk_buff *skb;
- struct netvsc_stats *rx_stats = &nvchan->rx_stats;
+ struct netvsc_stats_rx *rx_stats = &nvchan->rx_stats;
struct xdp_buff xdp;
u32 act;
@@ -934,6 +884,9 @@ int netvsc_recv_callback(struct net_device *net,
act = netvsc_run_xdp(net, nvchan, &xdp);
+ if (act == XDP_REDIRECT)
+ return NVSP_STAT_SUCCESS;
+
if (act != XDP_PASS && act != XDP_TX) {
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->xdp_drop++;
@@ -958,6 +911,9 @@ int netvsc_recv_callback(struct net_device *net,
* statistics will not work correctly.
*/
u64_stats_update_begin(&rx_stats->syncp);
+ if (act == XDP_TX)
+ rx_stats->xdp_tx++;
+
rx_stats->packets++;
rx_stats->bytes += nvchan->rsc.pktlen;
@@ -1353,28 +1309,29 @@ static void netvsc_get_pcpu_stats(struct net_device *net,
/* fetch percpu stats of netvsc */
for (i = 0; i < nvdev->num_chn; i++) {
const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
- const struct netvsc_stats *stats;
+ const struct netvsc_stats_tx *tx_stats;
+ const struct netvsc_stats_rx *rx_stats;
struct netvsc_ethtool_pcpu_stats *this_tot =
&pcpu_tot[nvchan->channel->target_cpu];
u64 packets, bytes;
unsigned int start;
- stats = &nvchan->tx_stats;
+ tx_stats = &nvchan->tx_stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ packets = tx_stats->packets;
+ bytes = tx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
this_tot->tx_bytes += bytes;
this_tot->tx_packets += packets;
- stats = &nvchan->rx_stats;
+ rx_stats = &nvchan->rx_stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ packets = rx_stats->packets;
+ bytes = rx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
this_tot->rx_bytes += bytes;
this_tot->rx_packets += packets;
@@ -1406,27 +1363,28 @@ static void netvsc_get_stats64(struct net_device *net,
for (i = 0; i < nvdev->num_chn; i++) {
const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
- const struct netvsc_stats *stats;
+ const struct netvsc_stats_tx *tx_stats;
+ const struct netvsc_stats_rx *rx_stats;
u64 packets, bytes, multicast;
unsigned int start;
- stats = &nvchan->tx_stats;
+ tx_stats = &nvchan->tx_stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ packets = tx_stats->packets;
+ bytes = tx_stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
t->tx_bytes += bytes;
t->tx_packets += packets;
- stats = &nvchan->rx_stats;
+ rx_stats = &nvchan->rx_stats;
do {
- start = u64_stats_fetch_begin_irq(&stats->syncp);
- packets = stats->packets;
- bytes = stats->bytes;
- multicast = stats->multicast + stats->broadcast;
- } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ packets = rx_stats->packets;
+ bytes = rx_stats->bytes;
+ multicast = rx_stats->multicast + rx_stats->broadcast;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
t->rx_bytes += bytes;
t->rx_packets += packets;
@@ -1515,8 +1473,8 @@ static const struct {
/* statistics per queue (rx/tx packets/bytes) */
#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
-/* 5 statistics per queue (rx/tx packets/bytes, rx xdp_drop) */
-#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 5)
+/* 8 statistics per queue (rx/tx packets/bytes, XDP actions) */
+#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 8)
static int netvsc_get_sset_count(struct net_device *dev, int string_set)
{
@@ -1543,12 +1501,16 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
struct net_device_context *ndc = netdev_priv(dev);
struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
const void *nds = &ndc->eth_stats;
- const struct netvsc_stats *qstats;
+ const struct netvsc_stats_tx *tx_stats;
+ const struct netvsc_stats_rx *rx_stats;
struct netvsc_vf_pcpu_stats sum;
struct netvsc_ethtool_pcpu_stats *pcpu_sum;
unsigned int start;
u64 packets, bytes;
u64 xdp_drop;
+ u64 xdp_redirect;
+ u64 xdp_tx;
+ u64 xdp_xmit;
int i, j, cpu;
if (!nvdev)
@@ -1562,26 +1524,32 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
for (j = 0; j < nvdev->num_chn; j++) {
- qstats = &nvdev->chan_table[j].tx_stats;
+ tx_stats = &nvdev->chan_table[j].tx_stats;
do {
- start = u64_stats_fetch_begin_irq(&qstats->syncp);
- packets = qstats->packets;
- bytes = qstats->bytes;
- } while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ packets = tx_stats->packets;
+ bytes = tx_stats->bytes;
+ xdp_xmit = tx_stats->xdp_xmit;
+ } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
+ data[i++] = xdp_xmit;
- qstats = &nvdev->chan_table[j].rx_stats;
+ rx_stats = &nvdev->chan_table[j].rx_stats;
do {
- start = u64_stats_fetch_begin_irq(&qstats->syncp);
- packets = qstats->packets;
- bytes = qstats->bytes;
- xdp_drop = qstats->xdp_drop;
- } while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ packets = rx_stats->packets;
+ bytes = rx_stats->bytes;
+ xdp_drop = rx_stats->xdp_drop;
+ xdp_redirect = rx_stats->xdp_redirect;
+ xdp_tx = rx_stats->xdp_tx;
+ } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
data[i++] = xdp_drop;
+ data[i++] = xdp_redirect;
+ data[i++] = xdp_tx;
}
pcpu_sum = kvmalloc_array(num_possible_cpus(),
@@ -1622,9 +1590,12 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
for (i = 0; i < nvdev->num_chn; i++) {
ethtool_sprintf(&p, "tx_queue_%u_packets", i);
ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+ ethtool_sprintf(&p, "tx_queue_%u_xdp_xmit", i);
ethtool_sprintf(&p, "rx_queue_%u_packets", i);
ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
ethtool_sprintf(&p, "rx_queue_%u_xdp_drop", i);
+ ethtool_sprintf(&p, "rx_queue_%u_xdp_redirect", i);
+ ethtool_sprintf(&p, "rx_queue_%u_xdp_tx", i);
}
for_each_present_cpu(cpu) {
@@ -2057,6 +2028,7 @@ static const struct net_device_ops device_ops = {
.ndo_select_queue = netvsc_select_queue,
.ndo_get_stats64 = netvsc_get_stats64,
.ndo_bpf = netvsc_bpf,
+ .ndo_xdp_xmit = netvsc_ndoxdp_xmit,
};
/*
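
[Editor note] On the ethtool bookkeeping above: NETVSC_QUEUE_STATS_LEN(), netvsc_get_strings() and netvsc_get_ethtool_stats() must walk exactly the same per-queue layout, or every value after the first mismatch is reported under the wrong name. The new count of 8 decomposes as 3 tx + 5 rx entries; an illustrative restatement of the invariant (the DEMO_* names are not the driver's):

        /* tx: packets, bytes, xdp_xmit */
        #define DEMO_TX_PER_QUEUE       3
        /* rx: packets, bytes, xdp_drop, xdp_redirect, xdp_tx */
        #define DEMO_RX_PER_QUEUE       5
        /* must equal the multiplier in NETVSC_QUEUE_STATS_LEN() */
        #define DEMO_PER_QUEUE          (DEMO_TX_PER_QUEUE + DEMO_RX_PER_QUEUE)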
diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c
index e2273588c75b..7aa49827196f 100644
--- a/drivers/net/mdio/mdio-aspeed.c
+++ b/drivers/net/mdio/mdio-aspeed.c
@@ -21,6 +21,10 @@
#define ASPEED_MDIO_CTRL_OP GENMASK(27, 26)
#define MDIO_C22_OP_WRITE 0b01
#define MDIO_C22_OP_READ 0b10
+#define MDIO_C45_OP_ADDR 0b00
+#define MDIO_C45_OP_WRITE 0b01
+#define MDIO_C45_OP_PREAD 0b10
+#define MDIO_C45_OP_READ 0b11
#define ASPEED_MDIO_CTRL_PHYAD GENMASK(25, 21)
#define ASPEED_MDIO_CTRL_REGAD GENMASK(20, 16)
#define ASPEED_MDIO_CTRL_MIIWDATA GENMASK(15, 0)
@@ -39,34 +43,35 @@ struct aspeed_mdio {
void __iomem *base;
};
-static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum)
+static int aspeed_mdio_op(struct mii_bus *bus, u8 st, u8 op, u8 phyad, u8 regad,
+ u16 data)
{
struct aspeed_mdio *ctx = bus->priv;
u32 ctrl;
- u32 data;
- int rc;
-
- dev_dbg(&bus->dev, "%s: addr: %d, regnum: %d\n", __func__, addr,
- regnum);
- /* Just clause 22 for the moment */
- if (regnum & MII_ADDR_C45)
- return -EOPNOTSUPP;
+	dev_dbg(&bus->dev, "%s: st: %u, op: %u, phyad: %u, regad: %u, data: %u\n",
+		__func__, st, op, phyad, regad, data);
ctrl = ASPEED_MDIO_CTRL_FIRE
- | FIELD_PREP(ASPEED_MDIO_CTRL_ST, ASPEED_MDIO_CTRL_ST_C22)
- | FIELD_PREP(ASPEED_MDIO_CTRL_OP, MDIO_C22_OP_READ)
- | FIELD_PREP(ASPEED_MDIO_CTRL_PHYAD, addr)
- | FIELD_PREP(ASPEED_MDIO_CTRL_REGAD, regnum);
+ | FIELD_PREP(ASPEED_MDIO_CTRL_ST, st)
+ | FIELD_PREP(ASPEED_MDIO_CTRL_OP, op)
+ | FIELD_PREP(ASPEED_MDIO_CTRL_PHYAD, phyad)
+ | FIELD_PREP(ASPEED_MDIO_CTRL_REGAD, regad)
+ | FIELD_PREP(ASPEED_MDIO_DATA_MIIRDATA, data);
iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL);
- rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl,
+ return readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl,
!(ctrl & ASPEED_MDIO_CTRL_FIRE),
ASPEED_MDIO_INTERVAL_US,
ASPEED_MDIO_TIMEOUT_US);
- if (rc < 0)
- return rc;
+}
+
+static int aspeed_mdio_get_data(struct mii_bus *bus)
+{
+ struct aspeed_mdio *ctx = bus->priv;
+ u32 data;
+ int rc;
rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_DATA, data,
data & ASPEED_MDIO_DATA_IDLE,
@@ -78,31 +83,80 @@ static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum)
return FIELD_GET(ASPEED_MDIO_DATA_MIIRDATA, data);
}
-static int aspeed_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
+static int aspeed_mdio_read_c22(struct mii_bus *bus, int addr, int regnum)
{
- struct aspeed_mdio *ctx = bus->priv;
- u32 ctrl;
+ int rc;
- dev_dbg(&bus->dev, "%s: addr: %d, regnum: %d, val: 0x%x\n",
- __func__, addr, regnum, val);
+ rc = aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C22, MDIO_C22_OP_READ,
+ addr, regnum, 0);
+ if (rc < 0)
+ return rc;
+
+ return aspeed_mdio_get_data(bus);
+}
+
+static int aspeed_mdio_write_c22(struct mii_bus *bus, int addr, int regnum,
+ u16 val)
+{
+ return aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C22, MDIO_C22_OP_WRITE,
+ addr, regnum, val);
+}
+
+static int aspeed_mdio_read_c45(struct mii_bus *bus, int addr, int regnum)
+{
+ u8 c45_dev = (regnum >> 16) & 0x1F;
+ u16 c45_addr = regnum & 0xFFFF;
+ int rc;
+
+ rc = aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C45, MDIO_C45_OP_ADDR,
+ addr, c45_dev, c45_addr);
+ if (rc < 0)
+ return rc;
+
+ rc = aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C45, MDIO_C45_OP_READ,
+ addr, c45_dev, 0);
+ if (rc < 0)
+ return rc;
+
+ return aspeed_mdio_get_data(bus);
+}
+
+static int aspeed_mdio_write_c45(struct mii_bus *bus, int addr, int regnum,
+ u16 val)
+{
+ u8 c45_dev = (regnum >> 16) & 0x1F;
+ u16 c45_addr = regnum & 0xFFFF;
+ int rc;
+
+ rc = aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C45, MDIO_C45_OP_ADDR,
+ addr, c45_dev, c45_addr);
+ if (rc < 0)
+ return rc;
+
+ return aspeed_mdio_op(bus, ASPEED_MDIO_CTRL_ST_C45, MDIO_C45_OP_WRITE,
+ addr, c45_dev, val);
+}
+
+static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum)
+{
+ dev_dbg(&bus->dev, "%s: addr: %d, regnum: %d\n", __func__, addr,
+ regnum);
- /* Just clause 22 for the moment */
if (regnum & MII_ADDR_C45)
- return -EOPNOTSUPP;
+ return aspeed_mdio_read_c45(bus, addr, regnum);
- ctrl = ASPEED_MDIO_CTRL_FIRE
- | FIELD_PREP(ASPEED_MDIO_CTRL_ST, ASPEED_MDIO_CTRL_ST_C22)
- | FIELD_PREP(ASPEED_MDIO_CTRL_OP, MDIO_C22_OP_WRITE)
- | FIELD_PREP(ASPEED_MDIO_CTRL_PHYAD, addr)
- | FIELD_PREP(ASPEED_MDIO_CTRL_REGAD, regnum)
- | FIELD_PREP(ASPEED_MDIO_CTRL_MIIWDATA, val);
+ return aspeed_mdio_read_c22(bus, addr, regnum);
+}
- iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL);
+static int aspeed_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
+{
+ dev_dbg(&bus->dev, "%s: addr: %d, regnum: %d, val: 0x%x\n",
+ __func__, addr, regnum, val);
- return readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl,
- !(ctrl & ASPEED_MDIO_CTRL_FIRE),
- ASPEED_MDIO_INTERVAL_US,
- ASPEED_MDIO_TIMEOUT_US);
+ if (regnum & MII_ADDR_C45)
+ return aspeed_mdio_write_c45(bus, addr, regnum, val);
+
+ return aspeed_mdio_write_c22(bus, addr, regnum, val);
}
static int aspeed_mdio_probe(struct platform_device *pdev)
@@ -125,6 +179,7 @@ static int aspeed_mdio_probe(struct platform_device *pdev)
bus->parent = &pdev->dev;
bus->read = aspeed_mdio_read;
bus->write = aspeed_mdio_write;
+ bus->probe_capabilities = MDIOBUS_C22_C45;
rc = of_mdiobus_register(bus, pdev->dev.of_node);
if (rc) {
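
[Editor note] For readers new to Clause 45: the two aspeed_mdio_op() calls in each C45 helper above implement the mandatory two-phase transaction — an ADDR cycle that latches the 16-bit register address into the selected MMD, then a READ (or WRITE) cycle that moves the data. The regnum the helpers decode is the kernel's packed C45 form. An illustrative decoder using the generic masks from <linux/mdio.h> (the demo_ function is hypothetical):

        #include <linux/mdio.h>
        #include <linux/printk.h>

        static void demo_decode_c45(int addr, int regnum)
        {
                /* bit 30 (MII_ADDR_C45) marks the access as Clause 45 */
                u8 devad = (regnum >> MII_DEVADDR_C45_SHIFT) & 0x1f;    /* bits 20:16 */
                u16 reg = regnum & MII_REGADDR_C45_MASK;                /* bits 15:0 */

                pr_debug("phy %d, mmd %u, reg 0x%04x\n", addr, devad, reg);
        }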
diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index 582969751b4c..08541007b18a 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -7,6 +7,7 @@
*/
#include <linux/bitops.h>
+#include <linux/clk.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/kernel.h>
@@ -30,6 +31,8 @@
#define MSCC_MIIM_CMD_VLD BIT(31)
#define MSCC_MIIM_REG_DATA 0xC
#define MSCC_MIIM_DATA_ERROR (BIT(16) | BIT(17))
+#define MSCC_MIIM_REG_CFG 0x10
+#define MSCC_MIIM_CFG_PRESCALE_MASK GENMASK(7, 0)
#define MSCC_PHY_REG_PHY_CFG 0x0
#define PHY_CFG_PHY_ENA (BIT(0) | BIT(1) | BIT(2) | BIT(3))
@@ -50,6 +53,8 @@ struct mscc_miim_dev {
int mii_status_offset;
struct regmap *phy_regs;
const struct mscc_miim_info *info;
+ struct clk *clk;
+ u32 bus_freq;
};
/* When high resolution timers aren't built-in: we can't use usleep_range() as
@@ -241,9 +246,33 @@ int mscc_miim_setup(struct device *dev, struct mii_bus **pbus, const char *name,
}
EXPORT_SYMBOL(mscc_miim_setup);
+static int mscc_miim_clk_set(struct mii_bus *bus)
+{
+ struct mscc_miim_dev *miim = bus->priv;
+ unsigned long rate;
+ u32 div;
+
+ /* Keep the current settings */
+ if (!miim->bus_freq)
+ return 0;
+
+ rate = clk_get_rate(miim->clk);
+
+ div = DIV_ROUND_UP(rate, 2 * miim->bus_freq) - 1;
+ if (div == 0 || div & ~MSCC_MIIM_CFG_PRESCALE_MASK) {
+ dev_err(&bus->dev, "Incorrect MDIO clock frequency\n");
+ return -EINVAL;
+ }
+
+ return regmap_update_bits(miim->regs, MSCC_MIIM_REG_CFG,
+ MSCC_MIIM_CFG_PRESCALE_MASK, div);
+}
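
[Editor note] A worked example for the prescaler math above, assuming the block divides the system clock as mdio_clk = rate / (2 * (PRESCALE + 1)) — the DIV_ROUND_UP(rate, 2 * bus_freq) - 1 expression is that formula inverted:

        /* rate = 250 MHz, DT "clock-frequency" = 2.5 MHz:
         *   div = DIV_ROUND_UP(250000000, 2 * 2500000) - 1 = 50 - 1 = 49
         *   mdio_clk = 250000000 / (2 * (49 + 1)) = 2.5 MHz
         * Rounding the divisor up keeps the bus at or below the requested
         * frequency; div == 0, or anything overflowing the 8-bit PRESCALE
         * field, is rejected with -EINVAL before touching the register.
         */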
+
static int mscc_miim_probe(struct platform_device *pdev)
{
struct regmap *mii_regmap, *phy_regmap = NULL;
+ struct device_node *np = pdev->dev.of_node;
+ struct device *dev = &pdev->dev;
void __iomem *regs, *phy_regs;
struct mscc_miim_dev *miim;
struct resource *res;
@@ -252,63 +281,87 @@ static int mscc_miim_probe(struct platform_device *pdev)
regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
if (IS_ERR(regs)) {
- dev_err(&pdev->dev, "Unable to map MIIM registers\n");
+ dev_err(dev, "Unable to map MIIM registers\n");
return PTR_ERR(regs);
}
- mii_regmap = devm_regmap_init_mmio(&pdev->dev, regs,
- &mscc_miim_regmap_config);
+ mii_regmap = devm_regmap_init_mmio(dev, regs, &mscc_miim_regmap_config);
if (IS_ERR(mii_regmap)) {
- dev_err(&pdev->dev, "Unable to create MIIM regmap\n");
+ dev_err(dev, "Unable to create MIIM regmap\n");
return PTR_ERR(mii_regmap);
}
/* This resource is optional */
res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
if (res) {
- phy_regs = devm_ioremap_resource(&pdev->dev, res);
+ phy_regs = devm_ioremap_resource(dev, res);
if (IS_ERR(phy_regs)) {
- dev_err(&pdev->dev, "Unable to map internal phy registers\n");
+ dev_err(dev, "Unable to map internal phy registers\n");
return PTR_ERR(phy_regs);
}
- phy_regmap = devm_regmap_init_mmio(&pdev->dev, phy_regs,
+ phy_regmap = devm_regmap_init_mmio(dev, phy_regs,
&mscc_miim_phy_regmap_config);
if (IS_ERR(phy_regmap)) {
- dev_err(&pdev->dev, "Unable to create phy register regmap\n");
+ dev_err(dev, "Unable to create phy register regmap\n");
return PTR_ERR(phy_regmap);
}
}
- ret = mscc_miim_setup(&pdev->dev, &bus, "mscc_miim", mii_regmap, 0);
+ ret = mscc_miim_setup(dev, &bus, "mscc_miim", mii_regmap, 0);
if (ret < 0) {
- dev_err(&pdev->dev, "Unable to setup the MDIO bus\n");
+ dev_err(dev, "Unable to setup the MDIO bus\n");
return ret;
}
miim = bus->priv;
miim->phy_regs = phy_regmap;
- miim->info = device_get_match_data(&pdev->dev);
+ miim->info = device_get_match_data(dev);
if (!miim->info)
return -EINVAL;
- ret = of_mdiobus_register(bus, pdev->dev.of_node);
- if (ret < 0) {
- dev_err(&pdev->dev, "Cannot register MDIO bus (%d)\n", ret);
+ miim->clk = devm_clk_get_optional(dev, NULL);
+ if (IS_ERR(miim->clk))
+ return PTR_ERR(miim->clk);
+
+ of_property_read_u32(np, "clock-frequency", &miim->bus_freq);
+
+ if (miim->bus_freq && !miim->clk) {
+ dev_err(dev, "cannot use clock-frequency without a clock\n");
+ return -EINVAL;
+ }
+
+ ret = clk_prepare_enable(miim->clk);
+ if (ret)
return ret;
+
+ ret = mscc_miim_clk_set(bus);
+ if (ret)
+ goto out_disable_clk;
+
+ ret = of_mdiobus_register(bus, np);
+ if (ret < 0) {
+ dev_err(dev, "Cannot register MDIO bus (%d)\n", ret);
+ goto out_disable_clk;
}
platform_set_drvdata(pdev, bus);
return 0;
+
+out_disable_clk:
+ clk_disable_unprepare(miim->clk);
+ return ret;
}
static int mscc_miim_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
+ struct mscc_miim_dev *miim = bus->priv;
+ clk_disable_unprepare(miim->clk);
mdiobus_unregister(bus);
return 0;
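
[Editor note] The probe changes above lean on the optional-clock contract: devm_clk_get_optional() returns NULL rather than an error when the node describes no clock, and NULL is a valid no-op argument to every clk_*() call, so only the "clock-frequency given but no clock" combination needs an explicit rejection. A minimal sketch of the pattern (demo_get_clk is a hypothetical name):

        #include <linux/clk.h>
        #include <linux/err.h>

        static int demo_get_clk(struct device *dev, struct clk **clk, u32 bus_freq)
        {
                *clk = devm_clk_get_optional(dev, NULL);
                if (IS_ERR(*clk))
                        return PTR_ERR(*clk);           /* e.g. -EPROBE_DEFER */

                if (bus_freq && !*clk)
                        return -EINVAL;                 /* nothing to scale */

                return clk_prepare_enable(*clk);        /* NULL clk: no-op, returns 0 */
        }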
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 378ee779061c..c8f398f5bc5b 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -22,6 +22,7 @@
#include <linux/spinlock_types.h>
#include <linux/types.h>
#include <net/fib_notifier.h>
+#include <net/inet_dscp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/fib_rules.h>
@@ -78,7 +79,7 @@ struct nsim_fib_rt {
struct nsim_fib4_rt {
struct nsim_fib_rt common;
struct fib_info *fi;
- u8 tos;
+ dscp_t dscp;
u8 type;
};
@@ -283,7 +284,7 @@ nsim_fib4_rt_create(struct nsim_fib_data *data,
fib4_rt->fi = fen_info->fi;
fib_info_hold(fib4_rt->fi);
- fib4_rt->tos = fen_info->tos;
+ fib4_rt->dscp = fen_info->dscp;
fib4_rt->type = fen_info->type;
return fib4_rt;
@@ -322,7 +323,7 @@ nsim_fib4_rt_offload_failed_flag_set(struct net *net,
fri.tb_id = fen_info->tb_id;
fri.dst = cpu_to_be32(*p_dst);
fri.dst_len = fen_info->dst_len;
- fri.tos = fen_info->tos;
+ fri.dscp = fen_info->dscp;
fri.type = fen_info->type;
fri.offload = false;
fri.trap = false;
@@ -342,7 +343,7 @@ static void nsim_fib4_rt_hw_flags_set(struct net *net,
fri.tb_id = fib4_rt->common.key.tb_id;
fri.dst = cpu_to_be32(*p_dst);
fri.dst_len = dst_len;
- fri.tos = fib4_rt->tos;
+ fri.dscp = fib4_rt->dscp;
fri.type = fib4_rt->type;
fri.offload = false;
fri.trap = trap;
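
[Editor note] The netdevsim conversion above is part of a wider tos-to-dscp_t migration: dscp_t from <net/inet_dscp.h> is a sparse __bitwise type holding only the six DSCP bits, so a raw u8 TOS value (which may carry ECN bits) can no longer be assigned by accident. A sketch of the conversions involved (the demo_* wrappers are illustrative):

        #include <net/inet_dscp.h>

        static inline u8 demo_dscp_to_tos(dscp_t dscp)
        {
                return inet_dscp_to_dsfield(dscp);      /* DSCP bits in dsfield position */
        }

        static inline dscp_t demo_tos_to_dscp(u8 dsfield)
        {
                return inet_dsfield_to_dscp(dsfield);   /* masks away the ECN bits */
        }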
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index fc53b71dc872..96840695debd 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -70,6 +70,27 @@
#define KSZ8081_LMD_SHORT_INDICATOR BIT(12)
#define KSZ8081_LMD_DELTA_TIME_MASK GENMASK(8, 0)
+#define KSZ9x31_LMD 0x12
+#define KSZ9x31_LMD_VCT_EN BIT(15)
+#define KSZ9x31_LMD_VCT_DIS_TX BIT(14)
+#define KSZ9x31_LMD_VCT_PAIR(n) (((n) & 0x3) << 12)
+#define KSZ9x31_LMD_VCT_SEL_RESULT 0
+#define KSZ9x31_LMD_VCT_SEL_THRES_HI BIT(10)
+#define KSZ9x31_LMD_VCT_SEL_THRES_LO BIT(11)
+#define KSZ9x31_LMD_VCT_SEL_MASK GENMASK(11, 10)
+#define KSZ9x31_LMD_VCT_ST_NORMAL 0
+#define KSZ9x31_LMD_VCT_ST_OPEN 1
+#define KSZ9x31_LMD_VCT_ST_SHORT 2
+#define KSZ9x31_LMD_VCT_ST_FAIL 3
+#define KSZ9x31_LMD_VCT_ST_MASK GENMASK(9, 8)
+#define KSZ9x31_LMD_VCT_DATA_REFLECTED_INVALID BIT(7)
+#define KSZ9x31_LMD_VCT_DATA_SIG_WAIT_TOO_LONG BIT(6)
+#define KSZ9x31_LMD_VCT_DATA_MASK100 BIT(5)
+#define KSZ9x31_LMD_VCT_DATA_NLP_FLP BIT(4)
+#define KSZ9x31_LMD_VCT_DATA_LO_PULSE_MASK GENMASK(3, 2)
+#define KSZ9x31_LMD_VCT_DATA_HI_PULSE_MASK GENMASK(1, 0)
+#define KSZ9x31_LMD_VCT_DATA_MASK GENMASK(7, 0)
+
/* Lan8814 general Interrupt control/status reg in GPHY specific block. */
#define LAN8814_INTC 0x18
#define LAN8814_INTS 0x1B
@@ -280,6 +301,7 @@ struct kszphy_priv {
struct kszphy_ptp_priv ptp_priv;
const struct kszphy_type *type;
int led_mode;
+ u16 vct_ctrl1000;
bool rmii_ref_clk_sel;
bool rmii_ref_clk_sel_val;
u64 stats[ARRAY_SIZE(kszphy_hw_stats)];
@@ -1326,6 +1348,199 @@ static int ksz9031_read_status(struct phy_device *phydev)
return 0;
}
+static int ksz9x31_cable_test_start(struct phy_device *phydev)
+{
+ struct kszphy_priv *priv = phydev->priv;
+ int ret;
+
+ /* KSZ9131RNX, DS00002841B-page 38, 4.14 LinkMD (R) Cable Diagnostic
+ * Prior to running the cable diagnostics, Auto-negotiation should
+ * be disabled, full duplex set and the link speed set to 1000Mbps
+ * via the Basic Control Register.
+ */
+ ret = phy_modify(phydev, MII_BMCR,
+ BMCR_SPEED1000 | BMCR_FULLDPLX |
+ BMCR_ANENABLE | BMCR_SPEED100,
+ BMCR_SPEED1000 | BMCR_FULLDPLX);
+ if (ret)
+ return ret;
+
+ /* KSZ9131RNX, DS00002841B-page 38, 4.14 LinkMD (R) Cable Diagnostic
+ * The Master-Slave configuration should be set to Slave by writing
+ * a value of 0x1000 to the Auto-Negotiation Master Slave Control
+ * Register.
+ */
+ ret = phy_read(phydev, MII_CTRL1000);
+ if (ret < 0)
+ return ret;
+
+ /* Cache these bits, they need to be restored once LinkMD finishes. */
+ priv->vct_ctrl1000 = ret & (CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER);
+ ret &= ~(CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER);
+ ret |= CTL1000_ENABLE_MASTER;
+
+ return phy_write(phydev, MII_CTRL1000, ret);
+}
+
+static int ksz9x31_cable_test_result_trans(u16 status)
+{
+ switch (FIELD_GET(KSZ9x31_LMD_VCT_ST_MASK, status)) {
+ case KSZ9x31_LMD_VCT_ST_NORMAL:
+ return ETHTOOL_A_CABLE_RESULT_CODE_OK;
+ case KSZ9x31_LMD_VCT_ST_OPEN:
+ return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
+ case KSZ9x31_LMD_VCT_ST_SHORT:
+ return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
+ case KSZ9x31_LMD_VCT_ST_FAIL:
+ fallthrough;
+ default:
+ return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
+ }
+}
+
+static bool ksz9x31_cable_test_failed(u16 status)
+{
+ int stat = FIELD_GET(KSZ9x31_LMD_VCT_ST_MASK, status);
+
+ return stat == KSZ9x31_LMD_VCT_ST_FAIL;
+}
+
+static bool ksz9x31_cable_test_fault_length_valid(u16 status)
+{
+ switch (FIELD_GET(KSZ9x31_LMD_VCT_ST_MASK, status)) {
+ case KSZ9x31_LMD_VCT_ST_OPEN:
+ fallthrough;
+ case KSZ9x31_LMD_VCT_ST_SHORT:
+ return true;
+ }
+ return false;
+}
+
+static int ksz9x31_cable_test_fault_length(struct phy_device *phydev, u16 stat)
+{
+ int dt = FIELD_GET(KSZ9x31_LMD_VCT_DATA_MASK, stat);
+
+ /* KSZ9131RNX, DS00002841B-page 38, 4.14 LinkMD (R) Cable Diagnostic
+ *
+ * distance to fault = (VCT_DATA - 22) * 4 / cable propagation velocity
+ */
+ if ((phydev->phy_id & MICREL_PHY_ID_MASK) == PHY_ID_KSZ9131)
+ dt = clamp(dt - 22, 0, 255);
+
+ return (dt * 400) / 10;
+}
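
[Editor note] A worked example for the arithmetic just above, assuming (as the ethtool cable-test netlink API defines it) that the returned fault length is in centimetres: VCT_DATA = 72 on a KSZ9131 gives dt = clamp(72 - 22, 0, 255) = 50, and (50 * 400) / 10 = 2000 cm, i.e. a fault roughly 20 m down the cable. Each VCT_DATA count thus corresponds to about 0.4 m.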
+
+static int ksz9x31_cable_test_wait_for_completion(struct phy_device *phydev)
+{
+ int val, ret;
+
+ ret = phy_read_poll_timeout(phydev, KSZ9x31_LMD, val,
+ !(val & KSZ9x31_LMD_VCT_EN),
+ 30000, 100000, true);
+
+ return ret < 0 ? ret : 0;
+}
+
+static int ksz9x31_cable_test_get_pair(int pair)
+{
+ static const int ethtool_pair[] = {
+ ETHTOOL_A_CABLE_PAIR_A,
+ ETHTOOL_A_CABLE_PAIR_B,
+ ETHTOOL_A_CABLE_PAIR_C,
+ ETHTOOL_A_CABLE_PAIR_D,
+ };
+
+ return ethtool_pair[pair];
+}
+
+static int ksz9x31_cable_test_one_pair(struct phy_device *phydev, int pair)
+{
+ int ret, val;
+
+ /* KSZ9131RNX, DS00002841B-page 38, 4.14 LinkMD (R) Cable Diagnostic
+ * To test each individual cable pair, set the cable pair in the Cable
+ * Diagnostics Test Pair (VCT_PAIR[1:0]) field of the LinkMD Cable
+ * Diagnostic Register, along with setting the Cable Diagnostics Test
+ * Enable (VCT_EN) bit. The Cable Diagnostics Test Enable (VCT_EN) bit
+ * will self clear when the test is concluded.
+ */
+ ret = phy_write(phydev, KSZ9x31_LMD,
+ KSZ9x31_LMD_VCT_EN | KSZ9x31_LMD_VCT_PAIR(pair));
+ if (ret)
+ return ret;
+
+ ret = ksz9x31_cable_test_wait_for_completion(phydev);
+ if (ret)
+ return ret;
+
+ val = phy_read(phydev, KSZ9x31_LMD);
+ if (val < 0)
+ return val;
+
+ if (ksz9x31_cable_test_failed(val))
+ return -EAGAIN;
+
+ ret = ethnl_cable_test_result(phydev,
+ ksz9x31_cable_test_get_pair(pair),
+ ksz9x31_cable_test_result_trans(val));
+ if (ret)
+ return ret;
+
+ if (!ksz9x31_cable_test_fault_length_valid(val))
+ return 0;
+
+ return ethnl_cable_test_fault_length(phydev,
+ ksz9x31_cable_test_get_pair(pair),
+ ksz9x31_cable_test_fault_length(phydev, val));
+}
+
+static int ksz9x31_cable_test_get_status(struct phy_device *phydev,
+ bool *finished)
+{
+ struct kszphy_priv *priv = phydev->priv;
+ unsigned long pair_mask = 0xf;
+ int retries = 20;
+ int pair, ret, rv;
+
+ *finished = false;
+
+ /* Try harder if link partner is active */
+ while (pair_mask && retries--) {
+ for_each_set_bit(pair, &pair_mask, 4) {
+ ret = ksz9x31_cable_test_one_pair(phydev, pair);
+ if (ret == -EAGAIN)
+ continue;
+ if (ret < 0)
+ return ret;
+ clear_bit(pair, &pair_mask);
+ }
+ /* If link partner is in autonegotiation mode it will send 2ms
+ * of FLPs with at least 6ms of silence.
+ * Add 2ms sleep to have better chances to hit this silence.
+ */
+ if (pair_mask)
+ usleep_range(2000, 3000);
+ }
+
+ /* Report remaining unfinished pair result as unknown. */
+ for_each_set_bit(pair, &pair_mask, 4) {
+ ret = ethnl_cable_test_result(phydev,
+ ksz9x31_cable_test_get_pair(pair),
+ ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC);
+ }
+
+ *finished = true;
+
+ /* Restore cached bits from before LinkMD got started. */
+ rv = phy_modify(phydev, MII_CTRL1000,
+ CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER,
+ priv->vct_ctrl1000);
+ if (rv)
+ return rv;
+
+ return ret;
+}
+
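
[Editor note] Taken together, the two callbacks registered below follow the ethtool cable-test contract: the core calls cable_test_start() once (here: aneg off, 1000/full forced, manual slave), then — because the driver sets PHY_POLL_CABLE_TEST — keeps polling cable_test_get_status() until *finished is set. A rough core-side restatement (pseudocode, not kernel source):

        /*
         *   drv->cable_test_start(phydev);
         *   do {
         *           sleep(poll interval);
         *           drv->cable_test_get_status(phydev, &finished);
         *   } while (!finished);
         *
         * Per-pair results are streamed back through ethnl_cable_test_result()
         * and ethnl_cable_test_fault_length() as each pair completes.
         */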
static int ksz8873mll_config_aneg(struct phy_device *phydev)
{
return 0;
@@ -2806,6 +3021,7 @@ static struct phy_driver ksphy_driver[] = {
.phy_id = PHY_ID_KSZ9031,
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ9031 Gigabit PHY",
+ .flags = PHY_POLL_CABLE_TEST,
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
.get_features = ksz9031_get_features,
@@ -2819,6 +3035,8 @@ static struct phy_driver ksphy_driver[] = {
.get_stats = kszphy_get_stats,
.suspend = kszphy_suspend,
.resume = kszphy_resume,
+ .cable_test_start = ksz9x31_cable_test_start,
+ .cable_test_get_status = ksz9x31_cable_test_get_status,
}, {
.phy_id = PHY_ID_LAN8814,
.phy_id_mask = MICREL_PHY_ID_MASK,
@@ -2853,6 +3071,7 @@ static struct phy_driver ksphy_driver[] = {
.phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Microchip KSZ9131 Gigabit PHY",
/* PHY_GBIT_FEATURES */
+ .flags = PHY_POLL_CABLE_TEST,
.driver_data = &ksz9021_type,
.probe = kszphy_probe,
.config_init = ksz9131_config_init,
@@ -2863,6 +3082,8 @@ static struct phy_driver ksphy_driver[] = {
.get_stats = kszphy_get_stats,
.suspend = kszphy_suspend,
.resume = kszphy_resume,
+ .cable_test_start = ksz9x31_cable_test_start,
+ .cable_test_get_status = ksz9x31_cable_test_get_status,
}, {
.phy_id = PHY_ID_KSZ8873MLL,
.phy_id_mask = MICREL_PHY_ID_MASK,
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 06943889d747..33c285252584 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -2778,34 +2778,6 @@ static const struct sfp_upstream_ops sfp_phylink_ops = {
/* Helpers for MAC drivers */
-/**
- * phylink_helper_basex_speed() - 1000BaseX/2500BaseX helper
- * @state: a pointer to a &struct phylink_link_state
- *
- * Inspect the interface mode, advertising mask or forced speed and
- * decide whether to run at 2.5Gbit or 1Gbit appropriately, switching
- * the interface mode to suit. @state->interface is appropriately
- * updated, and the advertising mask has the "other" baseX_Full flag
- * cleared.
- */
-void phylink_helper_basex_speed(struct phylink_link_state *state)
-{
- if (phy_interface_mode_is_8023z(state->interface)) {
- bool want_2500 = state->an_enabled ?
- phylink_test(state->advertising, 2500baseX_Full) :
- state->speed == SPEED_2500;
-
- if (want_2500) {
- phylink_clear(state->advertising, 1000baseX_Full);
- state->interface = PHY_INTERFACE_MODE_2500BASEX;
- } else {
- phylink_clear(state->advertising, 2500baseX_Full);
- state->interface = PHY_INTERFACE_MODE_1000BASEX;
- }
- }
-}
-EXPORT_SYMBOL_GPL(phylink_helper_basex_speed);
-
static void phylink_decode_c37_word(struct phylink_link_state *state,
uint16_t config_reg, int speed)
{
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 3619520340b7..1b41cd9732d7 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1011,8 +1011,7 @@ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
goto end;
}
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &error);
+ skb = skb_recv_datagram(sk, flags, &error);
if (error < 0)
goto end;
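
[Editor note] The pppoe hunk above tracks a tree-wide API change rather than a behavioural fix: skb_recv_datagram() lost its separate noblock parameter, which is now carried inside flags, so callers stop splitting MSG_DONTWAIT out:

        /* before: */
        skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
                                flags & MSG_DONTWAIT, &error);
        /* after: */
        skb = skb_recv_datagram(sk, flags, &error);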
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 9b4dfa3001d6..2de09ad5bac0 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -479,7 +479,7 @@ static int usbnet_cdc_zte_bind(struct usbnet *dev, struct usb_interface *intf)
* device MAC address has been updated). Always set MAC address to that of the
* device.
*/
-static int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
+int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
{
if (skb->len < ETH_HLEN || !(skb->data[0] & 0x02))
return 1;
@@ -489,6 +489,7 @@ static int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
return 1;
}
+EXPORT_SYMBOL_GPL(usbnet_cdc_zte_rx_fixup);
/* Ensure correct link state
*
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 3353e761016d..9e2f48c2e85e 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -190,7 +190,6 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
skbn = netdev_alloc_skb(net, pkt_len + LL_MAX_HEADER);
if (!skbn)
return 0;
- skbn->dev = net;
switch (skb->data[offset + qmimux_hdr_sz] & 0xf0) {
case 0x40:
@@ -1358,6 +1357,7 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */
{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */
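
[Editor note] The deleted skbn->dev assignment in qmimux_rx_fixup() was redundant: netdev_alloc_skb() already binds the freshly allocated skb to the device it is allocated for, approximately:

        /* paraphrase of the core helper, not a verbatim copy */
        static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev,
                                                       unsigned int length)
        {
                /* __netdev_alloc_skb() sets skb->dev = dev on success */
                return __netdev_alloc_skb(dev, length, GFP_ATOMIC);
        }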
diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
index 247f58cb0f84..4e70dec30e5a 100644
--- a/drivers/net/usb/rndis_host.c
+++ b/drivers/net/usb/rndis_host.c
@@ -418,10 +418,7 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags)
goto halt_fail_and_release;
}
- if (bp[0] & 0x02)
- eth_hw_addr_random(net);
- else
- eth_hw_addr_set(net, bp);
+ eth_hw_addr_set(net, bp);
/* set a nonzero filter to enable data transfers */
memset(u.set, 0, sizeof *u.set);
@@ -463,6 +460,16 @@ static int rndis_bind(struct usbnet *dev, struct usb_interface *intf)
return generic_rndis_bind(dev, intf, FLAG_RNDIS_PHYM_NOT_WIRELESS);
}
+static int zte_rndis_bind(struct usbnet *dev, struct usb_interface *intf)
+{
+ int status = rndis_bind(dev, intf);
+
+ if (!status && (dev->net->dev_addr[0] & 0x02))
+ eth_hw_addr_random(dev->net);
+
+ return status;
+}
+
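
[Editor note] The dev_addr[0] & 0x02 test in zte_rndis_bind() above checks the IEEE locally-administered bit of the first address octet; affected ZTE firmware hands out the same locally-administered MAC on every device, so bind replaces it with a random one (and the rx fixup rewrites the destination MAC on inbound frames). The open-coded test has a named equivalent in <linux/etherdevice.h>:

        if (is_local_ether_addr(dev->net->dev_addr))
                eth_hw_addr_random(dev->net);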
void rndis_unbind(struct usbnet *dev, struct usb_interface *intf)
{
struct rndis_halt *halt;
@@ -485,10 +492,14 @@ EXPORT_SYMBOL_GPL(rndis_unbind);
*/
int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
{
+ bool dst_mac_fixup;
+
/* This check is no longer done by usbnet */
if (skb->len < dev->net->hard_header_len)
return 0;
+ dst_mac_fixup = !!(dev->driver_info->data & RNDIS_DRIVER_DATA_DST_MAC_FIXUP);
+
/* peripheral may have batched packets to us... */
while (likely(skb->len)) {
struct rndis_data_hdr *hdr = (void *)skb->data;
@@ -523,10 +534,17 @@ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
break;
skb_pull(skb, msg_len - sizeof *hdr);
skb_trim(skb2, data_len);
+
+ if (unlikely(dst_mac_fixup))
+ usbnet_cdc_zte_rx_fixup(dev, skb2);
+
usbnet_skb_return(dev, skb2);
}
/* caller will usbnet_skb_return the remaining packet */
+ if (unlikely(dst_mac_fixup))
+ usbnet_cdc_zte_rx_fixup(dev, skb);
+
return 1;
}
EXPORT_SYMBOL_GPL(rndis_rx_fixup);
@@ -600,6 +618,17 @@ static const struct driver_info rndis_poll_status_info = {
.tx_fixup = rndis_tx_fixup,
};
+static const struct driver_info zte_rndis_info = {
+ .description = "ZTE RNDIS device",
+ .flags = FLAG_ETHER | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT,
+ .data = RNDIS_DRIVER_DATA_DST_MAC_FIXUP,
+ .bind = zte_rndis_bind,
+ .unbind = rndis_unbind,
+ .status = rndis_status,
+ .rx_fixup = rndis_rx_fixup,
+ .tx_fixup = rndis_tx_fixup,
+};
+
/*-------------------------------------------------------------------------*/
static const struct usb_device_id products [] = {
@@ -614,6 +643,16 @@ static const struct usb_device_id products [] = {
USB_CLASS_COMM, 2 /* ACM */, 0x0ff),
.driver_info = (unsigned long)&rndis_info,
}, {
+ /* ZTE WWAN modules */
+ USB_VENDOR_AND_INTERFACE_INFO(0x19d2,
+ USB_CLASS_WIRELESS_CONTROLLER, 1, 3),
+ .driver_info = (unsigned long)&zte_rndis_info,
+}, {
+ /* ZTE WWAN modules, ACM flavour */
+ USB_VENDOR_AND_INTERFACE_INFO(0x19d2,
+ USB_CLASS_COMM, 2 /* ACM */, 0x0ff),
+ .driver_info = (unsigned long)&zte_rndis_info,
+}, {
/* RNDIS is MSFT's un-official variant of CDC ACM */
USB_INTERFACE_INFO(USB_CLASS_COMM, 2 /* ACM */, 0x0ff),
.driver_info = (unsigned long) &rndis_info,
diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index 140780ac1745..588b2333cdb8 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -57,34 +57,6 @@ config COSA
The driver will be compiled as a module: the
module will be called cosa.
-#
-# Lan Media's board. Currently 1000, 1200, 5200, 5245
-#
-config LANMEDIA
- tristate "LanMedia Corp. SSI/V.35, T1/E1, HSSI, T3 boards"
- depends on PCI && VIRT_TO_BUS && HDLC
- help
- Driver for the following Lan Media family of serial boards:
-
- - LMC 1000 board allows you to connect synchronous serial devices
- (for example base-band modems, or any other device with the X.21,
- V.24, V.35 or V.36 interface) to your Linux box.
-
- - LMC 1200 with on board DSU board allows you to connect your Linux
- box directly to a T1 or E1 circuit.
-
- - LMC 5200 board provides a HSSI interface capable of running up to
- 52 Mbits per second.
-
- - LMC 5245 board connects directly to a T3 circuit saving the
- additional external hardware.
-
- To change setting such as clock source you will need lmcctl.
- It is available at <ftp://ftp.lanmedia.com/> (broken link).
-
- To compile this driver as a module, choose M here: the
- module will be called lmc.
-
# There is no way to detect a Sealevel board. Force it modular
config SEALEVEL_4021
tristate "Sealevel Systems 4021 support"
diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile
index 480bcd1f6c1c..1cd42147b34f 100644
--- a/drivers/net/wan/Makefile
+++ b/drivers/net/wan/Makefile
@@ -19,8 +19,6 @@ obj-$(CONFIG_SEALEVEL_4021) += z85230.o sealevel.o
obj-$(CONFIG_COSA) += cosa.o
obj-$(CONFIG_FARSYNC) += farsync.o
-obj-$(CONFIG_LANMEDIA) += lmc/
-
obj-$(CONFIG_LAPBETHER) += lapbether.o
obj-$(CONFIG_N2) += n2.o
obj-$(CONFIG_C101) += c101.o
diff --git a/drivers/net/wan/lmc/Makefile b/drivers/net/wan/lmc/Makefile
deleted file mode 100644
index f00fe4491d69..000000000000
--- a/drivers/net/wan/lmc/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the Lan Media 21140 based WAN cards
-# Specifically the 1000,1200,5200,5245
-#
-
-obj-$(CONFIG_LANMEDIA) += lmc.o
-
-lmc-objs := lmc_debug.o lmc_media.o lmc_main.o lmc_proto.o
-
-# Like above except every packet gets echoed to KERN_DEBUG
-# in hex
-#
-# DBDEF = \
-# -DDEBUG \
-# -DLMC_PACKET_LOG
-
-ccflags-y := $(DBGDEF)
diff --git a/drivers/net/wan/lmc/lmc.h b/drivers/net/wan/lmc/lmc.h
deleted file mode 100644
index d7d59b4595f9..000000000000
--- a/drivers/net/wan/lmc/lmc.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LMC_H_
-#define _LMC_H_
-
-#include "lmc_var.h"
-
-/*
- * prototypes for everyone
- */
-int lmc_probe(struct net_device * dev);
-unsigned lmc_mii_readreg(lmc_softc_t * const sc, unsigned
- devaddr, unsigned regno);
-void lmc_mii_writereg(lmc_softc_t * const sc, unsigned devaddr,
- unsigned regno, unsigned data);
-void lmc_led_on(lmc_softc_t * const, u32);
-void lmc_led_off(lmc_softc_t * const, u32);
-unsigned lmc_mii_readreg(lmc_softc_t * const, unsigned, unsigned);
-void lmc_mii_writereg(lmc_softc_t * const, unsigned, unsigned, unsigned);
-void lmc_gpio_mkinput(lmc_softc_t * const sc, u32 bits);
-void lmc_gpio_mkoutput(lmc_softc_t * const sc, u32 bits);
-
-int lmc_ioctl(struct net_device *dev, struct if_settings *ifs);
-
-extern lmc_media_t lmc_ds3_media;
-extern lmc_media_t lmc_ssi_media;
-extern lmc_media_t lmc_t1_media;
-extern lmc_media_t lmc_hssi_media;
-
-#ifdef _DBG_EVENTLOG
-static void lmcEventLog(u32 EventNum, u32 arg2, u32 arg3);
-#endif
-
-#endif
diff --git a/drivers/net/wan/lmc/lmc_debug.c b/drivers/net/wan/lmc/lmc_debug.c
deleted file mode 100644
index 2b6051bda3fb..000000000000
--- a/drivers/net/wan/lmc/lmc_debug.c
+++ /dev/null
@@ -1,65 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/types.h>
-#include <linux/netdevice.h>
-#include <linux/interrupt.h>
-
-#include "lmc_debug.h"
-
-/*
- * Prints out len, max to 80 octets using printk, 20 per line
- */
-#ifdef DEBUG
-#ifdef LMC_PACKET_LOG
-void lmcConsoleLog(char *type, unsigned char *ucData, int iLen)
-{
- int iNewLine = 1;
- char str[80], *pstr;
-
- sprintf(str, KERN_DEBUG "lmc: %s: ", type);
- pstr = str+strlen(str);
-
- if(iLen > 240){
- printk(KERN_DEBUG "lmc: Printing 240 chars... out of: %d\n", iLen);
- iLen = 240;
- }
- else{
- printk(KERN_DEBUG "lmc: Printing %d chars\n", iLen);
- }
-
- while(iLen > 0)
- {
- sprintf(pstr, "%02x ", *ucData);
- pstr+=3;
- ucData++;
- if( !(iNewLine % 20))
- {
- sprintf(pstr, "\n");
- printk(str);
- sprintf(str, KERN_DEBUG "lmc: %s: ", type);
- pstr=str+strlen(str);
- }
- iNewLine++;
- iLen--;
- }
- sprintf(pstr, "\n");
- printk(str);
-}
-#endif
-#endif
-
-#ifdef DEBUG
-u32 lmcEventLogIndex;
-u32 lmcEventLogBuf[LMC_EVENTLOGSIZE * LMC_EVENTLOGARGS];
-
-void lmcEventLog(u32 EventNum, u32 arg2, u32 arg3)
-{
- lmcEventLogBuf[lmcEventLogIndex++] = EventNum;
- lmcEventLogBuf[lmcEventLogIndex++] = arg2;
- lmcEventLogBuf[lmcEventLogIndex++] = arg3;
- lmcEventLogBuf[lmcEventLogIndex++] = jiffies;
-
- lmcEventLogIndex &= (LMC_EVENTLOGSIZE * LMC_EVENTLOGARGS) - 1;
-}
-#endif /* DEBUG */
-
-/* --------------------------- end if_lmc_linux.c ------------------------ */
diff --git a/drivers/net/wan/lmc/lmc_debug.h b/drivers/net/wan/lmc/lmc_debug.h
deleted file mode 100644
index cfae9eddf003..000000000000
--- a/drivers/net/wan/lmc/lmc_debug.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LMC_DEBUG_H_
-#define _LMC_DEBUG_H_
-
-#ifdef DEBUG
-#ifdef LMC_PACKET_LOG
-#define LMC_CONSOLE_LOG(x,y,z) lmcConsoleLog((x), (y), (z))
-#else
-#define LMC_CONSOLE_LOG(x,y,z)
-#endif
-#else
-#define LMC_CONSOLE_LOG(x,y,z)
-#endif
-
-
-
-/* Debug --- Event log definitions --- */
-/* EVENTLOGSIZE*EVENTLOGARGS needs to be a power of 2 */
-#define LMC_EVENTLOGSIZE 1024 /* number of events in eventlog */
-#define LMC_EVENTLOGARGS 4 /* number of args for each event */
-
-/* event indicators */
-#define LMC_EVENT_XMT 1
-#define LMC_EVENT_XMTEND 2
-#define LMC_EVENT_XMTINT 3
-#define LMC_EVENT_RCVINT 4
-#define LMC_EVENT_RCVEND 5
-#define LMC_EVENT_INT 6
-#define LMC_EVENT_XMTINTTMO 7
-#define LMC_EVENT_XMTPRCTMO 8
-#define LMC_EVENT_INTEND 9
-#define LMC_EVENT_RESET1 10
-#define LMC_EVENT_RESET2 11
-#define LMC_EVENT_FORCEDRESET 12
-#define LMC_EVENT_WATCHDOG 13
-#define LMC_EVENT_BADPKTSURGE 14
-#define LMC_EVENT_TBUSY0 15
-#define LMC_EVENT_TBUSY1 16
-
-
-#ifdef DEBUG
-extern u32 lmcEventLogIndex;
-extern u32 lmcEventLogBuf[LMC_EVENTLOGSIZE * LMC_EVENTLOGARGS];
-#define LMC_EVENT_LOG(x, y, z) lmcEventLog((x), (y), (z))
-#else
-#define LMC_EVENT_LOG(x,y,z)
-#endif /* end ifdef _DBG_EVENTLOG */
-
-void lmcConsoleLog(char *type, unsigned char *ucData, int iLen);
-void lmcEventLog(u32 EventNum, u32 arg2, u32 arg3);
-
-#endif
diff --git a/drivers/net/wan/lmc/lmc_ioctl.h b/drivers/net/wan/lmc/lmc_ioctl.h
deleted file mode 100644
index 8c65e2176e94..000000000000
--- a/drivers/net/wan/lmc/lmc_ioctl.h
+++ /dev/null
@@ -1,255 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _LMC_IOCTL_H_
-#define _LMC_IOCTL_H_
-/* $Id: lmc_ioctl.h,v 1.15 2000/04/06 12:16:43 asj Exp $ */
-
- /*
- * Copyright (c) 1997-2000 LAN Media Corporation (LMC)
- * All rights reserved. www.lanmedia.com
- *
- * This code is written by:
- * Andrew Stanley-Jones (asj@cban.com)
- * Rob Braun (bbraun@vix.com),
- * Michael Graff (explorer@vix.com) and
- * Matt Thomas (matt@3am-software.com).
- */
-
-#define LMCIOCGINFO SIOCDEVPRIVATE+3 /* get current state */
-#define LMCIOCSINFO SIOCDEVPRIVATE+4 /* set state to user values */
-#define LMCIOCGETLMCSTATS SIOCDEVPRIVATE+5
-#define LMCIOCCLEARLMCSTATS SIOCDEVPRIVATE+6
-#define LMCIOCDUMPEVENTLOG SIOCDEVPRIVATE+7
-#define LMCIOCGETXINFO SIOCDEVPRIVATE+8
-#define LMCIOCSETCIRCUIT SIOCDEVPRIVATE+9
-#define LMCIOCUNUSEDATM SIOCDEVPRIVATE+10
-#define LMCIOCRESET SIOCDEVPRIVATE+11
-#define LMCIOCT1CONTROL SIOCDEVPRIVATE+12
-#define LMCIOCIFTYPE SIOCDEVPRIVATE+13
-#define LMCIOCXILINX SIOCDEVPRIVATE+14
-
-#define LMC_CARDTYPE_UNKNOWN -1
-#define LMC_CARDTYPE_HSSI 1 /* probed card is a HSSI card */
-#define LMC_CARDTYPE_DS3 2 /* probed card is a DS3 card */
-#define LMC_CARDTYPE_SSI 3 /* probed card is a SSI card */
-#define LMC_CARDTYPE_T1 4 /* probed card is a T1 card */
-
-#define LMC_CTL_CARDTYPE_LMC5200 0 /* HSSI */
-#define LMC_CTL_CARDTYPE_LMC5245 1 /* DS3 */
-#define LMC_CTL_CARDTYPE_LMC1000 2 /* SSI, V.35 */
-#define LMC_CTL_CARDTYPE_LMC1200 3 /* DS1 */
-
-#define LMC_CTL_OFF 0 /* generic OFF value */
-#define LMC_CTL_ON 1 /* generic ON value */
-
-#define LMC_CTL_CLOCK_SOURCE_EXT 0 /* clock off line */
-#define LMC_CTL_CLOCK_SOURCE_INT 1 /* internal clock */
-
-#define LMC_CTL_CRC_LENGTH_16 16
-#define LMC_CTL_CRC_LENGTH_32 32
-#define LMC_CTL_CRC_BYTESIZE_2 2
-#define LMC_CTL_CRC_BYTESIZE_4 4
-
-
-#define LMC_CTL_CABLE_LENGTH_LT_100FT 0 /* DS3 cable < 100 feet */
-#define LMC_CTL_CABLE_LENGTH_GT_100FT 1 /* DS3 cable >= 100 feet */
-
-#define LMC_CTL_CIRCUIT_TYPE_E1 0
-#define LMC_CTL_CIRCUIT_TYPE_T1 1
-
-/*
- * IFTYPE defines
- */
-#define LMC_PPP 1 /* use generic HDLC interface */
-#define LMC_NET 2 /* use direct net interface */
-#define LMC_RAW 3 /* use direct net interface */
-
-/*
- * These are not in the least IOCTL related, but I want them common.
- */
-/*
- * assignments for the GPIO register on the DEC chip (common)
- */
-#define LMC_GEP_INIT 0x01 /* 0: */
-#define LMC_GEP_RESET 0x02 /* 1: */
-#define LMC_GEP_MODE 0x10 /* 4: */
-#define LMC_GEP_DP 0x20 /* 5: */
-#define LMC_GEP_DATA 0x40 /* 6: serial out */
-#define LMC_GEP_CLK 0x80 /* 7: serial clock */
-
-/*
- * HSSI GPIO assignments
- */
-#define LMC_GEP_HSSI_ST 0x04 /* 2: receive timing sense (deprecated) */
-#define LMC_GEP_HSSI_CLOCK 0x08 /* 3: clock source */
-
-/*
- * T1 GPIO assignments
- */
-#define LMC_GEP_SSI_GENERATOR 0x04 /* 2: enable prog freq gen serial i/f */
-#define LMC_GEP_SSI_TXCLOCK 0x08 /* 3: provide clock on TXCLOCK output */
-
-/*
- * Common MII16 bits
- */
-#define LMC_MII16_LED0 0x0080
-#define LMC_MII16_LED1 0x0100
-#define LMC_MII16_LED2 0x0200
-#define LMC_MII16_LED3 0x0400 /* Error, and the red one */
-#define LMC_MII16_LED_ALL 0x0780 /* LED bit mask */
-#define LMC_MII16_FIFO_RESET 0x0800
-
-/*
- * definitions for HSSI
- */
-#define LMC_MII16_HSSI_TA 0x0001
-#define LMC_MII16_HSSI_CA 0x0002
-#define LMC_MII16_HSSI_LA 0x0004
-#define LMC_MII16_HSSI_LB 0x0008
-#define LMC_MII16_HSSI_LC 0x0010
-#define LMC_MII16_HSSI_TM 0x0020
-#define LMC_MII16_HSSI_CRC 0x0040
-
-/*
- * assignments for the MII register 16 (DS3)
- */
-#define LMC_MII16_DS3_ZERO 0x0001
-#define LMC_MII16_DS3_TRLBK 0x0002
-#define LMC_MII16_DS3_LNLBK 0x0004
-#define LMC_MII16_DS3_RAIS 0x0008
-#define LMC_MII16_DS3_TAIS 0x0010
-#define LMC_MII16_DS3_BIST 0x0020
-#define LMC_MII16_DS3_DLOS 0x0040
-#define LMC_MII16_DS3_CRC 0x1000
-#define LMC_MII16_DS3_SCRAM 0x2000
-#define LMC_MII16_DS3_SCRAM_LARS 0x4000
-
-/* Note: 2 pairs of LEDs where swapped by mistake
- * in Xilinx code for DS3 & DS1 adapters */
-#define LMC_DS3_LED0 0x0100 /* bit 08 yellow */
-#define LMC_DS3_LED1 0x0080 /* bit 07 blue */
-#define LMC_DS3_LED2 0x0400 /* bit 10 green */
-#define LMC_DS3_LED3 0x0200 /* bit 09 red */
-
-/*
- * framer register 0 and 7 (7 is latched and reset on read)
- */
-#define LMC_FRAMER_REG0_DLOS 0x80 /* digital loss of service */
-#define LMC_FRAMER_REG0_OOFS 0x40 /* out of frame sync */
-#define LMC_FRAMER_REG0_AIS 0x20 /* alarm indication signal */
-#define LMC_FRAMER_REG0_CIS 0x10 /* channel idle */
-#define LMC_FRAMER_REG0_LOC 0x08 /* loss of clock */
-
-/*
- * Framer register 9 contains the blue alarm signal
- */
-#define LMC_FRAMER_REG9_RBLUE 0x02 /* Blue alarm failure */
-
-/*
- * Framer register 0x10 contains xbit error
- */
-#define LMC_FRAMER_REG10_XBIT 0x01 /* X bit error alarm failure */
-
-/*
- * And SSI, LMC1000
- */
-#define LMC_MII16_SSI_DTR 0x0001 /* DTR output RW */
-#define LMC_MII16_SSI_DSR 0x0002 /* DSR input RO */
-#define LMC_MII16_SSI_RTS 0x0004 /* RTS output RW */
-#define LMC_MII16_SSI_CTS 0x0008 /* CTS input RO */
-#define LMC_MII16_SSI_DCD 0x0010 /* DCD input RO */
-#define LMC_MII16_SSI_RI 0x0020 /* RI input RO */
-#define LMC_MII16_SSI_CRC 0x1000 /* CRC select - RW */
-
-/*
- * bits 0x0080 through 0x0800 are generic, and described
- * above with LMC_MII16_LED[0123] _LED_ALL, and _FIFO_RESET
- */
-#define LMC_MII16_SSI_LL 0x1000 /* LL output RW */
-#define LMC_MII16_SSI_RL 0x2000 /* RL output RW */
-#define LMC_MII16_SSI_TM 0x4000 /* TM input RO */
-#define LMC_MII16_SSI_LOOP 0x8000 /* loopback enable RW */
-
-/*
- * Some of the MII16 bits are mirrored in the MII17 register as well,
- * but let's keep thing separate for now, and get only the cable from
- * the MII17.
- */
-#define LMC_MII17_SSI_CABLE_MASK 0x0038 /* mask to extract the cable type */
-#define LMC_MII17_SSI_CABLE_SHIFT 3 /* shift to extract the cable type */
-
-/*
- * And T1, LMC1200
- */
-#define LMC_MII16_T1_UNUSED1 0x0003
-#define LMC_MII16_T1_XOE 0x0004
-#define LMC_MII16_T1_RST 0x0008 /* T1 chip reset - RW */
-#define LMC_MII16_T1_Z 0x0010 /* output impedance T1=1, E1=0 output - RW */
-#define LMC_MII16_T1_INTR 0x0020 /* interrupt from 8370 - RO */
-#define LMC_MII16_T1_ONESEC 0x0040 /* one second square wave - ro */
-
-#define LMC_MII16_T1_LED0 0x0100
-#define LMC_MII16_T1_LED1 0x0080
-#define LMC_MII16_T1_LED2 0x0400
-#define LMC_MII16_T1_LED3 0x0200
-#define LMC_MII16_T1_FIFO_RESET 0x0800
-
-#define LMC_MII16_T1_CRC 0x1000 /* CRC select - RW */
-#define LMC_MII16_T1_UNUSED2 0xe000
-
-
-/* 8370 framer registers */
-
-#define T1FRAMER_ALARM1_STATUS 0x47
-#define T1FRAMER_ALARM2_STATUS 0x48
-#define T1FRAMER_FERR_LSB 0x50
-#define T1FRAMER_FERR_MSB 0x51 /* framing bit error counter */
-#define T1FRAMER_LCV_LSB 0x54
-#define T1FRAMER_LCV_MSB 0x55 /* line code violation counter */
-#define T1FRAMER_AERR 0x5A
-
-/* mask for the above AERR register */
-#define T1FRAMER_LOF_MASK (0x0f0) /* receive loss of frame */
-#define T1FRAMER_COFA_MASK (0x0c0) /* change of frame alignment */
-#define T1FRAMER_SEF_MASK (0x03) /* severely errored frame */
-
-/* 8370 framer register ALM1 (0x47) values
- * used to determine link status
- */
-
-#define T1F_SIGFRZ 0x01 /* signaling freeze */
-#define T1F_RLOF 0x02 /* receive loss of frame alignment */
-#define T1F_RLOS 0x04 /* receive loss of signal */
-#define T1F_RALOS 0x08 /* receive analog loss of signal or RCKI loss of clock */
-#define T1F_RAIS 0x10 /* receive alarm indication signal */
-#define T1F_UNUSED 0x20
-#define T1F_RYEL 0x40 /* receive yellow alarm */
-#define T1F_RMYEL 0x80 /* receive multiframe yellow alarm */
-
-#define LMC_T1F_WRITE 0
-#define LMC_T1F_READ 1
-
-typedef struct lmc_st1f_control {
- int command;
- int address;
- int value;
- char __user *data;
-} lmc_t1f_control;
-
-enum lmc_xilinx_c {
- lmc_xilinx_reset = 1,
- lmc_xilinx_load_prom = 2,
- lmc_xilinx_load = 3
-};
-
-struct lmc_xilinx_control {
- enum lmc_xilinx_c command;
- int len;
- char __user *data;
-};
-
-/* ------------------ end T1 defs ------------------- */
-
-#define LMC_MII_LedMask 0x0780
-#define LMC_MII_LedBitPos 7
-
-#endif
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
deleted file mode 100644
index 76c6b4f89890..000000000000
--- a/drivers/net/wan/lmc/lmc_main.c
+++ /dev/null
@@ -1,2009 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
- /*
- * Copyright (c) 1997-2000 LAN Media Corporation (LMC)
- * All rights reserved. www.lanmedia.com
- * Generic HDLC port Copyright (C) 2008 Krzysztof Halasa <khc@pm.waw.pl>
- *
- * This code is written by:
- * Andrew Stanley-Jones (asj@cban.com)
- * Rob Braun (bbraun@vix.com),
- * Michael Graff (explorer@vix.com) and
- * Matt Thomas (matt@3am-software.com).
- *
- * With Help By:
- * David Boggs
- * Ron Crane
- * Alan Cox
- *
- * Driver for the LanMedia LMC5200, LMC5245, LMC1000, LMC1200 cards.
- *
- * To control link specific options lmcctl is required.
- * It can be obtained from ftp.lanmedia.com.
- *
- * Linux driver notes:
- * Linux uses the device struct lmc_private to pass private information
- * around.
- *
- * The initialization portion of this driver (the lmc_reset() and the
- * lmc_dec_reset() functions, as well as the led controls and the
- * lmc_initcsrs() functions.
- *
- * The watchdog function runs every second and checks to see if
- * we still have link, and that the timing source is what we expected
- * it to be. If link is lost, the interface is marked down, and
- * we no longer can transmit.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/timer.h>
-#include <linux/ptrace.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/hdlc.h>
-#include <linux/in.h>
-#include <linux/if_arp.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/inet.h>
-#include <linux/bitops.h>
-#include <asm/processor.h> /* Processor type for cache alignment. */
-#include <asm/io.h>
-#include <asm/dma.h>
-#include <linux/uaccess.h>
-#include <linux/jiffies.h>
-//#include <asm/spinlock.h>
-
-#define DRIVER_MAJOR_VERSION 1
-#define DRIVER_MINOR_VERSION 34
-#define DRIVER_SUB_VERSION 0
-
-#define DRIVER_VERSION ((DRIVER_MAJOR_VERSION << 8) + DRIVER_MINOR_VERSION)
-
-#include "lmc.h"
-#include "lmc_var.h"
-#include "lmc_ioctl.h"
-#include "lmc_debug.h"
-#include "lmc_proto.h"
-
-static int LMC_PKT_BUF_SZ = 1542;
-
-static const struct pci_device_id lmc_pci_tbl[] = {
- { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_TULIP_FAST,
- PCI_VENDOR_ID_LMC, PCI_ANY_ID },
- { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_TULIP_FAST,
- PCI_ANY_ID, PCI_VENDOR_ID_LMC },
- { 0 }
-};
-
-MODULE_DEVICE_TABLE(pci, lmc_pci_tbl);
-MODULE_LICENSE("GPL v2");
-
-
-static netdev_tx_t lmc_start_xmit(struct sk_buff *skb,
- struct net_device *dev);
-static int lmc_rx (struct net_device *dev);
-static int lmc_open(struct net_device *dev);
-static int lmc_close(struct net_device *dev);
-static struct net_device_stats *lmc_get_stats(struct net_device *dev);
-static irqreturn_t lmc_interrupt(int irq, void *dev_instance);
-static void lmc_initcsrs(lmc_softc_t * const sc, lmc_csrptr_t csr_base, size_t csr_size);
-static void lmc_softreset(lmc_softc_t * const);
-static void lmc_running_reset(struct net_device *dev);
-static int lmc_ifdown(struct net_device * const);
-static void lmc_watchdog(struct timer_list *t);
-static void lmc_reset(lmc_softc_t * const sc);
-static void lmc_dec_reset(lmc_softc_t * const sc);
-static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue);
-
-/*
- * linux reserves 16 device specific IOCTLs. We call them
- * LMCIOC* to control various bits of our world.
- */
-static int lmc_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
- void __user *data, int cmd) /*fold00*/
-{
- lmc_softc_t *sc = dev_to_sc(dev);
- lmc_ctl_t ctl;
- int ret = -EOPNOTSUPP;
- u16 regVal;
- unsigned long flags;
-
- /*
- * Most functions mess with the structure
- * Disable interrupts while we do the polling
- */
-
- switch (cmd) {
- /*
- * Return current driver state. Since we keep this up
- * To date internally, just copy this out to the user.
- */
- case LMCIOCGINFO: /*fold01*/
- if (copy_to_user(data, &sc->ictl, sizeof(lmc_ctl_t)))
- ret = -EFAULT;
- else
- ret = 0;
- break;
-
- case LMCIOCSINFO: /*fold01*/
- if (!capable(CAP_NET_ADMIN)) {
- ret = -EPERM;
- break;
- }
-
- if(dev->flags & IFF_UP){
- ret = -EBUSY;
- break;
- }
-
- if (copy_from_user(&ctl, data, sizeof(lmc_ctl_t))) {
- ret = -EFAULT;
- break;
- }
-
- spin_lock_irqsave(&sc->lmc_lock, flags);
- sc->lmc_media->set_status (sc, &ctl);
-
- if(ctl.crc_length != sc->ictl.crc_length) {
- sc->lmc_media->set_crc_length(sc, ctl.crc_length);
- if (sc->ictl.crc_length == LMC_CTL_CRC_LENGTH_16)
- sc->TxDescriptControlInit |= LMC_TDES_ADD_CRC_DISABLE;
- else
- sc->TxDescriptControlInit &= ~LMC_TDES_ADD_CRC_DISABLE;
- }
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
-
- ret = 0;
- break;
-
- case LMCIOCIFTYPE: /*fold01*/
- {
- u16 old_type = sc->if_type;
- u16 new_type;
-
- if (!capable(CAP_NET_ADMIN)) {
- ret = -EPERM;
- break;
- }
-
- if (copy_from_user(&new_type, data, sizeof(u16))) {
- ret = -EFAULT;
- break;
- }
-
-
- if (new_type == old_type)
- {
- ret = 0 ;
- break; /* no change */
- }
-
- spin_lock_irqsave(&sc->lmc_lock, flags);
- lmc_proto_close(sc);
-
- sc->if_type = new_type;
- lmc_proto_attach(sc);
- ret = lmc_proto_open(sc);
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
- break;
- }
-
- case LMCIOCGETXINFO: /*fold01*/
- spin_lock_irqsave(&sc->lmc_lock, flags);
- sc->lmc_xinfo.Magic0 = 0xBEEFCAFE;
-
- sc->lmc_xinfo.PciCardType = sc->lmc_cardtype;
- sc->lmc_xinfo.PciSlotNumber = 0;
- sc->lmc_xinfo.DriverMajorVersion = DRIVER_MAJOR_VERSION;
- sc->lmc_xinfo.DriverMinorVersion = DRIVER_MINOR_VERSION;
- sc->lmc_xinfo.DriverSubVersion = DRIVER_SUB_VERSION;
- sc->lmc_xinfo.XilinxRevisionNumber =
- lmc_mii_readreg (sc, 0, 3) & 0xf;
- sc->lmc_xinfo.MaxFrameSize = LMC_PKT_BUF_SZ;
- sc->lmc_xinfo.link_status = sc->lmc_media->get_link_status (sc);
- sc->lmc_xinfo.mii_reg16 = lmc_mii_readreg (sc, 0, 16);
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
-
- sc->lmc_xinfo.Magic1 = 0xDEADBEEF;
-
- if (copy_to_user(data, &sc->lmc_xinfo, sizeof(struct lmc_xinfo)))
- ret = -EFAULT;
- else
- ret = 0;
-
- break;
-
- case LMCIOCGETLMCSTATS:
- spin_lock_irqsave(&sc->lmc_lock, flags);
- if (sc->lmc_cardtype == LMC_CARDTYPE_T1) {
- lmc_mii_writereg(sc, 0, 17, T1FRAMER_FERR_LSB);
- sc->extra_stats.framingBitErrorCount +=
- lmc_mii_readreg(sc, 0, 18) & 0xff;
- lmc_mii_writereg(sc, 0, 17, T1FRAMER_FERR_MSB);
- sc->extra_stats.framingBitErrorCount +=
- (lmc_mii_readreg(sc, 0, 18) & 0xff) << 8;
- lmc_mii_writereg(sc, 0, 17, T1FRAMER_LCV_LSB);
- sc->extra_stats.lineCodeViolationCount +=
- lmc_mii_readreg(sc, 0, 18) & 0xff;
- lmc_mii_writereg(sc, 0, 17, T1FRAMER_LCV_MSB);
- sc->extra_stats.lineCodeViolationCount +=
- (lmc_mii_readreg(sc, 0, 18) & 0xff) << 8;
- lmc_mii_writereg(sc, 0, 17, T1FRAMER_AERR);
- regVal = lmc_mii_readreg(sc, 0, 18) & 0xff;
-
- sc->extra_stats.lossOfFrameCount +=
- (regVal & T1FRAMER_LOF_MASK) >> 4;
- sc->extra_stats.changeOfFrameAlignmentCount +=
- (regVal & T1FRAMER_COFA_MASK) >> 2;
- sc->extra_stats.severelyErroredFrameCount +=
- regVal & T1FRAMER_SEF_MASK;
- }
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
- if (copy_to_user(data, &sc->lmc_device->stats,
- sizeof(sc->lmc_device->stats)) ||
- copy_to_user(data + sizeof(sc->lmc_device->stats),
- &sc->extra_stats, sizeof(sc->extra_stats)))
- ret = -EFAULT;
- else
- ret = 0;
- break;
-
- case LMCIOCCLEARLMCSTATS:
- if (!capable(CAP_NET_ADMIN)) {
- ret = -EPERM;
- break;
- }
-
- spin_lock_irqsave(&sc->lmc_lock, flags);
- memset(&sc->lmc_device->stats, 0, sizeof(sc->lmc_device->stats));
- memset(&sc->extra_stats, 0, sizeof(sc->extra_stats));
- sc->extra_stats.check = STATCHECK;
- sc->extra_stats.version_size = (DRIVER_VERSION << 16) +
- sizeof(sc->lmc_device->stats) + sizeof(sc->extra_stats);
- sc->extra_stats.lmc_cardtype = sc->lmc_cardtype;
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
- ret = 0;
- break;
-
- case LMCIOCSETCIRCUIT: /*fold01*/
- if (!capable(CAP_NET_ADMIN)){
- ret = -EPERM;
- break;
- }
-
- if(dev->flags & IFF_UP){
- ret = -EBUSY;
- break;
- }
-
- if (copy_from_user(&ctl, data, sizeof(lmc_ctl_t))) {
- ret = -EFAULT;
- break;
- }
- spin_lock_irqsave(&sc->lmc_lock, flags);
- sc->lmc_media->set_circuit_type(sc, ctl.circuit_type);
- sc->ictl.circuit_type = ctl.circuit_type;
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
- ret = 0;
-
- break;
-
- case LMCIOCRESET: /*fold01*/
- if (!capable(CAP_NET_ADMIN)){
- ret = -EPERM;
- break;
- }
-
- spin_lock_irqsave(&sc->lmc_lock, flags);
- /* Reset driver and bring back to current state */
- printk (" REG16 before reset +%04x\n", lmc_mii_readreg (sc, 0, 16));
- lmc_running_reset (dev);
- printk (" REG16 after reset +%04x\n", lmc_mii_readreg (sc, 0, 16));
-
- LMC_EVENT_LOG(LMC_EVENT_FORCEDRESET, LMC_CSR_READ (sc, csr_status), lmc_mii_readreg (sc, 0, 16));
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
-
- ret = 0;
- break;
-
-#ifdef DEBUG
- case LMCIOCDUMPEVENTLOG:
- if (copy_to_user(data, &lmcEventLogIndex, sizeof(u32))) {
- ret = -EFAULT;
- break;
- }
- if (copy_to_user(data + sizeof(u32), lmcEventLogBuf,
- sizeof(lmcEventLogBuf)))
- ret = -EFAULT;
- else
- ret = 0;
-
- break;
-#endif /* end ifdef _DBG_EVENTLOG */
- case LMCIOCT1CONTROL: /*fold01*/
- if (sc->lmc_cardtype != LMC_CARDTYPE_T1){
- ret = -EOPNOTSUPP;
- break;
- }
- break;
- case LMCIOCXILINX: /*fold01*/
- {
- struct lmc_xilinx_control xc; /*fold02*/
-
- if (!capable(CAP_NET_ADMIN)){
- ret = -EPERM;
- break;
- }
-
- /*
- * Stop the xwitter whlie we restart the hardware
- */
- netif_stop_queue(dev);
-
- if (copy_from_user(&xc, data, sizeof(struct lmc_xilinx_control))) {
- ret = -EFAULT;
- break;
- }
- switch(xc.command){
- case lmc_xilinx_reset: /*fold02*/
- {
- spin_lock_irqsave(&sc->lmc_lock, flags);
- lmc_mii_readreg (sc, 0, 16);
-
- /*
- * Make all of them 0 and make input
- */
- lmc_gpio_mkinput(sc, 0xff);
-
- /*
- * make the reset output
- */
- lmc_gpio_mkoutput(sc, LMC_GEP_RESET);
-
- /*
- * RESET low to force configuration. This also forces
- * the transmitter clock to be internal, but we expect to reset
- * that later anyway.
- */
-
- sc->lmc_gpio &= ~LMC_GEP_RESET;
- LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio);
-
-
- /*
- * hold for more than 10 microseconds
- */
- udelay(50);
-
- sc->lmc_gpio |= LMC_GEP_RESET;
- LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio);
-
-
- /*
- * stop driving Xilinx-related signals
- */
- lmc_gpio_mkinput(sc, 0xff);
-
- /* Reset the framer hardware */
- sc->lmc_media->set_link_status (sc, 1);
- sc->lmc_media->set_status (sc, NULL);
-// lmc_softreset(sc);
-
- {
- int i;
- for(i = 0; i < 5; i++){
- lmc_led_on(sc, LMC_DS3_LED0);
- mdelay(100);
- lmc_led_off(sc, LMC_DS3_LED0);
- lmc_led_on(sc, LMC_DS3_LED1);
- mdelay(100);
- lmc_led_off(sc, LMC_DS3_LED1);
- lmc_led_on(sc, LMC_DS3_LED3);
- mdelay(100);
- lmc_led_off(sc, LMC_DS3_LED3);
- lmc_led_on(sc, LMC_DS3_LED2);
- mdelay(100);
- lmc_led_off(sc, LMC_DS3_LED2);
- }
- }
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
-
-
-
- ret = 0x0;
-
- }
-
- break;
- case lmc_xilinx_load_prom: /*fold02*/
- {
- int timeout = 500000;
- spin_lock_irqsave(&sc->lmc_lock, flags);
- lmc_mii_readreg (sc, 0, 16);
-
- /*
- * Make all of them 0 and make input
- */
- lmc_gpio_mkinput(sc, 0xff);
-
- /*
- * make the reset output
- */
- lmc_gpio_mkoutput(sc, LMC_GEP_DP | LMC_GEP_RESET);
-
- /*
- * RESET low to force configuration. This also forces
- * the transmitter clock to be internal, but we expect to reset
- * that later anyway.
- */
-
- sc->lmc_gpio &= ~(LMC_GEP_RESET | LMC_GEP_DP);
- LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio);
-
-
- /*
- * hold for more than 10 microseconds
- */
- udelay(50);
-
- sc->lmc_gpio |= LMC_GEP_DP | LMC_GEP_RESET;
- LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio);
-
- /*
- * busy wait for the chip to reset
- */
- while( (LMC_CSR_READ(sc, csr_gp) & LMC_GEP_INIT) == 0 &&
- (timeout-- > 0))
- cpu_relax();
-
-
- /*
- * stop driving Xilinx-related signals
- */
- lmc_gpio_mkinput(sc, 0xff);
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
-
- ret = 0x0;
-
-
- break;
-
- }
-
- case lmc_xilinx_load: /*fold02*/
- {
- char *data;
- int pos;
- int timeout = 500000;
-
- if (!xc.data) {
- ret = -EINVAL;
- break;
- }
-
- data = memdup_user(xc.data, xc.len);
- if (IS_ERR(data)) {
- ret = PTR_ERR(data);
- break;
- }
-
- printk("%s: Starting load of data Len: %d at 0x%p == 0x%p\n", dev->name, xc.len, xc.data, data);
-
- spin_lock_irqsave(&sc->lmc_lock, flags);
- lmc_gpio_mkinput(sc, 0xff);
-
- /*
- * Clear the Xilinx and start programming from the DEC
- */
-
- /*
- * Set output as:
- * Reset: 0 (active)
- * DP: 0 (active)
- * Mode: 1
- *
- */
- sc->lmc_gpio = 0x00;
- sc->lmc_gpio &= ~LMC_GEP_DP;
- sc->lmc_gpio &= ~LMC_GEP_RESET;
- sc->lmc_gpio |= LMC_GEP_MODE;
- LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio);
-
- lmc_gpio_mkoutput(sc, LMC_GEP_MODE | LMC_GEP_DP | LMC_GEP_RESET);
-
- /*
- * Wait at least 10 us (20 to be safe)
- */
- udelay(50);
-
- /*
- * Clear reset and activate programming lines
- * Reset: Input
- * DP: Input
- * Clock: Output
- * Data: Output
- * Mode: Output
- */
- lmc_gpio_mkinput(sc, LMC_GEP_DP | LMC_GEP_RESET);
-
- /*
- * Set MODE, DATA and CLK to 1
- */
- sc->lmc_gpio = 0x00;
- sc->lmc_gpio |= LMC_GEP_MODE;
- sc->lmc_gpio |= LMC_GEP_DATA;
- sc->lmc_gpio |= LMC_GEP_CLK;
- LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio);
-
- lmc_gpio_mkoutput(sc, LMC_GEP_DATA | LMC_GEP_CLK | LMC_GEP_MODE );
-
- /*
- * busy wait for the chip to reset
- */
- while( (LMC_CSR_READ(sc, csr_gp) & LMC_GEP_INIT) == 0 &&
- (timeout-- > 0))
- cpu_relax();
-
- printk(KERN_DEBUG "%s: Waited %d for the Xilinx to clear its memory\n", dev->name, 500000-timeout);
-
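- /*
- * Shift the bitstream out one configuration bit per cycle: the data
- * line is set up while the clock is low, then latched by the Xilinx
- * on the rising clock edge driven below.
- */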
- for(pos = 0; pos < xc.len; pos++){
- switch(data[pos]){
- case 0:
- sc->lmc_gpio &= ~LMC_GEP_DATA; /* Data is 0 */
- break;
- case 1:
- sc->lmc_gpio |= LMC_GEP_DATA; /* Data is 1 */
- break;
- default:
- printk(KERN_WARNING "%s Bad data in xilinx programming data at %d, got %d wanted 0 or 1\n", dev->name, pos, data[pos]);
- sc->lmc_gpio |= LMC_GEP_DATA; /* Assume it's 1 */
- }
- sc->lmc_gpio &= ~LMC_GEP_CLK; /* Clock to zero */
- sc->lmc_gpio |= LMC_GEP_MODE;
- LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio);
- udelay(1);
-
- sc->lmc_gpio |= LMC_GEP_CLK; /* Put the clock back to one */
- sc->lmc_gpio |= LMC_GEP_MODE;
- LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio);
- udelay(1);
- }
- if((LMC_CSR_READ(sc, csr_gp) & LMC_GEP_INIT) == 0){
- printk(KERN_WARNING "%s: Reprogramming FAILED. Needs to be reprogrammed. (corrupted data)\n", dev->name);
- }
- else if((LMC_CSR_READ(sc, csr_gp) & LMC_GEP_DP) == 0){
- printk(KERN_WARNING "%s: Reprogramming FAILED. Needs to be reprogrammed. (done)\n", dev->name);
- }
- else {
- printk(KERN_DEBUG "%s: Done reprogramming Xilinx, %d bits, good luck!\n", dev->name, pos);
- }
-
- lmc_gpio_mkinput(sc, 0xff);
-
- sc->lmc_miireg16 |= LMC_MII16_FIFO_RESET;
- lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16);
-
- sc->lmc_miireg16 &= ~LMC_MII16_FIFO_RESET;
- lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16);
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
-
- kfree(data);
-
- ret = 0;
-
- break;
- }
- default: /*fold02*/
- ret = -EBADE;
- break;
- }
-
- netif_wake_queue(dev);
- sc->lmc_txfull = 0;
-
- }
- break;
- default:
- break;
- }
-
- return ret;
-}
-
-
-/* the watchdog process that cruises around */
-static void lmc_watchdog(struct timer_list *t) /*fold00*/
-{
- lmc_softc_t *sc = from_timer(sc, t, timer);
- struct net_device *dev = sc->lmc_device;
- int link_status;
- u32 ticks;
- unsigned long flags;
-
- spin_lock_irqsave(&sc->lmc_lock, flags);
-
- if(sc->check != 0xBEAFCAFE){
- printk("LMC: Corrupt net_device struct, breaking out\n");
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
- return;
- }
-
-
- /* Make sure the tx jabber and rx watchdog are off,
- * and the transmit and receive processes are running.
- */
-
- LMC_CSR_WRITE (sc, csr_15, 0x00000011);
- sc->lmc_cmdmode |= TULIP_CMD_TXRUN | TULIP_CMD_RXRUN;
- LMC_CSR_WRITE (sc, csr_command, sc->lmc_cmdmode);
-
- if (sc->lmc_ok == 0)
- goto kick_timer;
-
- LMC_EVENT_LOG(LMC_EVENT_WATCHDOG, LMC_CSR_READ (sc, csr_status), lmc_mii_readreg (sc, 0, 16));
-
- /* --- begin time out check -----------------------------------
- * check for a transmit interrupt timeout
- * Has the packet xmt vs xmt serviced threshold been exceeded */
- if (sc->lmc_taint_tx == sc->lastlmc_taint_tx &&
- sc->lmc_device->stats.tx_packets > sc->lasttx_packets &&
- sc->tx_TimeoutInd == 0)
- {
-
- /* wait for the watchdog to come around again */
- sc->tx_TimeoutInd = 1;
- }
- else if (sc->lmc_taint_tx == sc->lastlmc_taint_tx &&
- sc->lmc_device->stats.tx_packets > sc->lasttx_packets &&
- sc->tx_TimeoutInd)
- {
-
- LMC_EVENT_LOG(LMC_EVENT_XMTINTTMO, LMC_CSR_READ (sc, csr_status), 0);
-
- sc->tx_TimeoutDisplay = 1;
- sc->extra_stats.tx_TimeoutCnt++;
-
- /* DEC chip is stuck, hit it with a RESET!!!! */
- lmc_running_reset (dev);
-
-
- /* look at receive & transmit process state to make sure they are running */
- LMC_EVENT_LOG(LMC_EVENT_RESET1, LMC_CSR_READ (sc, csr_status), 0);
-
- /* look at: DSR - 02 for Reg 16
- * CTS - 08
- * DCD - 10
- * RI - 20
- * for Reg 17
- */
- LMC_EVENT_LOG(LMC_EVENT_RESET2, lmc_mii_readreg (sc, 0, 16), lmc_mii_readreg (sc, 0, 17));
-
- /* reset the transmit timeout detection flag */
- sc->tx_TimeoutInd = 0;
- sc->lastlmc_taint_tx = sc->lmc_taint_tx;
- sc->lasttx_packets = sc->lmc_device->stats.tx_packets;
- } else {
- sc->tx_TimeoutInd = 0;
- sc->lastlmc_taint_tx = sc->lmc_taint_tx;
- sc->lasttx_packets = sc->lmc_device->stats.tx_packets;
- }
-
- /* --- end time out check ----------------------------------- */
-
-
- link_status = sc->lmc_media->get_link_status (sc);
-
- /*
- * hardware level link lost, but the interface is marked as up.
- * Mark it as down.
- */
- if ((link_status == 0) && (sc->last_link_status != 0)) {
- printk(KERN_WARNING "%s: hardware/physical link down\n", dev->name);
- sc->last_link_status = 0;
- /* lmc_reset (sc); Why reset??? The link can go down ok */
-
- /* Inform the world that link has been lost */
- netif_carrier_off(dev);
- }
-
- /*
- * hardware link is up, but the interface is marked as down.
- * Bring it back up again.
- */
- if (link_status != 0 && sc->last_link_status == 0) {
- printk(KERN_WARNING "%s: hardware/physical link up\n", dev->name);
- sc->last_link_status = 1;
- /* lmc_reset (sc); Again why reset??? */
-
- netif_carrier_on(dev);
- }
-
- /* Call media specific watchdog functions */
- sc->lmc_media->watchdog(sc);
-
- /*
- * Poke the transmitter to make sure it
- * never stops, even if we run out of mem
- */
- LMC_CSR_WRITE(sc, csr_rxpoll, 0);
-
- /*
- * Check for code that failed
- * and try and fix it as appropriate
- */
- if(sc->failed_ring == 1){
- /*
- * Failed to setup the recv/xmit ring
- * Try again
- */
- sc->failed_ring = 0;
- lmc_softreset(sc);
- }
- if(sc->failed_recv_alloc == 1){
- /*
- * We failed to alloc mem in the
- * interrupt handler, go through the rings
- * and rebuild them
- */
- sc->failed_recv_alloc = 0;
- lmc_softreset(sc);
- }
-
-
- /*
- * remember the timer value
- */
-kick_timer:
-
- ticks = LMC_CSR_READ (sc, csr_gp_timer);
- LMC_CSR_WRITE (sc, csr_gp_timer, 0xffffffffUL);
- sc->ictl.ticks = 0x0000ffff - (ticks & 0x0000ffff);
-
- /*
- * restart this timer.
- */
- sc->timer.expires = jiffies + (HZ);
- add_timer (&sc->timer);
-
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
-}
-
-static int lmc_attach(struct net_device *dev, unsigned short encoding,
- unsigned short parity)
-{
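- /* Only NRZ encoding with 16-bit CCITT CRC framing is supported;
- * reject any other combination. */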
- if (encoding == ENCODING_NRZ && parity == PARITY_CRC16_PR1_CCITT)
- return 0;
- return -EINVAL;
-}
-
-static const struct net_device_ops lmc_ops = {
- .ndo_open = lmc_open,
- .ndo_stop = lmc_close,
- .ndo_start_xmit = hdlc_start_xmit,
- .ndo_siocwandev = hdlc_ioctl,
- .ndo_siocdevprivate = lmc_siocdevprivate,
- .ndo_tx_timeout = lmc_driver_timeout,
- .ndo_get_stats = lmc_get_stats,
-};
-
-static int lmc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
- lmc_softc_t *sc;
- struct net_device *dev;
- u16 subdevice;
- u16 AdapModelNum;
- int err;
- static int cards_found;
-
- err = pcim_enable_device(pdev);
- if (err) {
- printk(KERN_ERR "lmc: pci enable failed: %d\n", err);
- return err;
- }
-
- err = pci_request_regions(pdev, "lmc");
- if (err) {
- printk(KERN_ERR "lmc: pci_request_region failed\n");
- return err;
- }
-
- /*
- * Allocate our own device structure
- */
- sc = devm_kzalloc(&pdev->dev, sizeof(lmc_softc_t), GFP_KERNEL);
- if (!sc)
- return -ENOMEM;
-
- dev = alloc_hdlcdev(sc);
- if (!dev) {
- printk(KERN_ERR "lmc:alloc_netdev for device failed\n");
- return -ENOMEM;
- }
-
-
- dev->type = ARPHRD_HDLC;
- dev_to_hdlc(dev)->xmit = lmc_start_xmit;
- dev_to_hdlc(dev)->attach = lmc_attach;
- dev->netdev_ops = &lmc_ops;
- dev->watchdog_timeo = HZ; /* 1 second */
- dev->tx_queue_len = 100;
- sc->lmc_device = dev;
- sc->name = dev->name;
- sc->if_type = LMC_PPP;
- sc->check = 0xBEAFCAFE;
- dev->base_addr = pci_resource_start(pdev, 0);
- dev->irq = pdev->irq;
- pci_set_drvdata(pdev, dev);
- SET_NETDEV_DEV(dev, &pdev->dev);
-
- /*
- * This will get the protocol layer ready and do any 1 time init's
- * Must have a valid sc and dev structure
- */
- lmc_proto_attach(sc);
-
- /* Init the spin lock so we can use it later */
-
- spin_lock_init(&sc->lmc_lock);
- pci_set_master(pdev);
-
- printk(KERN_INFO "hdlc: detected at %lx, irq %d\n",
- dev->base_addr, dev->irq);
-
- err = register_hdlc_device(dev);
- if (err) {
- printk(KERN_ERR "%s: register_netdev failed.\n", dev->name);
- free_netdev(dev);
- return err;
- }
-
- sc->lmc_cardtype = LMC_CARDTYPE_UNKNOWN;
- sc->lmc_timing = LMC_CTL_CLOCK_SOURCE_EXT;
-
- /*
- * Check both the subvendor and the subdevice: some systems reverse
- * the two settings in the BIOS (apparently version- and arch-
- * dependent). If the subdevice holds the LMC vendor ID, swap in the
- * subsystem vendor instead.
- */
- if ((subdevice = pdev->subsystem_device) == PCI_VENDOR_ID_LMC)
- subdevice = pdev->subsystem_vendor;
-
- switch (subdevice) {
- case PCI_DEVICE_ID_LMC_HSSI:
- printk(KERN_INFO "%s: LMC HSSI\n", dev->name);
- sc->lmc_cardtype = LMC_CARDTYPE_HSSI;
- sc->lmc_media = &lmc_hssi_media;
- break;
- case PCI_DEVICE_ID_LMC_DS3:
- printk(KERN_INFO "%s: LMC DS3\n", dev->name);
- sc->lmc_cardtype = LMC_CARDTYPE_DS3;
- sc->lmc_media = &lmc_ds3_media;
- break;
- case PCI_DEVICE_ID_LMC_SSI:
- printk(KERN_INFO "%s: LMC SSI\n", dev->name);
- sc->lmc_cardtype = LMC_CARDTYPE_SSI;
- sc->lmc_media = &lmc_ssi_media;
- break;
- case PCI_DEVICE_ID_LMC_T1:
- printk(KERN_INFO "%s: LMC T1\n", dev->name);
- sc->lmc_cardtype = LMC_CARDTYPE_T1;
- sc->lmc_media = &lmc_t1_media;
- break;
- default:
- printk(KERN_WARNING "%s: LMC UNKNOWN CARD!\n", dev->name);
- unregister_hdlc_device(dev);
- return -EIO;
- }
-
- lmc_initcsrs (sc, dev->base_addr, 8);
-
- lmc_gpio_mkinput (sc, 0xff);
- sc->lmc_gpio = 0; /* drive no signals yet */
-
- sc->lmc_media->defaults (sc);
-
- sc->lmc_media->set_link_status (sc, LMC_LINK_UP);
-
- /* verify that the PCI Sub System ID matches the Adapter Model number
- * from the MII register
- */
- AdapModelNum = (lmc_mii_readreg (sc, 0, 3) & 0x3f0) >> 4;
-
- if ((AdapModelNum != LMC_ADAP_T1 || /* detect LMC1200 */
- subdevice != PCI_DEVICE_ID_LMC_T1) &&
- (AdapModelNum != LMC_ADAP_SSI || /* detect LMC1000 */
- subdevice != PCI_DEVICE_ID_LMC_SSI) &&
- (AdapModelNum != LMC_ADAP_DS3 || /* detect LMC5245 */
- subdevice != PCI_DEVICE_ID_LMC_DS3) &&
- (AdapModelNum != LMC_ADAP_HSSI || /* detect LMC5200 */
- subdevice != PCI_DEVICE_ID_LMC_HSSI))
- printk(KERN_WARNING "%s: Model number (%d) miscompare for PCI"
- " Subsystem ID = 0x%04x\n",
- dev->name, AdapModelNum, subdevice);
-
- /*
- * reset clock
- */
- LMC_CSR_WRITE (sc, csr_gp_timer, 0xFFFFFFFFUL);
-
- sc->board_idx = cards_found++;
- sc->extra_stats.check = STATCHECK;
- sc->extra_stats.version_size = (DRIVER_VERSION << 16) +
- sizeof(sc->lmc_device->stats) + sizeof(sc->extra_stats);
- sc->extra_stats.lmc_cardtype = sc->lmc_cardtype;
-
- sc->lmc_ok = 0;
- sc->last_link_status = 0;
-
- return 0;
-}
-
-/*
- * Called from pci when removing module.
- */
-static void lmc_remove_one(struct pci_dev *pdev)
-{
- struct net_device *dev = pci_get_drvdata(pdev);
-
- if (dev) {
- printk(KERN_DEBUG "%s: removing...\n", dev->name);
- unregister_hdlc_device(dev);
- free_netdev(dev);
- }
-}
-
-/* After this is called, packets can be sent.
- * Does not initialize the addresses
- */
-static int lmc_open(struct net_device *dev)
-{
- lmc_softc_t *sc = dev_to_sc(dev);
- int err;
-
- lmc_led_on(sc, LMC_DS3_LED0);
-
- lmc_dec_reset(sc);
- lmc_reset(sc);
-
- LMC_EVENT_LOG(LMC_EVENT_RESET1, LMC_CSR_READ(sc, csr_status), 0);
- LMC_EVENT_LOG(LMC_EVENT_RESET2, lmc_mii_readreg(sc, 0, 16),
- lmc_mii_readreg(sc, 0, 17));
-
- if (sc->lmc_ok)
- return 0;
-
- lmc_softreset (sc);
-
- /* Since we have to use the PCI bus, this should work on x86, alpha, ppc */
- if (request_irq (dev->irq, lmc_interrupt, IRQF_SHARED, dev->name, dev)){
- printk(KERN_WARNING "%s: could not get irq: %d\n", dev->name, dev->irq);
- return -EAGAIN;
- }
- sc->got_irq = 1;
-
- /* Assert Terminal Active */
- sc->lmc_miireg16 |= LMC_MII16_LED_ALL;
- sc->lmc_media->set_link_status (sc, LMC_LINK_UP);
-
- /*
- * reset to last state.
- */
- sc->lmc_media->set_status (sc, NULL);
-
- /* setup default bits to be used in tulip_desc_t transmit descriptor
- * -baz */
- sc->TxDescriptControlInit = (
- LMC_TDES_INTERRUPT_ON_COMPLETION
- | LMC_TDES_FIRST_SEGMENT
- | LMC_TDES_LAST_SEGMENT
- | LMC_TDES_SECOND_ADDR_CHAINED
- | LMC_TDES_DISABLE_PADDING
- );
-
- if (sc->ictl.crc_length == LMC_CTL_CRC_LENGTH_16) {
- /* disable 32 bit CRC generated by ASIC */
- sc->TxDescriptControlInit |= LMC_TDES_ADD_CRC_DISABLE;
- }
- sc->lmc_media->set_crc_length(sc, sc->ictl.crc_length);
- /* Acknowledge the Terminal Active and light LEDs */
-
- /* dev->flags |= IFF_UP; */
-
- if ((err = lmc_proto_open(sc)) != 0)
- return err;
-
- netif_start_queue(dev);
- sc->extra_stats.tx_tbusy0++;
-
- /*
- * select what interrupts we want to get
- */
- sc->lmc_intrmask = 0;
- /* Should be using the default interrupt mask defined in the .h file. */
- sc->lmc_intrmask |= (TULIP_STS_NORMALINTR
- | TULIP_STS_RXINTR
- | TULIP_STS_TXINTR
- | TULIP_STS_ABNRMLINTR
- | TULIP_STS_SYSERROR
- | TULIP_STS_TXSTOPPED
- | TULIP_STS_TXUNDERFLOW
- | TULIP_STS_RXSTOPPED
- | TULIP_STS_RXNOBUF
- );
- LMC_CSR_WRITE (sc, csr_intr, sc->lmc_intrmask);
-
- sc->lmc_cmdmode |= TULIP_CMD_TXRUN;
- sc->lmc_cmdmode |= TULIP_CMD_RXRUN;
- LMC_CSR_WRITE (sc, csr_command, sc->lmc_cmdmode);
-
- sc->lmc_ok = 1; /* Run watchdog */
-
- /*
- * Set the if up now - pfb
- */
-
- sc->last_link_status = 1;
-
- /*
- * Set up the watchdog timer and start it running.
- */
- timer_setup(&sc->timer, lmc_watchdog, 0);
- sc->timer.expires = jiffies + HZ;
- add_timer (&sc->timer);
-
- return 0;
-}
-
-/* Total reset to compensate for the AdTran DSU doing bad things
- * under heavy load
- */
-
-static void lmc_running_reset (struct net_device *dev) /*fold00*/
-{
- lmc_softc_t *sc = dev_to_sc(dev);
-
- /* stop interrupts */
- /* Clear the interrupt mask */
- LMC_CSR_WRITE (sc, csr_intr, 0x00000000);
-
- lmc_dec_reset (sc);
- lmc_reset (sc);
- lmc_softreset (sc);
- /* sc->lmc_miireg16 |= LMC_MII16_LED_ALL; */
- sc->lmc_media->set_link_status (sc, 1);
- sc->lmc_media->set_status (sc, NULL);
-
- netif_wake_queue(dev);
-
- sc->lmc_txfull = 0;
- sc->extra_stats.tx_tbusy0++;
-
- sc->lmc_intrmask = TULIP_DEFAULT_INTR_MASK;
- LMC_CSR_WRITE (sc, csr_intr, sc->lmc_intrmask);
-
- sc->lmc_cmdmode |= (TULIP_CMD_TXRUN | TULIP_CMD_RXRUN);
- LMC_CSR_WRITE (sc, csr_command, sc->lmc_cmdmode);
-}
-
-
-/* This is what is called when you ifconfig down a device.
- * This disables the timer for the watchdog and keepalives,
- * and disables the irq for dev.
- */
-static int lmc_close(struct net_device *dev)
-{
- /* not calling release_region() as we should */
- lmc_softc_t *sc = dev_to_sc(dev);
-
- sc->lmc_ok = 0;
- sc->lmc_media->set_link_status (sc, 0);
- del_timer (&sc->timer);
- lmc_proto_close(sc);
- lmc_ifdown (dev);
-
- return 0;
-}
-
-/* Ends the transfer of packets */
-/* When the interface goes down, this is called */
-static int lmc_ifdown (struct net_device *dev) /*fold00*/
-{
- lmc_softc_t *sc = dev_to_sc(dev);
- u32 csr6;
- int i;
-
- /* Don't let anything else go on right now */
- // dev->start = 0;
- netif_stop_queue(dev);
- sc->extra_stats.tx_tbusy1++;
-
- /* stop interrupts */
- /* Clear the interrupt mask */
- LMC_CSR_WRITE (sc, csr_intr, 0x00000000);
-
- /* Stop Tx and Rx on the chip */
- csr6 = LMC_CSR_READ (sc, csr_command);
- csr6 &= ~LMC_DEC_ST; /* Turn off the Transmission bit */
- csr6 &= ~LMC_DEC_SR; /* Turn off the Receive bit */
- LMC_CSR_WRITE (sc, csr_command, csr6);
-
- sc->lmc_device->stats.rx_missed_errors +=
- LMC_CSR_READ(sc, csr_missed_frames) & 0xffff;
-
- /* release the interrupt */
- if(sc->got_irq == 1){
- free_irq (dev->irq, dev);
- sc->got_irq = 0;
- }
-
- /* free skbuffs in the Rx queue */
- for (i = 0; i < LMC_RXDESCS; i++)
- {
- struct sk_buff *skb = sc->lmc_rxq[i];
- sc->lmc_rxq[i] = NULL;
- sc->lmc_rxring[i].status = 0;
- sc->lmc_rxring[i].length = 0;
- sc->lmc_rxring[i].buffer1 = 0xDEADBEEF;
- if (skb != NULL)
- dev_kfree_skb(skb);
- sc->lmc_rxq[i] = NULL;
- }
-
- for (i = 0; i < LMC_TXDESCS; i++)
- {
- if (sc->lmc_txq[i] != NULL)
- dev_kfree_skb(sc->lmc_txq[i]);
- sc->lmc_txq[i] = NULL;
- }
-
- lmc_led_off (sc, LMC_MII16_LED_ALL);
-
- netif_wake_queue(dev);
- sc->extra_stats.tx_tbusy0++;
-
- return 0;
-}
-
-/* Interrupt handling routine. This will take an incoming packet, or clean
- * up after a transmit.
- */
-static irqreturn_t lmc_interrupt (int irq, void *dev_instance) /*fold00*/
-{
- struct net_device *dev = (struct net_device *) dev_instance;
- lmc_softc_t *sc = dev_to_sc(dev);
- u32 csr;
- int i;
- s32 stat;
- unsigned int badtx;
- u32 firstcsr;
- int max_work = LMC_RXDESCS;
- int handled = 0;
-
- spin_lock(&sc->lmc_lock);
-
- /*
- * Read the csr to find what interrupts we have (if any)
- */
- csr = LMC_CSR_READ (sc, csr_status);
- firstcsr = csr; /* keep the first status word for the event log below */
-
- /*
- * Make sure this is our interrupt
- */
- if ( ! (csr & sc->lmc_intrmask)) {
- goto lmc_int_fail_out;
- }
-
- /* always go through this loop at least once */
- while (csr & sc->lmc_intrmask) {
- handled = 1;
-
- /*
- * Clear interrupt bits, we handle all cases below
- */
- LMC_CSR_WRITE (sc, csr_status, csr);
-
- /*
- * One of
- * - Transmit process timed out CSR5<1>
- * - Transmit jabber timeout CSR5<3>
- * - Transmit underflow CSR5<5>
- * - Receive buffer unavailable CSR5<7>
- * - Receive process stopped CSR5<8>
- * - Receive watchdog timeout CSR5<9>
- * - Early transmit interrupt CSR5<10>
- *
- * Is this really right? Should we do a running reset for jabber?
- * (being a WAN card and all)
- */
- if (csr & TULIP_STS_ABNRMLINTR){
- lmc_running_reset (dev);
- break;
- }
-
- if (csr & TULIP_STS_RXINTR)
- lmc_rx (dev);
-
- if (csr & (TULIP_STS_TXINTR | TULIP_STS_TXNOBUF | TULIP_STS_TXSTOPPED)) {
-
- int n_compl = 0 ;
- /* reset the transmit timeout detection flag -baz */
- sc->extra_stats.tx_NoCompleteCnt = 0;
-
- badtx = sc->lmc_taint_tx;
- i = badtx % LMC_TXDESCS;
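- /* badtx trails lmc_next_tx; everything in between is a descriptor
- * the chip may have finished with and we can try to reclaim. */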
-
- while ((badtx < sc->lmc_next_tx)) {
- stat = sc->lmc_txring[i].status;
-
- LMC_EVENT_LOG (LMC_EVENT_XMTINT, stat,
- sc->lmc_txring[i].length);
- /*
- * If bit 31 is set the Tulip still owns it; break out of the loop
- */
- if (stat & 0x80000000)
- break;
-
- n_compl++ ; /* i.e., have an empty slot in ring */
- /*
- * If we have no skbuff or have already cleared it,
- * continue to the next buffer
- */
- if (sc->lmc_txq[i] == NULL)
- continue;
-
- /*
- * Check the total error summary to look for any errors
- */
- if (stat & 0x8000) {
- sc->lmc_device->stats.tx_errors++;
- if (stat & 0x4104)
- sc->lmc_device->stats.tx_aborted_errors++;
- if (stat & 0x0C00)
- sc->lmc_device->stats.tx_carrier_errors++;
- if (stat & 0x0200)
- sc->lmc_device->stats.tx_window_errors++;
- if (stat & 0x0002)
- sc->lmc_device->stats.tx_fifo_errors++;
- } else {
- sc->lmc_device->stats.tx_bytes += sc->lmc_txring[i].length & 0x7ff;
-
- sc->lmc_device->stats.tx_packets++;
- }
-
- dev_consume_skb_irq(sc->lmc_txq[i]);
- sc->lmc_txq[i] = NULL;
-
- badtx++;
- i = badtx % LMC_TXDESCS;
- }
-
- if (sc->lmc_next_tx - badtx > LMC_TXDESCS)
- {
- printk ("%s: out of sync pointer\n", dev->name);
- badtx += LMC_TXDESCS;
- }
- LMC_EVENT_LOG(LMC_EVENT_TBUSY0, n_compl, 0);
- sc->lmc_txfull = 0;
- netif_wake_queue(dev);
- sc->extra_stats.tx_tbusy0++;
-
-
-#ifdef DEBUG
- sc->extra_stats.dirtyTx = badtx;
- sc->extra_stats.lmc_next_tx = sc->lmc_next_tx;
- sc->extra_stats.lmc_txfull = sc->lmc_txfull;
-#endif
- sc->lmc_taint_tx = badtx;
-
- /*
- * Why was there a break here???
- */
- } /* end handle transmit interrupt */
-
- if (csr & TULIP_STS_SYSERROR) {
- u32 error;
- printk (KERN_WARNING "%s: system bus error csr: %#8.8x\n", dev->name, csr);
- error = csr>>23 & 0x7;
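- /* bits 25:23 of CSR5 encode the type of bus error */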
- switch(error){
- case 0x000:
- printk(KERN_WARNING "%s: Parity Fault (bad)\n", dev->name);
- break;
- case 0x001:
- printk(KERN_WARNING "%s: Master Abort (naughty)\n", dev->name);
- break;
- case 0x002:
- printk(KERN_WARNING "%s: Target Abort (not so naughty)\n", dev->name);
- break;
- default:
- printk(KERN_WARNING "%s: This bus error code was supposed to be reserved!\n", dev->name);
- }
- lmc_dec_reset (sc);
- lmc_reset (sc);
- LMC_EVENT_LOG(LMC_EVENT_RESET1, LMC_CSR_READ (sc, csr_status), 0);
- LMC_EVENT_LOG(LMC_EVENT_RESET2,
- lmc_mii_readreg (sc, 0, 16),
- lmc_mii_readreg (sc, 0, 17));
-
- }
-
-
- if(max_work-- <= 0)
- break;
-
- /*
- * Get current csr status to make sure
- * we've cleared all interrupts
- */
- csr = LMC_CSR_READ (sc, csr_status);
- } /* end interrupt loop */
- LMC_EVENT_LOG(LMC_EVENT_INT, firstcsr, csr);
-
-lmc_int_fail_out:
-
- spin_unlock(&sc->lmc_lock);
-
- return IRQ_RETVAL(handled);
-}
-
-static netdev_tx_t lmc_start_xmit(struct sk_buff *skb,
- struct net_device *dev)
-{
- lmc_softc_t *sc = dev_to_sc(dev);
- u32 flag;
- int entry;
- unsigned long flags;
-
- spin_lock_irqsave(&sc->lmc_lock, flags);
-
- /* normal path, tbusy known to be zero */
-
- entry = sc->lmc_next_tx % LMC_TXDESCS;
-
- sc->lmc_txq[entry] = skb;
- sc->lmc_txring[entry].buffer1 = virt_to_bus (skb->data);
-
- LMC_CONSOLE_LOG("xmit", skb->data, skb->len);
-
-#ifndef GCOM
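- /* Interrupt mitigation: request a completion interrupt only at the
- * half-full watermark and when the ring is about to fill; all other
- * packets complete without an interrupt. */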
- /* If the queue is less than half full, don't interrupt */
- if (sc->lmc_next_tx - sc->lmc_taint_tx < LMC_TXDESCS / 2)
- {
- /* Do not interrupt on completion of this packet */
- flag = 0x60000000;
- netif_wake_queue(dev);
- }
- else if (sc->lmc_next_tx - sc->lmc_taint_tx == LMC_TXDESCS / 2)
- {
- /* This generates an interrupt on completion of this packet */
- flag = 0xe0000000;
- netif_wake_queue(dev);
- }
- else if (sc->lmc_next_tx - sc->lmc_taint_tx < LMC_TXDESCS - 1)
- {
- /* Do not interrupt on completion of this packet */
- flag = 0x60000000;
- netif_wake_queue(dev);
- }
- else
- {
- /* This generates an interrupt on completion of this packet */
- flag = 0xe0000000;
- sc->lmc_txfull = 1;
- netif_stop_queue(dev);
- }
-#else
- flag = LMC_TDES_INTERRUPT_ON_COMPLETION;
-
- if (sc->lmc_next_tx - sc->lmc_taint_tx >= LMC_TXDESCS - 1)
- { /* ring full, go busy */
- sc->lmc_txfull = 1;
- netif_stop_queue(dev);
- sc->extra_stats.tx_tbusy1++;
- LMC_EVENT_LOG(LMC_EVENT_TBUSY1, entry, 0);
- }
-#endif
-
-
- if (entry == LMC_TXDESCS - 1) /* last descriptor in ring */
- flag |= LMC_TDES_END_OF_RING; /* flag as such for Tulip */
-
- /* don't pad small packets either */
- flag = sc->lmc_txring[entry].length = (skb->len) | flag |
- sc->TxDescriptControlInit;
-
- /* set the transmit timeout flag to be checked in
- * the watchdog timer handler. -baz
- */
-
- sc->extra_stats.tx_NoCompleteCnt++;
- sc->lmc_next_tx++;
-
- /* give ownership to the chip */
- LMC_EVENT_LOG(LMC_EVENT_XMT, flag, entry);
- sc->lmc_txring[entry].status = 0x80000000;
-
- /* send now! */
- LMC_CSR_WRITE (sc, csr_txpoll, 0);
-
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
-
- return NETDEV_TX_OK;
-}
-
-
-static int lmc_rx(struct net_device *dev)
-{
- lmc_softc_t *sc = dev_to_sc(dev);
- int i;
- int rx_work_limit = LMC_RXDESCS;
- int rxIntLoopCnt; /* debug -baz */
- int localLengthErrCnt = 0;
- long stat;
- struct sk_buff *skb, *nsb;
- u16 len;
-
- lmc_led_on(sc, LMC_DS3_LED3);
-
- rxIntLoopCnt = 0; /* debug -baz */
-
- i = sc->lmc_next_rx % LMC_RXDESCS;
-
- while (((stat = sc->lmc_rxring[i].status) & LMC_RDES_OWN_BIT) != DESC_OWNED_BY_DC21X4)
- {
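- /* The chip flips a descriptor back to host ownership once the frame
- * is complete; stop as soon as we hit one the DC21x4 still owns. */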
- rxIntLoopCnt++; /* debug -baz */
- len = ((stat & LMC_RDES_FRAME_LENGTH) >> RDES_FRAME_LENGTH_BIT_NUMBER);
- if ((stat & 0x0300) != 0x0300) { /* Check first segment and last segment */
- if ((stat & 0x0000ffff) != 0x7fff) {
- /* Oversized frame */
- sc->lmc_device->stats.rx_length_errors++;
- goto skip_packet;
- }
- }
-
- if (stat & 0x00000008) { /* Catch a dribbling bit error */
- sc->lmc_device->stats.rx_errors++;
- sc->lmc_device->stats.rx_frame_errors++;
- goto skip_packet;
- }
-
-
- if (stat & 0x00000004) { /* Catch a CRC error by the Xilinx */
- sc->lmc_device->stats.rx_errors++;
- sc->lmc_device->stats.rx_crc_errors++;
- goto skip_packet;
- }
-
- if (len > LMC_PKT_BUF_SZ) {
- sc->lmc_device->stats.rx_length_errors++;
- localLengthErrCnt++;
- goto skip_packet;
- }
-
- if (len < sc->lmc_crcSize + 2) {
- sc->lmc_device->stats.rx_length_errors++;
- sc->extra_stats.rx_SmallPktCnt++;
- localLengthErrCnt++;
- goto skip_packet;
- }
-
- if(stat & 0x00004000){
- printk(KERN_WARNING "%s: Receiver descriptor error, receiver out of sync?\n", dev->name);
- }
-
- len -= sc->lmc_crcSize;
-
- skb = sc->lmc_rxq[i];
-
- /*
- * We ran out of memory at some point;
- * just allocate an skb buffer and continue.
- */
-
- if (!skb) {
- nsb = dev_alloc_skb (LMC_PKT_BUF_SZ + 2);
- if (nsb) {
- sc->lmc_rxq[i] = nsb;
- nsb->dev = dev;
- sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb));
- }
- sc->failed_recv_alloc = 1;
- goto skip_packet;
- }
-
- sc->lmc_device->stats.rx_packets++;
- sc->lmc_device->stats.rx_bytes += len;
-
- LMC_CONSOLE_LOG("recv", skb->data, len);
-
- /*
- * I'm not sure of the sanity of this:
- * packets could be arriving at a constant
- * 44.210 Mbit/s and we're going to copy
- * them into a new buffer??
- */
-
- if(len > (LMC_MTU - (LMC_MTU>>2))){ /* len > LMC_MTU * 0.75 */
- /*
- * If it's a large packet, don't copy it, just hand it up
- */
- give_it_anyways:
-
- sc->lmc_rxq[i] = NULL;
- sc->lmc_rxring[i].buffer1 = 0x0;
-
- skb_put (skb, len);
- skb->protocol = lmc_proto_type(sc, skb);
- skb_reset_mac_header(skb);
- /* skb_reset_network_header(skb); */
- skb->dev = dev;
- lmc_proto_netif(sc, skb);
-
- /*
- * This skb will be destroyed by the upper layers, make a new one
- */
- nsb = dev_alloc_skb (LMC_PKT_BUF_SZ + 2);
- if (nsb) {
- sc->lmc_rxq[i] = nsb;
- nsb->dev = dev;
- sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb));
- /* Transferred to 21140 below */
- }
- else {
- /*
- * We've run out of memory, stop trying to allocate
- * memory and exit the interrupt handler
- *
- * The chip may run out of receivers and stop,
- * in which case we'll try to allocate the buffer
- * again (once a second).
- */
- sc->extra_stats.rx_BuffAllocErr++;
- LMC_EVENT_LOG(LMC_EVENT_RCVINT, stat, len);
- sc->failed_recv_alloc = 1;
- goto skip_out_of_mem;
- }
- }
- else {
- nsb = dev_alloc_skb(len);
- if(!nsb) {
- goto give_it_anyways;
- }
- skb_copy_from_linear_data(skb, skb_put(nsb, len), len);
-
- nsb->protocol = lmc_proto_type(sc, nsb);
- skb_reset_mac_header(nsb);
- /* skb_reset_network_header(nsb); */
- nsb->dev = dev;
- lmc_proto_netif(sc, nsb);
- }
-
- skip_packet:
- LMC_EVENT_LOG(LMC_EVENT_RCVINT, stat, len);
- sc->lmc_rxring[i].status = DESC_OWNED_BY_DC21X4;
-
- sc->lmc_next_rx++;
- i = sc->lmc_next_rx % LMC_RXDESCS;
- rx_work_limit--;
- if (rx_work_limit < 0)
- break;
- }
-
- /* detect condition for LMC1000 where DSU cable attaches and fills
- * descriptors with bogus packets
- *
- if (localLengthErrCnt > LMC_RXDESCS - 3) {
- sc->extra_stats.rx_BadPktSurgeCnt++;
- LMC_EVENT_LOG(LMC_EVENT_BADPKTSURGE, localLengthErrCnt,
- sc->extra_stats.rx_BadPktSurgeCnt);
- } */
-
- /* save max count of receive descriptors serviced */
- if (rxIntLoopCnt > sc->extra_stats.rxIntLoopCnt)
- sc->extra_stats.rxIntLoopCnt = rxIntLoopCnt; /* debug -baz */
-
-#ifdef DEBUG
- if (rxIntLoopCnt == 0)
- {
- for (i = 0; i < LMC_RXDESCS; i++)
- {
- if ((sc->lmc_rxring[i].status & LMC_RDES_OWN_BIT)
- != DESC_OWNED_BY_DC21X4)
- {
- rxIntLoopCnt++;
- }
- }
- LMC_EVENT_LOG(LMC_EVENT_RCVEND, rxIntLoopCnt, 0);
- }
-#endif
-
-
- lmc_led_off(sc, LMC_DS3_LED3);
-
-skip_out_of_mem:
- return 0;
-}
-
-static struct net_device_stats *lmc_get_stats(struct net_device *dev)
-{
- lmc_softc_t *sc = dev_to_sc(dev);
- unsigned long flags;
-
- spin_lock_irqsave(&sc->lmc_lock, flags);
-
- sc->lmc_device->stats.rx_missed_errors += LMC_CSR_READ(sc, csr_missed_frames) & 0xffff;
-
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
-
- return &sc->lmc_device->stats;
-}
-
-static struct pci_driver lmc_driver = {
- .name = "lmc",
- .id_table = lmc_pci_tbl,
- .probe = lmc_init_one,
- .remove = lmc_remove_one,
-};
-
-module_pci_driver(lmc_driver);
-
-unsigned lmc_mii_readreg (lmc_softc_t * const sc, unsigned devaddr, unsigned regno) /*fold00*/
-{
- int i;
- int command = (0xf6 << 10) | (devaddr << 5) | regno;
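- /* Bit-banged MII read: the constant 0xf6 lands the start-of-frame
- * and read-opcode pattern in the top of the 16-bit command, followed
- * by the 5-bit PHY address and 5-bit register number; the first loop
- * below clocks it out MSB first on CSR9. */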
- int retval = 0;
-
- LMC_MII_SYNC (sc);
-
- for (i = 15; i >= 0; i--)
- {
- int dataval = (command & (1 << i)) ? 0x20000 : 0;
-
- LMC_CSR_WRITE (sc, csr_9, dataval);
- lmc_delay ();
- /* __SLOW_DOWN_IO; */
- LMC_CSR_WRITE (sc, csr_9, dataval | 0x10000);
- lmc_delay ();
- /* __SLOW_DOWN_IO; */
- }
-
- for (i = 19; i > 0; i--)
- {
- LMC_CSR_WRITE (sc, csr_9, 0x40000);
- lmc_delay ();
- /* __SLOW_DOWN_IO; */
- retval = (retval << 1) | ((LMC_CSR_READ (sc, csr_9) & 0x80000) ? 1 : 0);
- LMC_CSR_WRITE (sc, csr_9, 0x40000 | 0x10000);
- lmc_delay ();
- /* __SLOW_DOWN_IO; */
- }
-
- return (retval >> 1) & 0xffff;
-}
-
-void lmc_mii_writereg (lmc_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data) /*fold00*/
-{
- int i;
- int command = (0x5002 << 16) | (devaddr << 23) | (regno << 18) | data;
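- /* 32-bit MII write frame: start and write-opcode bits come from
- * 0x5002, the PHY address sits at bits 27:23, the register number at
- * 22:18, the turnaround pattern at 17:16, and the 16 data bits at
- * the bottom; it is clocked out MSB first below. */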
-
- LMC_MII_SYNC (sc);
-
- i = 31;
- while (i >= 0)
- {
- int datav;
-
- if (command & (1 << i))
- datav = 0x20000;
- else
- datav = 0x00000;
-
- LMC_CSR_WRITE (sc, csr_9, datav);
- lmc_delay ();
- /* __SLOW_DOWN_IO; */
- LMC_CSR_WRITE (sc, csr_9, (datav | 0x10000));
- lmc_delay ();
- /* __SLOW_DOWN_IO; */
- i--;
- }
-
- i = 2;
- while (i > 0)
- {
- LMC_CSR_WRITE (sc, csr_9, 0x40000);
- lmc_delay ();
- /* __SLOW_DOWN_IO; */
- LMC_CSR_WRITE (sc, csr_9, 0x50000);
- lmc_delay ();
- /* __SLOW_DOWN_IO; */
- i--;
- }
-}
-
-static void lmc_softreset (lmc_softc_t * const sc) /*fold00*/
-{
- int i;
-
- /* Initialize the receive rings and buffers. */
- sc->lmc_txfull = 0;
- sc->lmc_next_rx = 0;
- sc->lmc_next_tx = 0;
- sc->lmc_taint_rx = 0;
- sc->lmc_taint_tx = 0;
-
- /*
- * Setup each one of the receiver buffers
- * allocate an skbuff for each one, setup the descriptor table
- * and point each buffer at the next one
- */
-
- for (i = 0; i < LMC_RXDESCS; i++)
- {
- struct sk_buff *skb;
-
- if (sc->lmc_rxq[i] == NULL)
- {
- skb = dev_alloc_skb (LMC_PKT_BUF_SZ + 2);
- if(skb == NULL){
- printk(KERN_WARNING "%s: Failed to allocate receiver ring, will try again\n", sc->name);
- sc->failed_ring = 1;
- break;
- }
- else{
- sc->lmc_rxq[i] = skb;
- }
- }
- else
- {
- skb = sc->lmc_rxq[i];
- }
-
- skb->dev = sc->lmc_device;
-
- /* owned by 21140 */
- sc->lmc_rxring[i].status = 0x80000000;
-
- /* used to be PKT_BUF_SZ; now uses the skb's tailroom since we lose some space to headroom */
- sc->lmc_rxring[i].length = skb_tailroom(skb);
-
- /* used to be tail, which looks odd (why write to the end of the
- * packet?), but nothing has been written yet, so tail == data
- */
- sc->lmc_rxring[i].buffer1 = virt_to_bus (skb->data);
-
- /* This is fair since the structure is static and we have the next address */
- sc->lmc_rxring[i].buffer2 = virt_to_bus (&sc->lmc_rxring[i + 1]);
-
- }
-
- /*
- * Sets end of ring
- */
- if (i != 0) {
- sc->lmc_rxring[i - 1].length |= 0x02000000; /* Set end of buffers flag */
- sc->lmc_rxring[i - 1].buffer2 = virt_to_bus(&sc->lmc_rxring[0]); /* Point back to the start */
- }
- LMC_CSR_WRITE (sc, csr_rxlist, virt_to_bus (sc->lmc_rxring)); /* write base address */
-
- /* Initialize the transmit rings and buffers */
- for (i = 0; i < LMC_TXDESCS; i++)
- {
- if (sc->lmc_txq[i] != NULL){ /* have buffer */
- dev_kfree_skb(sc->lmc_txq[i]); /* free it */
- sc->lmc_device->stats.tx_dropped++; /* We just dropped a packet */
- }
- sc->lmc_txq[i] = NULL;
- sc->lmc_txring[i].status = 0x00000000;
- sc->lmc_txring[i].buffer2 = virt_to_bus (&sc->lmc_txring[i + 1]);
- }
- sc->lmc_txring[i - 1].buffer2 = virt_to_bus (&sc->lmc_txring[0]);
- LMC_CSR_WRITE (sc, csr_txlist, virt_to_bus (sc->lmc_txring));
-}
-
-void lmc_gpio_mkinput(lmc_softc_t * const sc, u32 bits) /*fold00*/
-{
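- /* A zero direction bit makes the pin an input; TULIP_GP_PINSET
- * marks this CSR write as a pin-direction update, not pin data. */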
- sc->lmc_gpio_io &= ~bits;
- LMC_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET | (sc->lmc_gpio_io));
-}
-
-void lmc_gpio_mkoutput(lmc_softc_t * const sc, u32 bits) /*fold00*/
-{
- sc->lmc_gpio_io |= bits;
- LMC_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET | (sc->lmc_gpio_io));
-}
-
-void lmc_led_on(lmc_softc_t * const sc, u32 led) /*fold00*/
-{
- if ((~sc->lmc_miireg16) & led) /* Already on! */
- return;
-
- sc->lmc_miireg16 &= ~led;
- lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16);
-}
-
-void lmc_led_off(lmc_softc_t * const sc, u32 led) /*fold00*/
-{
- if (sc->lmc_miireg16 & led) /* Already set don't do anything */
- return;
-
- sc->lmc_miireg16 |= led;
- lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16);
-}
-
-static void lmc_reset(lmc_softc_t * const sc) /*fold00*/
-{
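- /* Pulse the FIFO-reset bit in MII register 16: set it, then clear it. */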
- sc->lmc_miireg16 |= LMC_MII16_FIFO_RESET;
- lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16);
-
- sc->lmc_miireg16 &= ~LMC_MII16_FIFO_RESET;
- lmc_mii_writereg(sc, 0, 16, sc->lmc_miireg16);
-
- /*
- * make some of the GPIO pins be outputs
- */
- lmc_gpio_mkoutput(sc, LMC_GEP_RESET);
-
- /*
- * RESET low to force state reset. This also forces
- * the transmitter clock to be internal, but we expect to reset
- * that later anyway.
- */
- sc->lmc_gpio &= ~(LMC_GEP_RESET);
- LMC_CSR_WRITE(sc, csr_gp, sc->lmc_gpio);
-
- /*
- * hold for more than 10 microseconds
- */
- udelay(50);
-
- /*
- * stop driving Xilinx-related signals
- */
- lmc_gpio_mkinput(sc, LMC_GEP_RESET);
-
- /*
- * Call media specific init routine
- */
- sc->lmc_media->init(sc);
-
- sc->extra_stats.resetCount++;
-}
-
-static void lmc_dec_reset(lmc_softc_t * const sc) /*fold00*/
-{
- u32 val;
-
- /*
- * disable all interrupts
- */
- sc->lmc_intrmask = 0;
- LMC_CSR_WRITE(sc, csr_intr, sc->lmc_intrmask);
-
- /*
- * Reset the chip with a software reset command.
- * Wait 10 microseconds (actually 50 PCI cycles but at
- * 33MHz that comes to two microseconds but wait a
- * bit longer anyways)
- */
- LMC_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET);
- udelay(25);
-#ifdef __sparc__
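- /* Note: the value just read is immediately overwritten; busmode is
- * forced to 0x00100000 (apparently the desired burst/alignment
- * setting) with the software-reset bit cleared, then written back. */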
- sc->lmc_busmode = LMC_CSR_READ(sc, csr_busmode);
- sc->lmc_busmode = 0x00100000;
- sc->lmc_busmode &= ~TULIP_BUSMODE_SWRESET;
- LMC_CSR_WRITE(sc, csr_busmode, sc->lmc_busmode);
-#endif
- sc->lmc_cmdmode = LMC_CSR_READ(sc, csr_command);
-
- /*
- * We want:
- * no ethernet address in frames we write
- * disable padding (txdesc, padding disable)
- * ignore runt frames (rdes0 bit 15)
- * no receiver watchdog or transmitter jabber timer
- * (csr15 bit 0,14 == 1)
- * if using 16-bit CRC, turn off CRC (trans desc, crc disable)
- */
-
- sc->lmc_cmdmode |= ( TULIP_CMD_PROMISCUOUS
- | TULIP_CMD_FULLDUPLEX
- | TULIP_CMD_PASSBADPKT
- | TULIP_CMD_NOHEARTBEAT
- | TULIP_CMD_PORTSELECT
- | TULIP_CMD_RECEIVEALL
- | TULIP_CMD_MUSTBEONE
- );
- sc->lmc_cmdmode &= ~( TULIP_CMD_OPERMODE
- | TULIP_CMD_THRESHOLDCTL
- | TULIP_CMD_STOREFWD
- | TULIP_CMD_TXTHRSHLDCTL
- );
-
- LMC_CSR_WRITE(sc, csr_command, sc->lmc_cmdmode);
-
- /*
- * disable receiver watchdog and transmit jabber
- */
- val = LMC_CSR_READ(sc, csr_sia_general);
- val |= (TULIP_WATCHDOG_TXDISABLE | TULIP_WATCHDOG_RXDISABLE);
- LMC_CSR_WRITE(sc, csr_sia_general, val);
-}
-
-static void lmc_initcsrs(lmc_softc_t * const sc, lmc_csrptr_t csr_base, /*fold00*/
- size_t csr_size)
-{
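- /* The 21140 CSRs sit at consecutive offsets of csr_size bytes
- * (8 for this board) from the base address. */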
- sc->lmc_csrs.csr_busmode = csr_base + 0 * csr_size;
- sc->lmc_csrs.csr_txpoll = csr_base + 1 * csr_size;
- sc->lmc_csrs.csr_rxpoll = csr_base + 2 * csr_size;
- sc->lmc_csrs.csr_rxlist = csr_base + 3 * csr_size;
- sc->lmc_csrs.csr_txlist = csr_base + 4 * csr_size;
- sc->lmc_csrs.csr_status = csr_base + 5 * csr_size;
- sc->lmc_csrs.csr_command = csr_base + 6 * csr_size;
- sc->lmc_csrs.csr_intr = csr_base + 7 * csr_size;
- sc->lmc_csrs.csr_missed_frames = csr_base + 8 * csr_size;
- sc->lmc_csrs.csr_9 = csr_base + 9 * csr_size;
- sc->lmc_csrs.csr_10 = csr_base + 10 * csr_size;
- sc->lmc_csrs.csr_11 = csr_base + 11 * csr_size;
- sc->lmc_csrs.csr_12 = csr_base + 12 * csr_size;
- sc->lmc_csrs.csr_13 = csr_base + 13 * csr_size;
- sc->lmc_csrs.csr_14 = csr_base + 14 * csr_size;
- sc->lmc_csrs.csr_15 = csr_base + 15 * csr_size;
-}
-
-static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue)
-{
- lmc_softc_t *sc = dev_to_sc(dev);
- u32 csr6;
- unsigned long flags;
-
- spin_lock_irqsave(&sc->lmc_lock, flags);
-
- printk("%s: Xmitter busy|\n", dev->name);
-
- sc->extra_stats.tx_tbusy_calls++;
- if (time_is_before_jiffies(dev_trans_start(dev) + TX_TIMEOUT))
- goto bug_out;
-
- /*
- * Chip seems to have locked up
- * Reset it
- * This wipes out our whole descriptor
- * table and starts from scratch
- */
-
- LMC_EVENT_LOG(LMC_EVENT_XMTPRCTMO,
- LMC_CSR_READ (sc, csr_status),
- sc->extra_stats.tx_ProcTimeout);
-
- lmc_running_reset (dev);
-
- LMC_EVENT_LOG(LMC_EVENT_RESET1, LMC_CSR_READ (sc, csr_status), 0);
- LMC_EVENT_LOG(LMC_EVENT_RESET2,
- lmc_mii_readreg (sc, 0, 16),
- lmc_mii_readreg (sc, 0, 17));
-
- /* restart the tx processes */
- csr6 = LMC_CSR_READ (sc, csr_command);
- LMC_CSR_WRITE (sc, csr_command, csr6 | 0x0002);
- LMC_CSR_WRITE (sc, csr_command, csr6 | 0x2002);
-
- /* immediate transmit */
- LMC_CSR_WRITE (sc, csr_txpoll, 0);
-
- sc->lmc_device->stats.tx_errors++;
- sc->extra_stats.tx_ProcTimeout++; /* -baz */
-
- netif_trans_update(dev); /* prevent tx timeout */
-
-bug_out:
-
- spin_unlock_irqrestore(&sc->lmc_lock, flags);
-}
diff --git a/drivers/net/wan/lmc/lmc_media.c b/drivers/net/wan/lmc/lmc_media.c
deleted file mode 100644
index ec1ac7b1f3fd..000000000000
--- a/drivers/net/wan/lmc/lmc_media.c
+++ /dev/null
@@ -1,1206 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* $Id: lmc_media.c,v 1.13 2000/04/11 05:25:26 asj Exp $ */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/timer.h>
-#include <linux/ptrace.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/in.h>
-#include <linux/if_arp.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/inet.h>
-#include <linux/bitops.h>
-
-#include <asm/processor.h> /* Processor type for cache alignment. */
-#include <asm/io.h>
-#include <asm/dma.h>
-
-#include <linux/uaccess.h>
-
-#include "lmc.h"
-#include "lmc_var.h"
-#include "lmc_ioctl.h"
-#include "lmc_debug.h"
-
-#define CONFIG_LMC_IGNORE_HARDWARE_HANDSHAKE 1
-
- /*
- * Copyright (c) 1997-2000 LAN Media Corporation (LMC)
- * All rights reserved. www.lanmedia.com
- *
- * This code is written by:
- * Andrew Stanley-Jones (asj@cban.com)
- * Rob Braun (bbraun@vix.com),
- * Michael Graff (explorer@vix.com) and
- * Matt Thomas (matt@3am-software.com).
- */
-
-/*
- * protocol independent method.
- */
-static void lmc_set_protocol (lmc_softc_t * const, lmc_ctl_t *);
-
-/*
- * media independent methods to check on media status, link, light LEDs,
- * etc.
- */
-static void lmc_ds3_init (lmc_softc_t * const);
-static void lmc_ds3_default (lmc_softc_t * const);
-static void lmc_ds3_set_status (lmc_softc_t * const, lmc_ctl_t *);
-static void lmc_ds3_set_100ft (lmc_softc_t * const, int);
-static int lmc_ds3_get_link_status (lmc_softc_t * const);
-static void lmc_ds3_set_crc_length (lmc_softc_t * const, int);
-static void lmc_ds3_set_scram (lmc_softc_t * const, int);
-static void lmc_ds3_watchdog (lmc_softc_t * const);
-
-static void lmc_hssi_init (lmc_softc_t * const);
-static void lmc_hssi_default (lmc_softc_t * const);
-static void lmc_hssi_set_status (lmc_softc_t * const, lmc_ctl_t *);
-static void lmc_hssi_set_clock (lmc_softc_t * const, int);
-static int lmc_hssi_get_link_status (lmc_softc_t * const);
-static void lmc_hssi_set_link_status (lmc_softc_t * const, int);
-static void lmc_hssi_set_crc_length (lmc_softc_t * const, int);
-static void lmc_hssi_watchdog (lmc_softc_t * const);
-
-static void lmc_ssi_init (lmc_softc_t * const);
-static void lmc_ssi_default (lmc_softc_t * const);
-static void lmc_ssi_set_status (lmc_softc_t * const, lmc_ctl_t *);
-static void lmc_ssi_set_clock (lmc_softc_t * const, int);
-static void lmc_ssi_set_speed (lmc_softc_t * const, lmc_ctl_t *);
-static int lmc_ssi_get_link_status (lmc_softc_t * const);
-static void lmc_ssi_set_link_status (lmc_softc_t * const, int);
-static void lmc_ssi_set_crc_length (lmc_softc_t * const, int);
-static void lmc_ssi_watchdog (lmc_softc_t * const);
-
-static void lmc_t1_init (lmc_softc_t * const);
-static void lmc_t1_default (lmc_softc_t * const);
-static void lmc_t1_set_status (lmc_softc_t * const, lmc_ctl_t *);
-static int lmc_t1_get_link_status (lmc_softc_t * const);
-static void lmc_t1_set_circuit_type (lmc_softc_t * const, int);
-static void lmc_t1_set_crc_length (lmc_softc_t * const, int);
-static void lmc_t1_set_clock (lmc_softc_t * const, int);
-static void lmc_t1_watchdog (lmc_softc_t * const);
-
-static void lmc_dummy_set_1 (lmc_softc_t * const, int);
-static void lmc_dummy_set2_1 (lmc_softc_t * const, lmc_ctl_t *);
-
-static inline void write_av9110_bit (lmc_softc_t *, int);
-static void write_av9110(lmc_softc_t *, u32, u32, u32, u32, u32);
-
-lmc_media_t lmc_ds3_media = {
- .init = lmc_ds3_init, /* special media init stuff */
- .defaults = lmc_ds3_default, /* reset to default state */
- .set_status = lmc_ds3_set_status, /* reset status to state provided */
- .set_clock_source = lmc_dummy_set_1, /* set clock source */
- .set_speed = lmc_dummy_set2_1, /* set line speed */
- .set_cable_length = lmc_ds3_set_100ft, /* set cable length */
- .set_scrambler = lmc_ds3_set_scram, /* set scrambler */
- .get_link_status = lmc_ds3_get_link_status, /* get link status */
- .set_link_status = lmc_dummy_set_1, /* set link status */
- .set_crc_length = lmc_ds3_set_crc_length, /* set CRC length */
- .set_circuit_type = lmc_dummy_set_1, /* set T1 or E1 circuit type */
- .watchdog = lmc_ds3_watchdog
-};
-
-lmc_media_t lmc_hssi_media = {
- .init = lmc_hssi_init, /* special media init stuff */
- .defaults = lmc_hssi_default, /* reset to default state */
- .set_status = lmc_hssi_set_status, /* reset status to state provided */
- .set_clock_source = lmc_hssi_set_clock, /* set clock source */
- .set_speed = lmc_dummy_set2_1, /* set line speed */
- .set_cable_length = lmc_dummy_set_1, /* set cable length */
- .set_scrambler = lmc_dummy_set_1, /* set scrambler */
- .get_link_status = lmc_hssi_get_link_status, /* get link status */
- .set_link_status = lmc_hssi_set_link_status, /* set link status */
- .set_crc_length = lmc_hssi_set_crc_length, /* set CRC length */
- .set_circuit_type = lmc_dummy_set_1, /* set T1 or E1 circuit type */
- .watchdog = lmc_hssi_watchdog
-};
-
-lmc_media_t lmc_ssi_media = {
- .init = lmc_ssi_init, /* special media init stuff */
- .defaults = lmc_ssi_default, /* reset to default state */
- .set_status = lmc_ssi_set_status, /* reset status to state provided */
- .set_clock_source = lmc_ssi_set_clock, /* set clock source */
- .set_speed = lmc_ssi_set_speed, /* set line speed */
- .set_cable_length = lmc_dummy_set_1, /* set cable length */
- .set_scrambler = lmc_dummy_set_1, /* set scrambler */
- .get_link_status = lmc_ssi_get_link_status, /* get link status */
- .set_link_status = lmc_ssi_set_link_status, /* set link status */
- .set_crc_length = lmc_ssi_set_crc_length, /* set CRC length */
- .set_circuit_type = lmc_dummy_set_1, /* set T1 or E1 circuit type */
- .watchdog = lmc_ssi_watchdog
-};
-
-lmc_media_t lmc_t1_media = {
- .init = lmc_t1_init, /* special media init stuff */
- .defaults = lmc_t1_default, /* reset to default state */
- .set_status = lmc_t1_set_status, /* reset status to state provided */
- .set_clock_source = lmc_t1_set_clock, /* set clock source */
- .set_speed = lmc_dummy_set2_1, /* set line speed */
- .set_cable_length = lmc_dummy_set_1, /* set cable length */
- .set_scrambler = lmc_dummy_set_1, /* set scrambler */
- .get_link_status = lmc_t1_get_link_status, /* get link status */
- .set_link_status = lmc_dummy_set_1, /* set link status */
- .set_crc_length = lmc_t1_set_crc_length, /* set CRC length */
- .set_circuit_type = lmc_t1_set_circuit_type, /* set T1 or E1 circuit type */
- .watchdog = lmc_t1_watchdog
-};
-
-static void
-lmc_dummy_set_1 (lmc_softc_t * const sc, int a)
-{
-}
-
-static void
-lmc_dummy_set2_1 (lmc_softc_t * const sc, lmc_ctl_t * a)
-{
-}
-
-/*
- * HSSI methods
- */
-
-static void
-lmc_hssi_init (lmc_softc_t * const sc)
-{
- sc->ictl.cardtype = LMC_CTL_CARDTYPE_LMC5200;
-
- lmc_gpio_mkoutput (sc, LMC_GEP_HSSI_CLOCK);
-}
-
-static void
-lmc_hssi_default (lmc_softc_t * const sc)
-{
- sc->lmc_miireg16 = LMC_MII16_LED_ALL;
-
- sc->lmc_media->set_link_status (sc, LMC_LINK_DOWN);
- sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_EXT);
- sc->lmc_media->set_crc_length (sc, LMC_CTL_CRC_LENGTH_16);
-}
-
-/*
- * Given a user provided state, set ourselves up to match it. This will
- * always reset the card if needed.
- */
-static void
-lmc_hssi_set_status (lmc_softc_t * const sc, lmc_ctl_t * ctl)
-{
- if (ctl == NULL)
- {
- sc->lmc_media->set_clock_source (sc, sc->ictl.clock_source);
- lmc_set_protocol (sc, NULL);
-
- return;
- }
-
- /*
- * check for change in clock source
- */
- if (ctl->clock_source && !sc->ictl.clock_source)
- {
- sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_INT);
- sc->lmc_timing = LMC_CTL_CLOCK_SOURCE_INT;
- }
- else if (!ctl->clock_source && sc->ictl.clock_source)
- {
- sc->lmc_timing = LMC_CTL_CLOCK_SOURCE_EXT;
- sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_EXT);
- }
-
- lmc_set_protocol (sc, ctl);
-}
-
-/*
- * 1 == internal, 0 == external
- */
-static void
-lmc_hssi_set_clock (lmc_softc_t * const sc, int ie)
-{
- int old;
- old = sc->ictl.clock_source;
- if (ie == LMC_CTL_CLOCK_SOURCE_EXT)
- {
- sc->lmc_gpio |= LMC_GEP_HSSI_CLOCK;
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
- sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_EXT;
- if(old != ie)
- printk (LMC_PRINTF_FMT ": clock external\n", LMC_PRINTF_ARGS);
- }
- else
- {
- sc->lmc_gpio &= ~(LMC_GEP_HSSI_CLOCK);
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
- sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_INT;
- if(old != ie)
- printk (LMC_PRINTF_FMT ": clock internal\n", LMC_PRINTF_ARGS);
- }
-}
-
-/*
- * return hardware link status.
- * 0 == link is down, 1 == link is up.
- */
-static int
-lmc_hssi_get_link_status (lmc_softc_t * const sc)
-{
- /*
- * We're using the same code as SSI since
- * they're practically the same
- */
- return lmc_ssi_get_link_status(sc);
-}
-
-static void
-lmc_hssi_set_link_status (lmc_softc_t * const sc, int state)
-{
- if (state == LMC_LINK_UP)
- sc->lmc_miireg16 |= LMC_MII16_HSSI_TA;
- else
- sc->lmc_miireg16 &= ~LMC_MII16_HSSI_TA;
-
- lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16);
-}
-
-/*
- * 0 == 16bit, 1 == 32bit
- */
-static void
-lmc_hssi_set_crc_length (lmc_softc_t * const sc, int state)
-{
- if (state == LMC_CTL_CRC_LENGTH_32)
- {
- /* 32 bit */
- sc->lmc_miireg16 |= LMC_MII16_HSSI_CRC;
- sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_32;
- }
- else
- {
- /* 16 bit */
- sc->lmc_miireg16 &= ~LMC_MII16_HSSI_CRC;
- sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_16;
- }
-
- lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16);
-}
-
-static void
-lmc_hssi_watchdog (lmc_softc_t * const sc)
-{
- /* HSSI is blank */
-}
-
-/*
- * DS3 methods
- */
-
-/*
- * Set cable length
- */
-static void
-lmc_ds3_set_100ft (lmc_softc_t * const sc, int ie)
-{
- if (ie == LMC_CTL_CABLE_LENGTH_GT_100FT)
- {
- sc->lmc_miireg16 &= ~LMC_MII16_DS3_ZERO;
- sc->ictl.cable_length = LMC_CTL_CABLE_LENGTH_GT_100FT;
- }
- else if (ie == LMC_CTL_CABLE_LENGTH_LT_100FT)
- {
- sc->lmc_miireg16 |= LMC_MII16_DS3_ZERO;
- sc->ictl.cable_length = LMC_CTL_CABLE_LENGTH_LT_100FT;
- }
- lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16);
-}
-
-static void
-lmc_ds3_default (lmc_softc_t * const sc)
-{
- sc->lmc_miireg16 = LMC_MII16_LED_ALL;
-
- sc->lmc_media->set_link_status (sc, LMC_LINK_DOWN);
- sc->lmc_media->set_cable_length (sc, LMC_CTL_CABLE_LENGTH_LT_100FT);
- sc->lmc_media->set_scrambler (sc, LMC_CTL_OFF);
- sc->lmc_media->set_crc_length (sc, LMC_CTL_CRC_LENGTH_16);
-}
-
-/*
- * Given a user provided state, set ourselves up to match it. This will
- * always reset the card if needed.
- */
-static void
-lmc_ds3_set_status (lmc_softc_t * const sc, lmc_ctl_t * ctl)
-{
- if (ctl == NULL)
- {
- sc->lmc_media->set_cable_length (sc, sc->ictl.cable_length);
- sc->lmc_media->set_scrambler (sc, sc->ictl.scrambler_onoff);
- lmc_set_protocol (sc, NULL);
-
- return;
- }
-
- /*
- * check for change in cable length setting
- */
- if (ctl->cable_length && !sc->ictl.cable_length)
- lmc_ds3_set_100ft (sc, LMC_CTL_CABLE_LENGTH_GT_100FT);
- else if (!ctl->cable_length && sc->ictl.cable_length)
- lmc_ds3_set_100ft (sc, LMC_CTL_CABLE_LENGTH_LT_100FT);
-
- /*
- * Check for change in scrambler setting (requires reset)
- */
- if (ctl->scrambler_onoff && !sc->ictl.scrambler_onoff)
- lmc_ds3_set_scram (sc, LMC_CTL_ON);
- else if (!ctl->scrambler_onoff && sc->ictl.scrambler_onoff)
- lmc_ds3_set_scram (sc, LMC_CTL_OFF);
-
- lmc_set_protocol (sc, ctl);
-}
-
-static void
-lmc_ds3_init (lmc_softc_t * const sc)
-{
- int i;
-
- sc->ictl.cardtype = LMC_CTL_CARDTYPE_LMC5245;
-
- /* writes zeros everywhere */
- for (i = 0; i < 21; i++)
- {
- lmc_mii_writereg (sc, 0, 17, i);
- lmc_mii_writereg (sc, 0, 18, 0);
- }
-
- /* set some essential bits */
- lmc_mii_writereg (sc, 0, 17, 1);
- lmc_mii_writereg (sc, 0, 18, 0x25); /* ser, xtx */
-
- lmc_mii_writereg (sc, 0, 17, 5);
- lmc_mii_writereg (sc, 0, 18, 0x80); /* emode */
-
- lmc_mii_writereg (sc, 0, 17, 14);
- lmc_mii_writereg (sc, 0, 18, 0x30); /* rcgen, tcgen */
-
- /* clear counters and latched bits */
- for (i = 0; i < 21; i++)
- {
- lmc_mii_writereg (sc, 0, 17, i);
- lmc_mii_readreg (sc, 0, 18);
- }
-}
-
-/*
- * 1 == DS3 payload scrambled, 0 == not scrambled
- */
-static void
-lmc_ds3_set_scram (lmc_softc_t * const sc, int ie)
-{
- if (ie == LMC_CTL_ON)
- {
- sc->lmc_miireg16 |= LMC_MII16_DS3_SCRAM;
- sc->ictl.scrambler_onoff = LMC_CTL_ON;
- }
- else
- {
- sc->lmc_miireg16 &= ~LMC_MII16_DS3_SCRAM;
- sc->ictl.scrambler_onoff = LMC_CTL_OFF;
- }
- lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16);
-}
-
-/*
- * return hardware link status.
- * 0 == link is down, 1 == link is up.
- */
-static int
-lmc_ds3_get_link_status (lmc_softc_t * const sc)
-{
- u16 link_status, link_status_11;
- int ret = 1;
-
- lmc_mii_writereg (sc, 0, 17, 7);
- link_status = lmc_mii_readreg (sc, 0, 18);
-
- /* LMC5245 (DS3) & LMC1200 (DS1) LED definitions
- * led0 yellow = far-end adapter is in Red alarm condition
- * led1 blue = received an Alarm Indication signal
- * (upstream failure)
- * led2 Green = power to adapter, Gate Array loaded & driver
- * attached
- * led3 red = Loss of Signal (LOS) or out of frame (OOF)
- * conditions detected on T3 receive signal
- */
-
- lmc_led_on(sc, LMC_DS3_LED2);
-
- if ((link_status & LMC_FRAMER_REG0_DLOS) ||
- (link_status & LMC_FRAMER_REG0_OOFS)){
- ret = 0;
- if(sc->last_led_err[3] != 1){
- u16 r1;
- lmc_mii_writereg (sc, 0, 17, 01); /* Turn on Xbit error as our cisco does */
- r1 = lmc_mii_readreg (sc, 0, 18);
- r1 &= 0xfe;
- lmc_mii_writereg(sc, 0, 18, r1);
- printk(KERN_WARNING "%s: Red Alarm - Loss of Signal or Loss of Framing\n", sc->name);
- }
- lmc_led_on(sc, LMC_DS3_LED3); /* turn on red LED */
- sc->last_led_err[3] = 1;
- }
- else {
- lmc_led_off(sc, LMC_DS3_LED3); /* turn off red LED */
- if(sc->last_led_err[3] == 1){
- u16 r1;
- lmc_mii_writereg (sc, 0, 17, 01); /* Turn off Xbit error */
- r1 = lmc_mii_readreg (sc, 0, 18);
- r1 |= 0x01;
- lmc_mii_writereg(sc, 0, 18, r1);
- }
- sc->last_led_err[3] = 0;
- }
-
- lmc_mii_writereg(sc, 0, 17, 0x10);
- link_status_11 = lmc_mii_readreg(sc, 0, 18);
- if((link_status & LMC_FRAMER_REG0_AIS) ||
- (link_status_11 & LMC_FRAMER_REG10_XBIT)) {
- ret = 0;
- if(sc->last_led_err[0] != 1){
- printk(KERN_WARNING "%s: AIS Alarm or XBit Error\n", sc->name);
- printk(KERN_WARNING "%s: Remote end has loss of signal or framing\n", sc->name);
- }
- lmc_led_on(sc, LMC_DS3_LED0);
- sc->last_led_err[0] = 1;
- }
- else {
- lmc_led_off(sc, LMC_DS3_LED0);
- sc->last_led_err[0] = 0;
- }
-
- lmc_mii_writereg (sc, 0, 17, 9);
- link_status = lmc_mii_readreg (sc, 0, 18);
-
- if(link_status & LMC_FRAMER_REG9_RBLUE){
- ret = 0;
- if(sc->last_led_err[1] != 1){
- printk(KERN_WARNING "%s: Blue Alarm - Receiving all 1's\n", sc->name);
- }
- lmc_led_on(sc, LMC_DS3_LED1);
- sc->last_led_err[1] = 1;
- }
- else {
- lmc_led_off(sc, LMC_DS3_LED1);
- sc->last_led_err[1] = 0;
- }
-
- return ret;
-}
-
-/*
- * 0 == 16bit, 1 == 32bit
- */
-static void
-lmc_ds3_set_crc_length (lmc_softc_t * const sc, int state)
-{
- if (state == LMC_CTL_CRC_LENGTH_32)
- {
- /* 32 bit */
- sc->lmc_miireg16 |= LMC_MII16_DS3_CRC;
- sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_32;
- }
- else
- {
- /* 16 bit */
- sc->lmc_miireg16 &= ~LMC_MII16_DS3_CRC;
- sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_16;
- }
-
- lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16);
-}
-
-static void
-lmc_ds3_watchdog (lmc_softc_t * const sc)
-{
-
-}
-
-
-/*
- * SSI methods
- */
-
-static void lmc_ssi_init(lmc_softc_t * const sc)
-{
- u16 mii17;
- int cable;
-
- sc->ictl.cardtype = LMC_CTL_CARDTYPE_LMC1000;
-
- mii17 = lmc_mii_readreg(sc, 0, 17);
-
- cable = (mii17 & LMC_MII17_SSI_CABLE_MASK) >> LMC_MII17_SSI_CABLE_SHIFT;
- sc->ictl.cable_type = cable;
-
- lmc_gpio_mkoutput(sc, LMC_GEP_SSI_TXCLOCK);
-}
-
-static void
-lmc_ssi_default (lmc_softc_t * const sc)
-{
- sc->lmc_miireg16 = LMC_MII16_LED_ALL;
-
- /*
- * make TXCLOCK always be an output
- */
- lmc_gpio_mkoutput (sc, LMC_GEP_SSI_TXCLOCK);
-
- sc->lmc_media->set_link_status (sc, LMC_LINK_DOWN);
- sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_EXT);
- sc->lmc_media->set_speed (sc, NULL);
- sc->lmc_media->set_crc_length (sc, LMC_CTL_CRC_LENGTH_16);
-}
-
-/*
- * Given a user provided state, set ourselves up to match it. This will
- * always reset the card if needed.
- */
-static void
-lmc_ssi_set_status (lmc_softc_t * const sc, lmc_ctl_t * ctl)
-{
- if (ctl == NULL)
- {
- sc->lmc_media->set_clock_source (sc, sc->ictl.clock_source);
- sc->lmc_media->set_speed (sc, &sc->ictl);
- lmc_set_protocol (sc, NULL);
-
- return;
- }
-
- /*
- * check for change in clock source
- */
- if (ctl->clock_source == LMC_CTL_CLOCK_SOURCE_INT
- && sc->ictl.clock_source == LMC_CTL_CLOCK_SOURCE_EXT)
- {
- sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_INT);
- sc->lmc_timing = LMC_CTL_CLOCK_SOURCE_INT;
- }
- else if (ctl->clock_source == LMC_CTL_CLOCK_SOURCE_EXT
- && sc->ictl.clock_source == LMC_CTL_CLOCK_SOURCE_INT)
- {
- sc->lmc_media->set_clock_source (sc, LMC_CTL_CLOCK_SOURCE_EXT);
- sc->lmc_timing = LMC_CTL_CLOCK_SOURCE_EXT;
- }
-
- if (ctl->clock_rate != sc->ictl.clock_rate)
- sc->lmc_media->set_speed (sc, ctl);
-
- lmc_set_protocol (sc, ctl);
-}
-
-/*
- * 1 == internal, 0 == external
- */
-static void
-lmc_ssi_set_clock (lmc_softc_t * const sc, int ie)
-{
- int old;
- old = ie;
- if (ie == LMC_CTL_CLOCK_SOURCE_EXT)
- {
- sc->lmc_gpio &= ~(LMC_GEP_SSI_TXCLOCK);
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
- sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_EXT;
- if(ie != old)
- printk (LMC_PRINTF_FMT ": clock external\n", LMC_PRINTF_ARGS);
- }
- else
- {
- sc->lmc_gpio |= LMC_GEP_SSI_TXCLOCK;
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
- sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_INT;
- if(ie != old)
- printk (LMC_PRINTF_FMT ": clock internal\n", LMC_PRINTF_ARGS);
- }
-}
-
-static void
-lmc_ssi_set_speed (lmc_softc_t * const sc, lmc_ctl_t * ctl)
-{
- lmc_ctl_t *ictl = &sc->ictl;
- lmc_av9110_t *av;
-
- /* The original settings for a clock rate of
-  * 100 kHz (8,25,0,0,2) were incorrect;
-  * they should have been 80,125,1,3,3.
-  * There are 17 param combinations that produce this freq.
-  * For 1.5 MHz use 120,100,1,1,2 (226 param combinations).
-  */
- if (ctl == NULL)
- {
- av = &ictl->cardspec.ssi;
- ictl->clock_rate = 1500000;
- av->f = ictl->clock_rate;
- av->n = 120;
- av->m = 100;
- av->v = 1;
- av->x = 1;
- av->r = 2;
-
- write_av9110 (sc, av->n, av->m, av->v, av->x, av->r);
- return;
- }
-
- av = &ctl->cardspec.ssi;
-
- if (av->f == 0)
- return;
-
- ictl->clock_rate = av->f; /* really, this is the rate we are running at */
- ictl->cardspec.ssi = *av;
-
- write_av9110 (sc, av->n, av->m, av->v, av->x, av->r);
-}
-
-/*
- * return hardware link status.
- * 0 == link is down, 1 == link is up.
- */
-static int
-lmc_ssi_get_link_status (lmc_softc_t * const sc)
-{
- u16 link_status;
- u32 ticks;
- int ret = 1;
- int hw_hdsk = 1;
-
- /*
- * missing CTS? Hmm. If we require CTS on, we may never get the
- * link to come up, so omit it in this test.
- *
- * Also, it seems that with a loopback cable, DCD isn't asserted,
- * so just check for things like this:
- * DSR _must_ be asserted.
- * One of DCD or CTS must be asserted.
- */
-
- /* LMC 1000 (SSI) LED definitions
- * led0 Green = power to adapter, Gate Array loaded &
- * driver attached
- * led1 Green = DSR and DTR and RTS and CTS are set
- * led2 Green = Cable detected
- * led3 red = No timing is available from the
- * cable or the on-board frequency
- * generator.
- */
-
- link_status = lmc_mii_readreg (sc, 0, 16);
-
- /* Is the transmit clock still available */
- ticks = LMC_CSR_READ (sc, csr_gp_timer);
- ticks = 0x0000ffff - (ticks & 0x0000ffff);
-
- lmc_led_on (sc, LMC_MII16_LED0);
-
- /* ====== transmit clock determination ===== */
- if (sc->lmc_timing == LMC_CTL_CLOCK_SOURCE_INT) {
- lmc_led_off(sc, LMC_MII16_LED3);
- }
- else if (ticks == 0 ) { /* no clock found ? */
- ret = 0;
- if (sc->last_led_err[3] != 1) {
- sc->extra_stats.tx_lossOfClockCnt++;
- printk(KERN_WARNING "%s: Lost Clock, Link Down\n", sc->name);
- }
- sc->last_led_err[3] = 1;
- lmc_led_on (sc, LMC_MII16_LED3); /* turn ON red LED */
- }
- else {
- if(sc->last_led_err[3] == 1)
- printk(KERN_WARNING "%s: Clock Returned\n", sc->name);
- sc->last_led_err[3] = 0;
- lmc_led_off (sc, LMC_MII16_LED3); /* turn OFF red LED */
- }
-
- if ((link_status & LMC_MII16_SSI_DSR) == 0) { /* Also HSSI CA */
- ret = 0;
- hw_hdsk = 0;
- }
-
-#ifdef CONFIG_LMC_IGNORE_HARDWARE_HANDSHAKE
- if ((link_status & (LMC_MII16_SSI_CTS | LMC_MII16_SSI_DCD)) == 0){
- ret = 0;
- hw_hdsk = 0;
- }
-#endif
-
- if(hw_hdsk == 0){
- if(sc->last_led_err[1] != 1)
- printk(KERN_WARNING "%s: DSR not asserted\n", sc->name);
- sc->last_led_err[1] = 1;
- lmc_led_off(sc, LMC_MII16_LED1);
- }
- else {
- if(sc->last_led_err[1] != 0)
- printk(KERN_WARNING "%s: DSR now asserted\n", sc->name);
- sc->last_led_err[1] = 0;
- lmc_led_on(sc, LMC_MII16_LED1);
- }
-
- if(ret == 1) {
- lmc_led_on(sc, LMC_MII16_LED2); /* Overall good status? */
- }
-
- return ret;
-}
-
-static void
-lmc_ssi_set_link_status (lmc_softc_t * const sc, int state)
-{
- if (state == LMC_LINK_UP)
- {
- sc->lmc_miireg16 |= (LMC_MII16_SSI_DTR | LMC_MII16_SSI_RTS);
- printk (LMC_PRINTF_FMT ": asserting DTR and RTS\n", LMC_PRINTF_ARGS);
- }
- else
- {
- sc->lmc_miireg16 &= ~(LMC_MII16_SSI_DTR | LMC_MII16_SSI_RTS);
- printk (LMC_PRINTF_FMT ": deasserting DTR and RTS\n", LMC_PRINTF_ARGS);
- }
-
- lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16);
-
-}
-
-/*
- * 0 == 16bit, 1 == 32bit
- */
-static void
-lmc_ssi_set_crc_length (lmc_softc_t * const sc, int state)
-{
- if (state == LMC_CTL_CRC_LENGTH_32)
- {
- /* 32 bit */
- sc->lmc_miireg16 |= LMC_MII16_SSI_CRC;
- sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_32;
- sc->lmc_crcSize = LMC_CTL_CRC_BYTESIZE_4;
-
- }
- else
- {
- /* 16 bit */
- sc->lmc_miireg16 &= ~LMC_MII16_SSI_CRC;
- sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_16;
- sc->lmc_crcSize = LMC_CTL_CRC_BYTESIZE_2;
- }
-
- lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16);
-}
-
-/*
- * These are bits to program the ssi frequency generator
- */
-static inline void
-write_av9110_bit (lmc_softc_t * sc, int c)
-{
- /*
- * set the data bit as we need it.
- */
- sc->lmc_gpio &= ~(LMC_GEP_CLK);
- if (c & 0x01)
- sc->lmc_gpio |= LMC_GEP_DATA;
- else
- sc->lmc_gpio &= ~(LMC_GEP_DATA);
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
-
- /*
- * set the clock to high
- */
- sc->lmc_gpio |= LMC_GEP_CLK;
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
-
- /*
- * set the clock to low again.
- */
- sc->lmc_gpio &= ~(LMC_GEP_CLK);
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
-}
-
-static void write_av9110(lmc_softc_t *sc, u32 n, u32 m, u32 v, u32 x, u32 r)
-{
- int i;
-
-#if 0
- printk (LMC_PRINTF_FMT ": speed %u, %d %d %d %d %d\n",
- LMC_PRINTF_ARGS, sc->ictl.clock_rate, n, m, v, x, r);
-#endif
-
- sc->lmc_gpio |= LMC_GEP_SSI_GENERATOR;
- sc->lmc_gpio &= ~(LMC_GEP_DATA | LMC_GEP_CLK);
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
-
- /*
- * Set the TXCLOCK, GENERATOR, SERIAL, and SERIALCLK
- * as outputs.
- */
- lmc_gpio_mkoutput (sc, (LMC_GEP_DATA | LMC_GEP_CLK
- | LMC_GEP_SSI_GENERATOR));
-
- sc->lmc_gpio &= ~(LMC_GEP_SSI_GENERATOR);
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
-
- /*
- * a shifting we will go...
- */
- for (i = 0; i < 7; i++)
- write_av9110_bit (sc, n >> i);
- for (i = 0; i < 7; i++)
- write_av9110_bit (sc, m >> i);
- for (i = 0; i < 1; i++)
- write_av9110_bit (sc, v >> i);
- for (i = 0; i < 2; i++)
- write_av9110_bit (sc, x >> i);
- for (i = 0; i < 2; i++)
- write_av9110_bit (sc, r >> i);
- for (i = 0; i < 5; i++)
- write_av9110_bit (sc, 0x17 >> i);
-
- /*
- * stop driving serial-related signals
- */
- lmc_gpio_mkinput (sc,
- (LMC_GEP_DATA | LMC_GEP_CLK
- | LMC_GEP_SSI_GENERATOR));
-}
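For reference, the 24-bit AV9110 programming word that the loops above clock out LSB-first can be packed in one expression. A minimal sketch (not part of the driver; the helper name is illustrative), assuming the field order used above — 7 bits of N, 7 bits of M, 1 bit of V, 2 bits of X, 2 bits of R, then the fixed 5-bit pattern 0x17:

	/* Illustrative repack of the bit-banged stream: bit 0 of the return
	 * value is the first bit shifted out by write_av9110_bit(). */
	static u32 av9110_program_word(u32 n, u32 m, u32 v, u32 x, u32 r)
	{
		return (n & 0x7f) |		/* 7 bits of N */
		       ((m & 0x7f) << 7) |	/* 7 bits of M */
		       ((v & 0x01) << 14) |	/* 1 bit of V */
		       ((x & 0x03) << 15) |	/* 2 bits of X */
		       ((r & 0x03) << 17) |	/* 2 bits of R */
		       (0x17u << 19);		/* fixed control bits */
	}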
-
-static void lmc_ssi_watchdog(lmc_softc_t * const sc)
-{
- u16 mii17 = lmc_mii_readreg(sc, 0, 17);
- if (((mii17 >> 3) & 7) == 7)
- lmc_led_off(sc, LMC_MII16_LED2);
- else
- lmc_led_on(sc, LMC_MII16_LED2);
-}
-
-/*
- * T1 methods
- */
-
-/*
- * The framer regs are multiplexed through MII regs 17 & 18:
- * write the register address to MII reg 17 and the data to MII reg 18.
- */
-static void
-lmc_t1_write (lmc_softc_t * const sc, int a, int d)
-{
- lmc_mii_writereg (sc, 0, 17, a);
- lmc_mii_writereg (sc, 0, 18, d);
-}
-
-/* Save a warning
-static int
-lmc_t1_read (lmc_softc_t * const sc, int a)
-{
- lmc_mii_writereg (sc, 0, 17, a);
- return lmc_mii_readreg (sc, 0, 18);
-}
-*/
-
-
-static void
-lmc_t1_init (lmc_softc_t * const sc)
-{
- u16 mii16;
- int i;
-
- sc->ictl.cardtype = LMC_CTL_CARDTYPE_LMC1200;
- mii16 = lmc_mii_readreg (sc, 0, 16);
-
- /* reset 8370 */
- mii16 &= ~LMC_MII16_T1_RST;
- lmc_mii_writereg (sc, 0, 16, mii16 | LMC_MII16_T1_RST);
- lmc_mii_writereg (sc, 0, 16, mii16);
-
- /* set T1 or E1 line. Uses sc->lmc_miireg16 in the function, so update it */
- sc->lmc_miireg16 = mii16;
- lmc_t1_set_circuit_type(sc, LMC_CTL_CIRCUIT_TYPE_T1);
- mii16 = sc->lmc_miireg16;
-
- lmc_t1_write (sc, 0x01, 0x1B); /* CR0 - primary control */
- lmc_t1_write (sc, 0x02, 0x42); /* JAT_CR - jitter atten config */
- lmc_t1_write (sc, 0x14, 0x00); /* LOOP - loopback config */
- lmc_t1_write (sc, 0x15, 0x00); /* DL3_TS - external data link timeslot */
- lmc_t1_write (sc, 0x18, 0xFF); /* PIO - programmable I/O */
- lmc_t1_write (sc, 0x19, 0x30); /* POE - programmable OE */
- lmc_t1_write (sc, 0x1A, 0x0F); /* CMUX - clock input mux */
- lmc_t1_write (sc, 0x20, 0x41); /* LIU_CR - RX LIU config */
- lmc_t1_write (sc, 0x22, 0x76); /* RLIU_CR - RX LIU config */
- lmc_t1_write (sc, 0x40, 0x03); /* RCR0 - RX config */
- lmc_t1_write (sc, 0x45, 0x00); /* RALM - RX alarm config */
- lmc_t1_write (sc, 0x46, 0x05); /* LATCH - RX alarm/err/cntr latch */
- lmc_t1_write (sc, 0x68, 0x40); /* TLIU_CR - TX LIU config */
- lmc_t1_write (sc, 0x70, 0x0D); /* TCR0 - TX framer config */
- lmc_t1_write (sc, 0x71, 0x05); /* TCR1 - TX config */
- lmc_t1_write (sc, 0x72, 0x0B); /* TFRM - TX frame format */
- lmc_t1_write (sc, 0x73, 0x00); /* TERROR - TX error insert */
- lmc_t1_write (sc, 0x74, 0x00); /* TMAN - TX manual Sa/FEBE config */
- lmc_t1_write (sc, 0x75, 0x00); /* TALM - TX alarm signal config */
- lmc_t1_write (sc, 0x76, 0x00); /* TPATT - TX test pattern config */
- lmc_t1_write (sc, 0x77, 0x00); /* TLB - TX inband loopback config */
- lmc_t1_write (sc, 0x90, 0x05); /* CLAD_CR - clock rate adapter config */
- lmc_t1_write (sc, 0x91, 0x05); /* CSEL - clad freq sel */
- lmc_t1_write (sc, 0xA6, 0x00); /* DL1_CTL - DL1 control */
- lmc_t1_write (sc, 0xB1, 0x00); /* DL2_CTL - DL2 control */
- lmc_t1_write (sc, 0xD0, 0x47); /* SBI_CR - sys bus iface config */
- lmc_t1_write (sc, 0xD1, 0x70); /* RSB_CR - RX sys bus config */
- lmc_t1_write (sc, 0xD4, 0x30); /* TSB_CR - TX sys bus config */
- for (i = 0; i < 32; i++)
- {
- lmc_t1_write (sc, 0x0E0 + i, 0x00); /* SBCn - sys bus per-channel ctl */
- lmc_t1_write (sc, 0x100 + i, 0x00); /* TPCn - TX per-channel ctl */
- lmc_t1_write (sc, 0x180 + i, 0x00); /* RPCn - RX per-channel ctl */
- }
- for (i = 1; i < 25; i++)
- {
- lmc_t1_write (sc, 0x0E0 + i, 0x0D); /* SBCn - sys bus per-channel ctl */
- }
-
- mii16 |= LMC_MII16_T1_XOE;
- lmc_mii_writereg (sc, 0, 16, mii16);
- sc->lmc_miireg16 = mii16;
-}
-
-static void
-lmc_t1_default (lmc_softc_t * const sc)
-{
- sc->lmc_miireg16 = LMC_MII16_LED_ALL;
- sc->lmc_media->set_link_status (sc, LMC_LINK_DOWN);
- sc->lmc_media->set_circuit_type (sc, LMC_CTL_CIRCUIT_TYPE_T1);
- sc->lmc_media->set_crc_length (sc, LMC_CTL_CRC_LENGTH_16);
- /* Right now we can only clock from our internal source */
- sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_INT;
-}
-
-/*
- * Given a user provided state, set ourselves up to match it. This will
- * always reset the card if needed.
- */
-static void
-lmc_t1_set_status (lmc_softc_t * const sc, lmc_ctl_t * ctl)
-{
- if (ctl == NULL)
- {
- sc->lmc_media->set_circuit_type (sc, sc->ictl.circuit_type);
- lmc_set_protocol (sc, NULL);
-
- return;
- }
-  /*
-   * check for change in circuit type
-   */
-  if (ctl->circuit_type == LMC_CTL_CIRCUIT_TYPE_T1
-      && sc->ictl.circuit_type == LMC_CTL_CIRCUIT_TYPE_E1)
-    sc->lmc_media->set_circuit_type (sc, LMC_CTL_CIRCUIT_TYPE_E1);
- else if (ctl->circuit_type == LMC_CTL_CIRCUIT_TYPE_E1
- && sc->ictl.circuit_type == LMC_CTL_CIRCUIT_TYPE_T1)
- sc->lmc_media->set_circuit_type (sc, LMC_CTL_CIRCUIT_TYPE_T1);
- lmc_set_protocol (sc, ctl);
-}
-/*
- * return hardware link status.
- * 0 == link is down, 1 == link is up.
- */
-static int
-lmc_t1_get_link_status (lmc_softc_t * const sc)
-{
- u16 link_status;
- int ret = 1;
-
- /* LMC5245 (DS3) & LMC1200 (DS1) LED definitions
- * led0 yellow = far-end adapter is in Red alarm condition
- * led1 blue = received an Alarm Indication signal
- * (upstream failure)
- * led2 Green = power to adapter, Gate Array loaded & driver
- * attached
- * led3 red = Loss of Signal (LOS) or out of frame (OOF)
- * conditions detected on T3 receive signal
- */
- lmc_led_on(sc, LMC_DS3_LED2);
-
- lmc_mii_writereg (sc, 0, 17, T1FRAMER_ALARM1_STATUS);
- link_status = lmc_mii_readreg (sc, 0, 18);
-
-
- if (link_status & T1F_RAIS) { /* turn on blue LED */
- ret = 0;
- if(sc->last_led_err[1] != 1){
- printk(KERN_WARNING "%s: Receive AIS/Blue Alarm. Far end in RED alarm\n", sc->name);
- }
- lmc_led_on(sc, LMC_DS3_LED1);
- sc->last_led_err[1] = 1;
- }
- else {
- if(sc->last_led_err[1] != 0){
- printk(KERN_WARNING "%s: End AIS/Blue Alarm\n", sc->name);
- }
- lmc_led_off (sc, LMC_DS3_LED1);
- sc->last_led_err[1] = 0;
- }
-
-  /*
-   * Yellow Alarm is nasty evil stuff: it looks at data patterns
-   * inside the channel and confuses them with HDLC framing, so
-   * ignore all yellow alarms.
-   *
-   * Do listen to the MultiFrame Yellow alarm which, while implemented
-   * in different ways, isn't in the channel and hence is somewhat
-   * more reliable.
-   */
-
- if (link_status & T1F_RMYEL) {
- ret = 0;
- if(sc->last_led_err[0] != 1){
- printk(KERN_WARNING "%s: Receive Yellow AIS Alarm\n", sc->name);
- }
- lmc_led_on(sc, LMC_DS3_LED0);
- sc->last_led_err[0] = 1;
- }
- else {
- if(sc->last_led_err[0] != 0){
- printk(KERN_WARNING "%s: End of Yellow AIS Alarm\n", sc->name);
- }
- lmc_led_off(sc, LMC_DS3_LED0);
- sc->last_led_err[0] = 0;
- }
-
- /*
-   * Loss of signal and loss of frame.
-   * Use the green bit to identify which one lit the LED.
- */
- if(link_status & T1F_RLOF){
- ret = 0;
- if(sc->last_led_err[3] != 1){
- printk(KERN_WARNING "%s: Local Red Alarm: Loss of Framing\n", sc->name);
- }
- lmc_led_on(sc, LMC_DS3_LED3);
- sc->last_led_err[3] = 1;
-
- }
- else {
- if(sc->last_led_err[3] != 0){
- printk(KERN_WARNING "%s: End Red Alarm (LOF)\n", sc->name);
- }
- if( ! (link_status & T1F_RLOS))
- lmc_led_off(sc, LMC_DS3_LED3);
- sc->last_led_err[3] = 0;
- }
-
- if(link_status & T1F_RLOS){
- ret = 0;
- if(sc->last_led_err[2] != 1){
- printk(KERN_WARNING "%s: Local Red Alarm: Loss of Signal\n", sc->name);
- }
- lmc_led_on(sc, LMC_DS3_LED3);
- sc->last_led_err[2] = 1;
-
- }
- else {
- if(sc->last_led_err[2] != 0){
- printk(KERN_WARNING "%s: End Red Alarm (LOS)\n", sc->name);
- }
- if( ! (link_status & T1F_RLOF))
- lmc_led_off(sc, LMC_DS3_LED3);
- sc->last_led_err[2] = 0;
- }
-
- sc->lmc_xinfo.t1_alarm1_status = link_status;
-
- lmc_mii_writereg (sc, 0, 17, T1FRAMER_ALARM2_STATUS);
- sc->lmc_xinfo.t1_alarm2_status = lmc_mii_readreg (sc, 0, 18);
-
- return ret;
-}
-
-/*
- * 1 == T1 Circuit Type, 0 == E1 Circuit Type
- */
-static void
-lmc_t1_set_circuit_type (lmc_softc_t * const sc, int ie)
-{
- if (ie == LMC_CTL_CIRCUIT_TYPE_T1) {
- sc->lmc_miireg16 |= LMC_MII16_T1_Z;
- sc->ictl.circuit_type = LMC_CTL_CIRCUIT_TYPE_T1;
- printk(KERN_INFO "%s: In T1 Mode\n", sc->name);
- }
- else {
- sc->lmc_miireg16 &= ~LMC_MII16_T1_Z;
- sc->ictl.circuit_type = LMC_CTL_CIRCUIT_TYPE_E1;
- printk(KERN_INFO "%s: In E1 Mode\n", sc->name);
- }
-
- lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16);
-
-}
-
-/*
- * 0 == 16bit, 1 == 32bit
- */
-static void
-lmc_t1_set_crc_length (lmc_softc_t * const sc, int state)
-{
- if (state == LMC_CTL_CRC_LENGTH_32)
- {
- /* 32 bit */
- sc->lmc_miireg16 |= LMC_MII16_T1_CRC;
- sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_32;
- sc->lmc_crcSize = LMC_CTL_CRC_BYTESIZE_4;
-
- }
- else
- {
-      /* 16 bit */
-      sc->lmc_miireg16 &= ~LMC_MII16_T1_CRC;
- sc->ictl.crc_length = LMC_CTL_CRC_LENGTH_16;
- sc->lmc_crcSize = LMC_CTL_CRC_BYTESIZE_2;
-
- }
-
- lmc_mii_writereg (sc, 0, 16, sc->lmc_miireg16);
-}
-
-/*
- * 1 == internal, 0 == external
- */
-static void
-lmc_t1_set_clock (lmc_softc_t * const sc, int ie)
-{
- int old;
- old = ie;
- if (ie == LMC_CTL_CLOCK_SOURCE_EXT)
- {
- sc->lmc_gpio &= ~(LMC_GEP_SSI_TXCLOCK);
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
- sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_EXT;
- if(old != ie)
- printk (LMC_PRINTF_FMT ": clock external\n", LMC_PRINTF_ARGS);
- }
- else
- {
- sc->lmc_gpio |= LMC_GEP_SSI_TXCLOCK;
- LMC_CSR_WRITE (sc, csr_gp, sc->lmc_gpio);
- sc->ictl.clock_source = LMC_CTL_CLOCK_SOURCE_INT;
- if(old != ie)
- printk (LMC_PRINTF_FMT ": clock internal\n", LMC_PRINTF_ARGS);
- }
-}
-
-static void
-lmc_t1_watchdog (lmc_softc_t * const sc)
-{
-}
-
-static void
-lmc_set_protocol (lmc_softc_t * const sc, lmc_ctl_t * ctl)
-{
- if (!ctl)
- sc->ictl.keepalive_onoff = LMC_CTL_ON;
-}
diff --git a/drivers/net/wan/lmc/lmc_proto.c b/drivers/net/wan/lmc/lmc_proto.c
deleted file mode 100644
index e5487616a816..000000000000
--- a/drivers/net/wan/lmc/lmc_proto.c
+++ /dev/null
@@ -1,106 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
- /*
- * Copyright (c) 1997-2000 LAN Media Corporation (LMC)
- * All rights reserved. www.lanmedia.com
- *
- * This code is written by:
- * Andrew Stanley-Jones (asj@cban.com)
- * Rob Braun (bbraun@vix.com),
- * Michael Graff (explorer@vix.com) and
- * Matt Thomas (matt@3am-software.com).
- *
- * With Help By:
- * David Boggs
- * Ron Crane
- * Allan Cox
- *
- * Driver for the LanMedia LMC5200, LMC5245, LMC1000, LMC1200 cards.
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/timer.h>
-#include <linux/ptrace.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/in.h>
-#include <linux/if_arp.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/inet.h>
-#include <linux/workqueue.h>
-#include <linux/proc_fs.h>
-#include <linux/bitops.h>
-#include <asm/processor.h> /* Processor type for cache alignment. */
-#include <asm/io.h>
-#include <asm/dma.h>
-#include <linux/smp.h>
-
-#include "lmc.h"
-#include "lmc_var.h"
-#include "lmc_debug.h"
-#include "lmc_ioctl.h"
-#include "lmc_proto.h"
-
-// attach
-void lmc_proto_attach(lmc_softc_t *sc) /*FOLD00*/
-{
- if (sc->if_type == LMC_NET) {
- struct net_device *dev = sc->lmc_device;
- /*
- * They set a few basics because they don't use HDLC
- */
- dev->flags |= IFF_POINTOPOINT;
- dev->hard_header_len = 0;
- dev->addr_len = 0;
- }
-}
-
-int lmc_proto_open(lmc_softc_t *sc)
-{
- int ret = 0;
-
- if (sc->if_type == LMC_PPP) {
- ret = hdlc_open(sc->lmc_device);
- if (ret < 0)
- printk(KERN_WARNING "%s: HDLC open failed: %d\n",
- sc->name, ret);
- }
- return ret;
-}
-
-void lmc_proto_close(lmc_softc_t *sc)
-{
- if (sc->if_type == LMC_PPP)
- hdlc_close(sc->lmc_device);
-}
-
-__be16 lmc_proto_type(lmc_softc_t *sc, struct sk_buff *skb) /*FOLD00*/
-{
- switch(sc->if_type){
- case LMC_PPP:
- return hdlc_type_trans(skb, sc->lmc_device);
- case LMC_NET:
- return htons(ETH_P_802_2);
- case LMC_RAW: /* packet type for the skbuff is kind of useless here */
- return htons(ETH_P_802_2);
- default:
- printk(KERN_WARNING "%s: No protocol set for this interface, assuming 802.2 (which is wrong!!)\n", sc->name);
- return htons(ETH_P_802_2);
- }
-}
-
-void lmc_proto_netif(lmc_softc_t *sc, struct sk_buff *skb) /*FOLD00*/
-{
- switch(sc->if_type){
- case LMC_PPP:
- case LMC_NET:
- default:
- netif_rx(skb);
- break;
- case LMC_RAW:
- break;
- }
-}
diff --git a/drivers/net/wan/lmc/lmc_proto.h b/drivers/net/wan/lmc/lmc_proto.h
deleted file mode 100644
index e56e7072de44..000000000000
--- a/drivers/net/wan/lmc/lmc_proto.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LMC_PROTO_H_
-#define _LMC_PROTO_H_
-
-#include <linux/hdlc.h>
-
-void lmc_proto_attach(lmc_softc_t *sc);
-int lmc_proto_open(lmc_softc_t *sc);
-void lmc_proto_close(lmc_softc_t *sc);
-__be16 lmc_proto_type(lmc_softc_t *sc, struct sk_buff *skb);
-void lmc_proto_netif(lmc_softc_t *sc, struct sk_buff *skb);
-
-static inline lmc_softc_t* dev_to_sc(struct net_device *dev)
-{
- return (lmc_softc_t *)dev_to_hdlc(dev)->priv;
-}
-
-#endif
diff --git a/drivers/net/wan/lmc/lmc_var.h b/drivers/net/wan/lmc/lmc_var.h
deleted file mode 100644
index 99f0aa787a35..000000000000
--- a/drivers/net/wan/lmc/lmc_var.h
+++ /dev/null
@@ -1,468 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _LMC_VAR_H_
-#define _LMC_VAR_H_
-
- /*
- * Copyright (c) 1997-2000 LAN Media Corporation (LMC)
- * All rights reserved. www.lanmedia.com
- *
- * This code is written by:
- * Andrew Stanley-Jones (asj@cban.com)
- * Rob Braun (bbraun@vix.com),
- * Michael Graff (explorer@vix.com) and
- * Matt Thomas (matt@3am-software.com).
- */
-
-#include <linux/timer.h>
-
-/*
- * basic definitions used in lmc include files
- */
-
-typedef struct lmc___softc lmc_softc_t;
-typedef struct lmc___media lmc_media_t;
-typedef struct lmc___ctl lmc_ctl_t;
-
-#define lmc_csrptr_t unsigned long
-
-#define LMC_REG_RANGE 0x80
-
-#define LMC_PRINTF_FMT "%s"
-#define LMC_PRINTF_ARGS (sc->lmc_device->name)
-
-#define TX_TIMEOUT (2*HZ)
-
-#define LMC_TXDESCS 32
-#define LMC_RXDESCS 32
-
-#define LMC_LINK_UP 1
-#define LMC_LINK_DOWN 0
-
-/* These macros for generic read and write to and from the dec chip */
-#define LMC_CSR_READ(sc, csr) \
- inl((sc)->lmc_csrs.csr)
-#define LMC_CSR_WRITE(sc, reg, val) \
- outl((val), (sc)->lmc_csrs.reg)
-
-//#ifdef _LINUX_DELAY_H
-// #define SLOW_DOWN_IO udelay(2);
-// #undef __SLOW_DOWN_IO
-// #define __SLOW_DOWN_IO udelay(2);
-//#endif
-
-#define DELAY(n) SLOW_DOWN_IO
-
-#define lmc_delay() inl(sc->lmc_csrs.csr_9)
-
-/* This macro sync's up with the mii so that reads and writes can take place */
-#define LMC_MII_SYNC(sc) do {int n=32; while( n >= 0 ) { \
- LMC_CSR_WRITE((sc), csr_9, 0x20000); \
- lmc_delay(); \
- LMC_CSR_WRITE((sc), csr_9, 0x30000); \
- lmc_delay(); \
- n--; }} while(0)
-
-struct lmc_regfile_t {
- lmc_csrptr_t csr_busmode; /* CSR0 */
- lmc_csrptr_t csr_txpoll; /* CSR1 */
- lmc_csrptr_t csr_rxpoll; /* CSR2 */
- lmc_csrptr_t csr_rxlist; /* CSR3 */
- lmc_csrptr_t csr_txlist; /* CSR4 */
- lmc_csrptr_t csr_status; /* CSR5 */
- lmc_csrptr_t csr_command; /* CSR6 */
- lmc_csrptr_t csr_intr; /* CSR7 */
- lmc_csrptr_t csr_missed_frames; /* CSR8 */
- lmc_csrptr_t csr_9; /* CSR9 */
- lmc_csrptr_t csr_10; /* CSR10 */
- lmc_csrptr_t csr_11; /* CSR11 */
- lmc_csrptr_t csr_12; /* CSR12 */
- lmc_csrptr_t csr_13; /* CSR13 */
- lmc_csrptr_t csr_14; /* CSR14 */
- lmc_csrptr_t csr_15; /* CSR15 */
-};
-
-#define csr_enetrom csr_9 /* 21040 */
-#define csr_reserved csr_10 /* 21040 */
-#define csr_full_duplex csr_11 /* 21040 */
-#define csr_bootrom csr_10 /* 21041/21140A/?? */
-#define csr_gp csr_12 /* 21140* */
-#define csr_watchdog csr_15 /* 21140* */
-#define csr_gp_timer csr_11 /* 21041/21140* */
-#define csr_srom_mii csr_9 /* 21041/21140* */
-#define csr_sia_status csr_12 /* 2104x */
-#define csr_sia_connectivity csr_13 /* 2104x */
-#define csr_sia_tx_rx csr_14 /* 2104x */
-#define csr_sia_general csr_15 /* 2104x */
-
-/* tulip length/control transmit descriptor definitions
- * used to define bits in the second tulip_desc_t field (length)
- * for the transmit descriptor -baz */
-
-#define LMC_TDES_FIRST_BUFFER_SIZE ((u32)(0x000007FF))
-#define LMC_TDES_SECOND_BUFFER_SIZE ((u32)(0x003FF800))
-#define LMC_TDES_HASH_FILTERING ((u32)(0x00400000))
-#define LMC_TDES_DISABLE_PADDING ((u32)(0x00800000))
-#define LMC_TDES_SECOND_ADDR_CHAINED ((u32)(0x01000000))
-#define LMC_TDES_END_OF_RING ((u32)(0x02000000))
-#define LMC_TDES_ADD_CRC_DISABLE ((u32)(0x04000000))
-#define LMC_TDES_SETUP_PACKET ((u32)(0x08000000))
-#define LMC_TDES_INVERSE_FILTERING ((u32)(0x10000000))
-#define LMC_TDES_FIRST_SEGMENT ((u32)(0x20000000))
-#define LMC_TDES_LAST_SEGMENT ((u32)(0x40000000))
-#define LMC_TDES_INTERRUPT_ON_COMPLETION ((u32)(0x80000000))
-
-#define TDES_SECOND_BUFFER_SIZE_BIT_NUMBER 11
-#define TDES_COLLISION_COUNT_BIT_NUMBER 3
-
-/* Constants for the RCV descriptor RDES */
-
-#define LMC_RDES_OVERFLOW ((u32)(0x00000001))
-#define LMC_RDES_CRC_ERROR ((u32)(0x00000002))
-#define LMC_RDES_DRIBBLING_BIT ((u32)(0x00000004))
-#define LMC_RDES_REPORT_ON_MII_ERR ((u32)(0x00000008))
-#define LMC_RDES_RCV_WATCHDOG_TIMEOUT ((u32)(0x00000010))
-#define LMC_RDES_FRAME_TYPE ((u32)(0x00000020))
-#define LMC_RDES_COLLISION_SEEN ((u32)(0x00000040))
-#define LMC_RDES_FRAME_TOO_LONG ((u32)(0x00000080))
-#define LMC_RDES_LAST_DESCRIPTOR ((u32)(0x00000100))
-#define LMC_RDES_FIRST_DESCRIPTOR ((u32)(0x00000200))
-#define LMC_RDES_MULTICAST_FRAME ((u32)(0x00000400))
-#define LMC_RDES_RUNT_FRAME ((u32)(0x00000800))
-#define LMC_RDES_DATA_TYPE ((u32)(0x00003000))
-#define LMC_RDES_LENGTH_ERROR ((u32)(0x00004000))
-#define LMC_RDES_ERROR_SUMMARY ((u32)(0x00008000))
-#define LMC_RDES_FRAME_LENGTH ((u32)(0x3FFF0000))
-#define LMC_RDES_OWN_BIT ((u32)(0x80000000))
-
-#define RDES_FRAME_LENGTH_BIT_NUMBER 16
-
-#define LMC_RDES_ERROR_MASK ( (u32)( \
- LMC_RDES_OVERFLOW \
- | LMC_RDES_DRIBBLING_BIT \
- | LMC_RDES_REPORT_ON_MII_ERR \
- | LMC_RDES_COLLISION_SEEN ) )
-
-
-/*
- * Ioctl info
- */
-
-typedef struct {
- u32 n;
- u32 m;
- u32 v;
- u32 x;
- u32 r;
- u32 f;
- u32 exact;
-} lmc_av9110_t;
-
-/*
- * Common structure passed to the ioctl code.
- */
-struct lmc___ctl {
- u32 cardtype;
- u32 clock_source; /* HSSI, T1 */
- u32 clock_rate; /* T1 */
- u32 crc_length;
- u32 cable_length; /* DS3 */
- u32 scrambler_onoff; /* DS3 */
- u32 cable_type; /* T1 */
- u32 keepalive_onoff; /* protocol */
- u32 ticks; /* ticks/sec */
- union {
- lmc_av9110_t ssi;
- } cardspec;
- u32 circuit_type; /* T1 or E1 */
-};
-
-
-/*
- * Careful, look at the data sheet, there's more to this
- * structure than meets the eye. It should probably be:
- *
- * struct tulip_desc_t {
- * u8 own:1;
- * u32 status:31;
- * u32 control:10;
- * u32 buffer1;
- * u32 buffer2;
- * };
- * You could also expand status control to provide more bit information
- */
-
-struct tulip_desc_t {
- s32 status;
- s32 length;
- u32 buffer1;
- u32 buffer2;
-};
-
-/*
- * media independent methods to check on media status, link, light LEDs,
- * etc.
- */
-struct lmc___media {
- void (* init)(lmc_softc_t * const);
- void (* defaults)(lmc_softc_t * const);
- void (* set_status)(lmc_softc_t * const, lmc_ctl_t *);
- void (* set_clock_source)(lmc_softc_t * const, int);
- void (* set_speed)(lmc_softc_t * const, lmc_ctl_t *);
- void (* set_cable_length)(lmc_softc_t * const, int);
- void (* set_scrambler)(lmc_softc_t * const, int);
- int (* get_link_status)(lmc_softc_t * const);
- void (* set_link_status)(lmc_softc_t * const, int);
- void (* set_crc_length)(lmc_softc_t * const, int);
- void (* set_circuit_type)(lmc_softc_t * const, int);
- void (* watchdog)(lmc_softc_t * const);
-};
-
-
-#define STATCHECK 0xBEEFCAFE
-
-struct lmc_extra_statistics
-{
- u32 version_size;
- u32 lmc_cardtype;
-
- u32 tx_ProcTimeout;
- u32 tx_IntTimeout;
- u32 tx_NoCompleteCnt;
- u32 tx_MaxXmtsB4Int;
- u32 tx_TimeoutCnt;
- u32 tx_OutOfSyncPtr;
- u32 tx_tbusy0;
- u32 tx_tbusy1;
- u32 tx_tbusy_calls;
- u32 resetCount;
- u32 lmc_txfull;
- u32 tbusy;
- u32 dirtyTx;
- u32 lmc_next_tx;
- u32 otherTypeCnt;
- u32 lastType;
- u32 lastTypeOK;
- u32 txLoopCnt;
- u32 usedXmtDescripCnt;
- u32 txIndexCnt;
- u32 rxIntLoopCnt;
-
- u32 rx_SmallPktCnt;
- u32 rx_BadPktSurgeCnt;
- u32 rx_BuffAllocErr;
- u32 tx_lossOfClockCnt;
-
- /* T1 error counters */
- u32 framingBitErrorCount;
- u32 lineCodeViolationCount;
-
- u32 lossOfFrameCount;
- u32 changeOfFrameAlignmentCount;
- u32 severelyErroredFrameCount;
-
- u32 check;
-};
-
-typedef struct lmc_xinfo {
- u32 Magic0; /* BEEFCAFE */
-
- u32 PciCardType;
- u32 PciSlotNumber; /* PCI slot number */
-
- u16 DriverMajorVersion;
- u16 DriverMinorVersion;
- u16 DriverSubVersion;
-
- u16 XilinxRevisionNumber;
- u16 MaxFrameSize;
-
- u16 t1_alarm1_status;
- u16 t1_alarm2_status;
-
- int link_status;
- u32 mii_reg16;
-
- u32 Magic1; /* DEADBEEF */
-} LMC_XINFO;
-
-
-/*
- * forward decl
- */
-struct lmc___softc {
- char *name;
- u8 board_idx;
- struct lmc_extra_statistics extra_stats;
- struct net_device *lmc_device;
-
- int hang, rxdesc, bad_packet, some_counter;
- u32 txgo;
- struct lmc_regfile_t lmc_csrs;
- volatile u32 lmc_txtick;
- volatile u32 lmc_rxtick;
- u32 lmc_flags;
- u32 lmc_intrmask; /* our copy of csr_intr */
- u32 lmc_cmdmode; /* our copy of csr_cmdmode */
- u32 lmc_busmode; /* our copy of csr_busmode */
- u32 lmc_gpio_io; /* state of in/out settings */
- u32 lmc_gpio; /* state of outputs */
- struct sk_buff* lmc_txq[LMC_TXDESCS];
- struct sk_buff* lmc_rxq[LMC_RXDESCS];
- volatile
- struct tulip_desc_t lmc_rxring[LMC_RXDESCS];
- volatile
- struct tulip_desc_t lmc_txring[LMC_TXDESCS];
- unsigned int lmc_next_rx, lmc_next_tx;
- volatile
- unsigned int lmc_taint_tx, lmc_taint_rx;
- int lmc_tx_start, lmc_txfull;
- int lmc_txbusy;
- u16 lmc_miireg16;
- int lmc_ok;
- int last_link_status;
- int lmc_cardtype;
- u32 last_frameerr;
- lmc_media_t *lmc_media;
- struct timer_list timer;
- lmc_ctl_t ictl;
- u32 TxDescriptControlInit;
-
- int tx_TimeoutInd; /* additional driver state */
- int tx_TimeoutDisplay;
- unsigned int lastlmc_taint_tx;
- int lasttx_packets;
- u32 tx_clockState;
- u32 lmc_crcSize;
- LMC_XINFO lmc_xinfo;
- char lmc_yel, lmc_blue, lmc_red; /* for T1 and DS3 */
- char lmc_timing; /* for HSSI and SSI */
- int got_irq;
-
- char last_led_err[4];
-
- u32 last_int;
- u32 num_int;
-
- spinlock_t lmc_lock;
- u16 if_type; /* HDLC/PPP or NET */
-
- /* Failure cases */
- u8 failed_ring;
- u8 failed_recv_alloc;
-
- /* Structure check */
- u32 check;
-};
-
-#define LMC_PCI_TIME 1
-#define LMC_EXT_TIME 0
-
-#define PKT_BUF_SZ 1542 /* was 1536 */
-
-/* CSR5 settings */
-#define TIMER_INT 0x00000800
-#define TP_LINK_FAIL 0x00001000
-#define TP_LINK_PASS 0x00000010
-#define NORMAL_INT 0x00010000
-#define ABNORMAL_INT 0x00008000
-#define RX_JABBER_INT 0x00000200
-#define RX_DIED 0x00000100
-#define RX_NOBUFF 0x00000080
-#define RX_INT 0x00000040
-#define TX_FIFO_UNDER 0x00000020
-#define TX_JABBER 0x00000008
-#define TX_NOBUFF 0x00000004
-#define TX_DIED 0x00000002
-#define TX_INT 0x00000001
-
-/* CSR6 settings */
-#define OPERATION_MODE 0x00000200 /* Full Duplex */
-#define PROMISC_MODE 0x00000040 /* Promiscuous Mode */
-#define RECEIVE_ALL 0x40000000 /* Receive All */
-#define PASS_BAD_FRAMES 0x00000008 /* Pass Bad Frames */
-
-/* Dec control registers CSR6 as well */
-#define LMC_DEC_ST 0x00002000
-#define LMC_DEC_SR 0x00000002
-
-/* CSR15 settings */
-#define RECV_WATCHDOG_DISABLE 0x00000010
-#define JABBER_DISABLE 0x00000001
-
-/* More settings */
-/*
- * CSR6 -- Command (Operation Mode) Register
- */
-#define TULIP_CMD_RECEIVEALL 0x40000000L /* (RW) Receive all frames? */
-#define TULIP_CMD_MUSTBEONE 0x02000000L /* (RW) Must Be One (21140) */
-#define TULIP_CMD_TXTHRSHLDCTL 0x00400000L /* (RW) Transmit Threshold Mode (21140) */
-#define TULIP_CMD_STOREFWD 0x00200000L /* (RW) Store and Forward (21140) */
-#define TULIP_CMD_NOHEARTBEAT 0x00080000L /* (RW) No Heartbeat (21140) */
-#define TULIP_CMD_PORTSELECT 0x00040000L /* (RW) Port Select (100Mb) (21140) */
-#define TULIP_CMD_FULLDUPLEX 0x00000200L /* (RW) Full Duplex Mode */
-#define TULIP_CMD_OPERMODE 0x00000C00L /* (RW) Operating Mode */
-#define TULIP_CMD_PROMISCUOUS 0x00000041L /* (RW) Promiscuous Mode */
-#define TULIP_CMD_PASSBADPKT 0x00000008L /* (RW) Pass Bad Frames */
-#define TULIP_CMD_THRESHOLDCTL 0x0000C000L /* (RW) Threshold Control */
-
-#define TULIP_GP_PINSET 0x00000100L
-#define TULIP_BUSMODE_SWRESET 0x00000001L
-#define TULIP_WATCHDOG_TXDISABLE 0x00000001L
-#define TULIP_WATCHDOG_RXDISABLE 0x00000010L
-
-#define TULIP_STS_NORMALINTR 0x00010000L /* (RW) Normal Interrupt */
-#define TULIP_STS_ABNRMLINTR 0x00008000L /* (RW) Abnormal Interrupt */
-#define TULIP_STS_ERI 0x00004000L /* (RW) Early Receive Interrupt */
-#define TULIP_STS_SYSERROR 0x00002000L /* (RW) System Error */
-#define TULIP_STS_GTE 0x00000800L /* (RW) General Purpose Timer Exp */
-#define TULIP_STS_ETI 0x00000400L /* (RW) Early Transmit Interrupt */
-#define TULIP_STS_RXWT 0x00000200L /* (RW) Receiver Watchdog Timeout */
-#define TULIP_STS_RXSTOPPED 0x00000100L /* (RW) Receiver Process Stopped */
-#define TULIP_STS_RXNOBUF 0x00000080L /* (RW) Receive Buf Unavail */
-#define TULIP_STS_RXINTR 0x00000040L /* (RW) Receive Interrupt */
-#define TULIP_STS_TXUNDERFLOW 0x00000020L /* (RW) Transmit Underflow */
-#define TULIP_STS_TXJABER 0x00000008L /* (RW) Jabber timeout */
-#define TULIP_STS_TXNOBUF 0x00000004L
-#define TULIP_STS_TXSTOPPED 0x00000002L /* (RW) Transmit Process Stopped */
-#define TULIP_STS_TXINTR 0x00000001L /* (RW) Transmit Interrupt */
-
-#define TULIP_STS_RXS_STOPPED 0x00000000L /* 000 - Stopped */
-
-#define TULIP_STS_RXSTOPPED 0x00000100L /* (RW) Receive Process Stopped */
-#define TULIP_STS_RXNOBUF 0x00000080L
-
-#define TULIP_CMD_TXRUN 0x00002000L /* (RW) Start/Stop Transmitter */
-#define TULIP_CMD_RXRUN 0x00000002L /* (RW) Start/Stop Receive Filtering */
-#define TULIP_DSTS_TxDEFERRED 0x00000001 /* Initially Deferred */
-#define TULIP_DSTS_OWNER 0x80000000 /* Owner (1 = 21040) */
-#define TULIP_DSTS_RxMIIERR 0x00000008
-#define LMC_DSTS_ERRSUM (TULIP_DSTS_RxMIIERR)
-
-#define TULIP_DEFAULT_INTR_MASK (TULIP_STS_NORMALINTR \
- | TULIP_STS_RXINTR \
- | TULIP_STS_TXINTR \
- | TULIP_STS_ABNRMLINTR \
- | TULIP_STS_SYSERROR \
- | TULIP_STS_TXSTOPPED \
- | TULIP_STS_TXUNDERFLOW\
- | TULIP_STS_RXSTOPPED )
-
-#define DESC_OWNED_BY_SYSTEM ((u32)(0x00000000))
-#define DESC_OWNED_BY_DC21X4 ((u32)(0x80000000))
-
-#ifndef TULIP_CMD_RECEIVEALL
-#define TULIP_CMD_RECEIVEALL 0x40000000L
-#endif
-
-/* Adapter module number */
-#define LMC_ADAP_HSSI 2
-#define LMC_ADAP_DS3 3
-#define LMC_ADAP_SSI 4
-#define LMC_ADAP_T1 5
-
-#define LMC_MTU 1500
-
-#define LMC_CRC_LEN_16 2 /* 16-bit CRC */
-#define LMC_CRC_LEN_32 4
-
-#endif /* _LMC_VAR_H_ */
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index 9055cb380ee2..db0f4fcfdaf4 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -79,8 +79,9 @@ extern int icmpv6_init(void);
extern int icmpv6_err_convert(u8 type, u8 code,
int *err);
extern void icmpv6_cleanup(void);
-extern void icmpv6_param_prob(struct sk_buff *skb,
- u8 code, int pos);
+extern void icmpv6_param_prob_reason(struct sk_buff *skb,
+ u8 code, int pos,
+ enum skb_drop_reason reason);
struct flowi6;
struct in6_addr;
@@ -91,6 +92,12 @@ extern void icmpv6_flow_init(struct sock *sk,
const struct in6_addr *daddr,
int oif);
+static inline void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
+{
+ icmpv6_param_prob_reason(skb, code, pos,
+ SKB_DROP_REASON_NOT_SPECIFIED);
+}
+
static inline bool icmpv6_is_err(int type)
{
switch (type) {
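With the reason-carrying variant available, callers that know why a packet is bad can say so, while the old icmpv6_param_prob() signature keeps working through the inline wrapper above. A hypothetical caller sketch (the condition and offset variable are illustrative, not from this patch):

	if (bad_header_field)
		icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, offset,
					 SKB_DROP_REASON_IP_INHDR);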
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
index dacf69516002..0f2596297f6a 100644
--- a/include/linux/mlx5/accel.h
+++ b/include/linux/mlx5/accel.h
@@ -111,46 +111,43 @@ struct mlx5_accel_esp_xfrm {
struct mlx5_accel_esp_xfrm_attrs attrs;
};
-enum {
- MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA = 1UL << 0,
-};
-
enum mlx5_accel_ipsec_cap {
MLX5_ACCEL_IPSEC_CAP_DEVICE = 1 << 0,
- MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA = 1 << 1,
- MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 2,
- MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 3,
- MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 4,
- MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER = 1 << 5,
- MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 6,
- MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN = 1 << 7,
+ MLX5_ACCEL_IPSEC_CAP_ESP = 1 << 1,
+ MLX5_ACCEL_IPSEC_CAP_IPV6 = 1 << 2,
+ MLX5_ACCEL_IPSEC_CAP_LSO = 1 << 3,
+ MLX5_ACCEL_IPSEC_CAP_ESN = 1 << 4,
};
-#ifdef CONFIG_MLX5_ACCEL
+#ifdef CONFIG_MLX5_EN_IPSEC
-u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev);
struct mlx5_accel_esp_xfrm *
mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags);
+ const struct mlx5_accel_esp_xfrm_attrs *attrs);
void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
const struct mlx5_accel_esp_xfrm_attrs *attrs);
#else
-static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; }
+static inline u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
static inline struct mlx5_accel_esp_xfrm *
mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
- const struct mlx5_accel_esp_xfrm_attrs *attrs,
- u32 flags) { return ERR_PTR(-EOPNOTSUPP); }
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
static inline void
mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {}
static inline int
mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; }
-#endif /* CONFIG_MLX5_ACCEL */
+#endif /* CONFIG_MLX5_EN_IPSEC */
#endif /* __MLX5_ACCEL_H__ */
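With the REQUIRE_METADATA flag gone, mlx5_accel_esp_create_xfrm() loses its flags argument; affected callers shrink accordingly. A sketch of the mechanical update (not taken from this hunk):

	/* before: xfrm = mlx5_accel_esp_create_xfrm(mdev, &attrs, 0); */
	xfrm = mlx5_accel_esp_create_xfrm(mdev, &attrs);
	if (IS_ERR(xfrm))
		return PTR_ERR(xfrm);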
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 9424503eb8d3..ff47d49d8be4 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -778,9 +778,6 @@ struct mlx5_core_dev {
#ifdef CONFIG_MLX5_FPGA
struct mlx5_fpga_device *fpga;
#endif
-#ifdef CONFIG_MLX5_ACCEL
- const struct mlx5_accel_ipsec_ops *ipsec_ops;
-#endif
struct mlx5_clock clock;
struct mlx5_ib_clock_info *clock_info;
struct mlx5_fw_tracer *tracer;
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 07d77323f78a..45c7c0d67635 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -54,7 +54,6 @@ enum {
enum {
MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC = 0x2,
- MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS = 0x3,
};
struct mlx5_ifc_fpga_shell_caps_bits {
@@ -387,89 +386,6 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits {
u8 reserved_at_40[0x40];
};
-struct mlx5_ifc_tls_extended_cap_bits {
- u8 aes_gcm_128[0x1];
- u8 aes_gcm_256[0x1];
- u8 reserved_at_2[0x1e];
- u8 reserved_at_20[0x20];
- u8 context_capacity_total[0x20];
- u8 context_capacity_rx[0x20];
- u8 context_capacity_tx[0x20];
- u8 reserved_at_a0[0x10];
- u8 tls_counter_size[0x10];
- u8 tls_counters_addr_low[0x20];
- u8 tls_counters_addr_high[0x20];
- u8 rx[0x1];
- u8 tx[0x1];
- u8 tls_v12[0x1];
- u8 tls_v13[0x1];
- u8 lro[0x1];
- u8 ipv6[0x1];
- u8 reserved_at_106[0x1a];
-};
-
-struct mlx5_ifc_ipsec_extended_cap_bits {
- u8 encapsulation[0x20];
-
- u8 reserved_0[0x12];
- u8 v2_command[0x1];
- u8 udp_encap[0x1];
- u8 rx_no_trailer[0x1];
- u8 ipv4_fragment[0x1];
- u8 ipv6[0x1];
- u8 esn[0x1];
- u8 lso[0x1];
- u8 transport_and_tunnel_mode[0x1];
- u8 tunnel_mode[0x1];
- u8 transport_mode[0x1];
- u8 ah_esp[0x1];
- u8 esp[0x1];
- u8 ah[0x1];
- u8 ipv4_options[0x1];
-
- u8 auth_alg[0x20];
-
- u8 enc_alg[0x20];
-
- u8 sa_cap[0x20];
-
- u8 reserved_1[0x10];
- u8 number_of_ipsec_counters[0x10];
-
- u8 ipsec_counters_addr_low[0x20];
- u8 ipsec_counters_addr_high[0x20];
-};
-
-struct mlx5_ifc_ipsec_counters_bits {
- u8 dec_in_packets[0x40];
-
- u8 dec_out_packets[0x40];
-
- u8 dec_bypass_packets[0x40];
-
- u8 enc_in_packets[0x40];
-
- u8 enc_out_packets[0x40];
-
- u8 enc_bypass_packets[0x40];
-
- u8 drop_dec_packets[0x40];
-
- u8 failed_auth_dec_packets[0x40];
-
- u8 drop_enc_packets[0x40];
-
- u8 success_add_sa[0x40];
-
- u8 fail_add_sa[0x40];
-
- u8 success_delete_sa[0x40];
-
- u8 fail_delete_sa[0x40];
-
- u8 dropped_cmd[0x40];
-};
-
enum {
MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RETRY_COUNTER_EXPIRED = 0x1,
MLX5_FPGA_QP_ERROR_EVENT_SYNDROME_RNR_EXPIRED = 0x2,
@@ -486,131 +402,4 @@ struct mlx5_ifc_fpga_qp_error_event_bits {
u8 reserved_at_c0[0x8];
u8 fpga_qpn[0x18];
};
-enum mlx5_ifc_fpga_ipsec_response_syndrome {
- MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0,
- MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
- MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE = 2,
- MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
-};
-
-struct mlx5_ifc_fpga_ipsec_cmd_resp {
- __be32 syndrome;
- union {
- __be32 sw_sa_handle;
- __be32 flags;
- };
- u8 reserved[24];
-} __packed;
-
-enum mlx5_ifc_fpga_ipsec_cmd_opcode {
- MLX5_FPGA_IPSEC_CMD_OP_ADD_SA = 0,
- MLX5_FPGA_IPSEC_CMD_OP_DEL_SA = 1,
- MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 = 2,
- MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 = 3,
- MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2 = 4,
- MLX5_FPGA_IPSEC_CMD_OP_SET_CAP = 5,
-};
-
-enum mlx5_ifc_fpga_ipsec_cap {
- MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0),
-};
-
-struct mlx5_ifc_fpga_ipsec_cmd_cap {
- __be32 cmd;
- __be32 flags;
- u8 reserved[24];
-} __packed;
-
-enum mlx5_ifc_fpga_ipsec_sa_flags {
- MLX5_FPGA_IPSEC_SA_ESN_EN = BIT(0),
- MLX5_FPGA_IPSEC_SA_ESN_OVERLAP = BIT(1),
- MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2),
- MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3),
- MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4),
- MLX5_FPGA_IPSEC_SA_SA_VALID = BIT(5),
- MLX5_FPGA_IPSEC_SA_IP_ESP = BIT(6),
- MLX5_FPGA_IPSEC_SA_IP_AH = BIT(7),
-};
-
-enum mlx5_ifc_fpga_ipsec_sa_enc_mode {
- MLX5_FPGA_IPSEC_SA_ENC_MODE_NONE = 0,
- MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128 = 1,
- MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128 = 3,
-};
-
-struct mlx5_ifc_fpga_ipsec_sa_v1 {
- __be32 cmd;
- u8 key_enc[32];
- u8 key_auth[32];
- __be32 sip[4];
- __be32 dip[4];
- union {
- struct {
- __be32 reserved;
- u8 salt_iv[8];
- __be32 salt;
- } __packed gcm;
- struct {
- u8 salt[16];
- } __packed cbc;
- };
- __be32 spi;
- __be32 sw_sa_handle;
- __be16 tfclen;
- u8 enc_mode;
- u8 reserved1[2];
- u8 flags;
- u8 reserved2[2];
-};
-
-struct mlx5_ifc_fpga_ipsec_sa {
- struct mlx5_ifc_fpga_ipsec_sa_v1 ipsec_sa_v1;
- __be16 udp_sp;
- __be16 udp_dp;
- u8 reserved1[4];
- __be32 esn;
- __be16 vid; /* only 12 bits, rest is reserved */
- __be16 reserved2;
-} __packed;
-
-enum fpga_tls_cmds {
- CMD_SETUP_STREAM = 0x1001,
- CMD_TEARDOWN_STREAM = 0x1002,
- CMD_RESYNC_RX = 0x1003,
-};
-
-#define MLX5_TLS_1_2 (0)
-
-#define MLX5_TLS_ALG_AES_GCM_128 (0)
-#define MLX5_TLS_ALG_AES_GCM_256 (1)
-
-struct mlx5_ifc_tls_cmd_bits {
- u8 command_type[0x20];
- u8 ipv6[0x1];
- u8 direction_sx[0x1];
- u8 tls_version[0x2];
- u8 reserved[0x1c];
- u8 swid[0x20];
- u8 src_port[0x10];
- u8 dst_port[0x10];
- union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
- union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
- u8 tls_rcd_sn[0x40];
- u8 tcp_sn[0x20];
- u8 tls_implicit_iv[0x20];
- u8 tls_xor_iv[0x40];
- u8 encryption_key[0x100];
- u8 alg[4];
- u8 reserved2[0x1c];
- u8 reserved3[0x4a0];
-};
-
-struct mlx5_ifc_tls_resp_bits {
- u8 syndrome[0x20];
- u8 stream_id[0x20];
- u8 reserved[0x40];
-};
-
-#define MLX5_TLS_COMMAND_SIZE (0x100)
-
#endif /* MLX5_IFC_FPGA_H */
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 28a928b0684b..e96ee1e348cb 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -141,7 +141,7 @@ enum mlx5_ptys_width {
MLX5_PTYS_WIDTH_12X = 1 << 4,
};
-#define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
+#define MLX5E_PROT_MASK(link_mode) (1U << link_mode)
#define MLX5_GET_ETH_PROTO(reg, out, ext, field) \
(ext ? MLX5_GET(reg, out, ext_##field) : \
MLX5_GET(reg, out, field))
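The 1U suffix matters once a link mode occupies bit 31: left-shifting a signed 1 into the sign bit is undefined behaviour in C, while the unsigned shift is well defined. A minimal illustration:

	u32 ok = 1U << 31;	/* well defined: 0x80000000 */
	/* u32 bad = 1 << 31;	undefined: shifts a signed int into the sign bit */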
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 59e27a2b7bf0..a602f29365b0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -59,7 +59,8 @@ struct dsa_port;
struct ip_tunnel_parm;
struct macsec_context;
struct macsec_ops;
-
+struct netdev_name_node;
+struct sd_flow_limit;
struct sfp_bus;
/* 802.11 specific */
struct wireless_dev;
@@ -202,6 +203,7 @@ struct net_device_core_stats {
local_t rx_dropped;
local_t tx_dropped;
local_t rx_nohandler;
+ local_t rx_otherhost_dropped;
} __aligned(4 * sizeof(local_t));
#include <linux/cache.h>
@@ -862,6 +864,7 @@ enum net_device_path_type {
DEV_PATH_BRIDGE,
DEV_PATH_PPPOE,
DEV_PATH_DSA,
+ DEV_PATH_MTK_WDMA,
};
struct net_device_path {
@@ -887,6 +890,12 @@ struct net_device_path {
int port;
u16 proto;
} dsa;
+ struct {
+ u8 wdma_idx;
+ u8 queue;
+ u16 wcid;
+ u8 bss;
+ } mtk_wdma;
};
};
@@ -1013,16 +1022,6 @@ struct dev_ifalias {
struct devlink;
struct tlsdev_ops;
-struct netdev_name_node {
- struct hlist_node hlist;
- struct list_head list;
- struct net_device *dev;
- const char *name;
-};
-
-int netdev_name_node_alt_create(struct net_device *dev, const char *name);
-int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
-
struct netdev_net_notifier {
struct list_head list;
struct notifier_block *nb;
@@ -1261,6 +1260,10 @@ struct netdev_net_notifier {
* struct net_device *dev,
* const unsigned char *addr, u16 vid)
* Deletes the FDB entry from dev corresponding to addr.
+ * int (*ndo_fdb_del_bulk)(struct ndmsg *ndm, struct nlattr *tb[],
+ * struct net_device *dev,
+ * u16 vid,
+ * struct netlink_ext_ack *extack);
* int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
* struct net_device *dev, struct net_device *filter_dev,
* int *idx)
@@ -1511,6 +1514,11 @@ struct net_device_ops {
struct net_device *dev,
const unsigned char *addr,
u16 vid);
+ int (*ndo_fdb_del_bulk)(struct ndmsg *ndm,
+ struct nlattr *tb[],
+ struct net_device *dev,
+ u16 vid,
+ struct netlink_ext_ack *extack);
int (*ndo_fdb_dump)(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
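The new ndo_fdb_del_bulk hook lets rtnetlink hand a driver a single flush-style request instead of deleting entries one by one. A hypothetical driver stub showing the expected shape (the foo_* names and flush helper are illustrative, not from this patch):

	static int foo_fdb_del_bulk(struct ndmsg *ndm, struct nlattr *tb[],
				    struct net_device *dev, u16 vid,
				    struct netlink_ext_ack *extack)
	{
		/* Delete every FDB entry matching the ndm/vid filter in
		 * one pass; foo_fdb_flush() is a placeholder. */
		foo_fdb_flush(netdev_priv(dev), vid);
		return 0;
	}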
@@ -2968,7 +2976,6 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex);
struct net_device *__dev_get_by_index(struct net *net, int ifindex);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
-int netdev_get_name(struct net *net, char *name, int ifindex);
int dev_restart(struct net_device *dev);
@@ -3027,19 +3034,6 @@ static inline bool dev_has_header(const struct net_device *dev)
return dev->header_ops && dev->header_ops->create;
}
-#ifdef CONFIG_NET_FLOW_LIMIT
-#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
-struct sd_flow_limit {
- u64 count;
- unsigned int num_buckets;
- unsigned int history_head;
- u16 history[FLOW_LIMIT_HISTORY];
- u8 buckets[];
-};
-
-extern int netdev_flow_limit_table_len;
-#endif /* CONFIG_NET_FLOW_LIMIT */
-
/*
* Incoming packets are placed on per-CPU queues
*/
@@ -3763,7 +3757,6 @@ int dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack);
void __dev_notify_flags(struct net_device *, unsigned int old_flags,
unsigned int gchanges);
-int dev_change_name(struct net_device *, const char *);
int dev_set_alias(struct net_device *, const char *, size_t);
int dev_get_alias(const struct net_device *, char *, size_t);
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
@@ -3775,13 +3768,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net,
return __dev_change_net_namespace(dev, net, pat, 0);
}
int __dev_set_mtu(struct net_device *, int);
-int dev_validate_mtu(struct net_device *dev, int mtu,
- struct netlink_ext_ack *extack);
-int dev_set_mtu_ext(struct net_device *dev, int mtu,
- struct netlink_ext_ack *extack);
int dev_set_mtu(struct net_device *, int);
-int dev_change_tx_queue_len(struct net_device *, unsigned long);
-void dev_set_group(struct net_device *, int);
int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
struct netlink_ext_ack *extack);
int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
@@ -3789,24 +3776,13 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
struct netlink_ext_ack *extack);
int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
-int dev_change_carrier(struct net_device *, bool new_carrier);
-int dev_get_phys_port_id(struct net_device *dev,
- struct netdev_phys_item_id *ppid);
-int dev_get_phys_port_name(struct net_device *dev,
- char *name, size_t len);
int dev_get_port_parent_id(struct net_device *dev,
struct netdev_phys_item_id *ppid, bool recurse);
bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
-int dev_change_proto_down(struct net_device *dev, bool proto_down);
-void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
- u32 value);
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
-typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
-int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
- int fd, int expected_fd, u32 flags);
int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
u8 dev_xdp_prog_count(struct net_device *dev);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
@@ -3871,6 +3847,7 @@ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \
DEV_CORE_STATS_INC(rx_dropped)
DEV_CORE_STATS_INC(tx_dropped)
DEV_CORE_STATS_INC(rx_nohandler)
+DEV_CORE_STATS_INC(rx_otherhost_dropped)
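The macro above generates dev_core_stats_rx_otherhost_dropped_inc(); a receive-path caller dropping a frame addressed to another host might use it like this (sketch only; the drop site is illustrative):

	if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) {
		dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
		kfree_skb_reason(skb, SKB_DROP_REASON_OTHERHOST);
		return NET_RX_DROP;
	}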
static __always_inline int ____dev_forward_skb(struct net_device *dev,
struct sk_buff *skb,
@@ -3891,12 +3868,6 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev,
bool dev_nit_active(struct net_device *dev);
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
-extern int netdev_budget;
-extern unsigned int netdev_budget_usecs;
-
-/* Called by rtnetlink.c:rtnl_unlock() */
-void netdev_run_todo(void);
-
static inline void __dev_put(struct net_device *dev)
{
if (dev) {
@@ -4013,10 +3984,7 @@ static inline void dev_replace_track(struct net_device *odev,
* called netif_lowerlayer_*() because they represent the state of any
* kind of lower layer not just hardware media.
*/
-
-void linkwatch_init_dev(struct net_device *dev);
void linkwatch_fire_event(struct net_device *dev);
-void linkwatch_forget_dev(struct net_device *dev);
/**
* netif_carrier_ok - test if carrier present
@@ -4462,9 +4430,6 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr,
unsigned char addr_type);
int dev_addr_del(struct net_device *dev, const unsigned char *addr,
unsigned char addr_type);
-void dev_addr_flush(struct net_device *dev);
-int dev_addr_init(struct net_device *dev);
-void dev_addr_check(struct net_device *dev);
/* Functions used for unicast addresses handling */
int dev_uc_add(struct net_device *dev, const unsigned char *addr);
@@ -4554,7 +4519,6 @@ static inline void __dev_mc_unsync(struct net_device *dev,
/* Functions used for secondary unicast and multicast support */
void dev_set_rx_mode(struct net_device *dev);
-void __dev_set_rx_mode(struct net_device *dev);
int dev_set_promiscuity(struct net_device *dev, int inc);
int dev_set_allmulti(struct net_device *dev, int inc);
void netdev_state_change(struct net_device *dev);
@@ -4572,11 +4536,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
extern int netdev_max_backlog;
-extern int netdev_tstamp_prequeue;
-extern int netdev_unregister_timeout_secs;
-extern int weight_p;
-extern int dev_weight_rx_bias;
-extern int dev_weight_tx_bias;
extern int dev_rx_weight;
extern int dev_tx_weight;
extern int gro_normal_batch;
@@ -4764,12 +4723,6 @@ static inline void netdev_rx_csum_fault(struct net_device *dev,
void net_enable_timestamp(void);
void net_disable_timestamp(void);
-#ifdef CONFIG_PROC_FS
-int __init dev_proc_init(void);
-#else
-#define dev_proc_init() 0
-#endif
-
static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
struct sk_buff *skb, struct net_device *dev,
bool more)
@@ -4805,8 +4758,6 @@ extern const struct kobj_ns_type_operations net_ns_type_operations;
const char *netdev_drivername(const struct net_device *dev);
-void linkwatch_run_queue(void);
-
static inline netdev_features_t netdev_intersect_features(netdev_features_t f1,
netdev_features_t f2)
{
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 223781622b33..6d06896fc20d 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -160,11 +160,6 @@ struct phylink_mac_ops {
* clearing unsupported speeds and duplex settings. The port modes
* should not be cleared; phylink_set_port_modes() will help with this.
*
- * If the @state->interface mode is %PHY_INTERFACE_MODE_1000BASEX
- * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode
- * based on @state->advertising and/or @state->speed and update
- * @state->interface accordingly. See phylink_helper_basex_speed().
- *
* When @config->supported_interfaces has been set, phylink will iterate
* over the supported interfaces to determine the full capability of the
* MAC. The validation function must not print errors if @state->interface
@@ -579,7 +574,6 @@ int phylink_speed_up(struct phylink *pl);
#define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode)
void phylink_set_port_modes(unsigned long *bits);
-void phylink_helper_basex_speed(struct phylink_link_state *state);
void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state,
u16 bmsr, u16 lpa);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3a30cae8b0a5..0ef11df1bc67 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -408,11 +408,9 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_XDP, /* dropped by XDP in input path */
SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */
- SKB_DROP_REASON_PTYPE_ABSENT, /* no packet_type found to handle
- * the skb. For an ether packet,
- * this means that L3 protocol is
- * not supported
- */
+ SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented
+ * or not supported
+ */
SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation
* error
*/
@@ -444,9 +442,35 @@ enum skb_drop_reason {
SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented
* at tun/tap, e.g., check_filter()
*/
+ SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */
+ SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC
+ * 2211, such as a broadcast
+ * ICMP_TIMESTAMP
+ */
+ SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding
+ * to IPSTATS_MIB_INADDRERRORS
+ */
+ SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding
+ * to IPSTATS_MIB_INNOROUTES
+ */
+ SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceeds
+ * the MTU)
+ */
SKB_DROP_REASON_MAX,
};
+#define SKB_DR_INIT(name, reason) \
+ enum skb_drop_reason name = SKB_DROP_REASON_##reason
+#define SKB_DR(name) \
+ SKB_DR_INIT(name, NOT_SPECIFIED)
+#define SKB_DR_SET(name, reason) \
+ (name = SKB_DROP_REASON_##reason)
+#define SKB_DR_OR(name, reason) \
+ do { \
+ if (name == SKB_DROP_REASON_NOT_SPECIFIED) \
+ SKB_DR_SET(name, reason); \
+ } while (0)
+
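These helpers keep drop-reason plumbing terse: SKB_DR() declares a local reason defaulting to NOT_SPECIFIED, SKB_DR_SET() overwrites it, and SKB_DR_OR() fills it in only if nothing more specific was recorded yet. A hypothetical user (the function and drop conditions are illustrative):

	static void example_drop(struct sk_buff *skb, bool csum_bad)
	{
		SKB_DR(reason);			/* NOT_SPECIFIED by default */

		if (csum_bad)
			SKB_DR_SET(reason, ICMP_CSUM);
		SKB_DR_OR(reason, UNHANDLED_PROTO);	/* fallback only */
		kfree_skb_reason(skb, reason);
	}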
/* To allow 64K frame to be packed as single skb without frag_list we
* require 64K/PAGE_SIZE pages plus 1 additional page to allow for
* buffers which do not start on a page boundary.
@@ -3836,8 +3860,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
struct sk_buff *__skb_recv_datagram(struct sock *sk,
struct sk_buff_head *sk_queue,
unsigned int flags, int *off, int *err);
-struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
- int *err);
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err);
__poll_t datagram_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
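The separate noblock parameter of skb_recv_datagram() is folded into flags; callers are updated mechanically, roughly like this:

	/* before */
	skb = skb_recv_datagram(sk, flags, noblock, &err);
	/* after */
	skb = skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &err);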
diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
new file mode 100644
index 000000000000..7e00cca06709
--- /dev/null
+++ b/include/linux/soc/mediatek/mtk_wed.h
@@ -0,0 +1,131 @@
+#ifndef __MTK_WED_H
+#define __MTK_WED_H
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/regmap.h>
+#include <linux/pci.h>
+
+#define MTK_WED_TX_QUEUES 2
+
+struct mtk_wed_hw;
+struct mtk_wdma_desc;
+
+struct mtk_wed_ring {
+ struct mtk_wdma_desc *desc;
+ dma_addr_t desc_phys;
+ int size;
+
+ u32 reg_base;
+ void __iomem *wpdma;
+};
+
+struct mtk_wed_device {
+#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+ const struct mtk_wed_ops *ops;
+ struct device *dev;
+ struct mtk_wed_hw *hw;
+ bool init_done, running;
+ int wdma_idx;
+ int irq;
+
+ struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES];
+ struct mtk_wed_ring txfree_ring;
+ struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES];
+
+ struct {
+ int size;
+ void **pages;
+ struct mtk_wdma_desc *desc;
+ dma_addr_t desc_phys;
+ } buf_ring;
+
+ /* filled by driver: */
+ struct {
+ struct pci_dev *pci_dev;
+
+ u32 wpdma_phys;
+
+ u16 token_start;
+ unsigned int nbuf;
+
+ u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id);
+ int (*offload_enable)(struct mtk_wed_device *wed);
+ void (*offload_disable)(struct mtk_wed_device *wed);
+ } wlan;
+#endif
+};
+
+struct mtk_wed_ops {
+ int (*attach)(struct mtk_wed_device *dev);
+ int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring,
+ void __iomem *regs);
+ int (*txfree_ring_setup)(struct mtk_wed_device *dev,
+ void __iomem *regs);
+ void (*detach)(struct mtk_wed_device *dev);
+
+ void (*stop)(struct mtk_wed_device *dev);
+ void (*start)(struct mtk_wed_device *dev, u32 irq_mask);
+ void (*reset_dma)(struct mtk_wed_device *dev);
+
+ u32 (*reg_read)(struct mtk_wed_device *dev, u32 reg);
+ void (*reg_write)(struct mtk_wed_device *dev, u32 reg, u32 val);
+
+ u32 (*irq_get)(struct mtk_wed_device *dev, u32 mask);
+ void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask);
+};
+
+extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
+
+static inline int
+mtk_wed_device_attach(struct mtk_wed_device *dev)
+{
+ int ret = -ENODEV;
+
+#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+ rcu_read_lock();
+ dev->ops = rcu_dereference(mtk_soc_wed_ops);
+ if (dev->ops)
+ ret = dev->ops->attach(dev);
+ else
+ rcu_read_unlock();
+
+ if (ret)
+ dev->ops = NULL;
+#endif
+
+ return ret;
+}
+
+#ifdef CONFIG_NET_MEDIATEK_SOC_WED
+#define mtk_wed_device_active(_dev) !!(_dev)->ops
+#define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev)
+#define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask)
+#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \
+ (_dev)->ops->tx_ring_setup(_dev, _ring, _regs)
+#define mtk_wed_device_txfree_ring_setup(_dev, _regs) \
+ (_dev)->ops->txfree_ring_setup(_dev, _regs)
+#define mtk_wed_device_reg_read(_dev, _reg) \
+ (_dev)->ops->reg_read(_dev, _reg)
+#define mtk_wed_device_reg_write(_dev, _reg, _val) \
+ (_dev)->ops->reg_write(_dev, _reg, _val)
+#define mtk_wed_device_irq_get(_dev, _mask) \
+ (_dev)->ops->irq_get(_dev, _mask)
+#define mtk_wed_device_irq_set_mask(_dev, _mask) \
+ (_dev)->ops->irq_set_mask(_dev, _mask)
+#else
+static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
+{
+ return false;
+}
+#define mtk_wed_device_detach(_dev) do {} while (0)
+#define mtk_wed_device_start(_dev, _mask) do {} while (0)
+#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV
+#define mtk_wed_device_txfree_ring_setup(_dev, _ring, _regs) -ENODEV
+#define mtk_wed_device_reg_read(_dev, _reg) 0
+#define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0)
+#define mtk_wed_device_irq_get(_dev, _mask) 0
+#define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0)
+#endif
+
+#endif
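For orientation, a rough sketch of how a WLAN driver might consume the interface above. The struct my_dev wrapper, the register pointers, and the error handling are hypothetical; only the mtk_wed_device_* calls and MTK_WED_TX_QUEUES are defined in this header:

	struct my_dev {
		struct mtk_wed_device wed;
		void __iomem *tx_ring_regs[MTK_WED_TX_QUEUES];
	};

	static int my_dev_enable_wed(struct my_dev *md)
	{
		int i, ret;

		/* md->wed.wlan.* must be filled in by the driver before attaching */
		ret = mtk_wed_device_attach(&md->wed);
		if (ret)
			return ret;	/* e.g. -ENODEV when no WED block is present */

		for (i = 0; i < MTK_WED_TX_QUEUES; i++) {
			ret = mtk_wed_device_tx_ring_setup(&md->wed, i,
							   md->tx_ring_regs[i]);
			if (ret)
				goto detach;
		}

		mtk_wed_device_start(&md->wed, ~0);
		return 0;

	detach:
		mtk_wed_device_detach(&md->wed);
		return ret;
	}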
diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h
index 809bccd08455..cc42db51bbba 100644
--- a/include/linux/usb/rndis_host.h
+++ b/include/linux/usb/rndis_host.h
@@ -197,6 +197,7 @@ struct rndis_keepalive_c { /* IN (optionally OUT) */
/* Flags for driver_info::data */
#define RNDIS_DRIVER_DATA_POLL_STATUS 1 /* poll status before control */
+#define RNDIS_DRIVER_DATA_DST_MAC_FIXUP 2 /* device ignores configured MAC address */
extern void rndis_status(struct usbnet *dev, struct urb *urb);
extern int
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 8336e86ce606..1b4d72d5e891 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -214,6 +214,7 @@ extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf)
extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *);
extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *);
extern void usbnet_cdc_status(struct usbnet *, struct urb *);
+extern int usbnet_cdc_zte_rx_fixup(struct usbnet *dev, struct sk_buff *skb);
/* CDC and RNDIS support the same host-chosen packet filters for IN transfers */
#define DEFAULT_FILTER (USB_CDC_PACKET_TYPE_BROADCAST \
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 3049cb69c025..9cf6870b526e 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -134,7 +134,8 @@ struct tc_action_ops {
(*get_psample_group)(const struct tc_action *a,
tc_action_priv_destructor *destructor);
int (*offload_act_setup)(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind);
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack);
};
struct tc_action_net {
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 4cfdef6ca4f6..c8490729b4ae 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -64,6 +64,14 @@ struct inet6_ifaddr {
struct hlist_node addr_lst;
struct list_head if_list;
+ /*
+ * Used to safely traverse idev->addr_list in process context
+ * if the idev->lock needed to protect idev->addr_list cannot be held.
+ * In that case, add the items to this list temporarily and iterate
+ * without holding idev->lock.
+ * See addrconf_ifdown and dev_forward_change.
+ */
+ struct list_head if_list_aux;
struct list_head tmp_list;
struct inet6_ifaddr *ifpub;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 6a82bcb8813b..a378eff827c7 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -212,7 +212,7 @@ struct fib_rt_info {
u32 tb_id;
__be32 dst;
int dst_len;
- u8 tos;
+ dscp_t dscp;
u8 type;
u8 offload:1,
trap:1,
@@ -225,7 +225,7 @@ struct fib_entry_notifier_info {
u32 dst;
int dst_len;
struct fib_info *fi;
- u8 tos;
+ dscp_t dscp;
u8 type;
u32 tb_id;
};
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index b08b70989d2c..69e6c6a218be 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -43,6 +43,11 @@ union nf_conntrack_expect_proto {
/* insert expect proto private data here */
};
+struct nf_conntrack_net_ecache {
+ struct delayed_work dwork;
+ struct netns_ct *ct_net;
+};
+
struct nf_conntrack_net {
/* only used when new connection is allocated: */
atomic_t count;
@@ -58,8 +63,7 @@ struct nf_conntrack_net {
struct ctl_table_header *sysctl_header;
#endif
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- struct delayed_work ecache_dwork;
- struct netns_ct *ct_net;
+ struct nf_conntrack_net_ecache ecache;
#endif
};
diff --git a/include/net/ping.h b/include/net/ping.h
index 2fe78874318c..e4ff3911cbf5 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -71,12 +71,12 @@ void ping_err(struct sk_buff *skb, int offset, u32 info);
int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd,
struct sk_buff *);
-int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
+int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int flags, int *addr_len);
int ping_common_sendmsg(int family, struct msghdr *msg, size_t len,
void *user_icmph, size_t icmph_len);
int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-bool ping_rcv(struct sk_buff *skb);
+enum skb_drop_reason ping_rcv(struct sk_buff *skb);
#ifdef CONFIG_PROC_FS
void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a3b57a93228a..8cf001aed858 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -547,10 +547,12 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
}
int tc_setup_offload_action(struct flow_action *flow_action,
- const struct tcf_exts *exts);
+ const struct tcf_exts *exts,
+ struct netlink_ext_ack *extack);
void tc_cleanup_offload_action(struct flow_action *flow_action);
int tc_setup_action(struct flow_action *flow_action,
- struct tc_action *actions[]);
+ struct tc_action *actions[],
+ struct netlink_ext_ack *extack);
int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
void *type_data, bool err_stop, bool rtnl_held);
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 9f48733bfd21..bf8bb3357825 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -10,9 +10,23 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
enum rtnl_link_flags {
- RTNL_FLAG_DOIT_UNLOCKED = 1,
+ RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
+ RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
};
+enum rtnl_kinds {
+ RTNL_KIND_NEW,
+ RTNL_KIND_DEL,
+ RTNL_KIND_GET,
+ RTNL_KIND_SET
+};
+#define RTNL_KIND_MASK 0x3
+
+static inline enum rtnl_kinds rtnl_msgtype_kind(int msgtype)
+{
+ return msgtype & RTNL_KIND_MASK;
+}
+
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
int rtnl_register_module(struct module *owner, int protocol, int msgtype,
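rtnl_msgtype_kind() relies on rtnetlink message types being allocated in groups of four (NEW/DEL/GET/SET), so the low two bits encode the kind. A small sketch of how a dispatcher could combine it with the new NLM_F_BULK flag; the helper is illustrative, not part of this patch:

	static bool demo_is_bulk_del(const struct nlmsghdr *nlh)
	{
		/* RTM_NEWNEIGH == 28 -> RTNL_KIND_NEW (28 & 0x3 == 0)
		 * RTM_DELNEIGH == 29 -> RTNL_KIND_DEL (29 & 0x3 == 1)
		 * RTM_GETNEIGH == 30 -> RTNL_KIND_GET (30 & 0x3 == 2)
		 * Bulk deletion is only honoured for handlers registered
		 * with RTNL_FLAG_BULK_DEL_SUPPORTED.
		 */
		return rtnl_msgtype_kind(nlh->nlmsg_type) == RTNL_KIND_DEL &&
		       (nlh->nlmsg_flags & NLM_F_BULK);
	}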
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index bf3716fe83e0..a04999ee99b0 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -103,7 +103,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
struct sctp_association *asoc);
extern struct percpu_counter sctp_sockets_allocated;
int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
-struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
+struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int *);
typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *);
void sctp_transport_walk_start(struct rhashtable_iter *iter);
diff --git a/include/net/sock.h b/include/net/sock.h
index c4b91fc19b9c..a01d6c421aa2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1202,8 +1202,7 @@ struct proto {
int (*sendmsg)(struct sock *sk, struct msghdr *msg,
size_t len);
int (*recvmsg)(struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags,
- int *addr_len);
+ size_t len, int flags, int *addr_len);
int (*sendpage)(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int (*bind)(struct sock *sk,
@@ -2392,7 +2391,14 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
void (*destructor)(struct sock *sk,
struct sk_buff *skb));
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+
+int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason);
+
+static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ return sock_queue_rcv_skb_reason(sk, skb, NULL);
+}
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
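A sketch of the intended calling convention for the reason-reporting variant; the surrounding handler is hypothetical, and the exact reasons reported depend on the sock_queue_rcv_skb_reason() implementation in net/core/sock.c:

	static int demo_queue(struct sock *sk, struct sk_buff *skb)
	{
		enum skb_drop_reason reason;

		if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
			/* reason now explains the failure, e.g. a full
			 * receive buffer or a socket filter verdict */
			kfree_skb_reason(skb, reason);
			return NET_RX_DROP;
		}
		return NET_RX_SUCCESS;
	}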
diff --git a/include/net/strparser.h b/include/net/strparser.h
index 732b7097d78e..a191486eb1e4 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -70,6 +70,10 @@ struct sk_skb_cb {
* when dst_reg == src_reg.
*/
u64 temp_reg;
+ struct tls_msg {
+ u8 control;
+ u8 decrypted;
+ } tls;
};
static inline struct strp_msg *strp_msg(struct sk_buff *skb)
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index eb8f01c819e6..832efd40e023 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -59,4 +59,19 @@ static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a)
return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK;
}
+static inline bool is_tcf_gact_continue(const struct tc_action *a)
+{
+ return __is_tcf_gact_act(a, TC_ACT_UNSPEC, false);
+}
+
+static inline bool is_tcf_gact_reclassify(const struct tc_action *a)
+{
+ return __is_tcf_gact_act(a, TC_ACT_RECLASSIFY, false);
+}
+
+static inline bool is_tcf_gact_pipe(const struct tc_action *a)
+{
+ return __is_tcf_gact_act(a, TC_ACT_PIPE, false);
+}
+
#endif /* __NET_TC_GACT_H */
diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 00bfee70609e..cab8229b9bed 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -94,4 +94,16 @@ static inline u32 tcf_skbedit_priority(const struct tc_action *a)
return priority;
}
+/* Return true iff action is queue_mapping */
+static inline bool is_tcf_skbedit_queue_mapping(const struct tc_action *a)
+{
+ return is_tcf_skbedit_with_flag(a, SKBEDIT_F_QUEUE_MAPPING);
+}
+
+/* Return true iff action is inheritdsfield */
+static inline bool is_tcf_skbedit_inheritdsfield(const struct tc_action *a)
+{
+ return is_tcf_skbedit_with_flag(a, SKBEDIT_F_INHERITDSFIELD);
+}
+
#endif /* __NET_TC_SKBEDIT_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70ca4a5e330a..679b1964d494 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -407,7 +407,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
-int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int flags, int *addr_len);
int tcp_set_rcvlowat(struct sock *sk, int val);
int tcp_set_window_clamp(struct sock *sk, int val);
@@ -1139,15 +1139,6 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk)
return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
}
-static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_ca_ops->set_state)
- icsk->icsk_ca_ops->set_state(sk, ca_state);
- icsk->icsk_ca_state = ca_state;
-}
-
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1156,6 +1147,9 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
icsk->icsk_ca_ops->cwnd_event(sk, event);
}
+/* From tcp_cong.c */
+void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
+
/* From tcp_rate.c */
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
@@ -1207,9 +1201,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
#define TCP_INFINITE_SSTHRESH 0x7fffffff
+static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp)
+{
+ return tp->snd_cwnd;
+}
+
+static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val)
+{
+ WARN_ON_ONCE((int)val <= 0);
+ tp->snd_cwnd = val;
+}
+
static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
- return tp->snd_cwnd < tp->snd_ssthresh;
+ return tcp_snd_cwnd(tp) < tp->snd_ssthresh;
}
static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
@@ -1235,8 +1240,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
return tp->snd_ssthresh;
else
return max(tp->snd_ssthresh,
- ((tp->snd_cwnd >> 1) +
- (tp->snd_cwnd >> 2)));
+ ((tcp_snd_cwnd(tp) >> 1) +
+ (tcp_snd_cwnd(tp) >> 2)));
}
/* Use define here intentionally to get WARN_ON location shown at the caller */
@@ -1278,7 +1283,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
/* If in slow start, ensure cwnd grows to twice what was ACKed. */
if (tcp_in_slow_start(tp))
- return tp->snd_cwnd < 2 * tp->max_packets_out;
+ return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out;
return tp->is_cwnd_limited;
}
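With these accessors in place, congestion-control code is expected to read and write the congestion window only through tcp_snd_cwnd()/tcp_snd_cwnd_set(). A minimal Reno-style sketch; the function itself is illustrative, not part of this patch:

	static void demo_cong_avoid(struct sock *sk, u32 ack, u32 acked)
	{
		struct tcp_sock *tp = tcp_sk(sk);

		if (!tcp_is_cwnd_limited(sk))
			return;

		if (tcp_in_slow_start(tp)) {
			acked = tcp_slow_start(tp, acked);	/* grows cwnd internally */
			if (!acked)
				return;
		}
		/* additive increase, reading cwnd via the accessor */
		tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
	}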
diff --git a/include/net/tls.h b/include/net/tls.h
index b6968a5b5538..b59f0a63292b 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -64,6 +64,7 @@
#define TLS_AAD_SPACE_SIZE 13
#define MAX_IV_SIZE 16
+#define TLS_TAG_SIZE 16
#define TLS_MAX_REC_SEQ_SIZE 8
/* For CCM mode, the full 16-bytes of IV is made of '4' fields of given sizes.
@@ -117,11 +118,6 @@ struct tls_rec {
u8 aead_req_ctx[];
};
-struct tls_msg {
- struct strp_msg rxm;
- u8 control;
-};
-
struct tx_work {
struct delayed_work work;
struct sock *sk;
@@ -152,13 +148,10 @@ struct tls_sw_context_rx {
void (*saved_data_ready)(struct sock *sk);
struct sk_buff *recv_pkt;
- u8 control;
u8 async_capable:1;
- u8 decrypted:1;
atomic_t decrypt_pending;
/* protect crypto_wait with decrypt_pending*/
spinlock_t decrypt_compl_lock;
- bool async_notify;
};
struct tls_record_info {
@@ -378,7 +371,7 @@ void tls_sw_free_resources_rx(struct sock *sk);
void tls_sw_release_resources_rx(struct sock *sk);
void tls_sw_free_ctx_rx(struct tls_context *tls_ctx);
int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len);
+ int flags, int *addr_len);
bool tls_sw_sock_is_readable(struct sock *sk);
ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
@@ -411,7 +404,9 @@ void tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
static inline struct tls_msg *tls_msg(struct sk_buff *skb)
{
- return (struct tls_msg *)strp_msg(skb);
+ struct sk_skb_cb *scb = (struct sk_skb_cb *)skb->cb;
+
+ return &scb->tls;
}
static inline bool tls_is_partially_sent_record(struct tls_context *ctx)
diff --git a/include/net/udp.h b/include/net/udp.h
index f1c2a88c9005..b83a00330566 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -250,14 +250,14 @@ void udp_destruct_sock(struct sock *sk);
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
-struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *off, int *err);
+struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off,
+ int *err);
static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *err)
+ int *err)
{
int off = 0;
- return __skb_recv_udp(sk, flags, noblock, &off, err);
+ return __skb_recv_udp(sk, flags, &off, err);
}
int udp_v4_early_demux(struct sk_buff *skb);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 69d883f7fb41..11ee4eaf84bd 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2497,15 +2497,7 @@ struct ib_device_ops {
struct ib_flow_attr *flow_attr,
struct ib_udata *udata);
int (*destroy_flow)(struct ib_flow *flow_id);
- struct ib_flow_action *(*create_flow_action_esp)(
- struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
int (*destroy_flow_action)(struct ib_flow_action *action);
- int (*modify_flow_action_esp)(
- struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
int (*set_vf_link_state)(struct ib_device *device, int vf, u32 port,
int state);
int (*get_vf_config)(struct ib_device *device, int vf, u32 port,
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index e1670e1e4934..2da72a9a5764 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -50,7 +50,7 @@
EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \
EM(SKB_DROP_REASON_XDP, XDP) \
EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \
- EM(SKB_DROP_REASON_PTYPE_ABSENT, PTYPE_ABSENT) \
+ EM(SKB_DROP_REASON_UNHANDLED_PROTO, UNHANDLED_PROTO) \
EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM) \
EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \
EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \
@@ -61,6 +61,11 @@
EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \
EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \
EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \
+ EM(SKB_DROP_REASON_ICMP_CSUM, ICMP_CSUM) \
+ EM(SKB_DROP_REASON_INVALID_PROTO, INVALID_PROTO) \
+ EM(SKB_DROP_REASON_IP_INADDRERRORS, IP_INADDRERRORS) \
+ EM(SKB_DROP_REASON_IP_INNOROUTES, IP_INNOROUTES) \
+ EM(SKB_DROP_REASON_PKT_TOO_BIG, PKT_TOO_BIG) \
EMe(SKB_DROP_REASON_MAX, MAX)
#undef EM
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 521059d8dc0a..901b440238d5 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -279,7 +279,7 @@ TRACE_EVENT(tcp_probe,
__entry->data_len = skb->len - __tcp_hdrlen(th);
__entry->snd_nxt = tp->snd_nxt;
__entry->snd_una = tp->snd_una;
- __entry->snd_cwnd = tp->snd_cwnd;
+ __entry->snd_cwnd = tcp_snd_cwnd(tp);
__entry->snd_wnd = tp->snd_wnd;
__entry->rcv_wnd = tp->rcv_wnd;
__entry->ssthresh = tcp_current_ssthresh(sk);
@@ -371,6 +371,51 @@ DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
TP_ARGS(skb)
);
+TRACE_EVENT(tcp_cong_state_set,
+
+ TP_PROTO(struct sock *sk, const u8 ca_state),
+
+ TP_ARGS(sk, ca_state),
+
+ TP_STRUCT__entry(
+ __field(const void *, skaddr)
+ __field(__u16, sport)
+ __field(__u16, dport)
+ __array(__u8, saddr, 4)
+ __array(__u8, daddr, 4)
+ __array(__u8, saddr_v6, 16)
+ __array(__u8, daddr_v6, 16)
+ __field(__u8, cong_state)
+ ),
+
+ TP_fast_assign(
+ struct inet_sock *inet = inet_sk(sk);
+ __be32 *p32;
+
+ __entry->skaddr = sk;
+
+ __entry->sport = ntohs(inet->inet_sport);
+ __entry->dport = ntohs(inet->inet_dport);
+
+ p32 = (__be32 *) __entry->saddr;
+ *p32 = inet->inet_saddr;
+
+ p32 = (__be32 *) __entry->daddr;
+ *p32 = inet->inet_daddr;
+
+ TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+ sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
+
+ __entry->cong_state = ca_state;
+ ),
+
+ TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+ __entry->sport, __entry->dport,
+ __entry->saddr, __entry->daddr,
+ __entry->saddr_v6, __entry->daddr_v6,
+ __entry->cong_state)
+);
+
#endif /* _TRACE_TCP_H */
/* This part must be outside protection */
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index b0d8fea1951d..a9162a6c0284 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -33,8 +33,8 @@ struct btf_type {
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
- * bits 24-27: kind (e.g. int, ptr, array...etc)
- * bits 28-30: unused
+ * bits 24-28: kind (e.g. int, ptr, array...etc)
+ * bits 29-30: unused
* bit 31: kind_flag, currently used by
* struct, union and fwd
*/
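The widened field can be unpacked with a plain shift and mask, shown here as a standalone helper rather than any particular in-tree macro:

	/* bits 24-28 of btf_type.info now hold the kind */
	static inline __u32 demo_btf_kind(const struct btf_type *t)
	{
		return (t->info >> 24) & 0x1f;	/* previously & 0xf with 4 kind bits */
	}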
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index cc284c048e69..d1e600816b82 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -211,6 +211,9 @@ struct rtnl_link_stats {
* @rx_nohandler: Number of packets received on the interface
* but dropped by the networking stack because the device is
* not designated to receive packets (e.g. backup link in a bond).
+ *
+ * @rx_otherhost_dropped: Number of packets dropped due to mismatch
+ * in destination MAC address.
*/
struct rtnl_link_stats64 {
__u64 rx_packets;
@@ -243,6 +246,8 @@ struct rtnl_link_stats64 {
__u64 rx_compressed;
__u64 tx_compressed;
__u64 rx_nohandler;
+
+ __u64 rx_otherhost_dropped;
};
/* Subset of link stats useful for in-HW collection. Meaning of the fields is as
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index db05fb55055e..39c565e460c7 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -32,6 +32,8 @@ enum {
NDA_NH_ID,
NDA_FDB_EXT_ATTRS,
NDA_FLAGS_EXT,
+ NDA_NDM_STATE_MASK,
+ NDA_NDM_FLAGS_MASK,
__NDA_MAX
};
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 4c0cde075c27..855dffb4c1c3 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -72,6 +72,7 @@ struct nlmsghdr {
/* Modifiers to DELETE request */
#define NLM_F_NONREC 0x100 /* Do not delete recursively */
+#define NLM_F_BULK 0x200 /* Delete multiple objects */
/* Flags for ACK message */
#define NLM_F_CAPPED 0x100 /* request was capped */
diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 4dfc05651c98..c00adf2fe868 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -43,10 +43,6 @@
#include <linux/tipc.h>
#include <asm/byteorder.h>
-#ifndef __KERNEL__
-#include <arpa/inet.h> /* for ntohs etc. */
-#endif
-
/*
* Configuration
*
@@ -269,33 +265,33 @@ static inline int TLV_OK(const void *tlv, __u16 space)
*/
return (space >= TLV_SPACE(0)) &&
- (ntohs(((struct tlv_desc *)tlv)->tlv_len) <= space);
+ (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_len) <= space);
}
static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type)
{
return TLV_OK(tlv, space) &&
- (ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type);
+ (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_type) == exp_type);
}
static inline int TLV_GET_LEN(struct tlv_desc *tlv)
{
- return ntohs(tlv->tlv_len);
+ return __be16_to_cpu(tlv->tlv_len);
}
static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len)
{
- tlv->tlv_len = htons(len);
+ tlv->tlv_len = __cpu_to_be16(len);
}
static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type)
{
- return (ntohs(tlv->tlv_type) == type);
+ return (__be16_to_cpu(tlv->tlv_type) == type);
}
static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type)
{
- tlv->tlv_type = htons(type);
+ tlv->tlv_type = __cpu_to_be16(type);
}
static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len)
@@ -305,8 +301,8 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len)
tlv_len = TLV_LENGTH(len);
tlv_ptr = (struct tlv_desc *)tlv;
- tlv_ptr->tlv_type = htons(type);
- tlv_ptr->tlv_len = htons(tlv_len);
+ tlv_ptr->tlv_type = __cpu_to_be16(type);
+ tlv_ptr->tlv_len = __cpu_to_be16(tlv_len);
if (len && data) {
memcpy(TLV_DATA(tlv_ptr), data, len);
memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len);
@@ -348,7 +344,7 @@ static inline void *TLV_LIST_DATA(struct tlv_list_desc *list)
static inline void TLV_LIST_STEP(struct tlv_list_desc *list)
{
- __u16 tlv_space = TLV_ALIGN(ntohs(list->tlv_ptr->tlv_len));
+ __u16 tlv_space = TLV_ALIGN(__be16_to_cpu(list->tlv_ptr->tlv_len));
list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space);
list->tlv_space -= tlv_space;
@@ -404,9 +400,9 @@ static inline int TCM_SET(void *msg, __u16 cmd, __u16 flags,
msg_len = TCM_LENGTH(data_len);
tcm_hdr = (struct tipc_cfg_msg_hdr *)msg;
- tcm_hdr->tcm_len = htonl(msg_len);
- tcm_hdr->tcm_type = htons(cmd);
- tcm_hdr->tcm_flags = htons(flags);
+ tcm_hdr->tcm_len = __cpu_to_be32(msg_len);
+ tcm_hdr->tcm_type = __cpu_to_be16(cmd);
+ tcm_hdr->tcm_flags = __cpu_to_be16(flags);
if (data_len && data) {
memcpy(TCM_DATA(msg), data, data_len);
memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 110029ede71e..dea920b3b840 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -330,35 +330,34 @@ static void cache_btf_id(struct bpf_iter_target_info *tinfo,
bool bpf_iter_prog_supported(struct bpf_prog *prog)
{
const char *attach_fname = prog->aux->attach_func_name;
+ struct bpf_iter_target_info *tinfo = NULL, *iter;
u32 prog_btf_id = prog->aux->attach_btf_id;
const char *prefix = BPF_ITER_FUNC_PREFIX;
- struct bpf_iter_target_info *tinfo;
int prefix_len = strlen(prefix);
- bool supported = false;
if (strncmp(attach_fname, prefix, prefix_len))
return false;
mutex_lock(&targets_mutex);
- list_for_each_entry(tinfo, &targets, list) {
- if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
- supported = true;
+ list_for_each_entry(iter, &targets, list) {
+ if (iter->btf_id && iter->btf_id == prog_btf_id) {
+ tinfo = iter;
break;
}
- if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) {
- cache_btf_id(tinfo, prog);
- supported = true;
+ if (!strcmp(attach_fname + prefix_len, iter->reg_info->target)) {
+ cache_btf_id(iter, prog);
+ tinfo = iter;
break;
}
}
mutex_unlock(&targets_mutex);
- if (supported) {
+ if (tinfo) {
prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
}
- return supported;
+ return tinfo != NULL;
}
const struct bpf_func_proto *
@@ -499,12 +498,11 @@ bool bpf_link_is_iter(struct bpf_link *link)
int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
struct bpf_prog *prog)
{
+ struct bpf_iter_target_info *tinfo = NULL, *iter;
struct bpf_link_primer link_primer;
- struct bpf_iter_target_info *tinfo;
union bpf_iter_link_info linfo;
struct bpf_iter_link *link;
u32 prog_btf_id, linfo_len;
- bool existed = false;
bpfptr_t ulinfo;
int err;
@@ -530,14 +528,14 @@ int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
prog_btf_id = prog->aux->attach_btf_id;
mutex_lock(&targets_mutex);
- list_for_each_entry(tinfo, &targets, list) {
- if (tinfo->btf_id == prog_btf_id) {
- existed = true;
+ list_for_each_entry(iter, &targets, list) {
+ if (iter->btf_id == prog_btf_id) {
+ tinfo = iter;
break;
}
}
mutex_unlock(&targets_mutex);
- if (!existed)
+ if (!tinfo)
return -ENOENT;
link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 34725bfa1e97..1dd5266fbebb 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -100,13 +100,11 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
return ERR_PTR(-E2BIG);
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
- cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
if (!smap)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
- smap->map.value_size = value_size;
smap->n_buckets = n_buckets;
err = get_callchain_buffers(sysctl_perf_event_max_stack);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d175b70067b3..9c1a02b82ecd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4861,6 +4861,11 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
return check_packet_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_MAP_KEY:
+ if (meta && meta->raw_mode) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
+ return -EACCES;
+ }
return check_mem_region_access(env, regno, reg->off, access_size,
reg->map_ptr->key_size, false);
case PTR_TO_MAP_VALUE:
@@ -4871,13 +4876,23 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
return check_map_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_MEM:
+ if (type_is_rdonly_mem(reg->type)) {
+ if (meta && meta->raw_mode) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
+ return -EACCES;
+ }
+ }
return check_mem_region_access(env, regno, reg->off,
access_size, reg->mem_size,
zero_size_allowed);
case PTR_TO_BUF:
if (type_is_rdonly_mem(reg->type)) {
- if (meta && meta->raw_mode)
+ if (meta && meta->raw_mode) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
return -EACCES;
+ }
max_access = &env->prog->aux->max_rdonly_access;
} else {
@@ -4919,8 +4934,7 @@ static int check_mem_size_reg(struct bpf_verifier_env *env,
* out. Only upper bounds can be learned because retval is an
* int type and negative retvals are allowed.
*/
- if (meta)
- meta->msize_max_value = reg->umax_value;
+ meta->msize_max_value = reg->umax_value;
/* The register is SCALAR_VALUE; the access check
* happens using its boundaries.
@@ -4963,24 +4977,33 @@ static int check_mem_size_reg(struct bpf_verifier_env *env,
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size)
{
+ bool may_be_null = type_may_be_null(reg->type);
+ struct bpf_reg_state saved_reg;
+ struct bpf_call_arg_meta meta;
+ int err;
+
if (register_is_null(reg))
return 0;
- if (type_may_be_null(reg->type)) {
- /* Assuming that the register contains a value check if the memory
- * access is safe. Temporarily save and restore the register's state as
- * the conversion shouldn't be visible to a caller.
- */
- const struct bpf_reg_state saved_reg = *reg;
- int rv;
-
+ memset(&meta, 0, sizeof(meta));
+ /* Assuming that the register contains a value, check if the memory
+ * access is safe. Temporarily save and restore the register's state as
+ * the conversion shouldn't be visible to a caller.
+ */
+ if (may_be_null) {
+ saved_reg = *reg;
mark_ptr_not_null_reg(reg);
- rv = check_helper_mem_access(env, regno, mem_size, true, NULL);
- *reg = saved_reg;
- return rv;
}
- return check_helper_mem_access(env, regno, mem_size, true, NULL);
+ err = check_helper_mem_access(env, regno, mem_size, true, &meta);
+ /* Check access for BPF_WRITE */
+ meta.raw_mode = true;
+ err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
+
+ if (may_be_null)
+ *reg = saved_reg;
+
+ return err;
}
int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
@@ -4989,16 +5012,22 @@ int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state
struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
bool may_be_null = type_may_be_null(mem_reg->type);
struct bpf_reg_state saved_reg;
+ struct bpf_call_arg_meta meta;
int err;
WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
+ memset(&meta, 0, sizeof(meta));
+
if (may_be_null) {
saved_reg = *mem_reg;
mark_ptr_not_null_reg(mem_reg);
}
- err = check_mem_size_reg(env, reg, regno, true, NULL);
+ err = check_mem_size_reg(env, reg, regno, true, &meta);
+ /* Check access for BPF_WRITE */
+ meta.raw_mode = true;
+ err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
if (may_be_null)
*mem_reg = saved_reg;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d8553f46caa2..b26f3da943de 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2254,15 +2254,13 @@ static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void
const struct bpf_kprobe_multi_link *link = priv;
unsigned long *addr_a = a, *addr_b = b;
u64 *cookie_a, *cookie_b;
- unsigned long tmp1;
- u64 tmp2;
cookie_a = link->cookies + (addr_a - link->addrs);
cookie_b = link->cookies + (addr_b - link->addrs);
/* swap addr_a/addr_b and cookie_a/cookie_b values */
- tmp1 = *addr_a; *addr_a = *addr_b; *addr_b = tmp1;
- tmp2 = *cookie_a; *cookie_a = *cookie_b; *cookie_b = tmp2;
+ swap(*addr_a, *addr_b);
+ swap(*cookie_a, *cookie_b);
}
static int __bpf_kprobe_multi_cookie_cmp(const void *a, const void *b)
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 0c5cb2d6436a..2a7836e115b4 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -53,6 +53,7 @@
#define FLAG_EXPECTED_FAIL BIT(1)
#define FLAG_SKB_FRAG BIT(2)
#define FLAG_VERIFIER_ZEXT BIT(3)
+#define FLAG_LARGE_MEM BIT(4)
enum {
CLASSIC = BIT(6), /* Old BPF instructions only. */
@@ -7838,7 +7839,7 @@ static struct bpf_test tests[] = {
},
/* BPF_LDX_MEM B/H/W/DW */
{
- "BPF_LDX_MEM | BPF_B",
+ "BPF_LDX_MEM | BPF_B, base",
.u.insns_int = {
BPF_LD_IMM64(R1, 0x0102030405060708ULL),
BPF_LD_IMM64(R2, 0x0000000000000008ULL),
@@ -7878,7 +7879,56 @@ static struct bpf_test tests[] = {
.stack_depth = 8,
},
{
- "BPF_LDX_MEM | BPF_H",
+ "BPF_LDX_MEM | BPF_B, negative offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000000000088ULL),
+ BPF_ALU64_IMM(BPF_ADD, R1, 512),
+ BPF_STX_MEM(BPF_B, R1, R2, -256),
+ BPF_LDX_MEM(BPF_B, R0, R1, -256),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 512, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_B, small positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000000000088ULL),
+ BPF_STX_MEM(BPF_B, R1, R2, 256),
+ BPF_LDX_MEM(BPF_B, R0, R1, 256),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 512, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_B, large positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000000000088ULL),
+ BPF_STX_MEM(BPF_B, R1, R2, 4096),
+ BPF_LDX_MEM(BPF_B, R0, R1, 4096),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 4096 + 16, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_H, base",
.u.insns_int = {
BPF_LD_IMM64(R1, 0x0102030405060708ULL),
BPF_LD_IMM64(R2, 0x0000000000000708ULL),
@@ -7918,7 +7968,72 @@ static struct bpf_test tests[] = {
.stack_depth = 8,
},
{
- "BPF_LDX_MEM | BPF_W",
+ "BPF_LDX_MEM | BPF_H, negative offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000000008788ULL),
+ BPF_ALU64_IMM(BPF_ADD, R1, 512),
+ BPF_STX_MEM(BPF_H, R1, R2, -256),
+ BPF_LDX_MEM(BPF_H, R0, R1, -256),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 512, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_H, small positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000000008788ULL),
+ BPF_STX_MEM(BPF_H, R1, R2, 256),
+ BPF_LDX_MEM(BPF_H, R0, R1, 256),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 512, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_H, large positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000000008788ULL),
+ BPF_STX_MEM(BPF_H, R1, R2, 8192),
+ BPF_LDX_MEM(BPF_H, R0, R1, 8192),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 8192 + 16, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_H, unaligned positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000000008788ULL),
+ BPF_STX_MEM(BPF_H, R1, R2, 13),
+ BPF_LDX_MEM(BPF_H, R0, R1, 13),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 32, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_W, base",
.u.insns_int = {
BPF_LD_IMM64(R1, 0x0102030405060708ULL),
BPF_LD_IMM64(R2, 0x0000000005060708ULL),
@@ -7957,6 +8072,162 @@ static struct bpf_test tests[] = {
{ { 0, 0 } },
.stack_depth = 8,
},
+ {
+ "BPF_LDX_MEM | BPF_W, negative offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000085868788ULL),
+ BPF_ALU64_IMM(BPF_ADD, R1, 512),
+ BPF_STX_MEM(BPF_W, R1, R2, -256),
+ BPF_LDX_MEM(BPF_W, R0, R1, -256),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 512, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_W, small positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000085868788ULL),
+ BPF_STX_MEM(BPF_W, R1, R2, 256),
+ BPF_LDX_MEM(BPF_W, R0, R1, 256),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 512, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_W, large positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000085868788ULL),
+ BPF_STX_MEM(BPF_W, R1, R2, 16384),
+ BPF_LDX_MEM(BPF_W, R0, R1, 16384),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 16384 + 16, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_W, unaligned positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_LD_IMM64(R3, 0x0000000085868788ULL),
+ BPF_STX_MEM(BPF_W, R1, R2, 13),
+ BPF_LDX_MEM(BPF_W, R0, R1, 13),
+ BPF_JMP_REG(BPF_JNE, R0, R3, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 32, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_DW, base",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x0102030405060708ULL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -8),
+ BPF_LDX_MEM(BPF_DW, R0, R10, -8),
+ BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } },
+ .stack_depth = 8,
+ },
+ {
+ "BPF_LDX_MEM | BPF_DW, MSB set",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x8182838485868788ULL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -8),
+ BPF_LDX_MEM(BPF_DW, R0, R10, -8),
+ BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } },
+ .stack_depth = 8,
+ },
+ {
+ "BPF_LDX_MEM | BPF_DW, negative offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_ALU64_IMM(BPF_ADD, R1, 512),
+ BPF_STX_MEM(BPF_DW, R1, R2, -256),
+ BPF_LDX_MEM(BPF_DW, R0, R1, -256),
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 512, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_DW, small positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_STX_MEM(BPF_DW, R1, R2, 256),
+ BPF_LDX_MEM(BPF_DW, R0, R1, 256),
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 512, 0 } },
+ .stack_depth = 8,
+ },
+ {
+ "BPF_LDX_MEM | BPF_DW, large positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_STX_MEM(BPF_DW, R1, R2, 32760),
+ BPF_LDX_MEM(BPF_DW, R0, R1, 32760),
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 32768, 0 } },
+ .stack_depth = 0,
+ },
+ {
+ "BPF_LDX_MEM | BPF_DW, unaligned positive offset",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0x8182838485868788ULL),
+ BPF_STX_MEM(BPF_DW, R1, R2, 13),
+ BPF_LDX_MEM(BPF_DW, R0, R1, 13),
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL | FLAG_LARGE_MEM,
+ { },
+ { { 32, 0 } },
+ .stack_depth = 0,
+ },
/* BPF_STX_MEM B/H/W/DW */
{
"BPF_STX_MEM | BPF_B",
@@ -14094,6 +14365,9 @@ static void *generate_test_data(struct bpf_test *test, int sub)
if (test->aux & FLAG_NO_DATA)
return NULL;
+ if (test->aux & FLAG_LARGE_MEM)
+ return kmalloc(test->test[sub].data_size, GFP_KERNEL);
+
/* Test case expects an skb, so populate one. Various
* subtests generate skbs of different sizes based on
* the same data.
@@ -14137,7 +14411,10 @@ static void release_test_data(const struct bpf_test *test, void *data)
if (test->aux & FLAG_NO_DATA)
return;
- kfree_skb(data);
+ if (test->aux & FLAG_LARGE_MEM)
+ kfree(data);
+ else
+ kfree_skb(data);
}
static int filter_length(int which)
@@ -14674,6 +14951,36 @@ static struct tail_call_test tail_call_tests[] = {
.result = 10,
},
{
+ "Tail call load/store leaf",
+ .insns = {
+ BPF_ALU64_IMM(BPF_MOV, R1, 1),
+ BPF_ALU64_IMM(BPF_MOV, R2, 2),
+ BPF_ALU64_REG(BPF_MOV, R3, BPF_REG_FP),
+ BPF_STX_MEM(BPF_DW, R3, R1, -8),
+ BPF_STX_MEM(BPF_DW, R3, R2, -16),
+ BPF_LDX_MEM(BPF_DW, R0, BPF_REG_FP, -8),
+ BPF_JMP_REG(BPF_JNE, R0, R1, 3),
+ BPF_LDX_MEM(BPF_DW, R0, BPF_REG_FP, -16),
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = 0,
+ .stack_depth = 32,
+ },
+ {
+ "Tail call load/store",
+ .insns = {
+ BPF_ALU64_IMM(BPF_MOV, R0, 3),
+ BPF_STX_MEM(BPF_DW, BPF_REG_FP, R0, -8),
+ TAIL_CALL(-1),
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 0,
+ .stack_depth = 16,
+ },
+ {
"Tail call error path, max count reached",
.insns = {
BPF_LDX_MEM(BPF_W, R2, R1, 0),
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index bf5736c1d458..a06f4d4a6f47 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1753,8 +1753,7 @@ static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int err = 0;
struct sk_buff *skb;
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
lock_sock(sk);
if (!skb)
diff --git a/net/atm/common.c b/net/atm/common.c
index 1cfa9bf1d187..d0c8ab7ff8f6 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -540,7 +540,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
!test_bit(ATM_VF_READY, &vcc->flags))
return 0;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &error);
+ skb = skb_recv_datagram(sk, flags, &error);
if (!skb)
return error;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 363d47f94532..116481e4da82 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1669,8 +1669,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
}
/* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (skb == NULL)
goto out;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index a0cb2e3da8d4..62705734343b 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -251,7 +251,6 @@ EXPORT_SYMBOL(bt_accept_dequeue);
int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
size_t copied;
@@ -263,7 +262,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
return 0;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 33b3c0ffc339..189e3115c8c6 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1453,7 +1453,6 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
@@ -1470,7 +1469,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
if (sk->sk_state == BT_CLOSED)
return 0;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
return err;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8d6bab244c4a..58a4f70e01e3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -465,6 +465,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fix_features = br_fix_features,
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
+ .ndo_fdb_del_bulk = br_fdb_delete_bulk,
.ndo_fdb_dump = br_fdb_dump,
.ndo_fdb_get = br_fdb_get,
.ndo_bridge_getlink = br_getlink,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6ccda68bd473..1a3d583fbc8e 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -558,18 +558,161 @@ void br_fdb_cleanup(struct work_struct *work)
mod_delayed_work(system_long_wq, &br->gc_work, work_delay);
}
-/* Completely flush all dynamic entries in forwarding database.*/
-void br_fdb_flush(struct net_bridge *br)
+static bool __fdb_flush_matches(const struct net_bridge *br,
+ const struct net_bridge_fdb_entry *f,
+ const struct net_bridge_fdb_flush_desc *desc)
+{
+ const struct net_bridge_port *dst = READ_ONCE(f->dst);
+ int port_ifidx = dst ? dst->dev->ifindex : br->dev->ifindex;
+
+ if (desc->vlan_id && desc->vlan_id != f->key.vlan_id)
+ return false;
+ if (desc->port_ifindex && desc->port_ifindex != port_ifidx)
+ return false;
+ if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags)
+ return false;
+
+ return true;
+}
+
+/* Flush forwarding database entries matching the description */
+void br_fdb_flush(struct net_bridge *br,
+ const struct net_bridge_fdb_flush_desc *desc)
{
struct net_bridge_fdb_entry *f;
- struct hlist_node *tmp;
- spin_lock_bh(&br->hash_lock);
- hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
- if (!test_bit(BR_FDB_STATIC, &f->flags))
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ if (!__fdb_flush_matches(br, f, desc))
+ continue;
+
+ spin_lock_bh(&br->hash_lock);
+ if (!hlist_unhashed(&f->fdb_node))
fdb_delete(br, f, true);
+ spin_unlock_bh(&br->hash_lock);
}
- spin_unlock_bh(&br->hash_lock);
+ rcu_read_unlock();
+}
+
+static unsigned long __ndm_state_to_fdb_flags(u16 ndm_state)
+{
+ unsigned long flags = 0;
+
+ if (ndm_state & NUD_PERMANENT)
+ __set_bit(BR_FDB_LOCAL, &flags);
+ if (ndm_state & NUD_NOARP)
+ __set_bit(BR_FDB_STATIC, &flags);
+
+ return flags;
+}
+
+static unsigned long __ndm_flags_to_fdb_flags(u8 ndm_flags)
+{
+ unsigned long flags = 0;
+
+ if (ndm_flags & NTF_USE)
+ __set_bit(BR_FDB_ADDED_BY_USER, &flags);
+ if (ndm_flags & NTF_EXT_LEARNED)
+ __set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &flags);
+ if (ndm_flags & NTF_OFFLOADED)
+ __set_bit(BR_FDB_OFFLOADED, &flags);
+ if (ndm_flags & NTF_STICKY)
+ __set_bit(BR_FDB_STICKY, &flags);
+
+ return flags;
+}
+
+static int __fdb_flush_validate_ifindex(const struct net_bridge *br,
+ int ifindex,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device *dev;
+
+ dev = __dev_get_by_index(dev_net(br->dev), ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown flush device ifindex");
+ return -ENODEV;
+ }
+ if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flush device is not a bridge or bridge port");
+ return -EINVAL;
+ }
+ if (netif_is_bridge_master(dev) && dev != br->dev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Flush bridge device does not match target bridge device");
+ return -EINVAL;
+ }
+ if (netif_is_bridge_port(dev)) {
+ struct net_bridge_port *p = br_port_get_rtnl(dev);
+
+ if (p->br != br) {
+ NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, u16 vid,
+ struct netlink_ext_ack *extack)
+{
+ u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS;
+ struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid };
+ struct net_bridge_port *p = NULL;
+ struct net_bridge *br;
+
+ if (netif_is_bridge_master(dev)) {
+ br = netdev_priv(dev);
+ } else {
+ p = br_port_get_rtnl(dev);
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge port");
+ return -EINVAL;
+ }
+ br = p->br;
+ }
+
+ if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) {
+ NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set");
+ return -EINVAL;
+ }
+ if (ndm->ndm_state & ~FDB_FLUSH_ALLOWED_NDM_STATES) {
+ NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set");
+ return -EINVAL;
+ }
+
+ desc.flags |= __ndm_state_to_fdb_flags(ndm->ndm_state);
+ desc.flags |= __ndm_flags_to_fdb_flags(ndm_flags);
+ if (tb[NDA_NDM_STATE_MASK]) {
+ u16 ndm_state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]);
+
+ desc.flags_mask |= __ndm_state_to_fdb_flags(ndm_state_mask);
+ }
+ if (tb[NDA_NDM_FLAGS_MASK]) {
+ u8 ndm_flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]);
+
+ desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask);
+ }
+ if (tb[NDA_IFINDEX]) {
+ int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]);
+
+ err = __fdb_flush_validate_ifindex(br, ifidx, extack);
+ if (err)
+ return err;
+ desc.port_ifindex = ifidx;
+ } else if (p) {
+ /* flush was invoked with port device and NTF_MASTER */
+ desc.port_ifindex = p->dev->ifindex;
+ }
+
+ br_debug(br, "flushing port ifindex: %d vlan id: %u flags: 0x%lx flags mask: 0x%lx\n",
+ desc.port_ifindex, desc.vlan_id, desc.flags, desc.flags_mask);
+
+ br_fdb_flush(br, &desc);
+
+ return 0;
}
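To make the descriptor semantics concrete, a sketch of a caller flushing only dynamically learned entries on one port and VLAN; the ifindex and VLAN values are hypothetical:

	struct net_bridge_fdb_flush_desc desc = {
		.flags_mask   = BIT(BR_FDB_STATIC),	/* test the static bit ... */
		.flags        = 0,			/* ... and require it to be clear */
		.port_ifindex = 4,			/* hypothetical port ifindex */
		.vlan_id      = 10,			/* hypothetical VLAN; 0 matches any */
	};

	br_fdb_flush(br, &desc);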
/* Flush all entries referring to a specific port.
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 4556d913955b..fdcc641fc89a 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -251,14 +251,16 @@ static int __mdb_fill_info(struct sk_buff *skb,
__mdb_entry_fill_flags(&e, flags);
e.ifindex = ifindex;
e.vid = mp->addr.vid;
- if (mp->addr.proto == htons(ETH_P_IP))
+ if (mp->addr.proto == htons(ETH_P_IP)) {
e.addr.u.ip4 = mp->addr.dst.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- else if (mp->addr.proto == htons(ETH_P_IPV6))
+ } else if (mp->addr.proto == htons(ETH_P_IPV6)) {
e.addr.u.ip6 = mp->addr.dst.ip6;
#endif
- else
+ } else {
ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr);
+ e.state = MDB_PG_FLAGS_PERMANENT;
+ }
e.addr.proto = mp->addr.proto;
nest_ent = nla_nest_start_noflag(skb,
MDBA_MDB_ENTRY_INFO);
@@ -873,8 +875,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return -EINVAL;
/* host join errors which can happen before creating the group */
- if (!port) {
- /* don't allow any flags for host-joined groups */
+ if (!port && !br_group_is_l2(&group)) {
+ /* don't allow any flags for host-joined IP groups */
if (entry->state) {
NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
return -EINVAL;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 200ad05b296f..bb01776d2d88 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1326,8 +1326,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
br_recalculate_fwd_mask(br);
}
- if (data[IFLA_BR_FDB_FLUSH])
- br_fdb_flush(br);
+ if (data[IFLA_BR_FDB_FLUSH]) {
+ struct net_bridge_fdb_flush_desc desc = {
+ .flags_mask = BIT(BR_FDB_STATIC)
+ };
+
+ br_fdb_flush(br, &desc);
+ }
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
if (data[IFLA_BR_MCAST_ROUTER]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 18ccc3d5d296..6ae882cfae1c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -274,6 +274,13 @@ struct net_bridge_fdb_entry {
struct rcu_head rcu;
};
+struct net_bridge_fdb_flush_desc {
+ unsigned long flags;
+ unsigned long flags_mask;
+ int port_ifindex;
+ u16 vlan_id;
+};
+
#define MDB_PG_FLAGS_PERMANENT BIT(0)
#define MDB_PG_FLAGS_OFFLOAD BIT(1)
#define MDB_PG_FLAGS_FAST_LEAVE BIT(2)
@@ -755,11 +762,17 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
#endif
/* br_fdb.c */
+#define FDB_FLUSH_IGNORED_NDM_FLAGS (NTF_MASTER | NTF_SELF)
+#define FDB_FLUSH_ALLOWED_NDM_STATES (NUD_PERMANENT | NUD_NOARP)
+#define FDB_FLUSH_ALLOWED_NDM_FLAGS (NTF_USE | NTF_EXT_LEARNED | \
+ NTF_STICKY | NTF_OFFLOADED)
+
int br_fdb_init(void);
void br_fdb_fini(void);
int br_fdb_hash_init(struct net_bridge *br);
void br_fdb_hash_fini(struct net_bridge *br);
-void br_fdb_flush(struct net_bridge *br);
+void br_fdb_flush(struct net_bridge *br,
+ const struct net_bridge_fdb_flush_desc *desc);
void br_fdb_find_delete_local(struct net_bridge *br,
const struct net_bridge_port *p,
const unsigned char *addr, u16 vid);
@@ -781,6 +794,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr, u16 vid);
+int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, u16 vid,
+ struct netlink_ext_ack *extack);
int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
struct netlink_ext_ack *extack);
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 8cc44c367231..81400e0b26ac 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -72,7 +72,8 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
/* Flags that can be offloaded to hardware */
#define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
- BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED)
+ BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED | \
+ BR_HAIRPIN_MODE | BR_ISOLATED | BR_MULTICAST_TO_UNICAST)
int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long flags,
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 3f7ca88c2aa3..612e367fff20 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -344,7 +344,11 @@ static DEVICE_ATTR_RW(group_addr);
static int set_flush(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br_fdb_flush(br);
+ struct net_bridge_fdb_flush_desc desc = {
+ .flags_mask = BIT(BR_FDB_STATIC)
+ };
+
+ br_fdb_flush(br, &desc);
return 0;
}
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 2b8892d502f7..251e666ba9a2 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -282,7 +282,7 @@ static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m,
if (flags & MSG_OOB)
goto read_error;
- skb = skb_recv_datagram(sk, flags, 0 , &ret);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
goto read_error;
copylen = skb->len;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 95d209b52e6a..64c07e650bb4 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1632,12 +1632,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error = 0;
- int noblock;
int err;
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
- skb = skb_recv_datagram(sk, flags, noblock, &error);
+ skb = skb_recv_datagram(sk, flags, &error);
if (!skb)
return error;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index bafb0fb5f0e0..02d81effaa54 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1047,7 +1047,6 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct sock *sk = sock->sk;
struct sk_buff *skb;
struct isotp_sock *so = isotp_sk(sk);
- int noblock = flags & MSG_DONTWAIT;
int ret = 0;
if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
@@ -1056,8 +1055,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (!so->bound)
return -EADDRNOTAVAIL;
- flags &= ~MSG_DONTWAIT;
- skb = skb_recv_datagram(sk, flags, noblock, &ret);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
return ret;
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 6dff4510687a..0bb4fd3f6264 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -802,7 +802,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939,
SCM_J1939_ERRQUEUE);
- skb = skb_recv_datagram(sk, flags, 0, &ret);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
return ret;
diff --git a/net/can/raw.c b/net/can/raw.c
index 7105fa4824e4..0cf728dcff36 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -846,16 +846,12 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int err = 0;
- int noblock;
-
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
if (flags & MSG_ERRQUEUE)
return sock_recv_errqueue(sk, msg, size,
SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
return err;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee290776c661..70126d15ca6e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -310,12 +310,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk,
EXPORT_SYMBOL(__skb_recv_datagram);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
- int noblock, int *err)
+ int *err)
{
int off = 0;
- return __skb_recv_datagram(sk, &sk->sk_receive_queue,
- flags | (noblock ? MSG_DONTWAIT : 0),
+ return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags,
&off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
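This is the signature change driving all the caller conversions above: non-blocking behaviour is now expressed purely through MSG_DONTWAIT in flags, so the separate noblock parameter and the strip-then-reapply dance disappear. In caller terms:

    /* before: flags had to be laundered around the extra argument */
    noblock = flags & MSG_DONTWAIT;
    flags &= ~MSG_DONTWAIT;
    skb = skb_recv_datagram(sk, flags, noblock, &err);

    /* after: MSG_DONTWAIT simply rides along in flags */
    skb = skb_recv_datagram(sk, flags, &err);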
diff --git a/net/core/dev.c b/net/core/dev.c
index 8c6c08446556..ba853e878007 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -151,6 +151,7 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
+#include "dev.h"
#include "net-sysfs.h"
@@ -701,6 +702,10 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
if (WARN_ON_ONCE(last_dev == ctx.dev))
return -1;
}
+
+ if (!ctx.dev)
+ return ret;
+
path = dev_fwd_path(stack);
if (!path)
return -1;
@@ -5370,13 +5375,11 @@ check_vlan_id:
*ppt_prev = pt_prev;
} else {
drop:
- if (!deliver_exact) {
+ if (!deliver_exact)
dev_core_stats_rx_dropped_inc(skb->dev);
- kfree_skb_reason(skb, SKB_DROP_REASON_PTYPE_ABSENT);
- } else {
+ else
dev_core_stats_rx_nohandler_inc(skb->dev);
- kfree_skb(skb);
- }
+ kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
/* Jamal, now you will not be able to escape explaining
* to me how you were going to use this. :-)
*/
@@ -8641,7 +8644,6 @@ void dev_set_group(struct net_device *dev, int new_group)
{
dev->group = new_group;
}
-EXPORT_SYMBOL(dev_set_group);
/**
* dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
@@ -8756,7 +8758,6 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier)
return -ENODEV;
return ops->ndo_change_carrier(dev, new_carrier);
}
-EXPORT_SYMBOL(dev_change_carrier);
/**
* dev_get_phys_port_id - Get device physical port ID
@@ -8774,7 +8775,6 @@ int dev_get_phys_port_id(struct net_device *dev,
return -EOPNOTSUPP;
return ops->ndo_get_phys_port_id(dev, ppid);
}
-EXPORT_SYMBOL(dev_get_phys_port_id);
/**
* dev_get_phys_port_name - Get device physical port name
@@ -8797,7 +8797,6 @@ int dev_get_phys_port_name(struct net_device *dev,
}
return devlink_compat_phys_port_name_get(dev, name, len);
}
-EXPORT_SYMBOL(dev_get_phys_port_name);
/**
* dev_get_port_parent_id - Get the device's port parent identifier
@@ -8879,7 +8878,6 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
dev->proto_down = proto_down;
return 0;
}
-EXPORT_SYMBOL(dev_change_proto_down);
/**
* dev_change_proto_down_reason - proto down reason
@@ -8904,7 +8902,6 @@ void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
}
}
}
-EXPORT_SYMBOL(dev_change_proto_down_reason);
struct bpf_xdp_link {
struct bpf_link link;
@@ -9431,7 +9428,7 @@ static int dev_new_index(struct net *net)
}
/* Delayed registration/unregisteration */
-static LIST_HEAD(net_todo_list);
+LIST_HEAD(net_todo_list);
DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
@@ -10359,6 +10356,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
storage->rx_dropped += local_read(&core_stats->rx_dropped);
storage->tx_dropped += local_read(&core_stats->tx_dropped);
storage->rx_nohandler += local_read(&core_stats->rx_nohandler);
+ storage->rx_otherhost_dropped += local_read(&core_stats->rx_otherhost_dropped);
}
}
return storage;
diff --git a/net/core/dev.h b/net/core/dev.h
new file mode 100644
index 000000000000..27923df00637
--- /dev/null
+++ b/net/core/dev.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_CORE_DEV_H
+#define _NET_CORE_DEV_H
+
+#include <linux/types.h>
+
+struct net;
+struct net_device;
+struct netdev_bpf;
+struct netdev_phys_item_id;
+struct netlink_ext_ack;
+
+/* Random bits of netdevice that don't need to be exposed */
+#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
+struct sd_flow_limit {
+ u64 count;
+ unsigned int num_buckets;
+ unsigned int history_head;
+ u16 history[FLOW_LIMIT_HISTORY];
+ u8 buckets[];
+};
+
+extern int netdev_flow_limit_table_len;
+
+#ifdef CONFIG_PROC_FS
+int __init dev_proc_init(void);
+#else
+#define dev_proc_init() 0
+#endif
+
+void linkwatch_init_dev(struct net_device *dev);
+void linkwatch_forget_dev(struct net_device *dev);
+void linkwatch_run_queue(void);
+
+void dev_addr_flush(struct net_device *dev);
+int dev_addr_init(struct net_device *dev);
+void dev_addr_check(struct net_device *dev);
+
+/* sysctls not referred to from outside net/core/ */
+extern int netdev_budget;
+extern unsigned int netdev_budget_usecs;
+
+extern int netdev_tstamp_prequeue;
+extern int netdev_unregister_timeout_secs;
+extern int weight_p;
+extern int dev_weight_rx_bias;
+extern int dev_weight_tx_bias;
+
+/* rtnl helpers */
+extern struct list_head net_todo_list;
+void netdev_run_todo(void);
+
+/* netdev management, shared between various uAPI entry points */
+struct netdev_name_node {
+ struct hlist_node hlist;
+ struct list_head list;
+ struct net_device *dev;
+ const char *name;
+};
+
+int netdev_get_name(struct net *net, char *name, int ifindex);
+int dev_change_name(struct net_device *dev, const char *newname);
+
+int netdev_name_node_alt_create(struct net_device *dev, const char *name);
+int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
+
+int dev_validate_mtu(struct net_device *dev, int mtu,
+ struct netlink_ext_ack *extack);
+int dev_set_mtu_ext(struct net_device *dev, int mtu,
+ struct netlink_ext_ack *extack);
+
+int dev_get_phys_port_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid);
+int dev_get_phys_port_name(struct net_device *dev,
+ char *name, size_t len);
+
+int dev_change_proto_down(struct net_device *dev, bool proto_down);
+void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
+ u32 value);
+
+typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ int fd, int expected_fd, u32 flags);
+
+int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len);
+void dev_set_group(struct net_device *dev, int new_group);
+int dev_change_carrier(struct net_device *dev, bool new_carrier);
+
+void __dev_set_rx_mode(struct net_device *dev);
+
+#endif
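The new header gives these helpers an internal home; the EXPORT_SYMBOL removals in net/core/dev.c above are possible because every remaining caller sits under net/core/ and can simply do:

    #include "dev.h"        /* net/core-local; these declarations previously
                             * sat in <linux/netdevice.h> */

as the follow-on hunks to dev_addr_lists.c, dev_ioctl.c, link_watch.c and friends show.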
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index bead38ca50bd..baa63dee2829 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -12,6 +12,8 @@
#include <linux/export.h>
#include <linux/list.h>
+#include "dev.h"
+
/*
* General list handling functions
*/
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 1b807d119da5..4f6be442ae7e 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -10,6 +10,8 @@
#include <net/dsa.h>
#include <net/wext.h>
+#include "dev.h"
+
/*
* Map an interface index to its name (SIOCGIFNAME)
*/
diff --git a/net/core/filter.c b/net/core/filter.c
index 64470a727ef7..143f442a9505 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5173,7 +5173,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
if (val <= 0 || tp->data_segs_out > tp->syn_data)
ret = -EINVAL;
else
- tp->snd_cwnd = val;
+ tcp_snd_cwnd_set(tp, val);
break;
case TCP_BPF_SNDCWND_CLAMP:
if (val <= 0) {
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 95098d1a49bd..a244d3bade7d 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -18,6 +18,7 @@
#include <linux/bitops.h>
#include <linux/types.h>
+#include "dev.h"
enum lw_bits {
LW_URGENT = 0,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 88cc0ad7d386..1ec23bf8b05c 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -4,6 +4,8 @@
#include <linux/seq_file.h>
#include <net/wext.h>
+#include "dev.h"
+
#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
#define get_bucket(x) ((x) >> BUCKET_SPACE)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 9cbc1c8289bc..4980c3a50475 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -24,6 +24,7 @@
#include <linux/of_net.h>
#include <linux/cpu.h>
+#include "dev.h"
#include "net-sysfs.h"
#ifdef CONFIG_SYSFS
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 1943c0f0307d..4af55d28ffa3 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -36,6 +36,12 @@
this_cpu_inc(s->__stat); \
} while (0)
+#define recycle_stat_add(pool, __stat, val) \
+ do { \
+ struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
+ this_cpu_add(s->__stat, val); \
+ } while (0)
+
bool page_pool_get_stats(struct page_pool *pool,
struct page_pool_stats *stats)
{
@@ -63,6 +69,7 @@ EXPORT_SYMBOL(page_pool_get_stats);
#else
#define alloc_stat_inc(pool, __stat)
#define recycle_stat_inc(pool, __stat)
+#define recycle_stat_add(pool, __stat, val)
#endif
static int page_pool_init(struct page_pool *pool,
@@ -566,9 +573,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
/* Bulk producer into ptr_ring page_pool cache */
page_pool_ring_lock(pool);
for (i = 0; i < bulk_len; i++) {
- if (__ptr_ring_produce(&pool->ring, data[i]))
- break; /* ring full */
+ if (__ptr_ring_produce(&pool->ring, data[i])) {
+ /* ring full */
+ recycle_stat_inc(pool, ring_full);
+ break;
+ }
}
+ recycle_stat_add(pool, ring, i);
page_pool_ring_unlock(pool);
/* Hopefully all pages were returned into the ptr_ring */
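The bulk-recycle path now feeds the same per-cpu counters as the single-page path: one ring_full increment per overflowing page and a single ring += i for everything that made it into the ptr_ring. A consumer-side sketch using the page_pool_get_stats() API shown earlier in this series (field layout per the page_pool stats structs; minimal error handling):

    static void show_recycle_stats(struct page_pool *pool)
    {
            struct page_pool_stats stats = { };

            /* page_pool_get_stats() returns false if it cannot fill @stats */
            if (!page_pool_get_stats(pool, &stats))
                    return;

            pr_info("recycled to ring: %llu, ring full: %llu\n",
                    stats.recycle_stats.ring, stats.recycle_stats.ring_full);
    }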
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d1381ea6d52e..8bf770a7261e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -54,6 +54,8 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
+#include "dev.h"
+
#define RTNL_MAX_TYPE 50
#define RTNL_SLAVE_MAX_TYPE 40
@@ -95,6 +97,39 @@ void __rtnl_unlock(void)
defer_kfree_skb_list = NULL;
+ /* Ensure that we didn't actually add any TODO item when __rtnl_unlock()
+ * is used. In some places, e.g. in cfg80211, we have code that will do
+ * something like
+ * rtnl_lock()
+ * wiphy_lock()
+ * ...
+ * rtnl_unlock()
+ *
+ * and because netdev_run_todo() acquires the RTNL for items on the list
+ * we could cause a situation such as this:
+ * Thread 1 Thread 2
+ * rtnl_lock()
+ * unregister_netdevice()
+ * __rtnl_unlock()
+ * rtnl_lock()
+ * wiphy_lock()
+ * rtnl_unlock()
+ * netdev_run_todo()
+ * __rtnl_unlock()
+ *
+ * // list not empty now
+ * // because of thread 2
+ * rtnl_lock()
+ * while (!list_empty(...))
+ * rtnl_lock()
+ * wiphy_lock()
+ * **** DEADLOCK ****
+ *
+ * However, usage of __rtnl_unlock() is rare, and so we can ensure that
+ * it's not used in cases where something is added to the todo list.
+ */
+ WARN_ON(!list_empty(&net_todo_list));
+
mutex_unlock(&rtnl_mutex);
while (head) {
@@ -214,6 +249,8 @@ static int rtnl_register_internal(struct module *owner,
if (dumpit)
link->dumpit = dumpit;
+ WARN_ON(rtnl_msgtype_kind(msgtype) != RTNL_KIND_DEL &&
+ (flags & RTNL_FLAG_BULK_DEL_SUPPORTED));
link->flags |= flags;
/* publish protocol:msgtype */
@@ -4132,22 +4169,36 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_del);
+static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = {
+ [NDA_VLAN] = { .type = NLA_U16 },
+ [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
+ [NDA_NDM_STATE_MASK] = { .type = NLA_U16 },
+ [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 },
+};
+
static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK);
struct net *net = sock_net(skb->sk);
+ const struct net_device_ops *ops;
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct net_device *dev;
- __u8 *addr;
+ __u8 *addr = NULL;
int err;
u16 vid;
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL,
- extack);
+ if (!del_bulk) {
+ err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
+ NULL, extack);
+ } else {
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX,
+ fdb_del_bulk_policy, extack);
+ }
if (err < 0)
return err;
@@ -4163,9 +4214,12 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENODEV;
}
- if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- NL_SET_ERR_MSG(extack, "invalid address");
- return -EINVAL;
+ if (!del_bulk) {
+ if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
+ NL_SET_ERR_MSG(extack, "invalid address");
+ return -EINVAL;
+ }
+ addr = nla_data(tb[NDA_LLADDR]);
}
if (dev->type != ARPHRD_ETHER) {
@@ -4173,8 +4227,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
- addr = nla_data(tb[NDA_LLADDR]);
-
err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -4185,10 +4237,16 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
netif_is_bridge_port(dev)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
- const struct net_device_ops *ops = br_dev->netdev_ops;
- if (ops->ndo_fdb_del)
- err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
+ ops = br_dev->netdev_ops;
+ if (!del_bulk) {
+ if (ops->ndo_fdb_del)
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
+ } else {
+ if (ops->ndo_fdb_del_bulk)
+ err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid,
+ extack);
+ }
if (err)
goto out;
@@ -4198,15 +4256,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Embedded bridge, macvlan, and any other device support */
if (ndm->ndm_flags & NTF_SELF) {
- if (dev->netdev_ops->ndo_fdb_del)
- err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr,
- vid);
- else
- err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
+ ops = dev->netdev_ops;
+ if (!del_bulk) {
+ if (ops->ndo_fdb_del)
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
+ else
+ err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
+ } else {
+ /* in case err was cleared by NTF_MASTER call */
+ err = -EOPNOTSUPP;
+ if (ops->ndo_fdb_del_bulk)
+ err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid,
+ extack);
+ }
if (!err) {
- rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH,
- ndm->ndm_state);
+ if (!del_bulk)
+ rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH,
+ ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
}
}
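For the NTF_SELF leg the same split applies, with -EOPNOTSUPP restored first so a device without the new hook reports the right error even after a successful NTF_MASTER pass. Driver-side, the new hook has the shape declared in br_private.h earlier; a minimal sketch (the driver names and the flush helper are hypothetical):

    static int foo_ndo_fdb_del_bulk(struct ndmsg *ndm, struct nlattr *tb[],
                                    struct net_device *dev, u16 vid,
                                    struct netlink_ext_ack *extack)
    {
            /* NDA_NDM_STATE_MASK / NDA_NDM_FLAGS_MASK may narrow the flush;
             * reject what the device cannot honour.
             */
            if (tb[NDA_NDM_FLAGS_MASK]) {
                    NL_SET_ERR_MSG(extack, "FDB flag matching not supported");
                    return -EOPNOTSUPP;
            }

            return foo_flush_fdb(dev, vid); /* hypothetical device flush */
    }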
@@ -5896,11 +5963,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct rtnl_link *link;
+ enum rtnl_kinds kind;
struct module *owner;
int err = -EOPNOTSUPP;
rtnl_doit_func doit;
unsigned int flags;
- int kind;
int family;
int type;
@@ -5915,13 +5982,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
- kind = type&3;
+ kind = rtnl_msgtype_kind(type);
- if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
+ if (kind != RTNL_KIND_GET && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
rcu_read_lock();
- if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+ if (kind == RTNL_KIND_GET && (nlh->nlmsg_flags & NLM_F_DUMP)) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
u32 min_dump_alloc = 0;
@@ -5977,6 +6044,12 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
flags = link->flags;
+ if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) &&
+ !(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) {
+ NL_SET_ERR_MSG(extack, "Bulk delete is not supported");
+ goto err_unlock;
+ }
+
if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
doit = link->doit;
rcu_read_unlock();
@@ -6105,7 +6178,8 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL,
+ RTNL_FLAG_BULK_DEL_SUPPORTED);
rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
diff --git a/net/core/sock.c b/net/core/sock.c
index 1180a0cb0110..29abec3eabd8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -141,6 +141,8 @@
#include <linux/ethtool.h>
+#include "dev.h"
+
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
@@ -503,17 +505,35 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason)
{
+ enum skb_drop_reason drop_reason;
int err;
err = sk_filter(sk, skb);
- if (err)
- return err;
-
- return __sock_queue_rcv_skb(sk, skb);
+ if (err) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ goto out;
+ }
+ err = __sock_queue_rcv_skb(sk, skb);
+ switch (err) {
+ case -ENOMEM:
+ drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
+ break;
+ case -ENOBUFS:
+ drop_reason = SKB_DROP_REASON_PROTO_MEM;
+ break;
+ default:
+ drop_reason = SKB_NOT_DROPPED_YET;
+ break;
+ }
+out:
+ if (reason)
+ *reason = drop_reason;
+ return err;
}
-EXPORT_SYMBOL(sock_queue_rcv_skb);
+EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
const int nested, unsigned int trim_cap, bool refcounted)
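Callers of sock_queue_rcv_skb() that want precise drop accounting migrate to the _reason variant and free with kfree_skb_reason(); a sketch of the consuming pattern (the wrapper function here is illustrative, and the ping.c hunk below does exactly this):

    static int proto_queue_skb(struct sock *sk, struct sk_buff *skb)
    {
            enum skb_drop_reason reason;

            if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
                    /* reason distinguishes filter, rcvbuf and proto-mem drops */
                    kfree_skb_reason(skb, reason);
                    return -1;
            }
            return 0;
    }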
@@ -3486,8 +3506,7 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int addr_len = 0;
int err;
- err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7123fe7feeac..8295e5877eb3 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -23,6 +23,8 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
+#include "dev.h"
+
static int two = 2;
static int three = 3;
static int int_3600 = 3600;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 671c377f0889..7dfc00c9fb32 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -293,8 +293,8 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
-int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len);
+int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len);
void dccp_shutdown(struct sock *sk, int how);
int inet_dccp_listen(struct socket *sock, int backlog);
__poll_t dccp_poll(struct file *file, struct socket *sock,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a976b4d29892..58421f94427e 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -791,8 +791,8 @@ out_discard:
EXPORT_SYMBOL_GPL(dccp_sendmsg);
-int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len)
+int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
const struct dccp_hdr *dh;
long timeo;
@@ -804,7 +804,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
goto out;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index ebcc812735a4..62b89d6f54fd 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -391,7 +391,7 @@ EXPORT_SYMBOL(ether_setup);
struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
unsigned int rxqs)
{
- return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN,
+ return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_ENUM,
ether_setup, txqs, rxqs);
}
EXPORT_SYMBOL(alloc_etherdev_mqs);
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 3b2366a88c3c..f24852814fa3 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -308,13 +308,13 @@ out:
}
static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
size_t copied = 0;
int err = -EOPNOTSUPP;
struct sk_buff *skb;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -695,7 +695,7 @@ out:
}
static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
size_t copied = 0;
int err = -EOPNOTSUPP;
@@ -703,7 +703,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct dgram_sock *ro = dgram_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 87983e70f03f..e983bb0c5012 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -321,7 +321,6 @@ config NET_UDP_TUNNEL
config NET_FOU
tristate "IP: Foo (IP protocols) over UDP"
- select XFRM
select NET_UDP_TUNNEL
help
Foo over UDP allows any IP protocol to be directly encapsulated
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 72fde2888ad2..195ecfa2f000 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -836,7 +836,7 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
EXPORT_SYMBOL(inet_sendpage);
INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *,
- size_t, int, int, int *));
+ size_t, int, int *));
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -848,8 +848,7 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
sock_rps_record_flow(sk);
err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
- sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ sk, msg, size, flags, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
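inet_recvmsg() no longer splits flags for its indirect call: the nonblock parameter is gone from the ->recvmsg prototype tree-wide, and implementations recover blocking behaviour from the flags word. The recurring pattern (proto_recvmsg is illustrative; compare the dccp, ping, raw and tcp conversions in this series):

    static int proto_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                             int flags, int *addr_len)
    {
            long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

            /* ... wait up to timeo, copy data, fill *addr_len ... */
            return 0;
    }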
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ccb62038f6a4..a57ba23571c9 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -524,7 +524,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.tb_id = tb_id;
fri.dst = key;
fri.dst_len = dst_len;
- fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
+ fri.dscp = fa->fa_dscp;
fri.type = fa->fa_type;
fri.offload = READ_ONCE(fa->offload);
fri.trap = READ_ONCE(fa->trap);
@@ -1781,7 +1781,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
rtm->rtm_family = AF_INET;
rtm->rtm_dst_len = fri->dst_len;
rtm->rtm_src_len = 0;
- rtm->rtm_tos = fri->tos;
+ rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp);
if (tb_id < 256)
rtm->rtm_table = tb_id;
else
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fb0e49c36c2e..1f7f25532fa1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -82,7 +82,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = inet_dscp_to_dsfield(fa->fa_dscp),
+ .dscp = fa->fa_dscp,
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -99,7 +99,7 @@ static int call_fib_entry_notifiers(struct net *net,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = inet_dscp_to_dsfield(fa->fa_dscp),
+ .dscp = fa->fa_dscp,
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -1032,8 +1032,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
- fa->fa_dscp == inet_dsfield_to_dscp(fri->tos) &&
- fa->fa_info == fri->fi && fa->fa_type == fri->type)
+ fa->fa_dscp == fri->dscp && fa->fa_info == fri->fi &&
+ fa->fa_type == fri->type)
return fa;
}
@@ -2305,7 +2305,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.tb_id = tb->tb_id;
fri.dst = xkey;
fri.dst_len = KEYLENGTH - fa->fa_slen;
- fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
+ fri.dscp = fa->fa_dscp;
fri.type = fa->fa_type;
fri.offload = READ_ONCE(fa->offload);
fri.trap = READ_ONCE(fa->trap);
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 0d085cc8d96c..025a33c1b04d 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -16,7 +16,6 @@
#include <net/protocol.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
-#include <net/xfrm.h>
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 72a375c7f417..236debd9fded 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -186,7 +186,7 @@ EXPORT_SYMBOL(icmp_err_convert);
*/
struct icmp_control {
- bool (*handler)(struct sk_buff *skb);
+ enum skb_drop_reason (*handler)(struct sk_buff *skb);
short error; /* This ICMP is classed as an error message */
};
@@ -839,8 +839,9 @@ static bool icmp_tag_validation(int proto)
* ICMP_PARAMETERPROB.
*/
-static bool icmp_unreach(struct sk_buff *skb)
+static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_NOT_DROPPED_YET;
const struct iphdr *iph;
struct icmphdr *icmph;
struct net *net;
@@ -860,8 +861,10 @@ static bool icmp_unreach(struct sk_buff *skb)
icmph = icmp_hdr(skb);
iph = (const struct iphdr *)skb->data;
- if (iph->ihl < 5) /* Mangled header, drop. */
+ if (iph->ihl < 5) { /* Mangled header, drop. */
+ reason = SKB_DROP_REASON_IP_INHDR;
goto out_err;
+ }
switch (icmph->type) {
case ICMP_DEST_UNREACH:
@@ -941,10 +944,10 @@ static bool icmp_unreach(struct sk_buff *skb)
icmp_socket_deliver(skb, info);
out:
- return true;
+ return reason;
out_err:
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return false;
+ return reason ?: SKB_DROP_REASON_NOT_SPECIFIED;
}
@@ -952,20 +955,20 @@ out_err:
* Handle ICMP_REDIRECT.
*/
-static bool icmp_redirect(struct sk_buff *skb)
+static enum skb_drop_reason icmp_redirect(struct sk_buff *skb)
{
if (skb->len < sizeof(struct iphdr)) {
__ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
- return false;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
}
if (!pskb_may_pull(skb, sizeof(struct iphdr))) {
/* there ought to be a stat */
- return false;
+ return SKB_DROP_REASON_NOMEM;
}
icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/*
@@ -982,7 +985,7 @@ static bool icmp_redirect(struct sk_buff *skb)
* See also WRT handling of options once they are done and working.
*/
-static bool icmp_echo(struct sk_buff *skb)
+static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
{
struct icmp_bxm icmp_param;
struct net *net;
@@ -990,7 +993,7 @@ static bool icmp_echo(struct sk_buff *skb)
net = dev_net(skb_dst(skb)->dev);
/* should there be an ICMP stat for ignored echos? */
if (net->ipv4.sysctl_icmp_echo_ignore_all)
- return true;
+ return SKB_NOT_DROPPED_YET;
icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.skb = skb;
@@ -1001,10 +1004,10 @@ static bool icmp_echo(struct sk_buff *skb)
if (icmp_param.data.icmph.type == ICMP_ECHO)
icmp_param.data.icmph.type = ICMP_ECHOREPLY;
else if (!icmp_build_probe(skb, &icmp_param.data.icmph))
- return true;
+ return SKB_NOT_DROPPED_YET;
icmp_reply(&icmp_param, skb);
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/* Helper for icmp_echo and icmpv6_echo_reply.
@@ -1122,7 +1125,7 @@ EXPORT_SYMBOL_GPL(icmp_build_probe);
* MUST be accurate to a few minutes.
* MUST be updated at least at 15Hz.
*/
-static bool icmp_timestamp(struct sk_buff *skb)
+static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
{
struct icmp_bxm icmp_param;
/*
@@ -1147,17 +1150,17 @@ static bool icmp_timestamp(struct sk_buff *skb)
icmp_param.data_len = 0;
icmp_param.head_len = sizeof(struct icmphdr) + 12;
icmp_reply(&icmp_param, skb);
- return true;
+ return SKB_NOT_DROPPED_YET;
out_err:
__ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
- return false;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
}
-static bool icmp_discard(struct sk_buff *skb)
+static enum skb_drop_reason icmp_discard(struct sk_buff *skb)
{
/* pretend it was a success */
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/*
@@ -1165,18 +1168,20 @@ static bool icmp_discard(struct sk_buff *skb)
*/
int icmp_rcv(struct sk_buff *skb)
{
- struct icmphdr *icmph;
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev);
- bool success;
+ struct icmphdr *icmph;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
struct sec_path *sp = skb_sec_path(skb);
int nh;
if (!(sp && sp->xvec[sp->len - 1]->props.flags &
- XFRM_STATE_ICMP))
+ XFRM_STATE_ICMP)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
goto drop;
@@ -1184,8 +1189,11 @@ int icmp_rcv(struct sk_buff *skb)
nh = skb_network_offset(skb);
skb_set_network_header(skb, sizeof(*icmph));
- if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+ if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN,
+ skb)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
skb_set_network_header(skb, nh);
}
@@ -1207,13 +1215,13 @@ int icmp_rcv(struct sk_buff *skb)
/* We can't use icmp_pointers[].handler() because it is an array of
* size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42.
*/
- success = icmp_echo(skb);
- goto success_check;
+ reason = icmp_echo(skb);
+ goto reason_check;
}
if (icmph->type == ICMP_EXT_ECHOREPLY) {
- success = ping_rcv(skb);
- goto success_check;
+ reason = ping_rcv(skb);
+ goto reason_check;
}
/*
@@ -1222,8 +1230,10 @@ int icmp_rcv(struct sk_buff *skb)
* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
* discarded.
*/
- if (icmph->type > NR_ICMP_TYPES)
+ if (icmph->type > NR_ICMP_TYPES) {
+ reason = SKB_DROP_REASON_UNHANDLED_PROTO;
goto error;
+ }
/*
* Parse the ICMP message
@@ -1239,27 +1249,30 @@ int icmp_rcv(struct sk_buff *skb)
if ((icmph->type == ICMP_ECHO ||
icmph->type == ICMP_TIMESTAMP) &&
net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
goto error;
}
if (icmph->type != ICMP_ECHO &&
icmph->type != ICMP_TIMESTAMP &&
icmph->type != ICMP_ADDRESS &&
icmph->type != ICMP_ADDRESSREPLY) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
goto error;
}
}
- success = icmp_pointers[icmph->type].handler(skb);
-success_check:
- if (success) {
+ reason = icmp_pointers[icmph->type].handler(skb);
+reason_check:
+ if (!reason) {
consume_skb(skb);
return NET_RX_SUCCESS;
}
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
csum_error:
+ reason = SKB_DROP_REASON_ICMP_CSUM;
__ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
error:
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
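Every icmp_control handler now reports an skb_drop_reason instead of a bool, and icmp_rcv() consumes or frees accordingly: SKB_NOT_DROPPED_YET (value 0) means the message was handled. The new handler contract, sketched (handler name illustrative):

    static enum skb_drop_reason icmp_example(struct sk_buff *skb)
    {
            if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
                    return SKB_DROP_REASON_PKT_TOO_SMALL;

            /* ... act on the message ... */
            return SKB_NOT_DROPPED_YET;     /* icmp_rcv() will consume_skb() */
    }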
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 92ba3350274b..e3aa436a1bdf 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -90,6 +90,7 @@ int ip_forward(struct sk_buff *skb)
struct rtable *rt; /* Route we use */
struct ip_options *opt = &(IPCB(skb)->opt);
struct net *net;
+ SKB_DR(reason);
/* that should never happen */
if (skb->pkt_type != PACKET_HOST)
@@ -101,8 +102,10 @@ int ip_forward(struct sk_buff *skb)
if (skb_warn_if_lro(skb))
goto drop;
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
+ }
if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
return NET_RX_SUCCESS;
@@ -118,8 +121,10 @@ int ip_forward(struct sk_buff *skb)
if (ip_hdr(skb)->ttl <= 1)
goto too_many_hops;
- if (!xfrm4_route_forward(skb))
+ if (!xfrm4_route_forward(skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
+ }
rt = skb_rtable(skb);
@@ -132,6 +137,7 @@ int ip_forward(struct sk_buff *skb)
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
+ SKB_DR_SET(reason, PKT_TOO_BIG);
goto drop;
}
@@ -169,7 +175,8 @@ too_many_hops:
/* Tell the sender its packet died... */
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
+ SKB_DR_SET(reason, IP_INHDR);
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
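SKB_DR() and SKB_DR_SET() are shorthands for declaring a reason variable preset to SKB_DROP_REASON_NOT_SPECIFIED and for assigning SKB_DROP_REASON_<suffix> to it, which keeps goto-style drop paths like the one above terse. Expanded per their definitions in <linux/skbuff.h>, the pattern reads roughly (example function is illustrative):

    static int example_forward(struct sk_buff *skb)
    {
            enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; /* SKB_DR(reason) */

            if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                    reason = SKB_DROP_REASON_XFRM_POLICY; /* SKB_DR_SET(reason, XFRM_POLICY) */
                    goto drop;
            }
            return NET_RX_SUCCESS;
    drop:
            kfree_skb_reason(skb, reason);
            return NET_RX_DROP;
    }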
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 95f7bb052784..b1165f717cd1 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -451,6 +451,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
* that it receives, do not try to analyse it.
*/
if (skb->pkt_type == PACKET_OTHERHOST) {
+ dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
drop_reason = SKB_DROP_REASON_OTHERHOST;
goto drop;
}
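PACKET_OTHERHOST drops now both bump a dedicated per-cpu core counter and keep their own drop reason; the counter surfaces in the dev_get_stats() hunk earlier as rtnl_link_stats64::rx_otherhost_dropped. A read-side sketch:

    struct rtnl_link_stats64 stats;

    dev_get_stats(dev, &stats);
    pr_info("%s: %llu frames dropped as other-host\n",
            dev->name, stats.rx_otherhost_dropped);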
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 4151eb1262dd..b75cac69bd7e 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -112,6 +112,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
fl4.daddr = iph->daddr;
fl4.saddr = get_saddr(iph->saddr);
} else {
+ if (nft_hook(pkt) == NF_INET_FORWARD &&
+ priv->flags & NFTA_FIB_F_IIF)
+ fl4.flowi4_iif = nft_out(pkt)->ifindex;
+
fl4.daddr = iph->saddr;
fl4.saddr = get_saddr(iph->daddr);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 3ee947557b88..319c181bfbb6 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -844,8 +844,8 @@ do_confirm:
goto out;
}
-int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
struct inet_sock *isk = inet_sk(sk);
int family = sk->sk_family;
@@ -861,7 +861,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
if (flags & MSG_ERRQUEUE)
return inet_recv_error(sk, msg, len, addr_len);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -934,16 +934,24 @@ out:
}
EXPORT_SYMBOL_GPL(ping_recvmsg);
-int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk,
+ struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
inet_sk(sk), inet_sk(sk)->inet_num, skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
pr_debug("ping_queue_rcv_skb -> failed\n");
- return -1;
+ return reason;
}
- return 0;
+ return SKB_NOT_DROPPED_YET;
+}
+
+int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ return __ping_queue_rcv_skb(sk, skb) ? -1 : 0;
}
EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
@@ -952,12 +960,12 @@ EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
* All we need to do is get the socket.
*/
-bool ping_rcv(struct sk_buff *skb)
+enum skb_drop_reason ping_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET;
struct sock *sk;
struct net *net = dev_net(skb->dev);
struct icmphdr *icmph = icmp_hdr(skb);
- bool rc = false;
/* We assume the packet has already been checked by icmp_rcv */
@@ -972,15 +980,17 @@ bool ping_rcv(struct sk_buff *skb)
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
pr_debug("rcv on socket %p\n", sk);
- if (skb2 && !ping_queue_rcv_skb(sk, skb2))
- rc = true;
+ if (skb2)
+ reason = __ping_queue_rcv_skb(sk, skb2);
+ else
+ reason = SKB_DROP_REASON_NOMEM;
sock_put(sk);
}
- if (!rc)
+ if (reason)
pr_debug("no socket, dropping\n");
- return rc;
+ return reason;
}
EXPORT_SYMBOL_GPL(ping_rcv);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9f97b9cbf7b3..4056b0da85ea 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -753,7 +753,7 @@ out:
*/
static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
size_t copied = 0;
@@ -769,7 +769,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out;
}
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98c6f3429593..e839d424b861 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -945,6 +945,7 @@ static int ip_error(struct sk_buff *skb)
struct inet_peer *peer;
unsigned long now;
struct net *net;
+ SKB_DR(reason);
bool send;
int code;
@@ -964,10 +965,12 @@ static int ip_error(struct sk_buff *skb)
if (!IN_DEV_FORWARD(in_dev)) {
switch (rt->dst.error) {
case EHOSTUNREACH:
+ SKB_DR_SET(reason, IP_INADDRERRORS);
__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
break;
case ENETUNREACH:
+ SKB_DR_SET(reason, IP_INNOROUTES);
__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
break;
}
@@ -983,6 +986,7 @@ static int ip_error(struct sk_buff *skb)
break;
case ENETUNREACH:
code = ICMP_NET_UNREACH;
+ SKB_DR_SET(reason, IP_INNOROUTES);
__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
break;
case EACCES:
@@ -1009,7 +1013,7 @@ static int ip_error(struct sk_buff *skb)
if (send)
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
-out: kfree_skb(skb);
+out: kfree_skb_reason(skb, reason);
return 0;
}
@@ -3394,7 +3398,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fri.tb_id = table_id;
fri.dst = res.prefix;
fri.dst_len = res.prefixlen;
- fri.tos = fl4.flowi4_tos;
+ fri.dscp = inet_dsfield_to_dscp(fl4.flowi4_tos);
fri.type = rt->rt_type;
fri.offload = 0;
fri.trap = 0;
@@ -3407,7 +3411,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (fa->fa_slen == slen &&
fa->tb_id == fri.tb_id &&
- fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) &&
+ fa->fa_dscp == fri.dscp &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
fri.offload = READ_ONCE(fa->offload);
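fib_rt_info now carries a typed dscp_t instead of a raw tos byte, so DSCP/dsfield conversions happen once at the rtnetlink boundary instead of being scattered through the fib code. The wrappers are thin but sparse-checked:

    /* per include/net/inet_dscp.h: dscp_t is a __bitwise u8, so mixing it
     * with raw dsfield bytes without these helpers draws sparse warnings
     */
    dscp_t dscp  = inet_dsfield_to_dscp(fl4.flowi4_tos); /* masks out ECN bits */
    __u8 dsfield = inet_dscp_to_dsfield(dscp);           /* back to a dsfield */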
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf18fbcbf123..e20b87b3bf90 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -429,7 +429,7 @@ void tcp_init_sock(struct sock *sk)
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
- tp->snd_cwnd = TCP_INIT_CWND;
+ tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
@@ -1877,8 +1877,7 @@ static void tcp_zerocopy_set_hint_for_skb(struct sock *sk,
}
static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags,
- struct scm_timestamping_internal *tss,
+ int flags, struct scm_timestamping_internal *tss,
int *cmsg_flags);
static int receive_fallback_to_copy(struct sock *sk,
struct tcp_zerocopy_receive *zc, int inq,
@@ -1900,7 +1899,7 @@ static int receive_fallback_to_copy(struct sock *sk,
if (err)
return err;
- err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0,
+ err = tcp_recvmsg_locked(sk, &msg, inq, MSG_DONTWAIT,
tss, &zc->msg_flags);
if (err < 0)
return err;
@@ -2316,8 +2315,7 @@ static int tcp_inq_hint(struct sock *sk)
*/
static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags,
- struct scm_timestamping_internal *tss,
+ int flags, struct scm_timestamping_internal *tss,
int *cmsg_flags)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2337,7 +2335,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
if (tp->recvmsg_inq)
*cmsg_flags = TCP_CMSG_INQ;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
/* Urgent data needs to be handled specially. */
if (flags & MSG_OOB)
@@ -2556,8 +2554,8 @@ recv_sndq:
goto out;
}
-int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len)
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
int cmsg_flags = 0, ret, inq;
struct scm_timestamping_internal tss;
@@ -2568,11 +2566,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (sk_can_busy_loop(sk) &&
skb_queue_empty_lockless(&sk->sk_receive_queue) &&
sk->sk_state == TCP_ESTABLISHED)
- sk_busy_loop(sk, nonblock);
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
lock_sock(sk);
- ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss,
- &cmsg_flags);
+ ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags);
release_sock(sk);
sk_defer_free_flush(sk);
@@ -3033,7 +3030,7 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_rto_min = TCP_RTO_MIN;
icsk->icsk_delack_max = TCP_DELACK_MAX;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
- tp->snd_cwnd = TCP_INIT_CWND;
+ tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
tp->delivered = 0;
@@ -3744,7 +3741,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_max_pacing_rate = rate64;
info->tcpi_reordering = tp->reordering;
- info->tcpi_snd_cwnd = tp->snd_cwnd;
+ info->tcpi_snd_cwnd = tcp_snd_cwnd(tp);
if (info->tcpi_state == TCP_LISTEN) {
/* listeners aliased fields :
@@ -3915,7 +3912,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
rate64 = tcp_compute_delivery_rate(tp);
nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
- nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
+ nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp));
nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
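From here on, all reads and writes of tp->snd_cwnd funnel through accessors. A sketch of their likely shape (the setter's sanity check is the motivation: catching a zero cwnd at the write site rather than at some later stall):

    static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp)
    {
            return tp->snd_cwnd;
    }

    static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val)
    {
            WARN_ON_ONCE((int)val <= 0);    /* flag bogus cwnd writes */
            tp->snd_cwnd = val;
    }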
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 02e8626ccb27..c7d30a3bbd81 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -276,7 +276,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
} else { /* no RTT sample yet */
rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
}
- bw = (u64)tp->snd_cwnd * BW_UNIT;
+ bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
do_div(bw, rtt_us);
sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}
@@ -323,9 +323,9 @@ static void bbr_save_cwnd(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
- bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */
+ bbr->prior_cwnd = tcp_snd_cwnd(tp); /* this cwnd is good enough */
else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
- bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
+ bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
}
static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
@@ -482,7 +482,7 @@ static bool bbr_set_cwnd_to_recover_or_restore(
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
- u32 cwnd = tp->snd_cwnd;
+ u32 cwnd = tcp_snd_cwnd(tp);
/* An ACK for P pkts should release at most 2*P packets. We do this
* in two steps. First, here we deduct the number of lost packets.
@@ -520,7 +520,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u32 cwnd = tp->snd_cwnd, target_cwnd = 0;
+ u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0;
if (!acked)
goto done; /* no packet fully ACKed; just apply caps */
@@ -544,9 +544,9 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
cwnd = max(cwnd, bbr_cwnd_min_target);
done:
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */
+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */
if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */
- tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target));
}
/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
@@ -856,7 +856,7 @@ static void bbr_update_ack_aggregation(struct sock *sk,
bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
bbr->ack_epoch_acked + rs->acked_sacked);
extra_acked = bbr->ack_epoch_acked - expected_acked;
- extra_acked = min(extra_acked, tp->snd_cwnd);
+ extra_acked = min(extra_acked, tcp_snd_cwnd(tp));
if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
}
@@ -914,7 +914,7 @@ static void bbr_check_probe_rtt_done(struct sock *sk)
return;
bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */
- tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
bbr_reset_mode(sk);
}
@@ -1093,7 +1093,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
bbr->full_bw_cnt = 0;
bbr_reset_lt_bw_sampling(sk);
- return tcp_sk(sk)->snd_cwnd;
+ return tcp_snd_cwnd(tcp_sk(sk));
}
/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index f5f588b1f6e9..58358bf92e1b 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- bictcp_update(ca, tp->snd_cwnd);
+ bictcp_update(ca, tcp_snd_cwnd(tp));
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
@@ -166,16 +166,16 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
ca->epoch_start = 0; /* end of epoch */
/* Wmax and fast convergence */
- if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
- ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
+ ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
/ (2 * BICTCP_BETA_SCALE);
else
- ca->last_max_cwnd = tp->snd_cwnd;
+ ca->last_max_cwnd = tcp_snd_cwnd(tp);
- if (tp->snd_cwnd <= low_window)
- return max(tp->snd_cwnd >> 1U, 2U);
+ if (tcp_snd_cwnd(tp) <= low_window)
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
else
- return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+ return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
}
static void bictcp_state(struct sock *sk, u8 new_state)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 1cdcb4df0eb7..be3947e70fec 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -174,7 +174,6 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
static int tcp_bpf_recvmsg_parser(struct sock *sk,
struct msghdr *msg,
size_t len,
- int nonblock,
int flags,
int *addr_len)
{
@@ -186,7 +185,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
lock_sock(sk);
msg_bytes_ready:
@@ -211,7 +210,7 @@ msg_bytes_ready:
goto out;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
if (!timeo) {
copied = -EAGAIN;
goto out;
@@ -234,7 +233,7 @@ out:
}
static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_psock *psock;
int copied, ret;
@@ -244,11 +243,11 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
if (!skb_queue_empty(&sk->sk_receive_queue) &&
sk_psock_queue_empty(psock)) {
sk_psock_put(sk, psock);
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
}
lock_sock(sk);
msg_bytes_ready:
@@ -257,14 +256,14 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = tcp_msg_wait_data(sk, psock, timeo);
if (data) {
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
release_sock(sk);
sk_psock_put(sk, psock);
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
}
copied = -EAGAIN;
}
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 709d23801823..ddc7ba0554bd 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -161,8 +161,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
LINUX_MIB_TCPHYSTARTTRAINDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
return;
}
}
@@ -180,8 +180,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
LINUX_MIB_TCPHYSTARTDELAYDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -252,7 +252,7 @@ static bool tcp_cdg_backoff(struct sock *sk, u32 grad)
return false;
}
- ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd);
+ ca->shadow_wnd = max(ca->shadow_wnd, tcp_snd_cwnd(tp));
ca->state = CDG_BACKOFF;
tcp_enter_cwr(sk);
return true;
@@ -285,14 +285,14 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
if (!tcp_is_cwnd_limited(sk)) {
- ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd);
+ ca->shadow_wnd = min(ca->shadow_wnd, tcp_snd_cwnd(tp));
return;
}
- prior_snd_cwnd = tp->snd_cwnd;
+ prior_snd_cwnd = tcp_snd_cwnd(tp);
tcp_reno_cong_avoid(sk, ack, acked);
- incr = tp->snd_cwnd - prior_snd_cwnd;
+ incr = tcp_snd_cwnd(tp) - prior_snd_cwnd;
ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
}
@@ -331,15 +331,15 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (ca->state == CDG_BACKOFF)
- return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
+ return max(2U, (tcp_snd_cwnd(tp) * min(1024U, backoff_beta)) >> 10);
if (ca->state == CDG_NONFULL && use_tolerance)
- return tp->snd_cwnd;
+ return tcp_snd_cwnd(tp);
- ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd);
+ ca->shadow_wnd = min(ca->shadow_wnd >> 1, tcp_snd_cwnd(tp));
if (use_shadow)
- return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1);
- return max(2U, tp->snd_cwnd >> 1);
+ return max3(2U, ca->shadow_wnd, tcp_snd_cwnd(tp) >> 1);
+ return max(2U, tcp_snd_cwnd(tp) >> 1);
}
static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
@@ -357,7 +357,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
ca->gradients = gradients;
ca->rtt_seq = tp->snd_nxt;
- ca->shadow_wnd = tp->snd_cwnd;
+ ca->shadow_wnd = tcp_snd_cwnd(tp);
break;
case CA_EVENT_COMPLETE_CWR:
ca->state = CDG_UNKNOWN;
@@ -380,7 +380,7 @@ static void tcp_cdg_init(struct sock *sk)
ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
GFP_NOWAIT | __GFP_NOWARN);
ca->rtt_seq = tp->snd_nxt;
- ca->shadow_wnd = tp->snd_cwnd;
+ ca->shadow_wnd = tcp_snd_cwnd(tp);
}
static void tcp_cdg_release(struct sock *sk)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index dc95572163df..d3cae40749e8 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <net/tcp.h>
+#include <trace/events/tcp.h>
static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);
@@ -33,6 +34,17 @@ struct tcp_congestion_ops *tcp_ca_find(const char *name)
return NULL;
}
+void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ trace_tcp_cong_state_set(sk, ca_state);
+
+ if (icsk->icsk_ca_ops->set_state)
+ icsk->icsk_ca_ops->set_state(sk, ca_state);
+ icsk->icsk_ca_state = ca_state;
+}
+
/* Must be called with rcu lock held */
static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
const char *name)
@@ -393,10 +405,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
*/
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
- u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
+ u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh);
- acked -= cwnd - tp->snd_cwnd;
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
+ acked -= cwnd - tcp_snd_cwnd(tp);
+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));
return acked;
}
@@ -410,7 +422,7 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
/* If credits accumulated at a higher w, apply them gently now. */
if (tp->snd_cwnd_cnt >= w) {
tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
}
tp->snd_cwnd_cnt += acked;
@@ -418,9 +430,9 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
u32 delta = tp->snd_cwnd_cnt / w;
tp->snd_cwnd_cnt -= delta * w;
- tp->snd_cwnd += delta;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta);
}
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
}
EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
@@ -445,7 +457,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
}
/* In dangerous area, increase slowly. */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
@@ -454,7 +466,7 @@ u32 tcp_reno_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd >> 1U, 2U);
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
@@ -462,7 +474,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd, tp->prior_cwnd);
+ return max(tcp_snd_cwnd(tp), tp->prior_cwnd);
}
EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
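tcp_set_ca_state() moves out of line so the new trace_tcp_cong_state_set tracepoint fires on every congestion-state transition, before the ops callback and before icsk_ca_state is overwritten. That ordering means a module's ->set_state hook can still observe both states (illustrative):

    static void example_set_state(struct sock *sk, u8 new_state)
    {
            struct inet_connection_sock *icsk = inet_csk(sk);

            /* icsk_ca_state still holds the old state at this point */
            if (icsk->icsk_ca_state != new_state)
                    pr_debug("CA state %u -> %u\n",
                             icsk->icsk_ca_state, new_state);
    }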
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 24d562dd6225..b0918839bee7 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -334,7 +334,7 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- bictcp_update(ca, tp->snd_cwnd, acked);
+ bictcp_update(ca, tcp_snd_cwnd(tp), acked);
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
@@ -346,13 +346,13 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk)
ca->epoch_start = 0; /* end of epoch */
/* Wmax and fast convergence */
- if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
- ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
+ ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
/ (2 * BICTCP_BETA_SCALE);
else
- ca->last_max_cwnd = tp->snd_cwnd;
+ ca->last_max_cwnd = tcp_snd_cwnd(tp);
- return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+ return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
}
static void cubictcp_state(struct sock *sk, u8 new_state)
@@ -413,13 +413,13 @@ static void hystart_update(struct sock *sk, u32 delay)
ca->found = 1;
pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n",
now - ca->round_start, threshold,
- ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd);
+ ca->delay_min, hystart_ack_delay(sk), tcp_snd_cwnd(tp));
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -438,8 +438,8 @@ static void hystart_update(struct sock *sk, u32 delay)
LINUX_MIB_TCPHYSTARTDELAYDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -469,7 +469,7 @@ static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
/* hystart triggers when cwnd is larger than some threshold */
if (!ca->found && tcp_in_slow_start(tp) && hystart &&
- tp->snd_cwnd >= hystart_low_window)
+ tcp_snd_cwnd(tp) >= hystart_low_window)
hystart_update(sk, delay);
}
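
To make the cubic ssthresh hunk concrete: with tcp_cubic.c's defaults of
beta = 717 and BICTCP_BETA_SCALE = 1024 (values recalled from memory, so an
assumption), a loss at cwnd 100 works out as:

    /* ssthresh      = 100 * 717 / 1024          ~= 70  (cut to ~0.7 * cwnd)
     * last_max_cwnd = 100 * (1024 + 717) / 2048 ~= 85  (fast convergence)
     *
     * the flow deliberately remembers a peak slightly below the one it
     * actually hit, ceding some bandwidth to newer flows after a loss.
     */
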
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 1943a6630341..ab034a4e9324 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -106,8 +106,8 @@ static u32 dctcp_ssthresh(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- ca->loss_cwnd = tp->snd_cwnd;
- return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
+ ca->loss_cwnd = tcp_snd_cwnd(tp);
+ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U);
}
static void dctcp_update_alpha(struct sock *sk, u32 flags)
@@ -148,8 +148,8 @@ static void dctcp_react_to_loss(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- ca->loss_cwnd = tp->snd_cwnd;
- tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+ ca->loss_cwnd = tcp_snd_cwnd(tp);
+ tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
static void dctcp_state(struct sock *sk, u8 new_state)
@@ -211,8 +211,9 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
static u32 dctcp_cwnd_undo(struct sock *sk)
{
const struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+ return max(tcp_snd_cwnd(tp), ca->loss_cwnd);
}
static struct tcp_congestion_ops dctcp __read_mostly = {
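
The >> 11U in dctcp_ssthresh() encodes the DCTCP window cut. dctcp_alpha is
kept scaled to DCTCP_MAX_ALPHA (1024 in tcp_dctcp.c, again an assumption
from memory), so the expression is cwnd * (alpha / 1024) / 2: the window
shrinks in proportion to half the measured marking fraction. Spot checks:

    /* alpha == 1024 (every ACK marked): cwnd - (cwnd * 1024 >> 11) == cwnd / 2
     * alpha ==  512 (half marked):      cwnd - (cwnd *  512 >> 11) == 3 * cwnd / 4
     * alpha ==    0 (no marks):         cwnd unchanged (the max(..., 2U)
     *                                   floor still applies in all cases)
     */
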
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 349069d6cd0a..c6de5ce79ad3 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -127,22 +127,22 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* snd_cwnd <=
* hstcp_aimd_vals[ca->ai].cwnd
*/
- if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
- while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
+ if (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd) {
+ while (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd &&
ca->ai < HSTCP_AIMD_MAX - 1)
ca->ai++;
- } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) {
- while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd)
+ } else if (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) {
+ while (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd)
ca->ai--;
}
/* Do additive increase */
- if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
+ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) {
/* cwnd = cwnd + a(w) / cwnd */
tp->snd_cwnd_cnt += ca->ai + 1;
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- tp->snd_cwnd_cnt -= tp->snd_cwnd;
- tp->snd_cwnd++;
+ if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
+ tp->snd_cwnd_cnt -= tcp_snd_cwnd(tp);
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
}
}
}
@@ -154,7 +154,7 @@ static u32 hstcp_ssthresh(struct sock *sk)
struct hstcp *ca = inet_csk_ca(sk);
/* Do multiplicative decrease */
- return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
+ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
}
static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 55adcfcf96fe..52b1f2665dfa 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -124,7 +124,7 @@ static void measure_achieved_throughput(struct sock *sk,
ca->packetcount += sample->pkts_acked;
- if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) &&
+ if (ca->packetcount >= tcp_snd_cwnd(tp) - (ca->alpha >> 7 ? : 1) &&
now - ca->lasttime >= ca->minRTT &&
ca->minRTT > 0) {
__u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
@@ -225,7 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
const struct htcp *ca = inet_csk_ca(sk);
htcp_param_update(sk);
- return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
+ return max((tcp_snd_cwnd(tp) * ca->beta) >> 7, 2U);
}
static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
@@ -242,9 +242,9 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
*/
- if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tp->snd_cwnd) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
+ if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tcp_snd_cwnd(tp)) {
+ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tp->snd_cwnd_cnt = 0;
htcp_alpha_update(ca);
} else
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index be39327e04e6..abd7d91807e5 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -54,7 +54,7 @@ static void hybla_init(struct sock *sk)
ca->rho2_7ls = 0;
ca->snd_cwnd_cents = 0;
ca->hybla_en = true;
- tp->snd_cwnd = 2;
+ tcp_snd_cwnd_set(tp, 2);
tp->snd_cwnd_clamp = 65535;
/* 1st Rho measurement based on initial srtt */
@@ -62,7 +62,7 @@ static void hybla_init(struct sock *sk)
/* set minimum rtt as this is the 1st ever seen */
ca->minrtt_us = tp->srtt_us;
- tp->snd_cwnd = ca->rho;
+ tcp_snd_cwnd_set(tp, ca->rho);
}
static void hybla_state(struct sock *sk, u8 ca_state)
@@ -137,31 +137,31 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* as long as increment is estimated as (rho<<7)/window
* it already is <<7 and we can easily count its fractions.
*/
- increment = ca->rho2_7ls / tp->snd_cwnd;
+ increment = ca->rho2_7ls / tcp_snd_cwnd(tp);
if (increment < 128)
tp->snd_cwnd_cnt++;
}
odd = increment % 128;
- tp->snd_cwnd += increment >> 7;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + (increment >> 7));
ca->snd_cwnd_cents += odd;
/* check when fractions goes >=128 and increase cwnd by 1. */
while (ca->snd_cwnd_cents >= 128) {
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
ca->snd_cwnd_cents -= 128;
tp->snd_cwnd_cnt = 0;
}
/* check when cwnd has not been incremented for a while */
- if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- tp->snd_cwnd++;
+ if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tp->snd_cwnd_cnt = 0;
}
/* clamp down slowstart cwnd to ssthresh value. */
if (is_slowstart)
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_ssthresh));
- tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
}
static struct tcp_congestion_ops tcp_hybla __read_mostly = {
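
Hybla carries its per-ACK increment in 7-bit fixed point (units of 1/128 of
a segment), which is why the conversion above splits the read-modify-write:
whole segments go into the window through tcp_snd_cwnd_set() while the
fraction accumulates in snd_cwnd_cents until it carries. A numeric sketch
of the arithmetic shown in the hunk:

    /* increment == 300, i.e. 300 / 128 ~= 2.34 segments:
     *   whole part:  increment >> 7  == 2  -> added to cwnd immediately
     *   fraction:    increment % 128 == 44 -> banked in snd_cwnd_cents;
     *                once the bank reaches 128, one extra segment is added
     *                and 128 cents are spent (the while loop above)
     */
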
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 00e54873213e..c0c81a2c77fa 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -224,7 +224,7 @@ static void update_params(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
- if (tp->snd_cwnd < win_thresh) {
+ if (tcp_snd_cwnd(tp) < win_thresh) {
ca->alpha = ALPHA_BASE;
ca->beta = BETA_BASE;
} else if (ca->cnt_rtt > 0) {
@@ -284,9 +284,9 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* tp->snd_cwnd += alpha/tp->snd_cwnd
*/
delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
- if (delta >= tp->snd_cwnd) {
- tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
- (u32)tp->snd_cwnd_clamp);
+ if (delta >= tcp_snd_cwnd(tp)) {
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp) + delta / tcp_snd_cwnd(tp),
+ (u32)tp->snd_cwnd_clamp));
tp->snd_cwnd_cnt = 0;
}
}
@@ -296,9 +296,11 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
+ u32 decr;
/* Multiplicative decrease */
- return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
+ decr = (tcp_snd_cwnd(tp) * ca->beta) >> BETA_SHIFT;
+ return max(tcp_snd_cwnd(tp) - decr, 2U);
}
/* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2088f93fa37b..1595b76ea2be 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -414,7 +414,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
per_mss = roundup_pow_of_two(per_mss) +
SKB_DATA_ALIGN(sizeof(struct sk_buff));
- nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
+ nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp));
nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
/* Fast Recovery (RFC 5681 3.2) :
@@ -909,12 +909,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
* If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
* end of slow start and should slow down.
*/
- if (tp->snd_cwnd < tp->snd_ssthresh / 2)
+ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
else
rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
- rate *= max(tp->snd_cwnd, tp->packets_out);
+ rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
if (likely(tp->srtt_us))
do_div(rate, tp->srtt_us);
@@ -2147,12 +2147,12 @@ void tcp_enter_loss(struct sock *sk)
!after(tp->high_seq, tp->snd_una) ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->prior_cwnd = tp->snd_cwnd;
+ tp->prior_cwnd = tcp_snd_cwnd(tp);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
tcp_init_undo(tp);
}
- tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
+ tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1);
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
@@ -2458,7 +2458,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
msg,
&inet->inet_daddr, ntohs(inet->inet_dport),
- tp->snd_cwnd, tcp_left_out(tp),
+ tcp_snd_cwnd(tp), tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
}
@@ -2467,7 +2467,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
msg,
&sk->sk_v6_daddr, ntohs(inet->inet_dport),
- tp->snd_cwnd, tcp_left_out(tp),
+ tcp_snd_cwnd(tp), tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
}
@@ -2492,7 +2492,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
if (tp->prior_ssthresh) {
const struct inet_connection_sock *icsk = inet_csk(sk);
- tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
+ tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk));
if (tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
@@ -2599,7 +2599,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
tp->high_seq = tp->snd_nxt;
tp->tlp_high_seq = 0;
tp->snd_cwnd_cnt = 0;
- tp->prior_cwnd = tp->snd_cwnd;
+ tp->prior_cwnd = tcp_snd_cwnd(tp);
tp->prr_delivered = 0;
tp->prr_out = 0;
tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
@@ -2629,7 +2629,7 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost,
}
/* Force a fast retransmit upon entering fast recovery */
sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
- tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+ tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt);
}
static inline void tcp_end_cwnd_reduction(struct sock *sk)
@@ -2642,7 +2642,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
(inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
- tp->snd_cwnd = tp->snd_ssthresh;
+ tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
tp->snd_cwnd_stamp = tcp_jiffies32;
}
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
@@ -2709,9 +2709,9 @@ static void tcp_mtup_probe_success(struct sock *sk)
/* FIXME: breaks with very large cwnd */
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = tp->snd_cwnd *
- tcp_mss_to_mtu(sk, tp->mss_cache) /
- icsk->icsk_mtup.probe_size;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) *
+ tcp_mss_to_mtu(sk, tp->mss_cache) /
+ icsk->icsk_mtup.probe_size);
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
@@ -3034,7 +3034,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
tp->snd_una == tp->mtu_probe.probe_seq_start) {
tcp_mtup_probe_failed(sk);
/* Restores the reduction we did in tcp_mtup_probe() */
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tcp_simple_retransmit(sk);
return;
}
@@ -5436,7 +5436,7 @@ static bool tcp_should_expand_sndbuf(struct sock *sk)
return false;
/* If we filled the congestion window, do not expand. */
- if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+ if (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp))
return false;
return true;
@@ -5998,9 +5998,9 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
* retransmission has occurred.
*/
if (tp->total_retrans > 1 && tp->undo_marker)
- tp->snd_cwnd = 1;
+ tcp_snd_cwnd_set(tp, 1);
else
- tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
+ tcp_snd_cwnd_set(tp, tcp_init_cwnd(tp, __sk_dst_get(sk)));
tp->snd_cwnd_stamp = tcp_jiffies32;
bpf_skops_established(sk, bpf_op, skb);
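
One tcp_input.c hunk deserves a second look: tcp_mtup_probe_success()
rescales the window so that cwnd * MSS stays roughly constant when the MSS
jumps after a successful MTU probe. With illustrative numbers (the values
here are made up):

    /* cwnd == 40, current mss-derived MTU == 1500, probe_size == 3000:
     *   new cwnd = 40 * 1500 / 3000 == 20
     * half as many segments, each twice as large: about the same number
     * of bytes stays in flight across the probe.
     */
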
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f9cec624068d..157265aecbed 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2621,7 +2621,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
- tp->snd_cwnd,
+ tcp_snd_cwnd(tp),
state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 82b36ec3f2f8..ae36780977d2 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -297,7 +297,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
lp->flag &= ~LP_WITHIN_THR;
pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag,
- tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max,
+ tcp_snd_cwnd(tp), lp->remote_hz, lp->owd_min, lp->owd_max,
lp->sowd >> 3);
if (lp->flag & LP_WITHIN_THR)
@@ -313,12 +313,12 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
/* happened within inference
* drop snd_cwnd into 1 */
if (lp->flag & LP_WITHIN_INF)
- tp->snd_cwnd = 1U;
+ tcp_snd_cwnd_set(tp, 1U);
/* happened after inference
* cut snd_cwnd into half */
else
- tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1U, 1U));
/* record this drop time */
lp->last_drop = now;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0588b004ddac..7029b0e98edb 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -388,15 +388,15 @@ void tcp_update_metrics(struct sock *sk)
if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
- if (val && (tp->snd_cwnd >> 1) > val)
+ if (val && (tcp_snd_cwnd(tp) >> 1) > val)
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
- tp->snd_cwnd >> 1);
+ tcp_snd_cwnd(tp) >> 1);
}
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
val = tcp_metric_get(tm, TCP_METRIC_CWND);
- if (tp->snd_cwnd > val)
+ if (tcp_snd_cwnd(tp) > val)
tcp_metric_set(tm, TCP_METRIC_CWND,
- tp->snd_cwnd);
+ tcp_snd_cwnd(tp));
}
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
@@ -404,10 +404,10 @@ void tcp_update_metrics(struct sock *sk)
if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
- max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
+ max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
val = tcp_metric_get(tm, TCP_METRIC_CWND);
- tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1);
+ tcp_metric_set(tm, TCP_METRIC_CWND, (val + tcp_snd_cwnd(tp)) >> 1);
}
} else {
/* Else slow start did not finish, cwnd is non-sense,
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index ab552356bdba..a60662f4bdf9 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -197,10 +197,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
if (ca->cwnd_growth_factor < 0) {
- cnt = tp->snd_cwnd << -ca->cwnd_growth_factor;
+ cnt = tcp_snd_cwnd(tp) << -ca->cwnd_growth_factor;
tcp_cong_avoid_ai(tp, cnt, acked);
} else {
- cnt = max(4U, tp->snd_cwnd >> ca->cwnd_growth_factor);
+ cnt = max(4U, tcp_snd_cwnd(tp) >> ca->cwnd_growth_factor);
tcp_cong_avoid_ai(tp, cnt, acked);
}
}
@@ -209,7 +209,7 @@ static u32 tcpnv_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
+ return max((tcp_snd_cwnd(tp) * nv_loss_dec_factor) >> 10, 2U);
}
static void tcpnv_state(struct sock *sk, u8 new_state)
@@ -257,7 +257,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
return;
/* Stop cwnd growth if we were in catch up mode */
- if (ca->nv_catchup && tp->snd_cwnd >= nv_min_cwnd) {
+ if (ca->nv_catchup && tcp_snd_cwnd(tp) >= nv_min_cwnd) {
ca->nv_catchup = 0;
ca->nv_allow_cwnd_growth = 0;
}
@@ -371,7 +371,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
* if cwnd < max_win, grow cwnd
* else leave the same
*/
- if (tp->snd_cwnd > max_win) {
+ if (tcp_snd_cwnd(tp) > max_win) {
/* there is congestion, check that it is ok
* to make a CA decision
* 1. We should have at least nv_dec_eval_min_calls
@@ -398,20 +398,20 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
ca->nv_allow_cwnd_growth = 0;
tp->snd_ssthresh =
(nv_ssthresh_factor * max_win) >> 3;
- if (tp->snd_cwnd - max_win > 2) {
+ if (tcp_snd_cwnd(tp) - max_win > 2) {
/* gap > 2, we do exponential cwnd decrease */
int dec;
- dec = max(2U, ((tp->snd_cwnd - max_win) *
+ dec = max(2U, ((tcp_snd_cwnd(tp) - max_win) *
nv_cong_dec_mult) >> 7);
- tp->snd_cwnd -= dec;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - dec);
} else if (nv_cong_dec_mult > 0) {
- tp->snd_cwnd = max_win;
+ tcp_snd_cwnd_set(tp, max_win);
}
if (ca->cwnd_growth_factor > 0)
ca->cwnd_growth_factor = 0;
ca->nv_no_cong_cnt = 0;
- } else if (tp->snd_cwnd <= max_win - nv_pad_buffer) {
+ } else if (tcp_snd_cwnd(tp) <= max_win - nv_pad_buffer) {
/* There is no congestion, grow cwnd if allowed*/
if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls)
return;
@@ -444,8 +444,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
* (it wasn't before, if it is now is because nv
* decreased it).
*/
- if (tp->snd_cwnd < nv_min_cwnd)
- tp->snd_cwnd = nv_min_cwnd;
+ if (tcp_snd_cwnd(tp) < nv_min_cwnd)
+ tcp_snd_cwnd_set(tp, nv_min_cwnd);
}
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9ede847f4199..c221f3bce975 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -142,7 +142,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
- u32 cwnd = tp->snd_cwnd;
+ u32 cwnd = tcp_snd_cwnd(tp);
tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
@@ -151,7 +151,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
cwnd >>= 1;
- tp->snd_cwnd = max(cwnd, restart_cwnd);
+ tcp_snd_cwnd_set(tp, max(cwnd, restart_cwnd));
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_cwnd_used = 0;
}
@@ -1013,7 +1013,7 @@ static void tcp_tsq_write(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tp->lost_out > tp->retrans_out &&
- tp->snd_cwnd > tcp_packets_in_flight(tp)) {
+ tcp_snd_cwnd(tp) > tcp_packets_in_flight(tp)) {
tcp_mstamp_refresh(tp);
tcp_xmit_retransmit_queue(sk);
}
@@ -1860,9 +1860,9 @@ static void tcp_cwnd_application_limited(struct sock *sk)
/* Limited by application or receiver window. */
u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 win_used = max(tp->snd_cwnd_used, init_win);
- if (win_used < tp->snd_cwnd) {
+ if (win_used < tcp_snd_cwnd(tp)) {
tp->snd_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
+ tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1);
}
tp->snd_cwnd_used = 0;
}
@@ -2043,7 +2043,7 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
return 1;
in_flight = tcp_packets_in_flight(tp);
- cwnd = tp->snd_cwnd;
+ cwnd = tcp_snd_cwnd(tp);
if (in_flight >= cwnd)
return 0;
@@ -2196,12 +2196,12 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
in_flight = tcp_packets_in_flight(tp);
BUG_ON(tcp_skb_pcount(skb) <= 1);
- BUG_ON(tp->snd_cwnd <= in_flight);
+ BUG_ON(tcp_snd_cwnd(tp) <= in_flight);
send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
/* From in_flight test above, we know that cwnd > in_flight. */
- cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
+ cong_win = (tcp_snd_cwnd(tp) - in_flight) * tp->mss_cache;
limit = min(send_win, cong_win);
@@ -2215,7 +2215,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
if (win_divisor) {
- u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
+ u32 chunk = min(tp->snd_wnd, tcp_snd_cwnd(tp) * tp->mss_cache);
/* If at least some fraction of a window is available,
* just use it.
@@ -2345,7 +2345,7 @@ static int tcp_mtu_probe(struct sock *sk)
if (likely(!icsk->icsk_mtup.enabled ||
icsk->icsk_mtup.probe_size ||
inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
- tp->snd_cwnd < 11 ||
+ tcp_snd_cwnd(tp) < 11 ||
tp->rx_opt.num_sacks || tp->rx_opt.dsack))
return -1;
@@ -2381,7 +2381,7 @@ static int tcp_mtu_probe(struct sock *sk)
return 0;
/* Do we need to wait to drain cwnd? With none in flight, don't stall */
- if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
+ if (tcp_packets_in_flight(tp) + 2 > tcp_snd_cwnd(tp)) {
if (!tcp_packets_in_flight(tp))
return -1;
else
@@ -2450,7 +2450,7 @@ static int tcp_mtu_probe(struct sock *sk)
if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
/* Decrement cwnd here because we are sending
* effectively two packets. */
- tp->snd_cwnd--;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
tcp_event_new_data_sent(sk, nskb);
icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
@@ -2708,7 +2708,7 @@ repair:
else
tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
- is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp));
if (likely(sent_pkts || is_cwnd_limited))
tcp_cwnd_validate(sk, is_cwnd_limited);
@@ -2818,7 +2818,7 @@ void tcp_send_loss_probe(struct sock *sk)
if (unlikely(!skb)) {
WARN_ONCE(tp->packets_out,
"invalid inflight: %u state %u cwnd %u mss %d\n",
- tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
+ tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss);
inet_csk(sk)->icsk_pending = 0;
return;
}
@@ -3302,7 +3302,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (!hole)
tp->retransmit_skb_hint = skb;
- segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
+ segs = tcp_snd_cwnd(tp) - tcp_packets_in_flight(tp);
if (segs <= 0)
break;
sacked = TCP_SKB_CB(skb)->sacked;
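
tcp_cwnd_restart() at the top of the tcp_output.c changes is the
idle-restart decay of RFC 2861: the window halves once per RTO of idle
time, floored at the initial window. The rewrite reads the window once into
a local, decays the copy, and writes back once. Roughly:

    /* cwnd == 80, restart_cwnd == 10, a bit over three RTOs idle:
     *   80 >> 1 -> 40 >> 1 -> 20 >> 1 -> 10
     * then tcp_snd_cwnd_set(tp, max(10, restart_cwnd)) leaves cwnd at 10
     */
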
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index fbab921670cc..617b8187c03d 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -195,7 +195,7 @@ void tcp_rate_check_app_limited(struct sock *sk)
/* Nothing in sending host's qdisc queues or NIC tx queue. */
sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) &&
/* We are not limited by CWND. */
- tcp_packets_in_flight(tp) < tp->snd_cwnd &&
+ tcp_packets_in_flight(tp) < tcp_snd_cwnd(tp) &&
/* All lost packets have been retransmitted. */
tp->lost_out <= tp->retrans_out)
tp->app_limited =
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 5842081bc8a2..862b96248a92 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -27,7 +27,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
+ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT),
acked);
}
@@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
+ return max(tcp_snd_cwnd(tp) - (tcp_snd_cwnd(tp)>>TCP_SCALABLE_MD_SCALE), 2U);
}
static struct tcp_congestion_ops tcp_scalable __read_mostly = {
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index c8003c8aad2c..786848ad37ea 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
- return min(tp->snd_ssthresh, tp->snd_cwnd);
+ return min(tp->snd_ssthresh, tcp_snd_cwnd(tp));
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
@@ -217,14 +217,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* This is:
* (actual rate in segments) * baseRTT
*/
- target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
+ target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT;
do_div(target_cwnd, rtt);
/* Calculate the difference between the window we had,
* and the window we would like to have. This quantity
* is the "Diff" from the Arizona Vegas papers.
*/
- diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
+ diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT;
if (diff > gamma && tcp_in_slow_start(tp)) {
/* Going too fast. Time to slow down
@@ -238,7 +238,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* truncation robs us of full link
* utilization.
*/
- tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp),
+ (u32)target_cwnd + 1));
tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
} else if (tcp_in_slow_start(tp)) {
@@ -254,14 +255,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* The old window was too fast, so
* we slow down.
*/
- tp->snd_cwnd--;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
tp->snd_ssthresh
= tcp_vegas_ssthresh(tp);
} else if (diff < alpha) {
/* We don't have enough extra packets
* in the network, so speed up.
*/
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
} else {
/* Sending just as fast as we
* should be.
@@ -269,10 +270,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
}
- if (tp->snd_cwnd < 2)
- tp->snd_cwnd = 2;
- else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
- tp->snd_cwnd = tp->snd_cwnd_clamp;
+ if (tcp_snd_cwnd(tp) < 2)
+ tcp_snd_cwnd_set(tp, 2);
+ else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
tp->snd_ssthresh = tcp_current_ssthresh(sk);
}
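
The Vegas arithmetic above estimates how many segments the flow is keeping
queued in the network: diff = cwnd * (rtt - baseRTT) / baseRTT. A numeric
sketch (the alpha/beta defaults of 2/4 are an assumption from memory):

    /* cwnd == 20, baseRTT == 100 ms, rtt == 110 ms:
     *   diff = 20 * (110 - 100) / 100 == 2 segments sitting in queues
     * diff < alpha (2)      -> grow cwnd by one
     * diff > beta  (4)      -> shrink cwnd by one
     * alpha <= diff <= beta -> hold: sending just as fast as we should
     */
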
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index cd50a61c9976..366ff6f214b2 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -146,11 +146,11 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
rtt = veno->minrtt;
- target_cwnd = (u64)tp->snd_cwnd * veno->basertt;
+ target_cwnd = (u64)tcp_snd_cwnd(tp) * veno->basertt;
target_cwnd <<= V_PARAM_SHIFT;
do_div(target_cwnd, rtt);
- veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;
+ veno->diff = (tcp_snd_cwnd(tp) << V_PARAM_SHIFT) - target_cwnd;
if (tcp_in_slow_start(tp)) {
/* Slow start. */
@@ -164,15 +164,15 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* In the "non-congestive state", increase cwnd
* every rtt.
*/
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
} else {
/* In the "congestive state", increase cwnd
* every other rtt.
*/
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
if (veno->inc &&
- tp->snd_cwnd < tp->snd_cwnd_clamp) {
- tp->snd_cwnd++;
+ tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) {
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
veno->inc = 0;
} else
veno->inc = 1;
@@ -181,10 +181,10 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tp->snd_cwnd_cnt += acked;
}
done:
- if (tp->snd_cwnd < 2)
- tp->snd_cwnd = 2;
- else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
- tp->snd_cwnd = tp->snd_cwnd_clamp;
+ if (tcp_snd_cwnd(tp) < 2)
+ tcp_snd_cwnd_set(tp, 2);
+ else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
}
/* Wipe the slate clean for the next rtt. */
/* veno->cntrtt = 0; */
@@ -199,10 +199,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
if (veno->diff < beta)
/* in "non-congestive state", cut cwnd by 1/5 */
- return max(tp->snd_cwnd * 4 / 5, 2U);
+ return max(tcp_snd_cwnd(tp) * 4 / 5, 2U);
else
/* in "congestive state", cut cwnd by 1/2 */
- return max(tp->snd_cwnd >> 1U, 2U);
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
static struct tcp_congestion_ops tcp_veno __read_mostly = {
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index b2e05c4cea00..c6e97141eef2 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -244,7 +244,8 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
switch (event) {
case CA_EVENT_COMPLETE_CWR:
- tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
break;
case CA_EVENT_LOSS:
tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
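
The westwood hunk shows a small mechanical consequence of the conversion:
tcp_snd_cwnd_set() returns void, so a chained assignment like
tp->snd_cwnd = tp->snd_ssthresh = f(sk) cannot survive it and has to
unchain into two statements, as the CA_EVENT_COMPLETE_CWR case above does:

    tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
    tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
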
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 07c4c93b9fdb..18b07ff5d20e 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -71,11 +71,11 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!yeah->doing_reno_now) {
/* Scalable */
- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
+ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT),
acked);
} else {
/* Reno */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
}
/* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
@@ -130,7 +130,7 @@ do_vegas:
/* Compute excess number of packets above bandwidth
* Avoid doing full 64 bit divide.
*/
- bw = tp->snd_cwnd;
+ bw = tcp_snd_cwnd(tp);
bw *= rtt - yeah->vegas.baseRTT;
do_div(bw, rtt);
queue = bw;
@@ -138,20 +138,20 @@ do_vegas:
if (queue > TCP_YEAH_ALPHA ||
rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
if (queue > TCP_YEAH_ALPHA &&
- tp->snd_cwnd > yeah->reno_count) {
+ tcp_snd_cwnd(tp) > yeah->reno_count) {
u32 reduction = min(queue / TCP_YEAH_GAMMA ,
- tp->snd_cwnd >> TCP_YEAH_EPSILON);
+ tcp_snd_cwnd(tp) >> TCP_YEAH_EPSILON);
- tp->snd_cwnd -= reduction;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - reduction);
- tp->snd_cwnd = max(tp->snd_cwnd,
- yeah->reno_count);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp),
+ yeah->reno_count));
- tp->snd_ssthresh = tp->snd_cwnd;
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
if (yeah->reno_count <= 2)
- yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
+ yeah->reno_count = max(tcp_snd_cwnd(tp)>>1, 2U);
else
yeah->reno_count++;
@@ -176,7 +176,7 @@ do_vegas:
*/
yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt;
yeah->vegas.beg_snd_nxt = tp->snd_nxt;
- yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
+ yeah->vegas.beg_snd_cwnd = tcp_snd_cwnd(tp);
/* Wipe the slate clean for the next RTT. */
yeah->vegas.cntRTT = 0;
@@ -193,16 +193,16 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
if (yeah->doing_reno_now < TCP_YEAH_RHO) {
reduction = yeah->lastQ;
- reduction = min(reduction, max(tp->snd_cwnd>>1, 2U));
+ reduction = min(reduction, max(tcp_snd_cwnd(tp)>>1, 2U));
- reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
+ reduction = max(reduction, tcp_snd_cwnd(tp) >> TCP_YEAH_DELTA);
} else
- reduction = max(tp->snd_cwnd>>1, 2U);
+ reduction = max(tcp_snd_cwnd(tp)>>1, 2U);
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
- return max_t(int, tp->snd_cwnd - reduction, 2);
+ return max_t(int, tcp_snd_cwnd(tp) - reduction, 2);
}
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6b4d8361560f..aa8545ca6964 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1726,7 +1726,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
EXPORT_SYMBOL(udp_ioctl);
struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *off, int *err)
+ int *off, int *err)
{
struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
struct sk_buff_head *queue;
@@ -1735,7 +1735,6 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
int error;
queue = &udp_sk(sk)->reader_queue;
- flags |= noblock ? MSG_DONTWAIT : 0;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
struct sk_buff *skb;
@@ -1805,7 +1804,7 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
struct sk_buff *skb;
int err, used;
- skb = skb_recv_udp(sk, 0, 1, &err);
+ skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
if (!skb)
return err;
@@ -1843,8 +1842,8 @@ EXPORT_SYMBOL(udp_read_sock);
* return it, otherwise we block.
*/
-int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
@@ -1859,7 +1858,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
try_again:
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
+ skb = __skb_recv_udp(sk, flags, &off, &err);
if (!skb)
return err;
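
The udp.c hunks belong to the second theme in this section: the separate
noblock/nonblock parameter is dropped from the recvmsg() and
skb_recv_datagram() call chains, and non-blocking behaviour now rides
exclusively in MSG_DONTWAIT. The caller-side translation is mechanical; a
before/after sketch using the calls from the hunks above:

    /* before: skb = skb_recv_udp(sk, 0, 1, &err);   third arg == noblock */
    skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);

    /* and inside a ->recvmsg() handler the timeout derives from flags alone */
    timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
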
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index bbe6569c9ad3..ff15918b7bdc 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -11,14 +11,13 @@
static struct proto *udpv6_prot_saved __read_mostly;
static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
- return udpv6_prot_saved->recvmsg(sk, msg, len, noblock, flags,
- addr_len);
+ return udpv6_prot_saved->recvmsg(sk, msg, len, flags, addr_len);
#endif
- return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len);
+ return udp_prot.recvmsg(sk, msg, len, flags, addr_len);
}
static bool udp_sk_has_data(struct sock *sk)
@@ -61,7 +60,7 @@ static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
}
static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_psock *psock;
int copied, ret;
@@ -71,10 +70,10 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return sk_udp_recvmsg(sk, msg, len, flags, addr_len);
if (!psock_has_data(psock)) {
- ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len);
goto out;
}
@@ -84,12 +83,12 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = udp_msg_wait_data(sk, psock, timeo);
if (data) {
if (psock_has_data(psock))
goto msg_bytes_ready;
- ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len);
goto out;
}
copied = -EAGAIN;
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 2878d8285caf..4ba7a88a1b1d 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -17,8 +17,8 @@ int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len);
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len);
int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
void udp_destroy_sock(struct sock *sk);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b22504176588..1afc4c024981 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -797,6 +797,7 @@ static void dev_forward_change(struct inet6_dev *idev)
{
struct net_device *dev;
struct inet6_ifaddr *ifa;
+ LIST_HEAD(tmp_addr_list);
if (!idev)
return;
@@ -815,14 +816,24 @@ static void dev_forward_change(struct inet6_dev *idev)
}
}
+ read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
if (ifa->flags&IFA_F_TENTATIVE)
continue;
+ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
+ }
+ read_unlock_bh(&idev->lock);
+
+ while (!list_empty(&tmp_addr_list)) {
+ ifa = list_first_entry(&tmp_addr_list,
+ struct inet6_ifaddr, if_list_aux);
+ list_del(&ifa->if_list_aux);
if (idev->cnf.forwarding)
addrconf_join_anycast(ifa);
else
addrconf_leave_anycast(ifa);
}
+
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_FORWARDING,
dev->ifindex, &idev->cnf);
@@ -3728,7 +3739,8 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
- struct inet6_ifaddr *ifa, *tmp;
+ struct inet6_ifaddr *ifa;
+ LIST_HEAD(tmp_addr_list);
bool keep_addr = false;
bool was_ready;
int state, i;
@@ -3820,16 +3832,23 @@ restart:
write_lock_bh(&idev->lock);
}
- list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
+ list_for_each_entry(ifa, &idev->addr_list, if_list)
+ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
+ write_unlock_bh(&idev->lock);
+
+ while (!list_empty(&tmp_addr_list)) {
struct fib6_info *rt = NULL;
bool keep;
+ ifa = list_first_entry(&tmp_addr_list,
+ struct inet6_ifaddr, if_list_aux);
+ list_del(&ifa->if_list_aux);
+
addrconf_del_dad_work(ifa);
keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
!addr_is_local(&ifa->addr);
- write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
if (keep) {
@@ -3860,15 +3879,14 @@ restart:
addrconf_leave_solict(ifa->idev, &ifa->addr);
}
- write_lock_bh(&idev->lock);
if (!keep) {
+ write_lock_bh(&idev->lock);
list_del_rcu(&ifa->if_list);
+ write_unlock_bh(&idev->lock);
in6_ifa_put(ifa);
}
}
- write_unlock_bh(&idev->lock);
-
/* Step 5: Discard anycast and multicast list */
if (unregister) {
ipv6_ac_destroy_dev(idev);
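
Both addrconf.c hunks apply the same pattern: the address list cannot be
walked while calling helpers that may take idev->lock themselves, so
entries are first snapshotted onto a private list through the new
if_list_aux linkage and processed after the lock is dropped. The condensed
shape (a sketch of the hunks above, not standalone code):

    LIST_HEAD(tmp_addr_list);

    read_lock_bh(&idev->lock);
    list_for_each_entry(ifa, &idev->addr_list, if_list)
            list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
    read_unlock_bh(&idev->lock);

    while (!list_empty(&tmp_addr_list)) {
            ifa = list_first_entry(&tmp_addr_list,
                                   struct inet6_ifaddr, if_list_aux);
            list_del(&ifa->if_list_aux);
            /* lock-taking helpers are safe to call on ifa here */
    }
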
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7d7b7523d126..6595a78672c8 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -654,7 +654,7 @@ int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *,
- size_t, int, int, int *));
+ size_t, int, int *));
int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -669,8 +669,7 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
/* IPV6_ADDRFORM can change sk->sk_prot under us. */
prot = READ_ONCE(sk->sk_prot);
err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
- sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ sk, msg, size, flags, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 658d5eabaf7e..a8d961d3a477 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -90,12 +90,13 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff,
break;
fallthrough;
case 2: /* send ICMP PARM PROB regardless and drop packet */
- icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
+ icmpv6_param_prob_reason(skb, ICMPV6_UNK_OPTION, optoff,
+ SKB_DROP_REASON_UNHANDLED_PROTO);
return false;
}
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
return false;
}
@@ -218,7 +219,7 @@ static bool ip6_parse_tlv(bool hopbyhop,
if (len == 0)
return true;
bad:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
@@ -232,6 +233,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ SKB_DR(reason);
int ret;
if (opt->dsthao) {
@@ -246,19 +248,23 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
if (hao->length != 16) {
net_dbg_ratelimited("hao invalid option length = %d\n",
hao->length);
+ SKB_DR_SET(reason, IP_INHDR);
goto discard;
}
if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
net_dbg_ratelimited("hao is not an unicast addr: %pI6\n",
&hao->addr);
+ SKB_DR_SET(reason, INVALID_PROTO);
goto discard;
}
ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr,
(xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
- if (unlikely(ret < 0))
+ if (unlikely(ret < 0)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto discard;
+ }
if (skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -281,7 +287,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
return true;
discard:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return false;
}
#endif
@@ -487,7 +493,6 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
struct inet6_dev *idev;
struct ipv6hdr *oldhdr;
- struct in6_addr addr;
unsigned char *buf;
int accept_rpl_seg;
int i, err;
@@ -616,9 +621,7 @@ looped_back:
return -1;
}
- addr = ipv6_hdr(skb)->daddr;
- ipv6_hdr(skb)->daddr = ohdr->rpl_segaddr[i];
- ohdr->rpl_segaddr[i] = addr;
+ swap(ipv6_hdr(skb)->daddr, ohdr->rpl_segaddr[i]);
ipv6_rpl_srh_compress(chdr, ohdr, &ipv6_hdr(skb)->daddr, n);
@@ -934,7 +937,7 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
}
net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n",
nh[optoff + 1]);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
@@ -988,7 +991,7 @@ ignore:
return true;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
@@ -997,31 +1000,30 @@ drop:
static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
{
const unsigned char *nh = skb_network_header(skb);
- struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
- struct net *net = ipv6_skb_net(skb);
+ SKB_DR(reason);
u32 pkt_len;
if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
nh[optoff+1]);
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
+ SKB_DR_SET(reason, IP_INHDR);
goto drop;
}
pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
if (pkt_len <= IPV6_MAXPLEN) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
+ icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff + 2,
+ SKB_DROP_REASON_IP_INHDR);
return false;
}
if (ipv6_hdr(skb)->payload_len) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
+ icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff,
+ SKB_DROP_REASON_IP_INHDR);
return false;
}
if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ SKB_DR_SET(reason, PKT_TOO_SMALL);
goto drop;
}
@@ -1032,7 +1034,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
return true;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return false;
}
@@ -1054,7 +1056,7 @@ static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff)
return true;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
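
The SKB_DR()/SKB_DR_SET()/SKB_DR_OR() helpers used throughout the IPv6
hunks are thin convenience macros over enum skb_drop_reason; from memory
(so treat the exact spelling as an assumption), they amount to:

    #define SKB_DR_INIT(name, reason) \
            enum skb_drop_reason name = SKB_DROP_REASON_##reason
    #define SKB_DR(name)             SKB_DR_INIT(name, NOT_SPECIFIED)
    #define SKB_DR_SET(name, reason) (name = SKB_DROP_REASON_##reason)
    /* _OR only upgrades the catch-all default, never a specific reason */
    #define SKB_DR_OR(name, reason)                                 \
            do {                                                    \
                    if (name == SKB_DROP_REASON_NOT_SPECIFIED)      \
                            SKB_DR_SET(name, reason);               \
            } while (0)
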
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e6b978ea0e87..61770220774e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -629,12 +629,13 @@ out_bh_enable:
}
EXPORT_SYMBOL(icmp6_send);
-/* Slightly more convenient version of icmp6_send.
+/* Slightly more convenient version of icmp6_send with drop reasons.
*/
-void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
+void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
+ enum skb_drop_reason reason)
{
icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
}
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
@@ -864,21 +865,23 @@ out:
static int icmpv6_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct net *net = dev_net(skb->dev);
struct net_device *dev = icmp6_dev(skb);
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
struct icmp6hdr *hdr;
u8 type;
- bool success = false;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
struct sec_path *sp = skb_sec_path(skb);
int nh;
if (!(sp && sp->xvec[sp->len - 1]->props.flags &
- XFRM_STATE_ICMP))
+ XFRM_STATE_ICMP)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop_no_count;
+ }
if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
goto drop_no_count;
@@ -886,8 +889,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
nh = skb_network_offset(skb);
skb_set_network_header(skb, sizeof(*hdr));
- if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+ if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
+ skb)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop_no_count;
+ }
skb_set_network_header(skb, nh);
}
@@ -924,11 +930,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_ECHO_REPLY:
- success = ping_rcv(skb);
+ reason = ping_rcv(skb);
break;
case ICMPV6_EXT_ECHO_REPLY:
- success = ping_rcv(skb);
+ reason = ping_rcv(skb);
break;
case ICMPV6_PKT_TOOBIG:
@@ -994,19 +1000,20 @@ static int icmpv6_rcv(struct sk_buff *skb)
/* until the v6 path can be better sorted assume failure and
* preserve the status quo behaviour for the rest of the paths to here
*/
- if (success)
- consume_skb(skb);
+ if (reason)
+ kfree_skb_reason(skb, reason);
else
- kfree_skb(skb);
+ consume_skb(skb);
return 0;
csum_error:
+ reason = SKB_DROP_REASON_ICMP_CSUM;
__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
}
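
The inverted success/drop branch in icmpv6_rcv() reads oddly until the
convention is spelled out: ping_rcv() now returns an enum skb_drop_reason,
and its zero value (SKB_NOT_DROPPED_YET, assuming the enum layout from this
series) means the skb was accepted. Non-ping types keep the pre-set
NOT_SPECIFIED reason, preserving the old default-drop behaviour:

    if (reason)     /* a real drop reason was recorded along the way */
            kfree_skb_reason(skb, reason);
    else            /* SKB_NOT_DROPPED_YET: delivered, account as consumed */
            consume_skb(skb);
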
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 5b5ea35635f9..126ae3aa67e1 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -145,12 +145,14 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
struct net *net)
{
+ enum skb_drop_reason reason;
const struct ipv6hdr *hdr;
u32 pkt_len;
struct inet6_dev *idev;
if (skb->pkt_type == PACKET_OTHERHOST) {
- kfree_skb(skb);
+ dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
+ kfree_skb_reason(skb, SKB_DROP_REASON_OTHERHOST);
return NULL;
}
@@ -160,9 +162,12 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
__IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
+ SKB_DR_SET(reason, NOT_SPECIFIED);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
!idev || unlikely(idev->cnf.disable_ipv6)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
+ if (unlikely(idev->cnf.disable_ipv6))
+ SKB_DR_SET(reason, IPV6DISABLED);
goto drop;
}
@@ -186,8 +191,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
hdr = ipv6_hdr(skb);
- if (hdr->version != 6)
+ if (hdr->version != 6) {
+ SKB_DR_SET(reason, UNHANDLED_PROTO);
goto err;
+ }
__IP6_ADD_STATS(net, idev,
IPSTATS_MIB_NOECTPKTS +
@@ -225,8 +232,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (!ipv6_addr_is_multicast(&hdr->daddr) &&
(skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) &&
- idev->cnf.drop_unicast_in_l2_multicast)
+ idev->cnf.drop_unicast_in_l2_multicast) {
+ SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST);
goto err;
+ }
/* RFC4291 2.7
* Nodes must not originate a packet to a multicast address whose scope
@@ -255,12 +264,11 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
__IP6_INC_STATS(net,
idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ SKB_DR_SET(reason, PKT_TOO_SMALL);
goto drop;
}
- if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- goto drop;
- }
+ if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
+ goto err;
hdr = ipv6_hdr(skb);
}
@@ -281,9 +289,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
return skb;
err:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
+ SKB_DR_OR(reason, IP_INHDR);
drop:
rcu_read_unlock();
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NULL;
}
@@ -353,6 +362,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr,
const struct inet6_protocol *ipprot;
struct inet6_dev *idev;
unsigned int nhoff;
+ SKB_DR(reason);
bool raw;
/*
@@ -412,12 +422,16 @@ resubmit_final:
if (ipv6_addr_is_multicast(&hdr->daddr) &&
!ipv6_chk_mcast_addr(dev, &hdr->daddr,
&hdr->saddr) &&
- !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
+ !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) {
+ SKB_DR_SET(reason, IP_INADDRERRORS);
goto discard;
+ }
}
if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
- !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto discard;
+ }
ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv,
skb);
@@ -443,8 +457,11 @@ resubmit_final:
IPSTATS_MIB_INUNKNOWNPROTOS);
icmpv6_send(skb, ICMPV6_PARAMPROB,
ICMPV6_UNK_NEXTHDR, nhoff);
+ SKB_DR_SET(reason, IP_NOPROTO);
+ } else {
+ SKB_DR_SET(reason, XFRM_POLICY);
}
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
} else {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
consume_skb(skb);
@@ -454,7 +471,7 @@ resubmit_final:
discard:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
}
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index fa63ef2bd99c..1f3d777e7694 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -469,6 +469,7 @@ int ip6_forward(struct sk_buff *skb)
struct inet6_skb_parm *opt = IP6CB(skb);
struct net *net = dev_net(dst->dev);
struct inet6_dev *idev;
+ SKB_DR(reason);
u32 mtu;
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
@@ -518,7 +519,7 @@ int ip6_forward(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return -ETIMEDOUT;
}
@@ -537,6 +538,7 @@ int ip6_forward(struct sk_buff *skb)
if (!xfrm6_route_forward(skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
}
dst = skb_dst(skb);
@@ -596,7 +598,7 @@ int ip6_forward(struct sk_buff *skb)
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
__IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_FRAGFAILS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
return -EMSGSIZE;
}
@@ -618,8 +620,9 @@ int ip6_forward(struct sk_buff *skb)
error:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
+ SKB_DR_SET(reason, IP_INADDRERRORS);
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return -EINVAL;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 53f632a560ec..19325b7600bb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -257,8 +257,6 @@ static int ip6_tnl_create2(struct net_device *dev)
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
int err;
- t = netdev_priv(dev);
-
dev->rtnl_link_ops = &ip6_link_ops;
err = register_netdevice(dev);
if (err < 0)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index b3f163b40c2b..8970d0b4faeb 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -30,6 +30,10 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
fl6->daddr = iph->daddr;
fl6->saddr = iph->saddr;
} else {
+ if (nft_hook(pkt) == NF_INET_FORWARD &&
+ priv->flags & NFTA_FIB_F_IIF)
+ fl6->flowi6_iif = nft_out(pkt)->ifindex;
+
fl6->daddr = iph->saddr;
fl6->saddr = iph->daddr;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index c51d5ce3711c..0d7c13d33d1a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -460,7 +460,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
*/
static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
@@ -477,7 +477,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 169e9df6d172..9471ab4421c8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4482,6 +4482,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(dst->dev);
struct inet6_dev *idev;
+ SKB_DR(reason);
int type;
if (netif_is_l3_master(skb->dev) ||
@@ -4494,11 +4495,14 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
case IPSTATS_MIB_INNOROUTES:
type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
if (type == IPV6_ADDR_ANY) {
+ SKB_DR_SET(reason, IP_INADDRERRORS);
IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
break;
}
+ SKB_DR_SET(reason, IP_INNOROUTES);
fallthrough;
case IPSTATS_MIB_OUTNOROUTES:
+ SKB_DR_OR(reason, IP_OUTNOROUTES);
IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
break;
}
@@ -4508,7 +4512,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
skb_dst_drop(skb);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 13678d3908fa..782df529ff69 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2044,7 +2044,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
- tp->snd_cwnd,
+ tcp_snd_cwnd(tp),
state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7f0fa9bd9ffe..db9449b52dbe 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -322,7 +322,7 @@ static int udp6_skb_len(struct sk_buff *skb)
*/
int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
@@ -342,7 +342,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
try_again:
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
+ skb = __skb_recv_udp(sk, flags, &off, &err);
if (!skb)
return err;
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index b2fcc46c1630..4251e49d32a0 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -20,8 +20,8 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen);
int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
-int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len);
+int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len);
void udpv6_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a1760add5bf1..a0385ddbffcf 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1223,7 +1223,6 @@ static void iucv_process_message_q(struct sock *sk)
static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
unsigned int copied, rlen;
@@ -1242,7 +1241,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
/* receive/dequeue next skb:
* the function understands MSG_PEEK and, thus, does not dequeue skb */
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
return 0;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index fd51db3be91c..d09ec26b1081 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3696,7 +3696,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
goto out;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (skb == NULL)
goto out;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index b3edafa5fba4..6af09e188e52 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -515,7 +515,7 @@ no_route:
}
static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+ size_t len, int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
size_t copied = 0;
@@ -526,7 +526,7 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto out;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 96f975777438..217c7192691e 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -657,7 +657,7 @@ do_confirm:
}
static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
@@ -671,7 +671,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (flags & MSG_ERRQUEUE)
return ipv6_recv_error(sk, msg, len, addr_len);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index bf35710127dd..8be1fdc68a0b 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -191,8 +191,7 @@ static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg,
goto end;
err = 0;
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto end;
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index e22b0cbb2f35..221863afc4b1 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -216,7 +216,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
return -EOPNOTSUPP;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
if (!skb)
return rc;
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 61205cf40074..24df29e135ed 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -352,7 +352,7 @@ static void mctp_test_route_input_sk(struct kunit *test)
if (params->deliver) {
KUNIT_EXPECT_EQ(test, rc, 0);
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
KUNIT_EXPECT_EQ(test, skb->len, 1);
@@ -360,7 +360,7 @@ static void mctp_test_route_input_sk(struct kunit *test)
} else {
KUNIT_EXPECT_NE(test, rc, 0);
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
KUNIT_EXPECT_PTR_EQ(test, skb2, NULL);
}
@@ -423,7 +423,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test)
rc = mctp_route_input(&rt->rt, skb);
}
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
if (params->rx_len) {
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
@@ -582,7 +582,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
rc = mctp_route_input(&rt->rt, skb);
/* (potentially) receive message */
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
if (params->deliver)
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index d6fdc5782d33..35b5f806fdda 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1527,10 +1527,9 @@ static int mpls_ifdown(struct net_device *dev, int event)
rt->rt_nh_size;
struct mpls_route *orig = rt;
- rt = kmalloc(size, GFP_KERNEL);
+ rt = kmemdup(orig, size, GFP_KERNEL);
if (!rt)
return -ENOMEM;
- memcpy(rt, orig, size);
}
}
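
The af_mpls hunk above is the classic kmalloc()+memcpy() to kmemdup() collapse: one call allocates and copies, and the duplicate-this-object intent is explicit. A userspace analogue (the real helper additionally takes GFP allocation flags):

    #include <stdlib.h>
    #include <string.h>

    /* Userspace analogue of kmemdup(). */
    static void *memdup(const void *src, size_t len)
    {
        void *dst = malloc(len);

        if (dst)
            memcpy(dst, src, len);
        return dst; /* NULL on allocation failure, like kmemdup() */
    }

    int main(void)
    {
        const int orig[4] = { 1, 2, 3, 4 };
        int *copy = memdup(orig, sizeof(orig));

        if (!copy)
            return 1;   /* the kernel path returns -ENOMEM here */
        free(copy);
        return 0;
    }
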
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f44125dd6697..dbb6d876a203 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -66,20 +66,103 @@ out_nosk:
return err;
}
+struct mptcp_diag_ctx {
+ long s_slot;
+ long s_num;
+ unsigned int l_slot;
+ unsigned int l_num;
+};
+
+static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r,
+ bool net_admin)
+{
+ struct inet_diag_dump_data *cb_data = cb->data;
+ struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
+ struct nlattr *bc = cb_data->inet_diag_nla_bc;
+ struct net *net = sock_net(skb->sk);
+ int i;
+
+ for (i = diag_ctx->l_slot; i < INET_LHTABLE_SIZE; i++) {
+ struct inet_listen_hashbucket *ilb;
+ struct hlist_nulls_node *node;
+ struct sock *sk;
+ int num = 0;
+
+ ilb = &tcp_hashinfo.listening_hash[i];
+
+ rcu_read_lock();
+ spin_lock(&ilb->lock);
+ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
+ const struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ int ret;
+
+ if (num < diag_ctx->l_num)
+ goto next_listen;
+
+ if (!ctx || strcmp(inet_csk(sk)->icsk_ulp_ops->name, "mptcp"))
+ goto next_listen;
+
+ sk = ctx->conn;
+ if (!sk || !net_eq(sock_net(sk), net))
+ goto next_listen;
+
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_listen;
+
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next_listen;
+
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ goto next_listen;
+
+ ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
+
+ sock_put(sk);
+
+ if (ret < 0) {
+ spin_unlock(&ilb->lock);
+ rcu_read_unlock();
+ diag_ctx->l_slot = i;
+ diag_ctx->l_num = num;
+ return;
+ }
+ diag_ctx->l_num = num + 1;
+ num = 0;
+next_listen:
+ ++num;
+ }
+ spin_unlock(&ilb->lock);
+ rcu_read_unlock();
+
+ cond_resched();
+ diag_ctx->l_num = 0;
+ }
+
+ diag_ctx->l_num = 0;
+ diag_ctx->l_slot = i;
+}
+
static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
struct inet_diag_dump_data *cb_data;
struct mptcp_sock *msk;
struct nlattr *bc;
+ BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));
+
cb_data = cb->data;
bc = cb_data->inet_diag_nla_bc;
- while ((msk = mptcp_token_iter_next(net, &cb->args[0], &cb->args[1])) !=
- NULL) {
+ while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
+ &diag_ctx->s_num)) != NULL) {
struct inet_sock *inet = (struct inet_sock *)msk;
struct sock *sk = (struct sock *)msk;
int ret = 0;
@@ -101,11 +184,14 @@ next:
sock_put(sk);
if (ret < 0) {
/* will retry on the same position */
- cb->args[1]--;
+ diag_ctx->s_num--;
break;
}
cond_resched();
}
+
+ if ((r->idiag_states & TCPF_LISTEN) && r->id.idiag_dport == 0)
+ mptcp_diag_dump_listeners(skb, cb, r, net_admin);
}
static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -116,6 +202,19 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
r->idiag_rqueue = sk_rmem_alloc_get(sk);
r->idiag_wqueue = sk_wmem_alloc_get(sk);
+
+ if (inet_sk_state_load(sk) == TCP_LISTEN) {
+ struct sock *lsk = READ_ONCE(msk->first);
+
+ if (lsk) {
+ /* override with settings from tcp listener,
+ * so Send-Q will show accept queue.
+ */
+ r->idiag_rqueue = READ_ONCE(lsk->sk_ack_backlog);
+ r->idiag_wqueue = READ_ONCE(lsk->sk_max_ack_backlog);
+ }
+ }
+
if (!info)
return;
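
Context for the mptcp_diag rework above: inet_diag dumps span multiple netlink messages, so the walker must be able to stop when a message fills and resume later. The patch overlays a typed struct mptcp_diag_ctx on cb->ctx (the BUILD_BUG_ON proves it fits) and adds l_slot/l_num so the new listener walk can resume at the exact hash bucket and in-bucket position. A toy model of that two-level resumable cursor — no netlink, buckets are plain loops:

    #include <stdio.h>

    #define NSLOTS 4
    #define NITEMS 3

    struct dump_ctx {
        unsigned int l_slot;    /* bucket to resume at */
        unsigned int l_num;     /* position within that bucket */
    };

    /* Emit items until the (fake) message budget runs out; 0 = call again. */
    static int dump_some(struct dump_ctx *ctx, int budget)
    {
        unsigned int i, num;

        for (i = ctx->l_slot; i < NSLOTS; i++) {
            for (num = ctx->l_num; num < NITEMS; num++) {
                if (budget-- == 0) {
                    /* record the cursor exactly where the next
                     * call must pick up, then bail out */
                    ctx->l_slot = i;
                    ctx->l_num = num;
                    return 0;
                }
                printf("slot %u item %u\n", i, num);
            }
            ctx->l_num = 0; /* the next bucket starts from the top */
        }
        ctx->l_slot = i;
        return 1;   /* walk complete */
    }

    int main(void)
    {
        struct dump_ctx ctx = { 0, 0 };

        while (!dump_some(&ctx, 5))
            puts("-- resume --");
        return 0;
    }
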
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 01809eef29b4..8aa0cdb7ad46 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -262,14 +262,25 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
+void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+ struct mptcp_sock *msk;
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
- subflow->backup = bkup;
+ msk = mptcp_sk(sk);
+ if (subflow->backup != bkup) {
+ subflow->backup = bkup;
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ msk->last_snd = NULL;
+ else
+ __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags);
+ mptcp_data_unlock(sk);
+ }
- mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
+ mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
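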
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index b5e8de6f7507..c20261b612e9 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -55,6 +55,17 @@ struct pm_nl_pernet {
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
+static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net)
+{
+ return net_generic(net, pm_nl_pernet_id);
+}
+
+static struct pm_nl_pernet *
+pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk)
+{
+ return pm_nl_get_pernet(sock_net((struct sock *)msk));
+}
+
static bool addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port)
{
@@ -206,43 +217,39 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk)
{
- const struct pm_nl_pernet *pernet;
+ const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((const struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_signal_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_accept_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->subflows_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max);
unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->local_addr_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
(find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
@@ -508,7 +515,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
- pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet(sock_net(sk));
add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk);
local_addr_max = mptcp_pm_get_local_addr_max(msk);
@@ -604,7 +611,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
unsigned int subflows_max;
int i = 0;
- pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet_from_msk(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
rcu_read_lock();
@@ -727,6 +734,8 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
if (!addresses_equal(&local, addr, addr->port))
continue;
+ if (subflow->backup != bkup)
+ msk->last_snd = NULL;
subflow->backup = bkup;
subflow->send_mp_prio = 1;
subflow->request_bkup = bkup;
@@ -1021,7 +1030,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
if (addresses_equal(&msk_local, &skc_local, false))
return 0;
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet_from_msk(msk);
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
@@ -1212,7 +1221,7 @@ skip_family:
static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info)
{
- return net_generic(genl_info_net(info), pm_nl_pernet_id);
+ return pm_nl_get_pernet(genl_info_net(info));
}
static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
@@ -1306,7 +1315,7 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
if (id) {
rcu_read_lock();
- entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id);
+ entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
if (entry) {
*flags = entry->flags;
*ifindex = entry->ifindex;
@@ -1653,7 +1662,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
void *hdr;
int i;
- pernet = net_generic(net, pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet(net);
spin_lock_bh(&pernet->lock);
for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
@@ -2165,7 +2174,7 @@ static struct genl_family mptcp_genl_family __ro_after_init = {
static int __net_init pm_nl_init_net(struct net *net)
{
- struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
@@ -2187,7 +2196,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
struct net *net;
list_for_each_entry(net, net_list, exit_list) {
- struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
/* net is removed from namespace list, can't race with
* other modifiers, also netns core already waited for a
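
The pm_netlink.c churn above is a single refactor: the repeated net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id) chain is hidden behind pm_nl_get_pernet() and pm_nl_get_pernet_from_msk(), leaving one grep-able spot where per-netns storage is resolved. A compact sketch of the wrapper idiom — net_generic() and the pernet id are stubbed; only the layering matters:

    #include <stdio.h>

    struct net { int unused; };
    struct pm_nl_pernet { unsigned int add_addr_signal_max; };

    static struct pm_nl_pernet storage = { .add_addr_signal_max = 8 };
    static const int pm_nl_pernet_id;

    static void *net_generic(const struct net *net, int id)
    {
        (void)net;
        (void)id;
        return &storage; /* the real one indexes per-netns generic storage */
    }

    /* One helper owns the cast-and-lookup chain ... */
    static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net)
    {
        return net_generic(net, pm_nl_pernet_id);
    }

    int main(void)
    {
        struct net net;

        /* ... so every call site shrinks to one readable line. */
        printf("%u\n", pm_nl_get_pernet(&net)->add_addr_signal_max);
        return 0;
    }
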
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0cbea3b6d0a4..0492aa9308c7 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2012,7 +2012,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)
}
static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct scm_timestamping_internal tss;
@@ -2030,7 +2030,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out_err;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
len = min_t(size_t, len, INT_MAX);
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -3092,15 +3092,19 @@ static void mptcp_release_cb(struct sock *sk)
spin_lock_bh(&sk->sk_lock.slock);
}
- /* be sure to set the current sk state before tacking actions
- * depending on sk_state
- */
- if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
- __mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
- __mptcp_error_report(sk);
+ if (unlikely(msk->cb_flags)) {
+ /* be sure to set the current sk state before taking actions
+ * depending on sk_state, that is processing MPTCP_ERROR_REPORT
+ */
+ if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
+ __mptcp_set_connected(sk);
+ if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
+ __mptcp_error_report(sk);
+ if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags))
+ msk->last_snd = NULL;
+ }
__mptcp_update_rmem(sk);
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3c1a3036550f..aca1fb56523f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -124,6 +124,7 @@
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_CONNECTED 6
+#define MPTCP_RESET_SCHEDULER 7
static inline bool before64(__u64 seq1, __u64 seq2)
{
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index f949d22f52bd..826b0c1dae98 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -853,15 +853,11 @@ out:
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
- struct sock *sk = &msk->sk.icsk_inet.sk;
u32 flags = 0;
- bool slow;
u8 val;
memset(info, 0, sizeof(*info));
- slow = lock_sock_fast(sk);
-
info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
@@ -882,8 +878,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
-
- unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
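
Dropping lock_sock_fast() from mptcp_diag_fill_info() above works because every field it samples is read with READ_ONCE() against WRITE_ONCE() writers: the snapshot may be marginally stale, which diagnostics tolerate, but individual loads are neither torn nor re-read. A simplified READ_ONCE/WRITE_ONCE pair — trimmed from the kernel's, which also handles non-scalar types; tear-freedom assumes a naturally aligned field no wider than the machine word:

    #include <stdio.h>

    #define READ_ONCE(x)        (*(const volatile typeof(x) *)&(x))
    #define WRITE_ONCE(x, v)    (*(volatile typeof(x) *)&(x) = (v))

    struct msk_mini {
        unsigned long snd_una;
    };

    /* Filling diag info without the socket lock: each field is sampled
     * once via READ_ONCE, so the compiler cannot re-read it and a
     * concurrent WRITE_ONCE writer is never observed half-done.
     */
    static void fill_info(const struct msk_mini *msk, unsigned long *out)
    {
        *out = READ_ONCE(msk->snd_una);
    }

    int main(void)
    {
        struct msk_mini msk;
        unsigned long v;

        WRITE_ONCE(msk.snd_una, 42);
        fill_info(&msk, &v);
        printf("%lu\n", v);
        return 0;
    }
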
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index fe98673dd5ac..bc4d5cd63a94 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -38,6 +38,7 @@
* @l4proto - Layer 4 protocol
* Values:
* IPPROTO_TCP, IPPROTO_UDP
+ * @dir - connection tracking tuple direction
* @reserved - Reserved member, will be reused for more options in future
* Values:
* 0
@@ -46,7 +47,8 @@ struct bpf_ct_opts {
s32 netns_id;
s32 error;
u8 l4proto;
- u8 reserved[3];
+ u8 dir;
+ u8 reserved[2];
};
enum {
@@ -56,10 +58,11 @@ enum {
static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
struct bpf_sock_tuple *bpf_tuple,
u32 tuple_len, u8 protonum,
- s32 netns_id)
+ s32 netns_id, u8 *dir)
{
struct nf_conntrack_tuple_hash *hash;
struct nf_conntrack_tuple tuple;
+ struct nf_conn *ct;
if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
return ERR_PTR(-EPROTO);
@@ -99,7 +102,12 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
put_net(net);
if (!hash)
return ERR_PTR(-ENOENT);
- return nf_ct_tuplehash_to_ctrack(hash);
+
+ ct = nf_ct_tuplehash_to_ctrack(hash);
+ if (dir)
+ *dir = NF_CT_DIRECTION(hash);
+
+ return ct;
}
__diag_push();
@@ -135,13 +143,13 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
if (!opts)
return NULL;
if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
- opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+ opts__sz != NF_BPF_CT_OPTS_SZ) {
opts->error = -EINVAL;
return NULL;
}
caller_net = dev_net(ctx->rxq->dev);
nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
- opts->netns_id);
+ opts->netns_id, &opts->dir);
if (IS_ERR(nfct)) {
opts->error = PTR_ERR(nfct);
return NULL;
@@ -178,13 +186,13 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
if (!opts)
return NULL;
if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
- opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+ opts__sz != NF_BPF_CT_OPTS_SZ) {
opts->error = -EINVAL;
return NULL;
}
caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
- opts->netns_id);
+ opts->netns_id, &opts->dir);
if (IS_ERR(nfct)) {
opts->error = PTR_ERR(nfct);
return NULL;
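
bpf_ct_opts is ABI, which is why the new dir output above is carved out of reserved[] and the struct size (NF_BPF_CT_OPTS_SZ) is unchanged; the lookup now reports which direction of the conntrack tuple matched via NF_CT_DIRECTION(hash). A toy model of the in/out options-struct pattern and its reserved-bytes-must-stay-zero rule — layout and values simplified:

    #include <stdio.h>
    #include <string.h>

    struct ct_opts {
        int netns_id;               /* in */
        int error;                  /* out */
        unsigned char l4proto;      /* in */
        unsigned char dir;          /* out: which tuple direction matched */
        unsigned char reserved[2];  /* must stay zero for future reuse */
    };

    static int ct_lookup(struct ct_opts *opts)
    {
        if (opts->reserved[0] || opts->reserved[1]) {
            opts->error = -22;  /* -EINVAL: rejects stale callers */
            return -1;
        }
        opts->dir = 1;  /* pretend the reply direction matched */
        return 0;
    }

    int main(void)
    {
        struct ct_opts opts;

        memset(&opts, 0, sizeof(opts));
        opts.l4proto = 6;   /* IPPROTO_TCP */
        if (!ct_lookup(&opts))
            printf("dir = %u\n", opts.dir);
        return 0;
    }
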
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 07e65b4e92f8..0cb2da0a759a 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -96,8 +96,8 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
static void ecache_work(struct work_struct *work)
{
- struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work);
- struct netns_ct *ctnet = cnet->ct_net;
+ struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work);
+ struct netns_ct *ctnet = cnet->ecache.ct_net;
int cpu, delay = -1;
struct ct_pcpu *pcpu;
@@ -127,7 +127,7 @@ static void ecache_work(struct work_struct *work)
ctnet->ecache_dwork_pending = delay > 0;
if (delay >= 0)
- schedule_delayed_work(&cnet->ecache_dwork, delay);
+ schedule_delayed_work(&cnet->ecache.dwork, delay);
}
static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
@@ -293,12 +293,12 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
if (state == NFCT_ECACHE_DESTROY_FAIL &&
- !delayed_work_pending(&cnet->ecache_dwork)) {
- schedule_delayed_work(&cnet->ecache_dwork, HZ);
+ !delayed_work_pending(&cnet->ecache.dwork)) {
+ schedule_delayed_work(&cnet->ecache.dwork, HZ);
net->ct.ecache_dwork_pending = true;
} else if (state == NFCT_ECACHE_DESTROY_SENT) {
net->ct.ecache_dwork_pending = false;
- mod_delayed_work(system_wq, &cnet->ecache_dwork, 0);
+ mod_delayed_work(system_wq, &cnet->ecache.dwork, 0);
}
}
@@ -310,8 +310,9 @@ void nf_conntrack_ecache_pernet_init(struct net *net)
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
net->ct.sysctl_events = nf_ct_events;
- cnet->ct_net = &net->ct;
- INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work);
+
+ cnet->ecache.ct_net = &net->ct;
+ INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work);
BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */
}
@@ -320,5 +321,5 @@ void nf_conntrack_ecache_pernet_fini(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- cancel_delayed_work_sync(&cnet->ecache_dwork);
+ cancel_delayed_work_sync(&cnet->ecache.dwork);
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1ea2ad732d57..924d766e6c53 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1708,6 +1708,47 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
return 0;
}
+static int ctnetlink_dump_one_entry(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct nf_conn *ct,
+ bool dying)
+{
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ u8 l3proto = nfmsg->nfgen_family;
+ int res;
+
+ if (l3proto && nf_ct_l3num(ct) != l3proto)
+ return 0;
+
+ if (ctx->last) {
+ if (ct != ctx->last)
+ return 0;
+
+ ctx->last = NULL;
+ }
+
+ /* We can't dump extension info for the unconfirmed
+ * list because unconfirmed conntracks can have
+ * ct->ext reallocated (and thus freed).
+ *
+ * In the dying list case ct->ext can't be free'd
+ * until after we drop pcpu->lock.
+ */
+ res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ ct, dying, 0);
+ if (res < 0) {
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
+ return 0;
+
+ ctx->last = ct;
+ }
+
+ return res;
+}
+
static int
ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
{
@@ -1715,12 +1756,9 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- u_int8_t l3proto = nfmsg->nfgen_family;
- int res;
- int cpu;
struct hlist_nulls_head *list;
struct net *net = sock_net(skb->sk);
+ int res, cpu;
if (ctx->done)
return 0;
@@ -1739,30 +1777,10 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
restart:
hlist_nulls_for_each_entry(h, n, list, hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
- if (l3proto && nf_ct_l3num(ct) != l3proto)
- continue;
- if (ctx->last) {
- if (ct != last)
- continue;
- ctx->last = NULL;
- }
- /* We can't dump extension info for the unconfirmed
- * list because unconfirmed conntracks can have
- * ct->ext reallocated (and thus freed).
- *
- * In the dying list case ct->ext can't be free'd
- * until after we drop pcpu->lock.
- */
- res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct, dying, 0);
+ res = ctnetlink_dump_one_entry(skb, cb, ct, dying);
if (res < 0) {
- if (!refcount_inc_not_zero(&ct->ct_general.use))
- continue;
ctx->cpu = cpu;
- ctx->last = ct;
spin_unlock_bh(&pcpu->lock);
goto out;
}
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index 13234641cdb3..77bcb10fc586 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -40,6 +40,12 @@ struct arppayload {
unsigned char ip_dst[4];
};
+/* Guard against containers flooding syslog. */
+static bool nf_log_allowed(const struct net *net)
+{
+ return net_eq(net, &init_net) || sysctl_nf_log_all_netns;
+}
+
static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
{
u16 vid;
@@ -133,8 +139,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -766,9 +771,9 @@ dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
}
-static void dump_ipv4_mac_header(struct nf_log_buf *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
+static void dump_mac_header(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
unsigned int logflags = 0;
@@ -798,9 +803,26 @@ fallback:
const unsigned char *p = skb_mac_header(skb);
unsigned int i;
- nf_log_buf_add(m, "%02x", *p++);
- for (i = 1; i < dev->hard_header_len; i++, p++)
- nf_log_buf_add(m, ":%02x", *p);
+ if (dev->type == ARPHRD_SIT) {
+ p -= ETH_HLEN;
+
+ if (p < skb->head)
+ p = NULL;
+ }
+
+ if (p) {
+ nf_log_buf_add(m, "%02x", *p++);
+ for (i = 1; i < dev->hard_header_len; i++)
+ nf_log_buf_add(m, ":%02x", *p++);
+ }
+
+ if (dev->type == ARPHRD_SIT) {
+ const struct iphdr *iph =
+ (struct iphdr *)skb_mac_header(skb);
+
+ nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr,
+ &iph->daddr);
+ }
}
nf_log_buf_add(m, " ");
}
@@ -814,8 +836,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -827,7 +848,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
out, loginfo, prefix);
if (in)
- dump_ipv4_mac_header(m, loginfo, skb);
+ dump_mac_header(m, loginfo, skb);
dump_ipv4_packet(net, m, loginfo, skb, 0);
@@ -841,64 +862,6 @@ static struct nf_logger nf_ip_logger __read_mostly = {
.me = THIS_MODULE,
};
-static void dump_ipv6_mac_header(struct nf_log_buf *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- unsigned int logflags = 0;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
-
- if (!(logflags & NF_LOG_MACDECODE))
- goto fallback;
-
- switch (dev->type) {
- case ARPHRD_ETHER:
- nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
- nf_log_dump_vlan(m, skb);
- nf_log_buf_add(m, "MACPROTO=%04x ",
- ntohs(eth_hdr(skb)->h_proto));
- return;
- default:
- break;
- }
-
-fallback:
- nf_log_buf_add(m, "MAC=");
- if (dev->hard_header_len &&
- skb->mac_header != skb->network_header) {
- const unsigned char *p = skb_mac_header(skb);
- unsigned int len = dev->hard_header_len;
- unsigned int i;
-
- if (dev->type == ARPHRD_SIT) {
- p -= ETH_HLEN;
-
- if (p < skb->head)
- p = NULL;
- }
-
- if (p) {
- nf_log_buf_add(m, "%02x", *p++);
- for (i = 1; i < len; i++)
- nf_log_buf_add(m, ":%02x", *p++);
- }
- nf_log_buf_add(m, " ");
-
- if (dev->type == ARPHRD_SIT) {
- const struct iphdr *iph =
- (struct iphdr *)skb_mac_header(skb);
- nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
- &iph->daddr);
- }
- } else {
- nf_log_buf_add(m, " ");
- }
-}
-
static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
@@ -908,8 +871,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -921,7 +883,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
loginfo, prefix);
if (in)
- dump_ipv6_mac_header(m, loginfo, skb);
+ dump_mac_header(m, loginfo, skb);
dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
@@ -935,6 +897,32 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
.me = THIS_MODULE,
};
+static void nf_log_unknown_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ if (!nf_log_allowed(net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+ prefix);
+
+ dump_mac_header(m, loginfo, skb);
+
+ nf_log_buf_close(m);
+}
+
static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
@@ -954,6 +942,10 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
case htons(ETH_P_RARP):
nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
break;
+ default:
+ nf_log_unknown_packet(net, pf, hooknum, skb,
+ in, out, loginfo, prefix);
+ break;
}
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 16c3a39689f4..f3ad02a399f8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8367,10 +8367,8 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
if (chain->blob_next || !nft_is_active_next(net, chain))
return 0;
- rule = list_entry(&chain->rules, struct nft_rule, list);
-
data_size = 0;
- list_for_each_entry_continue(rule, &chain->rules, list) {
+ list_for_each_entry(rule, &chain->rules, list) {
if (nft_is_active_next(net, rule)) {
data_size += sizeof(*prule) + rule->dlen;
if (data_size > INT_MAX)
@@ -8387,7 +8385,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
data_boundary = data + data_size;
size = 0;
- list_for_each_entry_continue(rule, &chain->rules, list) {
+ list_for_each_entry(rule, &chain->rules, list) {
if (!nft_is_active_next(net, rule))
continue;
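
The nf_tables hunk above removes a subtle idiom: list_entry(&chain->rules, struct nft_rule, list) manufactured a pointer from the list head itself — not a real nft_rule — purely so list_for_each_entry_continue() would begin at the first element. Plain list_for_each_entry() walks the same elements without ever forming the bogus pointer. A standalone demonstration; the list macros are trimmed copies of the include/linux/list.h ones (non-RCU, no debug poisoning):

    #include <stddef.h>
    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
    #define list_entry(ptr, type, member) container_of(ptr, type, member)
    #define list_for_each_entry(pos, head, member)                       \
        for (pos = list_entry((head)->next, typeof(*pos), member);       \
             &pos->member != (head);                                     \
             pos = list_entry(pos->member.next, typeof(*pos), member))

    struct rule {
        struct list_head list;
        int dlen;
    };

    static void sum_rules(struct list_head *rules)
    {
        struct rule *r;
        int total = 0;

        /* The removed code did:
         *     r = list_entry(rules, struct rule, list);    // bogus pointer!
         *     list_for_each_entry_continue(r, rules, list) ...
         * i.e. it conjured a fake "entry" aliased to the list head just to
         * start the walk one past it. Iterating from the head directly
         * visits the same elements without that pointer.
         */
        list_for_each_entry(r, rules, list)
            total += r->dlen;
        printf("total dlen = %d\n", total); /* 7 */
    }

    int main(void)
    {
        struct list_head head = { &head, &head };
        struct rule a = { .dlen = 3 }, b = { .dlen = 4 };

        /* hand-rolled list_add_tail(): head <-> a <-> b */
        a.list.prev = &head;   a.list.next = &b.list;
        b.list.prev = &a.list; b.list.next = &head;
        head.next = &a.list;   head.prev = &b.list;

        sum_rules(&head);
        return 0;
    }
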
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index b0d8888a539b..eea486f32971 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -158,6 +158,7 @@ static int cttimeout_new_timeout(struct sk_buff *skb,
timeout->timeout.l3num = l3num;
timeout->timeout.l4proto = l4proto;
refcount_set(&timeout->refcnt, 1);
+ __module_get(THIS_MODULE);
list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list);
return 0;
@@ -506,13 +507,8 @@ static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
- if (!try_module_get(THIS_MODULE))
+ if (!refcount_inc_not_zero(&timeout->refcnt))
goto err;
-
- if (!refcount_inc_not_zero(&timeout->refcnt)) {
- module_put(THIS_MODULE);
- goto err;
- }
matching = timeout;
break;
}
@@ -525,10 +521,10 @@ static void ctnl_timeout_put(struct nf_ct_timeout *t)
struct ctnl_timeout *timeout =
container_of(t, struct ctnl_timeout, timeout);
- if (refcount_dec_and_test(&timeout->refcnt))
+ if (refcount_dec_and_test(&timeout->refcnt)) {
kfree_rcu(timeout, rcu_head);
-
- module_put(THIS_MODULE);
+ module_put(THIS_MODULE);
+ }
}
static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
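
The cttimeout change above moves the module reference from per-lookup to per-object: __module_get() when a timeout object is created, module_put() when refcount_dec_and_test() drops the last reference. Lookups then only need refcount_inc_not_zero(), because any live object already pins the module, and the old lookup-side try_module_get() failure window disappears. A single-threaded toy model of the ownership change — refcount_t and module references are mimicked with plain ints:

    #include <stdio.h>
    #include <stdlib.h>

    static int module_refs;

    static void module_get(void) { module_refs++; }
    static void module_put(void) { module_refs--; }

    struct timeout_obj {
        int refcnt;
    };

    static struct timeout_obj *timeout_new(void)
    {
        struct timeout_obj *t = malloc(sizeof(*t));

        if (!t)
            return NULL;
        t->refcnt = 1;
        module_get();   /* the object's existence pins the module ... */
        return t;
    }

    static void timeout_put(struct timeout_obj *t)
    {
        if (--t->refcnt == 0) {
            free(t);
            module_put();   /* ... until the last reference is dropped */
        }
    }

    int main(void)
    {
        struct timeout_obj *t = timeout_new();

        if (!t)
            return 1;
        t->refcnt++;    /* lookup: no separate module_get() needed anymore */
        timeout_put(t); /* drop the lookup reference */
        timeout_put(t); /* drop the creation reference: frees and unpins */
        printf("module refs = %d\n", module_refs);  /* 0 */
        return 0;
    }
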
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index f590ee1c8a1b..83590afe3768 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -30,7 +30,7 @@ static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
{
unsigned int i;
- for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
+ for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++)
dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];
}
@@ -109,22 +109,23 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
return err;
if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
err = -EINVAL;
- goto err1;
+ goto err_mask_release;
}
err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
tb[NFTA_BITWISE_XOR]);
if (err < 0)
- goto err1;
+ goto err_mask_release;
if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
err = -EINVAL;
- goto err2;
+ goto err_xor_release;
}
return 0;
-err2:
+
+err_xor_release:
nft_data_release(&priv->xor, xor.type);
-err1:
+err_mask_release:
nft_data_release(&priv->mask, mask.type);
return err;
}
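
The nft_bitwise relabelling above (err1/err2 to err_mask_release/err_xor_release) is worth a note because the idiom recurs throughout kernel error paths: each label names what it undoes, a failure jumps to the label matching how much was initialized, and releases run in reverse acquisition order. A self-contained sketch, with acquire()/release() standing in for nft_data_init()/nft_data_release():

    #include <stdio.h>

    static int acquire(const char *what)
    {
        printf("acquire %s\n", what);
        return 0;
    }

    static void release(const char *what)
    {
        printf("release %s\n", what);
    }

    static int init_pair(int fail_validation)
    {
        int err;

        err = acquire("mask");
        if (err)
            return err;

        err = acquire("xor");
        if (err)
            goto err_mask_release;

        if (fail_validation) {
            err = -1;
            goto err_xor_release;   /* both halves live: undo both */
        }
        return 0;

    err_xor_release:
        release("xor");
    err_mask_release:
        release("mask");
        return err;
    }

    int main(void)
    {
        init_pair(1);   /* exercises the full unwind path */
        return 0;
    }
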
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index f198f2d9ef90..1f12d7ade606 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -35,6 +35,10 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
case NFT_FIB_RESULT_OIF:
case NFT_FIB_RESULT_OIFNAME:
hooks = (1 << NF_INET_PRE_ROUTING);
+ if (priv->flags & NFTA_FIB_F_IIF) {
+ hooks |= (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ }
break;
case NFT_FIB_RESULT_ADDRTYPE:
if (priv->flags & NFTA_FIB_F_IIF)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 47a876ccd288..9fa85bb36c0e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1931,7 +1931,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct scm_cookie scm;
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
- int noblock = flags & MSG_DONTWAIT;
size_t copied;
struct sk_buff *skb, *data_skb;
int err, ret;
@@ -1941,7 +1940,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
copied = 0;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (skb == NULL)
goto out;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index fa9dc2ba3941..6f7f4392cffb 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1159,7 +1159,8 @@ static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
}
/* Now we can treat all alike */
- if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL) {
+ skb = skb_recv_datagram(sk, flags, &er);
+ if (!skb) {
release_sock(sk);
return er;
}
diff --git a/net/nfc/core.c b/net/nfc/core.c
index dc7a2404efdf..67524982b89b 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1165,6 +1165,7 @@ void nfc_unregister_device(struct nfc_dev *dev)
if (dev->rfkill) {
rfkill_unregister(dev->rfkill);
rfkill_destroy(dev->rfkill);
+ dev->rfkill = NULL;
}
device_unlock(&dev->dev);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 4ca35791c93b..77642d18a3b4 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -821,7 +821,6 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
unsigned int copied, rlen;
struct sk_buff *skb, *cskb;
@@ -842,7 +841,7 @@ static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg,
if (flags & (MSG_OOB))
return -EOPNOTSUPP;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
pr_err("Recv datagram failed state %d %d %d",
sk->sk_state, err, sock_error(sk));
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 0ca214ab5aef..8dd569765f96 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -238,7 +238,6 @@ static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied;
@@ -246,7 +245,7 @@ static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
pr_debug("sock=%p sk=%p len=%zu flags=%d\n", sock, sk, len, flags);
- skb = skb_recv_datagram(sk, flags, noblock, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
if (!skb)
return rc;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c39c09899fd0..d3caaf4d4b3e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3421,7 +3421,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
* but then it will block.
*/
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
/*
* An error occurred so return it. Because skb_recv_datagram()
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 393e6aa7a592..ff5f49ab236e 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -112,7 +112,7 @@ static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_buff *skb = NULL;
struct sockaddr_pn sa;
@@ -123,7 +123,7 @@ static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
MSG_CMSG_COMPAT))
goto out_nofree;
- skb = skb_recv_datagram(sk, flags, noblock, &rval);
+ skb = skb_recv_datagram(sk, flags, &rval);
if (skb == NULL)
goto out_nofree;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 65d463ad8770..83ea13a50690 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -772,7 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
u8 pipe_handle, enabled, n_sb;
u8 aligned = 0;
- skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp);
+ skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ errp);
if (!skb)
return NULL;
@@ -1238,7 +1239,7 @@ struct sk_buff *pep_read(struct sock *sk)
}
static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_buff *skb;
int err;
@@ -1267,7 +1268,7 @@ static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return -EINVAL;
}
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
lock_sock(sk);
if (skb == NULL) {
if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT)
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index ec2322529727..5c2fb992803b 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -1035,8 +1035,7 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
return -EADDRNOTAVAIL;
}
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
if (!skb) {
release_sock(sk);
return rc;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 30a1cf4c16c6..bf2d986a6bc3 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1230,7 +1230,8 @@ static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return -ENOTCONN;
/* Now we can treat all alike */
- if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL)
+ skb = skb_recv_datagram(sk, flags, &er);
+ if (!skb)
return er;
qbit = (skb->data[0] & ROSE_Q_BIT) == ROSE_Q_BIT;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 4f51094da9da..da9733da9868 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -195,7 +195,7 @@ static int offload_action_init(struct flow_offload_action *fl_action,
if (act->ops->offload_act_setup) {
spin_lock_bh(&act->tcfa_lock);
err = act->ops->offload_act_setup(act, fl_action, NULL,
- false);
+ false, extack);
spin_unlock_bh(&act->tcfa_lock);
return err;
}
@@ -271,7 +271,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action,
if (err)
goto fl_err;
- err = tc_setup_action(&fl_action->action, actions);
+ err = tc_setup_action(&fl_action->action, actions, extack);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed to setup tc actions for offload");
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index e0f515b774ca..22847ee009ef 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -696,7 +696,8 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
}
static int tcf_csum_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index b1f502fce595..8af9d6e5ba61 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1584,7 +1584,8 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
}
static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index bde6a6c01e64..ac29d1065232 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -253,7 +253,8 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
}
static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -267,7 +268,17 @@ static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data,
} else if (is_tcf_gact_goto_chain(act)) {
entry->id = FLOW_ACTION_GOTO;
entry->chain_index = tcf_gact_goto_chain_index(act);
+ } else if (is_tcf_gact_continue(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload of \"continue\" action is not supported");
+ return -EOPNOTSUPP;
+ } else if (is_tcf_gact_reclassify(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload of \"reclassify\" action is not supported");
+ return -EOPNOTSUPP;
+ } else if (is_tcf_gact_pipe(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload of \"pipe\" action is not supported");
+ return -EOPNOTSUPP;
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported generic action offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index d56e73843a4b..fd5155274733 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -619,7 +619,8 @@ static int tcf_gate_get_entries(struct flow_action_entry *entry,
}
static int tcf_gate_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
int err;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 39acd1d18609..ebb92fb072ab 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -460,7 +460,8 @@ static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry,
}
static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -478,6 +479,7 @@ static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data,
entry->id = FLOW_ACTION_MIRRED_INGRESS;
tcf_offload_mirred_get_dev(entry, act);
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported mirred offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index b9ff3459fdab..adabeccb63e1 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -385,7 +385,8 @@ static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index)
}
static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -410,7 +411,14 @@ static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data,
entry->mpls_mangle.bos = tcf_mpls_bos(act);
entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
break;
+ case TCA_MPLS_ACT_DEC_TTL:
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"dec_ttl\" option is used");
+ return -EOPNOTSUPP;
+ case TCA_MPLS_ACT_MAC_PUSH:
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"mac_push\" option is used");
+ return -EOPNOTSUPP;
default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported MPLS mode offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 31fcd279c177..e01ef7f109f4 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -488,7 +488,8 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
}
static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -503,6 +504,7 @@ static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data,
entry->id = FLOW_ACTION_ADD;
break;
default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload");
return -EOPNOTSUPP;
}
entry->mangle.htype = tcf_pedit_htype(act, k);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index f4d917705263..79c8901f66ab 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -419,7 +419,8 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
return tcf_idr_search(tn, a, index);
}
-static int tcf_police_act_to_flow_act(int tc_act, u32 *extval)
+static int tcf_police_act_to_flow_act(int tc_act, u32 *extval,
+ struct netlink_ext_ack *extack)
{
int act_id = -EOPNOTSUPP;
@@ -430,19 +431,28 @@ static int tcf_police_act_to_flow_act(int tc_act, u32 *extval)
act_id = FLOW_ACTION_DROP;
else if (tc_act == TC_ACT_PIPE)
act_id = FLOW_ACTION_PIPE;
+ else if (tc_act == TC_ACT_RECLASSIFY)
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"reclassify\"");
+ else
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload");
} else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_GOTO_CHAIN)) {
act_id = FLOW_ACTION_GOTO;
*extval = tc_act & TC_ACT_EXT_VAL_MASK;
} else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_JUMP)) {
act_id = FLOW_ACTION_JUMP;
*extval = tc_act & TC_ACT_EXT_VAL_MASK;
+ } else if (tc_act == TC_ACT_UNSPEC) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"continue\"");
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload");
}
return act_id;
}
static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -466,14 +476,16 @@ static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data,
entry->police.mtu = tcf_police_tcfp_mtu(act);
act_id = tcf_police_act_to_flow_act(police->tcf_action,
- &entry->police.exceed.extval);
+ &entry->police.exceed.extval,
+ extack);
if (act_id < 0)
return act_id;
entry->police.exceed.act_id = act_id;
act_id = tcf_police_act_to_flow_act(p->tcfp_result,
- &entry->police.notexceed.extval);
+ &entry->police.notexceed.extval,
+ extack);
if (act_id < 0)
return act_id;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 9a22cdda6bbd..2f7f5e44d28c 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -291,7 +291,8 @@ static void tcf_offload_sample_get_group(struct flow_action_entry *entry,
}
static int tcf_sample_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ceba11b198bb..92d0dc754207 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -328,7 +328,8 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
}
static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -342,7 +343,14 @@ static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data
} else if (is_tcf_skbedit_priority(act)) {
entry->id = FLOW_ACTION_PRIORITY;
entry->priority = tcf_skbedit_priority(act);
+ } else if (is_tcf_skbedit_queue_mapping(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used");
+ return -EOPNOTSUPP;
+ } else if (is_tcf_skbedit_inheritdsfield(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used");
+ return -EOPNOTSUPP;
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported skbedit option offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 23aba03d26a8..856dc23cef8c 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -808,7 +808,8 @@ static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
static int tcf_tunnel_key_offload_act_setup(struct tc_action *act,
void *entry_data,
u32 *index_inc,
- bool bind)
+ bool bind,
+ struct netlink_ext_ack *extack)
{
int err;
@@ -823,6 +824,7 @@ static int tcf_tunnel_key_offload_act_setup(struct tc_action *act,
} else if (is_tcf_tunnel_release(act)) {
entry->id = FLOW_ACTION_TUNNEL_DECAP;
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel key mode offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 883454c4f921..68b5e772386a 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -369,7 +369,8 @@ static size_t tcf_vlan_get_fill_size(const struct tc_action *act)
}
static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -398,6 +399,7 @@ static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data,
tcf_vlan_push_eth(entry->vlan_push_eth.src, entry->vlan_push_eth.dst, act);
break;
default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported vlan action mode offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f0699f39afdb..9bb4d3dcc994 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3513,20 +3513,25 @@ EXPORT_SYMBOL(tc_cleanup_offload_action);
static int tc_setup_offload_act(struct tc_action *act,
struct flow_action_entry *entry,
- u32 *index_inc)
+ u32 *index_inc,
+ struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
- if (act->ops->offload_act_setup)
- return act->ops->offload_act_setup(act, entry, index_inc, true);
- else
+ if (act->ops->offload_act_setup) {
+ return act->ops->offload_act_setup(act, entry, index_inc, true,
+ extack);
+ } else {
+ NL_SET_ERR_MSG(extack, "Action does not support offload");
return -EOPNOTSUPP;
+ }
#else
return 0;
#endif
}
int tc_setup_action(struct flow_action *flow_action,
- struct tc_action *actions[])
+ struct tc_action *actions[],
+ struct netlink_ext_ack *extack)
{
int i, j, index, err = 0;
struct tc_action *act;
@@ -3551,7 +3556,7 @@ int tc_setup_action(struct flow_action *flow_action,
entry->hw_stats = tc_act_hw_stats(act->hw_stats);
entry->hw_index = act->tcfa_index;
index = 0;
- err = tc_setup_offload_act(act, entry, &index);
+ err = tc_setup_offload_act(act, entry, &index, extack);
if (!err)
j += index;
else
@@ -3570,13 +3575,14 @@ err_out_locked:
}
int tc_setup_offload_action(struct flow_action *flow_action,
- const struct tcf_exts *exts)
+ const struct tcf_exts *exts,
+ struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
if (!exts)
return 0;
- return tc_setup_action(flow_action, exts->actions);
+ return tc_setup_action(flow_action, exts->actions, extack);
#else
return 0;
#endif
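
Everything from act_api.c down through cls_api.c above is one mechanical change: a struct netlink_ext_ack * is threaded into every offload_act_setup() callback so a refused hardware offload can hand userspace a precise message (which tc prints) rather than a bare -EOPNOTSUPP. A toy model of plumbing an error-message sink through layers — netlink_ext_ack is reduced to a message pointer; the real NL_SET_ERR_MSG_MOD also prefixes the module name:

    #include <stdio.h>

    struct ext_ack {
        const char *msg;
    };

    #define SET_ERR_MSG(ack, s) do { if (ack) (ack)->msg = (s); } while (0)

    static int offload_act_setup(int action_id, struct ext_ack *extack)
    {
        if (action_id != 0) {
            /* the precise reason travels up instead of a bare errno */
            SET_ERR_MSG(extack, "Offload of this action is not supported");
            return -95; /* -EOPNOTSUPP */
        }
        return 0;
    }

    /* Each intermediate layer only has to forward the pointer. */
    static int setup_offload_action(int action_id, struct ext_ack *extack)
    {
        return offload_act_setup(action_id, extack);
    }

    int main(void)
    {
        struct ext_ack ack = { NULL };

        if (setup_offload_action(1, &ack) && ack.msg)
            fprintf(stderr, "Error: %s\n", ack.msg);
        return 0;
    }
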
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index ed5e6f08e74a..222b0b8fac7a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -464,14 +464,12 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
cls_flower.rule->match.key = &f->mkey;
cls_flower.classid = f->res.classid;
- err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts);
+ err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts,
+ cls_flower.common.extack);
if (err) {
kfree(cls_flower.rule);
- if (skip_sw) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- return err;
- }
- return 0;
+
+ return skip_sw ? err : 0;
}
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
@@ -2362,11 +2360,11 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
cls_flower.rule->match.mask = &f->mask->key;
cls_flower.rule->match.key = &f->mkey;
- err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts);
+ err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts,
+ cls_flower.common.extack);
if (err) {
kfree(cls_flower.rule);
if (tc_skip_sw(f->flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
__fl_put(f);
return err;
}
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index ca5670fd5228..06cf22adbab7 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -97,16 +97,13 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_REPLACE;
cls_mall.cookie = cookie;
- err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts);
+ err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts,
+ cls_mall.common.extack);
if (err) {
kfree(cls_mall.rule);
mall_destroy_hw_filter(tp, head, cookie, NULL);
- if (skip_sw)
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- else
- err = 0;
- return err;
+ return skip_sw ? err : 0;
}
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall,
@@ -302,14 +299,12 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = (unsigned long)head;
- err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts);
+ err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts,
+ cls_mall.common.extack);
if (err) {
kfree(cls_mall.rule);
- if (add && tc_skip_sw(head->flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- return err;
- }
- return 0;
+
+ return add && tc_skip_sw(head->flags) ? err : 0;
}
err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7b0427658056..3e3fe923bed5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2084,7 +2084,7 @@ static int sctp_skb_pull(struct sk_buff *skb, int len)
* 5 for complete description of the flags.
*/
static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sctp_ulpevent *event = NULL;
struct sctp_sock *sp = sctp_sk(sk);
@@ -2093,9 +2093,8 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int err = 0;
int skb_len;
- pr_debug("%s: sk:%p, msghdr:%p, len:%zd, noblock:%d, flags:0x%x, "
- "addr_len:%p)\n", __func__, sk, msg, len, noblock, flags,
- addr_len);
+ pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p)\n",
+ __func__, sk, msg, len, flags, addr_len);
lock_sock(sk);
@@ -2105,7 +2104,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out;
}
- skb = sctp_skb_recv_datagram(sk, flags, noblock, &err);
+ skb = sctp_skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -8978,14 +8977,13 @@ out:
* Note: This is pretty much the same routine as in core/datagram.c
* with a few changes to make lksctp work.
*/
-struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
- int noblock, int *err)
+struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, int *err)
{
int error;
struct sk_buff *skb;
long timeo;
- timeo = sock_rcvtimeo(sk, noblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
pr_debug("%s: timeo:%ld, max:%ld\n", __func__, timeo,
MAX_SCHEDULE_TIMEOUT);
@@ -9018,7 +9016,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
break;
if (sk_can_busy_loop(sk)) {
- sk_busy_loop(sk, noblock);
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
continue;
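The sctp hunks follow the conversion applied throughout this series: the separate `noblock` argument is dropped and nonblocking behaviour is derived from `flags & MSG_DONTWAIT` at the points that need it (`sock_rcvtimeo()`, `sk_busy_loop()`). A toy model of the timeout derivation, assuming the Linux MSG_DONTWAIT value:

    #include <stdio.h>

    #define MSG_DONTWAIT 0x40 /* Linux <sys/socket.h> value, assumed here */

    /* Toy stand-in for sock_rcvtimeo(): zero timeout when nonblocking. */
    static long rcvtimeo(long sk_rcvtimeo, int flags)
    {
            return (flags & MSG_DONTWAIT) ? 0 : sk_rcvtimeo;
    }

    int main(void)
    {
            printf("%ld\n", rcvtimeo(5000, 0));            /* 5000: may block */
            printf("%ld\n", rcvtimeo(5000, MSG_DONTWAIT)); /* 0: fail fast */
            return 0;
    }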
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 0c3d2b4d7321..8920ca92a011 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -1063,7 +1063,7 @@ void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event,
struct sk_buff *skb;
int err;
- skb = sctp_skb_recv_datagram(sk, MSG_PEEK, 1, &err);
+ skb = sctp_skb_recv_datagram(sk, MSG_PEEK | MSG_DONTWAIT, &err);
if (skb != NULL) {
__sctp_ulpevent_read_nxtinfo(sctp_skb2event(skb),
msghdr, skb);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index cc35ec433400..45336e68bf79 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -464,7 +464,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
0, 0, MSG_PEEK | MSG_DONTWAIT);
if (err < 0)
goto out_recv_err;
- skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
+ skb = skb_recv_udp(svsk->sk_sk, MSG_DONTWAIT, &err);
if (!skb)
goto out_recv_err;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8ab64ea46870..5c91c5457197 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1355,7 +1355,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
if (sk == NULL)
goto out;
for (;;) {
- skb = skb_recv_udp(sk, 0, 1, &err);
+ skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
if (skb == NULL)
break;
xs_udp_data_read_skb(&transport->xprt, sk, skb);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 12f7b56771d9..5e4bd8ba48d3 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -962,11 +962,9 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
tls_ctx->rx.rec_seq, rxm->full_len,
is_encrypted, is_decrypted);
- ctx->sw.decrypted |= is_decrypted;
-
if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) {
if (likely(is_encrypted || is_decrypted))
- return 0;
+ return is_decrypted;
/* After tls_device_down disables the offload, the next SKB will
* likely have initial fragments decrypted, and final ones not
@@ -981,7 +979,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
*/
if (is_decrypted) {
ctx->resync_nh_reset = 1;
- return 0;
+ return is_decrypted;
}
if (is_encrypted) {
tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb);
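With `ctx->sw.decrypted` no longer written here, tls_device_decrypted() reports its result through the return value instead: negative on error, 0 when software must still decrypt, positive when the device already delivered plaintext. The caller in tls_sw.c (see decrypt_skb_update() in the hunks below) maps a positive return to `tlm->decrypted = 1`. A toy model of the contract, not the kernel function itself:

    #include <stdio.h>

    /* <0 error, 0 "software must decrypt", >0 "device already decrypted". */
    static int device_decrypted(int is_decrypted)
    {
            return is_decrypted;
    }

    int main(void)
    {
            int err = device_decrypted(1);

            if (err < 0)
                    puts("error");
            else if (err > 0)
                    puts("skip software decrypt"); /* taken here */
            else
                    puts("decrypt in software");
            return 0;
    }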
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index a8976ef95528..ddbe05ec5489 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -44,6 +44,11 @@
#include <net/strparser.h>
#include <net/tls.h>
+struct tls_decrypt_arg {
+ bool zc;
+ bool async;
+};
+
noinline void tls_err_abort(struct sock *sk, int err)
{
WARN_ON_ONCE(err >= 0);
@@ -128,32 +133,31 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len)
return __skb_nsg(skb, offset, len, 0);
}
-static int padding_length(struct tls_sw_context_rx *ctx,
- struct tls_prot_info *prot, struct sk_buff *skb)
+static int padding_length(struct tls_prot_info *prot, struct sk_buff *skb)
{
struct strp_msg *rxm = strp_msg(skb);
+ struct tls_msg *tlm = tls_msg(skb);
int sub = 0;
/* Determine zero-padding length */
if (prot->version == TLS_1_3_VERSION) {
+ int offset = rxm->full_len - TLS_TAG_SIZE - 1;
char content_type = 0;
int err;
- int back = 17;
while (content_type == 0) {
- if (back > rxm->full_len - prot->prepend_size)
+ if (offset < prot->prepend_size)
return -EBADMSG;
- err = skb_copy_bits(skb,
- rxm->offset + rxm->full_len - back,
+ err = skb_copy_bits(skb, rxm->offset + offset,
&content_type, 1);
if (err)
return err;
if (content_type)
break;
sub++;
- back++;
+ offset--;
}
- ctx->control = content_type;
+ tlm->control = content_type;
}
return sub;
}
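In TLS 1.3 the authenticated plaintext ends with a one-byte content type followed by optional zero padding, so the rewritten loop starts at the last byte before the AEAD tag (`rxm->full_len - TLS_TAG_SIZE - 1`) and walks backward to the first nonzero byte. A stand-alone sketch of the same backward scan over a flat buffer (TAG_SIZE assumed to be 16, matching TLS_TAG_SIZE):

    #include <stdio.h>

    #define TAG_SIZE 16 /* assumed AEAD tag length, cf. TLS_TAG_SIZE */

    /* Returns the padding length and stores the inner content type, or -1
     * when the scan runs into the record header (malformed record). */
    static int padding_len(const unsigned char *rec, int full_len,
                           int prepend_size, unsigned char *type)
    {
            int offset = full_len - TAG_SIZE - 1;
            int sub = 0;

            for (;;) {
                    if (offset < prepend_size)
                            return -1;
                    if (rec[offset])
                            break;
                    sub++;
                    offset--;
            }
            *type = rec[offset];
            return sub;
    }

    int main(void)
    {
            unsigned char rec[64] = {0}, type;
            int full_len = 5 + 1 + 10 + TAG_SIZE; /* hdr + type + pad + tag */
            int pad;

            rec[5] = 23; /* application_data, followed by 10 zero-pad bytes */
            pad = padding_len(rec, full_len, 5, &type);
            printf("pad=%d type=%u\n", pad, type); /* pad=10 type=23 */
            return 0;
    }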
@@ -169,7 +173,6 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
struct scatterlist *sg;
struct sk_buff *skb;
unsigned int pages;
- int pending;
skb = (struct sk_buff *)req->data;
tls_ctx = tls_get_ctx(skb->sk);
@@ -185,17 +188,12 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
tls_err_abort(skb->sk, err);
} else {
struct strp_msg *rxm = strp_msg(skb);
- int pad;
- pad = padding_length(ctx, prot, skb);
- if (pad < 0) {
- ctx->async_wait.err = pad;
- tls_err_abort(skb->sk, pad);
- } else {
- rxm->full_len -= pad;
- rxm->offset += prot->prepend_size;
- rxm->full_len -= prot->overhead_size;
- }
+ /* No TLS 1.3 support with async crypto */
+ WARN_ON(prot->tail_size);
+
+ rxm->offset += prot->prepend_size;
+ rxm->full_len -= prot->overhead_size;
}
/* After using skb->sk to propagate sk through crypto async callback
@@ -217,9 +215,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
kfree(aead_req);
spin_lock_bh(&ctx->decrypt_compl_lock);
- pending = atomic_dec_return(&ctx->decrypt_pending);
-
- if (!pending && ctx->async_notify)
+ if (!atomic_dec_return(&ctx->decrypt_pending))
complete(&ctx->async_wait.completion);
spin_unlock_bh(&ctx->decrypt_compl_lock);
}
@@ -231,7 +227,7 @@ static int tls_do_decryption(struct sock *sk,
char *iv_recv,
size_t data_len,
struct aead_request *aead_req,
- bool async)
+ struct tls_decrypt_arg *darg)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -244,7 +240,7 @@ static int tls_do_decryption(struct sock *sk,
data_len + prot->tag_size,
(u8 *)iv_recv);
- if (async) {
+ if (darg->async) {
/* Using skb->sk to push sk through to crypto async callback
* handler. This allows propagating errors up to the socket
* if needed. It _must_ be cleared in the async handler
@@ -264,14 +260,15 @@ static int tls_do_decryption(struct sock *sk,
ret = crypto_aead_decrypt(aead_req);
if (ret == -EINPROGRESS) {
- if (async)
- return ret;
+ if (darg->async)
+ return 0;
ret = crypto_wait_req(ret, &ctx->async_wait);
}
+ darg->async = false;
- if (async)
- atomic_dec(&ctx->decrypt_pending);
+ if (ret == -EBADMSG)
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
return ret;
}
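`struct tls_decrypt_arg` replaces the loose `chunk`/`zc`/`async` parameters, and its fields are in/out: the caller sets what it can tolerate, and the callee downgrades them to what actually happened (note `darg->async = false` once the request finished synchronously above, and the `darg->zc = false` on the fallback path inside decrypt_internal()). A toy model of that in/out convention:

    #include <stdbool.h>
    #include <stdio.h>

    struct decrypt_arg { bool zc; bool async; }; /* mirrors tls_decrypt_arg */

    /* Hypothetical decrypt step: may refuse zero-copy and may complete
     * synchronously even when the caller permitted async. */
    static int do_decrypt(struct decrypt_arg *darg, bool went_async)
    {
            if (!went_async)
                    darg->async = false; /* report: finished inline */
            return 0;
    }

    int main(void)
    {
            struct decrypt_arg darg = { .zc = true, .async = true };

            do_decrypt(&darg, false);
            printf("zc=%d async=%d\n", darg.zc, darg.async); /* async now 0 */
            return 0;
    }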
@@ -1346,15 +1343,14 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
return skb;
}
-static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
+static int tls_setup_from_iter(struct iov_iter *from,
int length, int *pages_used,
- unsigned int *size_used,
struct scatterlist *to,
int to_max_pages)
{
int rc = 0, i = 0, num_elem = *pages_used, maxpages;
struct page *pages[MAX_SKB_FRAGS];
- unsigned int size = *size_used;
+ unsigned int size = 0;
ssize_t copied, use;
size_t offset;
@@ -1397,8 +1393,7 @@ static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
sg_mark_end(&to[num_elem - 1]);
out:
if (rc)
- iov_iter_revert(from, size - *size_used);
- *size_used = size;
+ iov_iter_revert(from, size);
*pages_used = num_elem;
return rc;
@@ -1415,12 +1410,13 @@ out:
static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
struct iov_iter *out_iov,
struct scatterlist *out_sg,
- int *chunk, bool *zc, bool async)
+ struct tls_decrypt_arg *darg)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct strp_msg *rxm = strp_msg(skb);
+ struct tls_msg *tlm = tls_msg(skb);
int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0;
struct aead_request *aead_req;
struct sk_buff *unused;
@@ -1431,7 +1427,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
prot->tail_size;
int iv_offset = 0;
- if (*zc && (out_iov || out_sg)) {
+ if (darg->zc && (out_iov || out_sg)) {
if (out_iov)
n_sgout = 1 +
iov_iter_npages_cap(out_iov, INT_MAX, data_len);
@@ -1441,7 +1437,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
rxm->full_len - prot->prepend_size);
} else {
n_sgout = 0;
- *zc = false;
+ darg->zc = false;
n_sgin = skb_cow_data(skb, 0, &unused);
}
@@ -1456,7 +1452,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
mem_size = aead_size + (nsg * sizeof(struct scatterlist));
mem_size = mem_size + prot->aad_size;
- mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv);
+ mem_size = mem_size + MAX_IV_SIZE;
/* Allocate a single block of memory which contains
* aead_req || sgin[] || sgout[] || aad || iv.
@@ -1486,26 +1482,26 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
}
/* Prepare IV */
- err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
- iv + iv_offset + prot->salt_size,
- prot->iv_size);
- if (err < 0) {
- kfree(mem);
- return err;
- }
if (prot->version == TLS_1_3_VERSION ||
- prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305)
+ prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) {
memcpy(iv + iv_offset, tls_ctx->rx.iv,
prot->iv_size + prot->salt_size);
- else
+ } else {
+ err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
+ iv + iv_offset + prot->salt_size,
+ prot->iv_size);
+ if (err < 0) {
+ kfree(mem);
+ return err;
+ }
memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size);
-
+ }
xor_iv_with_seq(prot, iv + iv_offset, tls_ctx->rx.rec_seq);
/* Prepare AAD */
tls_make_aad(aad, rxm->full_len - prot->overhead_size +
prot->tail_size,
- tls_ctx->rx.rec_seq, ctx->control, prot);
+ tls_ctx->rx.rec_seq, tlm->control, prot);
/* Prepare sgin */
sg_init_table(sgin, n_sgin);
@@ -1523,9 +1519,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
sg_init_table(sgout, n_sgout);
sg_set_buf(&sgout[0], aad, prot->aad_size);
- *chunk = 0;
- err = tls_setup_from_iter(sk, out_iov, data_len,
- &pages, chunk, &sgout[1],
+ err = tls_setup_from_iter(out_iov, data_len,
+ &pages, &sgout[1],
(n_sgout - 1));
if (err < 0)
goto fallback_to_reg_recv;
@@ -1538,15 +1533,14 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
fallback_to_reg_recv:
sgout = sgin;
pages = 0;
- *chunk = data_len;
- *zc = false;
+ darg->zc = false;
}
/* Prepare and submit AEAD request */
err = tls_do_decryption(sk, skb, sgin, sgout, iv,
- data_len, aead_req, async);
- if (err == -EINPROGRESS)
- return err;
+ data_len, aead_req, darg);
+ if (darg->async)
+ return 0;
/* Release the pages in case iov was mapped to pages */
for (; pages > 0; pages--)
@@ -1557,87 +1551,81 @@ fallback_to_reg_recv:
}
static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *dest, int *chunk, bool *zc,
- bool async)
+ struct iov_iter *dest,
+ struct tls_decrypt_arg *darg)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct strp_msg *rxm = strp_msg(skb);
- int pad, err = 0;
+ struct tls_msg *tlm = tls_msg(skb);
+ int pad, err;
- if (!ctx->decrypted) {
- if (tls_ctx->rx_conf == TLS_HW) {
- err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
- if (err < 0)
- return err;
- }
+ if (tlm->decrypted) {
+ darg->zc = false;
+ return 0;
+ }
- /* Still not decrypted after tls_device */
- if (!ctx->decrypted) {
- err = decrypt_internal(sk, skb, dest, NULL, chunk, zc,
- async);
- if (err < 0) {
- if (err == -EINPROGRESS)
- tls_advance_record_sn(sk, prot,
- &tls_ctx->rx);
- else if (err == -EBADMSG)
- TLS_INC_STATS(sock_net(sk),
- LINUX_MIB_TLSDECRYPTERROR);
- return err;
- }
- } else {
- *zc = false;
+ if (tls_ctx->rx_conf == TLS_HW) {
+ err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
+ if (err < 0)
+ return err;
+ if (err > 0) {
+ tlm->decrypted = 1;
+ darg->zc = false;
+ goto decrypt_done;
}
+ }
- pad = padding_length(ctx, prot, skb);
- if (pad < 0)
- return pad;
+ err = decrypt_internal(sk, skb, dest, NULL, darg);
+ if (err < 0)
+ return err;
+ if (darg->async)
+ goto decrypt_next;
- rxm->full_len -= pad;
- rxm->offset += prot->prepend_size;
- rxm->full_len -= prot->overhead_size;
- tls_advance_record_sn(sk, prot, &tls_ctx->rx);
- ctx->decrypted = 1;
- ctx->saved_data_ready(sk);
- } else {
- *zc = false;
- }
+decrypt_done:
+ pad = padding_length(prot, skb);
+ if (pad < 0)
+ return pad;
- return err;
+ rxm->full_len -= pad;
+ rxm->offset += prot->prepend_size;
+ rxm->full_len -= prot->overhead_size;
+ tlm->decrypted = 1;
+decrypt_next:
+ tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+
+ return 0;
}
int decrypt_skb(struct sock *sk, struct sk_buff *skb,
struct scatterlist *sgout)
{
- bool zc = true;
- int chunk;
+ struct tls_decrypt_arg darg = { .zc = true, };
- return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false);
+ return decrypt_internal(sk, skb, NULL, sgout, &darg);
}
-static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
- unsigned int len)
+static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm,
+ u8 *control)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-
- if (skb) {
- struct strp_msg *rxm = strp_msg(skb);
-
- if (len < rxm->full_len) {
- rxm->offset += len;
- rxm->full_len -= len;
- return false;
+ int err;
+
+ if (!*control) {
+ *control = tlm->control;
+ if (!*control)
+ return -EBADMSG;
+
+ err = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+ sizeof(*control), control);
+ if (*control != TLS_RECORD_TYPE_DATA) {
+ if (err || msg->msg_flags & MSG_CTRUNC)
+ return -EIO;
}
- consume_skb(skb);
+ } else if (*control != tlm->control) {
+ return 0;
}
- /* Finished with message */
- ctx->recv_pkt = NULL;
- __strp_unpause(&ctx->strp);
-
- return true;
+ return 1;
}
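tls_record_content_type() centralizes the record-type and cmsg bookkeeping that was previously duplicated between process_rx_list() and tls_sw_recvmsg(). Its return value is tri-state: negative on error, 0 when the next record's type differs from what has already been handed to userspace (stop there), and 1 to keep going. A stand-alone model of the caller contract (a simplified sketch, not the kernel helper):

    #include <errno.h>
    #include <stdio.h>

    static int record_content_type(unsigned char *seen, unsigned char cur)
    {
            if (!*seen) {
                    if (!cur)
                            return -EBADMSG;
                    *seen = cur; /* the real code also emits the cmsg here */
                    return 1;
            }
            return *seen == cur ? 1 : 0; /* 0: stop at a type boundary */
    }

    int main(void)
    {
            unsigned char seen = 0;

            printf("%d ", record_content_type(&seen, 23));  /* 1: first record */
            printf("%d ", record_content_type(&seen, 23));  /* 1: same type */
            printf("%d\n", record_content_type(&seen, 21)); /* 0: alert, stop */
            return 0;
    }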
/* This function traverses the rx_list in tls receive context to copy the
@@ -1648,31 +1636,23 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
static int process_rx_list(struct tls_sw_context_rx *ctx,
struct msghdr *msg,
u8 *control,
- bool *cmsg,
size_t skip,
size_t len,
bool zc,
bool is_peek)
{
struct sk_buff *skb = skb_peek(&ctx->rx_list);
- u8 ctrl = *control;
- u8 msgc = *cmsg;
struct tls_msg *tlm;
ssize_t copied = 0;
-
- /* Set the record type in 'control' if caller didn't pass it */
- if (!ctrl && skb) {
- tlm = tls_msg(skb);
- ctrl = tlm->control;
- }
+ int err;
while (skip && skb) {
struct strp_msg *rxm = strp_msg(skb);
tlm = tls_msg(skb);
- /* Cannot process a record of different type */
- if (ctrl != tlm->control)
- return 0;
+ err = tls_record_content_type(msg, tlm, control);
+ if (err <= 0)
+ goto out;
if (skip < rxm->full_len)
break;
@@ -1688,30 +1668,15 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
tlm = tls_msg(skb);
- /* Cannot process a record of different type */
- if (ctrl != tlm->control)
- return 0;
-
- /* Set record type if not already done. For a non-data record,
- * do not proceed if record type could not be copied.
- */
- if (!msgc) {
- int cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
- sizeof(ctrl), &ctrl);
- msgc = true;
- if (ctrl != TLS_RECORD_TYPE_DATA) {
- if (cerr || msg->msg_flags & MSG_CTRUNC)
- return -EIO;
-
- *cmsg = msgc;
- }
- }
+ err = tls_record_content_type(msg, tlm, control);
+ if (err <= 0)
+ goto out;
if (!zc || (rxm->full_len - skip) > len) {
- int err = skb_copy_datagram_msg(skb, rxm->offset + skip,
+ err = skb_copy_datagram_msg(skb, rxm->offset + skip,
msg, chunk);
if (err < 0)
- return err;
+ goto out;
}
len = len - chunk;
@@ -1738,21 +1703,21 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
next_skb = skb_peek_next(skb, &ctx->rx_list);
if (!is_peek) {
- skb_unlink(skb, &ctx->rx_list);
+ __skb_unlink(skb, &ctx->rx_list);
consume_skb(skb);
}
skb = next_skb;
}
+ err = 0;
- *control = ctrl;
- return copied;
+out:
+ return copied ? : err;
}
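The `return copied ? : err;` on the way out is the GNU `?:` extension for `copied ? copied : err`: report partial progress when any bytes were delivered, and surface the error only otherwise, the usual recvmsg convention. Portable spelling for reference:

    /* Equivalent portable form of the GNU "?:" extension used above. */
    static long finish(long copied, int err)
    {
            return copied ? copied : err; /* progress wins over error */
    }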
int tls_sw_recvmsg(struct sock *sk,
struct msghdr *msg,
size_t len,
- int nonblock,
int flags,
int *addr_len)
{
@@ -1766,16 +1731,13 @@ int tls_sw_recvmsg(struct sock *sk,
struct tls_msg *tlm;
struct sk_buff *skb;
ssize_t copied = 0;
- bool cmsg = false;
+ bool async = false;
int target, err = 0;
long timeo;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
bool bpf_strp_enabled;
- int num_async = 0;
- int pending;
-
- flags |= nonblock;
+ bool zc_capable;
if (unlikely(flags & MSG_ERRQUEUE))
return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
@@ -1784,81 +1746,64 @@ int tls_sw_recvmsg(struct sock *sk,
lock_sock(sk);
bpf_strp_enabled = sk_psock_strp_enabled(psock);
+ /* If crypto failed the connection is broken */
+ err = ctx->async_wait.err;
+ if (err)
+ goto end;
+
/* Process pending decrypted records. It must be non-zero-copy */
- err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
- is_peek);
- if (err < 0) {
- tls_err_abort(sk, err);
+ err = process_rx_list(ctx, msg, &control, 0, len, false, is_peek);
+ if (err < 0)
goto end;
- } else {
- copied = err;
- }
+ copied = err;
if (len <= copied)
- goto recv_end;
+ goto end;
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
len = len - copied;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek &&
+ prot->version != TLS_1_3_VERSION;
+ decrypted = 0;
while (len && (decrypted + copied < target || ctx->recv_pkt)) {
- bool retain_skb = false;
- bool zc = false;
- int to_decrypt;
- int chunk = 0;
- bool async_capable;
- bool async = false;
+ struct tls_decrypt_arg darg = {};
+ int to_decrypt, chunk;
skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err);
if (!skb) {
if (psock) {
- int ret = sk_msg_recvmsg(sk, psock, msg, len,
- flags);
-
- if (ret > 0) {
- decrypted += ret;
- len -= ret;
- continue;
- }
+ chunk = sk_msg_recvmsg(sk, psock, msg, len,
+ flags);
+ if (chunk > 0)
+ goto leave_on_list;
}
goto recv_end;
- } else {
- tlm = tls_msg(skb);
- if (prot->version == TLS_1_3_VERSION)
- tlm->control = 0;
- else
- tlm->control = ctx->control;
}
rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
to_decrypt = rxm->full_len - prot->overhead_size;
- if (to_decrypt <= len && !is_kvec && !is_peek &&
- ctx->control == TLS_RECORD_TYPE_DATA &&
- prot->version != TLS_1_3_VERSION &&
- !bpf_strp_enabled)
- zc = true;
+ if (zc_capable && to_decrypt <= len &&
+ tlm->control == TLS_RECORD_TYPE_DATA)
+ darg.zc = true;
/* Do not use async mode if record is non-data */
- if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
- async_capable = ctx->async_capable;
+ if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
+ darg.async = ctx->async_capable;
else
- async_capable = false;
+ darg.async = false;
- err = decrypt_skb_update(sk, skb, &msg->msg_iter,
- &chunk, &zc, async_capable);
- if (err < 0 && err != -EINPROGRESS) {
+ err = decrypt_skb_update(sk, skb, &msg->msg_iter, &darg);
+ if (err < 0) {
tls_err_abort(sk, -EBADMSG);
goto recv_end;
}
- if (err == -EINPROGRESS) {
- async = true;
- num_async++;
- } else if (prot->version == TLS_1_3_VERSION) {
- tlm->control = ctx->control;
- }
+ async |= darg.async;
/* If the type of records being processed is not known yet,
* set it to record type just dequeued. If it is already known,
@@ -1867,124 +1812,99 @@ int tls_sw_recvmsg(struct sock *sk,
* is known just after record is dequeued from stream parser.
* For tls1.3, we disable async.
*/
-
- if (!control)
- control = tlm->control;
- else if (control != tlm->control)
+ err = tls_record_content_type(msg, tlm, &control);
+ if (err <= 0)
goto recv_end;
- if (!cmsg) {
- int cerr;
-
- cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
- sizeof(control), &control);
- cmsg = true;
- if (control != TLS_RECORD_TYPE_DATA) {
- if (cerr || msg->msg_flags & MSG_CTRUNC) {
- err = -EIO;
- goto recv_end;
- }
- }
+ ctx->recv_pkt = NULL;
+ __strp_unpause(&ctx->strp);
+ __skb_queue_tail(&ctx->rx_list, skb);
+
+ if (async) {
+ /* TLS 1.2-only, to_decrypt must be text length */
+ chunk = min_t(int, to_decrypt, len);
+leave_on_list:
+ decrypted += chunk;
+ len -= chunk;
+ continue;
}
+ /* TLS 1.3 may have updated the length by more than overhead */
+ chunk = rxm->full_len;
- if (async)
- goto pick_next_record;
+ if (!darg.zc) {
+ bool partially_consumed = chunk > len;
- if (!zc) {
if (bpf_strp_enabled) {
err = sk_psock_tls_strp_read(psock, skb);
if (err != __SK_PASS) {
rxm->offset = rxm->offset + rxm->full_len;
rxm->full_len = 0;
+ __skb_unlink(skb, &ctx->rx_list);
if (err == __SK_DROP)
consume_skb(skb);
- ctx->recv_pkt = NULL;
- __strp_unpause(&ctx->strp);
continue;
}
}
- if (rxm->full_len > len) {
- retain_skb = true;
+ if (partially_consumed)
chunk = len;
- } else {
- chunk = rxm->full_len;
- }
err = skb_copy_datagram_msg(skb, rxm->offset,
msg, chunk);
if (err < 0)
goto recv_end;
- if (!is_peek) {
- rxm->offset = rxm->offset + chunk;
- rxm->full_len = rxm->full_len - chunk;
+ if (is_peek)
+ goto leave_on_list;
+
+ if (partially_consumed) {
+ rxm->offset += chunk;
+ rxm->full_len -= chunk;
+ goto leave_on_list;
}
}
-pick_next_record:
- if (chunk > len)
- chunk = len;
-
decrypted += chunk;
len -= chunk;
- /* For async or peek case, queue the current skb */
- if (async || is_peek || retain_skb) {
- skb_queue_tail(&ctx->rx_list, skb);
- skb = NULL;
- }
+ __skb_unlink(skb, &ctx->rx_list);
+ consume_skb(skb);
- if (tls_sw_advance_skb(sk, skb, chunk)) {
- /* Return full control message to
- * userspace before trying to parse
- * another message type
- */
- msg->msg_flags |= MSG_EOR;
- if (control != TLS_RECORD_TYPE_DATA)
- goto recv_end;
- } else {
+ /* Return full control message to userspace before trying
+ * to parse another message type
+ */
+ msg->msg_flags |= MSG_EOR;
+ if (control != TLS_RECORD_TYPE_DATA)
break;
- }
}
recv_end:
- if (num_async) {
+ if (async) {
+ int ret, pending;
+
/* Wait for all previously submitted records to be decrypted */
spin_lock_bh(&ctx->decrypt_compl_lock);
- ctx->async_notify = true;
+ reinit_completion(&ctx->async_wait.completion);
pending = atomic_read(&ctx->decrypt_pending);
spin_unlock_bh(&ctx->decrypt_compl_lock);
if (pending) {
- err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- if (err) {
- /* one of async decrypt failed */
- tls_err_abort(sk, err);
- copied = 0;
+ ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ if (ret) {
+ if (err >= 0 || err == -EINPROGRESS)
+ err = ret;
decrypted = 0;
goto end;
}
- } else {
- reinit_completion(&ctx->async_wait.completion);
}
- /* There can be no concurrent accesses, since we have no
- * pending decrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
-
/* Drain records from the rx_list & copy if required */
if (is_peek || is_kvec)
- err = process_rx_list(ctx, msg, &control, &cmsg, copied,
+ err = process_rx_list(ctx, msg, &control, copied,
decrypted, false, is_peek);
else
- err = process_rx_list(ctx, msg, &control, &cmsg, 0,
+ err = process_rx_list(ctx, msg, &control, 0,
decrypted, true, is_peek);
- if (err < 0) {
- tls_err_abort(sk, err);
- copied = 0;
- goto end;
- }
+ decrypted = max(err, 0);
}
copied += decrypted;
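With `ctx->async_notify` gone, the decrypt callback completes `async_wait` whenever `decrypt_pending` drops to zero, and the receive path re-arms the completion under `decrypt_compl_lock` before sampling the counter, so a callback firing between the two steps cannot be lost. A user-space analogue of that drain pattern, using a mutex and condition variable in place of the kernel completion (illustrative only, not kernel code):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static int pending = 1, done;

    static void *completer(void *arg)
    {
            pthread_mutex_lock(&lock);
            if (--pending == 0) { /* last in-flight op wakes the waiter */
                    done = 1;
                    pthread_cond_signal(&cond);
            }
            pthread_mutex_unlock(&lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, completer, NULL);
            pthread_mutex_lock(&lock);
            done = 0;                /* "reinit_completion" before sampling */
            while (pending && !done) /* wait only if work is still in flight */
                    pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);
            pthread_join(t, NULL);
            puts("drained");
            return 0;
    }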
@@ -2005,13 +1925,13 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct strp_msg *rxm = NULL;
struct sock *sk = sock->sk;
+ struct tls_msg *tlm;
struct sk_buff *skb;
ssize_t copied = 0;
bool from_queue;
int err = 0;
long timeo;
int chunk;
- bool zc = false;
lock_sock(sk);
@@ -2021,26 +1941,29 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
if (from_queue) {
skb = __skb_dequeue(&ctx->rx_list);
} else {
+ struct tls_decrypt_arg darg = {};
+
skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo,
&err);
if (!skb)
goto splice_read_end;
- err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
+ err = decrypt_skb_update(sk, skb, NULL, &darg);
if (err < 0) {
tls_err_abort(sk, -EBADMSG);
goto splice_read_end;
}
}
+ rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
+
/* splice does not support reading control messages */
- if (ctx->control != TLS_RECORD_TYPE_DATA) {
+ if (tlm->control != TLS_RECORD_TYPE_DATA) {
err = -EINVAL;
goto splice_read_end;
}
- rxm = strp_msg(skb);
-
chunk = min_t(unsigned int, rxm->full_len, len);
copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
if (copied < 0)
@@ -2084,10 +2007,10 @@ bool tls_sw_sock_is_readable(struct sock *sk)
static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
{
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
struct strp_msg *rxm = strp_msg(skb);
+ struct tls_msg *tlm = tls_msg(skb);
size_t cipher_overhead;
size_t data_len = 0;
int ret;
@@ -2104,11 +2027,11 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
/* Linearize header to local buffer */
ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size);
-
if (ret < 0)
goto read_failure;
- ctx->control = header[0];
+ tlm->decrypted = 0;
+ tlm->control = header[0];
data_len = ((header[4] & 0xFF) | (header[3] << 8));
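tls_read_size() runs in the strparser before the record body is available, peeking only at the 5-byte record header: byte 0 is the content type (now stashed per-skb in `tlm->control`, with the `decrypted` bit cleared), and bytes 3-4 carry the big-endian payload length that `(header[4] & 0xFF) | (header[3] << 8)` reconstructs. A stand-alone parse of such a header:

    #include <stdio.h>

    int main(void)
    {
            /* TLS record header: type, version(2), length(2, big endian). */
            unsigned char header[5] = { 23, 3, 3, 0x01, 0x2c };

            unsigned char type = header[0];
            unsigned int len = (header[4] & 0xFF) | (header[3] << 8);

            printf("type=%u len=%u\n", type, len); /* type=23 len=300 */
            return 0;
    }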
@@ -2149,8 +2072,6 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- ctx->decrypted = 0;
-
ctx->recv_pkt = skb;
strp_pause(strp);
@@ -2241,7 +2162,7 @@ void tls_sw_release_resources_rx(struct sock *sk)
if (ctx->aead_recv) {
kfree_skb(ctx->recv_pkt);
ctx->recv_pkt = NULL;
- skb_queue_purge(&ctx->rx_list);
+ __skb_queue_purge(&ctx->rx_list);
crypto_free_aead(ctx->aead_recv);
strp_stop(&ctx->strp);
/* If tls_sw_strparser_arm() was not called (cleanup paths)
@@ -2501,7 +2422,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
/* Sanity-check the sizes for stack allocations. */
if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE ||
- rec_seq_size > TLS_MAX_REC_SEQ_SIZE) {
+ rec_seq_size > TLS_MAX_REC_SEQ_SIZE || tag_size != TLS_TAG_SIZE) {
rc = -EINVAL;
goto free_priv;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e71a312faa1e..e1dd9e9c8452 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1643,7 +1643,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
* so that no locks are necessary.
*/
- skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
+ skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ &err);
if (!skb) {
/* This means receive shutdown. */
if (err == 0)
@@ -2483,8 +2484,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si
const struct proto *prot = READ_ONCE(sk->sk_prot);
if (prot != &unix_dgram_proto)
- return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, NULL);
+ return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
return __unix_dgram_recvmsg(sk, msg, size, flags);
}
@@ -2500,7 +2500,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
int used, err;
mutex_lock(&u->iolock);
- skb = skb_recv_datagram(sk, 0, 1, &err);
+ skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
mutex_unlock(&u->iolock);
if (!skb)
return err;
@@ -2916,8 +2916,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
const struct proto *prot = READ_ONCE(sk->sk_prot);
if (prot != &unix_stream_proto)
- return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, NULL);
+ return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
return unix_stream_read_generic(&state, true);
}
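The unix_accept() hunk translates O_NONBLOCK into MSG_DONTWAIT by hand because the two constants come from different flag spaces (file-status flags vs. per-call msg flags) and generally have different values, so passing the accept flags straight through would be wrong. A quick check (the printed values are platform-specific):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
            /* Different constants from different flag namespaces. */
            printf("O_NONBLOCK   = %#x\n", O_NONBLOCK);
            printf("MSG_DONTWAIT = %#x\n", MSG_DONTWAIT);

            int flags = O_NONBLOCK;
            int msg_flags = (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;

            printf("translated   = %#x\n", msg_flags);
            return 0;
    }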
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index 452376c6f419..7cf14c6b1725 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -48,8 +48,7 @@ static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
}
static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
- size_t len, int nonblock, int flags,
- int *addr_len)
+ size_t len, int flags, int *addr_len)
{
struct unix_sock *u = unix_sk(sk);
struct sk_psock *psock;
@@ -73,7 +72,7 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = unix_msg_wait_data(sk, psock, timeo);
if (data) {
if (!sk_psock_queue_empty(psock))
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index b17dc9745188..b14f0ed7427b 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1732,19 +1732,16 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk,
int flags)
{
int err;
- int noblock;
struct vmci_datagram *dg;
size_t payload_len;
struct sk_buff *skb;
- noblock = flags & MSG_DONTWAIT;
-
if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
return -EOPNOTSUPP;
/* Retrieve the head sk_buff from the socket's receive queue. */
err = 0;
- skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
+ skb = skb_recv_datagram(&vsk->sk, flags, &err);
if (!skb)
return err;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 3a171828638b..6bc2ac8d8146 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1315,8 +1315,7 @@ static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
} else {
/* Now we can treat all alike */
release_sock(sk);
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
lock_sock(sk);
if (!skb)
goto out;
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 1f08ebf7d80c..82d14eea1b5a 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -131,7 +131,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb)
}
static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
struct sk_buff *skb;
@@ -139,8 +139,6 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int copied;
int off = 0;
- flags |= nonblock ? MSG_DONTWAIT : 0;
-
skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err);
if (!skb) {
if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN)
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 38638845db9d..8fff5ad3444b 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -96,7 +96,6 @@ test_cgrp2_sock2-objs := test_cgrp2_sock2.o
xdp1-objs := xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := xdp1_user.o
-xdp_router_ipv4-objs := xdp_router_ipv4_user.o
test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o
trace_event-objs := trace_event_user.o $(TRACE_HELPERS)
@@ -124,6 +123,7 @@ xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE)
xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE)
xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE)
xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE)
+xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE)
# Tell kbuild to always build the programs
always-y := $(tprogs-y)
@@ -153,7 +153,6 @@ always-y += parse_varlen.o parse_simple.o parse_ldabs.o
always-y += test_cgrp2_tc_kern.o
always-y += xdp1_kern.o
always-y += xdp2_kern.o
-always-y += xdp_router_ipv4_kern.o
always-y += test_current_task_under_cgroup_kern.o
always-y += trace_event_kern.o
always-y += sampleip_kern.o
@@ -220,6 +219,7 @@ TPROGLDLIBS_xdp_redirect += -lm
TPROGLDLIBS_xdp_redirect_cpu += -lm
TPROGLDLIBS_xdp_redirect_map += -lm
TPROGLDLIBS_xdp_redirect_map_multi += -lm
+TPROGLDLIBS_xdp_router_ipv4 += -lm -pthread
TPROGLDLIBS_tracex4 += -lrt
TPROGLDLIBS_trace_output += -lrt
TPROGLDLIBS_map_perf_test += -lrt
@@ -342,6 +342,7 @@ $(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h
$(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h
$(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h
$(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h
+$(obj)/xdp_router_ipv4_user.o: $(obj)/xdp_router_ipv4.skel.h
$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
@@ -399,6 +400,7 @@ $(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o
$(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o
$(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o
$(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_router_ipv4.bpf.o: $(obj)/xdp_sample.bpf.o
$(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h
@echo " CLANG-BPF " $@
@@ -409,7 +411,8 @@ $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/x
-c $(filter %.bpf.c,$^) -o $@
LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \
- xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h
+ xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h \
+ xdp_router_ipv4.skel.h
clean-files += $(LINKED_SKELS)
xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o
@@ -417,6 +420,7 @@ xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bp
xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o
xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o
xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o
+xdp_router_ipv4.skel.h-deps := xdp_router_ipv4.bpf.o xdp_sample.bpf.o
LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index a0ebf1833ed3..c55383068384 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -36,6 +36,9 @@ static void verify_map(int map_id)
fprintf(stderr, "failed: map #%d returns value 0\n", map_id);
return;
}
+
+ printf("verify map:%d val: %d\n", map_id, val);
+
val = 0;
if (bpf_map_update_elem(map_id, &key, &val, BPF_ANY) != 0) {
fprintf(stderr, "map_update failed: %s\n", strerror(errno));
diff --git a/samples/bpf/xdp_router_ipv4.bpf.c b/samples/bpf/xdp_router_ipv4.bpf.c
new file mode 100644
index 000000000000..248119ca7938
--- /dev/null
+++ b/samples/bpf/xdp_router_ipv4.bpf.c
@@ -0,0 +1,180 @@
+/* Copyright (C) 2017 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+#define ETH_ALEN 6
+#define ETH_P_8021Q 0x8100
+#define ETH_P_8021AD 0x88A8
+
+struct trie_value {
+ __u8 prefix[4];
+ __be64 value;
+ int ifindex;
+ int metric;
+ __be32 gw;
+};
+
+/* Key for lpm_trie */
+union key_4 {
+ u32 b32[2];
+ u8 b8[8];
+};
+
+struct arp_entry {
+ __be64 mac;
+ __be32 dst;
+};
+
+struct direct_map {
+ struct arp_entry arp;
+ int ifindex;
+ __be64 mac;
+};
+
+/* Map for trie implementation */
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(key_size, 8);
+ __uint(value_size, sizeof(struct trie_value));
+ __uint(max_entries, 50);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} lpm_map SEC(".maps");
+
+/* Map for ARP table */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __be32);
+ __type(value, __be64);
+ __uint(max_entries, 50);
+} arp_table SEC(".maps");
+
+/* Map to keep the exact match entries in the route table */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __be32);
+ __type(value, struct direct_map);
+ __uint(max_entries, 50);
+} exact_match SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 100);
+} tx_port SEC(".maps");
+
+SEC("xdp")
+int xdp_router_ipv4_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off = sizeof(*eth);
+ struct datarec *rec;
+ __be16 h_proto;
+ u32 key = 0;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (rec)
+ NO_TEAR_INC(rec->processed);
+
+ if (data + nh_off > data_end)
+ goto drop;
+
+ h_proto = eth->h_proto;
+ if (h_proto == bpf_htons(ETH_P_8021Q) ||
+ h_proto == bpf_htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vhdr;
+
+ vhdr = data + nh_off;
+ nh_off += sizeof(struct vlan_hdr);
+ if (data + nh_off > data_end)
+ goto drop;
+
+ h_proto = vhdr->h_vlan_encapsulated_proto;
+ }
+
+ switch (bpf_ntohs(h_proto)) {
+ case ETH_P_ARP:
+ if (rec)
+ NO_TEAR_INC(rec->xdp_pass);
+ return XDP_PASS;
+ case ETH_P_IP: {
+ struct iphdr *iph = data + nh_off;
+ struct direct_map *direct_entry;
+ __be64 *dest_mac, *src_mac;
+ int forward_to;
+
+ if (iph + 1 > data_end)
+ goto drop;
+
+ direct_entry = bpf_map_lookup_elem(&exact_match, &iph->daddr);
+
+ /* Check for an exact match; this gives a faster lookup */
+ if (direct_entry && direct_entry->mac &&
+ direct_entry->arp.mac) {
+ src_mac = &direct_entry->mac;
+ dest_mac = &direct_entry->arp.mac;
+ forward_to = direct_entry->ifindex;
+ } else {
+ struct trie_value *prefix_value;
+ union key_4 key4;
+
+ /* Longest-prefix-match lookup in the trie */
+ key4.b32[0] = 32;
+ key4.b8[4] = iph->daddr & 0xff;
+ key4.b8[5] = (iph->daddr >> 8) & 0xff;
+ key4.b8[6] = (iph->daddr >> 16) & 0xff;
+ key4.b8[7] = (iph->daddr >> 24) & 0xff;
+
+ prefix_value = bpf_map_lookup_elem(&lpm_map, &key4);
+ if (!prefix_value)
+ goto drop;
+
+ forward_to = prefix_value->ifindex;
+ src_mac = &prefix_value->value;
+ if (!src_mac)
+ goto drop;
+
+ dest_mac = bpf_map_lookup_elem(&arp_table, &iph->daddr);
+ if (!dest_mac) {
+ if (!prefix_value->gw)
+ goto drop;
+
+ dest_mac = bpf_map_lookup_elem(&arp_table,
+ &prefix_value->gw);
+ }
+ }
+
+ if (src_mac && dest_mac) {
+ int ret;
+
+ __builtin_memcpy(eth->h_dest, dest_mac, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, src_mac, ETH_ALEN);
+
+ ret = bpf_redirect_map(&tx_port, forward_to, 0);
+ if (ret == XDP_REDIRECT) {
+ if (rec)
+ NO_TEAR_INC(rec->xdp_redirect);
+ return ret;
+ }
+ }
+ }
+ default:
+ break;
+ }
+drop:
+ if (rec)
+ NO_TEAR_INC(rec->xdp_drop);
+
+ return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
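User space (the loader below) fills lpm_map with 8-byte keys in the same layout the program builds for lookups: the first 32-bit word is the prefix length (fixed at /32 for a lookup) and bytes 4-7 are the IPv4 address in network byte order, exactly what `key4.b32[0] = 32; key4.b8[4..7] = daddr` assembles. A stand-alone sketch of the packing (the example address is hypothetical):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Same layout as the program's union key_4 (struct bpf_lpm_trie_key). */
    union key_4 {
            uint32_t b32[2];
            uint8_t b8[8];
    };

    int main(void)
    {
            /* 192.168.0.1 in network byte order, written assuming a
             * little-endian host (memory bytes c0 a8 00 01). */
            uint32_t daddr_be = 0x0100a8c0;
            union key_4 key;

            key.b32[0] = 32;                  /* prefix length: exact /32 */
            memcpy(&key.b8[4], &daddr_be, 4); /* address bytes 4..7 */

            for (int i = 0; i < 8; i++)
                    printf("%02x ", key.b8[i]);
            putchar('\n');
            return 0;
    }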
diff --git a/samples/bpf/xdp_router_ipv4_kern.c b/samples/bpf/xdp_router_ipv4_kern.c
deleted file mode 100644
index b37ca2b13063..000000000000
--- a/samples/bpf/xdp_router_ipv4_kern.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/* Copyright (C) 2017 Cavium, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-#include <linux/slab.h>
-#include <net/ip_fib.h>
-
-struct trie_value {
- __u8 prefix[4];
- __be64 value;
- int ifindex;
- int metric;
- __be32 gw;
-};
-
-/* Key for lpm_trie*/
-union key_4 {
- u32 b32[2];
- u8 b8[8];
-};
-
-struct arp_entry {
- __be64 mac;
- __be32 dst;
-};
-
-struct direct_map {
- struct arp_entry arp;
- int ifindex;
- __be64 mac;
-};
-
-/* Map for trie implementation*/
-struct {
- __uint(type, BPF_MAP_TYPE_LPM_TRIE);
- __uint(key_size, 8);
- __uint(value_size, sizeof(struct trie_value));
- __uint(max_entries, 50);
- __uint(map_flags, BPF_F_NO_PREALLOC);
-} lpm_map SEC(".maps");
-
-/* Map for counter*/
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, u64);
- __uint(max_entries, 256);
-} rxcnt SEC(".maps");
-
-/* Map for ARP table*/
-struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, __be32);
- __type(value, __be64);
- __uint(max_entries, 50);
-} arp_table SEC(".maps");
-
-/* Map to keep the exact match entries in the route table*/
-struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __type(key, __be32);
- __type(value, struct direct_map);
- __uint(max_entries, 50);
-} exact_match SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_DEVMAP);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
- __uint(max_entries, 100);
-} tx_port SEC(".maps");
-
-/* Function to set source and destination mac of the packet */
-static inline void set_src_dst_mac(void *data, void *src, void *dst)
-{
- unsigned short *source = src;
- unsigned short *dest = dst;
- unsigned short *p = data;
-
- __builtin_memcpy(p, dest, 6);
- __builtin_memcpy(p + 3, source, 6);
-}
-
-/* Parse IPV4 packet to get SRC, DST IP and protocol */
-static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
- __be32 *src, __be32 *dest)
-{
- struct iphdr *iph = data + nh_off;
-
- if (iph + 1 > data_end)
- return 0;
- *src = iph->saddr;
- *dest = iph->daddr;
- return iph->protocol;
-}
-
-SEC("xdp_router_ipv4")
-int xdp_router_ipv4_prog(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- __be64 *dest_mac = NULL, *src_mac = NULL;
- void *data = (void *)(long)ctx->data;
- struct trie_value *prefix_value;
- int rc = XDP_DROP, forward_to;
- struct ethhdr *eth = data;
- union key_4 key4;
- long *value;
- u16 h_proto;
- u32 ipproto;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return rc;
-
- h_proto = eth->h_proto;
-
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
- struct vlan_hdr *vhdr;
-
- vhdr = data + nh_off;
- nh_off += sizeof(struct vlan_hdr);
- if (data + nh_off > data_end)
- return rc;
- h_proto = vhdr->h_vlan_encapsulated_proto;
- }
- if (h_proto == htons(ETH_P_ARP)) {
- return XDP_PASS;
- } else if (h_proto == htons(ETH_P_IP)) {
- struct direct_map *direct_entry;
- __be32 src_ip = 0, dest_ip = 0;
-
- ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip);
- direct_entry = bpf_map_lookup_elem(&exact_match, &dest_ip);
- /* Check for exact match, this would give a faster lookup*/
- if (direct_entry && direct_entry->mac && direct_entry->arp.mac) {
- src_mac = &direct_entry->mac;
- dest_mac = &direct_entry->arp.mac;
- forward_to = direct_entry->ifindex;
- } else {
- /* Look up in the trie for lpm*/
- key4.b32[0] = 32;
- key4.b8[4] = dest_ip & 0xff;
- key4.b8[5] = (dest_ip >> 8) & 0xff;
- key4.b8[6] = (dest_ip >> 16) & 0xff;
- key4.b8[7] = (dest_ip >> 24) & 0xff;
- prefix_value = bpf_map_lookup_elem(&lpm_map, &key4);
- if (!prefix_value)
- return XDP_DROP;
- src_mac = &prefix_value->value;
- if (!src_mac)
- return XDP_DROP;
- dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip);
- if (!dest_mac) {
- if (!prefix_value->gw)
- return XDP_DROP;
- dest_ip = prefix_value->gw;
- dest_mac = bpf_map_lookup_elem(&arp_table, &dest_ip);
- }
- forward_to = prefix_value->ifindex;
- }
- } else {
- ipproto = 0;
- }
- if (src_mac && dest_mac) {
- set_src_dst_mac(data, src_mac, dest_mac);
- value = bpf_map_lookup_elem(&rxcnt, &ipproto);
- if (value)
- *value += 1;
- return bpf_redirect_map(&tx_port, forward_to, 0);
- }
- return rc;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index 6dae87d83e1c..f32bbd5c32bf 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -24,70 +24,40 @@
#include <bpf/libbpf.h>
#include <sys/resource.h>
#include <libgen.h>
+#include <getopt.h>
+#include <pthread.h>
+#include "xdp_sample_user.h"
+#include "xdp_router_ipv4.skel.h"
-int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int total_ifindex;
-static int *ifindex_list;
-static __u32 *prog_id_list;
-char buf[8192];
+static const char *__doc__ =
+"XDP IPv4 router implementation\n"
+"Usage: xdp_router_ipv4 <IFNAME-0> ... <IFNAME-N>\n";
+
+static char buf[8192];
static int lpm_map_fd;
-static int rxcnt_map_fd;
static int arp_table_map_fd;
static int exact_match_map_fd;
static int tx_port_map_fd;
-static int get_route_table(int rtm_family);
-static void int_exit(int sig)
-{
- __u32 prog_id = 0;
- int i = 0;
+static bool routes_thread_exit;
+static int interval = 5;
- for (i = 0; i < total_ifindex; i++) {
- if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) {
- printf("bpf_xdp_query_id on iface %d failed\n",
- ifindex_list[i]);
- exit(1);
- }
- if (prog_id_list[i] == prog_id)
- bpf_xdp_detach(ifindex_list[i], flags, NULL);
- else if (!prog_id)
- printf("couldn't find a prog id on iface %d\n",
- ifindex_list[i]);
- else
- printf("program on iface %d changed, not removing\n",
- ifindex_list[i]);
- prog_id = 0;
- }
- exit(0);
-}
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_EXCEPTION_CNT;
-static void close_and_exit(int sig)
-{
- close(sock);
- close(sock_arp);
+DEFINE_SAMPLE_INIT(xdp_router_ipv4);
- int_exit(0);
-}
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "force", no_argument, NULL, 'F' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "stats", no_argument, NULL, 's' },
+ {}
+};
-/* Get the mac address of the interface given interface name */
-static __be64 getmac(char *iface)
-{
- struct ifreq ifr;
- __be64 mac = 0;
- int fd, i;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- ifr.ifr_addr.sa_family = AF_INET;
- strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
- printf("ioctl failed leaving....\n");
- return -1;
- }
- for (i = 0; i < 6 ; i++)
- *((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i];
- close(fd);
- return mac;
-}
+static int get_route_table(int rtm_family);
static int recv_msg(struct sockaddr_nl sock_addr, int sock)
{
@@ -130,7 +100,6 @@ static void read_route(struct nlmsghdr *nh, int nll)
int i;
struct route_table {
int dst_len, iface, metric;
- char *iface_name;
__be32 dst, gw;
__be64 mac;
} route;
@@ -145,17 +114,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
__be64 mac;
} direct_entry;
- if (nh->nlmsg_type == RTM_DELROUTE)
- printf("DELETING Route entry\n");
- else if (nh->nlmsg_type == RTM_GETROUTE)
- printf("READING Route entry\n");
- else if (nh->nlmsg_type == RTM_NEWROUTE)
- printf("NEW Route entry\n");
- else
- printf("%d\n", nh->nlmsg_type);
-
memset(&route, 0, sizeof(route));
- printf("Destination Gateway Genmask Metric Iface\n");
for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
rtm_family = rt_msg->rtm_family;
@@ -192,11 +151,7 @@ static void read_route(struct nlmsghdr *nh, int nll)
route.gw = atoi(gws);
route.iface = atoi(ifs);
route.metric = atoi(metrics);
- route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
- route.iface_name = if_indextoname(route.iface, route.iface_name);
- route.mac = getmac(route.iface_name);
- if (route.mac == -1)
- int_exit(0);
+ assert(get_mac_addr(route.iface, &route.mac) == 0);
assert(bpf_map_update_elem(tx_port_map_fd,
&route.iface, &route.iface, 0) == 0);
if (rtm_family == AF_INET) {
@@ -207,7 +162,6 @@ static void read_route(struct nlmsghdr *nh, int nll)
int metric;
__be32 gw;
} *prefix_value;
- struct in_addr dst_addr, gw_addr, mask_addr;
prefix_key = alloca(sizeof(*prefix_key) + 3);
prefix_value = alloca(sizeof(*prefix_value));
@@ -235,17 +189,6 @@ static void read_route(struct nlmsghdr *nh, int nll)
for (i = 0; i < 4; i++)
prefix_key->data[i] = (route.dst >> i * 8) & 0xff;
- dst_addr.s_addr = route.dst;
- printf("%-16s", inet_ntoa(dst_addr));
-
- gw_addr.s_addr = route.gw;
- printf("%-16s", inet_ntoa(gw_addr));
-
- mask_addr.s_addr = htonl(~(0xffffffffU >> route.dst_len));
- printf("%-16s%-7d%s\n", inet_ntoa(mask_addr),
- route.metric,
- route.iface_name);
-
if (bpf_map_lookup_elem(lpm_map_fd, prefix_key,
prefix_value) < 0) {
for (i = 0; i < 4; i++)
@@ -261,13 +204,6 @@ static void read_route(struct nlmsghdr *nh, int nll)
) == 0);
} else {
if (nh->nlmsg_type == RTM_DELROUTE) {
- printf("deleting entry\n");
- printf("prefix key=%d.%d.%d.%d/%d",
- prefix_key->data[0],
- prefix_key->data[1],
- prefix_key->data[2],
- prefix_key->data[3],
- prefix_key->prefixlen);
assert(bpf_map_delete_elem(lpm_map_fd,
prefix_key
) == 0);
@@ -331,14 +267,14 @@ static int get_route_table(int rtm_family)
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ return -errno;
}
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- ret = -1;
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
+ ret = -errno;
goto cleanup;
}
memset(&req, 0, sizeof(req));
@@ -357,15 +293,15 @@ static int get_route_table(int rtm_family)
msg.msg_iovlen = 1;
ret = sendmsg(sock, &msg, 0);
if (ret < 0) {
- printf("send to netlink: %s\n", strerror(errno));
- ret = -1;
+ fprintf(stderr, "send to netlink: %s\n", strerror(errno));
+ ret = -errno;
goto cleanup;
}
memset(buf, 0, sizeof(buf));
nll = recv_msg(sa, sock);
if (nll < 0) {
- printf("recv from netlink: %s\n", strerror(nll));
- ret = -1;
+ fprintf(stderr, "recv from netlink: %s\n", strerror(nll));
+ ret = nll;
goto cleanup;
}
nh = (struct nlmsghdr *)buf;
@@ -395,14 +331,7 @@ static void read_arp(struct nlmsghdr *nh, int nll)
__be64 mac;
} direct_entry;
- if (nh->nlmsg_type == RTM_GETNEIGH)
- printf("READING arp entry\n");
- printf("Address HwAddress\n");
for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
- struct in_addr dst_addr;
- char mac_str[18];
- int len = 0, i;
-
rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
ndm_family = rt_msg->ndm_family;
@@ -424,13 +353,6 @@ static void read_arp(struct nlmsghdr *nh, int nll)
arp_entry.dst = atoi(dsts);
arp_entry.mac = atol(mac);
- dst_addr.s_addr = arp_entry.dst;
- for (i = 0; i < 6; i++)
- len += snprintf(mac_str + len, 18 - len, "%02llx%s",
- ((arp_entry.mac >> i * 8) & 0xff),
- i < 5 ? ":" : "");
- printf("%-16s%s\n", inet_ntoa(dst_addr), mac_str);
-
if (ndm_family == AF_INET) {
if (bpf_map_lookup_elem(exact_match_map_fd,
&arp_entry.dst,
@@ -481,14 +403,14 @@ static int get_arp_table(int rtm_family)
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ return -errno;
}
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- ret = -1;
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
+ ret = -errno;
goto cleanup;
}
memset(&req, 0, sizeof(req));
@@ -506,15 +428,15 @@ static int get_arp_table(int rtm_family)
msg.msg_iovlen = 1;
ret = sendmsg(sock, &msg, 0);
if (ret < 0) {
- printf("send to netlink: %s\n", strerror(errno));
- ret = -1;
+ fprintf(stderr, "send to netlink: %s\n", strerror(errno));
+ ret = -errno;
goto cleanup;
}
memset(buf, 0, sizeof(buf));
nll = recv_msg(sa, sock);
if (nll < 0) {
- printf("recv from netlink: %s\n", strerror(nll));
- ret = -1;
+ fprintf(stderr, "recv from netlink: %s\n", strerror(nll));
+ ret = nll;
goto cleanup;
}
nh = (struct nlmsghdr *)buf;
@@ -527,24 +449,17 @@ cleanup:
/* Function to keep track and update changes in route and arp table
* Give regular statistics of packets forwarded
*/
-static int monitor_route(void)
+static void *monitor_routes_thread(void *arg)
{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- const unsigned int nr_keys = 256;
struct pollfd fds_route, fds_arp;
- __u64 prev[nr_keys][nr_cpus];
struct sockaddr_nl la, lr;
- __u64 values[nr_cpus];
+ int sock, sock_arp, nll;
struct nlmsghdr *nh;
- int nll, ret = 0;
- int interval = 5;
- __u32 key;
- int i;
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ return NULL;
}
fcntl(sock, F_SETFL, O_NONBLOCK);
@@ -552,17 +467,19 @@ static int monitor_route(void)
lr.nl_family = AF_NETLINK;
lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- ret = -1;
- goto cleanup;
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
+ close(sock);
+ return NULL;
}
+
fds_route.fd = sock;
fds_route.events = POLL_IN;
sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (sock_arp < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
+ fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
+ close(sock);
+ return NULL;
}
fcntl(sock_arp, F_SETFL, O_NONBLOCK);
@@ -570,51 +487,44 @@ static int monitor_route(void)
la.nl_family = AF_NETLINK;
la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- ret = -1;
+ fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
goto cleanup;
}
+
fds_arp.fd = sock_arp;
fds_arp.events = POLL_IN;
- memset(prev, 0, sizeof(prev));
- do {
- signal(SIGINT, close_and_exit);
- signal(SIGTERM, close_and_exit);
+ /* dump route and arp tables */
+ if (get_arp_table(AF_INET) < 0) {
+ fprintf(stderr, "Failed reading arp table\n");
+ goto cleanup;
+ }
- sleep(interval);
- for (key = 0; key < nr_keys; key++) {
- __u64 sum = 0;
-
- assert(bpf_map_lookup_elem(rxcnt_map_fd,
- &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[key][i]);
- if (sum)
- printf("proto %u: %10llu pkt/s\n",
- key, sum / interval);
- memcpy(prev[key], values, sizeof(values));
- }
+ if (get_route_table(AF_INET) < 0) {
+ fprintf(stderr, "Failed reading route table\n");
+ goto cleanup;
+ }
+ while (!routes_thread_exit) {
memset(buf, 0, sizeof(buf));
if (poll(&fds_route, 1, 3) == POLL_IN) {
nll = recv_msg(lr, sock);
if (nll < 0) {
- printf("recv from netlink: %s\n", strerror(nll));
- ret = -1;
+ fprintf(stderr, "recv from netlink: %s\n",
+ strerror(nll));
goto cleanup;
}
nh = (struct nlmsghdr *)buf;
- printf("Routing table updated.\n");
read_route(nh, nll);
}
+
memset(buf, 0, sizeof(buf));
if (poll(&fds_arp, 1, 3) == POLL_IN) {
nll = recv_msg(la, sock_arp);
if (nll < 0) {
- printf("recv from netlink: %s\n", strerror(nll));
- ret = -1;
+ fprintf(stderr, "recv from netlink: %s\n",
+ strerror(nll));
goto cleanup;
}
@@ -622,132 +532,169 @@ static int monitor_route(void)
read_arp(nh, nll);
}
- } while (1);
+ sleep(interval);
+ }
+
cleanup:
+ close(sock_arp);
close(sock);
- return ret;
+ return NULL;
}
-static void usage(const char *prog)
+static void usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error,
+ struct bpf_object *obj)
{
- fprintf(stderr,
- "%s: %s [OPTS] interface name list\n\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -F force loading prog\n",
- __func__, prog);
+ sample_usage(argv, long_options, doc, mask, error);
}
-int main(int ac, char **argv)
+int main(int argc, char **argv)
{
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- const char *optstr = "SF";
- struct bpf_program *prog;
- struct bpf_object *obj;
- char filename[256];
- char **ifname_list;
- int prog_fd, opt;
- int err, i = 1;
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- total_ifindex = ac - 1;
- ifname_list = (argv + 1);
-
- while ((opt = getopt(ac, argv, optstr)) != -1) {
+ bool error = true, generic = false, force = false;
+ int opt, ret = EXIT_FAIL_BPF;
+ struct xdp_router_ipv4 *skel;
+ int i, total_ifindex = argc - 1;
+ char **ifname_list = argv + 1;
+ pthread_t routes_thread;
+ int longindex = 0;
+
+ if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) {
+ fprintf(stderr, "Failed to set libbpf strict mode: %s\n",
+ strerror(errno));
+ goto end;
+ }
+
+ skel = xdp_router_ipv4__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_router_ipv4__open: %s\n",
+ strerror(errno));
+ goto end;
+ }
+
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n",
+ strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ ret = xdp_router_ipv4__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_router_ipv4__load: %s\n",
+ strerror(errno));
+ goto end_destroy;
+ }
+
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+
+ while ((opt = getopt_long(argc, argv, "si:SFvh",
+ long_options, &longindex)) != -1) {
switch (opt) {
+ case 's':
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ total_ifindex--;
+ ifname_list++;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ total_ifindex -= 2;
+ ifname_list += 2;
+ break;
case 'S':
- flags |= XDP_FLAGS_SKB_MODE;
+ generic = true;
total_ifindex--;
ifname_list++;
break;
case 'F':
- flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
total_ifindex--;
ifname_list++;
break;
+ case 'v':
+ sample_switch_mode();
+ total_ifindex--;
+ ifname_list++;
+ break;
+ case 'h':
+ error = false;
default:
- usage(basename(argv[0]));
- return 1;
+ usage(argv, long_options, __doc__, mask, error, skel->obj);
+ goto end_destroy;
}
}
- if (!(flags & XDP_FLAGS_SKB_MODE))
- flags |= XDP_FLAGS_DRV_MODE;
-
- if (optind == ac) {
- usage(basename(argv[0]));
- return 1;
+ ret = EXIT_FAIL_OPTION;
+ if (optind == argc) {
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_destroy;
}
- obj = bpf_object__open_file(filename, NULL);
- if (libbpf_get_error(obj))
- return 1;
-
- prog = bpf_object__next_program(obj, NULL);
- bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
-
- printf("\n******************loading bpf file*********************\n");
- err = bpf_object__load(obj);
- if (err) {
- printf("bpf_object__load(): %s\n", strerror(errno));
- return 1;
+ lpm_map_fd = bpf_map__fd(skel->maps.lpm_map);
+ if (lpm_map_fd < 0) {
+ fprintf(stderr, "Failed loading lpm_map %s\n",
+ strerror(-lpm_map_fd));
+ goto end_destroy;
}
- prog_fd = bpf_program__fd(prog);
-
- lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map");
- rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
- arp_table_map_fd = bpf_object__find_map_fd_by_name(obj, "arp_table");
- exact_match_map_fd = bpf_object__find_map_fd_by_name(obj,
- "exact_match");
- tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
- if (lpm_map_fd < 0 || rxcnt_map_fd < 0 || arp_table_map_fd < 0 ||
- exact_match_map_fd < 0 || tx_port_map_fd < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- return 1;
+ arp_table_map_fd = bpf_map__fd(skel->maps.arp_table);
+ if (arp_table_map_fd < 0) {
+ fprintf(stderr, "Failed loading arp_table_map_fd %s\n",
+ strerror(-arp_table_map_fd));
+ goto end_destroy;
}
-
- ifindex_list = (int *)calloc(total_ifindex, sizeof(int *));
- for (i = 0; i < total_ifindex; i++) {
- ifindex_list[i] = if_nametoindex(ifname_list[i]);
- if (!ifindex_list[i]) {
- printf("Couldn't translate interface name: %s",
- strerror(errno));
- return 1;
- }
+ exact_match_map_fd = bpf_map__fd(skel->maps.exact_match);
+ if (exact_match_map_fd < 0) {
+ fprintf(stderr, "Failed loading exact_match_map_fd %s\n",
+ strerror(-exact_match_map_fd));
+ goto end_destroy;
+ }
+ tx_port_map_fd = bpf_map__fd(skel->maps.tx_port);
+ if (tx_port_map_fd < 0) {
+ fprintf(stderr, "Failed loading tx_port_map_fd %s\n",
+ strerror(-tx_port_map_fd));
+ goto end_destroy;
}
- prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *));
- for (i = 0; i < total_ifindex; i++) {
- if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) {
- printf("link set xdp fd failed\n");
- int recovery_index = i;
- for (i = 0; i < recovery_index; i++)
- bpf_xdp_detach(ifindex_list[i], flags, NULL);
+ ret = EXIT_FAIL_XDP;
+ for (i = 0; i < total_ifindex; i++) {
+ int index = if_nametoindex(ifname_list[i]);
- return 1;
+ if (!index) {
+ fprintf(stderr, "Interface %s not found %s\n",
+ ifname_list[i], strerror(-tx_port_map_fd));
+ goto end_destroy;
}
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (err) {
- printf("can't get prog info - %s\n", strerror(errno));
- return err;
- }
- prog_id_list[i] = info.id;
- memset(&info, 0, sizeof(info));
- printf("Attached to %d\n", ifindex_list[i]);
+ if (sample_install_xdp(skel->progs.xdp_router_ipv4_prog,
+ index, generic, force) < 0)
+ goto end_destroy;
}
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
- printf("\n*******************ROUTE TABLE*************************\n");
- get_route_table(AF_INET);
- printf("\n*******************ARP TABLE***************************\n");
- get_arp_table(AF_INET);
- if (monitor_route() < 0) {
- printf("Error in receiving route update");
- return 1;
+ ret = pthread_create(&routes_thread, NULL, monitor_routes_thread, NULL);
+ if (ret) {
+		fprintf(stderr, "Failed creating routes_thread: %s\n", strerror(ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
- return 0;
+ ret = sample_run(interval, NULL, NULL);
+ routes_thread_exit = true;
+
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_thread_wait;
+ }
+ ret = EXIT_OK;
+
+end_thread_wait:
+ pthread_join(routes_thread, NULL);
+end_destroy:
+ xdp_router_ipv4__destroy(skel);
+end:
+ sample_exit(ret);
}
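For orientation: after this conversion the sample is driven by the libbpf skeleton and the shared XDP samples harness instead of hand-rolled getopt/attach code. A hypothetical invocation (interface names illustrative) would be `sudo ./xdp_router_ipv4 -i 2 eth0 eth1` to poll route/ARP updates every 2 seconds on two interfaces; `-S` selects generic (skb-mode) XDP and `-F` forces attachment, mirroring the options parsed above.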
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 290998c82de1..f041c4a6a1f2 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -567,7 +567,7 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
res = probe_prog_type_ifindex(prog_type, ifindex);
} else {
- res = libbpf_probe_bpf_prog_type(prog_type, NULL);
+ res = libbpf_probe_bpf_prog_type(prog_type, NULL) > 0;
}
#ifdef USE_LIBCAP
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 97dec81950e5..8fb0116f9136 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -20,6 +20,9 @@ static const char * const link_type_name[] = {
[BPF_LINK_TYPE_CGROUP] = "cgroup",
[BPF_LINK_TYPE_ITER] = "iter",
[BPF_LINK_TYPE_NETNS] = "netns",
+ [BPF_LINK_TYPE_XDP] = "xdp",
+ [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
};
static struct hashmap *link_table;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index bc4e05542c2b..8643b37d4e43 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -68,6 +68,7 @@ const char * const prog_type_name[] = {
[BPF_PROG_TYPE_EXT] = "ext",
[BPF_PROG_TYPE_LSM] = "lsm",
[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
+ [BPF_PROG_TYPE_SYSCALL] = "syscall",
};
const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index b0d8fea1951d..a9162a6c0284 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -33,8 +33,8 @@ struct btf_type {
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
- * bits 24-27: kind (e.g. int, ptr, array...etc)
- * bits 28-30: unused
+ * bits 24-28: kind (e.g. int, ptr, array...etc)
+ * bits 29-30: unused
* bit 31: kind_flag, currently used by
* struct, union and fwd
*/
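Since the kind field now spans bits 24-28, decoders must mask five bits instead of four. A minimal sketch of the corresponding accessors (the kernel defines equivalent BTF_INFO_* macros elsewhere; shown here purely for illustration):

/* kind now occupies bits 24-28, so mask with 0x1f (previously 0x0f) */
#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
#define BTF_INFO_VLEN(info)	((info) & 0xffff)
#define BTF_INFO_KFLAG(info)	((info) >> 31)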
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 94f0a146bb7b..31a1a9015902 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o
+ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
+ usdt.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 064c89e31560..64741c55b8e3 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -239,7 +239,7 @@ install_lib: all_cmd
SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \
- skel_internal.h libbpf_version.h
+ skel_internal.h libbpf_version.h usdt.bpf.h
GEN_HDRS := $(BPF_GENERATED)
INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 1383e26c5d1f..d124e9e533f0 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -2826,10 +2826,8 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
if (err)
goto done;
- if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) {
- err = -EINVAL;
- goto done;
- }
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+ goto done; /* skip core relos parsing */
err = btf_ext_setup_core_relos(btf_ext);
if (err)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 809fe209cdcc..465b7c0996f1 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -483,6 +483,8 @@ struct elf_state {
int st_ops_shndx;
};
+struct usdt_manager;
+
struct bpf_object {
char name[BPF_OBJ_NAME_LEN];
char license[64];
@@ -545,6 +547,8 @@ struct bpf_object {
size_t fd_array_cap;
size_t fd_array_cnt;
+ struct usdt_manager *usdt_man;
+
char path[];
};
@@ -1397,8 +1401,11 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _
for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
Elf64_Sym *sym = elf_sym_by_idx(obj, si);
- if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL ||
- ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
+ if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
+ continue;
+
+ if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+ ELF64_ST_BIND(sym->st_info) != STB_WEAK)
continue;
sname = elf_sym_str(obj, sym->st_name);
@@ -4678,6 +4685,18 @@ static int probe_perf_link(void)
return link_fd < 0 && err == -EBADF;
}
+static int probe_kern_bpf_cookie(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
+ BPF_EXIT_INSN(),
+ };
+ int ret, insn_cnt = ARRAY_SIZE(insns);
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
+ return probe_fd(ret);
+}
+
enum kern_feature_result {
FEAT_UNKNOWN = 0,
FEAT_SUPPORTED = 1,
@@ -4740,6 +4759,9 @@ static struct kern_feature_desc {
[FEAT_MEMCG_ACCOUNT] = {
"memcg-based memory accounting", probe_memcg_account,
},
+ [FEAT_BPF_COOKIE] = {
+ "BPF cookie support", probe_kern_bpf_cookie,
+ },
};
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -5665,10 +5687,17 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
insn_idx = rec->insn_off / BPF_INSN_SZ;
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
if (!prog) {
- pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
- sec_name, insn_idx, i);
- err = -EINVAL;
- goto out;
+ /* When __weak subprog is "overridden" by another instance
+ * of the subprog from a different object file, linker still
+ * appends all the .BTF.ext info that used to belong to that
+ * eliminated subprogram.
+ * This is similar to what x86-64 linker does for relocations.
+ * So just ignore such relocations just like we ignore
+ * subprog instructions when discovering subprograms.
+ */
+ pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
+ sec_name, i, insn_idx);
+ continue;
}
/* no need to apply CO-RE relocation if the program is
* not going to be loaded
@@ -8200,6 +8229,9 @@ void bpf_object__close(struct bpf_object *obj)
if (obj->clear_priv)
obj->clear_priv(obj, obj->priv);
+ usdt_manager_free(obj->usdt_man);
+ obj->usdt_man = NULL;
+
bpf_gen__free(obj->gen_loader);
bpf_object__elf_finish(obj);
bpf_object_unload(obj);
@@ -8630,6 +8662,8 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log
}
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
@@ -8642,11 +8676,12 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe),
- SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE),
+ SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe),
- SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE),
+ SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
SEC_DEF("kprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
SEC_DEF("kretprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
+ SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt),
SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE),
SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED),
SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX),
@@ -9692,14 +9727,6 @@ int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
return bpf_prog_load_xattr2(&attr, pobj, prog_fd);
}
-struct bpf_link {
- int (*detach)(struct bpf_link *link);
- void (*dealloc)(struct bpf_link *link);
- char *pin_path; /* NULL, if not pinned */
- int fd; /* hook FD, -1 if not applicable */
- bool disconnected;
-};
-
/* Replace link's underlying BPF program with the new one */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
@@ -10517,6 +10544,273 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
return pfd;
}
+/* uprobes deal in relative offsets; subtract the base address associated with
+ * the mapped binary. See Documentation/trace/uprobetracer.rst for more
+ * details.
+ */
+static long elf_find_relative_offset(const char *filename, Elf *elf, long addr)
+{
+ size_t n;
+ int i;
+
+ if (elf_getphdrnum(elf, &n)) {
+ pr_warn("elf: failed to find program headers for '%s': %s\n", filename,
+ elf_errmsg(-1));
+ return -ENOENT;
+ }
+
+ for (i = 0; i < n; i++) {
+ int seg_start, seg_end, seg_offset;
+ GElf_Phdr phdr;
+
+ if (!gelf_getphdr(elf, i, &phdr)) {
+ pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename,
+ elf_errmsg(-1));
+ return -ENOENT;
+ }
+ if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X))
+ continue;
+
+ seg_start = phdr.p_vaddr;
+ seg_end = seg_start + phdr.p_memsz;
+ seg_offset = phdr.p_offset;
+ if (addr >= seg_start && addr < seg_end)
+ return addr - seg_start + seg_offset;
+ }
+ pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename);
+ return -ENOENT;
+}
+
+/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
+static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
+{
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ GElf_Shdr sh;
+
+ if (!gelf_getshdr(scn, &sh))
+ continue;
+ if (sh.sh_type == sh_type)
+ return scn;
+ }
+ return NULL;
+}
+
+/* Find offset of function name in object specified by path. "name" matches
+ * symbol name or name@@LIB for library functions.
+ */
+static long elf_find_func_offset(const char *binary_path, const char *name)
+{
+ int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
+ bool is_shared_lib, is_name_qualified;
+ char errmsg[STRERR_BUFSIZE];
+ long ret = -ENOENT;
+ size_t name_len;
+ GElf_Ehdr ehdr;
+ Elf *elf;
+
+ fd = open(binary_path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ ret = -errno;
+ pr_warn("failed to open %s: %s\n", binary_path,
+ libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
+ return ret;
+ }
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf) {
+ pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
+ close(fd);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ if (!gelf_getehdr(elf, &ehdr)) {
+ pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+ /* for shared lib case, we do not need to calculate relative offset */
+ is_shared_lib = ehdr.e_type == ET_DYN;
+
+ name_len = strlen(name);
+ /* Does name specify "@@LIB"? */
+ is_name_qualified = strstr(name, "@@") != NULL;
+
+ /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
+ * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
+	 * linked binary may not have SHT_DYNSYM, so absence of a section should not be
+ * reported as a warning/error.
+ */
+ for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
+ size_t nr_syms, strtabidx, idx;
+ Elf_Data *symbols = NULL;
+ Elf_Scn *scn = NULL;
+ int last_bind = -1;
+ const char *sname;
+ GElf_Shdr sh;
+
+ scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
+ if (!scn) {
+ pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
+ binary_path);
+ continue;
+ }
+ if (!gelf_getshdr(scn, &sh))
+ continue;
+ strtabidx = sh.sh_link;
+ symbols = elf_getdata(scn, 0);
+ if (!symbols) {
+ pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
+ binary_path, elf_errmsg(-1));
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+ nr_syms = symbols->d_size / sh.sh_entsize;
+
+ for (idx = 0; idx < nr_syms; idx++) {
+ int curr_bind;
+ GElf_Sym sym;
+
+ if (!gelf_getsym(symbols, idx, &sym))
+ continue;
+
+ if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
+ continue;
+
+ sname = elf_strptr(elf, strtabidx, sym.st_name);
+ if (!sname)
+ continue;
+
+ curr_bind = GELF_ST_BIND(sym.st_info);
+
+ /* User can specify func, func@@LIB or func@@LIB_VERSION. */
+ if (strncmp(sname, name, name_len) != 0)
+ continue;
+			/* ...but we don't want a search for "foo" to also match "foo2", so any
+ * additional characters in sname should be of the form "@@LIB".
+ */
+ if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
+ continue;
+
+ if (ret >= 0) {
+ /* handle multiple matches */
+ if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
+ /* Only accept one non-weak bind. */
+ pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
+ sname, name, binary_path);
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ } else if (curr_bind == STB_WEAK) {
+ /* already have a non-weak bind, and
+ * this is a weak bind, so ignore.
+ */
+ continue;
+ }
+ }
+ ret = sym.st_value;
+ last_bind = curr_bind;
+ }
+ /* For binaries that are not shared libraries, we need relative offset */
+ if (ret > 0 && !is_shared_lib)
+ ret = elf_find_relative_offset(binary_path, elf, ret);
+ if (ret > 0)
+ break;
+ }
+
+ if (ret > 0) {
+ pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
+ ret);
+ } else {
+ if (ret == 0) {
+ pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
+ is_shared_lib ? "should not be 0 in a shared library" :
+ "try using shared library path instead");
+ ret = -ENOENT;
+ } else {
+ pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
+ }
+ }
+out:
+ elf_end(elf);
+ close(fd);
+ return ret;
+}
+
+static const char *arch_specific_lib_paths(void)
+{
+ /*
+ * Based on https://packages.debian.org/sid/libc6.
+ *
+ * Assume that the traced program is built for the same architecture
+ * as libbpf, which should cover the vast majority of cases.
+ */
+#if defined(__x86_64__)
+ return "/lib/x86_64-linux-gnu";
+#elif defined(__i386__)
+ return "/lib/i386-linux-gnu";
+#elif defined(__s390x__)
+ return "/lib/s390x-linux-gnu";
+#elif defined(__s390__)
+ return "/lib/s390-linux-gnu";
+#elif defined(__arm__) && defined(__SOFTFP__)
+ return "/lib/arm-linux-gnueabi";
+#elif defined(__arm__) && !defined(__SOFTFP__)
+ return "/lib/arm-linux-gnueabihf";
+#elif defined(__aarch64__)
+ return "/lib/aarch64-linux-gnu";
+#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
+ return "/lib/mips64el-linux-gnuabi64";
+#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
+ return "/lib/mipsel-linux-gnu";
+#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return "/lib/powerpc64le-linux-gnu";
+#elif defined(__sparc__) && defined(__arch64__)
+ return "/lib/sparc64-linux-gnu";
+#elif defined(__riscv) && __riscv_xlen == 64
+ return "/lib/riscv64-linux-gnu";
+#else
+ return NULL;
+#endif
+}
+
+/* Get full path to program/shared library. */
+static int resolve_full_path(const char *file, char *result, size_t result_sz)
+{
+ const char *search_paths[3] = {};
+ int i;
+
+ if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
+ search_paths[0] = getenv("LD_LIBRARY_PATH");
+ search_paths[1] = "/usr/lib64:/usr/lib";
+ search_paths[2] = arch_specific_lib_paths();
+ } else {
+ search_paths[0] = getenv("PATH");
+ search_paths[1] = "/usr/bin:/usr/sbin";
+ }
+
+ for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
+ const char *s;
+
+ if (!search_paths[i])
+ continue;
+ for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
+ char *next_path;
+ int seg_len;
+
+ if (s[0] == ':')
+ s++;
+ next_path = strchr(s, ':');
+ seg_len = next_path ? next_path - s : strlen(s);
+ if (!seg_len)
+ continue;
+ snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
+ /* ensure it is an executable file/link */
+ if (access(result, R_OK | X_OK) < 0)
+ continue;
+ pr_debug("resolved '%s' to '%s'\n", file, result);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
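A short sketch of the search order this implements, from a hypothetical caller's perspective (resolve_full_path() is internal to libbpf, so this is purely illustrative):

#include <limits.h>
#include <stdio.h>

char path[PATH_MAX];

/* "libc.so.6" has no '/' and a ".so" suffix, so candidates are tried in
 * order: each $LD_LIBRARY_PATH entry, then /usr/lib64 and /usr/lib, then
 * the arch-specific directory (e.g. /lib/x86_64-linux-gnu on x86-64).
 */
if (resolve_full_path("libc.so.6", path, sizeof(path)) == 0)
	printf("resolved to %s\n", path);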
LIBBPF_API struct bpf_link *
bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
const char *binary_path, size_t func_offset,
@@ -10524,10 +10818,12 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
{
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
+ char full_binary_path[PATH_MAX];
struct bpf_link *link;
size_t ref_ctr_off;
int pfd, err;
bool retprobe, legacy;
+ const char *func_name;
if (!OPTS_VALID(opts, bpf_uprobe_opts))
return libbpf_err_ptr(-EINVAL);
@@ -10536,12 +10832,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+ if (binary_path && !strchr(binary_path, '/')) {
+ err = resolve_full_path(binary_path, full_binary_path,
+ sizeof(full_binary_path));
+ if (err) {
+ pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
+ prog->name, binary_path, err);
+ return libbpf_err_ptr(err);
+ }
+ binary_path = full_binary_path;
+ }
+ func_name = OPTS_GET(opts, func_name, NULL);
+ if (func_name) {
+ long sym_off;
+
+ if (!binary_path) {
+ pr_warn("prog '%s': name-based attach requires binary_path\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+ sym_off = elf_find_func_offset(binary_path, func_name);
+ if (sym_off < 0)
+ return libbpf_err_ptr(sym_off);
+ func_offset += sym_off;
+ }
+
legacy = determine_uprobe_perf_type() < 0;
if (!legacy) {
pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
func_offset, pid, ref_ctr_off);
} else {
- char probe_name[512];
+ char probe_name[PATH_MAX + 64];
if (ref_ctr_off)
return libbpf_err_ptr(-EINVAL);
@@ -10589,6 +10910,60 @@ err_out:
}
+/* Format of u[ret]probe section definition supporting auto-attach:
+ * u[ret]probe/binary:function[+offset]
+ *
+ * binary can be an absolute/relative path or a filename; the latter is resolved to a
+ * full binary path via bpf_program__attach_uprobe_opts.
+ *
+ * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
+ * specified (and auto-attach is not possible) or the above format is specified for
+ * auto-attach.
+ */
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
+ int n, ret = -EINVAL;
+ long offset = 0;
+
+ *link = NULL;
+
+ n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
+ &probe_type, &binary_path, &func_name, &offset);
+ switch (n) {
+ case 1:
+ /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
+ ret = 0;
+ break;
+ case 2:
+ pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
+ prog->name, prog->sec_name);
+ break;
+ case 3:
+ case 4:
+ opts.retprobe = strcmp(probe_type, "uretprobe") == 0;
+ if (opts.retprobe && offset != 0) {
+ pr_warn("prog '%s': uretprobes do not support offset specification\n",
+ prog->name);
+ break;
+ }
+ opts.func_name = func_name;
+ *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
+ ret = libbpf_get_error(*link);
+ break;
+ default:
+ pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
+ prog->sec_name);
+ break;
+ }
+ free(probe_type);
+ free(binary_path);
+ free(func_name);
+
+ return ret;
+}
+
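A hedged sketch of BPF-side definitions using this auto-attach format (library and function names are illustrative; BPF_KPROBE/BPF_KRETPROBE come from bpf_tracing.h):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* auto-attaches to malloc() entry in libc, resolved via library search paths */
SEC("uprobe/libc.so.6:malloc")
int BPF_KPROBE(trace_malloc_entry, size_t size)
{
	bpf_printk("malloc(%lu)", (unsigned long)size);
	return 0;
}

/* uretprobes use the same format, minus the [+offset] suffix */
SEC("uretprobe/libc.so.6:malloc")
int BPF_KRETPROBE(trace_malloc_exit, void *ret)
{
	bpf_printk("malloc returned %p", ret);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";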
struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
bool retprobe, pid_t pid,
const char *binary_path,
@@ -10599,6 +10974,85 @@ struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
}
+struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
+ pid_t pid, const char *binary_path,
+ const char *usdt_provider, const char *usdt_name,
+ const struct bpf_usdt_opts *opts)
+{
+ char resolved_path[512];
+ struct bpf_object *obj = prog->obj;
+ struct bpf_link *link;
+ long usdt_cookie;
+ int err;
+
+	if (!OPTS_VALID(opts, bpf_usdt_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ if (bpf_program__fd(prog) < 0) {
+ pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ if (!strchr(binary_path, '/')) {
+ err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
+ if (err) {
+ pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
+ prog->name, binary_path, err);
+ return libbpf_err_ptr(err);
+ }
+ binary_path = resolved_path;
+ }
+
+ /* USDT manager is instantiated lazily on first USDT attach. It will
+ * be destroyed together with BPF object in bpf_object__close().
+ */
+ if (IS_ERR(obj->usdt_man))
+ return libbpf_ptr(obj->usdt_man);
+ if (!obj->usdt_man) {
+ obj->usdt_man = usdt_manager_new(obj);
+ if (IS_ERR(obj->usdt_man))
+ return libbpf_ptr(obj->usdt_man);
+ }
+
+ usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
+ link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
+ usdt_provider, usdt_name, usdt_cookie);
+ err = libbpf_get_error(link);
+ if (err)
+ return libbpf_err_ptr(err);
+ return link;
+}
+
+static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ char *path = NULL, *provider = NULL, *name = NULL;
+ const char *sec_name;
+ int n, err;
+
+ sec_name = bpf_program__section_name(prog);
+ if (strcmp(sec_name, "usdt") == 0) {
+ /* no auto-attach for just SEC("usdt") */
+ *link = NULL;
+ return 0;
+ }
+
+ n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
+ if (n != 3) {
+ pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
+ sec_name);
+ err = -EINVAL;
+ } else {
+ *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
+ provider, name, NULL);
+ err = libbpf_get_error(*link);
+ }
+ free(path);
+ free(provider);
+ free(name);
+ return err;
+}
+
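For instance, a section string matching the sscanf() pattern above might look like this (path, provider, and probe names are made up; the BPF_USDT macro comes from usdt.bpf.h, added later in this patch):

/* auto-attaches to USDT my_provider:my_probe in ./my_app, for all processes */
SEC("usdt/./my_app:my_provider:my_probe")
int BPF_USDT(handle_probe, int arg1)
{
	return 0;
}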
static int determine_tracepoint_id(const char *tp_category,
const char *tp_name)
{
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 05dde85e19a6..63d66f1adf1a 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -459,9 +459,17 @@ struct bpf_uprobe_opts {
__u64 bpf_cookie;
/* uprobe is return probe, invoked at function return time */
bool retprobe;
+ /* Function name to attach to. Could be an unqualified ("abc") or library-qualified
+ * "abc@LIBXYZ" name. To specify function entry, func_name should be set while
+ * func_offset argument to bpf_prog__attach_uprobe_opts() should be 0. To trace an
+	 * func_offset argument to bpf_program__attach_uprobe_opts() should be 0. To trace an
+ * offset within the function. Shared library functions must specify the shared library
+ * binary_path.
+ */
+ const char *func_name;
size_t :0;
};
-#define bpf_uprobe_opts__last_field retprobe
+#define bpf_uprobe_opts__last_field func_name
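A minimal user-space sketch of name-based attachment through the new field (the prog variable and the traced function name are assumptions):

DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts, .func_name = "malloc");
struct bpf_link *link;

/* func_offset of 0 attaches exactly at the resolved symbol address */
link = bpf_program__attach_uprobe_opts(prog, -1 /* any pid */,
				       "libc.so.6", 0, &uprobe_opts);
if (!link)
	fprintf(stderr, "uprobe attach failed: %d\n", -errno);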
/**
* @brief **bpf_program__attach_uprobe()** attaches a BPF program
@@ -503,6 +511,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
const char *binary_path, size_t func_offset,
const struct bpf_uprobe_opts *opts);
+struct bpf_usdt_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value accessible through usdt_cookie() */
+ __u64 usdt_cookie;
+ size_t :0;
+};
+#define bpf_usdt_opts__last_field usdt_cookie
+
+/**
+ * @brief **bpf_program__attach_usdt()** is just like
+ * bpf_program__attach_uprobe_opts() except it covers USDT (User-space
+ * Statically Defined Tracepoint) attachment, instead of attaching to
+ * user-space function entry or exit.
+ *
+ * @param prog BPF program to attach
+ * @param pid Process ID to attach the uprobe to, 0 for self (own process),
+ * -1 for all processes
+ * @param binary_path Path to binary that contains provided USDT probe
+ * @param usdt_provider USDT provider name
+ * @param usdt_name USDT probe name
+ * @param opts Options for altering program attachment
+ * @return Reference to the newly created BPF link, or NULL on error
+ * (the error code is stored in errno)
+ */
+LIBBPF_API struct bpf_link *
+bpf_program__attach_usdt(const struct bpf_program *prog,
+ pid_t pid, const char *binary_path,
+ const char *usdt_provider, const char *usdt_name,
+ const struct bpf_usdt_opts *opts);
+
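A hedged usage sketch of the new API (the skeleton handle, provider, and probe names are assumptions; glibc does ship a "libc" USDT provider):

DECLARE_LIBBPF_OPTS(bpf_usdt_opts, usdt_opts, .usdt_cookie = 0x100);
struct bpf_link *link;

link = bpf_program__attach_usdt(skel->progs.handle_probe,
				-1 /* all processes */, "libc.so.6",
				"libc", "setjmp", &usdt_opts);
if (!link)
	fprintf(stderr, "USDT attach failed: %d\n", -errno);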
struct bpf_tracepoint_opts {
 	/* size of this struct, for forward/backward compatibility */
size_t sz;
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index dd35ee58bfaa..82f6d62176dd 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -444,6 +444,7 @@ LIBBPF_0.8.0 {
global:
bpf_object__destroy_subskeleton;
bpf_object__open_subskeleton;
+ bpf_program__attach_usdt;
libbpf_register_prog_handler;
libbpf_unregister_prog_handler;
bpf_program__attach_kprobe_multi_opts;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index b6247dc7f8eb..080272421f6c 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -103,6 +103,17 @@
#define str_has_pfx(str, pfx) \
(strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
+/* suffix check */
+static inline bool str_has_sfx(const char *str, const char *sfx)
+{
+ size_t str_len = strlen(str);
+ size_t sfx_len = strlen(sfx);
+
+	if (sfx_len > str_len)
+		return false;
+	return strcmp(str + str_len - sfx_len, sfx) == 0;
+}
+
/* Symbol versioning is different between static and shared library.
* Properly versioned symbols are needed for shared library, but
* only the symbol of the new version is needed for static library.
@@ -148,6 +159,15 @@ do { \
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
+
+struct bpf_link {
+ int (*detach)(struct bpf_link *link);
+ void (*dealloc)(struct bpf_link *link);
+ char *pin_path; /* NULL, if not pinned */
+ int fd; /* hook FD, -1 if not applicable */
+ bool disconnected;
+};
+
/*
* Re-implement glibc's reallocarray() for libbpf internal-only use.
* reallocarray(), unfortunately, is not available in all versions of glibc,
@@ -329,6 +349,8 @@ enum kern_feature_id {
FEAT_BTF_TYPE_TAG,
/* memcg-based accounting for BPF maps and progs */
FEAT_MEMCG_ACCOUNT,
+ /* BPF cookie (bpf_get_attach_cookie() BPF helper) support */
+ FEAT_BPF_COOKIE,
__FEAT_CNT,
};
@@ -543,4 +565,12 @@ int bpf_core_add_cands(struct bpf_core_cand *local_cand,
struct bpf_core_cand_list *cands);
void bpf_core_free_cands(struct bpf_core_cand_list *cands);
+struct usdt_manager *usdt_manager_new(struct bpf_object *obj);
+void usdt_manager_free(struct usdt_manager *man);
+struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man,
+ const struct bpf_program *prog,
+ pid_t pid, const char *path,
+ const char *usdt_provider, const char *usdt_name,
+ long usdt_cookie);
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
new file mode 100644
index 000000000000..4181fddb3687
--- /dev/null
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#ifndef __USDT_BPF_H__
+#define __USDT_BPF_H__
+
+#include <linux/errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/* Below types and maps are internal implementation details of libbpf's USDT
+ * support and are subject to change. Also, bpf_usdt_xxx() API helpers should
+ * be considered an unstable API as well and might be adjusted based on user
+ * feedback from using libbpf's USDT support in production.
+ */
+
+/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal
+ * map that keeps track of USDT argument specifications. This might be
+ * necessary if there are a lot of USDT attachments.
+ */
+#ifndef BPF_USDT_MAX_SPEC_CNT
+#define BPF_USDT_MAX_SPEC_CNT 256
+#endif
+/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal
+ * map that keeps track of IP (memory address) mapping to USDT argument
+ * specification.
+ * Note, if kernel supports BPF cookies, this map is not used and could be
+ * resized all the way to 1 to save a bit of memory.
+ */
+#ifndef BPF_USDT_MAX_IP_CNT
+#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
+#endif
+/* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is
+ * the only dependency on CO-RE, so if it's undesirable, the user can override
+ * BPF_USDT_HAS_BPF_COOKIE to specify whether BPF cookie is supported or not.
+ */
+#ifndef BPF_USDT_HAS_BPF_COOKIE
+#define BPF_USDT_HAS_BPF_COOKIE \
+ bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt)
+#endif
+
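For example, a consumer with known constraints could pin these knobs before including the header (values are illustrative):

/* few attachments expected; shrink the spec map */
#define BPF_USDT_MAX_SPEC_CNT 16
/* target kernels known to have bpf_get_attach_cookie() (v5.15+) */
#define BPF_USDT_HAS_BPF_COOKIE 1
#include <bpf/usdt.bpf.h>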
+enum __bpf_usdt_arg_type {
+ BPF_USDT_ARG_CONST,
+ BPF_USDT_ARG_REG,
+ BPF_USDT_ARG_REG_DEREF,
+};
+
+struct __bpf_usdt_arg_spec {
+ /* u64 scalar interpreted depending on arg_type, see below */
+ __u64 val_off;
+	/* arg location case, see bpf_usdt_arg() for details */
+ enum __bpf_usdt_arg_type arg_type;
+ /* offset of referenced register within struct pt_regs */
+ short reg_off;
+ /* whether arg should be interpreted as signed value */
+ bool arg_signed;
+ /* number of bits that need to be cleared and, optionally,
+ * sign-extended to cast arguments that are 1, 2, or 4 bytes
+ * long into final 8-byte u64/s64 value returned to user
+ */
+ char arg_bitshift;
+};
+
+/* should match USDT_MAX_ARG_CNT in usdt.c exactly */
+#define BPF_USDT_MAX_ARG_CNT 12
+struct __bpf_usdt_spec {
+ struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT];
+ __u64 usdt_cookie;
+ short arg_cnt;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, BPF_USDT_MAX_SPEC_CNT);
+ __type(key, int);
+ __type(value, struct __bpf_usdt_spec);
+} __bpf_usdt_specs SEC(".maps") __weak;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, BPF_USDT_MAX_IP_CNT);
+ __type(key, long);
+ __type(value, __u32);
+} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak;
+
+/* don't rely on user's BPF code to have latest definition of bpf_func_id */
+enum bpf_func_id___usdt {
+ BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */
+};
+
+static __always_inline
+int __bpf_usdt_spec_id(struct pt_regs *ctx)
+{
+ if (!BPF_USDT_HAS_BPF_COOKIE) {
+ long ip = PT_REGS_IP(ctx);
+ int *spec_id_ptr;
+
+ spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip);
+ return spec_id_ptr ? *spec_id_ptr : -ESRCH;
+ }
+
+ return bpf_get_attach_cookie(ctx);
+}
+
+/* Return number of USDT arguments defined for currently traced USDT. */
+__weak __hidden
+int bpf_usdt_arg_cnt(struct pt_regs *ctx)
+{
+ struct __bpf_usdt_spec *spec;
+ int spec_id;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return -ESRCH;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return -ESRCH;
+
+ return spec->arg_cnt;
+}
+
+/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
+ * Returns 0 on success; a negative error otherwise.
+ * On error *res is guaranteed to be set to zero.
+ */
+__weak __hidden
+int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
+{
+ struct __bpf_usdt_spec *spec;
+ struct __bpf_usdt_arg_spec *arg_spec;
+ unsigned long val;
+ int err, spec_id;
+
+ *res = 0;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return -ESRCH;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return -ESRCH;
+
+ if (arg_num >= BPF_USDT_MAX_ARG_CNT || arg_num >= spec->arg_cnt)
+ return -ENOENT;
+
+ arg_spec = &spec->args[arg_num];
+ switch (arg_spec->arg_type) {
+ case BPF_USDT_ARG_CONST:
+ /* Arg is just a constant ("-4@$-9" in USDT arg spec).
+ * value is recorded in arg_spec->val_off directly.
+ */
+ val = arg_spec->val_off;
+ break;
+ case BPF_USDT_ARG_REG:
+ /* Arg is in a register (e.g, "8@%rax" in USDT arg spec),
+ * so we read the contents of that register directly from
+ * struct pt_regs. To keep things simple user-space parts
+ * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off.
+ */
+ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+ if (err)
+ return err;
+ break;
+ case BPF_USDT_ARG_REG_DEREF:
+ /* Arg is in memory addressed by register, plus some offset
+ * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is
+ * identified like with BPF_USDT_ARG_REG case, and the offset
+ * is in arg_spec->val_off. We first fetch register contents
+ * from pt_regs, then do another user-space probe read to
+ * fetch argument value itself.
+ */
+ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off);
+ if (err)
+ return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ val >>= arg_spec->arg_bitshift;
+#endif
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing
+ * necessary upper arg_bitshift bits, with sign extension if argument
+ * is signed
+ */
+ val <<= arg_spec->arg_bitshift;
+ if (arg_spec->arg_signed)
+ val = ((long)val) >> arg_spec->arg_bitshift;
+ else
+ val = val >> arg_spec->arg_bitshift;
+ *res = val;
+ return 0;
+}
+
+/* Retrieve user-specified cookie value provided during attach as
+ * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie
+ * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself
+ * utilizing BPF cookies internally, so user can't use BPF cookie directly
+ * for USDT programs and has to use bpf_usdt_cookie() API instead.
+ */
+__weak __hidden
+long bpf_usdt_cookie(struct pt_regs *ctx)
+{
+ struct __bpf_usdt_spec *spec;
+ int spec_id;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return 0;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return 0;
+
+ return spec->usdt_cookie;
+}
+
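Taken together, these helpers let a plain SEC("usdt") program walk arguments generically without the BPF_USDT convenience macro defined below; a sketch with illustrative names (such a program must be attached explicitly via bpf_program__attach_usdt()):

SEC("usdt")
int dump_usdt_args(struct pt_regs *ctx)
{
	long val;
	int i, n = bpf_usdt_arg_cnt(ctx);

	for (i = 0; i < n && i < BPF_USDT_MAX_ARG_CNT; i++) {
		if (bpf_usdt_arg(ctx, i, &val) == 0)
			bpf_printk("arg #%d = %ld", i, val);
	}
	bpf_printk("usdt cookie = %ld", bpf_usdt_cookie(ctx));
	return 0;
}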
+/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */
+#define ___bpf_usdt_args0() ctx
+#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; })
+#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; })
+#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; })
+#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; })
+#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; })
+#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; })
+#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; })
+#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; })
+#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; })
+#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; })
+#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; })
+#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; })
+#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes.
+ * Original struct pt_regs * context is preserved as 'ctx' argument.
+ */
+#define BPF_USDT(name, args...) \
+name(struct pt_regs *ctx); \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args); \
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_usdt_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args)
+
+#endif /* __USDT_BPF_H__ */
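To close the loop, a hedged end-to-end sketch using the BPF_USDT macro (the provider, probe, and argument types are assumptions about the traced binary):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/usdt.bpf.h>

/* auto-attaches via the SEC() name; arguments come from bpf_usdt_arg() */
SEC("usdt/libc.so.6:libc:setjmp")
int BPF_USDT(handle_setjmp, void *env, int savesigs)
{
	bpf_printk("setjmp(env=%p, savesigs=%d)", env, savesigs);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";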
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
new file mode 100644
index 000000000000..acf2d99a9e77
--- /dev/null
+++ b/tools/lib/bpf/usdt.c
@@ -0,0 +1,1335 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <unistd.h>
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_common.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+
+/* libbpf's USDT support consists of BPF-side state/code and user-space
+ * state/code working together in concert. BPF-side parts are defined in
+ * usdt.bpf.h header library. User-space state is encapsulated by struct
+ * usdt_manager and all the supporting code centered around usdt_manager.
+ *
+ * usdt.bpf.h defines two BPF maps that usdt_manager expects: USDT spec map
+ * and IP-to-spec-ID map, which is an auxiliary map necessary for kernels that
+ * don't support BPF cookie (see below). These two maps are implicitly
+ * embedded into the user's final BPF object file when the user's code includes
+ * usdt.bpf.h. This means that libbpf doesn't do anything special to create
+ * these USDT support maps. They are created by normal libbpf logic of
+ * instantiating BPF maps when opening and loading BPF object.
+ *
+ * As such, libbpf is basically unaware of the need to do anything
+ * USDT-related until the very first call to bpf_program__attach_usdt(), which
+ * can be called by user explicitly or happen automatically during skeleton
+ * attach (or, equivalently, through generic bpf_program__attach() call). At
+ * this point, libbpf will instantiate and initialize struct usdt_manager and
+ * store it in bpf_object. USDT manager is per-BPF object construct, as each
+ * independent BPF object might or might not have USDT programs, and thus all
+ * the expected USDT-related state. There is no coordination between two
+ * bpf_objects in terms of USDT attachment; they are oblivious of each other's
+ * existence, and libbpf simply deals with each bpf_object's own USDT state.
+ *
+ * Quick crash course on USDTs.
+ *
+ * From user-space application's point of view, USDT is essentially just
+ * a slightly special function call that normally has zero overhead, unless it
+ * is being traced by some external entity (e.g., a BPF-based tool). Here's how
+ * a typical application can trigger USDT probe:
+ *
+ * #include <sys/sdt.h> // provided by systemtap-sdt-devel package
+ *  // folly also provides similar functionality in folly/tracing/StaticTracepoint.h
+ *
+ * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y);
+ *
+ * USDT is identified by its <provider-name>:<probe-name> pair of names. Each
+ * individual USDT has a fixed number of arguments (3 in the above example)
+ * and specifies values of each argument as if it was a function call.
+ *
+ * USDT call is actually not a function call, but is instead replaced by
+ * a single NOP instruction (thus zero overhead, effectively). But in addition
+ * to that, those USDT macros generate special SHT_NOTE ELF records in
+ * .note.stapsdt ELF section. Here's an example USDT definition as emitted by
+ * `readelf -n <binary>`:
+ *
+ * stapsdt 0x00000089 NT_STAPSDT (SystemTap probe descriptors)
+ * Provider: test
+ * Name: usdt12
+ * Location: 0x0000000000549df3, Base: 0x00000000008effa4, Semaphore: 0x0000000000a4606e
+ * Arguments: -4@-1204(%rbp) -4@%edi -8@-1216(%rbp) -8@%r8 -4@$5 -8@%r9 8@%rdx 8@%r10 -4@$-9 -2@%cx -2@%ax -1@%sil
+ *
+ * In this case we have USDT test:usdt12 with 12 arguments.
+ *
+ * Location and base are offsets used to calculate absolute IP address of that
+ * NOP instruction that kernel can replace with an interrupt instruction to
+ * trigger instrumentation code (BPF program for all that we care about).
+ *
+ * The semaphore above is an optional feature. It records an address of a 2-byte
+ * refcount variable (normally in '.probes' ELF section) used for signaling if
+ * there is anything that is attached to USDT. This is useful for user
+ * applications if, for example, they need to prepare some arguments that are
+ * passed only to USDTs and preparation is expensive. By checking if USDT is
+ * "activated", an application can avoid paying those costs unnecessarily.
+ * A recent enough kernel has built-in support for automatically managing this
+ * refcount, which libbpf expects and relies on. If USDT is defined without
+ * associated semaphore, this value will be zero. See selftests for semaphore
+ * examples.
+ *
+ * Arguments is the most interesting part. This USDT specification string
+ * provides information about all the USDT arguments and their locations. The
+ * part before the @ sign defines the byte size of the argument (1, 2, 4, or 8)
+ * and whether the argument is signed or unsigned (negative size means signed).
+ * The part after the @ sign is an assembly-like definition of argument
+ * location (see [0] for more details). Technically, assembler can provide
+ * some pretty advanced definitions, but libbpf currently supports the three
+ * most common cases:
+ * 1) immediate constant, see 5th and 9th args above (-4@$5 and -4@-9);
+ * 2) register value, e.g., 8@%rdx, which means "unsigned 8-byte integer
+ * whose value is in register %rdx";
+ * 3) memory dereference addressed by register, e.g., -4@-1204(%rbp), which
+ * specifies signed 32-bit integer stored at offset -1204 bytes from
+ * memory address stored in %rbp.
+ *
+ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ *
+ * During attachment, libbpf parses all the relevant USDT specifications and
+ * prepares `struct usdt_spec` (USDT spec), which is then provided to BPF-side
+ * code through spec map. This allows BPF applications to quickly fetch the
+ * actual value at runtime using simple BPF-side code.
+ *
+ * With basics out of the way, let's go over less immediately obvious aspects
+ * of supporting USDTs.
+ *
+ * First, there is no special USDT BPF program type. It is actually just
+ * a uprobe BPF program (which to the kernel, at least currently, is just a
+ * kprobe program, i.e., the BPF_PROG_TYPE_KPROBE program type). The only
+ * difference is that a uprobe is usually attached at the function entry, while
+ * a USDT will normally be somewhere inside the function. But it should always
+ * point to a NOP instruction, which makes such uprobes the fastest uprobe
+ * kind.
+ *
+ * Second, it's important to realize that such STAP_PROBEn(provider, name, ...)
+ * macro invocations can end up being inlined many, many times, depending on
+ * the specifics of each individual user application. So a single conceptual USDT
+ * (identified by provider:name pair of identifiers) is, generally speaking,
+ * multiple uprobe locations (USDT call sites) in different places in user
+ * application. Further, again due to inlining, each USDT call site might end
+ * up having the same argument #N be located in a different place. In one call
+ * site it could be a constant, in another it will end up in a register, and in
+ * yet another could be some other register or even somewhere on the stack.
+ *
+ * As such, "attaching to USDT" means (in the general case) attaching the same
+ * uprobe BPF program to multiple target locations in user application, each
+ * potentially having a completely different USDT spec associated with it.
+ * To wire all this up together libbpf allocates a unique integer spec ID for
+ * each unique USDT spec. Spec IDs are allocated as sequential small integers
+ * so that they can be used as keys in array BPF map (for performance reasons).
+ * Spec ID allocation and accounting is a big part of what usdt_manager is
+ * about. This state has to be maintained per BPF object and coordinated
+ * between different USDT attachments within the same BPF object.
+ *
+ * Spec ID is the key in the spec BPF map; the value is the actual USDT spec laid out
+ * as struct usdt_spec. Each invocation of BPF program at runtime needs to
+ * know its associated spec ID. It gets it either through BPF cookie, which
+ * libbpf sets to spec ID during attach time, or, if kernel is too old to
+ * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such
+ * case. The latter means that some modes of operation can't be supported
+ * without BPF cookie. One such mode is attaching to a shared library "generically",
+ * without specifying target process. In such case, it's impossible to
+ * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode
+ * is not supported without BPF cookie support.
+ *
+ * Note that libbpf is using BPF cookie functionality for its own internal
+ * needs, so users themselves can't rely on the BPF cookie feature. To that end, libbpf
+ * provides conceptually equivalent USDT cookie support. It's still u64
+ * user-provided value that can be associated with USDT attachment. Note that
+ * this will be the same value for all USDT call sites within the same single
+ * *logical* USDT attachment. This makes sense because to user attaching to
+ * USDT is a single BPF program triggered for a singular USDT probe. The fact
+ * that this is done at multiple actual locations is a mostly hidden
+ * implementation detail. This USDT cookie value can be fetched with the
+ * bpf_usdt_cookie(ctx) API provided by usdt.bpf.h.
+ *
+ * Lastly, while single USDT can have tons of USDT call sites, it doesn't
+ * necessarily have that many different USDT specs. It very well might be
+ * that 1000 USDT call sites only need 5 different USDT specs, because all the
+ * arguments are typically contained in a small set of registers or stack
+ * locations. As such, it's wasteful to allocate as many USDT spec IDs as
+ * there are USDT call sites. So libbpf tries to be frugal and performs
+ * on-the-fly deduplication during a single USDT attachment to only allocate
+ * the minimal required amount of unique USDT specs (and thus spec IDs). This
+ * is trivially achieved by using USDT spec string (Arguments string from USDT
+ * note) as a lookup key in a hashmap. USDT spec string uniquely defines
+ * everything about how to fetch USDT arguments, so two USDT call sites
+ * sharing USDT spec string can safely share the same USDT spec and spec ID.
+ * Note, this spec string deduplication happens only within a single USDT
+ * attachment, so each USDT spec shares the same USDT cookie value. This is
+ * not generally true for other USDT attachments within the same BPF object,
+ * as even if USDT spec string is the same, USDT cookie value can be
+ * different. It was deemed excessive to try to deduplicate across independent
+ * USDT attachments by taking into account USDT spec string *and* USDT cookie
+ * value, which would complicate spec ID accounting significantly for little
+ * gain.
+ */
+
+#define USDT_BASE_SEC ".stapsdt.base"
+#define USDT_SEMA_SEC ".probes"
+#define USDT_NOTE_SEC ".note.stapsdt"
+#define USDT_NOTE_TYPE 3
+#define USDT_NOTE_NAME "stapsdt"
+
+/* should match exactly enum __bpf_usdt_arg_type from usdt.bpf.h */
+enum usdt_arg_type {
+ USDT_ARG_CONST,
+ USDT_ARG_REG,
+ USDT_ARG_REG_DEREF,
+};
+
+/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
+struct usdt_arg_spec {
+ __u64 val_off;
+ enum usdt_arg_type arg_type;
+ short reg_off;
+ bool arg_signed;
+ char arg_bitshift;
+};
+
+/* should match BPF_USDT_MAX_ARG_CNT in usdt.bpf.h */
+#define USDT_MAX_ARG_CNT 12
+
+/* should match struct __bpf_usdt_spec from usdt.bpf.h */
+struct usdt_spec {
+ struct usdt_arg_spec args[USDT_MAX_ARG_CNT];
+ __u64 usdt_cookie;
+ short arg_cnt;
+};
+
+struct usdt_note {
+ const char *provider;
+ const char *name;
+ /* USDT args specification string, e.g.:
+ * "-4@%esi -4@-24(%rbp) -4@%ecx 2@%ax 8@%rdx"
+ */
+ const char *args;
+ long loc_addr;
+ long base_addr;
+ long sema_addr;
+};
+
+struct usdt_target {
+ long abs_ip;
+ long rel_ip;
+ long sema_off;
+ struct usdt_spec spec;
+ const char *spec_str;
+};
+
+struct usdt_manager {
+ struct bpf_map *specs_map;
+ struct bpf_map *ip_to_spec_id_map;
+
+ int *free_spec_ids;
+ size_t free_spec_cnt;
+ size_t next_free_spec_id;
+
+ bool has_bpf_cookie;
+ bool has_sema_refcnt;
+};
+
+struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
+{
+ static const char *ref_ctr_sysfs_path = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset";
+ struct usdt_manager *man;
+ struct bpf_map *specs_map, *ip_to_spec_id_map;
+
+ specs_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_specs");
+ ip_to_spec_id_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_ip_to_spec_id");
+ if (!specs_map || !ip_to_spec_id_map) {
+ pr_warn("usdt: failed to find USDT support BPF maps, did you forget to include bpf/usdt.bpf.h?\n");
+ return ERR_PTR(-ESRCH);
+ }
+
+ man = calloc(1, sizeof(*man));
+ if (!man)
+ return ERR_PTR(-ENOMEM);
+
+ man->specs_map = specs_map;
+ man->ip_to_spec_id_map = ip_to_spec_id_map;
+
+ /* Detect if BPF cookie is supported for kprobes.
+ * We don't need IP-to-ID mapping if we can use BPF cookies.
+ * Added in: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value")
+ */
+ man->has_bpf_cookie = kernel_supports(obj, FEAT_BPF_COOKIE);
+
+ /* Detect kernel support for automatic refcounting of USDT semaphore.
+ * If this is not supported, USDTs with semaphores will not be supported.
+ * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
+ */
+ man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0;
+
+ return man;
+}
+
+void usdt_manager_free(struct usdt_manager *man)
+{
+ if (IS_ERR_OR_NULL(man))
+ return;
+
+ free(man->free_spec_ids);
+ free(man);
+}
+
+static int sanity_check_usdt_elf(Elf *elf, const char *path)
+{
+ GElf_Ehdr ehdr;
+ int endianness;
+
+ if (elf_kind(elf) != ELF_K_ELF) {
+ pr_warn("usdt: unrecognized ELF kind %d for '%s'\n", elf_kind(elf), path);
+ return -EBADF;
+ }
+
+ switch (gelf_getclass(elf)) {
+ case ELFCLASS64:
+ if (sizeof(void *) != 8) {
+ pr_warn("usdt: attaching to 64-bit ELF binary '%s' is not supported\n", path);
+ return -EBADF;
+ }
+ break;
+ case ELFCLASS32:
+ if (sizeof(void *) != 4) {
+ pr_warn("usdt: attaching to 32-bit ELF binary '%s' is not supported\n", path);
+ return -EBADF;
+ }
+ break;
+ default:
+ pr_warn("usdt: unsupported ELF class for '%s'\n", path);
+ return -EBADF;
+ }
+
+ if (!gelf_getehdr(elf, &ehdr))
+ return -EINVAL;
+
+ if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) {
+ pr_warn("usdt: unsupported type of ELF binary '%s' (%d), only ET_EXEC and ET_DYN are supported\n",
+ path, ehdr.e_type);
+ return -EBADF;
+ }
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ endianness = ELFDATA2LSB;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ endianness = ELFDATA2MSB;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+ if (endianness != ehdr.e_ident[EI_DATA]) {
+ pr_warn("usdt: ELF endianness mismatch for '%s'\n", path);
+ return -EBADF;
+ }
+
+ return 0;
+}
+
+static int find_elf_sec_by_name(Elf *elf, const char *sec_name, GElf_Shdr *shdr, Elf_Scn **scn)
+{
+ Elf_Scn *sec = NULL;
+ size_t shstrndx;
+
+ if (elf_getshdrstrndx(elf, &shstrndx))
+ return -EINVAL;
+
+ /* check if ELF is corrupted and avoid calling elf_strptr if yes */
+ if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL))
+ return -EINVAL;
+
+ while ((sec = elf_nextscn(elf, sec)) != NULL) {
+ char *name;
+
+ if (!gelf_getshdr(sec, shdr))
+ return -EINVAL;
+
+ name = elf_strptr(elf, shstrndx, shdr->sh_name);
+ if (name && strcmp(sec_name, name) == 0) {
+ *scn = sec;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+struct elf_seg {
+ long start;
+ long end;
+ long offset;
+ bool is_exec;
+};
+
+static int cmp_elf_segs(const void *_a, const void *_b)
+{
+ const struct elf_seg *a = _a;
+ const struct elf_seg *b = _b;
+
+ return a->start < b->start ? -1 : 1;
+}
+
+static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, size_t *seg_cnt)
+{
+ GElf_Phdr phdr;
+ size_t n;
+ int i, err;
+ struct elf_seg *seg;
+ void *tmp;
+
+ *seg_cnt = 0;
+
+ if (elf_getphdrnum(elf, &n)) {
+ err = -errno;
+ return err;
+ }
+
+ for (i = 0; i < n; i++) {
+ if (!gelf_getphdr(elf, i, &phdr)) {
+ err = -errno;
+ return err;
+ }
+
+ pr_debug("usdt: discovered PHDR #%d in '%s': vaddr 0x%lx memsz 0x%lx offset 0x%lx type 0x%lx flags 0x%lx\n",
+ i, path, (long)phdr.p_vaddr, (long)phdr.p_memsz, (long)phdr.p_offset,
+ (long)phdr.p_type, (long)phdr.p_flags);
+ if (phdr.p_type != PT_LOAD)
+ continue;
+
+ tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs));
+ if (!tmp)
+ return -ENOMEM;
+
+ *segs = tmp;
+ seg = *segs + *seg_cnt;
+ (*seg_cnt)++;
+
+ seg->start = phdr.p_vaddr;
+ seg->end = phdr.p_vaddr + phdr.p_memsz;
+ seg->offset = phdr.p_offset;
+ seg->is_exec = phdr.p_flags & PF_X;
+ }
+
+ if (*seg_cnt == 0) {
+ pr_warn("usdt: failed to find PT_LOAD program headers in '%s'\n", path);
+ return -ESRCH;
+ }
+
+ qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs);
+ return 0;
+}
+
+static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt)
+{
+ char path[PATH_MAX], line[PATH_MAX], mode[16];
+ size_t seg_start, seg_end, seg_off;
+ struct elf_seg *seg;
+ int tmp_pid, i, err;
+ FILE *f;
+
+ *seg_cnt = 0;
+
+ /* Handle containerized binaries only accessible from
+ * /proc/<pid>/root/<path>. They will be reported as just /<path> in
+ * /proc/<pid>/maps.
+ */
+ if (sscanf(lib_path, "/proc/%d/root%s", &tmp_pid, path) == 2 && pid == tmp_pid)
+ goto proceed;
+
+ if (!realpath(lib_path, path)) {
+ pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n",
+ lib_path, -errno);
+ libbpf_strlcpy(path, lib_path, sizeof(path));
+ }
+
+proceed:
+ sprintf(line, "/proc/%d/maps", pid);
+ f = fopen(line, "r");
+ if (!f) {
+ err = -errno;
+ pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n",
+ line, lib_path, err);
+ return err;
+ }
+
+ /* We need to handle lines with no path at the end:
+ *
+ * 7f5c6f5d1000-7f5c6f5d3000 rw-p 001c7000 08:04 21238613 /usr/lib64/libc-2.17.so
+ * 7f5c6f5d3000-7f5c6f5d8000 rw-p 00000000 00:00 0
+ * 7f5c6f5d8000-7f5c6f5d9000 r-xp 00000000 103:01 362990598 /data/users/andriin/linux/tools/bpf/usdt/libhello_usdt.so
+ */
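+	/* The fscanf() format below captures start and end addresses, the
+	 * perms string, and the file offset, skips the device and inode
+	 * fields, and finally grabs the rest of the line (a possibly empty
+	 * path), preserving any leading whitespace.
+	 */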
+ while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n",
+ &seg_start, &seg_end, mode, &seg_off, line) == 5) {
+ void *tmp;
+
+		/* to handle the no-path case (see above) we need to capture
+		 * the line without skipping any whitespace, so we have to
+		 * strip leading whitespace manually here
+		 */
+ i = 0;
+ while (isblank(line[i]))
+ i++;
+ if (strcmp(line + i, path) != 0)
+ continue;
+
+ pr_debug("usdt: discovered segment for lib '%s': addrs %zx-%zx mode %s offset %zx\n",
+ path, seg_start, seg_end, mode, seg_off);
+
+		/* ignore non-executable memory mappings for shared libs */
+ if (mode[2] != 'x')
+ continue;
+
+ tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ *segs = tmp;
+ seg = *segs + *seg_cnt;
+ *seg_cnt += 1;
+
+ seg->start = seg_start;
+ seg->end = seg_end;
+ seg->offset = seg_off;
+ seg->is_exec = true;
+ }
+
+ if (*seg_cnt == 0) {
+ pr_warn("usdt: failed to find '%s' (resolved to '%s') within PID %d memory mappings\n",
+ lib_path, path, pid);
+ err = -ESRCH;
+ goto err_out;
+ }
+
+ qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs);
+ err = 0;
+err_out:
+ fclose(f);
+ return err;
+}
+
+static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative)
+{
+ struct elf_seg *seg;
+ int i;
+
+ if (relative) {
+		/* for shared libraries, address is a relative offset and thus
+		 * should fall within the logical offset-based range of
+		 * [offset_start, offset_end)
+		 */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start))
+ return seg;
+ }
+ } else {
+ /* for binaries, address is absolute and thus should be within
+ * absolute address range of [seg_start, seg_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->start <= addr && addr < seg->end)
+ return seg;
+ }
+ }
+
+ return NULL;
+}
+
+static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
+ GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
+ struct usdt_note *usdt_note);
+
+static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie);
+
+static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid,
+ const char *usdt_provider, const char *usdt_name, long usdt_cookie,
+ struct usdt_target **out_targets, size_t *out_target_cnt)
+{
+ size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0;
+ struct elf_seg *segs = NULL, *lib_segs = NULL;
+ struct usdt_target *targets = NULL, *target;
+ long base_addr = 0;
+ Elf_Scn *notes_scn, *base_scn;
+ GElf_Shdr base_shdr, notes_shdr;
+ GElf_Ehdr ehdr;
+ GElf_Nhdr nhdr;
+ Elf_Data *data;
+ int err;
+
+ *out_targets = NULL;
+ *out_target_cnt = 0;
+
+ err = find_elf_sec_by_name(elf, USDT_NOTE_SEC, &notes_shdr, &notes_scn);
+ if (err) {
+ pr_warn("usdt: no USDT notes section (%s) found in '%s'\n", USDT_NOTE_SEC, path);
+ return err;
+ }
+
+ if (notes_shdr.sh_type != SHT_NOTE || !gelf_getehdr(elf, &ehdr)) {
+ pr_warn("usdt: invalid USDT notes section (%s) in '%s'\n", USDT_NOTE_SEC, path);
+ return -EINVAL;
+ }
+
+ err = parse_elf_segs(elf, path, &segs, &seg_cnt);
+ if (err) {
+ pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err);
+ goto err_out;
+ }
+
+ /* .stapsdt.base ELF section is optional, but is used for prelink
+ * offset compensation (see a big comment further below)
+ */
+ if (find_elf_sec_by_name(elf, USDT_BASE_SEC, &base_shdr, &base_scn) == 0)
+ base_addr = base_shdr.sh_addr;
+
+ data = elf_getdata(notes_scn, 0);
+ off = 0;
+ while ((off = gelf_getnote(data, off, &nhdr, &name_off, &desc_off)) > 0) {
+ long usdt_abs_ip, usdt_rel_ip, usdt_sema_off = 0;
+ struct usdt_note note;
+ struct elf_seg *seg = NULL;
+ void *tmp;
+
+ err = parse_usdt_note(elf, path, base_addr, &nhdr,
+ data->d_buf, name_off, desc_off, &note);
+ if (err)
+ goto err_out;
+
+ if (strcmp(note.provider, usdt_provider) != 0 || strcmp(note.name, usdt_name) != 0)
+ continue;
+
+		/* We need to compensate for the "prelink effect". See [0] for details,
+ * relevant parts quoted here:
+ *
+ * Each SDT probe also expands into a non-allocated ELF note. You can
+ * find this by looking at SHT_NOTE sections and decoding the format;
+ * see below for details. Because the note is non-allocated, it means
+ * there is no runtime cost, and also preserved in both stripped files
+ * and .debug files.
+ *
+ * However, this means that prelink won't adjust the note's contents
+ * for address offsets. Instead, this is done via the .stapsdt.base
+ * section. This is a special section that is added to the text. We
+ * will only ever have one of these sections in a final link and it
+ * will only ever be one byte long. Nothing about this section itself
+ * matters, we just use it as a marker to detect prelink address
+ * adjustments.
+ *
+ * Each probe note records the link-time address of the .stapsdt.base
+ * section alongside the probe PC address. The decoder compares the
+ * base address stored in the note with the .stapsdt.base section's
+ * sh_addr. Initially these are the same, but the section header will
+ * be adjusted by prelink. So the decoder applies the difference to
+ * the probe PC address to get the correct prelinked PC address; the
+ * same adjustment is applied to the semaphore address, if any.
+ *
+ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ */
+ usdt_rel_ip = usdt_abs_ip = note.loc_addr;
+ if (base_addr) {
+ usdt_abs_ip += base_addr - note.base_addr;
+ usdt_rel_ip += base_addr - note.base_addr;
+ }
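+		/* E.g. (hypothetical numbers): if prelink moved the link-time
+		 * base 0x400000 recorded in the note to a runtime sh_addr of
+		 * 0x500000, the 0x100000 delta is applied to both IPs above.
+		 */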
+
+ if (ehdr.e_type == ET_EXEC) {
+			/* When attaching uprobes (which is what USDTs
+			 * basically are) the kernel expects a relative IP to
+			 * be specified, so if we are attaching to an
+			 * executable ELF binary (i.e., not a shared library),
+			 * we need to calculate the proper relative IP based
+			 * on the ELF's load address
+			 */
+ seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
+ usdt_provider, usdt_name, path, usdt_abs_ip);
+ goto err_out;
+ }
+ if (!seg->is_exec) {
+ err = -ESRCH;
+ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
+ path, seg->start, seg->end, usdt_provider, usdt_name,
+ usdt_abs_ip);
+ goto err_out;
+ }
+
+ usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset);
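+			/* E.g. (hypothetical numbers): a PT_LOAD segment with
+			 * vaddr 0x400000 and file offset 0x0 containing a
+			 * probe at absolute IP 0x401234 yields rel_ip =
+			 * 0x401234 - (0x400000 - 0x0) = 0x1234, i.e. the file
+			 * offset the kernel expects for a uprobe.
+			 */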
+ } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */
+			/* If we don't have BPF cookie support but need to
+			 * attach to a shared library, we'll need to know and
+			 * record absolute addresses of attach points, because
+			 * the USDT spec has to be looked up by the absolute
+			 * IP of the triggered uprobe. Doing this resolution
+			 * is only possible when we have a specific PID of the
+			 * process that's using the specified shared library.
+			 * BPF cookie removes the absolute address limitation,
+			 * as we don't need to do this lookup (we just use the
+			 * BPF cookie as an index of the USDT spec), so on
+			 * newer kernels with BPF cookie support libbpf
+			 * supports USDT attachment to shared libraries with
+			 * no PID filter.
+			 */
+ if (pid < 0) {
+ pr_warn("usdt: attaching to shared libraries without specific PID is not supported on current kernel\n");
+ err = -ENOTSUP;
+ goto err_out;
+ }
+
+ /* lib_segs are lazily initialized only if necessary */
+ if (lib_seg_cnt == 0) {
+ err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt);
+ if (err) {
+ pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n",
+ pid, path, err);
+ goto err_out;
+ }
+ }
+
+ seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n",
+ usdt_provider, usdt_name, path, usdt_rel_ip);
+ goto err_out;
+ }
+
+ usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset);
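+			/* E.g. (hypothetical numbers): a lib segment mapped at
+			 * 0x7f0000001000 with file offset 0x1000 turns a
+			 * relative IP of 0x1234 into abs_ip = 0x7f0000001000 +
+			 * (0x1234 - 0x1000) = 0x7f0000001234.
+			 */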
+ }
+
+ pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n",
+ usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", path,
+ note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args,
+ seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0);
+
+ /* Adjust semaphore address to be a relative offset */
+ if (note.sema_addr) {
+ if (!man->has_sema_refcnt) {
+ pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n",
+ usdt_provider, usdt_name, path);
+ err = -ENOTSUP;
+ goto err_out;
+ }
+
+ seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n",
+ usdt_provider, usdt_name, path, note.sema_addr);
+ goto err_out;
+ }
+ if (seg->is_exec) {
+ err = -ESRCH;
+ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx] for semaphore of '%s:%s' at 0x%lx is executable\n",
+ path, seg->start, seg->end, usdt_provider, usdt_name,
+ note.sema_addr);
+ goto err_out;
+ }
+
+ usdt_sema_off = note.sema_addr - (seg->start - seg->offset);
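+			/* usdt_sema_off is the file offset that the kernel's
+			 * uprobe ref_ctr_offset machinery expects, computed
+			 * the same way as rel_ip above
+			 */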
+
+ pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n",
+ usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ",
+ path, note.sema_addr, note.base_addr, usdt_sema_off,
+ seg->start, seg->end, seg->offset);
+ }
+
+ /* Record adjusted addresses and offsets and parse USDT spec */
+ tmp = libbpf_reallocarray(targets, target_cnt + 1, sizeof(*targets));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ targets = tmp;
+
+ target = &targets[target_cnt];
+ memset(target, 0, sizeof(*target));
+
+ target->abs_ip = usdt_abs_ip;
+ target->rel_ip = usdt_rel_ip;
+ target->sema_off = usdt_sema_off;
+
+		/* note.args references strings from the Elf object itself,
+		 * so they can be referenced safely until the elf_end() call
+		 */
+ target->spec_str = note.args;
+
+ err = parse_usdt_spec(&target->spec, &note, usdt_cookie);
+ if (err)
+ goto err_out;
+
+ target_cnt++;
+ }
+
+ *out_targets = targets;
+ *out_target_cnt = target_cnt;
+ err = target_cnt;
+
+err_out:
+ free(segs);
+ free(lib_segs);
+ if (err < 0)
+ free(targets);
+ return err;
+}
+
+struct bpf_link_usdt {
+ struct bpf_link link;
+
+ struct usdt_manager *usdt_man;
+
+ size_t spec_cnt;
+ int *spec_ids;
+
+ size_t uprobe_cnt;
+ struct {
+ long abs_ip;
+ struct bpf_link *link;
+ } *uprobes;
+};
+
+static int bpf_link_usdt_detach(struct bpf_link *link)
+{
+ struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link);
+ struct usdt_manager *man = usdt_link->usdt_man;
+ int i;
+
+ for (i = 0; i < usdt_link->uprobe_cnt; i++) {
+ /* detach underlying uprobe link */
+ bpf_link__destroy(usdt_link->uprobes[i].link);
+ /* there is no need to update specs map because it will be
+ * unconditionally overwritten on subsequent USDT attaches,
+ * but if BPF cookies are not used we need to remove entry
+ * from ip_to_spec_id map, otherwise we'll run into false
+ * conflicting IP errors
+ */
+ if (!man->has_bpf_cookie) {
+ /* not much we can do about errors here */
+ (void)bpf_map_delete_elem(bpf_map__fd(man->ip_to_spec_id_map),
+ &usdt_link->uprobes[i].abs_ip);
+ }
+ }
+
+	/* try to return the list of previously used spec IDs to usdt_manager
+	 * for reuse by subsequent USDT attaches
+	 */
+ if (!man->free_spec_ids) {
+ /* if there were no free spec IDs yet, just transfer our IDs */
+ man->free_spec_ids = usdt_link->spec_ids;
+ man->free_spec_cnt = usdt_link->spec_cnt;
+ usdt_link->spec_ids = NULL;
+ } else {
+ /* otherwise concat IDs */
+ size_t new_cnt = man->free_spec_cnt + usdt_link->spec_cnt;
+ int *new_free_ids;
+
+ new_free_ids = libbpf_reallocarray(man->free_spec_ids, new_cnt,
+ sizeof(*new_free_ids));
+		/* If we couldn't resize free_spec_ids, we'll just leak
+		 * a bunch of free IDs; this is very unlikely to happen, and
+		 * if the system is that starved for memory, it's the least
+		 * of the user's concerns, probably.
+		 * So just do our best here to return those IDs to usdt_manager.
+		 */
+ if (new_free_ids) {
+ memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids,
+ usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids));
+ man->free_spec_ids = new_free_ids;
+ man->free_spec_cnt = new_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void bpf_link_usdt_dealloc(struct bpf_link *link)
+{
+ struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link);
+
+ free(usdt_link->spec_ids);
+ free(usdt_link->uprobes);
+ free(usdt_link);
+}
+
+static size_t specs_hash_fn(const void *key, void *ctx)
+{
+ const char *s = key;
+
+ return str_hash(s);
+}
+
+static bool specs_equal_fn(const void *key1, const void *key2, void *ctx)
+{
+ const char *s1 = key1;
+ const char *s2 = key2;
+
+ return strcmp(s1, s2) == 0;
+}
+
+static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash,
+ struct bpf_link_usdt *link, struct usdt_target *target,
+ int *spec_id, bool *is_new)
+{
+ void *tmp;
+ int err;
+
+ /* check if we already allocated spec ID for this spec string */
+ if (hashmap__find(specs_hash, target->spec_str, &tmp)) {
+ *spec_id = (long)tmp;
+ *is_new = false;
+ return 0;
+ }
+
+	/* otherwise it's a new ID that needs to be set up in the specs map
+	 * and returned to usdt_manager when the USDT link is detached
+	 */
+ tmp = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids));
+ if (!tmp)
+ return -ENOMEM;
+ link->spec_ids = tmp;
+
+ /* get next free spec ID, giving preference to free list, if not empty */
+ if (man->free_spec_cnt) {
+ *spec_id = man->free_spec_ids[man->free_spec_cnt - 1];
+
+ /* cache spec ID for current spec string for future lookups */
+ err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id);
+ if (err)
+ return err;
+
+ man->free_spec_cnt--;
+ } else {
+ /* don't allocate spec ID bigger than what fits in specs map */
+ if (man->next_free_spec_id >= bpf_map__max_entries(man->specs_map))
+ return -E2BIG;
+
+ *spec_id = man->next_free_spec_id;
+
+ /* cache spec ID for current spec string for future lookups */
+ err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id);
+ if (err)
+ return err;
+
+ man->next_free_spec_id++;
+ }
+
+	/* remember the new spec ID in the link so it can be returned to the free list on detach */
+ link->spec_ids[link->spec_cnt] = *spec_id;
+ link->spec_cnt++;
+ *is_new = true;
+ return 0;
+}
+
+struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog,
+ pid_t pid, const char *path,
+ const char *usdt_provider, const char *usdt_name,
+ long usdt_cookie)
+{
+ int i, fd, err, spec_map_fd, ip_map_fd;
+ LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct hashmap *specs_hash = NULL;
+ struct bpf_link_usdt *link = NULL;
+ struct usdt_target *targets = NULL;
+ size_t target_cnt;
+ Elf *elf;
+
+ spec_map_fd = bpf_map__fd(man->specs_map);
+ ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map);
+
+ /* TODO: perform path resolution similar to uprobe's */
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ err = -errno;
+ pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err);
+ return libbpf_err_ptr(err);
+ }
+
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf) {
+ err = -EBADF;
+ pr_warn("usdt: failed to parse ELF binary '%s': %s\n", path, elf_errmsg(-1));
+ goto err_out;
+ }
+
+ err = sanity_check_usdt_elf(elf, path);
+ if (err)
+ goto err_out;
+
+ /* normalize PID filter */
+ if (pid < 0)
+ pid = -1;
+ else if (pid == 0)
+ pid = getpid();
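+	/* i.e., pid < 0 selects any process, pid == 0 selects this process */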
+
+ /* discover USDT in given binary, optionally limiting
+ * activations to a given PID, if pid > 0
+ */
+ err = collect_usdt_targets(man, elf, path, pid, usdt_provider, usdt_name,
+ usdt_cookie, &targets, &target_cnt);
+ if (err <= 0) {
+ err = (err == 0) ? -ENOENT : err;
+ goto err_out;
+ }
+
+ specs_hash = hashmap__new(specs_hash_fn, specs_equal_fn, NULL);
+ if (IS_ERR(specs_hash)) {
+ err = PTR_ERR(specs_hash);
+ goto err_out;
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ link->usdt_man = man;
+ link->link.detach = &bpf_link_usdt_detach;
+ link->link.dealloc = &bpf_link_usdt_dealloc;
+
+ link->uprobes = calloc(target_cnt, sizeof(*link->uprobes));
+ if (!link->uprobes) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ for (i = 0; i < target_cnt; i++) {
+ struct usdt_target *target = &targets[i];
+ struct bpf_link *uprobe_link;
+ bool is_new;
+ int spec_id;
+
+		/* Spec ID can be either reused or newly allocated. If it is
+		 * newly allocated, we'll need to fill out the spec map;
+		 * otherwise the entire spec is already valid and can simply
+		 * be reused by a new uprobe. We reuse a spec when the USDT
+		 * arg spec is identical. We also never share specs between
+		 * two different USDT attachments ("links"), so all the
+		 * reused specs already share the USDT cookie value
+		 * implicitly.
+		 */
+ err = allocate_spec_id(man, specs_hash, link, target, &spec_id, &is_new);
+ if (err)
+ goto err_out;
+
+ if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) {
+ err = -errno;
+ pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n",
+ spec_id, usdt_provider, usdt_name, path, err);
+ goto err_out;
+ }
+ if (!man->has_bpf_cookie &&
+ bpf_map_update_elem(ip_map_fd, &target->abs_ip, &spec_id, BPF_NOEXIST)) {
+ err = -errno;
+ if (err == -EEXIST) {
+ pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n",
+ spec_id, usdt_provider, usdt_name, path);
+ } else {
+ pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n",
+ target->abs_ip, spec_id, usdt_provider, usdt_name,
+ path, err);
+ }
+ goto err_out;
+ }
+
+ opts.ref_ctr_offset = target->sema_off;
+ opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0;
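+		/* with BPF cookie support the BPF side recovers spec_id via
+		 * bpf_get_attach_cookie(); otherwise it falls back to the
+		 * ip_to_spec_id map populated above
+		 */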
+ uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path,
+ target->rel_ip, &opts);
+ err = libbpf_get_error(uprobe_link);
+ if (err) {
+ pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n",
+ i, usdt_provider, usdt_name, path, err);
+ goto err_out;
+ }
+
+ link->uprobes[i].link = uprobe_link;
+ link->uprobes[i].abs_ip = target->abs_ip;
+ link->uprobe_cnt++;
+ }
+
+ free(targets);
+ hashmap__free(specs_hash);
+ elf_end(elf);
+ close(fd);
+
+ return &link->link;
+
+err_out:
+ if (link)
+ bpf_link__destroy(&link->link);
+ free(targets);
+ hashmap__free(specs_hash);
+ if (elf)
+ elf_end(elf);
+ close(fd);
+ return libbpf_err_ptr(err);
+}
+
+/* Parse out USDT ELF note from '.note.stapsdt' section.
+ * Logic inspired by perf's code.
+ */
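+/* For reference, a typical probe as rendered by `readelf -n` (hypothetical
+ * addresses):
+ *
+ *   Provider: test
+ *   Name: usdt3
+ *   Location: 0x000000000040058d, Base: 0x0000000000400660, Semaphore: 0x0000000000601036
+ *   Arguments: -4@-1204(%rbp) -8@%rax 8@$42
+ *
+ * The note descriptor is three native-word addresses (location, base,
+ * semaphore) followed by three zero-terminated strings (provider, name,
+ * args), which is exactly the layout parsed below.
+ */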
+static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
+ GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
+ struct usdt_note *note)
+{
+ const char *provider, *name, *args;
+ long addrs[3];
+ size_t len;
+
+ /* sanity check USDT note name and type first */
+ if (strncmp(data + name_off, USDT_NOTE_NAME, nhdr->n_namesz) != 0)
+ return -EINVAL;
+ if (nhdr->n_type != USDT_NOTE_TYPE)
+ return -EINVAL;
+
+ /* sanity check USDT note contents ("description" in ELF terminology) */
+ len = nhdr->n_descsz;
+ data = data + desc_off;
+
+ /* +3 is the very minimum required to store three empty strings */
+ if (len < sizeof(addrs) + 3)
+ return -EINVAL;
+
+ /* get location, base, and semaphore addrs */
+ memcpy(&addrs, data, sizeof(addrs));
+
+ /* parse string fields: provider, name, args */
+ provider = data + sizeof(addrs);
+
+ name = (const char *)memchr(provider, '\0', data + len - provider);
+ if (!name) /* non-zero-terminated provider */
+ return -EINVAL;
+ name++;
+ if (name >= data + len || *name == '\0') /* missing or empty name */
+ return -EINVAL;
+
+ args = memchr(name, '\0', data + len - name);
+ if (!args) /* non-zero-terminated name */
+ return -EINVAL;
+ ++args;
+ if (args >= data + len) /* missing arguments spec */
+ return -EINVAL;
+
+ note->provider = provider;
+ note->name = name;
+ if (*args == '\0' || *args == ':')
+ note->args = "";
+ else
+ note->args = args;
+ note->loc_addr = addrs[0];
+ note->base_addr = addrs[1];
+ note->sema_addr = addrs[2];
+
+ return 0;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg);
+
+static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie)
+{
+ const char *s;
+ int len;
+
+ spec->usdt_cookie = usdt_cookie;
+ spec->arg_cnt = 0;
+
+ s = note->args;
+ while (s[0]) {
+ if (spec->arg_cnt >= USDT_MAX_ARG_CNT) {
+ pr_warn("usdt: too many USDT arguments (> %d) for '%s:%s' with args spec '%s'\n",
+ USDT_MAX_ARG_CNT, note->provider, note->name, note->args);
+ return -E2BIG;
+ }
+
+ len = parse_usdt_arg(s, spec->arg_cnt, &spec->args[spec->arg_cnt]);
+ if (len < 0)
+ return len;
+
+ s += len;
+ spec->arg_cnt++;
+ }
+
+ return 0;
+}
+
+/* Architecture-specific logic for parsing USDT argument location specs */
+
+#if defined(__x86_64__) || defined(__i386__)
+
+static int calc_pt_regs_off(const char *reg_name)
+{
+ static struct {
+ const char *names[4];
+ size_t pt_regs_off;
+ } reg_map[] = {
+#ifdef __x86_64__
+#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg64)
+#else
+#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg32)
+#endif
+ { {"rip", "eip", "", ""}, reg_off(rip, eip) },
+ { {"rax", "eax", "ax", "al"}, reg_off(rax, eax) },
+ { {"rbx", "ebx", "bx", "bl"}, reg_off(rbx, ebx) },
+ { {"rcx", "ecx", "cx", "cl"}, reg_off(rcx, ecx) },
+ { {"rdx", "edx", "dx", "dl"}, reg_off(rdx, edx) },
+ { {"rsi", "esi", "si", "sil"}, reg_off(rsi, esi) },
+ { {"rdi", "edi", "di", "dil"}, reg_off(rdi, edi) },
+ { {"rbp", "ebp", "bp", "bpl"}, reg_off(rbp, ebp) },
+ { {"rsp", "esp", "sp", "spl"}, reg_off(rsp, esp) },
+#undef reg_off
+#ifdef __x86_64__
+ { {"r8", "r8d", "r8w", "r8b"}, offsetof(struct pt_regs, r8) },
+ { {"r9", "r9d", "r9w", "r9b"}, offsetof(struct pt_regs, r9) },
+ { {"r10", "r10d", "r10w", "r10b"}, offsetof(struct pt_regs, r10) },
+ { {"r11", "r11d", "r11w", "r11b"}, offsetof(struct pt_regs, r11) },
+ { {"r12", "r12d", "r12w", "r12b"}, offsetof(struct pt_regs, r12) },
+ { {"r13", "r13d", "r13w", "r13b"}, offsetof(struct pt_regs, r13) },
+ { {"r14", "r14d", "r14w", "r14b"}, offsetof(struct pt_regs, r14) },
+ { {"r15", "r15d", "r15w", "r15b"}, offsetof(struct pt_regs, r15) },
+#endif
+ };
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(reg_map); i++) {
+ for (j = 0; j < ARRAY_SIZE(reg_map[i].names); j++) {
+ if (strcmp(reg_name, reg_map[i].names[j]) == 0)
+ return reg_map[i].pt_regs_off;
+ }
+ }
+
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ char *reg_name = NULL;
+ int arg_sz, len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, &reg_name, &len) == 3) {
+ /* Memory dereference case, e.g., -4@-20(%rbp) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, &reg_name, &len) == 2) {
+ /* Register read case, e.g., -4@%eax */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ $%ld %n", &arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@$71 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
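+		/* e.g., a 4-byte arg gets arg_bitshift = 32: shifting a
+		 * 64-bit value left and then right by 32 truncates it to its
+		 * low 32 bits, with an arithmetic right shift sign-extending
+		 * signed args
+		 */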
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ arg_num, arg_str, arg_sz);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#elif defined(__s390x__)
+
+/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ unsigned int reg;
+ int arg_sz, len;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", &arg_sz, &off, &reg, &len) == 3) {
+ /* Memory dereference case, e.g., -2@-28(%r15) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ if (reg > 15) {
+ pr_warn("usdt: unrecognized register '%%r%u'\n", reg);
+ return -EINVAL;
+ }
+ arg->reg_off = offsetof(user_pt_regs, gprs[reg]);
+ } else if (sscanf(arg_str, " %d @ %%r%u %n", &arg_sz, &reg, &len) == 2) {
+ /* Register read case, e.g., -8@%r0 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ if (reg > 15) {
+ pr_warn("usdt: unrecognized register '%%r%u'\n", reg);
+ return -EINVAL;
+ }
+ arg->reg_off = offsetof(user_pt_regs, gprs[reg]);
+ } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@71 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ arg_num, arg_str, arg_sz);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#else
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n");
+ return -ENOTSUP;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3820608faf57..bafdc5373a13 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -168,9 +168,15 @@ $(OUTPUT)/%:%.c
$(call msg,BINARY,,$@)
$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
-$(OUTPUT)/urandom_read: urandom_read.c
+$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
+ $(call msg,LIB,,$@)
+ $(Q)$(CC) $(CFLAGS) -fPIC $(LDFLAGS) $^ $(LDLIBS) --shared -o $@
+
+$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
$(call msg,BINARY,,$@)
- $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@
+ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.c,$^) \
+ liburandom_read.so $(LDLIBS) \
+ -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
$(call msg,MOD,,$@)
@@ -328,12 +334,8 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
linked_vars.skel.h linked_maps.skel.h \
- test_subskeleton.skel.h test_subskeleton_lib.skel.h
-
-# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
-# but that's created as a side-effect of the skel.h generation.
-test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o
-test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o
+ test_subskeleton.skel.h test_subskeleton_lib.skel.h \
+ test_usdt.skel.h
LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \
@@ -346,6 +348,11 @@ test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o
+# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
+# but that's created as a side-effect of the skel.h generation.
+test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o
+test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o
+test_usdt.skel.h-deps := test_usdt.o test_usdt_multispec.o
LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
@@ -400,6 +407,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
$(wildcard $(BPFDIR)/bpf_*.h) \
+ $(wildcard $(BPFDIR)/*.bpf.h) \
| $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS))
@@ -491,6 +499,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
btf_helpers.c flow_dissector_load.h \
cap_helpers.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
+ $(OUTPUT)/liburandom_read.so \
ima_setup.sh \
$(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index d48f6e533e1e..c0c6d410751d 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -11,15 +11,22 @@ static void trigger_func(void)
asm volatile ("");
}
+/* attach point for byname uprobe */
+static void trigger_func2(void)
+{
+ asm volatile ("");
+}
+
void test_attach_probe(void)
{
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
- int duration = 0;
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
struct test_attach_probe* skel;
ssize_t uprobe_offset, ref_ctr_offset;
+ struct bpf_link *uprobe_err_link;
bool legacy;
+ char *mem;
/* Check if new-style kprobe/uprobe API is supported.
* Kernels that support new FD-based kprobe and uprobe BPF attachment
@@ -43,9 +50,9 @@ void test_attach_probe(void)
return;
skel = test_attach_probe__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
- if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n"))
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
goto cleanup;
kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
@@ -90,25 +97,73 @@ void test_attach_probe(void)
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
- /* trigger & validate kprobe && kretprobe */
- usleep(1);
+ /* verify auto-attach fails for old-style uprobe definition */
+ uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname);
+ if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP,
+ "auto-attach should fail for old-style name"))
+ goto cleanup;
+
+ uprobe_opts.func_name = "trigger_func2";
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = 0;
+ skel->links.handle_uprobe_byname =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname,
+ 0 /* this pid */,
+ "/proc/self/exe",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname"))
+ goto cleanup;
+
+ /* verify auto-attach works */
+ skel->links.handle_uretprobe_byname =
+ bpf_program__attach(skel->progs.handle_uretprobe_byname);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname"))
+ goto cleanup;
- if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res",
- "wrong kprobe res: %d\n", skel->bss->kprobe_res))
+ /* test attach by name for a library function, using the library
+ * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo().
+ */
+ uprobe_opts.func_name = "malloc";
+ uprobe_opts.retprobe = false;
+ skel->links.handle_uprobe_byname2 =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname2,
+ 0 /* this pid */,
+ "libc.so.6",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2"))
goto cleanup;
- if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res",
- "wrong kretprobe res: %d\n", skel->bss->kretprobe_res))
+
+ uprobe_opts.func_name = "free";
+ uprobe_opts.retprobe = true;
+ skel->links.handle_uretprobe_byname2 =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_byname2,
+ -1 /* any pid */,
+ "libc.so.6",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2"))
goto cleanup;
+ /* trigger & validate kprobe && kretprobe */
+ usleep(1);
+
+ /* trigger & validate shared library u[ret]probes attached by name */
+ mem = malloc(1);
+ free(mem);
+
/* trigger & validate uprobe & uretprobe */
trigger_func();
- if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
- "wrong uprobe res: %d\n", skel->bss->uprobe_res))
- goto cleanup;
- if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res",
- "wrong uretprobe res: %d\n", skel->bss->uretprobe_res))
- goto cleanup;
+ /* trigger & validate uprobe attached by name */
+ trigger_func2();
+
+ ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
+ ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
cleanup:
test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 044df13ee069..754e80937e5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -4,6 +4,7 @@
#include <network_helpers.h>
#include "for_each_hash_map_elem.skel.h"
#include "for_each_array_map_elem.skel.h"
+#include "for_each_map_elem_write_key.skel.h"
static unsigned int duration;
@@ -129,10 +130,21 @@ out:
for_each_array_map_elem__destroy(skel);
}
+static void test_write_map_key(void)
+{
+ struct for_each_map_elem_write_key *skel;
+
+ skel = for_each_map_elem_write_key__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "for_each_map_elem_write_key__open_and_load"))
+ for_each_map_elem_write_key__destroy(skel);
+}
+
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
test_hash_map();
if (test__start_subtest("array_map"))
test_array_map();
+ if (test__start_subtest("write_map_key"))
+ test_write_map_key();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index f6933b06daf8..1d7a2f1e0731 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -138,12 +138,16 @@ cleanup:
test_ksyms_weak_lskel__destroy(skel);
}
-static void test_write_check(void)
+static void test_write_check(bool test_handler1)
{
struct test_ksyms_btf_write_check *skel;
- skel = test_ksyms_btf_write_check__open_and_load();
- ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n");
+ skel = test_ksyms_btf_write_check__open();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_btf_write_check__open"))
+ return;
+ bpf_program__set_autoload(test_handler1 ? skel->progs.handler2 : skel->progs.handler1, false);
+ ASSERT_ERR(test_ksyms_btf_write_check__load(skel),
+ "unexpected load of a prog writing to ksym memory\n");
test_ksyms_btf_write_check__destroy(skel);
}
@@ -179,6 +183,9 @@ void test_ksyms_btf(void)
if (test__start_subtest("weak_ksyms_lskel"))
test_weak_syms_lskel();
- if (test__start_subtest("write_check"))
- test_write_check();
+ if (test__start_subtest("write_check1"))
+ test_write_check(true);
+
+ if (test__start_subtest("write_check2"))
+ test_write_check(false);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
index 954964f0ac3d..d3915c58d0e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/netcnt.c
+++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c
@@ -25,7 +25,7 @@ void serial_test_netcnt(void)
if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
return;
- nproc = get_nprocs_conf();
+ nproc = bpf_num_possible_cpus();
percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
goto err;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 509e21d5cb9d..b90ee47d3111 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -81,6 +81,7 @@ void test_test_global_funcs(void)
{ "test_global_func14.o", "reference type('FWD S') size cannot be determined" },
{ "test_global_func15.o", "At program exit the register R0 has value" },
{ "test_global_func16.o", "invalid indirect read from stack" },
+ { "test_global_func17.o", "Caller passes invalid args into func#1" },
};
libbpf_print_fn_t old_print_fn = NULL;
int err, i, duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
new file mode 100644
index 000000000000..d6003dc8cc99
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include "test_uprobe_autoattach.skel.h"
+
+/* uprobe attach point */
+static noinline int autoattach_trigger_func(int arg)
+{
+ asm volatile ("");
+ return arg + 1;
+}
+
+void test_uprobe_autoattach(void)
+{
+ struct test_uprobe_autoattach *skel;
+ int trigger_val = 100, trigger_ret;
+ size_t malloc_sz = 1;
+ char *mem;
+
+ skel = test_uprobe_autoattach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(test_uprobe_autoattach__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ skel->bss->test_pid = getpid();
+
+ /* trigger & validate uprobe & uretprobe */
+ trigger_ret = autoattach_trigger_func(trigger_val);
+
+ skel->bss->test_pid = getpid();
+
+ /* trigger & validate shared library u[ret]probes attached by name */
+ mem = malloc(malloc_sz);
+ free(mem);
+
+ ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1");
+ ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
+ ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc");
+ ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran");
+ ASSERT_EQ(skel->bss->uprobe_byname2_parm1, malloc_sz, "check_uprobe_byname2_parm1");
+ ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");
+cleanup:
+ test_uprobe_autoattach__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
new file mode 100644
index 000000000000..a71f51bdc08d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "../sdt.h"
+
+#include "test_usdt.skel.h"
+#include "test_urandom_usdt.skel.h"
+
+int lets_test_this(int);
+
+static volatile int idx = 2;
+static volatile __u64 bla = 0xFEDCBA9876543210ULL;
+static volatile short nums[] = {-1, -2, -3, };
+
+static volatile struct {
+ int x;
+ signed char y;
+} t1 = { 1, -127 };
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short test_usdt0_semaphore SEC(".probes");
+unsigned short test_usdt3_semaphore SEC(".probes");
+unsigned short test_usdt12_semaphore SEC(".probes");
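+
+/* The kernel bumps these semaphores (via uprobe ref_ctr_offset) while a
+ * corresponding uprobe is attached, so the guards in trigger_func() below
+ * skip USDT invocation entirely when nothing is attached.
+ */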
+
+static void __always_inline trigger_func(int x) {
+ long y = 42;
+
+ if (test_usdt0_semaphore)
+ STAP_PROBE(test, usdt0);
+ if (test_usdt3_semaphore)
+ STAP_PROBE3(test, usdt3, x, y, &bla);
+ if (test_usdt12_semaphore) {
+ STAP_PROBE12(test, usdt12,
+ x, x + 1, y, x + y, 5,
+ y / 7, bla, &bla, -9, nums[x],
+ nums[idx], t1.y);
+ }
+}
+
+static void subtest_basic_usdt(void)
+{
+ LIBBPF_OPTS(bpf_usdt_opts, opts);
+ struct test_usdt *skel;
+ struct test_usdt__bss *bss;
+ int err;
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = test_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* usdt0 won't be auto-attached */
+ opts.usdt_cookie = 0xcafedeadbeeffeed;
+ skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
+ 0 /*self*/, "/proc/self/exe",
+ "test", "usdt0", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
+ goto cleanup;
+
+ trigger_func(1);
+
+ ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
+
+ ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
+ ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
+ ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
+
+ /* auto-attached usdt3 gets default zero cookie value */
+ ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
+ ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+ ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+ ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
+ ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+ ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+ /* auto-attached usdt12 gets default zero cookie value */
+ ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
+ ASSERT_EQ(bss->usdt12_arg_cnt, 12, "usdt12_arg_cnt");
+
+ ASSERT_EQ(bss->usdt12_args[0], 1, "usdt12_arg1");
+ ASSERT_EQ(bss->usdt12_args[1], 1 + 1, "usdt12_arg2");
+ ASSERT_EQ(bss->usdt12_args[2], 42, "usdt12_arg3");
+ ASSERT_EQ(bss->usdt12_args[3], 42 + 1, "usdt12_arg4");
+ ASSERT_EQ(bss->usdt12_args[4], 5, "usdt12_arg5");
+ ASSERT_EQ(bss->usdt12_args[5], 42 / 7, "usdt12_arg6");
+ ASSERT_EQ(bss->usdt12_args[6], bla, "usdt12_arg7");
+ ASSERT_EQ(bss->usdt12_args[7], (uintptr_t)&bla, "usdt12_arg8");
+ ASSERT_EQ(bss->usdt12_args[8], -9, "usdt12_arg9");
+ ASSERT_EQ(bss->usdt12_args[9], nums[1], "usdt12_arg10");
+ ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
+ ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");
+
+ /* trigger_func() is marked __always_inline, so USDT invocations will be
+ * inlined in two different places, meaning that each USDT will have
+ * at least 2 different places to be attached to. This verifies that
+ * bpf_program__attach_usdt() handles this properly and attaches to
+ * all possible places of USDT invocation.
+ */
+ trigger_func(2);
+
+ ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called");
+
+ /* only check values that depend on trigger_func()'s input value */
+ ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1");
+
+ ASSERT_EQ(bss->usdt12_args[0], 2, "usdt12_arg1");
+ ASSERT_EQ(bss->usdt12_args[1], 2 + 1, "usdt12_arg2");
+ ASSERT_EQ(bss->usdt12_args[3], 42 + 2, "usdt12_arg4");
+ ASSERT_EQ(bss->usdt12_args[9], nums[2], "usdt12_arg10");
+
+ /* detach and re-attach usdt3 */
+ bpf_link__destroy(skel->links.usdt3);
+
+ opts.usdt_cookie = 0xBADC00C51E;
+ skel->links.usdt3 = bpf_program__attach_usdt(skel->progs.usdt3, -1 /* any pid */,
+ "/proc/self/exe", "test", "usdt3", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach"))
+ goto cleanup;
+
+ trigger_func(3);
+
+ ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called");
+ /* this time usdt3 has custom cookie */
+ ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie");
+ ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+ ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+ ASSERT_EQ(bss->usdt3_args[0], 3, "usdt3_arg1");
+ ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+ ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+cleanup:
+ test_usdt__destroy(skel);
+}
+
+unsigned short test_usdt_100_semaphore SEC(".probes");
+unsigned short test_usdt_300_semaphore SEC(".probes");
+unsigned short test_usdt_400_semaphore SEC(".probes");
+
+#define R10(F, X) F(X+0); F(X+1);F(X+2); F(X+3); F(X+4); \
+ F(X+5); F(X+6); F(X+7); F(X+8); F(X+9);
+#define R100(F, X) R10(F,X+ 0);R10(F,X+10);R10(F,X+20);R10(F,X+30);R10(F,X+40); \
+ R10(F,X+50);R10(F,X+60);R10(F,X+70);R10(F,X+80);R10(F,X+90);
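+
+/* e.g., R100(f100, 0) expands to 100 calls: f100(0), f100(1), ..., f100(99) */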
+
+/* carefully control that we get exactly 100 inlined call sites: f100() is
+ * always inlined into trigger_100_usdts(), which in turn is marked __weak to
+ * prevent it from being inlined (and its call sites duplicated) itself
+ */
+static void __always_inline f100(int x)
+{
+ STAP_PROBE1(test, usdt_100, x);
+}
+
+__weak void trigger_100_usdts(void)
+{
+ R100(f100, 0);
+}
+
+/* we shouldn't be able to attach to test:usdt_300 USDT as we don't have as
+ * many slots for specs. It's important that each STAP_PROBE1() invocation
+ * (after unrolling) gets a different arg spec, because the compiler inlines
+ * x as a distinct constant at each call site
+ */
+static void __always_inline f300(int x)
+{
+ STAP_PROBE1(test, usdt_300, x);
+}
+
+__weak void trigger_300_usdts(void)
+{
+ R100(f300, 0);
+ R100(f300, 100);
+ R100(f300, 200);
+}
+
+static void __always_inline f400(int x __attribute__((unused)))
+{
+ static int y;
+
+ STAP_PROBE1(test, usdt_400, y++);
+}
+
+/* this time we have 400 different USDT call sites, but they have a uniform
+ * argument location, so libbpf's spec string deduplication logic should keep
+ * the number of specs in use very small, and so we should be able to attach
+ * to all 400 call sites
+ */
+__weak void trigger_400_usdts(void)
+{
+ R100(f400, 0);
+ R100(f400, 100);
+ R100(f400, 200);
+ R100(f400, 300);
+}
+
+static void subtest_multispec_usdt(void)
+{
+ LIBBPF_OPTS(bpf_usdt_opts, opts);
+ struct test_usdt *skel;
+ struct test_usdt__bss *bss;
+ int err, i;
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = test_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+	/* usdt_100 is auto-attached and there are 100 inlined call sites;
+	 * let's validate that all of them are properly attached to and
+	 * handled on the BPF side
+	 */
+ trigger_100_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, 100, "usdt_100_called");
+ ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+
+	/* Stress test free spec ID tracking. By default libbpf allows up to
+	 * 256 specs to be used, so if we don't return free spec IDs back
+	 * after a few detachments and re-attachments we should run out of
+	 * available spec IDs.
+	 */
+ for (i = 0; i < 2; i++) {
+ bpf_link__destroy(skel->links.usdt_100);
+
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+ "/proc/self/exe",
+ "test", "usdt_100", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_100_reattach"))
+ goto cleanup;
+
+ bss->usdt_100_sum = 0;
+ trigger_100_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, (i + 1) * 100 + 100, "usdt_100_called");
+ ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+ }
+
+	/* Now let's step it up and try to attach a USDT that requires more
+	 * than 256 attach points with a different spec for each.
+	 * Note that we need trigger_300_usdts() only so that the 300 USDT
+	 * call sites actually exist; we are not going to trace them.
+	 */
+ trigger_300_usdts();
+
+ /* we'll reuse usdt_100 BPF program for usdt_300 test */
+ bpf_link__destroy(skel->links.usdt_100);
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
+ "test", "usdt_300", NULL);
+ err = -errno;
+ if (!ASSERT_ERR_PTR(skel->links.usdt_100, "usdt_300_bad_attach"))
+ goto cleanup;
+ ASSERT_EQ(err, -E2BIG, "usdt_300_attach_err");
+
+ /* let's check that there are no "dangling" BPF programs attached due
+ * to partial success of the above test:usdt_300 attachment
+ */
+ bss->usdt_100_called = 0;
+ bss->usdt_100_sum = 0;
+
+	f300(777); /* this is the 301st instance of usdt_300 */
+
+ ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
+ ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
+
+ /* This time we have USDT with 400 inlined invocations, but arg specs
+ * should be the same across all sites, so libbpf will only need to
+ * use one spec and thus we'll be able to attach 400 uprobes
+ * successfully.
+ *
+ * Again, we are reusing usdt_100 BPF program.
+ */
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+ "/proc/self/exe",
+ "test", "usdt_400", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_400_attach"))
+ goto cleanup;
+
+ trigger_400_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called");
+ ASSERT_EQ(bss->usdt_100_sum, 399 * 400 / 2, "usdt_400_sum");
+
+cleanup:
+ test_usdt__destroy(skel);
+}
+
+static FILE *urand_spawn(int *pid)
+{
+ FILE *f;
+
+ /* urandom_read's stdout is wired into f */
+ f = popen("./urandom_read 1 report-pid", "r");
+ if (!f)
+ return NULL;
+
+ if (fscanf(f, "%d", pid) != 1) {
+ pclose(f);
+ return NULL;
+ }
+
+ return f;
+}
+
+static int urand_trigger(FILE **urand_pipe)
+{
+ int exit_code;
+
+	/* pclose() waits for the child process to exit and returns its exit code */
+ exit_code = pclose(*urand_pipe);
+ *urand_pipe = NULL;
+
+ return exit_code;
+}
+
+static void subtest_urandom_usdt(bool auto_attach)
+{
+ struct test_urandom_usdt *skel;
+ struct test_urandom_usdt__bss *bss;
+ struct bpf_link *l;
+ FILE *urand_pipe = NULL;
+ int err, urand_pid = 0;
+
+ skel = test_urandom_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ urand_pipe = urand_spawn(&urand_pid);
+ if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
+ goto cleanup;
+
+ bss = skel->bss;
+ bss->urand_pid = urand_pid;
+
+ if (auto_attach) {
+ err = test_urandom_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_auto_attach"))
+ goto cleanup;
+ } else {
+ l = bpf_program__attach_usdt(skel->progs.urand_read_without_sema,
+ urand_pid, "./urandom_read",
+ "urand", "read_without_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urand_without_sema_attach"))
+ goto cleanup;
+ skel->links.urand_read_without_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urand_read_with_sema,
+ urand_pid, "./urandom_read",
+ "urand", "read_with_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urand_with_sema_attach"))
+ goto cleanup;
+ skel->links.urand_read_with_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urandlib_read_without_sema,
+ urand_pid, "./liburandom_read.so",
+ "urandlib", "read_without_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urandlib_without_sema_attach"))
+ goto cleanup;
+ skel->links.urandlib_read_without_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urandlib_read_with_sema,
+ urand_pid, "./liburandom_read.so",
+ "urandlib", "read_with_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urandlib_with_sema_attach"))
+ goto cleanup;
+ skel->links.urandlib_read_with_sema = l;
+
+ }
+
+ /* trigger urandom_read USDTs */
+ ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");
+
+ ASSERT_EQ(bss->urand_read_without_sema_call_cnt, 1, "urand_wo_sema_cnt");
+ ASSERT_EQ(bss->urand_read_without_sema_buf_sz_sum, 256, "urand_wo_sema_sum");
+
+ ASSERT_EQ(bss->urand_read_with_sema_call_cnt, 1, "urand_w_sema_cnt");
+ ASSERT_EQ(bss->urand_read_with_sema_buf_sz_sum, 256, "urand_w_sema_sum");
+
+ ASSERT_EQ(bss->urandlib_read_without_sema_call_cnt, 1, "urandlib_wo_sema_cnt");
+ ASSERT_EQ(bss->urandlib_read_without_sema_buf_sz_sum, 256, "urandlib_wo_sema_sum");
+
+ ASSERT_EQ(bss->urandlib_read_with_sema_call_cnt, 1, "urandlib_w_sema_cnt");
+ ASSERT_EQ(bss->urandlib_read_with_sema_buf_sz_sum, 256, "urandlib_w_sema_sum");
+
+cleanup:
+ if (urand_pipe)
+ pclose(urand_pipe);
+ test_urandom_usdt__destroy(skel);
+}
+
+void test_usdt(void)
+{
+ if (test__start_subtest("basic"))
+ subtest_basic_usdt();
+ if (test__start_subtest("multispec"))
+ subtest_multispec_usdt();
+ if (test__start_subtest("urand_auto_attach"))
+ subtest_urandom_usdt(true /* auto_attach */);
+ if (test__start_subtest("urand_pid_attach"))
+ subtest_urandom_usdt(false /* auto_attach */);
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
new file mode 100644
index 000000000000..8e545865ea33
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array_map SEC(".maps");
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ void *data)
+{
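+	/* writing into the map key is deliberately invalid; the verifier
+	 * must reject this program at load time (see the write_map_key
+	 * subtest, which expects open_and_load to fail)
+	 */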
+ bpf_get_current_comm(key, sizeof(*key));
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int test_map_key_write(const void *ctx)
+{
+ bpf_for_each_map_elem(&array_map, check_array_elem, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
index b964ec1390c2..963b393c37e8 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs1.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -4,6 +4,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
/* weak and shared between two files */
const volatile int my_tid __weak;
@@ -44,6 +45,13 @@ void set_output_ctx1(__u64 *ctx)
/* this weak instance should win because it's the first one */
__weak int set_output_weak(int x)
{
+ static volatile int whatever;
+
+ /* make sure we use CO-RE relocations in a weak function, this used to
+ * cause problems for BPF static linker
+ */
+ whatever = bpf_core_type_size(struct task_struct);
+
output_weak1 = x;
return x;
}
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
index 575e958e60b7..db195872f4eb 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs2.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -4,6 +4,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
/* weak and shared between both files */
const volatile int my_tid __weak;
@@ -44,6 +45,13 @@ void set_output_ctx2(__u64 *ctx)
/* this weak instance should lose, because it will be processed second */
__weak int set_output_weak(int x)
{
+ static volatile int whatever;
+
+	/* make sure we use CO-RE relocations in a weak function; this used to
+	 * cause problems for the BPF static linker
+ */
+ whatever = 2 * bpf_core_type_size(struct task_struct);
+
output_weak2 = x;
return 2 * x;
}
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
index b3fcb5274ee0..f793280a3238 100644
--- a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -35,10 +35,10 @@ int oncpu(void *ctx)
long val;
val = bpf_get_stackid(ctx, &stackmap, 0);
- if (val > 0)
+ if (val >= 0)
stackid_kernel = 2;
val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
- if (val > 0)
+ if (val >= 0)
stackid_user = 2;
trace = bpf_map_lookup_elem(&stackdata_map, &key);
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 4896fdf816f7..92331053dba3 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -826,8 +826,9 @@ out:
SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
- struct dentry* old_dentry, struct inode* dir,
- struct dentry* new_dentry, struct inode** delegated_inode)
+ struct dentry* old_dentry, struct user_namespace *mnt_userns,
+ struct inode* dir, struct dentry* new_dentry,
+ struct inode** delegated_inode)
{
struct bpf_func_stats_ctx stats_ctx;
bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 8056a4c6d918..af994d16bb10 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -10,6 +10,10 @@ int kprobe_res = 0;
int kretprobe_res = 0;
int uprobe_res = 0;
int uretprobe_res = 0;
+int uprobe_byname_res = 0;
+int uretprobe_byname_res = 0;
+int uprobe_byname2_res = 0;
+int uretprobe_byname2_res = 0;
SEC("kprobe/sys_nanosleep")
int handle_kprobe(struct pt_regs *ctx)
@@ -25,18 +29,51 @@ int BPF_KRETPROBE(handle_kretprobe)
return 0;
}
-SEC("uprobe/trigger_func")
+SEC("uprobe")
int handle_uprobe(struct pt_regs *ctx)
{
uprobe_res = 3;
return 0;
}
-SEC("uretprobe/trigger_func")
+SEC("uretprobe")
int handle_uretprobe(struct pt_regs *ctx)
{
uretprobe_res = 4;
return 0;
}
+SEC("uprobe")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+ uprobe_byname_res = 5;
+ return 0;
+}
+
+/* use auto-attach format for section definition. */
+SEC("uretprobe//proc/self/exe:trigger_func2")
+int handle_uretprobe_byname(struct pt_regs *ctx)
+{
+ uretprobe_byname_res = 6;
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe_byname2(struct pt_regs *ctx)
+{
+ unsigned int size = PT_REGS_PARM1(ctx);
+
+ /* verify malloc size */
+ if (size == 1)
+ uprobe_byname2_res = 7;
+ return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe_byname2(struct pt_regs *ctx)
+{
+ uretprobe_byname2_res = 8;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
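
The SEC("uprobe") and SEC("uretprobe") programs above carry no target in the section name, so the test harness attaches them manually. A minimal sketch of such an attachment by function name, assuming a bpftool-generated skeleton header and the func_name field of struct bpf_uprobe_opts from contemporary libbpf:

#include <bpf/libbpf.h>
#include "test_attach_probe.skel.h" /* hypothetical generated skeleton */

static struct bpf_link *attach_uprobe_by_name(struct test_attach_probe *skel)
{
	LIBBPF_OPTS(bpf_uprobe_opts, opts,
		.func_name = "trigger_func", /* libbpf resolves this to an offset */
	);

	/* pid 0 means "this process"; offset 0 because func_name is set */
	return bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
					       0, "/proc/self/exe", 0, &opts);
}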
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
index 2d3a7710e2ce..0e2222968918 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -37,14 +37,14 @@ int handle_kretprobe(struct pt_regs *ctx)
return 0;
}
-SEC("uprobe/trigger_func")
+SEC("uprobe")
int handle_uprobe(struct pt_regs *ctx)
{
update(ctx, &uprobe_res);
return 0;
}
-SEC("uretprobe/trigger_func")
+SEC("uretprobe")
int handle_uretprobe(struct pt_regs *ctx)
{
update(ctx, &uretprobe_res);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c
new file mode 100644
index 000000000000..2b8b9b8ba018
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func17.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline int foo(int *p)
+{
+ return p ? (*p = 42) : 0;
+}
+
+const volatile int i;
+
+SEC("tc")
+int test_cls(struct __sk_buff *skb)
+{
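+	/* the verifier is expected to reject this write to read-only data */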
+ return foo((int *)&i);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
index 2180c41cd890..a72a5bf3812a 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
@@ -8,7 +8,7 @@
extern const int bpf_prog_active __ksym; /* int type global var. */
SEC("raw_tp/sys_enter")
-int handler(const void *ctx)
+int handler1(const void *ctx)
{
int *active;
__u32 cpu;
@@ -26,4 +26,20 @@ int handler(const void *ctx)
return 0;
}
+__noinline int write_active(int *p)
+{
+ return p ? (*p = 42) : 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+ int *active;
+ __u32 cpu;
+
+ active = bpf_this_cpu_ptr(&bpf_prog_active);
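+	/* the verifier is expected to reject the write inside write_active() */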
+ write_active(active);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 19e4d2071c60..c8bc0c6947aa 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -218,7 +218,7 @@ static __noinline bool get_packet_dst(struct real_definition **real,
if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
- return 0;
+ return false;
real_pos = bpf_map_lookup_elem(&ch_rings, &key);
if (!real_pos)
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 02f79356d5eb..98c6493d9b91 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -89,7 +89,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
static inline int
handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
- struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
const int zero = 0;
size_t tuple_len;
@@ -121,7 +120,6 @@ assign:
static inline int
handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
- struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
const int zero = 0;
size_t tuple_len;
@@ -161,7 +159,7 @@ assign:
SEC("tc")
int bpf_sk_assign_test(struct __sk_buff *skb)
{
- struct bpf_sock_tuple *tuple, ln = {0};
+ struct bpf_sock_tuple *tuple;
bool ipv4 = false;
bool tcp = false;
int tuple_len;
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
index e6cb09259408..1926facba122 100644
--- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -14,7 +14,7 @@ char current_regs[PT_REGS_SIZE] = {};
char ctx_regs[PT_REGS_SIZE] = {};
int uprobe_res = 0;
-SEC("uprobe/trigger_func")
+SEC("uprobe")
int handle_uprobe(struct pt_regs *ctx)
{
struct task_struct *current;
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
new file mode 100644
index 000000000000..ab75522e2eeb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int uprobe_byname_parm1 = 0;
+int uprobe_byname_ran = 0;
+int uretprobe_byname_rc = 0;
+int uretprobe_byname_ran = 0;
+size_t uprobe_byname2_parm1 = 0;
+int uprobe_byname2_ran = 0;
+char *uretprobe_byname2_rc = NULL;
+int uretprobe_byname2_ran = 0;
+
+int test_pid;
+
+/* This program cannot auto-attach, but that should not stop other
+ * programs from attaching.
+ */
+SEC("uprobe")
+int handle_uprobe_noautoattach(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("uprobe//proc/self/exe:autoattach_trigger_func")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+ uprobe_byname_parm1 = PT_REGS_PARM1_CORE(ctx);
+ uprobe_byname_ran = 1;
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:autoattach_trigger_func")
+int handle_uretprobe_byname(struct pt_regs *ctx)
+{
+ uretprobe_byname_rc = PT_REGS_RC_CORE(ctx);
+ uretprobe_byname_ran = 2;
+ return 0;
+}
+
+SEC("uprobe/libc.so.6:malloc")
+int handle_uprobe_byname2(struct pt_regs *ctx)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid)
+ return 0;
+ uprobe_byname2_parm1 = PT_REGS_PARM1_CORE(ctx);
+ uprobe_byname2_ran = 3;
+ return 0;
+}
+
+SEC("uretprobe/libc.so.6:malloc")
+int handle_uretprobe_byname2(struct pt_regs *ctx)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid)
+ return 0;
+ uretprobe_byname2_rc = (char *)PT_REGS_RC_CORE(ctx);
+ uretprobe_byname2_ran = 4;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
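
Because the targets are encoded in the section names, the skeleton's generated attach step can wire everything up without per-program code. A minimal sketch, assuming the usual bpftool-generated skeleton API:

#include "test_uprobe_autoattach.skel.h" /* hypothetical generated skeleton */

int run_autoattach(void)
{
	struct test_uprobe_autoattach *skel;
	int err;

	skel = test_uprobe_autoattach__open_and_load();
	if (!skel)
		return -1;

	/* parses each SEC("uprobe/<path>:<func>") and attaches it;
	 * handle_uprobe_noautoattach has no target and is skipped, not
	 * treated as an error
	 */
	err = test_uprobe_autoattach__attach(skel);

	test_uprobe_autoattach__destroy(skel);
	return err;
}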
diff --git a/tools/testing/selftests/bpf/progs/test_urandom_usdt.c b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
new file mode 100644
index 000000000000..3539b02bd5f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int urand_pid;
+
+int urand_read_without_sema_call_cnt;
+int urand_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_without_sema")
+int BPF_USDT(urand_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urand_read_without_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urand_read_without_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urand_read_with_sema_call_cnt;
+int urand_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_with_sema")
+int BPF_USDT(urand_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urand_read_with_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urand_read_with_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urandlib_read_without_sema_call_cnt;
+int urandlib_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_without_sema")
+int BPF_USDT(urandlib_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urandlib_read_without_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urandlib_read_without_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urandlib_read_with_sema_call_cnt;
+int urandlib_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_with_sema")
+int BPF_USDT(urandlib_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urandlib_read_with_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urandlib_read_with_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
new file mode 100644
index 000000000000..505aab9a5234
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int my_pid;
+
+int usdt0_called;
+u64 usdt0_cookie;
+int usdt0_arg_cnt;
+int usdt0_arg_ret;
+
+SEC("usdt")
+int usdt0(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt0_called, 1);
+
+ usdt0_cookie = bpf_usdt_cookie(ctx);
+ usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx);
+ /* should return -ENOENT for any arg_num */
+ usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp);
+ return 0;
+}
+
+int usdt3_called;
+u64 usdt3_cookie;
+int usdt3_arg_cnt;
+int usdt3_arg_rets[3];
+u64 usdt3_args[3];
+
+SEC("usdt//proc/self/exe:test:usdt3")
+int usdt3(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt3_called, 1);
+
+ usdt3_cookie = bpf_usdt_cookie(ctx);
+ usdt3_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp);
+ usdt3_args[0] = (int)tmp;
+
+ usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp);
+ usdt3_args[1] = (long)tmp;
+
+ usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp);
+ usdt3_args[2] = (uintptr_t)tmp;
+
+ return 0;
+}
+
+int usdt12_called;
+u64 usdt12_cookie;
+int usdt12_arg_cnt;
+u64 usdt12_args[12];
+
+SEC("usdt//proc/self/exe:test:usdt12")
+int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
+ long a6, __u64 a7, uintptr_t a8, int a9, short a10,
+ short a11, signed char a12)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt12_called, 1);
+
+ usdt12_cookie = bpf_usdt_cookie(ctx);
+ usdt12_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt12_args[0] = a1;
+ usdt12_args[1] = a2;
+ usdt12_args[2] = a3;
+ usdt12_args[3] = a4;
+ usdt12_args[4] = a5;
+ usdt12_args[5] = a6;
+ usdt12_args[6] = a7;
+ usdt12_args[7] = a8;
+ usdt12_args[8] = a9;
+ usdt12_args[9] = a10;
+ usdt12_args[10] = a11;
+ usdt12_args[11] = a12;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
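
The bare SEC("usdt") program above is likewise attached manually. A minimal sketch using bpf_program__attach_usdt() with a cookie, assuming a generated skeleton; the pid, cookie value, and wrapper are illustrative:

#include <sys/types.h>
#include <bpf/libbpf.h>
#include "test_usdt.skel.h" /* hypothetical generated skeleton */

static struct bpf_link *attach_usdt0(struct test_usdt *skel, pid_t pid)
{
	LIBBPF_OPTS(bpf_usdt_opts, opts, .usdt_cookie = 0xcafe);

	/* attach to every site of the test:usdt0 probe in our own binary */
	return bpf_program__attach_usdt(skel->progs.usdt0, pid,
					"/proc/self/exe", "test", "usdt0",
					&opts);
}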
diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
new file mode 100644
index 000000000000..aa6de32b50d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+/* this file is linked together with test_usdt.c to validate that usdt.bpf.h
+ * can be included in multiple .bpf.c files that together form a single final
+ * BPF object file
+ */
+
+extern int my_pid;
+
+int usdt_100_called;
+int usdt_100_sum;
+
+SEC("usdt//proc/self/exe:test:usdt_100")
+int BPF_USDT(usdt_100, int x)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt_100_called, 1);
+ __sync_fetch_and_add(&usdt_100_sum, x);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 596c4e71bf3a..125d872d7981 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -564,22 +564,22 @@ static bool get_packet_dst(struct real_definition **real,
hash = get_packet_hash(pckt, hash_16bytes);
if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
- return 0;
+ return false;
key = 2 * vip_info->vip_num + hash % 2;
real_pos = bpf_map_lookup_elem(&ch_rings, &key);
if (!real_pos)
- return 0;
+ return false;
key = *real_pos;
*real = bpf_map_lookup_elem(&reals, &key);
if (!(*real))
- return 0;
+ return false;
if (!(vip_info->flags & (1 << 1))) {
__u32 conn_rate_key = 512 + 2;
struct lb_stats *conn_rate_stats =
bpf_map_lookup_elem(&stats, &conn_rate_key);
if (!conn_rate_stats)
- return 1;
+ return true;
cur_time = bpf_ktime_get_ns();
if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
conn_rate_stats->v1 = 1;
@@ -587,14 +587,14 @@ static bool get_packet_dst(struct real_definition **real,
} else {
conn_rate_stats->v1 += 1;
if (conn_rate_stats->v1 >= 1)
- return 1;
+ return true;
}
if (pckt->flow.proto == IPPROTO_UDP)
new_dst_lru.atime = cur_time;
new_dst_lru.pos = key;
bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
}
- return 1;
+ return true;
}
__attribute__ ((noinline))
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2ab049b54d6c..694e7cec1823 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -54,7 +54,7 @@ int bench_trigger_fmodret(void *ctx)
return -22;
}
-SEC("uprobe/self/uprobe_target")
+SEC("uprobe")
int bench_trigger_uprobe(void *ctx)
{
__sync_add_and_fetch(&hits, 1);
diff --git a/tools/testing/selftests/bpf/sdt-config.h b/tools/testing/selftests/bpf/sdt-config.h
new file mode 100644
index 000000000000..733045a52771
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt-config.h
@@ -0,0 +1,6 @@
+/* includes/sys/sdt-config.h. Generated from sdt-config.h.in by configure.
+
+ This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to
+ indicate whether the assembler supports "?" in .pushsection directives. */
+
+#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1
diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
new file mode 100644
index 000000000000..ca0162b4dc57
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt.h
@@ -0,0 +1,513 @@
+/* <sys/sdt.h> - Systemtap static probe definition macros.
+
+ This file is dedicated to the public domain, pursuant to CC0
+ (https://creativecommons.org/publicdomain/zero/1.0/)
+*/
+
+#ifndef _SYS_SDT_H
+#define _SYS_SDT_H 1
+
+/*
+ This file defines a family of macros
+
+ STAP_PROBEn(op1, ..., opn)
+
+ that emit a nop into the instruction stream, and some data into an auxiliary
+ note section. The data in the note section describes the operands, in terms
+  of size and location.  Each location is encoded as an assembler operand string.
+ Consumer tools such as gdb or systemtap insert breakpoints on top of
+ the nop, and decode the location operand-strings, like an assembler,
+ to find the values being passed.
+
+ The operand strings are selected by the compiler for each operand.
+ They are constrained by gcc inline-assembler codes. The default is:
+  They are constrained by gcc inline-assembler constraint codes.  The default is:
+ #define STAP_SDT_ARG_CONSTRAINT nor
+
+  This is a good default if the operands tend to be integral and
+  moderate in number (smaller than the number of registers).  In other
+  cases, the compiler may report "'asm' requires impossible reload" or
+  similar.  In that case, consider simplifying the macro call (fewer
+  and simpler operands), reducing optimization, or overriding the default
+  constraint string via:
+
+ #define STAP_SDT_ARG_CONSTRAINT g
+ #include <sys/sdt.h>
+
+ See also:
+ https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ */
+
+#ifdef __ASSEMBLER__
+# define _SDT_PROBE(provider, name, n, arglist) \
+ _SDT_ASM_BODY(provider, name, _SDT_ASM_SUBSTR_1, (_SDT_DEPAREN_##n arglist)) \
+ _SDT_ASM_BASE
+# define _SDT_ASM_1(x) x;
+# define _SDT_ASM_2(a, b) a,b;
+# define _SDT_ASM_3(a, b, c) a,b,c;
+# define _SDT_ASM_5(a, b, c, d, e) a,b,c,d,e;
+# define _SDT_ASM_STRING_1(x) .asciz #x;
+# define _SDT_ASM_SUBSTR_1(x) .ascii #x;
+# define _SDT_DEPAREN_0() /* empty */
+# define _SDT_DEPAREN_1(a) a
+# define _SDT_DEPAREN_2(a,b) a b
+# define _SDT_DEPAREN_3(a,b,c) a b c
+# define _SDT_DEPAREN_4(a,b,c,d) a b c d
+# define _SDT_DEPAREN_5(a,b,c,d,e) a b c d e
+# define _SDT_DEPAREN_6(a,b,c,d,e,f) a b c d e f
+# define _SDT_DEPAREN_7(a,b,c,d,e,f,g) a b c d e f g
+# define _SDT_DEPAREN_8(a,b,c,d,e,f,g,h) a b c d e f g h
+# define _SDT_DEPAREN_9(a,b,c,d,e,f,g,h,i) a b c d e f g h i
+# define _SDT_DEPAREN_10(a,b,c,d,e,f,g,h,i,j) a b c d e f g h i j
+# define _SDT_DEPAREN_11(a,b,c,d,e,f,g,h,i,j,k) a b c d e f g h i j k
+# define _SDT_DEPAREN_12(a,b,c,d,e,f,g,h,i,j,k,l) a b c d e f g h i j k l
+#else
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name) \
+ __asm__ __volatile__ ("" :: "m" (provider##_##name##_semaphore));
+#else
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name)
+#endif
+
+# define _SDT_PROBE(provider, name, n, arglist) \
+ do { \
+ _SDT_NOTE_SEMAPHORE_USE(provider, name); \
+ __asm__ __volatile__ (_SDT_ASM_BODY(provider, name, _SDT_ASM_ARGS, (n)) \
+ :: _SDT_ASM_OPERANDS_##n arglist); \
+ __asm__ __volatile__ (_SDT_ASM_BASE); \
+ } while (0)
+# define _SDT_S(x) #x
+# define _SDT_ASM_1(x) _SDT_S(x) "\n"
+# define _SDT_ASM_2(a, b) _SDT_S(a) "," _SDT_S(b) "\n"
+# define _SDT_ASM_3(a, b, c) _SDT_S(a) "," _SDT_S(b) "," \
+ _SDT_S(c) "\n"
+# define _SDT_ASM_5(a, b, c, d, e) _SDT_S(a) "," _SDT_S(b) "," \
+ _SDT_S(c) "," _SDT_S(d) "," \
+ _SDT_S(e) "\n"
+# define _SDT_ASM_ARGS(n) _SDT_ASM_TEMPLATE_##n
+# define _SDT_ASM_STRING_1(x) _SDT_ASM_1(.asciz #x)
+# define _SDT_ASM_SUBSTR_1(x) _SDT_ASM_1(.ascii #x)
+
+# define _SDT_ARGFMT(no) _SDT_ASM_1(_SDT_SIGN %n[_SDT_S##no]) \
+ _SDT_ASM_1(_SDT_SIZE %n[_SDT_S##no]) \
+ _SDT_ASM_1(_SDT_TYPE %n[_SDT_S##no]) \
+ _SDT_ASM_SUBSTR(_SDT_ARGTMPL(_SDT_A##no))
+
+
+# ifndef STAP_SDT_ARG_CONSTRAINT
+# if defined __powerpc__
+# define STAP_SDT_ARG_CONSTRAINT nZr
+# elif defined __arm__
+# define STAP_SDT_ARG_CONSTRAINT g
+# else
+# define STAP_SDT_ARG_CONSTRAINT nor
+# endif
+# endif
+
+# define _SDT_STRINGIFY(x) #x
+# define _SDT_ARG_CONSTRAINT_STRING(x) _SDT_STRINGIFY(x)
+/* _SDT_S encodes the size and type as 0xSSTT which is decoded by the assembler
+ macros _SDT_SIZE and _SDT_TYPE */
+# define _SDT_ARG(n, x) \
+ [_SDT_S##n] "n" ((_SDT_ARGSIGNED (x) ? (int)-1 : 1) * (-(((int) _SDT_ARGSIZE (x)) << 8) + (-(0x7f & __builtin_classify_type (x))))), \
+ [_SDT_A##n] _SDT_ARG_CONSTRAINT_STRING (STAP_SDT_ARG_CONSTRAINT) (_SDT_ARGVAL (x))
+#endif
+#define _SDT_ASM_STRING(x) _SDT_ASM_STRING_1(x)
+#define _SDT_ASM_SUBSTR(x) _SDT_ASM_SUBSTR_1(x)
+
+#define _SDT_ARGARRAY(x) (__builtin_classify_type (x) == 14 \
+ || __builtin_classify_type (x) == 5)
+
+#ifdef __cplusplus
+# define _SDT_ARGSIGNED(x) (!_SDT_ARGARRAY (x) \
+ && __sdt_type<__typeof (x)>::__sdt_signed)
+# define _SDT_ARGSIZE(x) (_SDT_ARGARRAY (x) \
+ ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x) (x)
+
+# include <cstddef>
+
+template<typename __sdt_T>
+struct __sdt_type
+{
+ static const bool __sdt_signed = false;
+};
+
+#define __SDT_ALWAYS_SIGNED(T) \
+template<> struct __sdt_type<T> { static const bool __sdt_signed = true; };
+#define __SDT_COND_SIGNED(T,CT) \
+template<> struct __sdt_type<T> { static const bool __sdt_signed = ((CT)(-1) < 1); };
+__SDT_ALWAYS_SIGNED(signed char)
+__SDT_ALWAYS_SIGNED(short)
+__SDT_ALWAYS_SIGNED(int)
+__SDT_ALWAYS_SIGNED(long)
+__SDT_ALWAYS_SIGNED(long long)
+__SDT_ALWAYS_SIGNED(volatile signed char)
+__SDT_ALWAYS_SIGNED(volatile short)
+__SDT_ALWAYS_SIGNED(volatile int)
+__SDT_ALWAYS_SIGNED(volatile long)
+__SDT_ALWAYS_SIGNED(volatile long long)
+__SDT_ALWAYS_SIGNED(const signed char)
+__SDT_ALWAYS_SIGNED(const short)
+__SDT_ALWAYS_SIGNED(const int)
+__SDT_ALWAYS_SIGNED(const long)
+__SDT_ALWAYS_SIGNED(const long long)
+__SDT_ALWAYS_SIGNED(const volatile signed char)
+__SDT_ALWAYS_SIGNED(const volatile short)
+__SDT_ALWAYS_SIGNED(const volatile int)
+__SDT_ALWAYS_SIGNED(const volatile long)
+__SDT_ALWAYS_SIGNED(const volatile long long)
+__SDT_COND_SIGNED(char, char)
+__SDT_COND_SIGNED(wchar_t, wchar_t)
+__SDT_COND_SIGNED(volatile char, char)
+__SDT_COND_SIGNED(volatile wchar_t, wchar_t)
+__SDT_COND_SIGNED(const char, char)
+__SDT_COND_SIGNED(const wchar_t, wchar_t)
+__SDT_COND_SIGNED(const volatile char, char)
+__SDT_COND_SIGNED(const volatile wchar_t, wchar_t)
+#if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
+/* __SDT_COND_SIGNED(char16_t) */
+/* __SDT_COND_SIGNED(char32_t) */
+#endif
+
+template<typename __sdt_E>
+struct __sdt_type<__sdt_E[]> : public __sdt_type<__sdt_E *> {};
+
+template<typename __sdt_E, size_t __sdt_N>
+struct __sdt_type<__sdt_E[__sdt_N]> : public __sdt_type<__sdt_E *> {};
+
+#elif !defined(__ASSEMBLER__)
+__extension__ extern unsigned long long __sdt_unsp;
+# define _SDT_ARGINTTYPE(x) \
+ __typeof (__builtin_choose_expr (((__builtin_classify_type (x) \
+ + 3) & -4) == 4, (x), 0U))
+# define _SDT_ARGSIGNED(x) \
+ (!__extension__ \
+ (__builtin_constant_p ((((unsigned long long) \
+ (_SDT_ARGINTTYPE (x)) __sdt_unsp) \
+ & ((unsigned long long)1 << (sizeof (unsigned long long) \
+ * __CHAR_BIT__ - 1))) == 0) \
+ || (_SDT_ARGINTTYPE (x)) -1 > (_SDT_ARGINTTYPE (x)) 0))
+# define _SDT_ARGSIZE(x) \
+ (_SDT_ARGARRAY (x) ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x) (x)
+#endif
+
+#if defined __powerpc__ || defined __powerpc64__
+# define _SDT_ARGTMPL(id) %I[id]%[id]
+#elif defined __i386__
+# define _SDT_ARGTMPL(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */
+#else
+# define _SDT_ARGTMPL(id) %[id]
+#endif
+
+/* NB: gdb PR24541 highlighted an unspecified corner of the sdt.h
+ operand note format.
+
+ The named register may be a longer or shorter (!) alias for the
+ storage where the value in question is found. For example, on
+ i386, 64-bit value may be put in register pairs, and the register
+   i386, a 64-bit value may be put in register pairs, and the register
+ asked to emit the %w[id] (16-bit alias of some registers holding
+ operands), even when a wider 32-bit value was used.
+
+ Bottom line: the byte-width given before the @ sign governs. If
+ there is a mismatch between that width and that of the named
+ register, then a sys/sdt.h note consumer may need to employ
+ architecture-specific heuristics to figure out where the compiler
+ has actually put the complete value.
+*/
+
+#ifdef __LP64__
+# define _SDT_ASM_ADDR .8byte
+#else
+# define _SDT_ASM_ADDR .4byte
+#endif
+
+/* The ia64 and s390 nop instructions take an argument. */
+#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
+#define _SDT_NOP nop 0
+#else
+#define _SDT_NOP nop
+#endif
+
+#define _SDT_NOTE_NAME "stapsdt"
+#define _SDT_NOTE_TYPE 3
+
+/* If the assembler supports the necessary feature, then we can play
+ nice with code in COMDAT sections, which comes up in C++ code.
+ Without that assembler support, some combinations of probe placements
+ in certain kinds of C++ code may produce link-time errors. */
+#include "sdt-config.h"
+#if _SDT_ASM_SECTION_AUTOGROUP_SUPPORT
+# define _SDT_ASM_AUTOGROUP "?"
+#else
+# define _SDT_ASM_AUTOGROUP ""
+#endif
+
+#define _SDT_DEF_MACROS \
+ _SDT_ASM_1(.altmacro) \
+ _SDT_ASM_1(.macro _SDT_SIGN x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_1(.iflt \\x) \
+ _SDT_ASM_1(.ascii "-") \
+ _SDT_ASM_1(.endif) \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_SIZE_ x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_1(.ascii "\x") \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_SIZE x) \
+ _SDT_ASM_1(_SDT_SIZE_ %%((-(-\\x*((-\\x>0)-(-\\x<0))))>>8)) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_TYPE_ x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_2(.ifc 8,\\x) \
+ _SDT_ASM_1(.ascii "f") \
+ _SDT_ASM_1(.endif) \
+ _SDT_ASM_1(.ascii "@") \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_TYPE x) \
+ _SDT_ASM_1(_SDT_TYPE_ %%((\\x)&(0xff))) \
+ _SDT_ASM_1(.endm)
+
+#define _SDT_UNDEF_MACROS \
+ _SDT_ASM_1(.purgem _SDT_SIGN) \
+ _SDT_ASM_1(.purgem _SDT_SIZE_) \
+ _SDT_ASM_1(.purgem _SDT_SIZE) \
+ _SDT_ASM_1(.purgem _SDT_TYPE_) \
+ _SDT_ASM_1(.purgem _SDT_TYPE)
+
+#define _SDT_ASM_BODY(provider, name, pack_args, args, ...) \
+ _SDT_DEF_MACROS \
+ _SDT_ASM_1(990: _SDT_NOP) \
+ _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \
+ _SDT_ASM_1( .balign 4) \
+ _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \
+ _SDT_ASM_1(991: .asciz _SDT_NOTE_NAME) \
+ _SDT_ASM_1(992: .balign 4) \
+ _SDT_ASM_1(993: _SDT_ASM_ADDR 990b) \
+ _SDT_ASM_1( _SDT_ASM_ADDR _.stapsdt.base) \
+ _SDT_SEMAPHORE(provider,name) \
+ _SDT_ASM_STRING(provider) \
+ _SDT_ASM_STRING(name) \
+ pack_args args \
+ _SDT_ASM_SUBSTR(\x00) \
+ _SDT_UNDEF_MACROS \
+ _SDT_ASM_1(994: .balign 4) \
+ _SDT_ASM_1( .popsection)
+
+#define _SDT_ASM_BASE \
+ _SDT_ASM_1(.ifndef _.stapsdt.base) \
+ _SDT_ASM_5( .pushsection .stapsdt.base,"aG","progbits", \
+ .stapsdt.base,comdat) \
+ _SDT_ASM_1( .weak _.stapsdt.base) \
+ _SDT_ASM_1( .hidden _.stapsdt.base) \
+ _SDT_ASM_1( _.stapsdt.base: .space 1) \
+ _SDT_ASM_2( .size _.stapsdt.base, 1) \
+ _SDT_ASM_1( .popsection) \
+ _SDT_ASM_1(.endif)
+
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_SEMAPHORE(p,n) \
+ _SDT_ASM_1( _SDT_ASM_ADDR p##_##n##_semaphore)
+#else
+#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1( _SDT_ASM_ADDR 0)
+#endif
+
+#define _SDT_ASM_BLANK _SDT_ASM_SUBSTR(\x20)
+#define _SDT_ASM_TEMPLATE_0 /* no arguments */
+#define _SDT_ASM_TEMPLATE_1 _SDT_ARGFMT(1)
+#define _SDT_ASM_TEMPLATE_2 _SDT_ASM_TEMPLATE_1 _SDT_ASM_BLANK _SDT_ARGFMT(2)
+#define _SDT_ASM_TEMPLATE_3 _SDT_ASM_TEMPLATE_2 _SDT_ASM_BLANK _SDT_ARGFMT(3)
+#define _SDT_ASM_TEMPLATE_4 _SDT_ASM_TEMPLATE_3 _SDT_ASM_BLANK _SDT_ARGFMT(4)
+#define _SDT_ASM_TEMPLATE_5 _SDT_ASM_TEMPLATE_4 _SDT_ASM_BLANK _SDT_ARGFMT(5)
+#define _SDT_ASM_TEMPLATE_6 _SDT_ASM_TEMPLATE_5 _SDT_ASM_BLANK _SDT_ARGFMT(6)
+#define _SDT_ASM_TEMPLATE_7 _SDT_ASM_TEMPLATE_6 _SDT_ASM_BLANK _SDT_ARGFMT(7)
+#define _SDT_ASM_TEMPLATE_8 _SDT_ASM_TEMPLATE_7 _SDT_ASM_BLANK _SDT_ARGFMT(8)
+#define _SDT_ASM_TEMPLATE_9 _SDT_ASM_TEMPLATE_8 _SDT_ASM_BLANK _SDT_ARGFMT(9)
+#define _SDT_ASM_TEMPLATE_10 _SDT_ASM_TEMPLATE_9 _SDT_ASM_BLANK _SDT_ARGFMT(10)
+#define _SDT_ASM_TEMPLATE_11 _SDT_ASM_TEMPLATE_10 _SDT_ASM_BLANK _SDT_ARGFMT(11)
+#define _SDT_ASM_TEMPLATE_12 _SDT_ASM_TEMPLATE_11 _SDT_ASM_BLANK _SDT_ARGFMT(12)
+#define _SDT_ASM_OPERANDS_0() [__sdt_dummy] "g" (0)
+#define _SDT_ASM_OPERANDS_1(arg1) _SDT_ARG(1, arg1)
+#define _SDT_ASM_OPERANDS_2(arg1, arg2) \
+ _SDT_ASM_OPERANDS_1(arg1), _SDT_ARG(2, arg2)
+#define _SDT_ASM_OPERANDS_3(arg1, arg2, arg3) \
+ _SDT_ASM_OPERANDS_2(arg1, arg2), _SDT_ARG(3, arg3)
+#define _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4) \
+ _SDT_ASM_OPERANDS_3(arg1, arg2, arg3), _SDT_ARG(4, arg4)
+#define _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5) \
+ _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4), _SDT_ARG(5, arg5)
+#define _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
+ _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5), _SDT_ARG(6, arg6)
+#define _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+ _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6), _SDT_ARG(7, arg7)
+#define _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \
+ _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7), \
+ _SDT_ARG(8, arg8)
+#define _SDT_ASM_OPERANDS_9(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) \
+ _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8), \
+ _SDT_ARG(9, arg9)
+#define _SDT_ASM_OPERANDS_10(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+ _SDT_ASM_OPERANDS_9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9), \
+ _SDT_ARG(10, arg10)
+#define _SDT_ASM_OPERANDS_11(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+ _SDT_ASM_OPERANDS_10(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10), \
+ _SDT_ARG(11, arg11)
+#define _SDT_ASM_OPERANDS_12(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+ _SDT_ASM_OPERANDS_11(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11), \
+ _SDT_ARG(12, arg12)
+
+/* These macros can be used in C, C++, or assembly code.
+ In assembly code the arguments should use normal assembly operand syntax. */
+
+#define STAP_PROBE(provider, name) \
+ _SDT_PROBE(provider, name, 0, ())
+#define STAP_PROBE1(provider, name, arg1) \
+ _SDT_PROBE(provider, name, 1, (arg1))
+#define STAP_PROBE2(provider, name, arg1, arg2) \
+ _SDT_PROBE(provider, name, 2, (arg1, arg2))
+#define STAP_PROBE3(provider, name, arg1, arg2, arg3) \
+ _SDT_PROBE(provider, name, 3, (arg1, arg2, arg3))
+#define STAP_PROBE4(provider, name, arg1, arg2, arg3, arg4) \
+ _SDT_PROBE(provider, name, 4, (arg1, arg2, arg3, arg4))
+#define STAP_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) \
+ _SDT_PROBE(provider, name, 5, (arg1, arg2, arg3, arg4, arg5))
+#define STAP_PROBE6(provider, name, arg1, arg2, arg3, arg4, arg5, arg6) \
+ _SDT_PROBE(provider, name, 6, (arg1, arg2, arg3, arg4, arg5, arg6))
+#define STAP_PROBE7(provider, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+ _SDT_PROBE(provider, name, 7, (arg1, arg2, arg3, arg4, arg5, arg6, arg7))
+#define STAP_PROBE8(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) \
+ _SDT_PROBE(provider, name, 8, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8))
+#define STAP_PROBE9(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\
+ _SDT_PROBE(provider, name, 9, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
+#define STAP_PROBE10(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+ _SDT_PROBE(provider, name, 10, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10))
+#define STAP_PROBE11(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+ _SDT_PROBE(provider, name, 11, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11))
+#define STAP_PROBE12(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+ _SDT_PROBE(provider, name, 12, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12))
+
+/* This STAP_PROBEV macro can be used in variadic scenarios, where the
+ number of probe arguments is not known until compile time. Since
+ variadic macro support may vary with compiler options, you must
+ pre-#define SDT_USE_VARIADIC to enable this type of probe.
+
+ The trick to count __VA_ARGS__ was inspired by this post by
+ Laurent Deniau <laurent.deniau@cern.ch>:
+ http://groups.google.com/group/comp.std.c/msg/346fc464319b1ee5
+
+ Note that our _SDT_NARG is called with an extra 0 arg that's not
+ counted, so we don't have to worry about the behavior of macros
+ called without any arguments. */
+
+#define _SDT_NARG(...) __SDT_NARG(__VA_ARGS__, 12,11,10,9,8,7,6,5,4,3,2,1,0)
+#define __SDT_NARG(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12, N, ...) N
+#ifdef SDT_USE_VARIADIC
+#define _SDT_PROBE_N(provider, name, N, ...) \
+ _SDT_PROBE(provider, name, N, (__VA_ARGS__))
+#define STAP_PROBEV(provider, name, ...) \
+ _SDT_PROBE_N(provider, name, _SDT_NARG(0, ##__VA_ARGS__), ##__VA_ARGS__)
+#endif
+
+/* These macros are for use in asm statements. You must compile
+ with -std=gnu99 or -std=c99 to use the STAP_PROBE_ASM macro.
+
+ The STAP_PROBE_ASM macro generates a quoted string to be used in the
+ template portion of the asm statement, concatenated with strings that
+ contain the actual assembly code around the probe site.
+
+ For example:
+
+ asm ("before\n"
+ STAP_PROBE_ASM(provider, fooprobe, %eax 4(%esi))
+ "after");
+
+ emits the assembly code for "before\nafter", with a probe in between.
+ The probe arguments are the %eax register, and the value of the memory
+ word located 4 bytes past the address in the %esi register. Note that
+ because this is a simple asm, not a GNU C extended asm statement, these
+ % characters do not need to be doubled to generate literal %reg names.
+
+ In a GNU C extended asm statement, the probe arguments can be specified
+ using the macro STAP_PROBE_ASM_TEMPLATE(n) for n arguments. The paired
+ macro STAP_PROBE_ASM_OPERANDS gives the C values of these probe arguments,
+ and appears in the input operand list of the asm statement. For example:
+
+ asm ("someinsn %0,%1\n" // %0 is output operand, %1 is input operand
+ STAP_PROBE_ASM(provider, fooprobe, STAP_PROBE_ASM_TEMPLATE(3))
+ "otherinsn %[namedarg]"
+ : "r" (outvar)
+ : "g" (some_value), [namedarg] "i" (1234),
+ STAP_PROBE_ASM_OPERANDS(3, some_value, some_ptr->field, 1234));
+
+ This is just like writing:
+
+   STAP_PROBE3(provider, fooprobe, some_value, some_ptr->field, 1234);
+
+ but the probe site is right between "someinsn" and "otherinsn".
+
+ The probe arguments in STAP_PROBE_ASM can be given as assembly
+ operands instead, even inside a GNU C extended asm statement.
+ Note that these can use operand templates like %0 or %[name],
+ and likewise they must write %%reg for a literal operand of %reg. */
+
+#define _SDT_ASM_BODY_1(p,n,...) _SDT_ASM_BODY(p,n,_SDT_ASM_SUBSTR,(__VA_ARGS__))
+#define _SDT_ASM_BODY_2(p,n,...) _SDT_ASM_BODY(p,n,/*_SDT_ASM_STRING */,__VA_ARGS__)
+#define _SDT_ASM_BODY_N2(p,n,no,...) _SDT_ASM_BODY_ ## no(p,n,__VA_ARGS__)
+#define _SDT_ASM_BODY_N1(p,n,no,...) _SDT_ASM_BODY_N2(p,n,no,__VA_ARGS__)
+#define _SDT_ASM_BODY_N(p,n,...) _SDT_ASM_BODY_N1(p,n,_SDT_NARG(0, __VA_ARGS__),__VA_ARGS__)
+
+#if __STDC_VERSION__ >= 199901L
+# define STAP_PROBE_ASM(provider, name, ...) \
+ _SDT_ASM_BODY_N(provider, name, __VA_ARGS__) \
+ _SDT_ASM_BASE
+# define STAP_PROBE_ASM_OPERANDS(n, ...) _SDT_ASM_OPERANDS_##n(__VA_ARGS__)
+#else
+# define STAP_PROBE_ASM(provider, name, args) \
+ _SDT_ASM_BODY(provider, name, /* _SDT_ASM_STRING */, (args)) \
+ _SDT_ASM_BASE
+#endif
+#define STAP_PROBE_ASM_TEMPLATE(n) _SDT_ASM_TEMPLATE_##n,"use _SDT_ASM_TEMPLATE_"
+
+
+/* DTrace compatible macro names. */
+#define DTRACE_PROBE(provider,probe) \
+ STAP_PROBE(provider,probe)
+#define DTRACE_PROBE1(provider,probe,parm1) \
+ STAP_PROBE1(provider,probe,parm1)
+#define DTRACE_PROBE2(provider,probe,parm1,parm2) \
+ STAP_PROBE2(provider,probe,parm1,parm2)
+#define DTRACE_PROBE3(provider,probe,parm1,parm2,parm3) \
+ STAP_PROBE3(provider,probe,parm1,parm2,parm3)
+#define DTRACE_PROBE4(provider,probe,parm1,parm2,parm3,parm4) \
+ STAP_PROBE4(provider,probe,parm1,parm2,parm3,parm4)
+#define DTRACE_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) \
+ STAP_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5)
+#define DTRACE_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) \
+ STAP_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6)
+#define DTRACE_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) \
+ STAP_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7)
+#define DTRACE_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) \
+ STAP_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8)
+#define DTRACE_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) \
+ STAP_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9)
+#define DTRACE_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) \
+ STAP_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10)
+#define DTRACE_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) \
+ STAP_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11)
+#define DTRACE_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) \
+ STAP_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12)
+
+
+#endif /* sys/sdt.h */
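
To make the fixed-arity and variadic forms concrete, a minimal sketch of a probed application; the provider and probe names are illustrative:

#define SDT_USE_VARIADIC 1	/* must precede the include to enable STAP_PROBEV */
#include "sdt.h"

void log_io(int fd, long nbytes)
{
	/* fixed-arity form: the argument count is part of the macro name */
	STAP_PROBE2(myapp, io_done, fd, nbytes);

	/* variadic form: _SDT_NARG counts the arguments at compile time */
	STAP_PROBEV(myapp, io_done, fd, nbytes);
}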
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index 6bf21e47882a..c0e7acd698ed 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -180,7 +180,7 @@ class FileExtractor(object):
@enum_name: name of the enum to parse
"""
start_marker = re.compile(f'enum {enum_name} {{\n')
- pattern = re.compile('^\s*(BPF_\w+),?$')
+ pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')
end_marker = re.compile('^};')
parser = BlockParser(self.reader)
parser.search_block(start_marker)
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index d6a1be4d8020..2ffa08198d1c 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -7,6 +7,7 @@
#include <sys/sysinfo.h>
#include "bpf_rlimit.h"
+#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "testing_helpers.h"
@@ -44,7 +45,7 @@ int main(int argc, char **argv)
unsigned long long *percpu_value;
int cpu, nproc;
- nproc = get_nprocs_conf();
+ nproc = bpf_num_possible_cpus();
percpu_value = malloc(sizeof(*percpu_value) * nproc);
if (!percpu_value) {
printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index edaffd43da83..6cd6ef9fc20b 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -184,7 +184,7 @@ def bpftool_prog_list(expected=None, ns=""):
def bpftool_map_list(expected=None, ns=""):
_, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
# Remove the base maps
- maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names]
+ maps = [m for m in maps if m not in base_maps and m.get('name') and m.get('name') not in base_map_names]
if expected is not None:
if len(maps) != expected:
fail(True, "%d BPF maps loaded, expected %d" %
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 2ecb73a65206..0a4b45d7b515 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -761,8 +761,10 @@ int cd_flavor_subdir(const char *exec_name)
const char *flavor = strrchr(exec_name, '/');
if (!flavor)
- return 0;
- flavor++;
+ flavor = exec_name;
+ else
+ flavor++;
+
flavor = strrchr(flavor, '-');
if (!flavor)
return 0;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 93c1ff705533..eec4c7385b14 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -332,6 +332,8 @@ int trigger_module_test_write(int write_sz);
#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
#elif defined(__s390x__)
#define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep"
+#elif defined(__aarch64__)
+#define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep"
#else
#define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
#endif
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 795b6798ccee..87867f7a78c3 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -60,7 +60,7 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
set[i] = true;
}
- if (!set)
+ if (!set || parsing_end)
return -EINVAL;
*num_set = set;
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 3d6217e3aff7..9c4be2cdb21a 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -25,15 +25,12 @@ static int ksym_cmp(const void *p1, const void *p2)
int load_kallsyms(void)
{
- FILE *f = fopen("/proc/kallsyms", "r");
+ FILE *f;
char func[256], buf[256];
char symbol;
void *addr;
int i = 0;
- if (!f)
- return -ENOENT;
-
/*
	 * This is called from multiple places,
	 * so load symbols just once.
@@ -41,6 +38,10 @@ int load_kallsyms(void)
if (sym_cnt)
return 0;
+ f = fopen("/proc/kallsyms", "r");
+ if (!f)
+ return -ENOENT;
+
while (fgets(buf, sizeof(buf), f)) {
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
break;
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index db781052758d..e92644d0fa75 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -1,32 +1,85 @@
+#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
+#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
+#include <signal.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SEC(name) __attribute__((section(name), used))
#define BUF_SIZE 256
+/* defined in urandom_read_aux.c */
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+/* these are coming from urandom_read_lib{1,2}.c */
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz);
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+
+unsigned short urand_read_with_sema_semaphore SEC(".probes");
+
static __attribute__((noinline))
void urandom_read(int fd, int count)
{
- char buf[BUF_SIZE];
- int i;
+ char buf[BUF_SIZE];
+ int i;
+
+ for (i = 0; i < count; ++i) {
+ read(fd, buf, BUF_SIZE);
+
+ /* trigger USDTs defined in executable itself */
+ urand_read_without_sema(i, count, BUF_SIZE);
+ STAP_PROBE3(urand, read_with_sema, i, count, BUF_SIZE);
- for (i = 0; i < count; ++i)
- read(fd, buf, BUF_SIZE);
+ /* trigger USDTs defined in shared lib */
+ urandlib_read_without_sema(i, count, BUF_SIZE);
+ urandlib_read_with_sema(i, count, BUF_SIZE);
+ }
+}
+
+static volatile bool parent_ready;
+
+static void handle_sigpipe(int sig)
+{
+ parent_ready = true;
}
int main(int argc, char *argv[])
{
int fd = open("/dev/urandom", O_RDONLY);
int count = 4;
+ bool report_pid = false;
if (fd < 0)
return 1;
- if (argc == 2)
+ if (argc >= 2)
count = atoi(argv[1]);
+ if (argc >= 3) {
+ report_pid = true;
+		/* install SIGPIPE handler to catch when the parent closes its
+ * end of the pipe (on the other side of our stdout)
+ */
+ signal(SIGPIPE, handle_sigpipe);
+ }
+
+ /* report PID and wait for parent process to send us "signal" by
+ * closing stdout
+ */
+ if (report_pid) {
+ while (!parent_ready) {
+ fprintf(stdout, "%d\n", getpid());
+ fflush(stdout);
+ }
+ /* at this point stdout is closed, parent process knows our
+ * PID and is ready to trace us
+ */
+ }
urandom_read(fd, count);
diff --git a/tools/testing/selftests/bpf/urandom_read_aux.c b/tools/testing/selftests/bpf/urandom_read_aux.c
new file mode 100644
index 000000000000..6132edcfea74
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_aux.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ /* semaphore-less USDT */
+ STAP_PROBE3(urand, read_without_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c
new file mode 100644
index 000000000000..86186e24b740
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib1.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short urandlib_read_with_sema_semaphore SEC(".probes");
+
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib2.c b/tools/testing/selftests/bpf/urandom_read_lib2.c
new file mode 100644
index 000000000000..9d401ad9838f
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib2.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ STAP_PROBE3(urandlib, read_without_sema, iter_num, iter_cnt, read_sz);
+}
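
The semaphore variants in these helpers exist so the traced process can skip probe-argument setup when nothing is attached: a tool that attaches to a semaphore-backed USDT increments the counter, and the application fires the probe only while it is non-zero. A minimal sketch following the <provider>_<probe>_semaphore naming convention that sdt.h expects; the names are illustrative:

#define _SDT_HAS_SEMAPHORES 1
#include "sdt.h"

#define SEC(name) __attribute__((section(name), used))

/* must be named <provider>_<probe>_semaphore and placed in .probes */
unsigned short myapp_expensive_semaphore SEC(".probes");

void maybe_fire(void)
{
	if (myapp_expensive_semaphore) {
		/* stand-in for argument computation worth skipping */
		long arg = 42;

		STAP_PROBE1(myapp, expensive, arg);
	}
}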
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 4f70baad867d..bbe3b379927a 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -20,6 +20,7 @@ SRC_IP6=2001:db8:1::3
DEV_ADDR=192.51.100.1
DEV_ADDR6=2001:db8:1::1
DEV=dummy0
+TESTS="fib_rule6 fib_rule4"
log_test()
{
@@ -316,7 +317,16 @@ fi
# start clean
cleanup &> /dev/null
setup
-run_fibrule_tests
+for t in $TESTS
+do
+ case $t in
+ fib_rule6_test|fib_rule6) fib_rule6_test;;
+ fib_rule4_test|fib_rule4) fib_rule4_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+
+ esac
+done
cleanup
if [ "$TESTS" != "none" ]; then
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 8fa97ae9af9e..ae80c2aef577 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -2,6 +2,7 @@
TEST_PROGS = bridge_igmp.sh \
bridge_locked_port.sh \
+ bridge_mdb.sh \
bridge_port_isolation.sh \
bridge_sticky_fdb.sh \
bridge_vlan_aware.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
new file mode 100755
index 000000000000..b1ba6876dd86
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that adding host mdb entries works as intended for all types of
+# multicast filters: IPv4, IPv6, and MAC
+
+ALL_TESTS="mdb_add_del_test"
+NUM_NETIFS=2
+
+TEST_GROUP_IP4="225.1.2.3"
+TEST_GROUP_IP6="ff02::42"
+TEST_GROUP_MAC="01:00:01:c0:ff:ee"
+
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ # Enable multicast filtering
+ ip link add dev br0 type bridge mcast_snooping 1
+
+ ip link set dev $swp1 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp1 down
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+do_mdb_add_del()
+{
+ local group=$1
+ local flag=$2
+
+ RET=0
+ bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null
+ check_err $? "Failed adding $group to br0, port br0"
+
+ if [ -z "$flag" ]; then
+ flag="temp"
+ fi
+
+ bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null
+ check_err $? "$group not added with $flag flag"
+
+ bridge mdb del dev br0 port br0 grp $group 2>/dev/null
+ check_err $? "Failed deleting $group from br0, port br0"
+
+ bridge mdb show dev br0 | grep -q $group >/dev/null
+ check_err_fail 1 $? "$group still in mdb after delete"
+
+ log_test "MDB add/del group $group to bridge port br0"
+}
+
+mdb_add_del_test()
+{
+ do_mdb_add_del $TEST_GROUP_MAC permanent
+ do_mdb_add_del $TEST_GROUP_IP4
+ do_mdb_add_del $TEST_GROUP_IP6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index ff821025d309..9dd43d7d957b 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -71,6 +71,43 @@ chk_msk_remote_key_nr()
__chk_nr "grep -c remote_key" $*
}
+__chk_listen()
+{
+ local filter="$1"
+ local expected=$2
+
+ shift 2
+ msg=$*
+
+ nr=$(ss -N $ns -Ml "$filter" | grep -c LISTEN)
+ printf "%-50s" "$msg"
+
+ if [ $nr != $expected ]; then
+ echo "[ fail ] expected $expected found $nr"
+ ret=$test_cnt
+ else
+ echo "[ ok ]"
+ fi
+}
+
+chk_msk_listen()
+{
+ lport=$1
+ local msg="check for listen socket"
+
+	# destination port search should always return an empty list
+ __chk_listen "dport $lport" 0 "listen match for dport $lport"
+
+ # should return 'our' mptcp listen socket
+ __chk_listen "sport $lport" 1 "listen match for sport $lport"
+
+ __chk_listen "src inet:0.0.0.0:$lport" 1 "listen match for saddr and sport"
+
+ __chk_listen "" 1 "all listen sockets"
+}
+
# $1: ns, $2: port
wait_local_port_listen()
{
@@ -113,6 +150,7 @@ echo "a" | \
0.0.0.0 >/dev/null &
wait_local_port_listen $ns 10000
chk_msk_nr 0 "no msk on netns creation"
+chk_msk_listen 10000
echo "b" | \
timeout ${timeout_test} \
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
index 695a1958723f..fd76b69635a4 100755
--- a/tools/testing/selftests/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -66,6 +66,20 @@ table inet filter {
EOF
}
+load_pbr_ruleset() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain forward {
+ type filter hook forward priority raw;
+ fib saddr . iif oif gt 0 accept
+ log drop
+ }
+}
+EOF
+}
+
load_ruleset_count() {
local netns=$1
@@ -219,4 +233,40 @@ sleep 2
ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+# delete all rules
+ip netns exec ${ns1} nft flush ruleset
+ip netns exec ${ns2} nft flush ruleset
+ip netns exec ${nsrouter} nft flush ruleset
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr del dead:2::99/64 dev eth0
+
+ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+load_pbr_ruleset ${nsrouter}
+if [ $? -ne 0 ] ; then
+ echo "SKIP: Could not load fib forward ruleset"
+ exit $ksft_skip
+fi
+
+ip -net ${nsrouter} rule add from all table 128
+ip -net ${nsrouter} rule add from all iif veth0 table 129
+ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
+ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net ${nsrouter} -4 rule delete table main
+
+test_ping 10.0.2.99 dead:2::99
+if [ $? -ne 0 ] ; then
+ ip -net ${nsrouter} nft list ruleset
+ echo "FAIL: fib mismatch in pbr setup"
+ exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
exit 0