summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/netlink/specs/ethtool.yaml8
-rw-r--r--Documentation/networking/ethtool-netlink.rst10
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c32
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h12
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c68
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c4
-rw-r--r--drivers/net/netdevsim/ethtool.c12
-rw-r--r--drivers/net/netdevsim/netdev.c9
-rw-r--r--drivers/net/netdevsim/netdevsim.h3
-rw-r--r--include/linux/ethtool.h11
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/uapi/linux/ethtool_netlink_generated.h2
-rw-r--r--net/core/dev.c27
-rw-r--r--net/core/devmem.c11
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/rings.c51
-rw-r--r--tools/testing/selftests/drivers/net/Makefile1
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py120
18 files changed, 363 insertions, 21 deletions
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 60f85fbf4156..66be04013048 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -332,6 +332,12 @@ attribute-sets:
-
name: tx-push-buf-len-max
type: u32
+ -
+ name: hds-thresh
+ type: u32
+ -
+ name: hds-thresh-max
+ type: u32
-
name: mm-stat
@@ -1777,6 +1783,8 @@ operations:
- rx-push
- tx-push-buf-len
- tx-push-buf-len-max
+ - hds-thresh
+ - hds-thresh-max
dump: *ring-get-op
-
name: rings-set
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index da846f1d998e..f70c0249860c 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -899,6 +899,10 @@ Kernel response contents:
``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode
``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` u32 size of TX push buffer
``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX`` u32 max size of TX push buffer
+ ``ETHTOOL_A_RINGS_HDS_THRESH`` u32 threshold of
+ header / data split
+ ``ETHTOOL_A_RINGS_HDS_THRESH_MAX`` u32 max threshold of
+ header / data split
======================================= ====== ===========================
``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with
@@ -941,10 +945,12 @@ Request contents:
``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring
``ETHTOOL_A_RINGS_TX`` u32 size of TX ring
``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring
+ ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split
``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE
``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode
``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode
``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` u32 size of TX push buffer
+ ``ETHTOOL_A_RINGS_HDS_THRESH`` u32 threshold of header / data split
==================================== ====== ===========================
Kernel checks that requested ring sizes do not exceed limits reported by
@@ -961,6 +967,10 @@ A bigger CQE can have more receive buffer pointers, and in turn the NIC can
transfer a bigger frame from wire. Based on the NIC hardware, the overall
completion queue size can be adjusted in the driver if CQE size is modified.
+``ETHTOOL_A_RINGS_HDS_THRESH`` specifies the threshold value of
+header / data split feature. If a received packet size is larger than this
+threshold value, header and data will be split.
+
CHANNELS_GET
============
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 884d42db5554..caddb5cbc024 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -81,7 +81,6 @@ MODULE_DESCRIPTION("Broadcom NetXtreme network driver");
#define BNXT_RX_OFFSET (NET_SKB_PAD + NET_IP_ALIGN)
#define BNXT_RX_DMA_OFFSET NET_SKB_PAD
-#define BNXT_RX_COPY_THRESH 256
#define BNXT_TX_PUSH_THRESH 164
@@ -1343,13 +1342,13 @@ static struct sk_buff *bnxt_copy_data(struct bnxt_napi *bnapi, u8 *data,
if (!skb)
return NULL;
- dma_sync_single_for_cpu(&pdev->dev, mapping, bp->rx_copy_thresh,
+ dma_sync_single_for_cpu(&pdev->dev, mapping, bp->rx_copybreak,
bp->rx_dir);
memcpy(skb->data - NET_IP_ALIGN, data - NET_IP_ALIGN,
len + NET_IP_ALIGN);
- dma_sync_single_for_device(&pdev->dev, mapping, bp->rx_copy_thresh,
+ dma_sync_single_for_device(&pdev->dev, mapping, bp->rx_copybreak,
bp->rx_dir);
skb_put(skb, len);
@@ -1842,7 +1841,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
return NULL;
}
- if (len <= bp->rx_copy_thresh) {
+ if (len <= bp->rx_copybreak) {
skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping);
if (!skb) {
bnxt_abort_tpa(cpr, idx, agg_bufs);
@@ -2176,7 +2175,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
}
}
- if (len <= bp->rx_copy_thresh) {
+ if (len <= bp->rx_copybreak) {
if (!xdp_active)
skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr);
else
@@ -4608,6 +4607,12 @@ void bnxt_set_tpa_flags(struct bnxt *bp)
bp->flags |= BNXT_FLAG_GRO;
}
+static void bnxt_init_ring_params(struct bnxt *bp)
+{
+ bp->rx_copybreak = BNXT_DEFAULT_RX_COPYBREAK;
+ bp->dev->ethtool->hds_thresh = BNXT_DEFAULT_RX_COPYBREAK;
+}
+
/* bp->rx_ring_size, bp->tx_ring_size, dev->mtu, BNXT_FLAG_{G|L}RO flags must
* be set on entry.
*/
@@ -4622,12 +4627,11 @@ void bnxt_set_ring_params(struct bnxt *bp)
rx_space = rx_size + ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- bp->rx_copy_thresh = BNXT_RX_COPY_THRESH;
ring_size = bp->rx_ring_size;
bp->rx_agg_ring_size = 0;
bp->rx_agg_nr_pages = 0;
- if (bp->flags & BNXT_FLAG_TPA)
+ if (bp->flags & BNXT_FLAG_TPA || bp->flags & BNXT_FLAG_HDS)
agg_factor = min_t(u32, 4, 65536 / BNXT_RX_PAGE_SIZE);
bp->flags &= ~BNXT_FLAG_JUMBO;
@@ -4667,7 +4671,9 @@ void bnxt_set_ring_params(struct bnxt *bp)
ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) -
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
} else {
- rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN);
+ rx_size = SKB_DATA_ALIGN(max(BNXT_DEFAULT_RX_COPYBREAK,
+ bp->rx_copybreak) +
+ NET_IP_ALIGN);
rx_space = rx_size + NET_SKB_PAD +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
}
@@ -6564,6 +6570,7 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp)
static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic)
{
+ u16 hds_thresh = (u16)bp->dev->ethtool->hds_thresh;
struct hwrm_vnic_plcmodes_cfg_input *req;
int rc;
@@ -6573,16 +6580,14 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic)
req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT);
req->enables = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID);
+ req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size);
- if (BNXT_RX_PAGE_MODE(bp)) {
- req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size);
- } else {
+ if (!BNXT_RX_PAGE_MODE(bp) && (bp->flags & BNXT_FLAG_AGG_RINGS)) {
req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 |
VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6);
req->enables |=
cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID);
- req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh);
- req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh);
+ req->hds_threshold = cpu_to_le16(hds_thresh);
}
req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
return hwrm_req_send(bp, req);
@@ -16261,6 +16266,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
bnxt_init_l2_fltr_tbl(bp);
bnxt_set_rx_skb_mode(bp, false);
bnxt_set_tpa_flags(bp);
+ bnxt_init_ring_params(bp);
bnxt_set_ring_params(bp);
bnxt_rdma_aux_device_init(bp);
rc = bnxt_set_dflt_rings(bp, true);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 094c9e95b463..8f481dd9c224 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -34,6 +34,9 @@
#include <linux/firmware/broadcom/tee_bnxt_fw.h>
#endif
+#define BNXT_DEFAULT_RX_COPYBREAK 256
+#define BNXT_MAX_RX_COPYBREAK 1024
+
extern struct list_head bnxt_block_cb_list;
struct page_pool;
@@ -2241,8 +2244,6 @@ struct bnxt {
#define BNXT_FLAG_TPA (BNXT_FLAG_LRO | BNXT_FLAG_GRO)
#define BNXT_FLAG_JUMBO 0x10
#define BNXT_FLAG_STRIP_VLAN 0x20
- #define BNXT_FLAG_AGG_RINGS (BNXT_FLAG_JUMBO | BNXT_FLAG_GRO | \
- BNXT_FLAG_LRO)
#define BNXT_FLAG_RFS 0x100
#define BNXT_FLAG_SHARED_RINGS 0x200
#define BNXT_FLAG_PORT_STATS 0x400
@@ -2263,6 +2264,9 @@ struct bnxt {
#define BNXT_FLAG_ROCE_MIRROR_CAP 0x4000000
#define BNXT_FLAG_TX_COAL_CMPL 0x8000000
#define BNXT_FLAG_PORT_STATS_EXT 0x10000000
+ #define BNXT_FLAG_HDS 0x20000000
+ #define BNXT_FLAG_AGG_RINGS (BNXT_FLAG_JUMBO | BNXT_FLAG_GRO | \
+ BNXT_FLAG_LRO | BNXT_FLAG_HDS)
#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \
BNXT_FLAG_RFS | \
@@ -2347,7 +2351,7 @@ struct bnxt {
enum dma_data_direction rx_dir;
u32 rx_ring_size;
u32 rx_agg_ring_size;
- u32 rx_copy_thresh;
+ u32 rx_copybreak;
u32 rx_ring_mask;
u32 rx_agg_ring_mask;
int rx_nr_pages;
@@ -2775,6 +2779,8 @@ struct bnxt {
#define SFF_MODULE_ID_QSFP28 0x11
#define BNXT_MAX_PHY_I2C_RESP_SIZE 64
+#define BNXT_HDS_THRESHOLD_MAX 1023
+
static inline u32 bnxt_tx_avail(struct bnxt *bp,
const struct bnxt_tx_ring_info *txr)
{
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 75a59dd72bce..540c140d52dc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -833,6 +833,9 @@ static void bnxt_get_ringparam(struct net_device *dev,
ering->rx_pending = bp->rx_ring_size;
ering->rx_jumbo_pending = bp->rx_agg_ring_size;
ering->tx_pending = bp->tx_ring_size;
+
+ kernel_ering->hds_thresh = dev->ethtool->hds_thresh;
+ kernel_ering->hds_thresh_max = BNXT_HDS_THRESHOLD_MAX;
}
static int bnxt_set_ringparam(struct net_device *dev,
@@ -840,16 +843,35 @@ static int bnxt_set_ringparam(struct net_device *dev,
struct kernel_ethtool_ringparam *kernel_ering,
struct netlink_ext_ack *extack)
{
+ u8 tcp_data_split = kernel_ering->tcp_data_split;
struct bnxt *bp = netdev_priv(dev);
+ u8 hds_config_mod;
if ((ering->rx_pending > BNXT_MAX_RX_DESC_CNT) ||
(ering->tx_pending > BNXT_MAX_TX_DESC_CNT) ||
(ering->tx_pending < BNXT_MIN_TX_DESC_CNT))
return -EINVAL;
+ hds_config_mod = tcp_data_split != dev->ethtool->hds_config;
+ if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && hds_config_mod)
+ return -EINVAL;
+
+ if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
+ hds_config_mod && BNXT_RX_PAGE_MODE(bp)) {
+ NL_SET_ERR_MSG_MOD(extack, "tcp-data-split is disallowed when XDP is attached");
+ return -EINVAL;
+ }
+
if (netif_running(dev))
bnxt_close_nic(bp, false, false);
+ if (hds_config_mod) {
+ if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
+ bp->flags |= BNXT_FLAG_HDS;
+ else if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
+ bp->flags &= ~BNXT_FLAG_HDS;
+ }
+
bp->rx_ring_size = ering->rx_pending;
bp->tx_ring_size = ering->tx_pending;
bnxt_set_ring_params(bp);
@@ -4328,6 +4350,45 @@ static int bnxt_get_eee(struct net_device *dev, struct ethtool_keee *edata)
return 0;
}
+static int bnxt_set_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ const void *data)
+{
+ struct bnxt *bp = netdev_priv(dev);
+ u32 rx_copybreak;
+
+ switch (tuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ rx_copybreak = *(u32 *)data;
+ if (rx_copybreak > BNXT_MAX_RX_COPYBREAK)
+ return -ERANGE;
+ if (rx_copybreak != bp->rx_copybreak) {
+ if (netif_running(dev))
+ return -EBUSY;
+ bp->rx_copybreak = rx_copybreak;
+ }
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int bnxt_get_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna, void *data)
+{
+ struct bnxt *bp = netdev_priv(dev);
+
+ switch (tuna->id) {
+ case ETHTOOL_RX_COPYBREAK:
+ *(u32 *)data = bp->rx_copybreak;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int bnxt_read_sfp_module_eeprom_info(struct bnxt *bp, u16 i2c_addr,
u16 page_number, u8 bank,
u16 start_addr, u16 data_length,
@@ -4790,7 +4851,8 @@ static int bnxt_run_loopback(struct bnxt *bp)
cpr = &rxr->bnapi->cp_ring;
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
cpr = rxr->rx_cpr;
- pkt_size = min(bp->dev->mtu + ETH_HLEN, bp->rx_copy_thresh);
+ pkt_size = min(bp->dev->mtu + ETH_HLEN, max(BNXT_DEFAULT_RX_COPYBREAK,
+ bp->rx_copybreak));
skb = netdev_alloc_skb(bp->dev, pkt_size);
if (!skb)
return -ENOMEM;
@@ -5331,6 +5393,8 @@ const struct ethtool_ops bnxt_ethtool_ops = {
ETHTOOL_COALESCE_STATS_BLOCK_USECS |
ETHTOOL_COALESCE_USE_ADAPTIVE_RX |
ETHTOOL_COALESCE_USE_CQE,
+ .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT |
+ ETHTOOL_RING_USE_HDS_THRS,
.get_link_ksettings = bnxt_get_link_ksettings,
.set_link_ksettings = bnxt_set_link_ksettings,
.get_fec_stats = bnxt_get_fec_stats,
@@ -5372,6 +5436,8 @@ const struct ethtool_ops bnxt_ethtool_ops = {
.get_link_ext_stats = bnxt_get_link_ext_stats,
.get_eee = bnxt_get_eee,
.set_eee = bnxt_set_eee,
+ .get_tunable = bnxt_get_tunable,
+ .set_tunable = bnxt_set_tunable,
.get_module_info = bnxt_get_module_info,
.get_module_eeprom = bnxt_get_module_eeprom,
.get_module_eeprom_by_page = bnxt_get_module_eeprom_by_page,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index f88b641533fc..1bfff7f29310 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -395,6 +395,10 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
bp->dev->mtu, BNXT_MAX_PAGE_MODE_MTU);
return -EOPNOTSUPP;
}
+ if (prog && bp->flags & BNXT_FLAG_HDS) {
+ netdev_warn(dev, "XDP is disallowed when HDS is enabled.\n");
+ return -EOPNOTSUPP;
+ }
if (!(bp->flags & BNXT_FLAG_SHARED_RINGS)) {
netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n");
return -EOPNOTSUPP;
diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c
index 5fe1eaef99b5..9e0df40c71e1 100644
--- a/drivers/net/netdevsim/ethtool.c
+++ b/drivers/net/netdevsim/ethtool.c
@@ -2,7 +2,6 @@
// Copyright (c) 2020 Facebook
#include <linux/debugfs.h>
-#include <linux/ethtool.h>
#include <linux/random.h>
#include "netdevsim.h"
@@ -72,6 +71,12 @@ static void nsim_get_ringparam(struct net_device *dev,
struct netdevsim *ns = netdev_priv(dev);
memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring));
+ kernel_ring->tcp_data_split = dev->ethtool->hds_config;
+ kernel_ring->hds_thresh = dev->ethtool->hds_thresh;
+ kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX;
+
+ if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
+ kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED;
}
static int nsim_set_ringparam(struct net_device *dev,
@@ -161,6 +166,8 @@ static int nsim_get_ts_info(struct net_device *dev,
static const struct ethtool_ops nsim_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_ALL_PARAMS,
+ .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT |
+ ETHTOOL_RING_USE_HDS_THRS,
.get_pause_stats = nsim_get_pause_stats,
.get_pauseparam = nsim_get_pauseparam,
.set_pauseparam = nsim_set_pauseparam,
@@ -182,6 +189,9 @@ static void nsim_ethtool_ring_init(struct netdevsim *ns)
ns->ethtool.ring.rx_jumbo_max_pending = 4096;
ns->ethtool.ring.rx_mini_max_pending = 4096;
ns->ethtool.ring.tx_max_pending = 4096;
+
+ ns->netdev->ethtool->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN;
+ ns->netdev->ethtool->hds_thresh = 0;
}
void nsim_ethtool_init(struct netdevsim *ns)
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index d013b6498539..f92b05ccdca9 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -15,6 +15,7 @@
#include <linux/debugfs.h>
#include <linux/etherdevice.h>
+#include <linux/ethtool_netlink.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
@@ -54,6 +55,7 @@ static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb,
static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct netdevsim *ns = netdev_priv(dev);
+ struct ethtool_netdev_state *ethtool;
struct net_device *peer_dev;
unsigned int len = skb->len;
struct netdevsim *peer_ns;
@@ -74,6 +76,13 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
rxq = rxq % peer_dev->num_rx_queues;
rq = peer_ns->rq[rxq];
+ ethtool = peer_dev->ethtool;
+ if (skb_is_nonlinear(skb) &&
+ (ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED ||
+ (ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
+ ethtool->hds_thresh > len)))
+ skb_linearize(skb);
+
skb_tx_timestamp(skb);
if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP))
goto out_drop_cnt;
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index a70f62af4c88..dcf073bc4802 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -16,6 +16,7 @@
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/netdevice.h>
@@ -36,6 +37,8 @@
#define NSIM_IPSEC_VALID BIT(31)
#define NSIM_UDP_TUNNEL_N_PORTS 4
+#define NSIM_HDS_THRESHOLD_MAX 1024
+
struct nsim_sa {
struct xfrm_state *xs;
__be32 ipaddr[4];
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 20a86bd5f4e3..e4136b0df892 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -78,6 +78,9 @@ enum {
* @cqe_size: Size of TX/RX completion queue event
* @tx_push_buf_len: Size of TX push buffer
* @tx_push_buf_max_len: Maximum allowed size of TX push buffer
+ * @hds_thresh: Packet size threshold for header data split (HDS)
+ * @hds_thresh_max: Maximum supported setting for @hds_threshold
+ *
*/
struct kernel_ethtool_ringparam {
u32 rx_buf_len;
@@ -87,6 +90,8 @@ struct kernel_ethtool_ringparam {
u32 cqe_size;
u32 tx_push_buf_len;
u32 tx_push_buf_max_len;
+ u32 hds_thresh;
+ u32 hds_thresh_max;
};
/**
@@ -97,6 +102,7 @@ struct kernel_ethtool_ringparam {
* @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push
* @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len
* @ETHTOOL_RING_USE_TCP_DATA_SPLIT: capture for setting tcp_data_split
+ * @ETHTOOL_RING_USE_HDS_THRS: capture for setting header-data-split-thresh
*/
enum ethtool_supported_ring_param {
ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0),
@@ -105,6 +111,7 @@ enum ethtool_supported_ring_param {
ETHTOOL_RING_USE_RX_PUSH = BIT(3),
ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4),
ETHTOOL_RING_USE_TCP_DATA_SPLIT = BIT(5),
+ ETHTOOL_RING_USE_HDS_THRS = BIT(6),
};
#define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit))
@@ -1157,12 +1164,16 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
* @rss_ctx: XArray of custom RSS contexts
* @rss_lock: Protects entries in @rss_ctx. May be taken from
* within RTNL.
+ * @hds_thresh: HDS Threshold value.
+ * @hds_config: HDS value from userspace.
* @wol_enabled: Wake-on-LAN is enabled
* @module_fw_flash_in_progress: Module firmware flashing is in progress.
*/
struct ethtool_netdev_state {
struct xarray rss_ctx;
struct mutex rss_lock;
+ u32 hds_thresh;
+ u8 hds_config;
unsigned wol_enabled:1;
unsigned module_fw_flash_in_progress:1;
};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bced03fb349e..3e6336775baf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4082,6 +4082,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
u8 dev_xdp_prog_count(struct net_device *dev);
int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf);
+u8 dev_xdp_sb_prog_count(struct net_device *dev);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
u32 dev_get_min_mp_channel_count(const struct net_device *dev);
diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h
index 43993a2d68e5..2e17ff348f89 100644
--- a/include/uapi/linux/ethtool_netlink_generated.h
+++ b/include/uapi/linux/ethtool_netlink_generated.h
@@ -155,6 +155,8 @@ enum {
ETHTOOL_A_RINGS_RX_PUSH,
ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,
ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,
+ ETHTOOL_A_RINGS_HDS_THRESH,
+ ETHTOOL_A_RINGS_HDS_THRESH_MAX,
__ETHTOOL_A_RINGS_CNT,
ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1)
diff --git a/net/core/dev.c b/net/core/dev.c
index fda4e1039bf0..47e6b0f73cfc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -92,6 +92,7 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
#include <linux/skbuff.h>
#include <linux/kthread.h>
#include <linux/bpf.h>
@@ -9550,11 +9551,31 @@ u8 dev_xdp_prog_count(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
+u8 dev_xdp_sb_prog_count(struct net_device *dev)
+{
+ u8 count = 0;
+ int i;
+
+ for (i = 0; i < __MAX_XDP_MODE; i++)
+ if (dev->xdp_state[i].prog &&
+ !dev->xdp_state[i].prog->aux->xdp_has_frags)
+ count++;
+ return count;
+}
+
int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
{
if (!dev->netdev_ops->ndo_bpf)
return -EOPNOTSUPP;
+ if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
+ bpf->command == XDP_SETUP_PROG &&
+ bpf->prog && !bpf->prog->aux->xdp_has_frags) {
+ NL_SET_ERR_MSG(bpf->extack,
+ "unable to propagate XDP to device using tcp-data-split");
+ return -EBUSY;
+ }
+
if (dev_get_min_mp_channel_count(dev)) {
NL_SET_ERR_MSG(bpf->extack, "unable to propagate XDP to device using memory provider");
return -EBUSY;
@@ -9592,6 +9613,12 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
struct netdev_bpf xdp;
int err;
+ if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
+ prog && !prog->aux->xdp_has_frags) {
+ NL_SET_ERR_MSG(extack, "unable to install XDP to device using tcp-data-split");
+ return -EBUSY;
+ }
+
if (dev_get_min_mp_channel_count(dev)) {
NL_SET_ERR_MSG(extack, "unable to install XDP to device using memory provider");
return -EBUSY;
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 0b6ed7525b22..c971b8aceac8 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -8,6 +8,7 @@
*/
#include <linux/dma-buf.h>
+#include <linux/ethtool_netlink.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
@@ -140,6 +141,16 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
return -ERANGE;
}
+ if (dev->ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
+ NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
+ return -EINVAL;
+ }
+
+ if (dev->ethtool->hds_thresh) {
+ NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
+ return -EINVAL;
+ }
+
rxq = __netif_get_rx_queue(dev, rxq_idx);
if (rxq->mp_params.mp_priv) {
NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 1ce0a3de1430..ff69ca0715de 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -456,7 +456,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT
extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1];
extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1];
extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1];
-extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX + 1];
+extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_HDS_THRESH_MAX + 1];
extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index b7865a14fdf8..d8cd4e4d7762 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -61,7 +61,9 @@ static int rings_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */
nla_total_size(sizeof(u8))) + /* _RINGS_RX_PUSH */
nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN */
- nla_total_size(sizeof(u32)); /* _RINGS_TX_PUSH_BUF_LEN_MAX */
+ nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN_MAX */
+ nla_total_size(sizeof(u32)) + /* _RINGS_HDS_THRESH */
+ nla_total_size(sizeof(u32)); /* _RINGS_HDS_THRESH_MAX*/
}
static int rings_fill_reply(struct sk_buff *skb,
@@ -108,7 +110,12 @@ static int rings_fill_reply(struct sk_buff *skb,
(nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,
kr->tx_push_buf_max_len) ||
nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,
- kr->tx_push_buf_len))))
+ kr->tx_push_buf_len))) ||
+ ((supported_ring_params & ETHTOOL_RING_USE_HDS_THRS) &&
+ (nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH,
+ kr->hds_thresh) ||
+ nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH_MAX,
+ kr->hds_thresh_max))))
return -EMSGSIZE;
return 0;
@@ -130,6 +137,7 @@ const struct nla_policy ethnl_rings_set_policy[] = {
[ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
[ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .type = NLA_U32 },
+ [ETHTOOL_A_RINGS_HDS_THRESH] = { .type = NLA_U32 },
};
static int
@@ -155,6 +163,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info,
return -EOPNOTSUPP;
}
+ if (tb[ETHTOOL_A_RINGS_HDS_THRESH] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_HDS_THRS)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_HDS_THRESH],
+ "setting hds-thresh is not supported");
+ return -EOPNOTSUPP;
+ }
+
if (tb[ETHTOOL_A_RINGS_CQE_SIZE] &&
!(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) {
NL_SET_ERR_MSG_ATTR(info->extack,
@@ -203,6 +219,7 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
dev->ethtool_ops->get_ringparam(dev, &ringparam,
&kernel_ringparam, info->extack);
+ kernel_ringparam.tcp_data_split = dev->ethtool->hds_config;
ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod);
ethnl_update_u32(&ringparam.rx_mini_pending,
@@ -222,9 +239,32 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
tb[ETHTOOL_A_RINGS_RX_PUSH], &mod);
ethnl_update_u32(&kernel_ringparam.tx_push_buf_len,
tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], &mod);
+ ethnl_update_u32(&kernel_ringparam.hds_thresh,
+ tb[ETHTOOL_A_RINGS_HDS_THRESH], &mod);
if (!mod)
return 0;
+ if (kernel_ringparam.tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
+ dev_xdp_sb_prog_count(dev)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT],
+ "tcp-data-split can not be enabled with single buffer XDP");
+ return -EINVAL;
+ }
+
+ if (dev_get_min_mp_channel_count(dev)) {
+ if (kernel_ringparam.tcp_data_split !=
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
+ NL_SET_ERR_MSG(info->extack,
+ "can't disable tcp-data-split while device has memory provider enabled");
+ return -EINVAL;
+ } else if (kernel_ringparam.hds_thresh) {
+ NL_SET_ERR_MSG(info->extack,
+ "can't set non-zero hds_thresh while device is memory provider enabled");
+ return -EINVAL;
+ }
+ }
+
/* ensure new ring parameters are within limits */
if (ringparam.rx_pending > ringparam.rx_max_pending)
err_attr = tb[ETHTOOL_A_RINGS_RX];
@@ -234,6 +274,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
err_attr = tb[ETHTOOL_A_RINGS_RX_JUMBO];
else if (ringparam.tx_pending > ringparam.tx_max_pending)
err_attr = tb[ETHTOOL_A_RINGS_TX];
+ else if (kernel_ringparam.hds_thresh > kernel_ringparam.hds_thresh_max)
+ err_attr = tb[ETHTOOL_A_RINGS_HDS_THRESH];
else
err_attr = NULL;
if (err_attr) {
@@ -252,6 +294,11 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
ret = dev->ethtool_ops->set_ringparam(dev, &ringparam,
&kernel_ringparam, info->extack);
+ if (!ret) {
+ dev->ethtool->hds_config = kernel_ringparam.tcp_data_split;
+ dev->ethtool->hds_thresh = kernel_ringparam.hds_thresh;
+ }
+
return ret < 0 ? ret : 1;
}
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 469179c18935..137470bdee0c 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -12,6 +12,7 @@ TEST_PROGS := \
queues.py \
stats.py \
shaper.py \
+ hds.py \
# end of TEST_PROGS
include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
new file mode 100755
index 000000000000..394971b25c0b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import errno
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+def get_hds(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'tcp-data-split' not in rings:
+ raise KsftSkipEx('tcp-data-split not supported by device')
+
+def get_hds_thresh(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+
+def set_hds_enable(cfg, netnl) -> None:
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("disabling of HDS not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'tcp-data-split' not in rings:
+ raise KsftSkipEx('tcp-data-split not supported by device')
+
+ ksft_eq('enabled', rings['tcp-data-split'])
+
+def set_hds_disable(cfg, netnl) -> None:
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("disabling of HDS not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'tcp-data-split' not in rings:
+ raise KsftSkipEx('tcp-data-split not supported by device')
+
+ ksft_eq('disabled', rings['tcp-data-split'])
+
+def set_hds_thresh_zero(cfg, netnl) -> None:
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("hds-thresh-set not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+
+ ksft_eq(0, rings['hds-thresh'])
+
+def set_hds_thresh_max(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': rings['hds-thresh-max']})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("hds-thresh-set not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ ksft_eq(rings['hds-thresh'], rings['hds-thresh-max'])
+
+def set_hds_thresh_gt(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+ if 'hds-thresh-max' not in rings:
+ raise KsftSkipEx('hds-thresh-max not defined by device')
+ hds_gt = rings['hds-thresh-max'] + 1
+ with ksft_raises(NlError) as e:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt})
+ ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+def main() -> None:
+ with NetDrvEnv(__file__, queue_count=3) as cfg:
+ ksft_run([get_hds,
+ get_hds_thresh,
+ set_hds_disable,
+ set_hds_enable,
+ set_hds_thresh_zero,
+ set_hds_thresh_max,
+ set_hds_thresh_gt],
+ args=(cfg, EthtoolFamily()))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()