summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author: Paolo Abeni <pabeni@redhat.com> 2025-10-09 11:10:03 +0200
committer: Paolo Abeni <pabeni@redhat.com> 2025-10-09 11:10:04 +0200
commit: e6cc7ac0d420a7782cdb6d926245abf40f943f24 (patch)
tree: 0f5c2a30edb7dd1cc82f1beabf44298f94c87126 /tools
parent: 2854378a00e8872507a19cb1eb88517fc9584bc5 (diff)
parent: 5d683e550540b7afd813ae45e9f727245bd26fe3 (diff)
Merge branch 'eth-fbnic-fix-xdp_tx-and-xdp-vs-qstats'
Jakub Kicinski says:

====================
eth: fbnic: fix XDP_TX and XDP vs qstats

Fix XDP_TX hangs and adjust the XDP statistics to match the definition
of qstats. The three problems are somewhat distinct.

XDP_TX hangs is a simple coding bug (patch 1).

The accounting of XDP packets is all over the place. Fix it to obey
qstat rules (packets seen by XDP always counted as Rx packets).
Patch 2 fixes the basic accounting, patch 3 touches up saving
the stats when rings are freed.

Patch 6 corrects reporting of alloc_fail stats which prevented
the pp_alloc_fail test from passing.

Patches 4, 5, 7, 8, 9 add or fix related test cases.

v2:
 - [patch 2] remove now unnecessary byte adjustment
 - [patch 8] use seen_fails more
v1: https://lore.kernel.org/20251003233025.1157158-1-kuba@kernel.org

Testing on fbnic below:

  $ ./tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
  TAP version 13
  1..1
  fbnic-err: bad MMIO read address 0x80074
  fbnic-err: bad MMIO read address 0x80074
  # Seen: pkts:20605 fails:40 (pass thrs:12)
  # ethtool -G change retval: success
  ok 1 pp_alloc_fail.test_pp_alloc
  # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0

  $ ./tools/testing/selftests/drivers/net/xdp.py
  TAP version 13
  1..13
  ok 1 xdp.test_xdp_native_pass_sb
  ok 2 xdp.test_xdp_native_pass_mb
  ok 3 xdp.test_xdp_native_drop_sb
  ok 4 xdp.test_xdp_native_drop_mb
  ok 5 xdp.test_xdp_native_tx_sb
  ok 6 xdp.test_xdp_native_tx_mb
  # Failed run: pkt_sz 2048, offset 1. Last successful run: pkt_sz 1024, offset 256. Reason: Adjustment failed
  ok 7 xdp.test_xdp_native_adjst_tail_grow_data
  ok 8 xdp.test_xdp_native_adjst_tail_shrnk_data
  # Failed run: pkt_sz 512, offset -256. Last successful run: pkt_sz 512, offset -128. Reason: Adjustment failed
  ok 9 xdp.test_xdp_native_adjst_head_grow_data
  # Failed run: pkt_sz (2048) > HDS threshold (1536) and offset 64 > 48
  ok 10 xdp.test_xdp_native_adjst_head_shrnk_data
  ok 11 xdp.test_xdp_native_qstats_pass
  ok 12 xdp.test_xdp_native_qstats_drop
  ok 13 xdp.test_xdp_native_qstats_tx
  # Totals: pass:13 fail:0 xfail:0 xpass:0 skip:0 error:0
====================

Link: https://patch.msgid.link/20251007232653.2099376-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r-- tools/testing/selftests/drivers/net/hw/config | 4
-rwxr-xr-x tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py | 36
-rwxr-xr-x tools/testing/selftests/drivers/net/xdp.py | 99
3 files changed, 122 insertions, 17 deletions
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
index e8a06aa1471c..2307aa001be1 100644
--- a/tools/testing/selftests/drivers/net/hw/config
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -1,3 +1,7 @@
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FUNCTION_ERROR_INJECTION=y
CONFIG_IO_URING=y
CONFIG_IPV6=y
CONFIG_IPV6_GRE=y
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
index ad192fef3117..2a51b60df8a1 100755
--- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -1,8 +1,13 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+"""
+Test driver resilience vs page pool allocation failures.
+"""
+
import errno
import time
+import math
import os
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import KsftSkipEx, KsftFailEx
@@ -13,7 +18,8 @@ from lib.py import cmd, tool, GenerateTraffic
def _write_fail_config(config):
for key, value in config.items():
- with open("/sys/kernel/debug/fail_function/" + key, "w") as fp:
+ path = "/sys/kernel/debug/fail_function/"
+ with open(path + key, "w", encoding='ascii') as fp:
fp.write(str(value) + "\n")
@@ -22,8 +28,7 @@ def _enable_pp_allocation_fail():
raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")
if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
- with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
- fp.write("page_pool_alloc_netmems\n")
+ _write_fail_config({"inject": "page_pool_alloc_netmems"})
_write_fail_config({
"verbose": 0,
@@ -38,8 +43,7 @@ def _disable_pp_allocation_fail():
return
if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
- with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
- fp.write("\n")
+ _write_fail_config({"inject": ""})
_write_fail_config({
"probability": 0,
@@ -48,6 +52,10 @@ def _disable_pp_allocation_fail():
def test_pp_alloc(cfg, netdevnl):
+ """
+ Configure page pool allocation fail injection while traffic is running.
+ """
+
def get_stats():
return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
@@ -55,7 +63,7 @@ def test_pp_alloc(cfg, netdevnl):
stat1 = get_stats()
time.sleep(1)
stat2 = get_stats()
- if stat2['rx-packets'] - stat1['rx-packets'] < 15000:
+ if stat2['rx-packets'] - stat1['rx-packets'] < 4000:
raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])
@@ -82,11 +90,16 @@ def test_pp_alloc(cfg, netdevnl):
time.sleep(3)
s2 = get_stats()
- if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 1:
+ seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
+ if seen_fails < 1:
raise KsftSkipEx("Allocation failures not increasing")
- if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 100:
- raise KsftSkipEx("Allocation increasing too slowly", s2['rx-alloc-fail'] - s1['rx-alloc-fail'],
- "packets:", s2['rx-packets'] - s1['rx-packets'])
+ pkts = s2['rx-packets'] - s1['rx-packets']
+ # Expecting one failure per 512 buffers, 3.1x safety margin
+ want_fails = math.floor(pkts / 512 / 3.1)
+ if seen_fails < want_fails:
+ raise KsftSkipEx("Allocation increasing too slowly", seen_fails,
+ "packets:", pkts)
+ ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})")
# Basic failures are fine, try to wobble some settings to catch extra failures
check_traffic_flowing()
@@ -105,7 +118,7 @@ def test_pp_alloc(cfg, netdevnl):
else:
ksft_pr("ethtool -G change retval: did not succeed", new_g)
else:
- ksft_pr("ethtool -G change retval: did not try")
+ ksft_pr("ethtool -G change retval: did not try")
time.sleep(0.1)
check_traffic_flowing()
@@ -119,6 +132,7 @@ def test_pp_alloc(cfg, netdevnl):
def main() -> None:
+ """ Ksft boiler plate main """
netdevnl = NetdevFamily()
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
index 08fea4230759..a148004e1c36 100755
--- a/tools/testing/selftests/drivers/net/xdp.py
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -11,8 +11,9 @@ import string
from dataclasses import dataclass
from enum import Enum
-from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr
-from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr
+from lib.py import KsftFailEx, NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import bkg, cmd, rand_port, wait_port_listen
from lib.py import ip, bpftool, defer
@@ -541,11 +542,11 @@ def get_hds_thresh(cfg):
The HDS threshold value. If the threshold is not supported or an error occurs,
a default value of 1500 is returned.
"""
- netnl = cfg.netnl
+ ethnl = cfg.ethnl
hds_thresh = 1500
try:
- rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ rings = ethnl.rings_get({'header': {'dev-index': cfg.ifindex}})
if 'hds-thresh' not in rings:
ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}')
return hds_thresh
@@ -562,7 +563,7 @@ def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst):
Args:
cfg: Configuration object containing network settings.
- netnl: Network namespace or link object (not used in this function).
+ ethnl: Network namespace or link object (not used in this function).
This function sets up the packet size and offset lists, then performs
the head adjustment test by sending and receiving UDP packets.
@@ -671,6 +672,88 @@ def test_xdp_native_adjst_head_shrnk_data(cfg):
_validate_res(res, offset_lst, pkt_sz_lst)
+def _test_xdp_native_ifc_stats(cfg, act):
+ cfg.require_cmd("socat")
+
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, act.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ # Discard the input, but we need a listener to avoid ICMP errors
+ rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport " + \
+ "/dev/null"
+ # Listener runs on "remote" in case of XDP_TX
+ rx_host = cfg.remote if act == XDPAction.TX else None
+ # We want to spew 2000 packets quickly, bash seems to do a good enough job
+ tx_udp = f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \
+ "for i in `seq 2000`; do echo a >&5; done; exec 5>&-"
+
+ cfg.wait_hw_stats_settle()
+ # Qstats have more clearly defined semantics than rtnetlink.
+ # XDP is the "first layer of the stack" so XDP packets should be counted
+ # as received and sent as if the decision was made in the routing layer.
+ before = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ with bkg(rx_udp, host=rx_host, exit_wait=True):
+ wait_port_listen(port, proto="udp", host=rx_host)
+ cmd(tx_udp, host=cfg.remote, shell=True)
+
+ cfg.wait_hw_stats_settle()
+ after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ ksft_ge(after['rx-packets'] - before['rx-packets'], 2000)
+ if act == XDPAction.TX:
+ ksft_ge(after['tx-packets'] - before['tx-packets'], 2000)
+
+ expected_pkts = 2000
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+ ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch")
+ if act == XDPAction.TX:
+ ksft_eq(stats[XDPStats.TX.value], expected_pkts, "XDP TX stats mismatch")
+
+ # Flip the ring count back and forth to make sure the stats from XDP rings
+ # don't get lost.
+ chans = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if chans.get('combined-count', 0) > 1:
+ cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex},
+ 'combined-count': 1})
+ cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex},
+ 'combined-count': chans['combined-count']})
+ before = after
+ after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ ksft_ge(after['rx-packets'], before['rx-packets'])
+ if act == XDPAction.TX:
+ ksft_ge(after['tx-packets'], before['tx-packets'])
+
+
+def test_xdp_native_qstats_pass(cfg):
+ """
+ Send 2000 messages, expect XDP_PASS, make sure the packets were counted
+ to interface level qstats (Rx).
+ """
+ _test_xdp_native_ifc_stats(cfg, XDPAction.PASS)
+
+
+def test_xdp_native_qstats_drop(cfg):
+ """
+ Send 2000 messages, expect XDP_DROP, make sure the packets were counted
+ to interface level qstats (Rx).
+ """
+ _test_xdp_native_ifc_stats(cfg, XDPAction.DROP)
+
+
+def test_xdp_native_qstats_tx(cfg):
+ """
+ Send 2000 messages, expect XDP_TX, make sure the packets were counted
+ to interface level qstats (Rx and Tx)
+ """
+ _test_xdp_native_ifc_stats(cfg, XDPAction.TX)
+
+
def main():
"""
Main function to execute the XDP tests.
@@ -681,7 +764,8 @@ def main():
function to execute the tests.
"""
with NetDrvEpEnv(__file__) as cfg:
- cfg.netnl = EthtoolFamily()
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
ksft_run(
[
test_xdp_native_pass_sb,
@@ -694,6 +778,9 @@ def main():
test_xdp_native_adjst_tail_shrnk_data,
test_xdp_native_adjst_head_grow_data,
test_xdp_native_adjst_head_shrnk_data,
+ test_xdp_native_qstats_pass,
+ test_xdp_native_qstats_drop,
+ test_xdp_native_qstats_tx,
],
args=(cfg,))
ksft_exit()