diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2025-02-17 16:46:05 -0800 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-02-17 16:46:05 -0800 |
| commit | 24fc595edba2a04573329ae2df7af7db1b83a782 (patch) | |
| tree | baafd5580e809db095adac19000105a33a783460 | |
| parent | c935af429ec2344ffe716f869ddde43f5f1b20dc (diff) | |
| parent | 788e52e2b66844301fe09f3372d46d8c62f6ebe4 (diff) | |
Merge branch 'netdev-genl-add-an-xsk-attribute-to-queues'
Joe Damato says:
====================
netdev-genl: Add an xsk attribute to queues
This is an attempt to follow up on something Jakub asked me about [1],
adding an xsk attribute to queues and more clearly documenting which
queues are linked to NAPIs...
After the RFC [2], Jakub suggested creating an empty nest for queues
which have a pool, so I've adjusted this version to work that way.
The nest can be extended in the future to express attributes about XSK
as needed. Queues which are not used for AF_XDP do not have the xsk
attribute present.
I've run the included test on:
- my mlx5 machine (via NETIF=)
- without setting NETIF
And the test seems to pass in both cases.
[1]: https://lore.kernel.org/netdev/20250113143109.60afa59a@kernel.org/
[2]: https://lore.kernel.org/netdev/20250129172431.65773-1-jdamato@fastly.com/
====================
Link: https://patch.msgid.link/20250214211255.14194-1-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| -rw-r--r-- | Documentation/netlink/specs/netdev.yaml | 13 | ||||
| -rw-r--r-- | include/net/netlink.h | 15 | ||||
| -rw-r--r-- | include/uapi/linux/netdev.h | 6 | ||||
| -rw-r--r-- | net/core/netdev-genl.c | 12 | ||||
| -rw-r--r-- | tools/include/uapi/linux/netdev.h | 6 | ||||
| -rw-r--r-- | tools/testing/selftests/drivers/net/.gitignore | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/drivers/net/Makefile | 3 | ||||
| -rw-r--r-- | tools/testing/selftests/drivers/net/config | 1 | ||||
| -rwxr-xr-x | tools/testing/selftests/drivers/net/queues.py | 42 | ||||
| -rw-r--r-- | tools/testing/selftests/drivers/net/xdp_helper.c | 98 |
10 files changed, 194 insertions, 4 deletions
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 288923e965ae..85402a2e289c 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -277,6 +277,9 @@ attribute-sets: processing, if event polling finds events type: uint - + name: xsk-info + attributes: [] + - name: queue attributes: - @@ -294,6 +297,9 @@ attribute-sets: - name: type doc: Queue type as rx, tx. Each queue type defines a separate ID space. + XDP TX queues allocated in the kernel are not linked to NAPIs and + thus not listed. AF_XDP queues will have more information set in + the xsk attribute. type: u32 enum: queue-type - @@ -309,7 +315,11 @@ attribute-sets: doc: io_uring memory provider information. type: nest nested-attributes: io-uring-provider-info - + - + name: xsk + doc: XSK information for this queue, if any. + type: nest + nested-attributes: xsk-info - name: qstats doc: | @@ -652,6 +662,7 @@ operations: - ifindex - dmabuf - io-uring + - xsk dump: request: attributes: diff --git a/include/net/netlink.h b/include/net/netlink.h index e015ffbed819..29e0db940382 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -118,6 +118,7 @@ * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction + * nla_put_empty_nest(skb, type) create an empty nest * * Attribute Length Calculations: * nla_attr_size(payload) length of attribute w/o padding @@ -2241,6 +2242,20 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) } /** + * nla_put_empty_nest - Create an empty nest + * @skb: socket buffer the message is stored in + * @attrtype: attribute type of the container + * + * This function is a helper for creating empty nests. + * + * Returns: 0 when successful or -EMSGSIZE on failure. 
+ */ +static inline int nla_put_empty_nest(struct sk_buff *skb, int attrtype) +{ + return nla_nest_start(skb, attrtype) ? 0 : -EMSGSIZE; +} + +/** * __nla_validate_nested - Validate a stream of nested attributes * @start: container attribute * @maxtype: maximum attribute type to be expected diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6c6ee183802d..4e82f3871473 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -137,12 +137,18 @@ enum { }; enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + +enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 22ac51356d9f..c92fba65b20d 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -400,11 +400,23 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, if (params->mp_ops && params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) goto nla_put_failure; +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + goto nla_put_failure; +#endif + break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); if (nla_put_napi_id(rsp, txq->napi)) goto nla_put_failure; +#ifdef CONFIG_XDP_SOCKETS + if (txq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + goto nla_put_failure; +#endif + break; } genlmsg_end(rsp, hdr); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 6c6ee183802d..4e82f3871473 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -137,12 +137,18 @@ enum { }; enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + +enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, 
NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore new file mode 100644 index 000000000000..ec746f374e85 --- /dev/null +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +xdp_helper diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 28b6d47f812d..0c95bd944d56 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -1,10 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 +CFLAGS += $(KHDR_INCLUDES) TEST_INCLUDES := $(wildcard lib/py/*.py) \ $(wildcard lib/sh/*.sh) \ ../../net/net_helper.sh \ ../../net/lib.sh \ +TEST_GEN_FILES := xdp_helper + TEST_PROGS := \ netcons_basic.sh \ netcons_fragmented_msg.sh \ diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index a2d8af60876d..f27172ddee0a 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -4,3 +4,4 @@ CONFIG_CONFIGFS_FS=y CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 38303da957ee..5fdfebc6415f 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -2,13 +2,16 @@ # SPDX-License-Identifier: GPL-2.0 from lib.py import ksft_disruptive, ksft_exit, ksft_run -from lib.py import ksft_eq, ksft_raises, KsftSkipEx +from lib.py import ksft_eq, ksft_raises, KsftSkipEx, KsftFailEx from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import NetDrvEnv from lib.py import cmd, defer, ip 
import errno import glob - +import os +import socket +import struct +import subprocess def sys_get_queues(ifname, qtype='rx') -> int: folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') @@ -21,6 +24,39 @@ def nl_get_queues(cfg, nl, qtype='rx'): return len([q for q in queues if q['type'] == qtype]) return None +def check_xdp(cfg, nl, xdp_queue_id=0) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + xdp = subprocess.Popen([f"{test_dir}/xdp_helper", f"{cfg.ifindex}", f"{xdp_queue_id}"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=1, + text=True) + defer(xdp.kill) + + stdout, stderr = xdp.communicate(timeout=10) + rx = tx = False + + if xdp.returncode == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.returncode > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if not queues: + raise KsftSkipEx("Netlink reports no queues") + + for q in queues: + if q['id'] == 0: + if q['type'] == 'rx': + rx = True + if q['type'] == 'tx': + tx = True + + ksft_eq(q['xsk'], {}) + else: + if 'xsk' in q: + _fail("Check failed: xsk attribute set.") + + ksft_eq(rx, True) + ksft_eq(tx, True) def get_queues(cfg, nl) -> None: snl = NetdevFamily(recv_size=4096) @@ -81,7 +117,7 @@ def check_down(cfg, nl) -> None: def main() -> None: with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily())) + ksft_run([get_queues, addremove_queues, check_down, check_xdp], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/drivers/net/xdp_helper.c new file mode 100644 index 000000000000..cf06a88b830b --- /dev/null +++ b/tools/testing/selftests/drivers/net/xdp_helper.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include 
<sys/mman.h> +#include <sys/socket.h> +#include <linux/if_xdp.h> +#include <linux/if_link.h> +#include <net/if.h> +#include <inttypes.h> + +#define UMEM_SZ (1U << 16) +#define NUM_DESC (UMEM_SZ / 2048) + +/* this is a simple helper program that creates an XDP socket and does the + * minimum necessary to get bind() to succeed. + * + * this test program is not intended to actually process packets, but could be + * extended in the future if that is actually needed. + * + * it is used by queues.py to ensure the xsk netlink attribute is set + * correctly. + */ +int main(int argc, char **argv) +{ + struct xdp_umem_reg umem_reg = { 0 }; + struct sockaddr_xdp sxdp = { 0 }; + int num_desc = NUM_DESC; + void *umem_area; + int ifindex; + int sock_fd; + int queue; + char byte; + + if (argc != 3) { + fprintf(stderr, "Usage: %s ifindex queue_id", argv[0]); + return 1; + } + + sock_fd = socket(AF_XDP, SOCK_RAW, 0); + if (sock_fd < 0) { + perror("socket creation failed"); + /* if the kernel doesn't support AF_XDP, let the test program + * know with -1. All other error paths return 1. 
+ */ + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + ifindex = atoi(argv[1]); + queue = atoi(argv[2]); + + umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (umem_area == MAP_FAILED) { + perror("mmap failed"); + return 1; + } + + umem_reg.addr = (uintptr_t)umem_area; + umem_reg.len = UMEM_SZ; + umem_reg.chunk_size = 2048; + umem_reg.headroom = 0; + + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, + sizeof(umem_reg)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc)); + + sxdp.sxdp_family = AF_XDP; + sxdp.sxdp_ifindex = ifindex; + sxdp.sxdp_queue_id = queue; + sxdp.sxdp_flags = 0; + + if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) { + munmap(umem_area, UMEM_SZ); + perror("bind failed"); + close(sock_fd); + return 1; + } + + /* give the parent program some data when the socket is ready*/ + fprintf(stdout, "%d\n", sock_fd); + + /* parent program will write a byte to stdin when its ready for this + * helper to exit + */ + read(STDIN_FILENO, &byte, 1); + + close(sock_fd); + return 0; +} |
