diff options
-rw-r--r-- | Documentation/devicetree/bindings/net/ethernet-controller.yaml | 97 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ice/ice_adapter.c | 47 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ice/ice_adapter.h | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/mediatek/mtk_eth_soc.c | 19 | ||||
-rw-r--r-- | drivers/net/virtio_net.c | 19 | ||||
-rw-r--r-- | include/linux/netdevice.h | 1 | ||||
-rw-r--r-- | net/core/dev.c | 18 | ||||
-rw-r--r-- | net/core/dev_api.c | 23 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 15 | ||||
-rw-r--r-- | net/sched/sch_htb.c | 15 | ||||
-rw-r--r-- | tools/net/ynl/lib/ynl.c | 2 | ||||
-rwxr-xr-x | tools/testing/selftests/drivers/net/ping.py | 45 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 1 | ||||
-rwxr-xr-x | tools/testing/selftests/net/gre_ipv6_lladdr.sh | 177 | ||||
-rw-r--r-- | tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json | 35 |
15 files changed, 422 insertions, 98 deletions
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 45819b235800..a2d4c626f659 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -74,19 +74,17 @@ properties: - rev-rmii - moca - # RX and TX delays are added by the MAC when required + # RX and TX delays are provided by the PCB. See below - rgmii - # RGMII with internal RX and TX delays provided by the PHY, - # the MAC should not add the RX or TX delays in this case + # RX and TX delays are not provided by the PCB. This is the most + # frequent case. See below - rgmii-id - # RGMII with internal RX delay provided by the PHY, the MAC - # should not add an RX delay in this case + # TX delay is provided by the PCB. See below - rgmii-rxid - # RGMII with internal TX delay provided by the PHY, the MAC - # should not add an TX delay in this case + # RX delay is provided by the PCB. See below - rgmii-txid - rtbi - smii @@ -286,4 +284,89 @@ allOf: additionalProperties: true +# Informative +# =========== +# +# 'phy-modes' & 'phy-connection-type' properties 'rgmii', 'rgmii-id', +# 'rgmii-rxid', and 'rgmii-txid' are frequently used wrongly by +# developers. This informative section clarifies their usage. +# +# The RGMII specification requires a 2ns delay between the data and +# clock signals on the RGMII bus. How this delay is implemented is not +# specified. +# +# One option is to make the clock traces on the PCB longer than the +# data traces. A sufficiently difference in length can provide the 2ns +# delay. If both the RX and TX delays are implemented in this manner, +# 'rgmii' should be used, so indicating the PCB adds the delays. +# +# If the PCB does not add these delays via extra long traces, +# 'rgmii-id' should be used. Here, 'id' refers to 'internal delay', +# where either the MAC or PHY adds the delay. +# +# If only one of the two delays are implemented via extra long clock +# lines, either 'rgmii-rxid' or 'rgmii-txid' should be used, +# indicating the MAC or PHY should implement one of the delays +# internally, while the PCB implements the other delay. +# +# Device Tree describes hardware, and in this case, it describes the +# PCB between the MAC and the PHY, if the PCB implements delays or +# not. +# +# In practice, very few PCBs make use of extra long clock lines. Hence +# any RGMII phy mode other than 'rgmii-id' is probably wrong, and is +# unlikely to be accepted during review without details provided in +# the commit description and comments in the .dts file. +# +# When the PCB does not implement the delays, the MAC or PHY must. As +# such, this is software configuration, and so not described in Device +# Tree. +# +# The following describes how Linux implements the configuration of +# the MAC and PHY to add these delays when the PCB does not. As stated +# above, developers often get this wrong, and the aim of this section +# is reduce the frequency of these errors by Linux developers. Other +# users of the Device Tree may implement it differently, and still be +# consistent with both the normative and informative description +# above. +# +# By default in Linux, when using phylib/phylink, the MAC is expected +# to read the 'phy-mode' from Device Tree, not implement any delays, +# and pass the value to the PHY. The PHY will then implement delays as +# specified by the 'phy-mode'. The PHY should always be reconfigured +# to implement the needed delays, replacing any setting performed by +# strapping or the bootloader, etc. +# +# Experience to date is that all PHYs which implement RGMII also +# implement the ability to add or not add the needed delays. Hence +# this default is expected to work in all cases. Ignoring this default +# is likely to be questioned by Reviews, and require a strong argument +# to be accepted. +# +# There are a small number of cases where the MAC has hard coded +# delays which cannot be disabled. The 'phy-mode' only describes the +# PCB. The inability to disable the delays in the MAC does not change +# the meaning of 'phy-mode'. It does however mean that a 'phy-mode' of +# 'rgmii' is now invalid, it cannot be supported, since both the PCB +# and the MAC and PHY adding delays cannot result in a functional +# link. Thus the MAC should report a fatal error for any modes which +# cannot be supported. When the MAC implements the delay, it must +# ensure that the PHY does not also implement the same delay. So it +# must modify the phy-mode it passes to the PHY, removing the delay it +# has added. Failure to remove the delay will result in a +# non-functioning link. +# +# Sometimes there is a need to fine tune the delays. Often the MAC or +# PHY can perform this fine tuning. In the MAC node, the Device Tree +# properties 'rx-internal-delay-ps' and 'tx-internal-delay-ps' should +# be used to indicate fine tuning performed by the MAC. The values +# expected here are small. A value of 2000ps, i.e 2ns, and a phy-mode +# of 'rgmii' will not be accepted by Reviewers. +# +# If the PHY is to perform fine tuning, the properties +# 'rx-internal-delay-ps' and 'tx-internal-delay-ps' in the PHY node +# should be used. When the PHY is implementing delays, e.g. 'rgmii-id' +# these properties should have a value near to 2000ps. If the PCB is +# implementing delays, e.g. 'rgmii', a small value can be used to fine +# tune the delay added by the PCB. ... diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 01a08cfd0090..66e070095d1b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright Red Hat -#include <linux/bitfield.h> #include <linux/cleanup.h> #include <linux/mutex.h> #include <linux/pci.h> @@ -14,32 +13,16 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */ -#define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) -#define INDEX_FIELD_DEV GENMASK(31, 16) -#define INDEX_FIELD_BUS GENMASK(12, 5) -#define INDEX_FIELD_SLOT GENMASK(4, 0) - -static unsigned long ice_adapter_index(const struct pci_dev *pdev) +static unsigned long ice_adapter_index(u64 dsn) { - unsigned int domain = pci_domain_nr(pdev->bus); - - WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN)); - - switch (pdev->device) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: - case ICE_DEV_ID_E825C_SGMII: - return FIELD_PREP(INDEX_FIELD_DEV, pdev->device); - default: - return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | - FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | - FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); - } +#if BITS_PER_LONG == 64 + return dsn; +#else + return (u32)dsn ^ (u32)(dsn >> 32); +#endif } -static struct ice_adapter *ice_adapter_new(void) +static struct ice_adapter *ice_adapter_new(u64 dsn) { struct ice_adapter *adapter; @@ -47,6 +30,7 @@ static struct ice_adapter *ice_adapter_new(void) if (!adapter) return NULL; + adapter->device_serial_number = dsn; spin_lock_init(&adapter->ptp_gltsyn_time_lock); refcount_set(&adapter->refcount, 1); @@ -77,23 +61,26 @@ static void ice_adapter_free(struct ice_adapter *adapter) * Return: Pointer to ice_adapter on success. * ERR_PTR() on error. -ENOMEM is the only possible error. */ -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; int err; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); + WARN_ON_ONCE(adapter->device_serial_number != dsn); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(); + adapter = ice_adapter_new(dsn); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -110,11 +97,13 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) * * Context: Process, may sleep. */ -void ice_adapter_put(const struct pci_dev *pdev) +void ice_adapter_put(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index e233225848b3..ac15c0d2bc1a 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -32,6 +32,7 @@ struct ice_port_list { * @refcount: Reference count. struct ice_pf objects hold the references. * @ctrl_pf: Control PF of the adapter * @ports: Ports list + * @device_serial_number: DSN cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -40,9 +41,10 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; + u64 device_serial_number; }; -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev); -void ice_adapter_put(const struct pci_dev *pdev); +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); +void ice_adapter_put(struct pci_dev *pdev); #endif /* _ICE_ADAPTER_H */ diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8fda4ce80d81..22a532695fb0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3186,11 +3186,19 @@ static int mtk_dma_init(struct mtk_eth *eth) static void mtk_dma_free(struct mtk_eth *eth) { const struct mtk_soc_data *soc = eth->soc; - int i; + int i, j, txqs = 1; + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + txqs = MTK_QDMA_NUM_QUEUES; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + if (!eth->netdev[i]) + continue; + + for (j = 0; j < txqs; j++) + netdev_tx_reset_subqueue(eth->netdev[i], j); + } - for (i = 0; i < MTK_MAX_DEVS; i++) - if (eth->netdev[i]) - netdev_reset_queue(eth->netdev[i]); if (!MTK_HAS_CAPS(soc->caps, MTK_SRAM) && eth->scratch_ring) { dma_free_coherent(eth->dma_dev, MTK_QDMA_RING_SIZE * soc->tx.desc_size, @@ -3465,9 +3473,6 @@ static int mtk_open(struct net_device *dev) } mtk_gdm_config(eth, target_mac->id, gdm_config); } - /* Reset and enable PSE */ - mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); - mtk_w32(eth, 0, MTK_RST_GL); napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 848fab51dfa1..f9e3e628ec4d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3383,12 +3383,15 @@ static void __virtnet_rx_resume(struct virtnet_info *vi, bool refill) { bool running = netif_running(vi->dev); + bool schedule_refill = false; if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); - + schedule_refill = true; if (running) virtnet_napi_enable(rq); + + if (schedule_refill) + schedule_delayed_work(&vi->refill, 0); } static void virtnet_rx_resume_all(struct virtnet_info *vi) @@ -3728,8 +3731,10 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) succ: vi->curr_queue_pairs = queue_pairs; /* virtnet_open() will refill when device is going to up. */ - if (dev->flags & IFF_UP) + spin_lock_bh(&vi->refill_lock); + if (dev->flags & IFF_UP && vi->refill_enabled) schedule_delayed_work(&vi->refill, 0); + spin_unlock_bh(&vi->refill_lock); return 0; } @@ -5885,8 +5890,10 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, DMA_TO_DEVICE, 0); - if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) - return -ENOMEM; + if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) { + err = -ENOMEM; + goto err_free_buffs; + } err = xsk_pool_dma_map(pool, dma_dev, 0); if (err) @@ -5914,6 +5921,8 @@ err_rq: err_xsk_map: virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, DMA_TO_DEVICE, 0); +err_free_buffs: + kvfree(rq->xsk_buffs); return err; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d11d013cabe..7ea022750e4e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4972,6 +4972,7 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); diff --git a/net/core/dev.c b/net/core/dev.c index 1be7cb73a602..11da1e272ec2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9193,18 +9193,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - * Return 0 if successful or a negative errno code on error. - */ -int dev_set_promiscuity(struct net_device *dev, int inc) +int netif_set_promiscuity(struct net_device *dev, int inc) { unsigned int old_flags = dev->flags; int err; @@ -9216,7 +9205,6 @@ int dev_set_promiscuity(struct net_device *dev, int inc) dev_set_rx_mode(dev); return err; } -EXPORT_SYMBOL(dev_set_promiscuity); int netif_set_allmulti(struct net_device *dev, int inc, bool notify) { @@ -11966,9 +11954,9 @@ void unregister_netdevice_many_notify(struct list_head *head, struct sk_buff *skb = NULL; /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); dev_tcx_uninstall(dev); - netdev_lock_ops(dev); dev_xdp_uninstall(dev); dev_memory_provider_uninstall(dev); netdev_unlock_ops(dev); @@ -12161,7 +12149,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, synchronize_net(); /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); + netdev_unlock_ops(dev); /* Notify protocols, that we are about to destroy * this device. They should clean all the things. diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 90898cd540ce..f9a160ab596f 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -268,6 +268,29 @@ void dev_disable_lro(struct net_device *dev) EXPORT_SYMBOL(dev_disable_lro); /** + * dev_set_promiscuity() - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. + */ +int dev_set_promiscuity(struct net_device *dev, int inc) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_promiscuity(dev, inc); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_promiscuity); + +/** * dev_set_allmulti() - update allmulti count on a device * @dev: device * @inc: modifier diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9ba83f0c9928..c6b22170dc49 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3214,16 +3214,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3534,7 +3531,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. + */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 4b9a639b642e..14bf71f57057 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -348,7 +348,8 @@ static void htb_add_to_wait_tree(struct htb_sched *q, */ static inline void htb_next_rb_node(struct rb_node **n) { - *n = rb_next(*n); + if (*n) + *n = rb_next(*n); } /** @@ -609,8 +610,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(!cl->prio_activity); - + if (!cl->prio_activity) + return; htb_deactivate_prios(q, cl); cl->prio_activity = 0; } @@ -1485,8 +1486,6 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (!cl->prio_activity) - return; htb_deactivate(qdisc_priv(sch), cl); } @@ -1740,8 +1739,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, if (cl->parent) cl->parent->children--; - if (cl->prio_activity) - htb_deactivate(q, cl); + htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&cl->pq_node, @@ -1949,8 +1947,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_purge_queue(parent->leaf.q); parent_qdisc = parent->leaf.q; - if (parent->prio_activity) - htb_deactivate(q, parent); + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index ce32cb35007d..c4da34048ef8 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -364,7 +364,7 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) "Invalid attribute (binary %s)", policy->name); return -1; case YNL_PT_NUL_STR: - if ((!policy->len || len <= policy->len) && !data[len - 1]) + if (len && (!policy->len || len <= policy->len) && !data[len - 1]) break; yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, "Invalid attribute (string %s)", policy->name); diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 4b6822866066..af8df2313a3b 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -9,11 +9,11 @@ from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port from lib.py import defer, ethtool, ip -remote_ifname="" no_sleep=False def _test_v4(cfg) -> None: - cfg.require_ipver("4") + if not cfg.addr_v["4"]: + return cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) @@ -21,7 +21,8 @@ def _test_v4(cfg) -> None: cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) def _test_v6(cfg) -> None: - cfg.require_ipver("6") + if not cfg.addr_v["6"]: + return cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) @@ -57,7 +58,7 @@ def _set_offload_checksum(cfg, netnl, on) -> None: def _set_xdp_generic_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") @@ -66,8 +67,8 @@ def _set_xdp_generic_sb_on(cfg) -> None: def _set_xdp_generic_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off") @@ -76,7 +77,7 @@ def _set_xdp_generic_mb_on(cfg) -> None: def _set_xdp_native_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] @@ -93,8 +94,8 @@ def _set_xdp_native_sb_on(cfg) -> None: def _set_xdp_native_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) try: cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") @@ -112,18 +113,15 @@ def _set_xdp_offload_on(cfg) -> None: except Exception as e: raise KsftSkipEx('device does not support offloaded XDP') defer(ip, f"link set dev {cfg.ifname} xdpoffload off") - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) if no_sleep != True: time.sleep(10) def get_interface_info(cfg) -> None: - global remote_ifname global no_sleep - remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout - remote_ifname = remote_info.rstrip('\n') - if remote_ifname == "": + if cfg.remote_ifname == "": raise KsftFailEx('Can not get remote interface') local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": @@ -136,15 +134,25 @@ def set_interface_init(cfg) -> None: cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + +def test_default_v4(cfg, netnl) -> None: + cfg.require_ipver("4") -def test_default(cfg, netnl) -> None: _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) - _test_v6(cfg) _test_tcp(cfg) _set_offload_checksum(cfg, netnl, "on") _test_v4(cfg) + _test_tcp(cfg) + +def test_default_v6(cfg, netnl) -> None: + cfg.require_ipver("6") + + _set_offload_checksum(cfg, netnl, "off") + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") _test_v6(cfg) _test_tcp(cfg) @@ -202,7 +210,8 @@ def main() -> None: with NetDrvEpEnv(__file__) as cfg: get_interface_info(cfg) set_interface_init(cfg) - ksft_run([test_default, + ksft_run([test_default_v4, + test_default_v6, test_xdp_generic_sb, test_xdp_generic_mb, test_xdp_native_sb, diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 124078b56fa4..70a38f485d4d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,6 +31,7 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += test_so_rcv.sh diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 000000000000..5b34f6e1f831 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). +# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 0843f6d37e9c..a951c0d33cd2 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -538,5 +538,40 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "62c4", + "name": "Test HTB with FQ_CODEL - basic functionality", + "category": [ + "qdisc", + "htb", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 root handle 1: htb default 11", + "$TC class add dev $DEV1 parent 1: classid 1:1 htb rate 10kbit", + "$TC class add dev $DEV1 parent 1:1 classid 1:11 htb rate 10kbit prio 0 quantum 1486", + "$TC qdisc add dev $DEV1 parent 1:11 fq_codel quantum 300 noecn", + "sleep 0.5" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/TCP(sport=12345, dport=80)" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1 | grep -A 5 'qdisc fq_codel'", + "matchPattern": "Sent [0-9]+ bytes [0-9]+ pkt", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 handle 1: root" + ] } ] |