summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/devlink-trap-netdevsim.rst20
-rw-r--r--Documentation/networking/devlink-trap.rst208
-rw-r--r--Documentation/networking/index.rst2
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/net/netdevsim/dev.c282
-rw-r--r--drivers/net/netdevsim/netdevsim.h1
-rw-r--r--include/net/devlink.h175
-rw-r--r--include/net/drop_monitor.h33
-rw-r--r--include/uapi/linux/devlink.h62
-rw-r--r--include/uapi/linux/net_dropmon.h15
-rw-r--r--net/Kconfig1
-rw-r--r--net/core/devlink.c1080
-rw-r--r--net/core/drop_monitor.c724
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh364
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh189
15 files changed, 3116 insertions, 41 deletions
diff --git a/Documentation/networking/devlink-trap-netdevsim.rst b/Documentation/networking/devlink-trap-netdevsim.rst
new file mode 100644
index 000000000000..b721c9415473
--- /dev/null
+++ b/Documentation/networking/devlink-trap-netdevsim.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+Devlink Trap netdevsim
+======================
+
+Driver-specific Traps
+=====================
+
+.. list-table:: List of Driver-specific Traps Registered by ``netdevsim``
+ :widths: 5 5 90
+
+ * - Name
+ - Type
+ - Description
+ * - ``fid_miss``
+ - ``exception``
+ - When a packet enters the device it is classified to a filtering
+ indentifier (FID) based on the ingress port and VLAN. This trap is used
+ to trap packets for which a FID could not be found
diff --git a/Documentation/networking/devlink-trap.rst b/Documentation/networking/devlink-trap.rst
new file mode 100644
index 000000000000..c20c7c483664
--- /dev/null
+++ b/Documentation/networking/devlink-trap.rst
@@ -0,0 +1,208 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Devlink Trap
+============
+
+Background
+==========
+
+Devices capable of offloading the kernel's datapath and perform functions such
+as bridging and routing must also be able to send specific packets to the
+kernel (i.e., the CPU) for processing.
+
+For example, a device acting as a multicast-aware bridge must be able to send
+IGMP membership reports to the kernel for processing by the bridge module.
+Without processing such packets, the bridge module could never populate its
+MDB.
+
+As another example, consider a device acting as router which has received an IP
+packet with a TTL of 1. Upon routing the packet the device must send it to the
+kernel so that it will route it as well and generate an ICMP Time Exceeded
+error datagram. Without letting the kernel route such packets itself, utilities
+such as ``traceroute`` could never work.
+
+The fundamental ability of sending certain packets to the kernel for processing
+is called "packet trapping".
+
+Overview
+========
+
+The ``devlink-trap`` mechanism allows capable device drivers to register their
+supported packet traps with ``devlink`` and report trapped packets to
+``devlink`` for further analysis.
+
+Upon receiving trapped packets, ``devlink`` will perform a per-trap packets and
+bytes accounting and potentially report the packet to user space via a netlink
+event along with all the provided metadata (e.g., trap reason, timestamp, input
+port). This is especially useful for drop traps (see :ref:`Trap-Types`)
+as it allows users to obtain further visibility into packet drops that would
+otherwise be invisible.
+
+The following diagram provides a general overview of ``devlink-trap``::
+
+ Netlink event: Packet w/ metadata
+ Or a summary of recent drops
+ ^
+ |
+ Userspace |
+ +---------------------------------------------------+
+ Kernel |
+ |
+ +-------+--------+
+ | |
+ | drop_monitor |
+ | |
+ +-------^--------+
+ |
+ |
+ |
+ +----+----+
+ | | Kernel's Rx path
+ | devlink | (non-drop traps)
+ | |
+ +----^----+ ^
+ | |
+ +-----------+
+ |
+ +-------+-------+
+ | |
+ | Device driver |
+ | |
+ +-------^-------+
+ Kernel |
+ +---------------------------------------------------+
+ Hardware |
+ | Trapped packet
+ |
+ +--+---+
+ | |
+ | ASIC |
+ | |
+ +------+
+
+.. _Trap-Types:
+
+Trap Types
+==========
+
+The ``devlink-trap`` mechanism supports the following packet trap types:
+
+ * ``drop``: Trapped packets were dropped by the underlying device. Packets
+ are only processed by ``devlink`` and not injected to the kernel's Rx path.
+ The trap action (see :ref:`Trap-Actions`) can be changed.
+ * ``exception``: Trapped packets were not forwarded as intended by the
+ underlying device due to an exception (e.g., TTL error, missing neighbour
+ entry) and trapped to the control plane for resolution. Packets are
+ processed by ``devlink`` and injected to the kernel's Rx path. Changing the
+ action of such traps is not allowed, as it can easily break the control
+ plane.
+
+.. _Trap-Actions:
+
+Trap Actions
+============
+
+The ``devlink-trap`` mechanism supports the following packet trap actions:
+
+ * ``trap``: The sole copy of the packet is sent to the CPU.
+ * ``drop``: The packet is dropped by the underlying device and a copy is not
+ sent to the CPU.
+
+Generic Packet Traps
+====================
+
+Generic packet traps are used to describe traps that trap well-defined packets
+or packets that are trapped due to well-defined conditions (e.g., TTL error).
+Such traps can be shared by multiple device drivers and their description must
+be added to the following table:
+
+.. list-table:: List of Generic Packet Traps
+ :widths: 5 5 90
+
+ * - Name
+ - Type
+ - Description
+ * - ``source_mac_is_multicast``
+ - ``drop``
+ - Traps incoming packets that the device decided to drop because of a
+ multicast source MAC
+ * - ``vlan_tag_mismatch``
+ - ``drop``
+ - Traps incoming packets that the device decided to drop in case of VLAN
+ tag mismatch: The ingress bridge port is not configured with a PVID and
+ the packet is untagged or prio-tagged
+ * - ``ingress_vlan_filter``
+ - ``drop``
+ - Traps incoming packets that the device decided to drop in case they are
+ tagged with a VLAN that is not configured on the ingress bridge port
+ * - ``ingress_spanning_tree_filter``
+ - ``drop``
+ - Traps incoming packets that the device decided to drop in case the STP
+ state of the ingress bridge port is not "forwarding"
+ * - ``port_list_is_empty``
+ - ``drop``
+ - Traps packets that the device decided to drop in case they need to be
+ flooded and the flood list is empty
+ * - ``port_loopback_filter``
+ - ``drop``
+ - Traps packets that the device decided to drop in case after layer 2
+ forwarding the only port from which they should be transmitted through
+ is the port from which they were received
+ * - ``blackhole_route``
+ - ``drop``
+ - Traps packets that the device decided to drop in case they hit a
+ blackhole route
+ * - ``ttl_value_is_too_small``
+ - ``exception``
+ - Traps unicast packets that should be forwarded by the device whose TTL
+ was decremented to 0 or less
+ * - ``tail_drop``
+ - ``drop``
+ - Traps packets that the device decided to drop because they could not be
+ enqueued to a transmission queue which is full
+
+Driver-specific Packet Traps
+============================
+
+Device drivers can register driver-specific packet traps, but these must be
+clearly documented. Such traps can correspond to device-specific exceptions and
+help debug packet drops caused by these exceptions. The following list includes
+links to the description of driver-specific traps registered by various device
+drivers:
+
+ * :doc:`/devlink-trap-netdevsim`
+
+Generic Packet Trap Groups
+==========================
+
+Generic packet trap groups are used to aggregate logically related packet
+traps. These groups allow the user to batch operations such as setting the trap
+action of all member traps. In addition, ``devlink-trap`` can report aggregated
+per-group packets and bytes statistics, in case per-trap statistics are too
+narrow. The description of these groups must be added to the following table:
+
+.. list-table:: List of Generic Packet Trap Groups
+ :widths: 10 90
+
+ * - Name
+ - Description
+ * - ``l2_drops``
+ - Contains packet traps for packets that were dropped by the device during
+ layer 2 forwarding (i.e., bridge)
+ * - ``l3_drops``
+ - Contains packet traps for packets that were dropped by the device or hit
+ an exception (e.g., TTL error) during layer 3 forwarding
+ * - ``buffer_drops``
+ - Contains packet traps for packets that were dropped by the device due to
+ an enqueue decision
+
+Testing
+=======
+
+See ``tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh`` for a
+test covering the core infrastructure. Test cases should be added for any new
+functionality.
+
+Device drivers should focus their tests on device-specific functionality, such
+as the triggering of supported packet traps.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index a46fca264bee..37eabc17894c 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -14,6 +14,8 @@ Contents:
device_drivers/index
dsa/index
devlink-info-versions
+ devlink-trap
+ devlink-trap-netdevsim
ieee802154
kapi
z8530book
diff --git a/MAINTAINERS b/MAINTAINERS
index fd9ab61c2670..96d3e60697f5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11156,6 +11156,7 @@ S: Maintained
W: https://fedorahosted.org/dropwatch/
F: net/core/drop_monitor.c
F: include/uapi/linux/net_dropmon.h
+F: include/net/drop_monitor.h
NETWORKING DRIVERS
M: "David S. Miller" <davem@davemloft.net>
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index a570da406d1d..c217049552f7 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -17,11 +17,20 @@
#include <linux/debugfs.h>
#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/inet.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
#include <net/devlink.h>
+#include <net/ip.h>
+#include <uapi/linux/devlink.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/udp.h>
#include "netdevsim.h"
@@ -302,6 +311,218 @@ static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev)
devlink_region_destroy(nsim_dev->dummy_region);
}
+struct nsim_trap_item {
+ void *trap_ctx;
+ enum devlink_trap_action action;
+};
+
+struct nsim_trap_data {
+ struct delayed_work trap_report_dw;
+ struct nsim_trap_item *trap_items_arr;
+ struct nsim_dev *nsim_dev;
+ spinlock_t trap_lock; /* Protects trap_items_arr */
+};
+
+/* All driver-specific traps must be documented in
+ * Documentation/networking/devlink-trap-netdevsim.rst
+ */
+enum {
+ NSIM_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX,
+ NSIM_TRAP_ID_FID_MISS,
+};
+
+#define NSIM_TRAP_NAME_FID_MISS "fid_miss"
+
+#define NSIM_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT
+
+#define NSIM_TRAP_DROP(_id, _group_id) \
+ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \
+ DEVLINK_TRAP_GROUP_GENERIC(_group_id), \
+ NSIM_TRAP_METADATA)
+#define NSIM_TRAP_EXCEPTION(_id, _group_id) \
+ DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \
+ DEVLINK_TRAP_GROUP_GENERIC(_group_id), \
+ NSIM_TRAP_METADATA)
+#define NSIM_TRAP_DRIVER_EXCEPTION(_id, _group_id) \
+ DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, NSIM_TRAP_ID_##_id, \
+ NSIM_TRAP_NAME_##_id, \
+ DEVLINK_TRAP_GROUP_GENERIC(_group_id), \
+ NSIM_TRAP_METADATA)
+
+static const struct devlink_trap nsim_traps_arr[] = {
+ NSIM_TRAP_DROP(SMAC_MC, L2_DROPS),
+ NSIM_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS),
+ NSIM_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS),
+ NSIM_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS),
+ NSIM_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS),
+ NSIM_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS),
+ NSIM_TRAP_DRIVER_EXCEPTION(FID_MISS, L2_DROPS),
+ NSIM_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS),
+ NSIM_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS),
+ NSIM_TRAP_DROP(TAIL_DROP, BUFFER_DROPS),
+};
+
+#define NSIM_TRAP_L4_DATA_LEN 100
+
+static struct sk_buff *nsim_dev_trap_skb_build(void)
+{
+ int tot_len, data_len = NSIM_TRAP_L4_DATA_LEN;
+ struct sk_buff *skb;
+ struct udphdr *udph;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+ tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len;
+
+ eth = skb_put(skb, sizeof(struct ethhdr));
+ eth_random_addr(eth->h_dest);
+ eth_random_addr(eth->h_source);
+ eth->h_proto = htons(ETH_P_IP);
+ skb->protocol = htons(ETH_P_IP);
+
+ iph = skb_put(skb, sizeof(struct iphdr));
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = in_aton("192.0.2.1");
+ iph->daddr = in_aton("198.51.100.1");
+ iph->version = 0x4;
+ iph->frag_off = 0;
+ iph->ihl = 0x5;
+ iph->tot_len = htons(tot_len);
+ iph->ttl = 100;
+ ip_send_check(iph);
+
+ udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len);
+ get_random_bytes(&udph->source, sizeof(u16));
+ get_random_bytes(&udph->dest, sizeof(u16));
+ udph->len = htons(sizeof(struct udphdr) + data_len);
+
+ return skb;
+}
+
+static void nsim_dev_trap_report(struct nsim_dev_port *nsim_dev_port)
+{
+ struct nsim_dev *nsim_dev = nsim_dev_port->ns->nsim_dev;
+ struct devlink *devlink = priv_to_devlink(nsim_dev);
+ struct nsim_trap_data *nsim_trap_data;
+ int i;
+
+ nsim_trap_data = nsim_dev->trap_data;
+
+ spin_lock(&nsim_trap_data->trap_lock);
+ for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
+ struct nsim_trap_item *nsim_trap_item;
+ struct sk_buff *skb;
+
+ nsim_trap_item = &nsim_trap_data->trap_items_arr[i];
+ if (nsim_trap_item->action == DEVLINK_TRAP_ACTION_DROP)
+ continue;
+
+ skb = nsim_dev_trap_skb_build();
+ if (!skb)
+ continue;
+ skb->dev = nsim_dev_port->ns->netdev;
+
+ /* Trapped packets are usually passed to devlink in softIRQ,
+ * but in this case they are generated in a workqueue. Disable
+ * softIRQs to prevent lockdep from complaining about
+ * "incosistent lock state".
+ */
+ local_bh_disable();
+ devlink_trap_report(devlink, skb, nsim_trap_item->trap_ctx,
+ &nsim_dev_port->devlink_port);
+ local_bh_enable();
+ consume_skb(skb);
+ }
+ spin_unlock(&nsim_trap_data->trap_lock);
+}
+
+#define NSIM_TRAP_REPORT_INTERVAL_MS 100
+
+static void nsim_dev_trap_report_work(struct work_struct *work)
+{
+ struct nsim_trap_data *nsim_trap_data;
+ struct nsim_dev_port *nsim_dev_port;
+ struct nsim_dev *nsim_dev;
+
+ nsim_trap_data = container_of(work, struct nsim_trap_data,
+ trap_report_dw.work);
+ nsim_dev = nsim_trap_data->nsim_dev;
+
+ /* For each running port and enabled packet trap, generate a UDP
+ * packet with a random 5-tuple and report it.
+ */
+ mutex_lock(&nsim_dev->port_list_lock);
+ list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) {
+ if (!netif_running(nsim_dev_port->ns->netdev))
+ continue;
+
+ nsim_dev_trap_report(nsim_dev_port);
+ }
+ mutex_unlock(&nsim_dev->port_list_lock);
+
+ schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
+ msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
+}
+
+static int nsim_dev_traps_init(struct devlink *devlink)
+{
+ struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_trap_data *nsim_trap_data;
+ int err;
+
+ nsim_trap_data = kzalloc(sizeof(*nsim_trap_data), GFP_KERNEL);
+ if (!nsim_trap_data)
+ return -ENOMEM;
+
+ nsim_trap_data->trap_items_arr = kcalloc(ARRAY_SIZE(nsim_traps_arr),
+ sizeof(struct nsim_trap_item),
+ GFP_KERNEL);
+ if (!nsim_trap_data->trap_items_arr) {
+ err = -ENOMEM;
+ goto err_trap_data_free;
+ }
+
+ /* The lock is used to protect the action state of the registered
+ * traps. The value is written by user and read in delayed work when
+ * iterating over all the traps.
+ */
+ spin_lock_init(&nsim_trap_data->trap_lock);
+ nsim_trap_data->nsim_dev = nsim_dev;
+ nsim_dev->trap_data = nsim_trap_data;
+
+ err = devlink_traps_register(devlink, nsim_traps_arr,
+ ARRAY_SIZE(nsim_traps_arr), NULL);
+ if (err)
+ goto err_trap_items_free;
+
+ INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw,
+ nsim_dev_trap_report_work);
+ schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
+ msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
+
+ return 0;
+
+err_trap_items_free:
+ kfree(nsim_trap_data->trap_items_arr);
+err_trap_data_free:
+ kfree(nsim_trap_data);
+ return err;
+}
+
+static void nsim_dev_traps_exit(struct devlink *devlink)
+{
+ struct nsim_dev *nsim_dev = devlink_priv(devlink);
+
+ cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw);
+ devlink_traps_unregister(devlink, nsim_traps_arr,
+ ARRAY_SIZE(nsim_traps_arr));
+ kfree(nsim_dev->trap_data->trap_items_arr);
+ kfree(nsim_dev->trap_data);
+}
+
static int nsim_dev_reload(struct devlink *devlink,
struct netlink_ext_ack *extack)
{
@@ -369,9 +590,61 @@ static int nsim_dev_flash_update(struct devlink *devlink, const char *file_name,
return 0;
}
+static struct nsim_trap_item *
+nsim_dev_trap_item_lookup(struct nsim_dev *nsim_dev, u16 trap_id)
+{
+ struct nsim_trap_data *nsim_trap_data = nsim_dev->trap_data;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
+ if (nsim_traps_arr[i].id == trap_id)
+ return &nsim_trap_data->trap_items_arr[i];
+ }
+
+ return NULL;
+}
+
+static int nsim_dev_devlink_trap_init(struct devlink *devlink,
+ const struct devlink_trap *trap,
+ void *trap_ctx)
+{
+ struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_trap_item *nsim_trap_item;
+
+ nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id);
+ if (WARN_ON(!nsim_trap_item))
+ return -ENOENT;
+
+ nsim_trap_item->trap_ctx = trap_ctx;
+ nsim_trap_item->action = trap->init_action;
+
+ return 0;
+}
+
+static int
+nsim_dev_devlink_trap_action_set(struct devlink *devlink,
+ const struct devlink_trap *trap,
+ enum devlink_trap_action action)
+{
+ struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_trap_item *nsim_trap_item;
+
+ nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id);
+ if (WARN_ON(!nsim_trap_item))
+ return -ENOENT;
+
+ spin_lock(&nsim_dev->trap_data->trap_lock);
+ nsim_trap_item->action = action;
+ spin_unlock(&nsim_dev->trap_data->trap_lock);
+
+ return 0;
+}
+
static const struct devlink_ops nsim_dev_devlink_ops = {
.reload = nsim_dev_reload,
.flash_update = nsim_dev_flash_update,
+ .trap_init = nsim_dev_devlink_trap_init,
+ .trap_action_set = nsim_dev_devlink_trap_action_set,
};
#define NSIM_DEV_MAX_MACS_DEFAULT 32
@@ -421,10 +694,14 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
if (err)
goto err_params_unregister;
- err = nsim_dev_debugfs_init(nsim_dev);
+ err = nsim_dev_traps_init(devlink);
if (err)
goto err_dummy_region_exit;
+ err = nsim_dev_debugfs_init(nsim_dev);
+ if (err)
+ goto err_traps_exit;
+
err = nsim_bpf_dev_init(nsim_dev);
if (err)
goto err_debugfs_exit;
@@ -434,6 +711,8 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
err_debugfs_exit:
nsim_dev_debugfs_exit(nsim_dev);
+err_traps_exit:
+ nsim_dev_traps_exit(devlink);
err_dummy_region_exit:
nsim_dev_dummy_region_exit(nsim_dev);
err_params_unregister:
@@ -456,6 +735,7 @@ static void nsim_dev_destroy(struct nsim_dev *nsim_dev)
nsim_bpf_dev_exit(nsim_dev);
nsim_dev_debugfs_exit(nsim_dev);
+ nsim_dev_traps_exit(devlink);
nsim_dev_dummy_region_exit(nsim_dev);
devlink_params_unregister(devlink, nsim_devlink_params,
ARRAY_SIZE(nsim_devlink_params));
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 4c758c6919f5..262a6978bbca 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -145,6 +145,7 @@ struct nsim_dev_port {
struct nsim_dev {
struct nsim_bus_dev *nsim_bus_dev;
struct nsim_fib_data *fib_data;
+ struct nsim_trap_data *trap_data;
struct dentry *ddir;
struct dentry *ports_ddir;
struct bpf_offload_dev *bpf_dev;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 451268f64880..7f43c48f54cd 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -14,6 +14,7 @@
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
+#include <linux/refcount.h>
#include <net/net_namespace.h>
#include <uapi/linux/devlink.h>
@@ -31,6 +32,8 @@ struct devlink {
struct list_head reporter_list;
struct mutex reporters_lock; /* protects reporter_list */
struct devlink_dpipe_headers *dpipe_headers;
+ struct list_head trap_list;
+ struct list_head trap_group_list;
const struct devlink_ops *ops;
struct device *dev;
possible_net_t _net;
@@ -497,6 +500,135 @@ struct devlink_health_reporter_ops {
struct devlink_fmsg *fmsg);
};
+/**
+ * struct devlink_trap_group - Immutable packet trap group attributes.
+ * @name: Trap group name.
+ * @id: Trap group identifier.
+ * @generic: Whether the trap group is generic or not.
+ *
+ * Describes immutable attributes of packet trap groups that drivers register
+ * with devlink.
+ */
+struct devlink_trap_group {
+ const char *name;
+ u16 id;
+ bool generic;
+};
+
+#define DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT BIT(0)
+
+/**
+ * struct devlink_trap - Immutable packet trap attributes.
+ * @type: Trap type.
+ * @init_action: Initial trap action.
+ * @generic: Whether the trap is generic or not.
+ * @id: Trap identifier.
+ * @name: Trap name.
+ * @group: Immutable packet trap group attributes.
+ * @metadata_cap: Metadata types that can be provided by the trap.
+ *
+ * Describes immutable attributes of packet traps that drivers register with
+ * devlink.
+ */
+struct devlink_trap {
+ enum devlink_trap_type type;
+ enum devlink_trap_action init_action;
+ bool generic;
+ u16 id;
+ const char *name;
+ struct devlink_trap_group group;
+ u32 metadata_cap;
+};
+
+/* All traps must be documented in
+ * Documentation/networking/devlink-trap.rst
+ */
+enum devlink_trap_generic_id {
+ DEVLINK_TRAP_GENERIC_ID_SMAC_MC,
+ DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH,
+ DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER,
+ DEVLINK_TRAP_GENERIC_ID_INGRESS_STP_FILTER,
+ DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST,
+ DEVLINK_TRAP_GENERIC_ID_PORT_LOOPBACK_FILTER,
+ DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_ROUTE,
+ DEVLINK_TRAP_GENERIC_ID_TTL_ERROR,
+ DEVLINK_TRAP_GENERIC_ID_TAIL_DROP,
+
+ /* Add new generic trap IDs above */
+ __DEVLINK_TRAP_GENERIC_ID_MAX,
+ DEVLINK_TRAP_GENERIC_ID_MAX = __DEVLINK_TRAP_GENERIC_ID_MAX - 1,
+};
+
+/* All trap groups must be documented in
+ * Documentation/networking/devlink-trap.rst
+ */
+enum devlink_trap_group_generic_id {
+ DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_BUFFER_DROPS,
+
+ /* Add new generic trap group IDs above */
+ __DEVLINK_TRAP_GROUP_GENERIC_ID_MAX,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_MAX =
+ __DEVLINK_TRAP_GROUP_GENERIC_ID_MAX - 1,
+};
+
+#define DEVLINK_TRAP_GENERIC_NAME_SMAC_MC \
+ "source_mac_is_multicast"
+#define DEVLINK_TRAP_GENERIC_NAME_VLAN_TAG_MISMATCH \
+ "vlan_tag_mismatch"
+#define DEVLINK_TRAP_GENERIC_NAME_INGRESS_VLAN_FILTER \
+ "ingress_vlan_filter"
+#define DEVLINK_TRAP_GENERIC_NAME_INGRESS_STP_FILTER \
+ "ingress_spanning_tree_filter"
+#define DEVLINK_TRAP_GENERIC_NAME_EMPTY_TX_LIST \
+ "port_list_is_empty"
+#define DEVLINK_TRAP_GENERIC_NAME_PORT_LOOPBACK_FILTER \
+ "port_loopback_filter"
+#define DEVLINK_TRAP_GENERIC_NAME_BLACKHOLE_ROUTE \
+ "blackhole_route"
+#define DEVLINK_TRAP_GENERIC_NAME_TTL_ERROR \
+ "ttl_value_is_too_small"
+#define DEVLINK_TRAP_GENERIC_NAME_TAIL_DROP \
+ "tail_drop"
+
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \
+ "l2_drops"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L3_DROPS \
+ "l3_drops"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_BUFFER_DROPS \
+ "buffer_drops"
+
+#define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group, _metadata_cap) \
+ { \
+ .type = DEVLINK_TRAP_TYPE_##_type, \
+ .init_action = DEVLINK_TRAP_ACTION_##_init_action, \
+ .generic = true, \
+ .id = DEVLINK_TRAP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \
+ .group = _group, \
+ .metadata_cap = _metadata_cap, \
+ }
+
+#define DEVLINK_TRAP_DRIVER(_type, _init_action, _id, _name, _group, \
+ _metadata_cap) \
+ { \
+ .type = DEVLINK_TRAP_TYPE_##_type, \
+ .init_action = DEVLINK_TRAP_ACTION_##_init_action, \
+ .generic = false, \
+ .id = _id, \
+ .name = _name, \
+ .group = _group, \
+ .metadata_cap = _metadata_cap, \
+ }
+
+#define DEVLINK_TRAP_GROUP_GENERIC(_id) \
+ { \
+ .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \
+ .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \
+ .generic = true, \
+ }
+
struct devlink_ops {
int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
int (*port_type_set)(struct devlink_port *devlink_port,
@@ -558,6 +690,38 @@ struct devlink_ops {
int (*flash_update)(struct devlink *devlink, const char *file_name,
const char *component,
struct netlink_ext_ack *extack);
+ /**
+ * @trap_init: Trap initialization function.
+ *
+ * Should be used by device drivers to initialize the trap in the
+ * underlying device. Drivers should also store the provided trap
+ * context, so that they could efficiently pass it to
+ * devlink_trap_report() when the trap is triggered.
+ */
+ int (*trap_init)(struct devlink *devlink,
+ const struct devlink_trap *trap, void *trap_ctx);
+ /**
+ * @trap_fini: Trap de-initialization function.
+ *
+ * Should be used by device drivers to de-initialize the trap in the
+ * underlying device.
+ */
+ void (*trap_fini)(struct devlink *devlink,
+ const struct devlink_trap *trap, void *trap_ctx);
+ /**
+ * @trap_action_set: Trap action set function.
+ */
+ int (*trap_action_set)(struct devlink *devlink,
+ const struct devlink_trap *trap,
+ enum devlink_trap_action action);
+ /**
+ * @trap_group_init: Trap group initialization function.
+ *
+ * Should be used by device drivers to initialize the trap group in the
+ * underlying device.
+ */
+ int (*trap_group_init)(struct devlink *devlink,
+ const struct devlink_trap_group *group);
};
static inline void *devlink_priv(struct devlink *devlink)
@@ -774,6 +938,17 @@ void devlink_flash_update_status_notify(struct devlink *devlink,
unsigned long done,
unsigned long total);
+int devlink_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv);
+void devlink_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count);
+void devlink_trap_report(struct devlink *devlink,
+ struct sk_buff *skb, void *trap_ctx,
+ struct devlink_port *in_devlink_port);
+void *devlink_trap_ctx_priv(void *trap_ctx);
+
#if IS_ENABLED(CONFIG_NET_DEVLINK)
void devlink_compat_running_version(struct net_device *dev,
diff --git a/include/net/drop_monitor.h b/include/net/drop_monitor.h
new file mode 100644
index 000000000000..2ab668461463
--- /dev/null
+++ b/include/net/drop_monitor.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _NET_DROP_MONITOR_H_
+#define _NET_DROP_MONITOR_H_
+
+#include <linux/ktime.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+/**
+ * struct net_dm_hw_metadata - Hardware-supplied packet metadata.
+ * @trap_group_name: Hardware trap group name.
+ * @trap_name: Hardware trap name.
+ * @input_dev: Input netdevice.
+ */
+struct net_dm_hw_metadata {
+ const char *trap_group_name;
+ const char *trap_name;
+ struct net_device *input_dev;
+};
+
+#if IS_ENABLED(CONFIG_NET_DROP_MONITOR)
+void net_dm_hw_report(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata);
+#else
+static inline void
+net_dm_hw_report(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+}
+#endif
+
+#endif /* _NET_DROP_MONITOR_H_ */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ffc993256527..546e75dd74ac 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -107,6 +107,16 @@ enum devlink_command {
DEVLINK_CMD_FLASH_UPDATE_END, /* notification only */
DEVLINK_CMD_FLASH_UPDATE_STATUS, /* notification only */
+ DEVLINK_CMD_TRAP_GET, /* can dump */
+ DEVLINK_CMD_TRAP_SET,
+ DEVLINK_CMD_TRAP_NEW,
+ DEVLINK_CMD_TRAP_DEL,
+
+ DEVLINK_CMD_TRAP_GROUP_GET, /* can dump */
+ DEVLINK_CMD_TRAP_GROUP_SET,
+ DEVLINK_CMD_TRAP_GROUP_NEW,
+ DEVLINK_CMD_TRAP_GROUP_DEL,
+
/* add new commands above here */
__DEVLINK_CMD_MAX,
DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -194,6 +204,47 @@ enum devlink_param_fw_load_policy_value {
DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH,
};
+enum {
+ DEVLINK_ATTR_STATS_RX_PACKETS, /* u64 */
+ DEVLINK_ATTR_STATS_RX_BYTES, /* u64 */
+
+ __DEVLINK_ATTR_STATS_MAX,
+ DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1
+};
+
+/**
+ * enum devlink_trap_action - Packet trap action.
+ * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not
+ * sent to the CPU.
+ * @DEVLINK_TRAP_ACTION_TRAP: The sole copy of the packet is sent to the CPU.
+ */
+enum devlink_trap_action {
+ DEVLINK_TRAP_ACTION_DROP,
+ DEVLINK_TRAP_ACTION_TRAP,
+};
+
+/**
+ * enum devlink_trap_type - Packet trap type.
+ * @DEVLINK_TRAP_TYPE_DROP: Trap reason is a drop. Trapped packets are only
+ * processed by devlink and not injected to the
+ * kernel's Rx path.
+ * @DEVLINK_TRAP_TYPE_EXCEPTION: Trap reason is an exception. Packet was not
+ * forwarded as intended due to an exception
+ * (e.g., missing neighbour entry) and trapped to
+ * control plane for resolution. Trapped packets
+ * are processed by devlink and injected to
+ * the kernel's Rx path.
+ */
+enum devlink_trap_type {
+ DEVLINK_TRAP_TYPE_DROP,
+ DEVLINK_TRAP_TYPE_EXCEPTION,
+};
+
+enum {
+ /* Trap can report input port as metadata */
+ DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT,
+};
+
enum devlink_attr {
/* don't change the order or add anything between, this is ABI! */
DEVLINK_ATTR_UNSPEC,
@@ -348,6 +399,17 @@ enum devlink_attr {
DEVLINK_ATTR_PORT_PCI_PF_NUMBER, /* u16 */
DEVLINK_ATTR_PORT_PCI_VF_NUMBER, /* u16 */
+ DEVLINK_ATTR_STATS, /* nested */
+
+ DEVLINK_ATTR_TRAP_NAME, /* string */
+ /* enum devlink_trap_action */
+ DEVLINK_ATTR_TRAP_ACTION, /* u8 */
+ /* enum devlink_trap_type */
+ DEVLINK_ATTR_TRAP_TYPE, /* u8 */
+ DEVLINK_ATTR_TRAP_GENERIC, /* flag */
+ DEVLINK_ATTR_TRAP_METADATA, /* nested */
+ DEVLINK_ATTR_TRAP_GROUP_NAME, /* string */
+
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,
diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 405b31cbf723..75a35dccb675 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -83,6 +83,15 @@ enum net_dm_attr {
NET_DM_ATTR_ORIG_LEN, /* u32 */
NET_DM_ATTR_QUEUE_LEN, /* u32 */
NET_DM_ATTR_STATS, /* nested */
+ NET_DM_ATTR_HW_STATS, /* nested */
+ NET_DM_ATTR_ORIGIN, /* u16 */
+ NET_DM_ATTR_HW_TRAP_GROUP_NAME, /* string */
+ NET_DM_ATTR_HW_TRAP_NAME, /* string */
+ NET_DM_ATTR_HW_ENTRIES, /* nested */
+ NET_DM_ATTR_HW_ENTRY, /* nested */
+ NET_DM_ATTR_HW_TRAP_COUNT, /* u32 */
+ NET_DM_ATTR_SW_DROPS, /* flag */
+ NET_DM_ATTR_HW_DROPS, /* flag */
__NET_DM_ATTR_MAX,
NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
@@ -101,6 +110,7 @@ enum net_dm_alert_mode {
enum {
NET_DM_ATTR_PORT_NETDEV_IFINDEX, /* u32 */
+ NET_DM_ATTR_PORT_NETDEV_NAME, /* string */
__NET_DM_ATTR_PORT_MAX,
NET_DM_ATTR_PORT_MAX = __NET_DM_ATTR_PORT_MAX - 1
@@ -113,4 +123,9 @@ enum {
NET_DM_ATTR_STATS_MAX = __NET_DM_ATTR_STATS_MAX - 1
};
+enum net_dm_origin {
+ NET_DM_ORIGIN_SW,
+ NET_DM_ORIGIN_HW,
+};
+
#endif
diff --git a/net/Kconfig b/net/Kconfig
index 57f51a279ad6..3101bfcbdd7a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -430,6 +430,7 @@ config NET_SOCK_MSG
config NET_DEVLINK
bool
default n
+ imply NET_DROP_MONITOR
config PAGE_POOL
bool
diff --git a/net/core/devlink.c b/net/core/devlink.c
index d3dbb904bf3b..650f36379203 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -18,6 +18,8 @@
#include <linux/spinlock.h>
#include <linux/refcount.h>
#include <linux/workqueue.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/timekeeping.h>
#include <rdma/ib_verbs.h>
#include <net/netlink.h>
#include <net/genetlink.h>
@@ -25,6 +27,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/devlink.h>
+#include <net/drop_monitor.h>
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
@@ -551,7 +554,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
goto nla_put_failure;
- spin_lock(&devlink_port->type_lock);
+ spin_lock_bh(&devlink_port->type_lock);
if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
goto nla_put_failure_type_locked;
if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
@@ -576,7 +579,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
ibdev->name))
goto nla_put_failure_type_locked;
}
- spin_unlock(&devlink_port->type_lock);
+ spin_unlock_bh(&devlink_port->type_lock);
if (devlink_nl_port_attrs_put(msg, devlink_port))
goto nla_put_failure;
@@ -584,7 +587,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
return 0;
nla_put_failure_type_locked:
- spin_unlock(&devlink_port->type_lock);
+ spin_unlock_bh(&devlink_port->type_lock);
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -5154,6 +5157,571 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
return 0;
}
+struct devlink_stats {
+ u64 rx_bytes;
+ u64 rx_packets;
+ struct u64_stats_sync syncp;
+};
+
+/**
+ * struct devlink_trap_group_item - Packet trap group attributes.
+ * @group: Immutable packet trap group attributes.
+ * @refcount: Number of trap items using the group.
+ * @list: trap_group_list member.
+ * @stats: Trap group statistics.
+ *
+ * Describes packet trap group attributes. Created by devlink during trap
+ * registration.
+ */
+struct devlink_trap_group_item {
+ const struct devlink_trap_group *group;
+ refcount_t refcount;
+ struct list_head list;
+ struct devlink_stats __percpu *stats;
+};
+
+/**
+ * struct devlink_trap_item - Packet trap attributes.
+ * @trap: Immutable packet trap attributes.
+ * @group_item: Associated group item.
+ * @list: trap_list member.
+ * @action: Trap action.
+ * @stats: Trap statistics.
+ * @priv: Driver private information.
+ *
+ * Describes both mutable and immutable packet trap attributes. Created by
+ * devlink during trap registration and used for all trap related operations.
+ */
+struct devlink_trap_item {
+ const struct devlink_trap *trap;
+ struct devlink_trap_group_item *group_item;
+ struct list_head list;
+ enum devlink_trap_action action;
+ struct devlink_stats __percpu *stats;
+ void *priv;
+};
+
+static struct devlink_trap_item *
+devlink_trap_item_lookup(struct devlink *devlink, const char *name)
+{
+ struct devlink_trap_item *trap_item;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (!strcmp(trap_item->trap->name, name))
+ return trap_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ struct nlattr *attr;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
+ return NULL;
+ attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
+
+ return devlink_trap_item_lookup(devlink, nla_data(attr));
+}
+
+static int
+devlink_trap_action_get_from_info(struct genl_info *info,
+ enum devlink_trap_action *p_trap_action)
+{
+ u8 val;
+
+ val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
+ switch (val) {
+ case DEVLINK_TRAP_ACTION_DROP: /* fall-through */
+ case DEVLINK_TRAP_ACTION_TRAP:
+ *p_trap_action = val;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_metadata_put(struct sk_buff *msg,
+ const struct devlink_trap *trap)
+{
+ struct nlattr *attr;
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
+ struct devlink_stats *stats)
+{
+ int i;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(i) {
+ struct devlink_stats *cpu_stats;
+ u64 rx_packets, rx_bytes;
+ unsigned int start;
+
+ cpu_stats = per_cpu_ptr(trap_stats, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ rx_packets = cpu_stats->rx_packets;
+ rx_bytes = cpu_stats->rx_bytes;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+ stats->rx_packets += rx_packets;
+ stats->rx_bytes += rx_bytes;
+ }
+}
+
+static int devlink_trap_stats_put(struct sk_buff *msg,
+ struct devlink_stats __percpu *trap_stats)
+{
+ struct devlink_stats stats;
+ struct nlattr *attr;
+
+ devlink_trap_stats_read(trap_stats, &stats);
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ stats.rx_packets, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ stats.rx_bytes, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ struct devlink_trap_group_item *group_item = trap_item->group_item;
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+ group_item->group->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
+ goto nla_put_failure;
+
+ if (trap_item->trap->generic &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
+ goto nla_put_failure;
+
+ err = devlink_trap_metadata_put(msg, trap_item->trap);
+ if (err)
+ goto nla_put_failure;
+
+ err = devlink_trap_stats_put(msg, trap_item->stats);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_item *trap_item;
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_list))
+ return -EOPNOTSUPP;
+
+ trap_item = devlink_trap_item_get_from_info(devlink, info);
+ if (!trap_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW, info->snd_portid,
+ info->snd_seq, 0);
+ if (err)
+ goto err_trap_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink_trap_item *trap_item;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int __devlink_trap_action_set(struct devlink *devlink,
+ struct devlink_trap_item *trap_item,
+ enum devlink_trap_action trap_action,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (trap_item->action != trap_action &&
+ trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot change action of non-drop traps. Skipping");
+ return 0;
+ }
+
+ err = devlink->ops->trap_action_set(devlink, trap_item->trap,
+ trap_action);
+ if (err)
+ return err;
+
+ trap_item->action = trap_action;
+
+ return 0;
+}
+
+static int devlink_trap_action_set(struct devlink *devlink,
+ struct devlink_trap_item *trap_item,
+ struct genl_info *info)
+{
+ enum devlink_trap_action trap_action;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+ return 0;
+
+ err = devlink_trap_action_get_from_info(info, &trap_action);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+ return -EINVAL;
+ }
+
+ return __devlink_trap_action_set(devlink, trap_item, trap_action,
+ info->extack);
+}
+
+static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ if (list_empty(&devlink->trap_list))
+ return -EOPNOTSUPP;
+
+ trap_item = devlink_trap_item_get_from_info(devlink, info);
+ if (!trap_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+ return -ENOENT;
+ }
+
+ err = devlink_trap_action_set(devlink, trap_item, info);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
+{
+ struct devlink_trap_group_item *group_item;
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (!strcmp(group_item->group->name, name))
+ return group_item;
+ }
+
+ return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ char *name;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
+ return NULL;
+ name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
+
+ return devlink_trap_group_item_lookup(devlink, name);
+}
+
+static int
+devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags)
+{
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+ group_item->group->name))
+ goto nla_put_failure;
+
+ if (group_item->group->generic &&
+ nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+ goto nla_put_failure;
+
+ err = devlink_trap_stats_put(msg, group_item->stats);
+ if (err)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_group_item *group_item;
+ struct sk_buff *msg;
+ int err;
+
+ if (list_empty(&devlink->trap_group_list))
+ return -EOPNOTSUPP;
+
+ group_item = devlink_trap_group_item_get_from_info(devlink, info);
+ if (!group_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err)
+ goto err_trap_group_fill;
+
+ return genlmsg_reply(msg, info);
+
+err_trap_group_fill:
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ enum devlink_command cmd = DEVLINK_CMD_TRAP_GROUP_NEW;
+ struct devlink_trap_group_item *group_item;
+ u32 portid = NETLINK_CB(cb->skb).portid;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ continue;
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(group_item, &devlink->trap_group_list,
+ list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_trap_group_fill(msg, devlink,
+ group_item, cmd,
+ portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static int
+__devlink_trap_group_action_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ enum devlink_trap_action trap_action,
+ struct netlink_ext_ack *extack)
+{
+ const char *group_name = group_item->group->name;
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (strcmp(trap_item->trap->group.name, group_name))
+ continue;
+ err = __devlink_trap_action_set(devlink, trap_item,
+ trap_action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+devlink_trap_group_action_set(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item,
+ struct genl_info *info)
+{
+ enum devlink_trap_action trap_action;
+ int err;
+
+ if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+ return 0;
+
+ err = devlink_trap_action_get_from_info(info, &trap_action);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+ return -EINVAL;
+ }
+
+ err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
+ info->extack);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_trap_group_item *group_item;
+ int err;
+
+ if (list_empty(&devlink->trap_group_list))
+ return -EOPNOTSUPP;
+
+ group_item = devlink_trap_group_item_get_from_info(devlink, info);
+ if (!group_item) {
+ NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+ return -ENOENT;
+ }
+
+ err = devlink_trap_group_action_set(devlink, group_item, info);
+ if (err)
+ return err;
+
+ return 0;
+}
+
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -5184,6 +5752,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -5483,6 +6054,32 @@ static const struct genl_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
+ {
+ .cmd = DEVLINK_CMD_TRAP_GET,
+ .doit = devlink_nl_cmd_trap_get_doit,
+ .dumpit = devlink_nl_cmd_trap_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_SET,
+ .doit = devlink_nl_cmd_trap_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+ .doit = devlink_nl_cmd_trap_group_get_doit,
+ .dumpit = devlink_nl_cmd_trap_group_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_TRAP_GROUP_SET,
+ .doit = devlink_nl_cmd_trap_group_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -5528,6 +6125,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->param_list);
INIT_LIST_HEAD(&devlink->region_list);
INIT_LIST_HEAD(&devlink->reporter_list);
+ INIT_LIST_HEAD(&devlink->trap_list);
+ INIT_LIST_HEAD(&devlink->trap_group_list);
mutex_init(&devlink->lock);
mutex_init(&devlink->reporters_lock);
return devlink;
@@ -5574,6 +6173,8 @@ void devlink_free(struct devlink *devlink)
{
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
+ WARN_ON(!list_empty(&devlink->trap_group_list));
+ WARN_ON(!list_empty(&devlink->trap_list));
WARN_ON(!list_empty(&devlink->reporter_list));
WARN_ON(!list_empty(&devlink->region_list));
WARN_ON(!list_empty(&devlink->param_list));
@@ -5678,10 +6279,10 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
if (WARN_ON(!devlink_port->registered))
return;
devlink_port_type_warn_cancel(devlink_port);
- spin_lock(&devlink_port->type_lock);
+ spin_lock_bh(&devlink_port->type_lock);
devlink_port->type = type;
devlink_port->type_dev = type_dev;
- spin_unlock(&devlink_port->type_lock);
+ spin_unlock_bh(&devlink_port->type_lock);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
}
@@ -6834,6 +7435,475 @@ unlock:
}
EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
+#define DEVLINK_TRAP(_id, _type) \
+ { \
+ .type = DEVLINK_TRAP_TYPE_##_type, \
+ .id = DEVLINK_TRAP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \
+ }
+
+static const struct devlink_trap devlink_trap_generic[] = {
+ DEVLINK_TRAP(SMAC_MC, DROP),
+ DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
+ DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
+ DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
+ DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
+ DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
+ DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
+ DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
+ DEVLINK_TRAP(TAIL_DROP, DROP),
+};
+
+#define DEVLINK_TRAP_GROUP(_id) \
+ { \
+ .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \
+ .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \
+ }
+
+static const struct devlink_trap_group devlink_trap_group_generic[] = {
+ DEVLINK_TRAP_GROUP(L2_DROPS),
+ DEVLINK_TRAP_GROUP(L3_DROPS),
+ DEVLINK_TRAP_GROUP(BUFFER_DROPS),
+};
+
+static int devlink_trap_generic_verify(const struct devlink_trap *trap)
+{
+ if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
+ return -EINVAL;
+
+ if (trap->type != devlink_trap_generic[trap->id].type)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int devlink_trap_driver_verify(const struct devlink_trap *trap)
+{
+ int i;
+
+ if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
+ if (!strcmp(trap->name, devlink_trap_generic[i].name))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_verify(const struct devlink_trap *trap)
+{
+ if (!trap || !trap->name || !trap->group.name)
+ return -EINVAL;
+
+ if (trap->generic)
+ return devlink_trap_generic_verify(trap);
+ else
+ return devlink_trap_driver_verify(trap);
+}
+
+static int
+devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
+{
+ if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
+{
+ int i;
+
+ if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
+ if (!strcmp(group->name, devlink_trap_group_generic[i].name))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int devlink_trap_group_verify(const struct devlink_trap_group *group)
+{
+ if (group->generic)
+ return devlink_trap_group_generic_verify(group);
+ else
+ return devlink_trap_group_driver_verify(group);
+}
+
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
+ cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
+ 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_create(struct devlink *devlink,
+ const struct devlink_trap_group *group)
+{
+ struct devlink_trap_group_item *group_item;
+ int err;
+
+ err = devlink_trap_group_verify(group);
+ if (err)
+ return ERR_PTR(err);
+
+ group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
+ if (!group_item)
+ return ERR_PTR(-ENOMEM);
+
+ group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+ if (!group_item->stats) {
+ err = -ENOMEM;
+ goto err_stats_alloc;
+ }
+
+ group_item->group = group;
+ refcount_set(&group_item->refcount, 1);
+
+ if (devlink->ops->trap_group_init) {
+ err = devlink->ops->trap_group_init(devlink, group);
+ if (err)
+ goto err_group_init;
+ }
+
+ list_add_tail(&group_item->list, &devlink->trap_group_list);
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW);
+
+ return group_item;
+
+err_group_init:
+ free_percpu(group_item->stats);
+err_stats_alloc:
+ kfree(group_item);
+ return ERR_PTR(err);
+}
+
+static void
+devlink_trap_group_item_destroy(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item)
+{
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_DEL);
+ list_del(&group_item->list);
+ free_percpu(group_item->stats);
+ kfree(group_item);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get(struct devlink *devlink,
+ const struct devlink_trap_group *group)
+{
+ struct devlink_trap_group_item *group_item;
+
+ group_item = devlink_trap_group_item_lookup(devlink, group->name);
+ if (group_item) {
+ refcount_inc(&group_item->refcount);
+ return group_item;
+ }
+
+ return devlink_trap_group_item_create(devlink, group);
+}
+
+static void
+devlink_trap_group_item_put(struct devlink *devlink,
+ struct devlink_trap_group_item *group_item)
+{
+ if (!refcount_dec_and_test(&group_item->refcount))
+ return;
+
+ devlink_trap_group_item_destroy(devlink, group_item);
+}
+
+static int
+devlink_trap_item_group_link(struct devlink *devlink,
+ struct devlink_trap_item *trap_item)
+{
+ struct devlink_trap_group_item *group_item;
+
+ group_item = devlink_trap_group_item_get(devlink,
+ &trap_item->trap->group);
+ if (IS_ERR(group_item))
+ return PTR_ERR(group_item);
+
+ trap_item->group_item = group_item;
+
+ return 0;
+}
+
+static void
+devlink_trap_item_group_unlink(struct devlink *devlink,
+ struct devlink_trap_item *trap_item)
+{
+ devlink_trap_group_item_put(devlink, trap_item->group_item);
+}
+
+static void devlink_trap_notify(struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
+ cmd != DEVLINK_CMD_TRAP_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int
+devlink_trap_register(struct devlink *devlink,
+ const struct devlink_trap *trap, void *priv)
+{
+ struct devlink_trap_item *trap_item;
+ int err;
+
+ if (devlink_trap_item_lookup(devlink, trap->name))
+ return -EEXIST;
+
+ trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
+ if (!trap_item)
+ return -ENOMEM;
+
+ trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+ if (!trap_item->stats) {
+ err = -ENOMEM;
+ goto err_stats_alloc;
+ }
+
+ trap_item->trap = trap;
+ trap_item->action = trap->init_action;
+ trap_item->priv = priv;
+
+ err = devlink_trap_item_group_link(devlink, trap_item);
+ if (err)
+ goto err_group_link;
+
+ err = devlink->ops->trap_init(devlink, trap, trap_item);
+ if (err)
+ goto err_trap_init;
+
+ list_add_tail(&trap_item->list, &devlink->trap_list);
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+ return 0;
+
+err_trap_init:
+ devlink_trap_item_group_unlink(devlink, trap_item);
+err_group_link:
+ free_percpu(trap_item->stats);
+err_stats_alloc:
+ kfree(trap_item);
+ return err;
+}
+
+static void devlink_trap_unregister(struct devlink *devlink,
+ const struct devlink_trap *trap)
+{
+ struct devlink_trap_item *trap_item;
+
+ trap_item = devlink_trap_item_lookup(devlink, trap->name);
+ if (WARN_ON_ONCE(!trap_item))
+ return;
+
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+ list_del(&trap_item->list);
+ if (devlink->ops->trap_fini)
+ devlink->ops->trap_fini(devlink, trap, trap_item);
+ devlink_trap_item_group_unlink(devlink, trap_item);
+ free_percpu(trap_item->stats);
+ kfree(trap_item);
+}
+
+static void devlink_trap_disable(struct devlink *devlink,
+ const struct devlink_trap *trap)
+{
+ struct devlink_trap_item *trap_item;
+
+ trap_item = devlink_trap_item_lookup(devlink, trap->name);
+ if (WARN_ON_ONCE(!trap_item))
+ return;
+
+ devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP);
+ trap_item->action = DEVLINK_TRAP_ACTION_DROP;
+}
+
+/**
+ * devlink_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_traps_register(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count, void *priv)
+{
+ int i, err;
+
+ if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
+ return -EINVAL;
+
+ mutex_lock(&devlink->lock);
+ for (i = 0; i < traps_count; i++) {
+ const struct devlink_trap *trap = &traps[i];
+
+ err = devlink_trap_verify(trap);
+ if (err)
+ goto err_trap_verify;
+
+ err = devlink_trap_register(devlink, trap, priv);
+ if (err)
+ goto err_trap_register;
+ }
+ mutex_unlock(&devlink->lock);
+
+ return 0;
+
+err_trap_register:
+err_trap_verify:
+ for (i--; i >= 0; i--)
+ devlink_trap_unregister(devlink, &traps[i]);
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_traps_register);
+
+/**
+ * devlink_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ */
+void devlink_traps_unregister(struct devlink *devlink,
+ const struct devlink_trap *traps,
+ size_t traps_count)
+{
+ int i;
+
+ mutex_lock(&devlink->lock);
+ /* Make sure we do not have any packets in-flight while unregistering
+ * traps by disabling all of them and waiting for a grace period.
+ */
+ for (i = traps_count - 1; i >= 0; i--)
+ devlink_trap_disable(devlink, &traps[i]);
+ synchronize_rcu();
+ for (i = traps_count - 1; i >= 0; i--)
+ devlink_trap_unregister(devlink, &traps[i]);
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_traps_unregister);
+
+static void
+devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
+ size_t skb_len)
+{
+ struct devlink_stats *stats;
+
+ stats = this_cpu_ptr(trap_stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->rx_bytes += skb_len;
+ stats->rx_packets++;
+ u64_stats_update_end(&stats->syncp);
+}
+
+static void
+devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata,
+ const struct devlink_trap_item *trap_item,
+ struct devlink_port *in_devlink_port)
+{
+ struct devlink_trap_group_item *group_item = trap_item->group_item;
+
+ hw_metadata->trap_group_name = group_item->group->name;
+ hw_metadata->trap_name = trap_item->trap->name;
+
+ spin_lock(&in_devlink_port->type_lock);
+ if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+ hw_metadata->input_dev = in_devlink_port->type_dev;
+ spin_unlock(&in_devlink_port->type_lock);
+}
+
+/**
+ * devlink_trap_report - Report trapped packet to drop monitor.
+ * @devlink: devlink.
+ * @skb: Trapped packet.
+ * @trap_ctx: Trap context.
+ * @in_devlink_port: Input devlink port.
+ */
+void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
+ void *trap_ctx, struct devlink_port *in_devlink_port)
+{
+ struct devlink_trap_item *trap_item = trap_ctx;
+ struct net_dm_hw_metadata hw_metadata = {};
+
+ devlink_trap_stats_update(trap_item->stats, skb->len);
+ devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
+
+ devlink_trap_report_metadata_fill(&hw_metadata, trap_item,
+ in_devlink_port);
+ net_dm_hw_report(skb, &hw_metadata);
+}
+EXPORT_SYMBOL_GPL(devlink_trap_report);
+
+/**
+ * devlink_trap_ctx_priv - Trap context to driver private information.
+ * @trap_ctx: Trap context.
+ *
+ * Return: Driver private information passed during registration.
+ */
+void *devlink_trap_ctx_priv(void *trap_ctx)
+{
+ struct devlink_trap_item *trap_item = trap_ctx;
+
+ return trap_item->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
+
static void __devlink_compat_running_version(struct devlink *devlink,
char *buf, size_t len)
{
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 39e094907391..bfc024024aa3 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -26,6 +26,7 @@
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <net/drop_monitor.h>
#include <net/genetlink.h>
#include <net/netevent.h>
@@ -43,6 +44,7 @@
* netlink alerts
*/
static int trace_state = TRACE_OFF;
+static bool monitor_hw;
/* net_dm_mutex
*
@@ -56,9 +58,26 @@ struct net_dm_stats {
struct u64_stats_sync syncp;
};
+#define NET_DM_MAX_HW_TRAP_NAME_LEN 40
+
+struct net_dm_hw_entry {
+ char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN];
+ u32 count;
+};
+
+struct net_dm_hw_entries {
+ u32 num_entries;
+ struct net_dm_hw_entry entries[0];
+};
+
struct per_cpu_dm_data {
- spinlock_t lock; /* Protects 'skb' and 'send_timer' */
- struct sk_buff *skb;
+ spinlock_t lock; /* Protects 'skb', 'hw_entries' and
+ * 'send_timer'
+ */
+ union {
+ struct sk_buff *skb;
+ struct net_dm_hw_entries *hw_entries;
+ };
struct sk_buff_head drop_queue;
struct work_struct dm_alert_work;
struct timer_list send_timer;
@@ -76,6 +95,7 @@ struct dm_hw_stat_delta {
static struct genl_family net_drop_monitor_family;
static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
+static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
static int dm_hit_limit = 64;
static int dm_delay = 1;
@@ -92,10 +112,16 @@ struct net_dm_alert_ops {
void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
int work, int budget);
void (*work_item_func)(struct work_struct *work);
+ void (*hw_work_item_func)(struct work_struct *work);
+ void (*hw_probe)(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata);
};
struct net_dm_skb_cb {
- void *pc;
+ union {
+ struct net_dm_hw_metadata *hw_metadata;
+ void *pc;
+ };
};
#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
@@ -266,10 +292,190 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
rcu_read_unlock();
}
+static struct net_dm_hw_entries *
+net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
+{
+ struct net_dm_hw_entries *hw_entries;
+ unsigned long flags;
+
+ hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit),
+ GFP_KERNEL);
+ if (!hw_entries) {
+ /* If the memory allocation failed, we try to perform another
+ * allocation in 1/10 second. Otherwise, the probe function
+ * will constantly bail out.
+ */
+ mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
+ }
+
+ spin_lock_irqsave(&hw_data->lock, flags);
+ swap(hw_data->hw_entries, hw_entries);
+ spin_unlock_irqrestore(&hw_data->lock, flags);
+
+ return hw_entries;
+}
+
+static int net_dm_hw_entry_put(struct sk_buff *msg,
+ const struct net_dm_hw_entry *hw_entry)
+{
+ struct nlattr *attr;
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_hw_entries_put(struct sk_buff *msg,
+ const struct net_dm_hw_entries *hw_entries)
+{
+ struct nlattr *attr;
+ int i;
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES);
+ if (!attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i < hw_entries->num_entries; i++) {
+ int rc;
+
+ rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]);
+ if (rc)
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int
+net_dm_hw_summary_report_fill(struct sk_buff *msg,
+ const struct net_dm_hw_entries *hw_entries)
+{
+ struct net_dm_alert_msg anc_hdr = { 0 };
+ void *hdr;
+ int rc;
+
+ hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+ NET_DM_CMD_ALERT);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ /* We need to put the ancillary header in order not to break user
+ * space.
+ */
+ if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr))
+ goto nla_put_failure;
+
+ rc = net_dm_hw_entries_put(msg, hw_entries);
+ if (rc)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void net_dm_hw_summary_work(struct work_struct *work)
+{
+ struct net_dm_hw_entries *hw_entries;
+ struct per_cpu_dm_data *hw_data;
+ struct sk_buff *msg;
+ int rc;
+
+ hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+ hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+ if (!hw_entries)
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ rc = net_dm_hw_summary_report_fill(msg, hw_entries);
+ if (rc) {
+ nlmsg_free(msg);
+ goto out;
+ }
+
+ genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+ kfree(hw_entries);
+}
+
+static void
+net_dm_hw_summary_probe(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ struct net_dm_hw_entries *hw_entries;
+ struct net_dm_hw_entry *hw_entry;
+ struct per_cpu_dm_data *hw_data;
+ unsigned long flags;
+ int i;
+
+ hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+ spin_lock_irqsave(&hw_data->lock, flags);
+ hw_entries = hw_data->hw_entries;
+
+ if (!hw_entries)
+ goto out;
+
+ for (i = 0; i < hw_entries->num_entries; i++) {
+ hw_entry = &hw_entries->entries[i];
+ if (!strncmp(hw_entry->trap_name, hw_metadata->trap_name,
+ NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) {
+ hw_entry->count++;
+ goto out;
+ }
+ }
+ if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit))
+ goto out;
+
+ hw_entry = &hw_entries->entries[hw_entries->num_entries];
+ strlcpy(hw_entry->trap_name, hw_metadata->trap_name,
+ NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
+ hw_entry->count = 1;
+ hw_entries->num_entries++;
+
+ if (!timer_pending(&hw_data->send_timer)) {
+ hw_data->send_timer.expires = jiffies + dm_delay * HZ;
+ add_timer(&hw_data->send_timer);
+ }
+
+out:
+ spin_unlock_irqrestore(&hw_data->lock, flags);
+}
+
static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
.kfree_skb_probe = trace_kfree_skb_hit,
.napi_poll_probe = trace_napi_poll_hit,
.work_item_func = send_dm_alert,
+ .hw_work_item_func = net_dm_hw_summary_work,
+ .hw_probe = net_dm_hw_summary_probe,
};
static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
@@ -323,7 +529,9 @@ static size_t net_dm_in_port_size(void)
/* NET_DM_ATTR_IN_PORT nest */
return nla_total_size(0) +
/* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
- nla_total_size(sizeof(u32));
+ nla_total_size(sizeof(u32)) +
+ /* NET_DM_ATTR_PORT_NETDEV_NAME */
+ nla_total_size(IFNAMSIZ + 1);
}
#define NET_DM_MAX_SYMBOL_LEN 40
@@ -335,6 +543,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
return NLMSG_ALIGN(size) +
+ /* NET_DM_ATTR_ORIGIN */
+ nla_total_size(sizeof(u16)) +
/* NET_DM_ATTR_PC */
nla_total_size(sizeof(u64)) +
/* NET_DM_ATTR_SYMBOL */
@@ -351,7 +561,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
nla_total_size(payload_len);
}
-static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
+static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex,
+ const char *name)
{
struct nlattr *attr;
@@ -363,6 +574,9 @@ static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
goto nla_put_failure;
+ if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name))
+ goto nla_put_failure;
+
nla_nest_end(msg, attr);
return 0;
@@ -387,6 +601,9 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
if (!hdr)
return -EMSGSIZE;
+ if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
+ goto nla_put_failure;
+
if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
goto nla_put_failure;
@@ -394,7 +611,7 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
goto nla_put_failure;
- rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif);
+ rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL);
if (rc)
goto nla_put_failure;
@@ -481,10 +698,250 @@ static void net_dm_packet_work(struct work_struct *work)
net_dm_packet_report(skb);
}
+static size_t
+net_dm_hw_packet_report_size(size_t payload_len,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ size_t size;
+
+ size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+ return NLMSG_ALIGN(size) +
+ /* NET_DM_ATTR_ORIGIN */
+ nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */
+ nla_total_size(strlen(hw_metadata->trap_group_name) + 1) +
+ /* NET_DM_ATTR_HW_TRAP_NAME */
+ nla_total_size(strlen(hw_metadata->trap_name) + 1) +
+ /* NET_DM_ATTR_IN_PORT */
+ net_dm_in_port_size() +
+ /* NET_DM_ATTR_TIMESTAMP */
+ nla_total_size(sizeof(struct timespec)) +
+ /* NET_DM_ATTR_ORIG_LEN */
+ nla_total_size(sizeof(u32)) +
+ /* NET_DM_ATTR_PROTO */
+ nla_total_size(sizeof(u16)) +
+ /* NET_DM_ATTR_PAYLOAD */
+ nla_total_size(payload_len);
+}
+
+static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
+ struct sk_buff *skb, size_t payload_len)
+{
+ struct net_dm_hw_metadata *hw_metadata;
+ struct nlattr *attr;
+ struct timespec ts;
+ void *hdr;
+
+ hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+
+ hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+ NET_DM_CMD_PACKET_ALERT);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME,
+ hw_metadata->trap_group_name))
+ goto nla_put_failure;
+
+ if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME,
+ hw_metadata->trap_name))
+ goto nla_put_failure;
+
+ if (hw_metadata->input_dev) {
+ struct net_device *dev = hw_metadata->input_dev;
+ int rc;
+
+ rc = net_dm_packet_report_in_port_put(msg, dev->ifindex,
+ dev->name);
+ if (rc)
+ goto nla_put_failure;
+ }
+
+ if (ktime_to_timespec_cond(skb->tstamp, &ts) &&
+ nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+ goto nla_put_failure;
+
+ if (!payload_len)
+ goto out;
+
+ if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
+ goto nla_put_failure;
+
+ attr = skb_put(msg, nla_total_size(payload_len));
+ attr->nla_type = NET_DM_ATTR_PAYLOAD;
+ attr->nla_len = nla_attr_size(payload_len);
+ if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+ goto nla_put_failure;
+
+out:
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static struct net_dm_hw_metadata *
+net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata)
+{
+ struct net_dm_hw_metadata *n_hw_metadata;
+ const char *trap_group_name;
+ const char *trap_name;
+
+ n_hw_metadata = kmalloc(sizeof(*hw_metadata), GFP_ATOMIC);
+ if (!n_hw_metadata)
+ return NULL;
+
+ trap_group_name = kmemdup(hw_metadata->trap_group_name,
+ strlen(hw_metadata->trap_group_name) + 1,
+ GFP_ATOMIC | __GFP_ZERO);
+ if (!trap_group_name)
+ goto free_hw_metadata;
+ n_hw_metadata->trap_group_name = trap_group_name;
+
+ trap_name = kmemdup(hw_metadata->trap_name,
+ strlen(hw_metadata->trap_name) + 1,
+ GFP_ATOMIC | __GFP_ZERO);
+ if (!trap_name)
+ goto free_trap_group;
+ n_hw_metadata->trap_name = trap_name;
+
+ n_hw_metadata->input_dev = hw_metadata->input_dev;
+ if (n_hw_metadata->input_dev)
+ dev_hold(n_hw_metadata->input_dev);
+
+ return n_hw_metadata;
+
+free_trap_group:
+ kfree(trap_group_name);
+free_hw_metadata:
+ kfree(n_hw_metadata);
+ return NULL;
+}
+
+static void
+net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata)
+{
+ if (hw_metadata->input_dev)
+ dev_put(hw_metadata->input_dev);
+ kfree(hw_metadata->trap_name);
+ kfree(hw_metadata->trap_group_name);
+ kfree(hw_metadata);
+}
+
+static void net_dm_hw_packet_report(struct sk_buff *skb)
+{
+ struct net_dm_hw_metadata *hw_metadata;
+ struct sk_buff *msg;
+ size_t payload_len;
+ int rc;
+
+ if (skb->data > skb_mac_header(skb))
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ else
+ skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+ payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+ if (net_dm_trunc_len)
+ payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
+
+ hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+ msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata),
+ GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ rc = net_dm_hw_packet_report_fill(msg, skb, payload_len);
+ if (rc) {
+ nlmsg_free(msg);
+ goto out;
+ }
+
+ genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+ net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata);
+ consume_skb(skb);
+}
+
+static void net_dm_hw_packet_work(struct work_struct *work)
+{
+ struct per_cpu_dm_data *hw_data;
+ struct sk_buff_head list;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+ __skb_queue_head_init(&list);
+
+ spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+ skb_queue_splice_tail_init(&hw_data->drop_queue, &list);
+ spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+ while ((skb = __skb_dequeue(&list)))
+ net_dm_hw_packet_report(skb);
+}
+
+static void
+net_dm_hw_packet_probe(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ struct net_dm_hw_metadata *n_hw_metadata;
+ ktime_t tstamp = ktime_get_real();
+ struct per_cpu_dm_data *hw_data;
+ struct sk_buff *nskb;
+ unsigned long flags;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return;
+
+ n_hw_metadata = net_dm_hw_metadata_clone(hw_metadata);
+ if (!n_hw_metadata)
+ goto free;
+
+ NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata;
+ nskb->tstamp = tstamp;
+
+ hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+
+ spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+ if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len)
+ __skb_queue_tail(&hw_data->drop_queue, nskb);
+ else
+ goto unlock_free;
+ spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+ schedule_work(&hw_data->dm_alert_work);
+
+ return;
+
+unlock_free:
+ spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+ u64_stats_update_begin(&hw_data->stats.syncp);
+ hw_data->stats.dropped++;
+ u64_stats_update_end(&hw_data->stats.syncp);
+ net_dm_hw_metadata_free(n_hw_metadata);
+free:
+ consume_skb(nskb);
+}
+
static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
.kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit,
.napi_poll_probe = net_dm_packet_trace_napi_poll_hit,
.work_item_func = net_dm_packet_work,
+ .hw_work_item_func = net_dm_hw_packet_work,
+ .hw_probe = net_dm_hw_packet_probe,
};
static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
@@ -492,6 +949,85 @@ static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
[NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops,
};
+void net_dm_hw_report(struct sk_buff *skb,
+ const struct net_dm_hw_metadata *hw_metadata)
+{
+ rcu_read_lock();
+
+ if (!monitor_hw)
+ goto out;
+
+ net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata);
+
+out:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(net_dm_hw_report);
+
+static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
+{
+ const struct net_dm_alert_ops *ops;
+ int cpu;
+
+ if (monitor_hw) {
+ NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled");
+ return -EAGAIN;
+ }
+
+ ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+ if (!try_module_get(THIS_MODULE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+ return -ENODEV;
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ struct net_dm_hw_entries *hw_entries;
+
+ INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func);
+ timer_setup(&hw_data->send_timer, sched_send_work, 0);
+ hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+ kfree(hw_entries);
+ }
+
+ monitor_hw = true;
+
+ return 0;
+}
+
+static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
+{
+ int cpu;
+
+ if (!monitor_hw)
+ NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
+
+ monitor_hw = false;
+
+ /* After this call returns we are guaranteed that no CPU is processing
+ * any hardware drops.
+ */
+ synchronize_rcu();
+
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ struct sk_buff *skb;
+
+ del_timer_sync(&hw_data->send_timer);
+ cancel_work_sync(&hw_data->dm_alert_work);
+ while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
+ struct net_dm_hw_metadata *hw_metadata;
+
+ hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+ net_dm_hw_metadata_free(hw_metadata);
+ consume_skb(skb);
+ }
+ }
+
+ module_put(THIS_MODULE);
+}
+
static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
{
const struct net_dm_alert_ops *ops;
@@ -604,6 +1140,11 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
return rc;
}
+static bool net_dm_is_monitoring(void)
+{
+ return trace_state == TRACE_ON || monitor_hw;
+}
+
static int net_dm_alert_mode_get_from_info(struct genl_info *info,
enum net_dm_alert_mode *p_alert_mode)
{
@@ -665,8 +1206,8 @@ static int net_dm_cmd_config(struct sk_buff *skb,
struct netlink_ext_ack *extack = info->extack;
int rc;
- if (trace_state == TRACE_ON) {
- NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor while tracing is on");
+ if (net_dm_is_monitoring()) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring");
return -EBUSY;
}
@@ -681,14 +1222,61 @@ static int net_dm_cmd_config(struct sk_buff *skb,
return 0;
}
+static int net_dm_monitor_start(bool set_sw, bool set_hw,
+ struct netlink_ext_ack *extack)
+{
+ bool sw_set = false;
+ int rc;
+
+ if (set_sw) {
+ rc = set_all_monitor_traces(TRACE_ON, extack);
+ if (rc)
+ return rc;
+ sw_set = true;
+ }
+
+ if (set_hw) {
+ rc = net_dm_hw_monitor_start(extack);
+ if (rc)
+ goto err_monitor_hw;
+ }
+
+ return 0;
+
+err_monitor_hw:
+ if (sw_set)
+ set_all_monitor_traces(TRACE_OFF, extack);
+ return rc;
+}
+
+static void net_dm_monitor_stop(bool set_sw, bool set_hw,
+ struct netlink_ext_ack *extack)
+{
+ if (set_hw)
+ net_dm_hw_monitor_stop(extack);
+ if (set_sw)
+ set_all_monitor_traces(TRACE_OFF, extack);
+}
+
static int net_dm_cmd_trace(struct sk_buff *skb,
struct genl_info *info)
{
+ bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS];
+ bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS];
+ struct netlink_ext_ack *extack = info->extack;
+
+ /* To maintain backward compatibility, we start / stop monitoring of
+ * software drops if no flag is specified.
+ */
+ if (!set_sw && !set_hw)
+ set_sw = true;
+
switch (info->genlhdr->cmd) {
case NET_DM_CMD_START:
- return set_all_monitor_traces(TRACE_ON, info->extack);
+ return net_dm_monitor_start(set_sw, set_hw, extack);
case NET_DM_CMD_STOP:
- return set_all_monitor_traces(TRACE_OFF, info->extack);
+ net_dm_monitor_stop(set_sw, set_hw, extack);
+ return 0;
}
return -EOPNOTSUPP;
@@ -785,6 +1373,50 @@ nla_put_failure:
return -EMSGSIZE;
}
+static void net_dm_hw_stats_read(struct net_dm_stats *stats)
+{
+ int cpu;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ struct net_dm_stats *cpu_stats = &hw_data->stats;
+ unsigned int start;
+ u64 dropped;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ dropped = cpu_stats->dropped;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+ stats->dropped += dropped;
+ }
+}
+
+static int net_dm_hw_stats_put(struct sk_buff *msg)
+{
+ struct net_dm_stats stats;
+ struct nlattr *attr;
+
+ net_dm_hw_stats_read(&stats);
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+ stats.dropped, NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
{
void *hdr;
@@ -799,6 +1431,10 @@ static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
if (rc)
goto nla_put_failure;
+ rc = net_dm_hw_stats_put(msg);
+ if (rc)
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
@@ -872,6 +1508,8 @@ static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
[NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
+ [NET_DM_ATTR_SW_DROPS] = {. type = NLA_FLAG },
+ [NET_DM_ATTR_HW_DROPS] = {. type = NLA_FLAG },
};
static const struct genl_ops dropmon_ops[] = {
@@ -934,9 +1572,57 @@ static struct notifier_block dropmon_net_notifier = {
.notifier_call = dropmon_net_event
};
-static int __init init_net_drop_monitor(void)
+static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data)
+{
+ spin_lock_init(&data->lock);
+ skb_queue_head_init(&data->drop_queue);
+ u64_stats_init(&data->stats.syncp);
+}
+
+static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data)
+{
+ WARN_ON(!skb_queue_empty(&data->drop_queue));
+}
+
+static void net_dm_cpu_data_init(int cpu)
{
struct per_cpu_dm_data *data;
+
+ data = &per_cpu(dm_cpu_data, cpu);
+ __net_dm_cpu_data_init(data);
+}
+
+static void net_dm_cpu_data_fini(int cpu)
+{
+ struct per_cpu_dm_data *data;
+
+ data = &per_cpu(dm_cpu_data, cpu);
+ /* At this point, we should have exclusive access
+ * to this struct and can free the skb inside it.
+ */
+ consume_skb(data->skb);
+ __net_dm_cpu_data_fini(data);
+}
+
+static void net_dm_hw_cpu_data_init(int cpu)
+{
+ struct per_cpu_dm_data *hw_data;
+
+ hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ __net_dm_cpu_data_init(hw_data);
+}
+
+static void net_dm_hw_cpu_data_fini(int cpu)
+{
+ struct per_cpu_dm_data *hw_data;
+
+ hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ kfree(hw_data->hw_entries);
+ __net_dm_cpu_data_fini(hw_data);
+}
+
+static int __init init_net_drop_monitor(void)
+{
int cpu, rc;
pr_info("Initializing network drop monitor service\n");
@@ -962,10 +1648,8 @@ static int __init init_net_drop_monitor(void)
rc = 0;
for_each_possible_cpu(cpu) {
- data = &per_cpu(dm_cpu_data, cpu);
- spin_lock_init(&data->lock);
- skb_queue_head_init(&data->drop_queue);
- u64_stats_init(&data->stats.syncp);
+ net_dm_cpu_data_init(cpu);
+ net_dm_hw_cpu_data_init(cpu);
}
goto out;
@@ -978,7 +1662,6 @@ out:
static void exit_net_drop_monitor(void)
{
- struct per_cpu_dm_data *data;
int cpu;
BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
@@ -989,13 +1672,8 @@ static void exit_net_drop_monitor(void)
*/
for_each_possible_cpu(cpu) {
- data = &per_cpu(dm_cpu_data, cpu);
- /*
- * At this point, we should have exclusive access
- * to this struct and can free the skb inside it
- */
- kfree_skb(data->skb);
- WARN_ON(!skb_queue_empty(&data->drop_queue));
+ net_dm_hw_cpu_data_fini(cpu);
+ net_dm_cpu_data_fini(cpu);
}
BUG_ON(genl_unregister_family(&net_drop_monitor_family));
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
new file mode 100755
index 000000000000..f101ab9441e2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -0,0 +1,364 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking devlink-trap functionality. It makes use of
+# netdevsim which implements the required callbacks.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ init_test
+ trap_action_test
+ trap_metadata_test
+ bad_trap_test
+ bad_trap_action_test
+ trap_stats_test
+ trap_group_action_test
+ bad_trap_group_test
+ trap_group_stats_test
+ port_del_test
+ dev_del_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SLEEP_TIME=1
+NETDEV=""
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+require_command udevadm
+
+modprobe netdevsim &> /dev/null
+if [ ! -d "$NETDEVSIM_PATH" ]; then
+ echo "SKIP: No netdevsim support"
+ exit 1
+fi
+
+if [ -d "${NETDEVSIM_PATH}/devices/netdevsim${DEV_ADDR}" ]; then
+ echo "SKIP: Device netdevsim${DEV_ADDR} already exists"
+ exit 1
+fi
+
+init_test()
+{
+ RET=0
+
+ test $(devlink_traps_num_get) -ne 0
+ check_err $? "No traps were registered"
+
+ log_test "Initialization"
+}
+
+trap_action_test()
+{
+ local orig_action
+ local trap_name
+ local action
+
+ RET=0
+
+ for trap_name in $(devlink_traps_get); do
+ # The action of non-drop traps cannot be changed.
+ if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then
+ devlink_trap_action_set $trap_name "trap"
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != "trap" ]; then
+ check_err 1 "Trap $trap_name did not change action to trap"
+ fi
+
+ devlink_trap_action_set $trap_name "drop"
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != "drop" ]; then
+ check_err 1 "Trap $trap_name did not change action to drop"
+ fi
+ else
+ orig_action=$(devlink_trap_action_get $trap_name)
+
+ devlink_trap_action_set $trap_name "trap"
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != $orig_action ]; then
+ check_err 1 "Trap $trap_name changed action when should not"
+ fi
+
+ devlink_trap_action_set $trap_name "drop"
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != $orig_action ]; then
+ check_err 1 "Trap $trap_name changed action when should not"
+ fi
+ fi
+ done
+
+ log_test "Trap action"
+}
+
+trap_metadata_test()
+{
+ local trap_name
+
+ RET=0
+
+ for trap_name in $(devlink_traps_get); do
+ devlink_trap_metadata_test $trap_name "input_port"
+ check_err $? "Input port not reported as metadata of trap $trap_name"
+ done
+
+ log_test "Trap metadata"
+}
+
+bad_trap_test()
+{
+ RET=0
+
+ devlink_trap_action_set "made_up_trap" "drop"
+ check_fail $? "Did not get an error for non-existing trap"
+
+ log_test "Non-existing trap"
+}
+
+bad_trap_action_test()
+{
+ local traps_arr
+ local trap_name
+
+ RET=0
+
+ # Pick first trap.
+ traps_arr=($(devlink_traps_get))
+ trap_name=${traps_arr[0]}
+
+ devlink_trap_action_set $trap_name "made_up_action"
+ check_fail $? "Did not get an error for non-existing trap action"
+
+ log_test "Non-existing trap action"
+}
+
+trap_stats_test()
+{
+ local trap_name
+
+ RET=0
+
+ for trap_name in $(devlink_traps_get); do
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Stats of trap $trap_name not idle when netdev down"
+
+ ip link set dev $NETDEV up
+
+ if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then
+ devlink_trap_action_set $trap_name "trap"
+ devlink_trap_stats_idle_test $trap_name
+ check_fail $? "Stats of trap $trap_name idle when action is trap"
+
+ devlink_trap_action_set $trap_name "drop"
+ devlink_trap_stats_idle_test $trap_name
+ check_err $? "Stats of trap $trap_name not idle when action is drop"
+ else
+ devlink_trap_stats_idle_test $trap_name
+ check_fail $? "Stats of non-drop trap $trap_name idle when should not"
+ fi
+
+ ip link set dev $NETDEV down
+ done
+
+ log_test "Trap statistics"
+}
+
+trap_group_action_test()
+{
+ local curr_group group_name
+ local trap_name
+ local trap_type
+ local action
+
+ RET=0
+
+ for group_name in $(devlink_trap_groups_get); do
+ devlink_trap_group_action_set $group_name "trap"
+
+ for trap_name in $(devlink_traps_get); do
+ curr_group=$(devlink_trap_group_get $trap_name)
+ if [ $curr_group != $group_name ]; then
+ continue
+ fi
+
+ trap_type=$(devlink_trap_type_get $trap_name)
+ if [ $trap_type != "drop" ]; then
+ continue
+ fi
+
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != "trap" ]; then
+ check_err 1 "Trap $trap_name did not change action to trap"
+ fi
+ done
+
+ devlink_trap_group_action_set $group_name "drop"
+
+ for trap_name in $(devlink_traps_get); do
+ curr_group=$(devlink_trap_group_get $trap_name)
+ if [ $curr_group != $group_name ]; then
+ continue
+ fi
+
+ trap_type=$(devlink_trap_type_get $trap_name)
+ if [ $trap_type != "drop" ]; then
+ continue
+ fi
+
+ action=$(devlink_trap_action_get $trap_name)
+ if [ $action != "drop" ]; then
+ check_err 1 "Trap $trap_name did not change action to drop"
+ fi
+ done
+ done
+
+ log_test "Trap group action"
+}
+
+bad_trap_group_test()
+{
+ RET=0
+
+ devlink_trap_group_action_set "made_up_trap_group" "drop"
+ check_fail $? "Did not get an error for non-existing trap group"
+
+ log_test "Non-existing trap group"
+}
+
+trap_group_stats_test()
+{
+ local group_name
+
+ RET=0
+
+ for group_name in $(devlink_trap_groups_get); do
+ devlink_trap_group_stats_idle_test $group_name
+ check_err $? "Stats of trap group $group_name not idle when netdev down"
+
+ ip link set dev $NETDEV up
+
+ devlink_trap_group_action_set $group_name "trap"
+ devlink_trap_group_stats_idle_test $group_name
+ check_fail $? "Stats of trap group $group_name idle when action is trap"
+
+ devlink_trap_group_action_set $group_name "drop"
+ ip link set dev $NETDEV down
+ done
+
+ log_test "Trap group statistics"
+}
+
+port_del_test()
+{
+ local group_name
+ local i
+
+ # The test never fails. It is meant to exercise different code paths
+ # and make sure we properly dismantle a port while packets are
+ # in-flight.
+ RET=0
+
+ devlink_traps_enable_all
+
+ for i in $(seq 1 10); do
+ ip link set dev $NETDEV up
+
+ sleep $SLEEP_TIME
+
+ netdevsim_port_destroy
+ netdevsim_port_create
+ udevadm settle
+ done
+
+ devlink_traps_disable_all
+
+ log_test "Port delete"
+}
+
+dev_del_test()
+{
+ local group_name
+ local i
+
+ # The test never fails. It is meant to exercise different code paths
+ # and make sure we properly unregister traps while packets are
+ # in-flight.
+ RET=0
+
+ devlink_traps_enable_all
+
+ for i in $(seq 1 10); do
+ ip link set dev $NETDEV up
+
+ sleep $SLEEP_TIME
+
+ cleanup
+ setup_prepare
+ done
+
+ devlink_traps_disable_all
+
+ log_test "Device delete"
+}
+
+netdevsim_dev_create()
+{
+ echo "$DEV_ADDR 0" > ${NETDEVSIM_PATH}/new_device
+}
+
+netdevsim_dev_destroy()
+{
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+}
+
+netdevsim_port_create()
+{
+ echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/new_port
+}
+
+netdevsim_port_destroy()
+{
+ echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/del_port
+}
+
+setup_prepare()
+{
+ local netdev
+
+ netdevsim_dev_create
+
+ if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}" ]; then
+ echo "Failed to create netdevsim device"
+ exit 1
+ fi
+
+ netdevsim_port_create
+
+ if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}/net/" ]; then
+ echo "Failed to create netdevsim port"
+ exit 1
+ fi
+
+ # Wait for udev to rename newly created netdev.
+ udevadm settle
+
+ NETDEV=$(ls ${NETDEVSIM_PATH}/devices/${DEV}/net/)
+}
+
+cleanup()
+{
+ pre_cleanup
+ netdevsim_port_destroy
+ netdevsim_dev_destroy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 8553a67a2322..13d03a6d85ba 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -4,19 +4,21 @@
##############################################################################
# Defines
-DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
- | jq -r '.port | keys[]' | cut -d/ -f-2)
-if [ -z "$DEVLINK_DEV" ]; then
- echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
- exit 1
-fi
-if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
- echo "SKIP: devlink device's bus is not PCI"
- exit 1
-fi
+if [[ ! -v DEVLINK_DEV ]]; then
+ DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
+ | jq -r '.port | keys[]' | cut -d/ -f-2)
+ if [ -z "$DEVLINK_DEV" ]; then
+ echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
+ exit 1
+ fi
+ if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
+ echo "SKIP: devlink device's bus is not PCI"
+ exit 1
+ fi
-DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
- -n | cut -d" " -f3)
+ DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
+ -n | cut -d" " -f3)
+fi
##############################################################################
# Sanity checks
@@ -27,6 +29,12 @@ if [ $? -ne 0 ]; then
exit 1
fi
+devlink help 2>&1 | grep trap &> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing devlink trap support"
+ exit 1
+fi
+
##############################################################################
# Devlink helpers
@@ -190,3 +198,160 @@ devlink_tc_bind_pool_th_restore()
devlink sb tc bind set $port tc $tc type $dir \
pool ${orig[0]} th ${orig[1]}
}
+
+devlink_traps_num_get()
+{
+ devlink -j trap | jq '.[]["'$DEVLINK_DEV'"] | length'
+}
+
+devlink_traps_get()
+{
+ devlink -j trap | jq -r '.[]["'$DEVLINK_DEV'"][].name'
+}
+
+devlink_trap_type_get()
+{
+ local trap_name=$1; shift
+
+ devlink -j trap show $DEVLINK_DEV trap $trap_name \
+ | jq -r '.[][][].type'
+}
+
+devlink_trap_action_set()
+{
+ local trap_name=$1; shift
+ local action=$1; shift
+
+ # Pipe output to /dev/null to avoid expected warnings.
+ devlink trap set $DEVLINK_DEV trap $trap_name \
+ action $action &> /dev/null
+}
+
+devlink_trap_action_get()
+{
+ local trap_name=$1; shift
+
+ devlink -j trap show $DEVLINK_DEV trap $trap_name \
+ | jq -r '.[][][].action'
+}
+
+devlink_trap_group_get()
+{
+ devlink -j trap show $DEVLINK_DEV trap $trap_name \
+ | jq -r '.[][][].group'
+}
+
+devlink_trap_metadata_test()
+{
+ local trap_name=$1; shift
+ local metadata=$1; shift
+
+ devlink -jv trap show $DEVLINK_DEV trap $trap_name \
+ | jq -e '.[][][].metadata | contains(["'$metadata'"])' \
+ &> /dev/null
+}
+
+devlink_trap_rx_packets_get()
+{
+ local trap_name=$1; shift
+
+ devlink -js trap show $DEVLINK_DEV trap $trap_name \
+ | jq '.[][][]["stats"]["rx"]["packets"]'
+}
+
+devlink_trap_rx_bytes_get()
+{
+ local trap_name=$1; shift
+
+ devlink -js trap show $DEVLINK_DEV trap $trap_name \
+ | jq '.[][][]["stats"]["rx"]["bytes"]'
+}
+
+devlink_trap_stats_idle_test()
+{
+ local trap_name=$1; shift
+ local t0_packets t0_bytes
+ local t1_packets t1_bytes
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+ t0_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+ sleep 1
+
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+ t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+devlink_traps_enable_all()
+{
+ local trap_name
+
+ for trap_name in $(devlink_traps_get); do
+ devlink_trap_action_set $trap_name "trap"
+ done
+}
+
+devlink_traps_disable_all()
+{
+ for trap_name in $(devlink_traps_get); do
+ devlink_trap_action_set $trap_name "drop"
+ done
+}
+
+devlink_trap_groups_get()
+{
+ devlink -j trap group | jq -r '.[]["'$DEVLINK_DEV'"][].name'
+}
+
+devlink_trap_group_action_set()
+{
+ local group_name=$1; shift
+ local action=$1; shift
+
+ # Pipe output to /dev/null to avoid expected warnings.
+ devlink trap group set $DEVLINK_DEV group $group_name action $action \
+ &> /dev/null
+}
+
+devlink_trap_group_rx_packets_get()
+{
+ local group_name=$1; shift
+
+ devlink -js trap group show $DEVLINK_DEV group $group_name \
+ | jq '.[][][]["stats"]["rx"]["packets"]'
+}
+
+devlink_trap_group_rx_bytes_get()
+{
+ local group_name=$1; shift
+
+ devlink -js trap group show $DEVLINK_DEV group $group_name \
+ | jq '.[][][]["stats"]["rx"]["bytes"]'
+}
+
+devlink_trap_group_stats_idle_test()
+{
+ local group_name=$1; shift
+ local t0_packets t0_bytes
+ local t1_packets t1_bytes
+
+ t0_packets=$(devlink_trap_group_rx_packets_get $group_name)
+ t0_bytes=$(devlink_trap_group_rx_bytes_get $group_name)
+
+ sleep 1
+
+ t1_packets=$(devlink_trap_group_rx_packets_get $group_name)
+ t1_bytes=$(devlink_trap_group_rx_bytes_get $group_name)
+
+ if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+ return 0
+ else
+ return 1
+ fi
+}