summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_mdb.c17
-rw-r--r--net/bridge/br_multicast.c179
-rw-r--r--net/bridge/br_netfilter_hooks.c1
-rw-r--r--net/bridge/br_netlink.c19
-rw-r--r--net/bridge/br_netlink_tunnel.c3
-rw-r--r--net/bridge/br_private.h12
-rw-r--r--net/bridge/br_vlan.c11
-rw-r--r--net/bridge/br_vlan_options.c27
-rw-r--r--net/can/isotp.c69
-rw-r--r--net/can/j1939/transport.c4
-rw-r--r--net/can/raw.c47
-rw-r--r--net/core/gro.c11
-rw-r--r--net/core/net-traces.c1
-rw-r--r--net/core/page_pool.c6
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c9
-rw-r--r--net/core/sock.c15
-rw-r--r--net/core/sock_map.c61
-rw-r--r--net/devlink/Makefile2
-rw-r--r--net/devlink/dev.c1343
-rw-r--r--net/devlink/devl_internal.h30
-rw-r--r--net/devlink/leftover.c1358
-rw-r--r--net/dsa/master.c2
-rw-r--r--net/dsa/slave.c11
-rw-r--r--net/ethtool/mm.c2
-rw-r--r--net/ipv4/raw.c21
-rw-r--r--net/ipv4/tcp_bpf.c4
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv6/addrconf.c59
-rw-r--r--net/ipv6/raw.c16
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/mac802154/rx.c1
-rw-r--r--net/mctp/af_mctp.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c11
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c5
-rw-r--r--net/netfilter/nf_flow_table_core.c5
-rw-r--r--net/netfilter/nf_flow_table_inet.c2
-rw-r--r--net/netfilter/nf_flow_table_offload.c18
-rw-r--r--net/netrom/af_netrom.c5
-rw-r--r--net/openvswitch/datapath.c12
-rw-r--r--net/openvswitch/flow.c9
-rw-r--r--net/openvswitch/flow.h2
-rw-r--r--net/openvswitch/flow_table.c8
-rw-r--r--net/qrtr/ns.c5
-rw-r--r--net/rose/af_rose.c8
-rw-r--r--net/rxrpc/Kconfig9
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-internal.h15
-rw-r--r--net/rxrpc/call_accept.c2
-rw-r--r--net/rxrpc/call_event.c15
-rw-r--r--net/rxrpc/call_object.c7
-rw-r--r--net/rxrpc/conn_service.c7
-rw-r--r--net/rxrpc/input.c60
-rw-r--r--net/rxrpc/io_thread.c48
-rw-r--r--net/rxrpc/local_object.c7
-rw-r--r--net/rxrpc/misc.c7
-rw-r--r--net/rxrpc/output.c69
-rw-r--r--net/rxrpc/proc.c4
-rw-r--r--net/rxrpc/recvmsg.c18
-rw-r--r--net/rxrpc/skbuff.c4
-rw-r--r--net/rxrpc/sysctl.c17
-rw-r--r--net/rxrpc/txbuf.c12
-rw-r--r--net/sched/Kconfig7
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_ct.c65
-rw-r--r--net/sched/act_pedit.c277
-rw-r--r--net/sched/sch_htb.c5
-rw-r--r--net/sched/sch_mqprio.c291
-rw-r--r--net/sched/sch_mqprio_lib.c117
-rw-r--r--net/sched/sch_mqprio_lib.h18
-rw-r--r--net/sched/sch_taprio.c70
-rw-r--r--net/sctp/transport.c4
-rw-r--r--net/smc/af_smc.c25
-rw-r--r--net/smc/smc_core.c75
-rw-r--r--net/smc/smc_core.h6
-rw-r--r--net/smc/smc_llc.c34
-rw-r--r--net/tls/tls_sw.c2
77 files changed, 2660 insertions, 2083 deletions
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 00e5743647b0..9f22ebfdc518 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -849,11 +849,10 @@ static int br_mdb_add_group_sg(const struct br_mdb_config *cfg,
}
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
- MCAST_INCLUDE, cfg->rt_protocol);
- if (unlikely(!p)) {
- NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (S, G) port group");
+ MCAST_INCLUDE, cfg->rt_protocol, extack);
+ if (unlikely(!p))
return -ENOMEM;
- }
+
rcu_assign_pointer(*pp, p);
if (!(flags & MDB_PG_FLAGS_PERMANENT) && !cfg->src_entry)
mod_timer(&p->timer,
@@ -1075,11 +1074,10 @@ static int br_mdb_add_group_star_g(const struct br_mdb_config *cfg,
}
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
- cfg->filter_mode, cfg->rt_protocol);
- if (unlikely(!p)) {
- NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (*, G) port group");
+ cfg->filter_mode, cfg->rt_protocol,
+ extack);
+ if (unlikely(!p))
return -ENOMEM;
- }
err = br_mdb_add_group_srcs(cfg, p, brmctx, extack);
if (err)
@@ -1101,8 +1099,7 @@ static int br_mdb_add_group_star_g(const struct br_mdb_config *cfg,
return 0;
err_del_port_group:
- hlist_del_init(&p->mglist);
- kfree(p);
+ br_multicast_del_port_group(p);
return err;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index dea1ee1bd095..96d1fc78dd39 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -31,6 +31,7 @@
#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#endif
+#include <trace/events/bridge.h>
#include "br_private.h"
#include "br_private_mcast_eht.h"
@@ -234,6 +235,29 @@ out:
return pmctx;
}
+static struct net_bridge_mcast_port *
+br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
+{
+ struct net_bridge_mcast_port *pmctx = NULL;
+ struct net_bridge_vlan *vlan;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ return NULL;
+
+ /* Take RCU to access the vlan. */
+ rcu_read_lock();
+
+ vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
+ if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+ pmctx = &vlan->port_mcast_ctx;
+
+ rcu_read_unlock();
+
+ return pmctx;
+}
+
/* when snooping we need to check if the contexts should be used
* in the following order:
* - if pmctx is non-NULL (port), check if it should be used
@@ -668,6 +692,101 @@ void br_multicast_del_group_src(struct net_bridge_group_src *src,
__br_multicast_del_group_src(src);
}
+static int
+br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
+ struct netlink_ext_ack *extack,
+ const char *what)
+{
+ u32 max = READ_ONCE(pmctx->mdb_max_entries);
+ u32 n = READ_ONCE(pmctx->mdb_n_entries);
+
+ if (max && n >= max) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "%s is already in %u groups, and mcast_max_groups=%u",
+ what, n, max);
+ return -E2BIG;
+ }
+
+ WRITE_ONCE(pmctx->mdb_n_entries, n + 1);
+ return 0;
+}
+
+static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port *pmctx)
+{
+ u32 n = READ_ONCE(pmctx->mdb_n_entries);
+
+ WARN_ON_ONCE(n == 0);
+ WRITE_ONCE(pmctx->mdb_n_entries, n - 1);
+}
+
+static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
+ const struct br_ip *group,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_mcast_port *pmctx;
+ int err;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ /* Always count on the port context. */
+ err = br_multicast_port_ngroups_inc_one(&port->multicast_ctx, extack,
+ "Port");
+ if (err) {
+ trace_br_mdb_full(port->dev, group);
+ return err;
+ }
+
+ /* Only count on the VLAN context if VID is given, and if snooping on
+ * that VLAN is enabled.
+ */
+ if (!group->vid)
+ return 0;
+
+ pmctx = br_multicast_port_vid_to_port_ctx(port, group->vid);
+ if (!pmctx)
+ return 0;
+
+ err = br_multicast_port_ngroups_inc_one(pmctx, extack, "Port-VLAN");
+ if (err) {
+ trace_br_mdb_full(port->dev, group);
+ goto dec_one_out;
+ }
+
+ return 0;
+
+dec_one_out:
+ br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
+ return err;
+}
+
+static void br_multicast_port_ngroups_dec(struct net_bridge_port *port, u16 vid)
+{
+ struct net_bridge_mcast_port *pmctx;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ if (vid) {
+ pmctx = br_multicast_port_vid_to_port_ctx(port, vid);
+ if (pmctx)
+ br_multicast_port_ngroups_dec_one(pmctx);
+ }
+ br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
+}
+
+u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx)
+{
+ return READ_ONCE(pmctx->mdb_n_entries);
+}
+
+void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max)
+{
+ WRITE_ONCE(pmctx->mdb_max_entries, max);
+}
+
+u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx)
+{
+ return READ_ONCE(pmctx->mdb_max_entries);
+}
+
static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
{
struct net_bridge_port_group *pg;
@@ -702,6 +821,7 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
} else {
br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
}
+ br_multicast_port_ngroups_dec(pg->key.port, pg->key.addr.vid);
hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
queue_work(system_long_wq, &br->mcast_gc_work);
@@ -1165,6 +1285,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
return mp;
if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ trace_br_mdb_full(br->dev, group);
br_mc_disabled_update(br->dev, false, NULL);
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
return ERR_PTR(-E2BIG);
@@ -1284,14 +1405,22 @@ struct net_bridge_port_group *br_multicast_new_port_group(
unsigned char flags,
const unsigned char *src,
u8 filter_mode,
- u8 rt_protocol)
+ u8 rt_protocol,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_port_group *p;
+ int err;
- p = kzalloc(sizeof(*p), GFP_ATOMIC);
- if (unlikely(!p))
+ err = br_multicast_port_ngroups_inc(port, group, extack);
+ if (err)
return NULL;
+ p = kzalloc(sizeof(*p), GFP_ATOMIC);
+ if (unlikely(!p)) {
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
+ goto dec_out;
+ }
+
p->key.addr = *group;
p->key.port = port;
p->flags = flags;
@@ -1305,8 +1434,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
if (!br_multicast_is_star_g(group) &&
rhashtable_lookup_insert_fast(&port->br->sg_port_tbl, &p->rhnode,
br_sg_port_rht_params)) {
- kfree(p);
- return NULL;
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't insert new port group");
+ goto free_out;
}
rcu_assign_pointer(p->next, next);
@@ -1320,6 +1449,25 @@ struct net_bridge_port_group *br_multicast_new_port_group(
eth_broadcast_addr(p->eth_addr);
return p;
+
+free_out:
+ kfree(p);
+dec_out:
+ br_multicast_port_ngroups_dec(port, group->vid);
+ return NULL;
+}
+
+void br_multicast_del_port_group(struct net_bridge_port_group *p)
+{
+ struct net_bridge_port *port = p->key.port;
+ __u16 vid = p->key.addr.vid;
+
+ hlist_del_init(&p->mglist);
+ if (!br_multicast_is_star_g(&p->key.addr))
+ rhashtable_remove_fast(&port->br->sg_port_tbl, &p->rhnode,
+ br_sg_port_rht_params);
+ kfree(p);
+ br_multicast_port_ngroups_dec(port, vid);
}
void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
@@ -1387,7 +1535,7 @@ __br_multicast_add_group(struct net_bridge_mcast *brmctx,
}
p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src,
- filter_mode, RTPROT_KERNEL);
+ filter_mode, RTPROT_KERNEL, NULL);
if (unlikely(!p)) {
p = ERR_PTR(-ENOMEM);
goto out;
@@ -1933,6 +2081,25 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
br_ip4_multicast_add_router(brmctx, pmctx);
br_ip6_multicast_add_router(brmctx, pmctx);
}
+
+ if (br_multicast_port_ctx_is_vlan(pmctx)) {
+ struct net_bridge_port_group *pg;
+ u32 n = 0;
+
+ /* The mcast_n_groups counter might be wrong. First,
+ * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries
+ * are flushed, thus mcast_n_groups after the toggle does not
+ * reflect the true values. And second, permanent entries added
+ * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected
+ * either. Thus we have to refresh the counter.
+ */
+
+ hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) {
+ if (pg->key.addr.vid == pmctx->vlan->vid)
+ n++;
+ }
+ WRITE_ONCE(pmctx->mdb_n_entries, n);
+ }
}
void br_multicast_enable_port(struct net_bridge_port *port)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index b67c9c98effa..638a4d5359db 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -871,6 +871,7 @@ static unsigned int ip_sabotage_in(void *priv,
if (nf_bridge && !nf_bridge->in_prerouting &&
!netif_is_l3_master(skb->dev) &&
!netif_is_l3_slave(skb->dev)) {
+ nf_bridge_info_free(skb);
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4316cc82ae17..9173e52b89e2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -202,6 +202,8 @@ static inline size_t br_port_info_size(void)
+ nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_N_GROUPS */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_MAX_GROUPS */
#endif
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */
@@ -298,7 +300,11 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
p->multicast_eht_hosts_limit) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
- p->multicast_eht_hosts_cnt))
+ p->multicast_eht_hosts_cnt) ||
+ nla_put_u32(skb, IFLA_BRPORT_MCAST_N_GROUPS,
+ br_multicast_ngroups_get(&p->multicast_ctx)) ||
+ nla_put_u32(skb, IFLA_BRPORT_MCAST_MAX_GROUPS,
+ br_multicast_ngroups_get_max(&p->multicast_ctx)))
return -EMSGSIZE;
#endif
@@ -858,6 +864,8 @@ static int br_afspec(struct net_bridge *br,
}
static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
+ [IFLA_BRPORT_UNSPEC] = { .strict_start_type =
+ IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + 1 },
[IFLA_BRPORT_STATE] = { .type = NLA_U8 },
[IFLA_BRPORT_COST] = { .type = NLA_U32 },
[IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
@@ -881,6 +889,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MAB] = { .type = NLA_U8 },
[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
+ [IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
+ [IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
};
/* Change the state of the port and notify spanning tree */
@@ -1015,6 +1025,13 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
if (err)
return err;
}
+
+ if (tb[IFLA_BRPORT_MCAST_MAX_GROUPS]) {
+ u32 max_groups;
+
+ max_groups = nla_get_u32(tb[IFLA_BRPORT_MCAST_MAX_GROUPS]);
+ br_multicast_ngroups_set_max(&p->multicast_ctx, max_groups);
+ }
#endif
if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 8914290c75d4..17abf092f7ca 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -188,6 +188,9 @@ initvars:
}
static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1] = {
+ [IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC] = {
+ .strict_start_type = IFLA_BRIDGE_VLAN_TUNNEL_FLAGS + 1
+ },
[IFLA_BRIDGE_VLAN_TUNNEL_ID] = { .type = NLA_U32 },
[IFLA_BRIDGE_VLAN_TUNNEL_VID] = { .type = NLA_U16 },
[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 },
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 15ef7fd508ee..cef5f6ea850c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -126,6 +126,8 @@ struct net_bridge_mcast_port {
struct hlist_node ip6_rlist;
#endif /* IS_ENABLED(CONFIG_IPV6) */
unsigned char multicast_router;
+ u32 mdb_n_entries;
+ u32 mdb_max_entries;
#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
};
@@ -956,7 +958,9 @@ br_multicast_new_port_group(struct net_bridge_port *port,
const struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags, const unsigned char *src,
- u8 filter_mode, u8 rt_protocol);
+ u8 filter_mode, u8 rt_protocol,
+ struct netlink_ext_ack *extack);
+void br_multicast_del_port_group(struct net_bridge_port_group *p);
int br_mdb_hash_init(struct net_bridge *br);
void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
@@ -974,6 +978,9 @@ void br_multicast_uninit_stats(struct net_bridge *br);
void br_multicast_get_stats(const struct net_bridge *br,
const struct net_bridge_port *p,
struct br_mcast_stats *dest);
+u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx);
+void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max);
+u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx);
void br_mdb_init(void);
void br_mdb_uninit(void);
void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
@@ -1757,7 +1764,8 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end);
-bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v);
+bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
+ const struct net_bridge_port *p);
size_t br_vlan_opts_nl_size(void);
int br_vlan_process_options(const struct net_bridge *br,
const struct net_bridge_port *p,
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index bc75fa1e4666..8a3dbc09ba38 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1816,6 +1816,7 @@ out_err:
/* v_opts is used to dump the options which must be equal in the whole range */
static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range,
const struct net_bridge_vlan *v_opts,
+ const struct net_bridge_port *p,
u16 flags,
bool dump_stats)
{
@@ -1842,7 +1843,7 @@ static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range,
goto out_err;
if (v_opts) {
- if (!br_vlan_opts_fill(skb, v_opts))
+ if (!br_vlan_opts_fill(skb, v_opts, p))
goto out_err;
if (dump_stats && !br_vlan_stats_fill(skb, v_opts))
@@ -1925,7 +1926,7 @@ void br_vlan_notify(const struct net_bridge *br,
goto out_kfree;
}
- if (!br_vlan_fill_vids(skb, vid, vid_range, v, flags, false))
+ if (!br_vlan_fill_vids(skb, vid, vid_range, v, p, flags, false))
goto out_err;
nlmsg_end(skb, nlh);
@@ -2030,7 +2031,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
if (!br_vlan_fill_vids(skb, range_start->vid,
range_end->vid, range_start,
- vlan_flags, dump_stats)) {
+ p, vlan_flags, dump_stats)) {
err = -EMSGSIZE;
break;
}
@@ -2056,7 +2057,7 @@ update_end:
else if (!dump_global &&
!br_vlan_fill_vids(skb, range_start->vid,
range_end->vid, range_start,
- br_vlan_flags(range_start, pvid),
+ p, br_vlan_flags(range_start, pvid),
dump_stats))
err = -EMSGSIZE;
}
@@ -2131,6 +2132,8 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] =
[BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 },
[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED },
[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS] = { .type = NLA_REJECT },
+ [BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
};
static int br_vlan_rtm_process_one(struct net_device *dev,
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index a2724d03278c..e378c2f3a9e2 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -48,7 +48,8 @@ bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
curr_mc_rtr == range_mc_rtr;
}
-bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
+bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
+ const struct net_bridge_port *p)
{
if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, br_vlan_get_state(v)) ||
!__vlan_tun_put(skb, v))
@@ -58,6 +59,12 @@ bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
br_vlan_multicast_router(v)))
return false;
+ if (p && !br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx) &&
+ (nla_put_u32(skb, BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS,
+ br_multicast_ngroups_get(&v->port_mcast_ctx)) ||
+ nla_put_u32(skb, BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS,
+ br_multicast_ngroups_get_max(&v->port_mcast_ctx))))
+ return false;
#endif
return true;
@@ -70,6 +77,8 @@ size_t br_vlan_opts_nl_size(void)
+ nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_TINFO_ID */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_MCAST_ROUTER */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS */
#endif
+ 0;
}
@@ -212,6 +221,22 @@ static int br_vlan_process_one_opts(const struct net_bridge *br,
return err;
*changed = true;
}
+ if (tb[BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS]) {
+ u32 val;
+
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't set mcast_max_groups for non-port vlans");
+ return -EINVAL;
+ }
+ if (br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multicast snooping disabled on this VLAN");
+ return -EINVAL;
+ }
+
+ val = nla_get_u32(tb[BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS]);
+ br_multicast_ngroups_set_max(&v->port_mcast_ctx, val);
+ *changed = true;
+ }
#endif
return 0;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index a18450ffae01..9bc344851704 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -140,7 +140,7 @@ struct isotp_sock {
canid_t rxid;
ktime_t tx_gap;
ktime_t lastrxcf_tstamp;
- struct hrtimer rxtimer, txtimer;
+ struct hrtimer rxtimer, txtimer, txfrtimer;
struct can_isotp_options opt;
struct can_isotp_fc_options rxfc, txfc;
struct can_isotp_ll_options ll;
@@ -871,7 +871,7 @@ static void isotp_rcv_echo(struct sk_buff *skb, void *data)
}
/* start timer to send next consecutive frame with correct delay */
- hrtimer_start(&so->txtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
+ hrtimer_start(&so->txfrtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
}
static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
@@ -879,49 +879,39 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
txtimer);
struct sock *sk = &so->sk;
- enum hrtimer_restart restart = HRTIMER_NORESTART;
- switch (so->tx.state) {
- case ISOTP_SENDING:
-
- /* cfecho should be consumed by isotp_rcv_echo() here */
- if (!so->cfecho) {
- /* start timeout for unlikely lost echo skb */
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(),
- ktime_set(ISOTP_ECHO_TIMEOUT, 0)));
- restart = HRTIMER_RESTART;
+ /* don't handle timeouts in IDLE state */
+ if (so->tx.state == ISOTP_IDLE)
+ return HRTIMER_NORESTART;
- /* push out the next consecutive frame */
- isotp_send_cframe(so);
- break;
- }
+ /* we did not get any flow control or echo frame in time */
- /* cfecho has not been cleared in isotp_rcv_echo() */
- pr_notice_once("can-isotp: cfecho %08X timeout\n", so->cfecho);
- fallthrough;
+ /* report 'communication error on send' */
+ sk->sk_err = ECOMM;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk_error_report(sk);
- case ISOTP_WAIT_FC:
- case ISOTP_WAIT_FIRST_FC:
+ /* reset tx state */
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
- /* we did not get any flow control frame in time */
+ return HRTIMER_NORESTART;
+}
- /* report 'communication error on send' */
- sk->sk_err = ECOMM;
- if (!sock_flag(sk, SOCK_DEAD))
- sk_error_report(sk);
+static enum hrtimer_restart isotp_txfr_timer_handler(struct hrtimer *hrtimer)
+{
+ struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
+ txfrtimer);
- /* reset tx state */
- so->tx.state = ISOTP_IDLE;
- wake_up_interruptible(&so->wait);
- break;
+ /* start echo timeout handling and cover below protocol error */
+ hrtimer_start(&so->txtimer, ktime_set(ISOTP_ECHO_TIMEOUT, 0),
+ HRTIMER_MODE_REL_SOFT);
- default:
- WARN_ONCE(1, "can-isotp: tx timer state %08X cfecho %08X\n",
- so->tx.state, so->cfecho);
- }
+ /* cfecho should be consumed by isotp_rcv_echo() here */
+ if (so->tx.state == ISOTP_SENDING && !so->cfecho)
+ isotp_send_cframe(so);
- return restart;
+ return HRTIMER_NORESTART;
}
static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -1162,6 +1152,10 @@ static int isotp_release(struct socket *sock)
/* wait for complete transmission of current pdu */
wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ /* force state machines to be idle also when a signal occurred */
+ so->tx.state = ISOTP_IDLE;
+ so->rx.state = ISOTP_IDLE;
+
spin_lock(&isotp_notifier_lock);
while (isotp_busy_notifier == so) {
spin_unlock(&isotp_notifier_lock);
@@ -1194,6 +1188,7 @@ static int isotp_release(struct socket *sock)
}
}
+ hrtimer_cancel(&so->txfrtimer);
hrtimer_cancel(&so->txtimer);
hrtimer_cancel(&so->rxtimer);
@@ -1600,6 +1595,8 @@ static int isotp_init(struct sock *sk)
so->rxtimer.function = isotp_rx_timer_handler;
hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
so->txtimer.function = isotp_tx_timer_handler;
+ hrtimer_init(&so->txfrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+ so->txfrtimer.function = isotp_txfr_timer_handler;
init_waitqueue_head(&so->wait);
spin_lock_init(&so->rx_lock);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 5c722b55fe23..fce9b9ebf13f 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1092,10 +1092,6 @@ static bool j1939_session_deactivate(struct j1939_session *session)
bool active;
j1939_session_list_lock(priv);
- /* This function should be called with a session ref-count of at
- * least 2.
- */
- WARN_ON_ONCE(kref_read(&session->kref) < 2);
active = j1939_session_deactivate_locked(session);
j1939_session_list_unlock(priv);
diff --git a/net/can/raw.c b/net/can/raw.c
index 81071cdb0301..ba86782ba8bb 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -132,8 +132,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
return;
/* make sure to not pass oversized frames to the socket */
- if ((can_is_canfd_skb(oskb) && !ro->fd_frames && !ro->xl_frames) ||
- (can_is_canxl_skb(oskb) && !ro->xl_frames))
+ if ((!ro->fd_frames && can_is_canfd_skb(oskb)) ||
+ (!ro->xl_frames && can_is_canxl_skb(oskb)))
return;
/* eliminate multiple filter matches for the same skb */
@@ -670,6 +670,11 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (copy_from_sockptr(&ro->fd_frames, optval, optlen))
return -EFAULT;
+ /* Enabling CAN XL includes CAN FD */
+ if (ro->xl_frames && !ro->fd_frames) {
+ ro->fd_frames = ro->xl_frames;
+ return -EINVAL;
+ }
break;
case CAN_RAW_XL_FRAMES:
@@ -679,6 +684,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (copy_from_sockptr(&ro->xl_frames, optval, optlen))
return -EFAULT;
+ /* Enabling CAN XL includes CAN FD */
+ if (ro->xl_frames)
+ ro->fd_frames = ro->xl_frames;
break;
case CAN_RAW_JOIN_FILTERS:
@@ -786,6 +794,25 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
+static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
+{
+ /* Classical CAN -> no checks for flags and device capabilities */
+ if (can_is_can_skb(skb))
+ return false;
+
+ /* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */
+ if (ro->fd_frames && can_is_canfd_skb(skb) &&
+ (mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu)))
+ return false;
+
+ /* CAN XL -> needs to be enabled and a CAN XL device */
+ if (ro->xl_frames && can_is_canxl_skb(skb) &&
+ can_is_canxl_dev_mtu(mtu))
+ return false;
+
+ return true;
+}
+
static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
@@ -833,20 +860,8 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
goto free_skb;
err = -EINVAL;
- if (ro->xl_frames && can_is_canxl_dev_mtu(dev->mtu)) {
- /* CAN XL, CAN FD and Classical CAN */
- if (!can_is_canxl_skb(skb) && !can_is_canfd_skb(skb) &&
- !can_is_can_skb(skb))
- goto free_skb;
- } else if (ro->fd_frames && dev->mtu == CANFD_MTU) {
- /* CAN FD and Classical CAN */
- if (!can_is_canfd_skb(skb) && !can_is_can_skb(skb))
- goto free_skb;
- } else {
- /* Classical CAN */
- if (!can_is_can_skb(skb))
- goto free_skb;
- }
+ if (raw_bad_txframe(ro, skb, dev->mtu))
+ goto free_skb;
sockcm_init(&sockc, sk);
if (msg->msg_controllen) {
diff --git a/net/core/gro.c b/net/core/gro.c
index b15f85546bdd..a606705a0859 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -162,10 +162,19 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
struct sk_buff *lp;
int segs;
+ /* Do not splice page pool based packets w/ non-page pool
+ * packets. This can result in reference count issues as page
+ * pool pages will not decrement the reference count and will
+ * instead be immediately returned to the pool or have frag
+ * count decremented.
+ */
+ if (p->pp_recycle != skb->pp_recycle)
+ return -ETOOMANYREFS;
+
/* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */
gro_max_size = p->protocol == htons(ETH_P_IPV6) ?
READ_ONCE(p->dev->gro_max_size) :
- READ_ONCE(p->dev->gro_ipv4_max_size);
+ READ_ONCE(p->dev->gro_ipv4_max_size);
if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
return -E2BIG;
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index ee7006bbe49b..805b7385dd8d 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -41,6 +41,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add);
EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_mdb_full);
#endif
#if IS_ENABLED(CONFIG_PAGE_POOL)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b203d8660e4..193c18799865 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -511,8 +511,8 @@ static void page_pool_return_page(struct page_pool *pool, struct page *page)
static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
{
int ret;
- /* BH protection not needed if current is serving softirq */
- if (in_serving_softirq())
+ /* BH protection not needed if current is softirq */
+ if (in_softirq())
ret = ptr_ring_produce(&pool->ring, page);
else
ret = ptr_ring_produce_bh(&pool->ring, page);
@@ -570,7 +570,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
page_pool_dma_sync_for_device(pool, page,
dma_sync_size);
- if (allow_direct && in_serving_softirq() &&
+ if (allow_direct && in_softirq() &&
page_pool_recycle_in_cache(page, pool))
return NULL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b9f584955b77..5d8eb57867a9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -58,7 +58,7 @@
#include "dev.h"
#define RTNL_MAX_TYPE 50
-#define RTNL_SLAVE_MAX_TYPE 40
+#define RTNL_SLAVE_MAX_TYPE 42
struct rtnl_link {
rtnl_doit_func doit;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bb79b4cb89db..624e9e4ec116 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1000,8 +1000,10 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
while (segs) {
struct sk_buff *next = segs->next;
- if (__kfree_skb_reason(segs, reason))
+ if (__kfree_skb_reason(segs, reason)) {
+ skb_poison_list(segs);
kfree_skb_add_bulk(segs, &sa, reason);
+ }
segs = next;
}
@@ -4143,7 +4145,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_shinfo(skb)->frag_list = NULL;
- do {
+ while (list_skb) {
nskb = list_skb;
list_skb = list_skb->next;
@@ -4189,8 +4191,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
if (skb_needs_linearize(nskb, features) &&
__skb_linearize(nskb))
goto err_linearize;
-
- } while (list_skb);
+ }
skb->truesize = skb->truesize - delta_truesize;
skb->data_len = skb->data_len - delta_len;
diff --git a/net/core/sock.c b/net/core/sock.c
index f08b76acde9b..208634b01df5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3383,7 +3383,7 @@ void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
}
EXPORT_SYMBOL(sk_stop_timer_sync);
-void sock_init_data(struct socket *sock, struct sock *sk)
+void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
{
sk_init_common(sk);
sk->sk_send_head = NULL;
@@ -3403,11 +3403,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_type = sock->type;
RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
sock->sk = sk;
- sk->sk_uid = SOCK_INODE(sock)->i_uid;
} else {
RCU_INIT_POINTER(sk->sk_wq, NULL);
- sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
}
+ sk->sk_uid = uid;
rwlock_init(&sk->sk_callback_lock);
if (sk->sk_kern_sock)
@@ -3466,6 +3465,16 @@ void sock_init_data(struct socket *sock, struct sock *sk)
refcount_set(&sk->sk_refcnt, 1);
atomic_set(&sk->sk_drops, 0);
}
+EXPORT_SYMBOL(sock_init_data_uid);
+
+/* Initialise @sk through sock_init_data_uid(), picking the owner uid here:
+ * the i_uid of @sock's inode when @sock is non-NULL, otherwise uid 0 mapped
+ * into the user namespace of @sk's network namespace.
+ */
+void sock_init_data(struct socket *sock, struct sock *sk)
+{
+ kuid_t uid = sock ?
+ SOCK_INODE(sock)->i_uid :
+ make_kuid(sock_net(sk)->user_ns, 0);
+
+ sock_init_data_uid(sock, sk, uid);
+}
EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 22fa2c5bc6ec..a68a7290a3b2 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1569,15 +1569,16 @@ void sock_map_unhash(struct sock *sk)
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
- if (sk->sk_prot->unhash)
- sk->sk_prot->unhash(sk);
- return;
+ saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
+ } else {
+ saved_unhash = psock->saved_unhash;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
}
-
- saved_unhash = psock->saved_unhash;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- saved_unhash(sk);
+ if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
+ return;
+ if (saved_unhash)
+ saved_unhash(sk);
}
EXPORT_SYMBOL_GPL(sock_map_unhash);
@@ -1590,17 +1591,18 @@ void sock_map_destroy(struct sock *sk)
psock = sk_psock_get(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
- if (sk->sk_prot->destroy)
- sk->sk_prot->destroy(sk);
- return;
+ saved_destroy = READ_ONCE(sk->sk_prot)->destroy;
+ } else {
+ saved_destroy = psock->saved_destroy;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ sk_psock_put(sk, psock);
}
-
- saved_destroy = psock->saved_destroy;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- sk_psock_stop(psock);
- sk_psock_put(sk, psock);
- saved_destroy(sk);
+ if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
+ return;
+ if (saved_destroy)
+ saved_destroy(sk);
}
EXPORT_SYMBOL_GPL(sock_map_destroy);
@@ -1615,16 +1617,21 @@ void sock_map_close(struct sock *sk, long timeout)
if (unlikely(!psock)) {
rcu_read_unlock();
release_sock(sk);
- return sk->sk_prot->close(sk, timeout);
+ saved_close = READ_ONCE(sk->sk_prot)->close;
+ } else {
+ saved_close = psock->saved_close;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ release_sock(sk);
+ cancel_work_sync(&psock->work);
+ sk_psock_put(sk, psock);
}
-
- saved_close = psock->saved_close;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- sk_psock_stop(psock);
- release_sock(sk);
- cancel_work_sync(&psock->work);
- sk_psock_put(sk, psock);
+ /* Make sure we do not recurse. This is a bug.
+ * Leak the socket instead of crashing on a stack overflow.
+ */
+ if (WARN_ON_ONCE(saved_close == sock_map_close))
+ return;
saved_close(sk, timeout);
}
EXPORT_SYMBOL_GPL(sock_map_close);
diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index 1b1eeac59cb3..daad4521c61e 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y := leftover.o core.o netlink.o
+obj-y := leftover.o core.o netlink.o dev.o
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
new file mode 100644
index 000000000000..78d824eda5ec
--- /dev/null
+++ b/net/devlink/dev.c
@@ -0,0 +1,1343 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include "devl_internal.h"
+
+/* Request state handed to a driver's ->info_get() callback.
+ * @msg: netlink message that info attributes are appended to; the put
+ * helpers in this file emit nothing when it is NULL.
+ * @version_cb: optional hook invoked for every version entry that is put.
+ * @version_cb_priv: opaque pointer passed back to @version_cb.
+ */
+struct devlink_info_req {
+ struct sk_buff *msg;
+ void (*version_cb)(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv);
+ void *version_cb_priv;
+};
+
+/* One (reload action, reload limit) pair. */
+struct devlink_reload_combination {
+ enum devlink_reload_action action;
+ enum devlink_reload_limit limit;
+};
+
+/* Action/limit pairs that are never allowed together; consulted by
+ * devlink_reload_combination_is_invalid() and devlink_reload_actions_valid().
+ */
+static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
+ {
+ /* can't reinitialize driver with no down time */
+ .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ .limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
+ },
+};
+
+/* Return true when (@action, @limit) matches an entry of
+ * devlink_reload_invalid_combinations[], false otherwise.
+ */
+static bool
+devlink_reload_combination_is_invalid(enum devlink_reload_action action,
+ enum devlink_reload_limit limit)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
+ if (devlink_reload_invalid_combinations[i].action == action &&
+ devlink_reload_invalid_combinations[i].limit == limit)
+ return true;
+ return false;
+}
+
+/* Return true when bit @action is set in the driver's ops->reload_actions. */
+static bool
+devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
+{
+ return test_bit(action, &devlink->ops->reload_actions);
+}
+
+/* Return true when bit @limit is set in the driver's ops->reload_limits. */
+static bool
+devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
+{
+ return test_bit(limit, &devlink->ops->reload_limits);
+}
+
+/* Append one DEVLINK_ATTR_RELOAD_STATS_ENTRY nest to @msg carrying @limit
+ * (u8) and @value (u32).
+ *
+ * Returns 0 on success or -EMSGSIZE when the nest or either attribute does
+ * not fit; a partially written nest is cancelled before returning.
+ */
+static int devlink_reload_stat_put(struct sk_buff *msg,
+ enum devlink_reload_limit limit, u32 value)
+{
+ struct nlattr *reload_stats_entry;
+
+ reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
+ if (!reload_stats_entry)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+ nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
+ goto nla_put_failure;
+ nla_nest_end(msg, reload_stats_entry);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_entry);
+ return -EMSGSIZE;
+}
+
+/* Dump the reload counters of @devlink into @msg.
+ *
+ * @is_remote selects both the outer attribute
+ * (DEVLINK_ATTR_REMOTE_RELOAD_STATS vs. DEVLINK_ATTR_RELOAD_STATS) and the
+ * counter array (stats.remote_reload_stats vs. stats.reload_stats). Inside
+ * the outer nest one ACTION_INFO nest is written per action, holding the
+ * action id and an ACTION_STATS nest with one entry per limit.
+ *
+ * Returns 0 on success or -EMSGSIZE; on failure every nest opened so far is
+ * cancelled.
+ */
+static int
+devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
+{
+ struct nlattr *reload_stats_attr, *act_info, *act_stats;
+ int i, j, stat_idx;
+ u32 value;
+
+ if (!is_remote)
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
+ else
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);
+
+ if (!reload_stats_attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+ /* Skip the UNSPEC action always, and for local stats also any
+ * action the driver does not support.
+ */
+ if ((!is_remote &&
+ !devlink_reload_action_is_supported(devlink, i)) ||
+ i == DEVLINK_RELOAD_ACTION_UNSPEC)
+ continue;
+ act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
+ if (!act_info)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
+ goto action_info_nest_cancel;
+ act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
+ if (!act_stats)
+ goto action_info_nest_cancel;
+
+ for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+ /* Remote stats are shown even if not locally supported.
+ * Stats of actions with unspecified limit are shown
+ * though drivers don't need to register unspecified
+ * limit.
+ */
+ if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+ !devlink_reload_limit_is_supported(devlink, j)) ||
+ devlink_reload_combination_is_invalid(i, j))
+ continue;
+
+ /* Counters are stored flat, indexed by limit then action. */
+ stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
+ if (!is_remote)
+ value = devlink->stats.reload_stats[stat_idx];
+ else
+ value = devlink->stats.remote_reload_stats[stat_idx];
+ if (devlink_reload_stat_put(msg, j, value))
+ goto action_stats_nest_cancel;
+ }
+ nla_nest_end(msg, act_stats);
+ nla_nest_end(msg, act_info);
+ }
+ nla_nest_end(msg, reload_stats_attr);
+ return 0;
+
+action_stats_nest_cancel:
+ nla_nest_cancel(msg, act_stats);
+action_info_nest_cancel:
+ nla_nest_cancel(msg, act_info);
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_attr);
+ return -EMSGSIZE;
+}
+
+/* Build one devlink instance message of type @cmd in @msg: the genetlink
+ * header, the instance handle, the reload_failed flag and a
+ * DEVLINK_ATTR_DEV_STATS nest holding the local and then the remote reload
+ * stats.
+ *
+ * Returns 0 on success or -EMSGSIZE; on failure the message is cancelled.
+ */
+static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ struct nlattr *dev_stats;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
+ goto nla_put_failure;
+
+ dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
+ if (!dev_stats)
+ goto nla_put_failure;
+
+ if (devlink_reload_stats_put(msg, devlink, false))
+ goto dev_stats_nest_cancel;
+ if (devlink_reload_stats_put(msg, devlink, true))
+ goto dev_stats_nest_cancel;
+
+ nla_nest_end(msg, dev_stats);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+dev_stats_nest_cancel:
+ nla_nest_cancel(msg, dev_stats);
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/* Multicast a @cmd notification describing @devlink to the
+ * DEVLINK_MCGRP_CONFIG group in the instance's network namespace.
+ *
+ * Warns when @cmd is neither DEVLINK_CMD_NEW nor DEVLINK_CMD_DEL, or when the
+ * instance does not carry the DEVLINK_REGISTERED mark. Allocation or fill
+ * failures are dropped silently: the function returns without notifying.
+ */
+void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+/* Netlink doit handler: reply to the requester with a DEVLINK_CMD_NEW
+ * message describing the devlink instance found in info->user_ptr[0].
+ *
+ * Returns -ENOMEM when the reply cannot be allocated, the fill error when
+ * building it fails, otherwise the result of genlmsg_reply().
+ */
+int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+/* Dump callback: append one DEVLINK_CMD_NEW message for @devlink to @msg,
+ * flagged NLM_F_MULTI and addressed from the dump request in @cb.
+ */
+static int
+devlink_nl_cmd_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI);
+}
+
+/* Command descriptor wiring the per-instance dump callback for "get". */
+const struct devlink_cmd devl_cmd_get = {
+ .dump_one = devlink_nl_cmd_get_dump_one,
+};
+
+/* Record @reload_failed on @devlink and send a DEVLINK_CMD_NEW notification,
+ * but only when the stored value actually changes.
+ */
+static void devlink_reload_failed_set(struct devlink *devlink,
+ bool reload_failed)
+{
+ if (devlink->reload_failed == reload_failed)
+ return;
+ devlink->reload_failed = reload_failed;
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+/* Return the reload_failed flag currently stored on @devlink. */
+bool devlink_is_reload_failed(const struct devlink *devlink)
+{
+ return devlink->reload_failed;
+}
+EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+
+/* Increment, in @reload_stats, the counter of every action whose bit is set
+ * in @actions_performed for the given @limit, then send a DEVLINK_CMD_NEW
+ * notification. Counters are indexed as
+ * limit * __DEVLINK_RELOAD_ACTION_MAX + action.
+ */
+static void
+__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
+ enum devlink_reload_limit limit, u32 actions_performed)
+{
+ unsigned long actions = actions_performed;
+ int stat_idx;
+ int action;
+
+ for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) {
+ stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
+ reload_stats[stat_idx]++;
+ }
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+/* Account @actions_performed under @limit in the local (non-remote) reload
+ * counters of @devlink.
+ */
+static void
+devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
+ u32 actions_performed)
+{
+ __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit,
+ actions_performed);
+}
+
+/**
+ * devlink_remote_reload_actions_performed - Update devlink on reload actions
+ * performed which are not a direct result of devlink reload call.
+ *
+ * This should be called by a driver after performing reload actions in case it was not
+ * a result of devlink reload call. For example fw_activate was performed as a result
+ * of devlink reload triggered fw_activate on another host.
+ * The motivation for this function is to keep data on reload actions performed on this
+ * function whether it was done due to direct devlink reload call or not.
+ *
+ * @devlink: devlink
+ * @limit: reload limit
+ * @actions_performed: bitmask of actions performed
+ */
+void devlink_remote_reload_actions_performed(struct devlink *devlink,
+ enum devlink_reload_limit limit,
+ u32 actions_performed)
+{
+ /* Warn and bail out on an empty mask, on the UNSPEC action bit, on
+ * bits beyond the known actions, or on an out-of-range limit.
+ */
+ if (WARN_ON(!actions_performed ||
+ actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+ actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) ||
+ limit > DEVLINK_RELOAD_LIMIT_MAX))
+ return;
+
+ __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit,
+ actions_performed);
+}
+EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed);
+
+/* Resolve the target network namespace named by exactly one of the
+ * DEVLINK_ATTR_NETNS_PID, _FD or _ID attributes of the request.
+ *
+ * Returns the namespace on success, or ERR_PTR(-EINVAL) when more than one
+ * attribute is given or the lookup fails, or ERR_PTR(-EPERM) when the sender
+ * lacks CAP_NET_ADMIN in the namespace's user namespace.
+ * NOTE(review): the put_net() on the -EPERM path suggests the lookup helpers
+ * return a held reference that the caller must drop - confirm.
+ */
+static struct net *devlink_netns_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
+ struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
+ struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
+ struct net *net;
+
+ if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
+ NL_SET_ERR_MSG_MOD(info->extack, "multiple netns identifying attributes specified");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (netns_pid_attr) {
+ net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
+ } else if (netns_fd_attr) {
+ net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
+ } else if (netns_id_attr) {
+ net = get_net_ns_by_id(sock_net(skb->sk),
+ nla_get_u32(netns_id_attr));
+ /* Lookup by id reports failure as NULL; normalise to ERR_PTR. */
+ if (!net)
+ net = ERR_PTR(-EINVAL);
+ } else {
+ /* Reaching here with no netns attribute at all is unexpected. */
+ WARN_ON(1);
+ net = ERR_PTR(-EINVAL);
+ }
+ if (IS_ERR(net)) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Unknown network namespace");
+ return ERR_PTR(-EINVAL);
+ }
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EPERM);
+ }
+ return net;
+}
+
+/* Move @devlink from @curr_net to @dest_net: send unregister notifications,
+ * move the instance's netdevice notifier to the new namespace, update the
+ * stored namespace pointer, then send register notifications.
+ */
+static void devlink_reload_netns_change(struct devlink *devlink,
+ struct net *curr_net,
+ struct net *dest_net)
+{
+ /* Userspace needs to be notified about devlink objects
+ * removed from original and entering new network namespace.
+ * The rest of the devlink objects are re-created during
+ * reload process so the notifications are generated separately.
+ */
+ devlink_notify_unregister(devlink);
+ move_netdevice_notifier_net(curr_net, dest_net,
+ &devlink->netdevice_nb);
+ write_pnet(&devlink->_net, dest_net);
+ devlink_notify_register(devlink);
+}
+
+/* Reload @devlink: call the driver's ->reload_down(), optionally move the
+ * instance to @dest_net, then call ->reload_up().
+ *
+ * @dest_net: target namespace, or NULL to stay in the current one.
+ * @action: requested reload action.
+ * @limit: requested reload limit.
+ * @actions_performed: filled by ->reload_up() with the bitmask of actions
+ * actually carried out.
+ * @extack: netlink extended ack passed through to the driver.
+ *
+ * Returns 0 on success or the error from ->reload_down()/->reload_up(). The
+ * reload_failed flag is updated from the ->reload_up() result; local reload
+ * stats are only bumped on success.
+ */
+int devlink_reload(struct devlink *devlink, struct net *dest_net,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ u32 *actions_performed, struct netlink_ext_ack *extack)
+{
+ u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ struct net *curr_net;
+ int err;
+
+ /* Snapshot the remote counters so a change made during the reload
+ * can be detected below.
+ */
+ memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
+ sizeof(remote_reload_stats));
+
+ err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
+ if (err)
+ return err;
+
+ curr_net = devlink_net(devlink);
+ if (dest_net && !net_eq(dest_net, curr_net))
+ devlink_reload_netns_change(devlink, curr_net, dest_net);
+
+ err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
+ devlink_reload_failed_set(devlink, !!err);
+ if (err)
+ return err;
+
+ /* The driver must report at least the action that was requested. */
+ WARN_ON(!(*actions_performed & BIT(action)));
+ /* Catch driver on updating the remote action within devlink reload */
+ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
+ sizeof(remote_reload_stats)));
+ devlink_reload_stats_update(devlink, limit, *actions_performed);
+ return 0;
+}
+
+/* Reply to the requester with a @cmd message carrying the instance handle
+ * and DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED as a bitfield32 whose value and
+ * selector are both @actions_performed.
+ *
+ * Returns -ENOMEM when the reply cannot be allocated, -EMSGSIZE when it
+ * cannot be built, otherwise the result of genlmsg_reply().
+ */
+static int
+devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed,
+ enum devlink_command cmd, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed,
+ actions_performed))
+ goto nla_put_failure;
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+}
+
+/* Netlink handler for the reload command.
+ *
+ * Validates resources, picks the action (DRIVER_REINIT when the attribute is
+ * absent) and the limit (UNSPEC when the attribute is absent), optionally
+ * resolves a destination network namespace, and runs devlink_reload().
+ *
+ * Returns 0 on success, a validation error (-EINVAL / -EOPNOTSUPP), the
+ * namespace lookup error, the devlink_reload() error, or the result of
+ * sending the actions-performed reply.
+ */
+int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ enum devlink_reload_action action;
+ enum devlink_reload_limit limit;
+ struct net *dest_net = NULL;
+ u32 actions_performed;
+ int err;
+
+ err = devlink_resources_validate(devlink, NULL, info);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
+ action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
+ else
+ action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
+
+ if (!devlink_reload_action_is_supported(devlink, action)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested reload action is not supported by the driver");
+ return -EOPNOTSUPP;
+ }
+
+ limit = DEVLINK_RELOAD_LIMIT_UNSPEC;
+ if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) {
+ struct nla_bitfield32 limits;
+ u32 limits_selected;
+
+ limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
+ limits_selected = limits.value & limits.selector;
+ if (!limits_selected) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected");
+ return -EINVAL;
+ }
+ /* Find the lowest selected limit bit. */
+ for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
+ if (limits_selected & BIT(limit))
+ break;
+ /* UAPI enables multiselection, but currently it is not used */
+ if (limits_selected != BIT(limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Multiselection of limit is not supported");
+ return -EOPNOTSUPP;
+ }
+ if (!devlink_reload_limit_is_supported(devlink, limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested limit is not supported by the driver");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_reload_combination_is_invalid(action, limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested limit is invalid for this action");
+ return -EINVAL;
+ }
+ }
+ if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+ info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+ info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+ dest_net = devlink_netns_get(skb, info);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+ }
+
+ err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
+
+ /* Drop the namespace obtained above regardless of the outcome. */
+ if (dest_net)
+ put_net(dest_net);
+
+ if (err)
+ return err;
+ /* For backward compatibility generate reply only if attributes used by user */
+ if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS])
+ return 0;
+
+ return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
+ DEVLINK_CMD_RELOAD, info);
+}
+
+/* Sanity-check the reload_actions / reload_limits bitmasks declared in @ops.
+ *
+ * When devlink_reload_supported() says reload is unsupported, the ops are
+ * valid only if reload_actions is zero. Otherwise reload_actions must be
+ * non-zero, must not contain the UNSPEC bit or bits beyond the known
+ * actions, and reload_limits must not contain the UNSPEC bit or bits beyond
+ * the known limits. Ops whose single action and single limit form an invalid
+ * combination are rejected as well.
+ *
+ * Returns true when the ops are acceptable; most violations also WARN.
+ */
+bool devlink_reload_actions_valid(const struct devlink_ops *ops)
+{
+ const struct devlink_reload_combination *comb;
+ int i;
+
+ if (!devlink_reload_supported(ops)) {
+ if (WARN_ON(ops->reload_actions))
+ return false;
+ return true;
+ }
+
+ if (WARN_ON(!ops->reload_actions ||
+ ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+ ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
+ return false;
+
+ if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) ||
+ ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX)))
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) {
+ comb = &devlink_reload_invalid_combinations[i];
+ if (ops->reload_actions == BIT(comb->action) &&
+ ops->reload_limits == BIT(comb->limit))
+ return false;
+ }
+ return true;
+}
+
+/* Build an eswitch message of type @cmd in @msg: the genetlink header, the
+ * instance handle, and one attribute for each of mode, inline mode and encap
+ * mode whose getter the driver implements.
+ *
+ * Returns 0 on success, -EMSGSIZE when the header does not fit, or the error
+ * returned by a driver getter or attribute put; the message is cancelled on
+ * failure.
+ */
+static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
+ void *hdr;
+ int err = 0;
+ u16 mode;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = devlink_nl_put_handle(msg, devlink);
+ if (err)
+ goto nla_put_failure;
+
+ if (ops->eswitch_mode_get) {
+ err = ops->eswitch_mode_get(devlink, &mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ if (ops->eswitch_inline_mode_get) {
+ err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
+ inline_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ if (ops->eswitch_encap_mode_get) {
+ err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+/* Netlink doit handler: reply with a DEVLINK_CMD_ESWITCH_GET message for the
+ * devlink instance found in info->user_ptr[0].
+ *
+ * Returns -ENOMEM when the reply cannot be allocated, the fill error when
+ * building it fails, otherwise the result of genlmsg_reply().
+ */
+int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET,
+ info->snd_portid, info->snd_seq, 0);
+
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+/* Netlink doit handler: apply the eswitch mode, inline mode and encap mode
+ * attributes present in the request, in that order, through the driver's
+ * setters.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when an attribute is given but the
+ * matching setter is missing, or the first error from
+ * devlink_rate_nodes_check() or a driver setter. Processing stops at the
+ * first error, so settings applied before it stay applied.
+ */
+int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const struct devlink_ops *ops = devlink->ops;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
+ int err = 0;
+ u16 mode;
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
+ if (!ops->eswitch_mode_set)
+ return -EOPNOTSUPP;
+ mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+ err = devlink_rate_nodes_check(devlink, mode, info->extack);
+ if (err)
+ return err;
+ err = ops->eswitch_mode_set(devlink, mode, info->extack);
+ if (err)
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
+ if (!ops->eswitch_inline_mode_set)
+ return -EOPNOTSUPP;
+ inline_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
+ err = ops->eswitch_inline_mode_set(devlink, inline_mode,
+ info->extack);
+ if (err)
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
+ if (!ops->eswitch_encap_mode_set)
+ return -EOPNOTSUPP;
+ encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
+ err = ops->eswitch_encap_mode_set(devlink, encap_mode,
+ info->extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* Add serial number @sn to the info reply as
+ * DEVLINK_ATTR_INFO_SERIAL_NUMBER. Does nothing and returns 0 when @req has
+ * no message attached; otherwise returns the nla_put_string() result.
+ */
+int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
+{
+ if (!req->msg)
+ return 0;
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
+
+/* Add board serial number @bsn to the info reply as
+ * DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER. Does nothing and returns 0 when
+ * @req has no message attached; otherwise returns the nla_put_string()
+ * result.
+ */
+int devlink_info_board_serial_number_put(struct devlink_info_req *req,
+ const char *bsn)
+{
+ if (!req->msg)
+ return 0;
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
+ bsn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
+
+/* Common worker for the devlink_info_version_*_put() helpers.
+ *
+ * First reports (@version_name, @version_type) to req->version_cb when one
+ * is set. Then, if @req has a message attached, appends a nest of type @attr
+ * holding the version name and value strings.
+ *
+ * Returns 0 on success (including when no message is attached), -EMSGSIZE
+ * when the nest cannot be started, or the nla_put_string() error; a
+ * partially written nest is cancelled.
+ */
+static int devlink_info_version_put(struct devlink_info_req *req, int attr,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ struct nlattr *nest;
+ int err;
+
+ if (req->version_cb)
+ req->version_cb(version_name, version_type,
+ req->version_cb_priv);
+
+ /* Callback-only request: nothing to emit. */
+ if (!req->msg)
+ return 0;
+
+ nest = nla_nest_start_noflag(req->msg, attr);
+ if (!nest)
+ return -EMSGSIZE;
+
+ err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME,
+ version_name);
+ if (err)
+ goto nla_put_failure;
+
+ err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE,
+ version_value);
+ if (err)
+ goto nla_put_failure;
+
+ nla_nest_end(req->msg, nest);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(req->msg, nest);
+ return err;
+}
+
+/* Report a version as DEVLINK_ATTR_INFO_VERSION_FIXED with version type
+ * NONE. Returns the devlink_info_version_put() result.
+ */
+int devlink_info_version_fixed_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
+
+/* Report a version as DEVLINK_ATTR_INFO_VERSION_STORED with version type
+ * NONE. Returns the devlink_info_version_put() result.
+ */
+int devlink_info_version_stored_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
+
+/* Like devlink_info_version_stored_put(), but with a caller-supplied
+ * @version_type instead of NONE.
+ */
+int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext);
+
+/* Report a version as DEVLINK_ATTR_INFO_VERSION_RUNNING with version type
+ * NONE. Returns the devlink_info_version_put() result.
+ */
+int devlink_info_version_running_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
+
+/* Like devlink_info_version_running_put(), but with a caller-supplied
+ * @version_type instead of NONE.
+ */
+int devlink_info_version_running_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
+
+/* Append DEVLINK_ATTR_INFO_DRIVER_NAME with @drv's name to req->msg.
+ * Returns 0 without emitting anything when @drv is NULL or its name is
+ * empty; otherwise returns the nla_put_string() result.
+ * Unlike the other put helpers, req->msg is not checked for NULL here.
+ */
+static int devlink_nl_driver_info_get(struct device_driver *drv,
+ struct devlink_info_req *req)
+{
+ if (!drv)
+ return 0;
+
+ if (drv->name[0])
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME,
+ drv->name);
+
+ return 0;
+}
+
+/* Build an info message of type @cmd in @msg: the genetlink header, the
+ * instance handle, whatever the driver's optional ->info_get() adds, and the
+ * driver name of the underlying device.
+ *
+ * Returns 0 on success, -EMSGSIZE when the header or handle does not fit, or
+ * the error from ->info_get() / the driver-name put; the message is
+ * cancelled on failure.
+ */
+static int
+devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags, struct netlink_ext_ack *extack)
+{
+ struct device *dev = devlink_to_dev(devlink);
+ struct devlink_info_req req = {};
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto err_cancel_msg;
+
+ req.msg = msg;
+ if (devlink->ops->info_get) {
+ err = devlink->ops->info_get(devlink, &req, extack);
+ if (err)
+ goto err_cancel_msg;
+ }
+
+ err = devlink_nl_driver_info_get(dev->driver, &req);
+ if (err)
+ goto err_cancel_msg;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+/* Netlink doit handler: reply with a DEVLINK_CMD_INFO_GET message for the
+ * devlink instance found in info->user_ptr[0].
+ *
+ * Returns -ENOMEM when the reply cannot be allocated, the fill error when
+ * building it fails, otherwise the result of genlmsg_reply().
+ */
+int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+/* Dump callback: append one DEVLINK_CMD_INFO_GET message for @devlink to
+ * @msg, flagged NLM_F_MULTI. An -EOPNOTSUPP result from the fill is turned
+ * into 0, so such an instance is skipped rather than failing the dump.
+ */
+static int
+devlink_nl_cmd_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ int err;
+
+ err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ return err;
+}
+
+/* Command descriptor wiring the per-instance dump callback for "info get". */
+const struct devlink_cmd devl_cmd_info_get = {
+ .dump_one = devlink_nl_cmd_info_get_dump_one,
+};
+
+/* Build a flash-update notification of type @cmd in @msg.
+ *
+ * Every message carries the genetlink header and the instance handle. Only
+ * DEVLINK_CMD_FLASH_UPDATE_STATUS messages additionally carry the optional
+ * status message and component strings and the done/total/timeout values
+ * from @params.
+ *
+ * Returns 0 on success or -EMSGSIZE; the message is cancelled on failure.
+ */
+static int devlink_nl_flash_update_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ enum devlink_command cmd,
+ struct devlink_flash_notify *params)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ /* Begin/end notifications carry no status payload. */
+ if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
+ goto out;
+
+ if (params->status_msg &&
+ nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
+ params->status_msg))
+ goto nla_put_failure;
+ if (params->component &&
+ nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
+ params->component))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
+ params->done, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
+ params->total, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
+ params->timeout, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+out:
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/* Multicast a flash-update notification of type @cmd for @devlink to the
+ * DEVLINK_MCGRP_CONFIG group in the instance's network namespace.
+ *
+ * Warns when @cmd is not one of the three flash-update commands. Nothing is
+ * sent when the instance does not carry the DEVLINK_REGISTERED mark, or when
+ * allocating or filling the message fails.
+ */
+static void __devlink_flash_update_notify(struct devlink *devlink,
+ enum devlink_command cmd,
+ struct devlink_flash_notify *params)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
+ cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
+ cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_flash_update_fill(msg, devlink, cmd, params);
+ if (err)
+ goto out_free_msg;
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ return;
+
+out_free_msg:
+ nlmsg_free(msg);
+}
+
+/* Send a DEVLINK_CMD_FLASH_UPDATE notification with zeroed parameters. */
+static void devlink_flash_update_begin_notify(struct devlink *devlink)
+{
+ struct devlink_flash_notify params = {};
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE,
+ &params);
+}
+
+/* Send a DEVLINK_CMD_FLASH_UPDATE_END notification with zeroed parameters. */
+static void devlink_flash_update_end_notify(struct devlink *devlink)
+{
+ struct devlink_flash_notify params = {};
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_END,
+ &params);
+}
+
+/* Send a DEVLINK_CMD_FLASH_UPDATE_STATUS notification reporting progress.
+ * @status_msg: optional status text (omitted from the message when NULL).
+ * @component: optional component name (omitted from the message when NULL).
+ * @done: progress value reported as STATUS_DONE.
+ * @total: progress value reported as STATUS_TOTAL.
+ * The timeout field is left at zero.
+ */
+void devlink_flash_update_status_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long done,
+ unsigned long total)
+{
+ struct devlink_flash_notify params = {
+ .status_msg = status_msg,
+ .component = component,
+ .done = done,
+ .total = total,
+ };
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_STATUS,
+ &params);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
+
+/* Send a DEVLINK_CMD_FLASH_UPDATE_STATUS notification reporting a timeout.
+ * @status_msg: optional status text (omitted from the message when NULL).
+ * @component: optional component name (omitted from the message when NULL).
+ * @timeout: value reported as STATUS_TIMEOUT.
+ * The done and total fields are left at zero.
+ */
+void devlink_flash_update_timeout_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long timeout)
+{
+ struct devlink_flash_notify params = {
+ .status_msg = status_msg,
+ .component = component,
+ .timeout = timeout,
+ };
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_STATUS,
+ &params);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
+
+/* Search state for devlink_flash_component_lookup_cb().
+ * @lookup_name: component name being searched for.
+ * @lookup_name_found: set once a matching component version was reported.
+ */
+struct devlink_flash_component_lookup_ctx {
+ const char *lookup_name;
+ bool lookup_name_found;
+};
+
+/* Version callback used to check that a component name exists.
+ * @version_cb_priv points to a struct devlink_flash_component_lookup_ctx.
+ * Entries that are not of type COMPONENT are ignored, as is everything after
+ * a match has been recorded; otherwise the found flag is set when
+ * @version_name equals the name being looked up.
+ */
+static void
+devlink_flash_component_lookup_cb(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv)
+{
+ struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv;
+
+ if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT ||
+ lookup_ctx->lookup_name_found)
+ return;
+
+ lookup_ctx->lookup_name_found =
+ !strcmp(lookup_ctx->lookup_name, version_name);
+}
+
+/* Validate the optional flash component attribute against the components
+ * the driver reports through ->info_get().
+ *
+ * @nla_component: component attribute from the request, may be NULL.
+ * @p_component: set to the component name on success; left untouched when no
+ * attribute was given.
+ * @extack: netlink extended ack for error reporting.
+ *
+ * Returns 0 when no component was requested or the component is known,
+ * -EOPNOTSUPP when the driver has no ->info_get(), the ->info_get() error,
+ * or -EINVAL when the component is not reported by the driver.
+ */
+static int devlink_flash_component_get(struct devlink *devlink,
+ struct nlattr *nla_component,
+ const char **p_component,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_flash_component_lookup_ctx lookup_ctx = {};
+ struct devlink_info_req req = {};
+ const char *component;
+ int ret;
+
+ if (!nla_component)
+ return 0;
+
+ component = nla_data(nla_component);
+
+ if (!devlink->ops->info_get) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "component update is not supported by this device");
+ return -EOPNOTSUPP;
+ }
+
+ /* req.msg stays NULL: the version put helpers in this file then only
+ * invoke the callback and emit no attributes.
+ */
+ lookup_ctx.lookup_name = component;
+ req.version_cb = devlink_flash_component_lookup_cb;
+ req.version_cb_priv = &lookup_ctx;
+
+ ret = devlink->ops->info_get(devlink, &req, NULL);
+ if (ret)
+ return ret;
+
+ if (!lookup_ctx.lookup_name_found) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "selected component is not supported by this device");
+ return -EINVAL;
+ }
+ *p_component = component;
+ return 0;
+}
+
+/* Netlink handler for the flash update command.
+ *
+ * Validates the optional component and overwrite-mask attributes, loads the
+ * firmware file named by DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME, and calls the
+ * driver's ->flash_update() bracketed by begin/end notifications. The
+ * firmware is released before returning.
+ *
+ * Returns -EOPNOTSUPP when the driver lacks ->flash_update() or does not
+ * support a supplied overwrite mask, -EINVAL when the file-name attribute
+ * check fails, the component validation or request_firmware() error, or
+ * otherwise the ->flash_update() result.
+ */
+int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *nla_overwrite_mask, *nla_file_name;
+ struct devlink_flash_update_params params = {};
+ struct devlink *devlink = info->user_ptr[0];
+ const char *file_name;
+ u32 supported_params;
+ int ret;
+
+ if (!devlink->ops->flash_update)
+ return -EOPNOTSUPP;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME))
+ return -EINVAL;
+
+ ret = devlink_flash_component_get(devlink,
+ info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT],
+ &params.component, info->extack);
+ if (ret)
+ return ret;
+
+ supported_params = devlink->ops->supported_flash_update_params;
+
+ nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
+ if (nla_overwrite_mask) {
+ struct nla_bitfield32 sections;
+
+ if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask,
+ "overwrite settings are not supported by this device");
+ return -EOPNOTSUPP;
+ }
+ sections = nla_get_bitfield32(nla_overwrite_mask);
+ params.overwrite_mask = sections.value & sections.selector;
+ }
+
+ nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME];
+ file_name = nla_data(nla_file_name);
+ ret = request_firmware(&params.fw, file_name, devlink->dev);
+ if (ret) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name,
+ "failed to locate the requested firmware file");
+ return ret;
+ }
+
+ devlink_flash_update_begin_notify(devlink);
+ ret = devlink->ops->flash_update(devlink, &params, info->extack);
+ devlink_flash_update_end_notify(devlink);
+
+ release_firmware(params.fw);
+
+ return ret;
+}
+
+/*
+ * Append the driver's running version values to @buf.
+ *
+ * A temporary netlink message is allocated and handed to the driver's
+ * info_get() via req.msg. The resulting top-level attributes are walked;
+ * inside every DEVLINK_ATTR_INFO_VERSION_RUNNING nest, each
+ * DEVLINK_ATTR_INFO_VERSION_VALUE payload is appended to @buf followed by a
+ * single space, bounded by @len. Allocation or info_get() failure is silent:
+ * @buf is simply left as it was.
+ *
+ * NOTE(review): strlcat() appends, so @buf is presumably expected to hold a
+ * NUL-terminated string on entry - confirm against callers.
+ * The caller is responsible for checking that ops->info_get exists (the
+ * wrapper devlink_compat_running_version() does so).
+ */
+static void __devlink_compat_running_version(struct devlink *devlink,
+ char *buf, size_t len)
+{
+ struct devlink_info_req req = {};
+ const struct nlattr *nlattr;
+ struct sk_buff *msg;
+ int rem, err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ req.msg = msg;
+ err = devlink->ops->info_get(devlink, &req, NULL);
+ if (err)
+ goto free_msg;
+
+ nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
+ const struct nlattr *kv;
+ int rem_kv;
+
+ /* Only the "running" version nests are of interest here. */
+ if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
+ continue;
+
+ nla_for_each_nested(kv, nlattr, rem_kv) {
+ if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
+ continue;
+
+ strlcat(buf, nla_data(kv), len);
+ strlcat(buf, " ", len);
+ }
+ }
+free_msg:
+ nlmsg_free(msg);
+}
+
+
+/*
+ * Locked wrapper around __devlink_compat_running_version().
+ *
+ * Does nothing when the driver provides no info_get(). Otherwise the devlink
+ * instance lock is taken and the version string is collected into @buf only
+ * if the instance is still registered.
+ */
+void devlink_compat_running_version(struct devlink *devlink,
+ char *buf, size_t len)
+{
+ if (!devlink->ops->info_get)
+ return;
+
+ devl_lock(devlink);
+ if (devl_is_registered(devlink))
+ __devlink_compat_running_version(devlink, buf, len);
+ devl_unlock(devlink);
+}
+
+
+/*
+ * Flash @file_name through the driver's flash_update() op.
+ *
+ * Runs entirely under the devlink instance lock. Only params.fw is filled
+ * in (no component, no overwrite mask) and no extack is passed to the
+ * driver. Begin/end notifications bracket the driver call, and the firmware
+ * is released before unlocking.
+ *
+ * Return: the driver's flash_update() result, -ENODEV if the instance is
+ * not registered, -EOPNOTSUPP if the driver has no flash_update(), or the
+ * request_firmware() error.
+ */
+int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
+{
+ struct devlink_flash_update_params params = {};
+ int ret;
+
+ devl_lock(devlink);
+ if (!devl_is_registered(devlink)) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ if (!devlink->ops->flash_update) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
+ ret = request_firmware(&params.fw, file_name, devlink->dev);
+ if (ret)
+ goto out_unlock;
+
+ devlink_flash_update_begin_notify(devlink);
+ ret = devlink->ops->flash_update(devlink, &params, NULL);
+ devlink_flash_update_end_notify(devlink);
+
+ release_firmware(params.fw);
+out_unlock:
+ devl_unlock(devlink);
+
+ return ret;
+}
+
+
+/*
+ * Fill @msg with a DEVLINK_CMD_SELFTESTS_GET message for @devlink.
+ *
+ * The message carries the devlink handle and a DEVLINK_ATTR_SELFTESTS nest
+ * holding one flag attribute per selftest id (from
+ * DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1 up to DEVLINK_ATTR_SELFTEST_ID_MAX)
+ * for which the driver's selftest_check() returns non-zero.
+ *
+ * ops->selftest_check is called unconditionally, so callers must have
+ * verified it is non-NULL.
+ *
+ * Return: 0 on success; -EMSGSIZE (or the nla_put_flag() error) on failure,
+ * in which case the partially built message is cancelled.
+ */
+static int
+devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink,
+ u32 portid, u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *selftests;
+ void *hdr;
+ int err;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
+ DEVLINK_CMD_SELFTESTS_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ /* Default error for the handle and nest-start failures below. */
+ err = -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto err_cancel_msg;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto err_cancel_msg;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ if (devlink->ops->selftest_check(devlink, i, extack)) {
+ err = nla_put_flag(msg, i);
+ if (err)
+ goto err_cancel_msg;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+
+/*
+ * Reply to a single selftests-get request for the devlink instance in
+ * info->user_ptr[0].
+ *
+ * Return: the genlmsg_reply() result on success, -EOPNOTSUPP if the driver
+ * has no selftest_check(), -ENOMEM if the reply cannot be allocated, or the
+ * devlink_nl_selftests_fill() error (the reply is freed in that case).
+ */
+int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ if (!devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid,
+ info->snd_seq, 0, info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+
+/*
+ * Per-instance step of the selftests-get dump.
+ *
+ * Instances whose driver has no selftest_check() contribute nothing and
+ * report success (0); otherwise one NLM_F_MULTI message is filled into @msg
+ * using the requester's portid and sequence number from @cb.
+ */
+static int
+devlink_nl_cmd_selftests_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ if (!devlink->ops->selftest_check)
+ return 0;
+
+ return devlink_nl_selftests_fill(msg, devlink,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+}
+
+
+/* Command descriptor for selftests-get; only the dump_one callback is set. */
+const struct devlink_cmd devl_cmd_selftests_get = {
+ .dump_one = devlink_nl_cmd_selftests_get_dump_one,
+};
+
+/*
+ * Add one DEVLINK_ATTR_SELFTEST_RESULT nest to @skb, containing the test
+ * @id (u32) and its @test_status (u8).
+ *
+ * Return: 0 on success, -EMSGSIZE if the nest cannot be started or either
+ * attribute cannot be added; a started nest is cancelled on failure.
+ */
+static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
+ enum devlink_selftest_status test_status)
+{
+ struct nlattr *result_attr;
+
+ result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT);
+ if (!result_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS,
+ test_status))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, result_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, result_attr);
+ return -EMSGSIZE;
+}
+
+/*
+ * Policy for the attributes nested inside DEVLINK_ATTR_SELFTESTS, indexed by
+ * selftest id; the flash selftest id is the only entry and is a flag.
+ */
+static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
+ [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
+};
+
+/*
+ * Run the selftests requested in DEVLINK_ATTR_SELFTESTS and reply with one
+ * result entry per requested test.
+ *
+ * The nested request is parsed with devlink_selftest_nl_policy. For every
+ * selftest id whose flag is set in the request, the driver's
+ * selftest_check() is consulted first: if it returns zero the test is
+ * reported as DEVLINK_SELFTEST_STATUS_SKIP, otherwise selftest_run() is
+ * called and its status is reported.
+ *
+ * Return: the genlmsg_reply() result on success; -EOPNOTSUPP if the driver
+ * lacks selftest_run() or selftest_check(); -EINVAL if the SELFTESTS
+ * attribute is missing; the nla_parse_nested() error; -ENOMEM if the reply
+ * cannot be allocated; -EMSGSIZE if building the reply fails (the reply is
+ * freed).
+ */
+int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *attrs, *selftests;
+ struct sk_buff *msg;
+ void *hdr;
+ int err;
+ int i;
+
+ if (!devlink->ops->selftest_run || !devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS))
+ return -EINVAL;
+
+ attrs = info->attrs[DEVLINK_ATTR_SELFTESTS];
+
+ err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs,
+ devlink_selftest_nl_policy, info->extack);
+ if (err < 0)
+ return err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ /* err is not reassigned below: every later failure reports -EMSGSIZE. */
+ err = -EMSGSIZE;
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto genlmsg_cancel;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ enum devlink_selftest_status test_status;
+
+ /* Only ids flagged in the request produce a result entry. */
+ if (nla_get_flag(tb[i])) {
+ if (!devlink->ops->selftest_check(devlink, i,
+ info->extack)) {
+ if (devlink_selftest_result_put(msg, i,
+ DEVLINK_SELFTEST_STATUS_SKIP))
+ goto selftests_nest_cancel;
+ continue;
+ }
+
+ test_status = devlink->ops->selftest_run(devlink, i,
+ info->extack);
+ if (devlink_selftest_result_put(msg, i, test_status))
+ goto selftests_nest_cancel;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+selftests_nest_cancel:
+ nla_nest_cancel(msg, selftests);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return err;
+}
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index bdd7ad25c7e8..941174e157d4 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -139,6 +139,16 @@ devlink_dump_state(struct netlink_callback *cb)
return (struct devlink_nl_dump_state *)cb->ctx;
}
+/*
+ * Put the devlink handle into @msg: the bus name (DEVLINK_ATTR_BUS_NAME)
+ * and the device name (DEVLINK_ATTR_DEV_NAME), both as string attributes.
+ *
+ * Return: 0 on success, -EMSGSIZE if either attribute cannot be added.
+ */
+static inline int
+devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+ if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
+ return -EMSGSIZE;
+ return 0;
+}
+
/* Commands */
extern const struct devlink_cmd devl_cmd_get;
extern const struct devlink_cmd devl_cmd_port_get;
@@ -157,6 +167,9 @@ extern const struct devlink_cmd devl_cmd_rate_get;
extern const struct devlink_cmd devl_cmd_linecard_get;
extern const struct devlink_cmd devl_cmd_selftests_get;
+/* Notify */
+void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
+
/* Ports */
int devlink_port_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr);
@@ -176,6 +189,12 @@ static inline bool devlink_reload_supported(const struct devlink_ops *ops)
return ops->reload_down && ops->reload_up;
}
+/* Resources */
+struct devlink_resource;
+int devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info);
+
/* Line cards */
struct devlink_linecard;
@@ -183,8 +202,19 @@ struct devlink_linecard *
devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info);
/* Rates */
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack);
struct devlink_rate *
devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info);
struct devlink_rate *
devlink_rate_node_get_from_info(struct devlink *devlink,
struct genl_info *info);
+/* Devlink nl cmds */
+int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 056d9ca14a3d..97d30ea98b00 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -143,10 +143,6 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
};
-static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
- [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
-};
-
#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
WARN_ON_ONCE(!(devlink_port)->registered)
#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \
@@ -596,15 +592,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
return NULL;
}
-static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
-{
- if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
- return -EMSGSIZE;
- if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
- return -EMSGSIZE;
- return 0;
-}
-
static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
{
struct nlattr *nested_attr;
@@ -641,185 +628,6 @@ size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
+ nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
}
-struct devlink_reload_combination {
- enum devlink_reload_action action;
- enum devlink_reload_limit limit;
-};
-
-static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
- {
- /* can't reinitialize driver with no down time */
- .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- .limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
- },
-};
-
-static bool
-devlink_reload_combination_is_invalid(enum devlink_reload_action action,
- enum devlink_reload_limit limit)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
- if (devlink_reload_invalid_combinations[i].action == action &&
- devlink_reload_invalid_combinations[i].limit == limit)
- return true;
- return false;
-}
-
-static bool
-devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
-{
- return test_bit(action, &devlink->ops->reload_actions);
-}
-
-static bool
-devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
-{
- return test_bit(limit, &devlink->ops->reload_limits);
-}
-
-static int devlink_reload_stat_put(struct sk_buff *msg,
- enum devlink_reload_limit limit, u32 value)
-{
- struct nlattr *reload_stats_entry;
-
- reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
- if (!reload_stats_entry)
- return -EMSGSIZE;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
- nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
- goto nla_put_failure;
- nla_nest_end(msg, reload_stats_entry);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, reload_stats_entry);
- return -EMSGSIZE;
-}
-
-static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
-{
- struct nlattr *reload_stats_attr, *act_info, *act_stats;
- int i, j, stat_idx;
- u32 value;
-
- if (!is_remote)
- reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
- else
- reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);
-
- if (!reload_stats_attr)
- return -EMSGSIZE;
-
- for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
- if ((!is_remote &&
- !devlink_reload_action_is_supported(devlink, i)) ||
- i == DEVLINK_RELOAD_ACTION_UNSPEC)
- continue;
- act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
- if (!act_info)
- goto nla_put_failure;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
- goto action_info_nest_cancel;
- act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
- if (!act_stats)
- goto action_info_nest_cancel;
-
- for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
- /* Remote stats are shown even if not locally supported.
- * Stats of actions with unspecified limit are shown
- * though drivers don't need to register unspecified
- * limit.
- */
- if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
- !devlink_reload_limit_is_supported(devlink, j)) ||
- devlink_reload_combination_is_invalid(i, j))
- continue;
-
- stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
- if (!is_remote)
- value = devlink->stats.reload_stats[stat_idx];
- else
- value = devlink->stats.remote_reload_stats[stat_idx];
- if (devlink_reload_stat_put(msg, j, value))
- goto action_stats_nest_cancel;
- }
- nla_nest_end(msg, act_stats);
- nla_nest_end(msg, act_info);
- }
- nla_nest_end(msg, reload_stats_attr);
- return 0;
-
-action_stats_nest_cancel:
- nla_nest_cancel(msg, act_stats);
-action_info_nest_cancel:
- nla_nest_cancel(msg, act_info);
-nla_put_failure:
- nla_nest_cancel(msg, reload_stats_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- struct nlattr *dev_stats;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
- goto nla_put_failure;
-
- dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
- if (!dev_stats)
- goto nla_put_failure;
-
- if (devlink_reload_stats_put(msg, devlink, false))
- goto dev_stats_nest_cancel;
- if (devlink_reload_stats_put(msg, devlink, true))
- goto dev_stats_nest_cancel;
-
- nla_nest_end(msg, dev_stats);
- genlmsg_end(msg, hdr);
- return 0;
-
-dev_stats_nest_cancel:
- nla_nest_cancel(msg, dev_stats);
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
- WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
static int devlink_nl_port_attrs_put(struct sk_buff *msg,
struct devlink_port *devlink_port)
{
@@ -1274,39 +1082,6 @@ devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
return false;
}
-static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
-}
-
-const struct devlink_cmd devl_cmd_get = {
- .dump_one = devlink_nl_cmd_get_dump_one,
-};
-
static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
@@ -3064,85 +2839,8 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- const struct devlink_ops *ops = devlink->ops;
- enum devlink_eswitch_encap_mode encap_mode;
- u8 inline_mode;
- void *hdr;
- int err = 0;
- u16 mode;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = devlink_nl_put_handle(msg, devlink);
- if (err)
- goto nla_put_failure;
-
- if (ops->eswitch_mode_get) {
- err = ops->eswitch_mode_get(devlink, &mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
- if (err)
- goto nla_put_failure;
- }
-
- if (ops->eswitch_inline_mode_get) {
- err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
- inline_mode);
- if (err)
- goto nla_put_failure;
- }
-
- if (ops->eswitch_encap_mode_get) {
- err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
- if (err)
- goto nla_put_failure;
- }
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET,
- info->snd_portid, info->snd_seq, 0);
-
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
- struct netlink_ext_ack *extack)
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
{
struct devlink_rate *devlink_rate;
@@ -3154,52 +2852,6 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
return 0;
}
-static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const struct devlink_ops *ops = devlink->ops;
- enum devlink_eswitch_encap_mode encap_mode;
- u8 inline_mode;
- int err = 0;
- u16 mode;
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
- if (!ops->eswitch_mode_set)
- return -EOPNOTSUPP;
- mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
- err = devlink_rate_nodes_check(devlink, mode, info->extack);
- if (err)
- return err;
- err = ops->eswitch_mode_set(devlink, mode, info->extack);
- if (err)
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
- if (!ops->eswitch_inline_mode_set)
- return -EOPNOTSUPP;
- inline_mode = nla_get_u8(
- info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
- err = ops->eswitch_inline_mode_set(devlink, inline_mode,
- info->extack);
- if (err)
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
- if (!ops->eswitch_encap_mode_set)
- return -EOPNOTSUPP;
- encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
- err = ops->eswitch_encap_mode_set(devlink, encap_mode,
- info->extack);
- if (err)
- return err;
- }
-
- return 0;
-}
-
int devlink_dpipe_match_put(struct sk_buff *skb,
struct devlink_dpipe_match *match)
{
@@ -4170,10 +3822,9 @@ static int devlink_nl_cmd_resource_dump(struct sk_buff *skb,
return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
}
-static int
-devlink_resources_validate(struct devlink *devlink,
- struct devlink_resource *resource,
- struct genl_info *info)
+int devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info)
{
struct list_head *resource_list;
int err = 0;
@@ -4193,698 +3844,6 @@ devlink_resources_validate(struct devlink *devlink,
return err;
}
-static struct net *devlink_netns_get(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
- struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
- struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
- struct net *net;
-
- if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
- NL_SET_ERR_MSG_MOD(info->extack, "multiple netns identifying attributes specified");
- return ERR_PTR(-EINVAL);
- }
-
- if (netns_pid_attr) {
- net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
- } else if (netns_fd_attr) {
- net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
- } else if (netns_id_attr) {
- net = get_net_ns_by_id(sock_net(skb->sk),
- nla_get_u32(netns_id_attr));
- if (!net)
- net = ERR_PTR(-EINVAL);
- } else {
- WARN_ON(1);
- net = ERR_PTR(-EINVAL);
- }
- if (IS_ERR(net)) {
- NL_SET_ERR_MSG_MOD(info->extack, "Unknown network namespace");
- return ERR_PTR(-EINVAL);
- }
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
- put_net(net);
- return ERR_PTR(-EPERM);
- }
- return net;
-}
-
-static void devlink_reload_netns_change(struct devlink *devlink,
- struct net *curr_net,
- struct net *dest_net)
-{
- /* Userspace needs to be notified about devlink objects
- * removed from original and entering new network namespace.
- * The rest of the devlink objects are re-created during
- * reload process so the notifications are generated separatelly.
- */
- devlink_notify_unregister(devlink);
- move_netdevice_notifier_net(curr_net, dest_net,
- &devlink->netdevice_nb);
- write_pnet(&devlink->_net, dest_net);
- devlink_notify_register(devlink);
-}
-
-static void devlink_reload_failed_set(struct devlink *devlink,
- bool reload_failed)
-{
- if (devlink->reload_failed == reload_failed)
- return;
- devlink->reload_failed = reload_failed;
- devlink_notify(devlink, DEVLINK_CMD_NEW);
-}
-
-bool devlink_is_reload_failed(const struct devlink *devlink)
-{
- return devlink->reload_failed;
-}
-EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
-
-static void
-__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
- enum devlink_reload_limit limit, u32 actions_performed)
-{
- unsigned long actions = actions_performed;
- int stat_idx;
- int action;
-
- for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) {
- stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
- reload_stats[stat_idx]++;
- }
- devlink_notify(devlink, DEVLINK_CMD_NEW);
-}
-
-static void
-devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
- u32 actions_performed)
-{
- __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit,
- actions_performed);
-}
-
-/**
- * devlink_remote_reload_actions_performed - Update devlink on reload actions
- * performed which are not a direct result of devlink reload call.
- *
- * This should be called by a driver after performing reload actions in case it was not
- * a result of devlink reload call. For example fw_activate was performed as a result
- * of devlink reload triggered fw_activate on another host.
- * The motivation for this function is to keep data on reload actions performed on this
- * function whether it was done due to direct devlink reload call or not.
- *
- * @devlink: devlink
- * @limit: reload limit
- * @actions_performed: bitmask of actions performed
- */
-void devlink_remote_reload_actions_performed(struct devlink *devlink,
- enum devlink_reload_limit limit,
- u32 actions_performed)
-{
- if (WARN_ON(!actions_performed ||
- actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
- actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) ||
- limit > DEVLINK_RELOAD_LIMIT_MAX))
- return;
-
- __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit,
- actions_performed);
-}
-EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed);
-
-int devlink_reload(struct devlink *devlink, struct net *dest_net,
- enum devlink_reload_action action,
- enum devlink_reload_limit limit,
- u32 *actions_performed, struct netlink_ext_ack *extack)
-{
- u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
- struct net *curr_net;
- int err;
-
- memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
- sizeof(remote_reload_stats));
-
- err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
- if (err)
- return err;
-
- curr_net = devlink_net(devlink);
- if (dest_net && !net_eq(dest_net, curr_net))
- devlink_reload_netns_change(devlink, curr_net, dest_net);
-
- err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
- devlink_reload_failed_set(devlink, !!err);
- if (err)
- return err;
-
- WARN_ON(!(*actions_performed & BIT(action)));
- /* Catch driver on updating the remote action within devlink reload */
- WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
- sizeof(remote_reload_stats)));
- devlink_reload_stats_update(devlink, limit, *actions_performed);
- return 0;
-}
-
-static int
-devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed,
- enum devlink_command cmd, struct genl_info *info)
-{
- struct sk_buff *msg;
- void *hdr;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd);
- if (!hdr)
- goto free_msg;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed,
- actions_performed))
- goto nla_put_failure;
- genlmsg_end(msg, hdr);
-
- return genlmsg_reply(msg, info);
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
-free_msg:
- nlmsg_free(msg);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- enum devlink_reload_action action;
- enum devlink_reload_limit limit;
- struct net *dest_net = NULL;
- u32 actions_performed;
- int err;
-
- err = devlink_resources_validate(devlink, NULL, info);
- if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
- action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
- else
- action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
-
- if (!devlink_reload_action_is_supported(devlink, action)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested reload action is not supported by the driver");
- return -EOPNOTSUPP;
- }
-
- limit = DEVLINK_RELOAD_LIMIT_UNSPEC;
- if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) {
- struct nla_bitfield32 limits;
- u32 limits_selected;
-
- limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
- limits_selected = limits.value & limits.selector;
- if (!limits_selected) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected");
- return -EINVAL;
- }
- for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
- if (limits_selected & BIT(limit))
- break;
- /* UAPI enables multiselection, but currently it is not used */
- if (limits_selected != BIT(limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Multiselection of limit is not supported");
- return -EOPNOTSUPP;
- }
- if (!devlink_reload_limit_is_supported(devlink, limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested limit is not supported by the driver");
- return -EOPNOTSUPP;
- }
- if (devlink_reload_combination_is_invalid(action, limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested limit is invalid for this action");
- return -EINVAL;
- }
- }
- if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
- info->attrs[DEVLINK_ATTR_NETNS_FD] ||
- info->attrs[DEVLINK_ATTR_NETNS_ID]) {
- dest_net = devlink_netns_get(skb, info);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
- }
-
- err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
-
- if (dest_net)
- put_net(dest_net);
-
- if (err)
- return err;
- /* For backward compatibility generate reply only if attributes used by user */
- if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS])
- return 0;
-
- return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
- DEVLINK_CMD_RELOAD, info);
-}
-
-static int devlink_nl_flash_update_fill(struct sk_buff *msg,
- struct devlink *devlink,
- enum devlink_command cmd,
- struct devlink_flash_notify *params)
-{
- void *hdr;
-
- hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
- goto out;
-
- if (params->status_msg &&
- nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
- params->status_msg))
- goto nla_put_failure;
- if (params->component &&
- nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
- params->component))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
- params->done, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
- params->total, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
- params->timeout, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
-out:
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void __devlink_flash_update_notify(struct devlink *devlink,
- enum devlink_command cmd,
- struct devlink_flash_notify *params)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
- cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
- cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
-
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_flash_update_fill(msg, devlink, cmd, params);
- if (err)
- goto out_free_msg;
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
- return;
-
-out_free_msg:
- nlmsg_free(msg);
-}
-
-static void devlink_flash_update_begin_notify(struct devlink *devlink)
-{
- struct devlink_flash_notify params = {};
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE,
- &params);
-}
-
-static void devlink_flash_update_end_notify(struct devlink *devlink)
-{
- struct devlink_flash_notify params = {};
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_END,
- &params);
-}
-
-void devlink_flash_update_status_notify(struct devlink *devlink,
- const char *status_msg,
- const char *component,
- unsigned long done,
- unsigned long total)
-{
- struct devlink_flash_notify params = {
- .status_msg = status_msg,
- .component = component,
- .done = done,
- .total = total,
- };
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_STATUS,
- &params);
-}
-EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
-
-void devlink_flash_update_timeout_notify(struct devlink *devlink,
- const char *status_msg,
- const char *component,
- unsigned long timeout)
-{
- struct devlink_flash_notify params = {
- .status_msg = status_msg,
- .component = component,
- .timeout = timeout,
- };
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_STATUS,
- &params);
-}
-EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
-
-struct devlink_info_req {
- struct sk_buff *msg;
- void (*version_cb)(const char *version_name,
- enum devlink_info_version_type version_type,
- void *version_cb_priv);
- void *version_cb_priv;
-};
-
-struct devlink_flash_component_lookup_ctx {
- const char *lookup_name;
- bool lookup_name_found;
-};
-
-static void
-devlink_flash_component_lookup_cb(const char *version_name,
- enum devlink_info_version_type version_type,
- void *version_cb_priv)
-{
- struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv;
-
- if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT ||
- lookup_ctx->lookup_name_found)
- return;
-
- lookup_ctx->lookup_name_found =
- !strcmp(lookup_ctx->lookup_name, version_name);
-}
-
-static int devlink_flash_component_get(struct devlink *devlink,
- struct nlattr *nla_component,
- const char **p_component,
- struct netlink_ext_ack *extack)
-{
- struct devlink_flash_component_lookup_ctx lookup_ctx = {};
- struct devlink_info_req req = {};
- const char *component;
- int ret;
-
- if (!nla_component)
- return 0;
-
- component = nla_data(nla_component);
-
- if (!devlink->ops->info_get) {
- NL_SET_ERR_MSG_ATTR(extack, nla_component,
- "component update is not supported by this device");
- return -EOPNOTSUPP;
- }
-
- lookup_ctx.lookup_name = component;
- req.version_cb = devlink_flash_component_lookup_cb;
- req.version_cb_priv = &lookup_ctx;
-
- ret = devlink->ops->info_get(devlink, &req, NULL);
- if (ret)
- return ret;
-
- if (!lookup_ctx.lookup_name_found) {
- NL_SET_ERR_MSG_ATTR(extack, nla_component,
- "selected component is not supported by this device");
- return -EINVAL;
- }
- *p_component = component;
- return 0;
-}
-
-static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *nla_overwrite_mask, *nla_file_name;
- struct devlink_flash_update_params params = {};
- struct devlink *devlink = info->user_ptr[0];
- const char *file_name;
- u32 supported_params;
- int ret;
-
- if (!devlink->ops->flash_update)
- return -EOPNOTSUPP;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME))
- return -EINVAL;
-
- ret = devlink_flash_component_get(devlink,
- info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT],
- &params.component, info->extack);
- if (ret)
- return ret;
-
- supported_params = devlink->ops->supported_flash_update_params;
-
- nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
- if (nla_overwrite_mask) {
- struct nla_bitfield32 sections;
-
- if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) {
- NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask,
- "overwrite settings are not supported by this device");
- return -EOPNOTSUPP;
- }
- sections = nla_get_bitfield32(nla_overwrite_mask);
- params.overwrite_mask = sections.value & sections.selector;
- }
-
- nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME];
- file_name = nla_data(nla_file_name);
- ret = request_firmware(&params.fw, file_name, devlink->dev);
- if (ret) {
- NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name, "failed to locate the requested firmware file");
- return ret;
- }
-
- devlink_flash_update_begin_notify(devlink);
- ret = devlink->ops->flash_update(devlink, &params, info->extack);
- devlink_flash_update_end_notify(devlink);
-
- release_firmware(params.fw);
-
- return ret;
-}
-
-static int
-devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink,
- u32 portid, u32 seq, int flags,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *selftests;
- void *hdr;
- int err;
- int i;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
- DEVLINK_CMD_SELFTESTS_GET);
- if (!hdr)
- return -EMSGSIZE;
-
- err = -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto err_cancel_msg;
-
- selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
- if (!selftests)
- goto err_cancel_msg;
-
- for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
- i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
- if (devlink->ops->selftest_check(devlink, i, extack)) {
- err = nla_put_flag(msg, i);
- if (err)
- goto err_cancel_msg;
- }
- }
-
- nla_nest_end(msg, selftests);
- genlmsg_end(msg, hdr);
- return 0;
-
-err_cancel_msg:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- if (!devlink->ops->selftest_check)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid,
- info->snd_seq, 0, info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_selftests_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- if (!devlink->ops->selftest_check)
- return 0;
-
- return devlink_nl_selftests_fill(msg, devlink,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->extack);
-}
-
-const struct devlink_cmd devl_cmd_selftests_get = {
- .dump_one = devlink_nl_cmd_selftests_get_dump_one,
-};
-
-static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
- enum devlink_selftest_status test_status)
-{
- struct nlattr *result_attr;
-
- result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT);
- if (!result_attr)
- return -EMSGSIZE;
-
- if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) ||
- nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS,
- test_status))
- goto nla_put_failure;
-
- nla_nest_end(skb, result_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, result_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_selftests_run(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
- struct devlink *devlink = info->user_ptr[0];
- struct nlattr *attrs, *selftests;
- struct sk_buff *msg;
- void *hdr;
- int err;
- int i;
-
- if (!devlink->ops->selftest_run || !devlink->ops->selftest_check)
- return -EOPNOTSUPP;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS))
- return -EINVAL;
-
- attrs = info->attrs[DEVLINK_ATTR_SELFTESTS];
-
- err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs,
- devlink_selftest_nl_policy, info->extack);
- if (err < 0)
- return err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = -EMSGSIZE;
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
- &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN);
- if (!hdr)
- goto free_msg;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto genlmsg_cancel;
-
- selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
- if (!selftests)
- goto genlmsg_cancel;
-
- for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
- i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
- enum devlink_selftest_status test_status;
-
- if (nla_get_flag(tb[i])) {
- if (!devlink->ops->selftest_check(devlink, i,
- info->extack)) {
- if (devlink_selftest_result_put(msg, i,
- DEVLINK_SELFTEST_STATUS_SKIP))
- goto selftests_nest_cancel;
- continue;
- }
-
- test_status = devlink->ops->selftest_run(devlink, i,
- info->extack);
- if (devlink_selftest_result_put(msg, i, test_status))
- goto selftests_nest_cancel;
- }
- }
-
- nla_nest_end(msg, selftests);
- genlmsg_end(msg, hdr);
- return genlmsg_reply(msg, info);
-
-selftests_nest_cancel:
- nla_nest_cancel(msg, selftests);
-genlmsg_cancel:
- genlmsg_cancel(msg, hdr);
-free_msg:
- nlmsg_free(msg);
- return err;
-}
-
static const struct devlink_param devlink_param_generic[] = {
{
.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
@@ -6430,205 +5389,6 @@ out_unlock:
return err;
}
-int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
-{
- if (!req->msg)
- return 0;
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
-}
-EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
-
-int devlink_info_board_serial_number_put(struct devlink_info_req *req,
- const char *bsn)
-{
- if (!req->msg)
- return 0;
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
- bsn);
-}
-EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
-
-static int devlink_info_version_put(struct devlink_info_req *req, int attr,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- struct nlattr *nest;
- int err;
-
- if (req->version_cb)
- req->version_cb(version_name, version_type,
- req->version_cb_priv);
-
- if (!req->msg)
- return 0;
-
- nest = nla_nest_start_noflag(req->msg, attr);
- if (!nest)
- return -EMSGSIZE;
-
- err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME,
- version_name);
- if (err)
- goto nla_put_failure;
-
- err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE,
- version_value);
- if (err)
- goto nla_put_failure;
-
- nla_nest_end(req->msg, nest);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(req->msg, nest);
- return err;
-}
-
-int devlink_info_version_fixed_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
-
-int devlink_info_version_stored_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
-
-int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
- version_name, version_value,
- version_type);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext);
-
-int devlink_info_version_running_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
-
-int devlink_info_version_running_put_ext(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
- version_name, version_value,
- version_type);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
-
-static int devlink_nl_driver_info_get(struct device_driver *drv,
- struct devlink_info_req *req)
-{
- if (!drv)
- return 0;
-
- if (drv->name[0])
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME,
- drv->name);
-
- return 0;
-}
-
-static int
-devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags, struct netlink_ext_ack *extack)
-{
- struct device *dev = devlink_to_dev(devlink);
- struct devlink_info_req req = {};
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto err_cancel_msg;
-
- req.msg = msg;
- if (devlink->ops->info_get) {
- err = devlink->ops->info_get(devlink, &req, extack);
- if (err)
- goto err_cancel_msg;
- }
-
- err = devlink_nl_driver_info_get(dev->driver, &req);
- if (err)
- goto err_cancel_msg;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-err_cancel_msg:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
- struct netlink_callback *cb)
-{
- int err;
-
- err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->extack);
- if (err == -EOPNOTSUPP)
- err = 0;
- return err;
-}
-
-const struct devlink_cmd devl_cmd_info_get = {
- .dump_one = devlink_nl_cmd_info_get_dump_one,
-};
-
struct devlink_fmsg_item {
struct list_head list;
int attrtype;
@@ -9257,35 +8017,6 @@ const struct genl_small_ops devlink_nl_ops[56] = {
/* -- No new ops here! Use split ops going forward! -- */
};
-bool devlink_reload_actions_valid(const struct devlink_ops *ops)
-{
- const struct devlink_reload_combination *comb;
- int i;
-
- if (!devlink_reload_supported(ops)) {
- if (WARN_ON(ops->reload_actions))
- return false;
- return true;
- }
-
- if (WARN_ON(!ops->reload_actions ||
- ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
- ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
- return false;
-
- if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) ||
- ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX)))
- return false;
-
- for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) {
- comb = &devlink_reload_invalid_combinations[i];
- if (ops->reload_actions == BIT(comb->action) &&
- ops->reload_limits == BIT(comb->limit))
- return false;
- }
- return true;
-}
-
static void
devlink_trap_policer_notify(struct devlink *devlink,
const struct devlink_trap_policer_item *policer_item,
@@ -12068,85 +10799,6 @@ devl_trap_policers_unregister(struct devlink *devlink,
}
EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
-static void __devlink_compat_running_version(struct devlink *devlink,
- char *buf, size_t len)
-{
- struct devlink_info_req req = {};
- const struct nlattr *nlattr;
- struct sk_buff *msg;
- int rem, err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- req.msg = msg;
- err = devlink->ops->info_get(devlink, &req, NULL);
- if (err)
- goto free_msg;
-
- nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
- const struct nlattr *kv;
- int rem_kv;
-
- if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
- continue;
-
- nla_for_each_nested(kv, nlattr, rem_kv) {
- if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
- continue;
-
- strlcat(buf, nla_data(kv), len);
- strlcat(buf, " ", len);
- }
- }
-free_msg:
- nlmsg_free(msg);
-}
-
-void devlink_compat_running_version(struct devlink *devlink,
- char *buf, size_t len)
-{
- if (!devlink->ops->info_get)
- return;
-
- devl_lock(devlink);
- if (devl_is_registered(devlink))
- __devlink_compat_running_version(devlink, buf, len);
- devl_unlock(devlink);
-}
-
-int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
-{
- struct devlink_flash_update_params params = {};
- int ret;
-
- devl_lock(devlink);
- if (!devl_is_registered(devlink)) {
- ret = -ENODEV;
- goto out_unlock;
- }
-
- if (!devlink->ops->flash_update) {
- ret = -EOPNOTSUPP;
- goto out_unlock;
- }
-
- ret = request_firmware(&params.fw, file_name, devlink->dev);
- if (ret)
- goto out_unlock;
-
- devlink_flash_update_begin_notify(devlink);
- ret = devlink->ops->flash_update(devlink, &params, NULL);
- devlink_flash_update_end_notify(devlink);
-
- release_firmware(params.fw);
-out_unlock:
- devl_unlock(devlink);
-
- return ret;
-}
-
int devlink_compat_phys_port_name_get(struct net_device *dev,
char *name, size_t len)
{
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 1507b8cdb360..22d3f16b0e6d 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -299,7 +299,7 @@ static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
struct net_device *dev = to_net_dev(d);
struct dsa_port *cpu_dp = dev->dsa_ptr;
- return sprintf(buf, "%s\n",
+ return sysfs_emit(buf, "%s\n",
dsa_tag_protocol_to_str(cpu_dp->tag_ops));
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 26c458f50ac6..6957971c2db2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2692,7 +2692,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (!err)
dsa_bridge_mtu_normalization(dp);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_WEAK_MOD(extack, "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
@@ -2705,8 +2706,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
err = dsa_port_lag_join(dp, info->upper_dev,
info->upper_info, extack);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_MOD(info->info.extack,
- "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
@@ -2718,8 +2719,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (info->linking) {
err = dsa_port_hsr_join(dp, info->upper_dev);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_MOD(info->info.extack,
- "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
diff --git a/net/ethtool/mm.c b/net/ethtool/mm.c
index 7e51f7633001..e612856eed8c 100644
--- a/net/ethtool/mm.c
+++ b/net/ethtool/mm.c
@@ -56,7 +56,7 @@ static int mm_prepare_data(const struct ethnl_req_info *req_base,
out_complete:
ethnl_ops_complete(dev);
- return 0;
+ return ret;
}
static int mm_reply_size(const struct ethnl_req_info *req_base,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 006c1f0ed8b4..94df935ee0c5 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ int raw_hash_sk(struct sock *sk)
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_nulls_head *hlist;
- hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
+ hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)];
spin_lock(&h->lock);
__sk_nulls_add_node_rcu(sk, hlist);
@@ -160,9 +160,9 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
* RFC 1122: SHOULD pass TOS value up to the transport layer.
* -> It does. And not only TOS, but all IP header.
*/
-static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
+static int raw_v4_input(struct net *net, struct sk_buff *skb,
+ const struct iphdr *iph, int hash)
{
- struct net *net = dev_net(skb->dev);
struct hlist_nulls_head *hlist;
struct hlist_nulls_node *hnode;
int sdif = inet_sdif(skb);
@@ -193,9 +193,10 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
- int hash = protocol & (RAW_HTABLE_SIZE - 1);
+ struct net *net = dev_net(skb->dev);
- return raw_v4_input(skb, ip_hdr(skb), hash);
+ return raw_v4_input(net, skb, ip_hdr(skb),
+ raw_hashfunc(net, protocol));
}
static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
@@ -271,7 +272,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
struct sock *sk;
int hash;
- hash = protocol & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, protocol);
hlist = &raw_v4_hashinfo.ht[hash];
rcu_read_lock();
@@ -287,11 +288,13 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
/* Charge it to the socket. */
ipv4_pktinfo_prepare(sk, skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -302,7 +305,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
nf_reset_ct(skb);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 94aad3870c5f..cf26d65ca389 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -6,6 +6,7 @@
#include <linux/bpf.h>
#include <linux/init.h>
#include <linux/wait.h>
+#include <linux/util_macros.h>
#include <net/inet_common.h>
#include <net/tls.h>
@@ -639,10 +640,9 @@ EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
*/
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
struct proto *prot = newsk->sk_prot;
- if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
+ if (is_insidevar(prot, tcp_bpf_prots))
newsk->sk_prot = sk->sk_prot_creator;
}
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8320d0ecb13a..ea370afa70ed 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2102,6 +2102,7 @@ process:
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ drop_reason = SKB_DROP_REASON_TCP_MINTTL;
goto discard_and_relse;
}
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f7a84a4acffc..faa47f9ea73a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3127,17 +3127,17 @@ static void add_v4_addrs(struct inet6_dev *idev)
offset = sizeof(struct in6_addr) - 4;
memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
- if (idev->dev->flags&IFF_POINTOPOINT) {
+ if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) {
+ scope = IPV6_ADDR_COMPATv4;
+ plen = 96;
+ pflags |= RTF_NONEXTHOP;
+ } else {
if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE)
return;
addr.s6_addr32[0] = htonl(0xfe800000);
scope = IFA_LINK;
plen = 64;
- } else {
- scope = IPV6_ADDR_COMPATv4;
- plen = 96;
- pflags |= RTF_NONEXTHOP;
}
if (addr.s6_addr32[3]) {
@@ -3447,6 +3447,30 @@ static void addrconf_gre_config(struct net_device *dev)
}
#endif
+static void addrconf_init_auto_addrs(struct net_device *dev)
+{
+ switch (dev->type) {
+#if IS_ENABLED(CONFIG_IPV6_SIT)
+ case ARPHRD_SIT:
+ addrconf_sit_config(dev);
+ break;
+#endif
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+ case ARPHRD_IP6GRE:
+ case ARPHRD_IPGRE:
+ addrconf_gre_config(dev);
+ break;
+#endif
+ case ARPHRD_LOOPBACK:
+ init_loopback(dev);
+ break;
+
+ default:
+ addrconf_dev_config(dev);
+ break;
+ }
+}
+
static int fixup_permanent_addr(struct net *net,
struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
@@ -3615,26 +3639,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
- switch (dev->type) {
-#if IS_ENABLED(CONFIG_IPV6_SIT)
- case ARPHRD_SIT:
- addrconf_sit_config(dev);
- break;
-#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
- case ARPHRD_IP6GRE:
- case ARPHRD_IPGRE:
- addrconf_gre_config(dev);
- break;
-#endif
- case ARPHRD_LOOPBACK:
- init_loopback(dev);
- break;
-
- default:
- addrconf_dev_config(dev);
- break;
- }
+ addrconf_init_auto_addrs(dev);
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
@@ -6397,7 +6402,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
if (idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- addrconf_dev_config(idev->dev);
+ addrconf_init_auto_addrs(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
@@ -6408,7 +6413,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- addrconf_dev_config(idev->dev);
+ addrconf_init_auto_addrs(idev->dev);
}
}
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ada087b50541..bac9ba747bde 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -152,7 +152,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
- hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
sk_nulls_for_each(sk, hnode, hlist) {
@@ -338,7 +338,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
struct sock *sk;
int hash;
- hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
sk_nulls_for_each(sk, hnode, hlist) {
@@ -355,17 +355,19 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
/* Charge it to the socket. */
skb_dst_drop(skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -386,7 +388,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
@@ -410,7 +412,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (inet->hdrincl) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 11b736a76bd7..543ee2167720 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1708,8 +1708,9 @@ process:
if (static_branch_unlikely(&ip6_min_hopcount)) {
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
- if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
+ if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ drop_reason = SKB_DROP_REASON_TCP_MINTTL;
goto discard_and_relse;
}
}
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index c2aae2a6d6a6..97bb4401dd3e 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -213,7 +213,6 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
ret = ieee802154_parse_frame_start(skb, &hdr);
if (ret) {
pr_debug("got invalid frame\n");
- kfree_skb(skb);
return;
}
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 45bbe3e54cc2..3150f3f0c872 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -587,6 +587,11 @@ static void mctp_sk_unhash(struct sock *sk)
del_timer_sync(&msk->key_expiry);
}
+static void mctp_sk_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
static struct proto mctp_proto = {
.name = "MCTP",
.owner = THIS_MODULE,
@@ -623,6 +628,7 @@ static int mctp_pf_create(struct net *net, struct socket *sock,
return -ENOMEM;
sock_init_data(sock, sk);
+ sk->sk_destruct = mctp_sk_destruct;
rc = 0;
if (sk->sk_prot->init)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c00858344f02..9a830573480e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1371,9 +1371,6 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
- continue;
-
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
continue;
@@ -1443,11 +1440,14 @@ static bool gc_worker_skip_ct(const struct nf_conn *ct)
static bool gc_worker_can_early_drop(const struct nf_conn *ct)
{
const struct nf_conntrack_l4proto *l4proto;
+ u8 protonum = nf_ct_protonum(ct);
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP)
+ return false;
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
- l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
+ l4proto = nf_ct_l4proto_find(protonum);
if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
return true;
@@ -1504,7 +1504,8 @@ static void gc_worker(struct work_struct *work)
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
nf_ct_offload_timeout(tmp);
- continue;
+ if (!nf_conntrack_max95)
+ continue;
}
if (expired_count > GC_SCAN_EXPIRED_MAX) {
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3937cbee9418..91eacc9b0b98 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -142,10 +142,11 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
}
#endif
+/* do_basic_checks ensures sch->length > 0, do not use before */
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
- ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))) && \
- (sch)->length; \
+ (offset) < (skb)->len && \
+ ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
/* Some validity checks to make sure the chunks are fine */
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 81c26a96c30b..04bd0ed4d2ae 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -193,8 +193,11 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
+ enum udp_conntrack state =
+ test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ UDP_CT_REPLIED : UDP_CT_UNREPLIED;
- timeout = tn->timeouts[UDP_CT_REPLIED];
+ timeout = tn->timeouts[state];
timeout -= tn->offload_timeout;
} else {
return;
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 0ccabf3fa6aa..9505f9d188ff 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -39,7 +39,7 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
}
static int nf_flow_rule_route_inet(struct net *net,
- const struct flow_offload *flow,
+ struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 4d9b99abe37d..1c26f03fc661 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -679,7 +679,7 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
return 0;
}
-int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@@ -704,7 +704,7 @@ int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
-int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
+int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@@ -735,7 +735,7 @@ nf_flow_offload_rule_alloc(struct net *net,
{
const struct nf_flowtable *flowtable = offload->flowtable;
const struct flow_offload_tuple *tuple, *other_tuple;
- const struct flow_offload *flow = offload->flow;
+ struct flow_offload *flow = offload->flow;
struct dst_entry *other_dst = NULL;
struct nf_flow_rule *flow_rule;
int err = -ENOMEM;
@@ -895,8 +895,9 @@ static int flow_offload_rule_add(struct flow_offload_work *offload,
ok_count += flow_offload_tuple_add(offload, flow_rule[0],
FLOW_OFFLOAD_DIR_ORIGINAL);
- ok_count += flow_offload_tuple_add(offload, flow_rule[1],
- FLOW_OFFLOAD_DIR_REPLY);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
+ FLOW_OFFLOAD_DIR_REPLY);
if (ok_count == 0)
return -ENOENT;
@@ -926,7 +927,8 @@ static void flow_offload_work_del(struct flow_offload_work *offload)
{
clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
- flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}
@@ -946,7 +948,9 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
u64 lastused;
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
- flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
+ &stats[1]);
lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
offload->flow->timeout = max_t(u64, offload->flow->timeout,
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 6f7f4392cffb..5a4cb796150f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -400,6 +400,11 @@ static int nr_listen(struct socket *sock, int backlog)
struct sock *sk = sock->sk;
lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN);
sk->sk_max_ack_backlog = backlog;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a71795355aec..fcee6012293b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1004,14 +1004,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key) {
error = -ENOMEM;
- goto err_kfree_key;
+ goto err_kfree_flow;
}
ovs_match_init(&match, key, false, &mask);
error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
ovs_flow_mask_key(&new_flow->key, key, true, &mask);
@@ -1019,14 +1019,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
key, log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
/* Validate actions. */
error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
&new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
- goto err_kfree_flow;
+ goto err_kfree_key;
}
reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
@@ -1126,10 +1126,10 @@ err_unlock_ovs:
kfree_skb(reply);
err_kfree_acts:
ovs_nla_free_flow_actions(acts);
-err_kfree_flow:
- ovs_flow_free(new_flow, false);
err_kfree_key:
kfree(key);
+err_kfree_flow:
+ ovs_flow_free(new_flow, false);
error:
return error;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e20d1a973417..416976f70322 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -107,7 +107,8 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
rcu_assign_pointer(flow->stats[cpu],
new_stats);
- cpumask_set_cpu(cpu, &flow->cpu_used_mask);
+ cpumask_set_cpu(cpu,
+ flow->cpu_used_mask);
goto unlock;
}
}
@@ -135,7 +136,8 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
memset(ovs_stats, 0, sizeof(*ovs_stats));
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ for (cpu = 0; cpu < nr_cpu_ids;
+ cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
@@ -159,7 +161,8 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
int cpu;
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ for (cpu = 0; cpu < nr_cpu_ids;
+ cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 073ab73ffeaa..b5711aff6e76 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -229,7 +229,7 @@ struct sw_flow {
*/
struct sw_flow_key key;
struct sw_flow_id id;
- struct cpumask cpu_used_mask;
+ struct cpumask *cpu_used_mask;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
struct sw_flow_stats __rcu *stats[]; /* One for each CPU. First one
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 0a0e4c283f02..791504b7f42b 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -79,6 +79,7 @@ struct sw_flow *ovs_flow_alloc(void)
return ERR_PTR(-ENOMEM);
flow->stats_last_writer = -1;
+ flow->cpu_used_mask = (struct cpumask *)&flow->stats[nr_cpu_ids];
/* Initialize the default stat node. */
stats = kmem_cache_alloc_node(flow_stats_cache,
@@ -91,7 +92,7 @@ struct sw_flow *ovs_flow_alloc(void)
RCU_INIT_POINTER(flow->stats[0], stats);
- cpumask_set_cpu(0, &flow->cpu_used_mask);
+ cpumask_set_cpu(0, flow->cpu_used_mask);
return flow;
err:
@@ -115,7 +116,7 @@ static void flow_free(struct sw_flow *flow)
flow->sf_acts);
/* We open code this to make sure cpu 0 is always considered */
for (cpu = 0; cpu < nr_cpu_ids;
- cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
(struct sw_flow_stats __force *)flow->stats[cpu]);
@@ -1196,7 +1197,8 @@ int ovs_flow_init(void)
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ (nr_cpu_ids
- * sizeof(struct sw_flow_stats *)),
+ * sizeof(struct sw_flow_stats *))
+ + cpumask_size(),
0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 97bfdf9fd028..722936f7dd98 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -84,7 +84,10 @@ static struct qrtr_node *node_get(unsigned int node_id)
node->id = node_id;
- radix_tree_insert(&nodes, node_id, node);
+ if (radix_tree_insert(&nodes, node_id, node)) {
+ kfree(node);
+ return NULL;
+ }
return node;
}
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 36fefc3957d7..ca2b17f32670 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -488,6 +488,12 @@ static int rose_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
+ lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
struct rose_sock *rose = rose_sk(sk);
@@ -497,8 +503,10 @@ static int rose_listen(struct socket *sock, int backlog)
memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS);
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN;
+ release_sock(sk);
return 0;
}
+ release_sock(sk);
return -EOPNOTSUPP;
}
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 7ae023b37a83..a20986806fea 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -36,6 +36,15 @@ config AF_RXRPC_INJECT_LOSS
Say Y here to inject packet loss by discarding some received and some
transmitted packets.
+config AF_RXRPC_INJECT_RX_DELAY
+ bool "Inject delay into packet reception"
+ depends on SYSCTL
+ help
+ Say Y here to inject a delay into packet reception, allowing an
+ extended RTT to be modelled. The delay can be configured using
+ /proc/sys/net/rxrpc/inject_rx_delay, setting a number of
+ milliseconds up to 0.5s (note that the granularity is actually in
+ jiffies).
config AF_RXRPC_DEBUG
bool "RxRPC dynamic debugging"
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index ebbd4a1c3f86..102f5cbff91a 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -786,7 +786,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
INIT_LIST_HEAD(&rx->sock_calls);
INIT_LIST_HEAD(&rx->to_be_accepted);
INIT_LIST_HEAD(&rx->recvmsg_q);
- rwlock_init(&rx->recvmsg_lock);
+ spin_lock_init(&rx->recvmsg_lock);
rwlock_init(&rx->call_lock);
memset(&rx->srx, 0, sizeof(rx->srx));
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 433060cade03..9e19688b0e06 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -149,7 +149,7 @@ struct rxrpc_sock {
struct list_head sock_calls; /* List of calls owned by this socket */
struct list_head to_be_accepted; /* calls awaiting acceptance */
struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */
- rwlock_t recvmsg_lock; /* Lock for recvmsg_q */
+ spinlock_t recvmsg_lock; /* Lock for recvmsg_q */
struct key *key; /* security for this socket */
struct key *securities; /* list of server security descriptors */
struct rb_root calls; /* User ID -> call mapping */
@@ -284,7 +284,9 @@ struct rxrpc_local {
struct task_struct *io_thread;
struct completion io_thread_ready; /* Indication that the I/O thread started */
struct rxrpc_sock *service; /* Service(s) listening on this endpoint */
- struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ struct sk_buff_head rx_delay_queue; /* Delay injection queue */
+#endif
struct sk_buff_head rx_queue; /* Received packets */
struct list_head conn_attend_q; /* Conns requiring immediate attention */
struct list_head call_attend_q; /* Calls requiring immediate attention */
@@ -688,9 +690,11 @@ struct rxrpc_call {
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
+ u16 ackr_sack_base; /* Starting slot in SACK table ring */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
- atomic64_t ackr_window; /* Base (in LSW) and top (in MSW) of SACK window */
- atomic_t ackr_nr_unacked; /* Number of unacked packets */
+ rxrpc_seq_t ackr_window; /* Base of SACK window */
+ rxrpc_seq_t ackr_wtop; /* Top of SACK window */
+ unsigned int ackr_nr_unacked; /* Number of unacked packets */
atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */
struct {
#define RXRPC_SACK_SIZE 256
@@ -1109,6 +1113,9 @@ extern unsigned long rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+extern unsigned long rxrpc_inject_rx_delay;
+#endif
/*
* net_ns.c
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 3e8689fdc437..0f5a1d77b890 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -195,7 +195,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->peer_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_peer *peer = b->peer_backlog[tail];
- rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_conn);
+ rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_peer);
kfree(peer);
tail = (tail + 1) & (size - 1);
}
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 1abdef15debc..e363f21a2014 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -498,9 +498,18 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
rxrpc_propose_ack_rx_idle);
- if (atomic_read(&call->ackr_nr_unacked) > 2)
- rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
- rxrpc_propose_ack_input_data);
+ if (call->ackr_nr_unacked > 2) {
+ if (call->peer->rtt_count < 3)
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_rtt);
+ else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real()))
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_old_rtt);
+ else
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
+ rxrpc_propose_ack_input_data);
+ }
/* Make sure the timer is restarted */
if (!__rxrpc_call_is_complete(call)) {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f3c9f0201c15..6eaffb0d8fdc 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -167,7 +167,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
call->tx_total_len = -1;
call->next_rx_timo = 20 * HZ;
call->next_req_timo = 1 * HZ;
- atomic64_set(&call->ackr_window, 0x100000001ULL);
+ call->ackr_window = 1;
+ call->ackr_wtop = 1;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
@@ -560,7 +561,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
rxrpc_put_call_slot(call);
/* Make sure we don't get any more notifications */
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
if (!list_empty(&call->recvmsg_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -573,7 +574,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
call->recvmsg_link.next = NULL;
call->recvmsg_link.prev = NULL;
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
if (put)
rxrpc_put_call(call, rxrpc_call_put_unnotify);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index f30323de82bd..89ac05a711a4 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -8,11 +8,6 @@
#include <linux/slab.h>
#include "ar-internal.h"
-static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
- .ref = REFCOUNT_INIT(1),
- .debug_id = UINT_MAX,
-};
-
/*
* Find a service connection under RCU conditions.
*
@@ -132,8 +127,6 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
*/
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
refcount_set(&conn->ref, 2);
- conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle,
- rxrpc_bundle_get_service_conn);
atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 367927a99881..d68848fce51f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -338,7 +338,8 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
static void rxrpc_input_update_ack_window(struct rxrpc_call *call,
rxrpc_seq_t window, rxrpc_seq_t wtop)
{
- atomic64_set_release(&call->ackr_window, ((u64)wtop) << 32 | window);
+ call->ackr_window = window;
+ call->ackr_wtop = wtop;
}
/*
@@ -367,9 +368,9 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct sk_buff *oos;
rxrpc_serial_t serial = sp->hdr.serial;
- u64 win = atomic64_read(&call->ackr_window);
- rxrpc_seq_t window = lower_32_bits(win);
- rxrpc_seq_t wtop = upper_32_bits(win);
+ unsigned int sack = call->ackr_sack_base;
+ rxrpc_seq_t window = call->ackr_window;
+ rxrpc_seq_t wtop = call->ackr_wtop;
rxrpc_seq_t wlimit = window + call->rx_winsize - 1;
rxrpc_seq_t seq = sp->hdr.seq;
bool last = sp->hdr.flags & RXRPC_LAST_PACKET;
@@ -410,20 +411,23 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
/* Queue the packet. */
if (seq == window) {
- rxrpc_seq_t reset_from;
- bool reset_sack = false;
-
if (sp->hdr.flags & RXRPC_REQUEST_ACK)
ack_reason = RXRPC_ACK_REQUESTED;
/* Send an immediate ACK if we fill in a hole */
else if (!skb_queue_empty(&call->rx_oos_queue))
ack_reason = RXRPC_ACK_DELAY;
else
- atomic_inc_return(&call->ackr_nr_unacked);
+ call->ackr_nr_unacked++;
window++;
- if (after(window, wtop))
+ if (after(window, wtop)) {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_none);
wtop = window;
+ } else {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_advance);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
+ }
+
rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg);
@@ -440,43 +444,39 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
__skb_unlink(oos, &call->rx_oos_queue);
last = osp->hdr.flags & RXRPC_LAST_PACKET;
seq = osp->hdr.seq;
- if (!reset_sack) {
- reset_from = seq;
- reset_sack = true;
- }
+ call->ackr_sack_table[sack] = 0;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_fill);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
window++;
rxrpc_input_queue_data(call, oos, window, wtop,
- rxrpc_receive_queue_oos);
+ rxrpc_receive_queue_oos);
}
spin_unlock(&call->recvmsg_queue.lock);
- if (reset_sack) {
- do {
- call->ackr_sack_table[reset_from % RXRPC_SACK_SIZE] = 0;
- } while (reset_from++, before(reset_from, window));
- }
+ call->ackr_sack_base = sack;
} else {
- bool keep = false;
+ unsigned int slot;
ack_reason = RXRPC_ACK_OUT_OF_SEQUENCE;
- if (!call->ackr_sack_table[seq % RXRPC_SACK_SIZE]) {
- call->ackr_sack_table[seq % RXRPC_SACK_SIZE] = 1;
- keep = 1;
+ slot = seq - window;
+ sack = (sack + slot) % RXRPC_SACK_SIZE;
+
+ if (call->ackr_sack_table[sack % RXRPC_SACK_SIZE]) {
+ ack_reason = RXRPC_ACK_DUPLICATE;
+ goto send_ack;
}
+ call->ackr_sack_table[sack % RXRPC_SACK_SIZE] |= 1;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_oos);
+
if (after(seq + 1, wtop)) {
wtop = seq + 1;
rxrpc_input_update_ack_window(call, window, wtop);
}
- if (!keep) {
- ack_reason = RXRPC_ACK_DUPLICATE;
- goto send_ack;
- }
-
skb_queue_walk(&call->rx_oos_queue, oos) {
struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
@@ -567,8 +567,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_serial_t serial = sp->hdr.serial;
rxrpc_seq_t seq0 = sp->hdr.seq;
- _enter("{%llx,%x},{%u,%x}",
- atomic64_read(&call->ackr_window), call->rx_highest_seq,
+ _enter("{%x,%x,%x},{%u,%x}",
+ call->ackr_window, call->ackr_wtop, call->rx_highest_seq,
skb->len, seq0);
if (__rxrpc_call_is_complete(call))
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 9e9dfb2fc559..4a3a08a0e2cd 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -25,6 +25,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
*/
int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
{
+ struct sk_buff_head *rx_queue;
struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
if (unlikely(!local)) {
@@ -36,7 +37,16 @@ int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
skb->mark = RXRPC_SKB_MARK_PACKET;
rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv);
- skb_queue_tail(&local->rx_queue, skb);
+ rx_queue = &local->rx_queue;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ if (rxrpc_inject_rx_delay ||
+ !skb_queue_empty(&local->rx_delay_queue)) {
+ skb->tstamp = ktime_add_ms(skb->tstamp, rxrpc_inject_rx_delay);
+ rx_queue = &local->rx_delay_queue;
+ }
+#endif
+
+ skb_queue_tail(rx_queue, skb);
rxrpc_wake_up_io_thread(local);
return 0;
}
@@ -407,6 +417,9 @@ int rxrpc_io_thread(void *data)
struct rxrpc_local *local = data;
struct rxrpc_call *call;
struct sk_buff *skb;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ ktime_t now;
+#endif
bool should_stop;
complete(&local->io_thread_ready);
@@ -481,6 +494,17 @@ int rxrpc_io_thread(void *data)
continue;
}
+ /* Inject a delay into packets if requested. */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ now = ktime_get_real();
+ while ((skb = skb_peek(&local->rx_delay_queue))) {
+ if (ktime_before(now, skb->tstamp))
+ break;
+ skb = skb_dequeue(&local->rx_delay_queue);
+ skb_queue_tail(&local->rx_queue, skb);
+ }
+#endif
+
if (!skb_queue_empty(&local->rx_queue)) {
spin_lock_irq(&local->rx_queue.lock);
skb_queue_splice_tail_init(&local->rx_queue, &rx_queue);
@@ -502,6 +526,28 @@ int rxrpc_io_thread(void *data)
if (should_stop)
break;
+
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ skb = skb_peek(&local->rx_delay_queue);
+ if (skb) {
+ unsigned long timeout;
+ ktime_t tstamp = skb->tstamp;
+ ktime_t now = ktime_get_real();
+ s64 delay_ns = ktime_to_ns(ktime_sub(tstamp, now));
+
+ if (delay_ns <= 0) {
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+
+ timeout = nsecs_to_jiffies(delay_ns);
+ timeout = max(timeout, 1UL);
+ schedule_timeout(timeout);
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+#endif
+
schedule();
}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index b8eaca5d9f22..7d910aee4f8c 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -108,8 +108,10 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
local->net = net;
local->rxnet = rxrpc_net(net);
INIT_HLIST_NODE(&local->link);
- init_rwsem(&local->defrag_sem);
init_completion(&local->io_thread_ready);
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ skb_queue_head_init(&local->rx_delay_queue);
+#endif
skb_queue_head_init(&local->rx_queue);
INIT_LIST_HEAD(&local->conn_attend_q);
INIT_LIST_HEAD(&local->call_attend_q);
@@ -434,6 +436,9 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
/* At this point, there should be no more packets coming in to the
* local endpoint.
*/
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ rxrpc_purge_queue(&local->rx_delay_queue);
+#endif
rxrpc_purge_queue(&local->rx_queue);
rxrpc_purge_client_connections(local);
}
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 056c428d8bf3..825b81183046 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -53,3 +53,10 @@ unsigned int rxrpc_rx_mtu = 5692;
* sender that we're willing to handle.
*/
unsigned int rxrpc_rx_jumbo_max = 4;
+
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+/*
+ * The delay to inject into packet reception.
+ */
+unsigned long rxrpc_inject_rx_delay;
+#endif
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index a9746be29634..6b2022240076 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -83,59 +83,36 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
struct rxrpc_txbuf *txb)
{
struct rxrpc_ackinfo ackinfo;
- unsigned int qsize;
- rxrpc_seq_t window, wtop, wrap_point, ix, first;
+ unsigned int qsize, sack, wrap, to;
+ rxrpc_seq_t window, wtop;
int rsize;
- u64 wtmp;
u32 mtu, jmax;
u8 *ackp = txb->acks;
- u8 sack_buffer[sizeof(call->ackr_sack_table)] __aligned(8);
- atomic_set(&call->ackr_nr_unacked, 0);
+ call->ackr_nr_unacked = 0;
atomic_set(&call->ackr_nr_consumed, 0);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
+ clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
- /* Barrier against rxrpc_input_data(). */
-retry:
- wtmp = atomic64_read_acquire(&call->ackr_window);
- window = lower_32_bits(wtmp);
- wtop = upper_32_bits(wtmp);
+ window = call->ackr_window;
+ wtop = call->ackr_wtop;
+ sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
txb->ack.firstPacket = htonl(window);
- txb->ack.nAcks = 0;
+ txb->ack.nAcks = wtop - window;
if (after(wtop, window)) {
- /* Try to copy the SACK ring locklessly. We can use the copy,
- * only if the now-current top of the window didn't go past the
- * previously read base - otherwise we can't know whether we
- * have old data or new data.
- */
- memcpy(sack_buffer, call->ackr_sack_table, sizeof(sack_buffer));
- wrap_point = window + RXRPC_SACK_SIZE - 1;
- wtmp = atomic64_read_acquire(&call->ackr_window);
- window = lower_32_bits(wtmp);
- wtop = upper_32_bits(wtmp);
- if (after(wtop, wrap_point)) {
- cond_resched();
- goto retry;
- }
-
- /* The buffer is maintained as a ring with an invariant mapping
- * between bit position and sequence number, so we'll probably
- * need to rotate it.
- */
- txb->ack.nAcks = wtop - window;
- ix = window % RXRPC_SACK_SIZE;
- first = sizeof(sack_buffer) - ix;
+ wrap = RXRPC_SACK_SIZE - sack;
+ to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE);
- if (ix + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
- memcpy(txb->acks, sack_buffer + ix, txb->ack.nAcks);
+ if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
+ memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks);
} else {
- memcpy(txb->acks, sack_buffer + ix, first);
- memcpy(txb->acks + first, sack_buffer,
- txb->ack.nAcks - first);
+ memcpy(txb->acks, call->ackr_sack_table + sack, wrap);
+ memcpy(txb->acks + wrap, call->ackr_sack_table,
+ to - wrap);
}
- ackp += txb->ack.nAcks;
+ ackp += to;
} else if (before(wtop, window)) {
pr_warn("ack window backward %x %x", window, wtop);
} else if (txb->ack.reason == RXRPC_ACK_DELAY) {
@@ -253,12 +230,15 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
call->peer->last_tx_at = ktime_get_seconds();
- if (ret < 0)
+ if (ret < 0) {
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_ack);
- else
+ } else {
trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
rxrpc_tx_point_call_ack);
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ call->peer->rtt_last_req = ktime_get_real();
+ }
rxrpc_tx_backoff(call, ret);
if (!__rxrpc_call_is_complete(call)) {
@@ -429,8 +409,6 @@ dont_set_request_ack:
if (txb->len >= call->peer->maxdata)
goto send_fragmentable;
- down_read(&conn->local->defrag_sem);
-
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
@@ -445,7 +423,6 @@ dont_set_request_ack:
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
conn->peer->last_tx_at = ktime_get_seconds();
- up_read(&conn->local->defrag_sem);
if (ret < 0) {
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
@@ -506,8 +483,6 @@ send_fragmentable:
/* attempt to send this message with fragmentation enabled */
_debug("send fragment");
- down_write(&conn->local->defrag_sem);
-
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
@@ -539,8 +514,6 @@ send_fragmentable:
rxrpc_tx_point_call_data_frag);
}
rxrpc_tx_backoff(call, ret);
-
- up_write(&conn->local->defrag_sem);
goto done;
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 750158a085cd..682636d3b060 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -55,7 +55,6 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
unsigned long timeout = 0;
rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
- u64 wtmp;
if (v == &rxnet->calls) {
seq_puts(seq,
@@ -83,7 +82,6 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
}
acks_hard_ack = READ_ONCE(call->acks_hard_ack);
- wtmp = atomic64_read_acquire(&call->ackr_window);
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
" %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n",
@@ -98,7 +96,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call->abort_code,
call->debug_id,
acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
- lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp),
+ call->ackr_window, call->ackr_wtop - call->ackr_window,
call->rx_serial,
call->cong_cwnd,
timeout);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index dd54ceee7bcc..50d263a6359d 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -40,12 +40,12 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
call->notify_rx(sk, call, call->user_call_ID);
spin_unlock(&call->notify_lock);
} else {
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
if (list_empty(&call->recvmsg_link)) {
rxrpc_get_call(call, rxrpc_call_get_notify_socket);
list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
}
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
if (!sock_flag(sk, SOCK_DEAD)) {
_debug("call %ps", sk->sk_data_ready);
@@ -95,7 +95,7 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
}
trace_rxrpc_recvdata(call, rxrpc_recvmsg_terminal,
- lower_32_bits(atomic64_read(&call->ackr_window)) - 1,
+ call->ackr_window - 1,
call->rx_pkt_offset, call->rx_pkt_len, ret);
return ret;
}
@@ -175,13 +175,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
rx_pkt_len = call->rx_pkt_len;
if (rxrpc_call_has_failed(call)) {
- seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
+ seq = call->ackr_window - 1;
ret = -EIO;
goto done;
}
if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) {
- seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
+ seq = call->ackr_window - 1;
ret = 1;
goto done;
}
@@ -335,14 +335,14 @@ try_again:
/* Find the next call and dequeue it if we're not just peeking. If we
* do dequeue it, that comes with a ref that we will need to release.
*/
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
l = rx->recvmsg_q.next;
call = list_entry(l, struct rxrpc_call, recvmsg_link);
if (!(flags & MSG_PEEK))
list_del_init(&call->recvmsg_link);
else
rxrpc_get_call(call, rxrpc_call_get_recvmsg);
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
call_debug_id = call->debug_id;
trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0);
@@ -431,9 +431,9 @@ error_unlock_call:
error_requeue_call:
if (!(flags & MSG_PEEK)) {
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
list_add(&call->recvmsg_link, &rx->recvmsg_q);
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
} else {
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index ebe0c75e7b07..944320e65ea8 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
if (skb) {
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_CONSUMED);
}
}
@@ -78,6 +78,6 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, refcount_read(&skb->users), n,
rxrpc_skb_put_purge);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_CONSUMED);
}
}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index cde3224a5cd2..ecaeb4ecfb58 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -17,6 +17,9 @@ static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = 255;
static const unsigned long one_jiffy = 1;
static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+static const unsigned long max_500 = 500;
+#endif
/*
* RxRPC operating parameters.
@@ -63,6 +66,19 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra2 = (void *)&max_jiffies,
},
+ /* Values used in milliseconds */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ {
+ .procname = "inject_rx_delay",
+ .data = &rxrpc_inject_rx_delay,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)SYSCTL_LONG_ZERO,
+ .extra2 = (void *)&max_500,
+ },
+#endif
+
/* Non-time values */
{
.procname = "reap_client_conns",
@@ -109,7 +125,6 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&four,
},
-
{ }
};
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index d2cf2aac3adb..d43be8512386 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -110,12 +110,8 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
_enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top);
- for (;;) {
- spin_lock(&call->tx_lock);
- txb = list_first_entry_or_null(&call->tx_buffer,
- struct rxrpc_txbuf, call_link);
- if (!txb)
- break;
+ while ((txb = list_first_entry_or_null(&call->tx_buffer,
+ struct rxrpc_txbuf, call_link))) {
hard_ack = smp_load_acquire(&call->acks_hard_ack);
if (before(hard_ack, txb->seq))
break;
@@ -128,15 +124,11 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue);
- spin_unlock(&call->tx_lock);
-
rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated);
if (after(call->acks_hard_ack, call->tx_bottom + 128))
wake = true;
}
- spin_unlock(&call->tx_lock);
-
if (wake)
wake_up(&call->waitq);
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index de18a0dda6df..f5acb535413d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -195,8 +195,14 @@ config NET_SCH_ETF
To compile this code as a module, choose M here: the
module will be called sch_etf.
+config NET_SCH_MQPRIO_LIB
+ tristate
+ help
+ Common library for manipulating mqprio queue configurations.
+
config NET_SCH_TAPRIO
tristate "Time Aware Priority (taprio) Scheduler"
+ select NET_SCH_MQPRIO_LIB
help
Say Y here if you want to use the Time Aware Priority (taprio) packet
scheduling algorithm.
@@ -253,6 +259,7 @@ config NET_SCH_DRR
config NET_SCH_MQPRIO
tristate "Multi-queue priority scheduler (MQPRIO)"
+ select NET_SCH_MQPRIO_LIB
help
Say Y here if you want to use the Multi-queue Priority scheduler.
This scheduler allows QOS to be offloaded on NICs that have support
diff --git a/net/sched/Makefile b/net/sched/Makefile
index dd14ef413fda..7911eec09837 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
obj-$(CONFIG_NET_SCH_ETS) += sch_ets.o
obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
+obj-$(CONFIG_NET_SCH_MQPRIO_LIB) += sch_mqprio_lib.o
obj-$(CONFIG_NET_SCH_SKBPRIO) += sch_skbprio.o
obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index d68bb5dbf0dc..b126f03c1bb6 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -170,11 +170,11 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
enum ip_conntrack_dir dir,
+ enum ip_conntrack_info ctinfo,
struct flow_action *action)
{
struct nf_conn_labels *ct_labels;
struct flow_action_entry *entry;
- enum ip_conntrack_info ctinfo;
u32 *act_ct_labels;
entry = tcf_ct_flow_table_flow_action_get_next(action);
@@ -182,8 +182,6 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
entry->ct_metadata.mark = READ_ONCE(ct->mark);
#endif
- ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
- IP_CT_ESTABLISHED_REPLY;
/* aligns with the CT reference on the SKB nf_ct_set */
entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;
@@ -237,22 +235,28 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net,
}
static int tcf_ct_flow_table_fill_actions(struct net *net,
- const struct flow_offload *flow,
+ struct flow_offload *flow,
enum flow_offload_tuple_dir tdir,
struct nf_flow_rule *flow_rule)
{
struct flow_action *action = &flow_rule->rule->action;
int num_entries = action->num_entries;
struct nf_conn *ct = flow->ct;
+ enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
int i, err;
switch (tdir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
dir = IP_CT_DIR_ORIGINAL;
+ ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ IP_CT_ESTABLISHED : IP_CT_NEW;
+ if (ctinfo == IP_CT_ESTABLISHED)
+ set_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
break;
case FLOW_OFFLOAD_DIR_REPLY:
dir = IP_CT_DIR_REPLY;
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
break;
default:
return -EOPNOTSUPP;
@@ -262,7 +266,7 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
if (err)
goto err_nat;
- tcf_ct_flow_table_add_action_meta(ct, dir, action);
+ tcf_ct_flow_table_add_action_meta(ct, dir, ctinfo, action);
return 0;
err_nat:
@@ -365,7 +369,7 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
- bool tcp)
+ bool tcp, bool bidirectional)
{
struct nf_conn_act_ct_ext *act_ct_ext;
struct flow_offload *entry;
@@ -384,6 +388,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
+ if (bidirectional)
+ __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags);
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
@@ -407,26 +413,34 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- bool tcp = false;
-
- if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) ||
- !test_bit(IPS_ASSURED_BIT, &ct->status))
- return;
+ bool tcp = false, bidirectional = true;
switch (nf_ct_protonum(ct)) {
case IPPROTO_TCP:
- tcp = true;
- if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ if ((ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+ !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
return;
+
+ tcp = true;
break;
case IPPROTO_UDP:
+ if (!nf_ct_is_confirmed(ct))
+ return;
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status))
+ bidirectional = false;
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE: {
struct nf_conntrack_tuple *tuple;
- if (ct->status & IPS_NAT_MASK)
+ if ((ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+ !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ ct->status & IPS_NAT_MASK)
return;
+
tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* No support for GRE v1 */
if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
@@ -442,7 +456,7 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
ct->status & IPS_SEQ_ADJUST)
return;
- tcf_ct_flow_table_add(ct_ft, ct, tcp);
+ tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional);
}
static bool
@@ -621,13 +635,30 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
ct = flow->ct;
+ if (dir == FLOW_OFFLOAD_DIR_REPLY &&
+ !test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) {
+ /* Only offload reply direction after connection became
+ * assured.
+ */
+ if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
+ else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
+ /* If flow_table flow has already been updated to the
+ * established state, then don't refresh.
+ */
+ return false;
+ }
+
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
flow_offload_teardown(flow);
return false;
}
- ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
- IP_CT_ESTABLISHED_REPLY;
+ if (dir == FLOW_OFFLOAD_DIR_ORIGINAL)
+ ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ IP_CT_ESTABLISHED : IP_CT_NEW;
+ else
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
flow_offload_refresh(nf_ft, flow);
nf_conntrack_get(&ct->ct_general);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a0378e9f0121..c42fcc47dd6d 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -134,6 +134,17 @@ nla_failure:
return -EINVAL;
}
+static void tcf_pedit_cleanup_rcu(struct rcu_head *head)
+{
+ struct tcf_pedit_parms *parms =
+ container_of(head, struct tcf_pedit_parms, rcu);
+
+ kfree(parms->tcfp_keys_ex);
+ kfree(parms->tcfp_keys);
+
+ kfree(parms);
+}
+
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
struct tcf_proto *tp, u32 flags,
@@ -141,10 +152,9 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
{
struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
- struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
- struct tc_pedit_key *keys = NULL;
- struct tcf_pedit_key_ex *keys_ex;
+ struct tcf_pedit_parms *oparms, *nparms;
+ struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tc_pedit *parm;
struct nlattr *pattr;
struct tcf_pedit *p;
@@ -181,18 +191,25 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
return -EINVAL;
}
- keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
- if (IS_ERR(keys_ex))
- return PTR_ERR(keys_ex);
+ nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
+ if (!nparms)
+ return -ENOMEM;
+
+ nparms->tcfp_keys_ex =
+ tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
+ if (IS_ERR(nparms->tcfp_keys_ex)) {
+ ret = PTR_ERR(nparms->tcfp_keys_ex);
+ goto out_free;
+ }
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_pedit_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_pedit_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
- goto out_free;
+ goto out_free_ex;
}
ret = ACT_P_CREATED;
} else if (err > 0) {
@@ -204,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
} else {
ret = err;
- goto out_free;
+ goto out_free_ex;
}
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
@@ -212,48 +229,50 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
ret = err;
goto out_release;
}
- p = to_pedit(*a);
- spin_lock_bh(&p->tcf_lock);
- if (ret == ACT_P_CREATED ||
- (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys)) {
- keys = kmalloc(ksize, GFP_ATOMIC);
- if (!keys) {
- spin_unlock_bh(&p->tcf_lock);
- ret = -ENOMEM;
- goto put_chain;
- }
- kfree(p->tcfp_keys);
- p->tcfp_keys = keys;
- p->tcfp_nkeys = parm->nkeys;
+ nparms->tcfp_off_max_hint = 0;
+ nparms->tcfp_flags = parm->flags;
+ nparms->tcfp_nkeys = parm->nkeys;
+
+ nparms->tcfp_keys = kmalloc(ksize, GFP_KERNEL);
+ if (!nparms->tcfp_keys) {
+ ret = -ENOMEM;
+ goto put_chain;
}
- memcpy(p->tcfp_keys, parm->keys, ksize);
- p->tcfp_off_max_hint = 0;
- for (i = 0; i < p->tcfp_nkeys; ++i) {
- u32 cur = p->tcfp_keys[i].off;
+
+ memcpy(nparms->tcfp_keys, parm->keys, ksize);
+
+ for (i = 0; i < nparms->tcfp_nkeys; ++i) {
+ u32 cur = nparms->tcfp_keys[i].off;
/* sanitize the shift value for any later use */
- p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
- p->tcfp_keys[i].shift);
+ nparms->tcfp_keys[i].shift = min_t(size_t,
+ BITS_PER_TYPE(int) - 1,
+ nparms->tcfp_keys[i].shift);
/* The AT option can read a single byte, we can bound the actual
* value with uchar max.
*/
- cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+ cur += (0xff & nparms->tcfp_keys[i].offmask) >> nparms->tcfp_keys[i].shift;
/* Each key touches 4 bytes starting from the computed offset */
- p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+ nparms->tcfp_off_max_hint =
+ max(nparms->tcfp_off_max_hint, cur + 4);
}
- p->tcfp_flags = parm->flags;
+ p = to_pedit(*a);
+
+ spin_lock_bh(&p->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ oparms = rcu_replace_pointer(p->parms, nparms, 1);
+ spin_unlock_bh(&p->tcf_lock);
- kfree(p->tcfp_keys_ex);
- p->tcfp_keys_ex = keys_ex;
+ if (oparms)
+ call_rcu(&oparms->rcu, tcf_pedit_cleanup_rcu);
- spin_unlock_bh(&p->tcf_lock);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+
return ret;
put_chain:
@@ -261,19 +280,22 @@ put_chain:
tcf_chain_put_by_act(goto_ch);
out_release:
tcf_idr_release(*a, bind);
+out_free_ex:
+ kfree(nparms->tcfp_keys_ex);
out_free:
- kfree(keys_ex);
+ kfree(nparms);
return ret;
-
}
static void tcf_pedit_cleanup(struct tc_action *a)
{
struct tcf_pedit *p = to_pedit(a);
- struct tc_pedit_key *keys = p->tcfp_keys;
+ struct tcf_pedit_parms *parms;
+
+ parms = rcu_dereference_protected(p->parms, 1);
- kfree(keys);
- kfree(p->tcfp_keys_ex);
+ if (parms)
+ call_rcu(&parms->rcu, tcf_pedit_cleanup_rcu);
}
static bool offset_valid(struct sk_buff *skb, int offset)
@@ -324,109 +346,107 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
const struct tc_action *a,
struct tcf_result *res)
{
+ enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+ enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
struct tcf_pedit *p = to_pedit(a);
+ struct tcf_pedit_key_ex *tkey_ex;
+ struct tcf_pedit_parms *parms;
+ struct tc_pedit_key *tkey;
u32 max_offset;
int i;
- spin_lock(&p->tcf_lock);
+ parms = rcu_dereference_bh(p->parms);
max_offset = (skb_transport_header_was_set(skb) ?
skb_transport_offset(skb) :
skb_network_offset(skb)) +
- p->tcfp_off_max_hint;
+ parms->tcfp_off_max_hint;
if (skb_ensure_writable(skb, min(skb->len, max_offset)))
- goto unlock;
+ goto done;
tcf_lastuse_update(&p->tcf_tm);
+ tcf_action_update_bstats(&p->common, skb);
- if (p->tcfp_nkeys > 0) {
- struct tc_pedit_key *tkey = p->tcfp_keys;
- struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex;
- enum pedit_header_type htype =
- TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
- enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
-
- for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
- u32 *ptr, hdata;
- int offset = tkey->off;
- int hoffset;
- u32 val;
- int rc;
-
- if (tkey_ex) {
- htype = tkey_ex->htype;
- cmd = tkey_ex->cmd;
-
- tkey_ex++;
- }
+ tkey = parms->tcfp_keys;
+ tkey_ex = parms->tcfp_keys_ex;
- rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
- if (rc) {
- pr_info("tc action pedit bad header type specified (0x%x)\n",
- htype);
- goto bad;
- }
+ for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) {
+ int offset = tkey->off;
+ u32 *ptr, hdata;
+ int hoffset;
+ u32 val;
+ int rc;
- if (tkey->offmask) {
- u8 *d, _d;
-
- if (!offset_valid(skb, hoffset + tkey->at)) {
- pr_info("tc action pedit 'at' offset %d out of bounds\n",
- hoffset + tkey->at);
- goto bad;
- }
- d = skb_header_pointer(skb, hoffset + tkey->at,
- sizeof(_d), &_d);
- if (!d)
- goto bad;
- offset += (*d & tkey->offmask) >> tkey->shift;
- }
+ if (tkey_ex) {
+ htype = tkey_ex->htype;
+ cmd = tkey_ex->cmd;
- if (offset % 4) {
- pr_info("tc action pedit offset must be on 32 bit boundaries\n");
- goto bad;
- }
+ tkey_ex++;
+ }
- if (!offset_valid(skb, hoffset + offset)) {
- pr_info("tc action pedit offset %d out of bounds\n",
- hoffset + offset);
- goto bad;
- }
+ rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
+ if (rc) {
+ pr_info("tc action pedit bad header type specified (0x%x)\n",
+ htype);
+ goto bad;
+ }
- ptr = skb_header_pointer(skb, hoffset + offset,
- sizeof(hdata), &hdata);
- if (!ptr)
- goto bad;
- /* just do it, baby */
- switch (cmd) {
- case TCA_PEDIT_KEY_EX_CMD_SET:
- val = tkey->val;
- break;
- case TCA_PEDIT_KEY_EX_CMD_ADD:
- val = (*ptr + tkey->val) & ~tkey->mask;
- break;
- default:
- pr_info("tc action pedit bad command (%d)\n",
- cmd);
+ if (tkey->offmask) {
+ u8 *d, _d;
+
+ if (!offset_valid(skb, hoffset + tkey->at)) {
+ pr_info("tc action pedit 'at' offset %d out of bounds\n",
+ hoffset + tkey->at);
goto bad;
}
+ d = skb_header_pointer(skb, hoffset + tkey->at,
+ sizeof(_d), &_d);
+ if (!d)
+ goto bad;
+ offset += (*d & tkey->offmask) >> tkey->shift;
+ }
- *ptr = ((*ptr & tkey->mask) ^ val);
- if (ptr == &hdata)
- skb_store_bits(skb, hoffset + offset, ptr, 4);
+ if (offset % 4) {
+ pr_info("tc action pedit offset must be on 32 bit boundaries\n");
+ goto bad;
}
- goto done;
- } else {
- WARN(1, "pedit BUG: index %d\n", p->tcf_index);
+ if (!offset_valid(skb, hoffset + offset)) {
+ pr_info("tc action pedit offset %d out of bounds\n",
+ hoffset + offset);
+ goto bad;
+ }
+
+ ptr = skb_header_pointer(skb, hoffset + offset,
+ sizeof(hdata), &hdata);
+ if (!ptr)
+ goto bad;
+ /* just do it, baby */
+ switch (cmd) {
+ case TCA_PEDIT_KEY_EX_CMD_SET:
+ val = tkey->val;
+ break;
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
+ val = (*ptr + tkey->val) & ~tkey->mask;
+ break;
+ default:
+ pr_info("tc action pedit bad command (%d)\n",
+ cmd);
+ goto bad;
+ }
+
+ *ptr = ((*ptr & tkey->mask) ^ val);
+ if (ptr == &hdata)
+ skb_store_bits(skb, hoffset + offset, ptr, 4);
}
+ goto done;
+
bad:
+ spin_lock(&p->tcf_lock);
p->tcf_qstats.overlimits++;
-done:
- bstats_update(&p->tcf_bstats, skb);
-unlock:
spin_unlock(&p->tcf_lock);
+done:
return p->tcf_action;
}
@@ -445,30 +465,33 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_pedit *p = to_pedit(a);
+ struct tcf_pedit_parms *parms;
struct tc_pedit *opt;
struct tcf_t t;
int s;
- s = struct_size(opt, keys, p->tcfp_nkeys);
+ spin_lock_bh(&p->tcf_lock);
+ parms = rcu_dereference_protected(p->parms, 1);
+ s = struct_size(opt, keys, parms->tcfp_nkeys);
- /* netlink spinlocks held above us - must use ATOMIC */
opt = kzalloc(s, GFP_ATOMIC);
- if (unlikely(!opt))
+ if (unlikely(!opt)) {
+ spin_unlock_bh(&p->tcf_lock);
return -ENOBUFS;
+ }
- spin_lock_bh(&p->tcf_lock);
- memcpy(opt->keys, p->tcfp_keys, flex_array_size(opt, keys, p->tcfp_nkeys));
+ memcpy(opt->keys, parms->tcfp_keys,
+ flex_array_size(opt, keys, parms->tcfp_nkeys));
opt->index = p->tcf_index;
- opt->nkeys = p->tcfp_nkeys;
- opt->flags = p->tcfp_flags;
+ opt->nkeys = parms->tcfp_nkeys;
+ opt->flags = parms->tcfp_flags;
opt->action = p->tcf_action;
opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
- if (p->tcfp_keys_ex) {
- if (tcf_pedit_key_ex_dump(skb,
- p->tcfp_keys_ex,
- p->tcfp_nkeys))
+ if (parms->tcfp_keys_ex) {
+ if (tcf_pedit_key_ex_dump(skb, parms->tcfp_keys_ex,
+ parms->tcfp_nkeys))
goto nla_put_failure;
if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt))
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index f46643850df8..cc28e41fb745 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -431,7 +431,10 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
while (cl->cmode == HTB_MAY_BORROW && p && mask) {
m = mask;
while (m) {
- int prio = ffz(~m);
+ unsigned int prio = ffz(~m);
+
+ if (WARN_ON_ONCE(prio > ARRAY_SIZE(p->inner.clprio)))
+ break;
m &= ~(1 << prio);
if (p->inner.clprio[prio].feed.rb_node)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 4c68abaa289b..48ed87b91086 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -17,6 +17,8 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
+#include "sch_mqprio_lib.h"
+
struct mqprio_sched {
struct Qdisc **qdiscs;
u16 mode;
@@ -27,6 +29,62 @@ struct mqprio_sched {
u64 max_rate[TC_QOPT_MAX_QUEUE];
};
+static int mqprio_enable_offload(struct Qdisc *sch,
+ const struct tc_mqprio_qopt *qopt,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int err, i;
+
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
+ return -EINVAL;
+ break;
+ case TC_MQPRIO_MODE_CHANNEL:
+ mqprio.flags = priv->flags;
+ if (priv->flags & TC_MQPRIO_F_MODE)
+ mqprio.mode = priv->mode;
+ if (priv->flags & TC_MQPRIO_F_SHAPER)
+ mqprio.shaper = priv->shaper;
+ if (priv->flags & TC_MQPRIO_F_MIN_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.min_rate[i] = priv->min_rate[i];
+ if (priv->flags & TC_MQPRIO_F_MAX_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.max_rate[i] = priv->max_rate[i];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO,
+ &mqprio);
+ if (err)
+ return err;
+
+ priv->hw_offload = mqprio.qopt.hw;
+
+ return 0;
+}
+
+static void mqprio_disable_offload(struct Qdisc *sch)
+{
+ struct tc_mqprio_qopt_offload mqprio = { { 0 } };
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ case TC_MQPRIO_MODE_CHANNEL:
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO,
+ &mqprio);
+ break;
+ }
+}
+
static void mqprio_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
@@ -41,37 +99,17 @@ static void mqprio_destroy(struct Qdisc *sch)
kfree(priv->qdiscs);
}
- if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
- struct tc_mqprio_qopt_offload mqprio = { { 0 } };
-
- switch (priv->mode) {
- case TC_MQPRIO_MODE_DCB:
- case TC_MQPRIO_MODE_CHANNEL:
- dev->netdev_ops->ndo_setup_tc(dev,
- TC_SETUP_QDISC_MQPRIO,
- &mqprio);
- break;
- default:
- return;
- }
- } else {
+ if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc)
+ mqprio_disable_offload(sch);
+ else
netdev_set_num_tc(dev, 0);
- }
}
-static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
+ const struct tc_mqprio_caps *caps,
+ struct netlink_ext_ack *extack)
{
- int i, j;
-
- /* Verify num_tc is not out of max range */
- if (qopt->num_tc > TC_MAX_QUEUE)
- return -EINVAL;
-
- /* Verify priority mapping uses valid tcs */
- for (i = 0; i < TC_BITMASK + 1; i++) {
- if (qopt->prio_tc_map[i] >= qopt->num_tc)
- return -EINVAL;
- }
+ int err;
/* Limit qopt->hw to maximum supported offload value. Drivers have
* the option of overriding this later if they don't support the a
@@ -80,31 +118,23 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX)
qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX;
- /* If hardware offload is requested we will leave it to the device
- * to either populate the queue counts itself or to validate the
- * provided queue counts. If ndo_setup_tc is not present then
- * hardware doesn't support offload and we should return an error.
+ /* If hardware offload is requested, we will leave 3 options to the
+ * device driver:
+ * - populate the queue counts itself (and ignore what was requested)
+ * - validate the provided queue counts by itself (and apply them)
+ * - request queue count validation here (and apply them)
*/
- if (qopt->hw)
- return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL;
-
- for (i = 0; i < qopt->num_tc; i++) {
- unsigned int last = qopt->offset[i] + qopt->count[i];
-
- /* Verify the queue count is in tx range being equal to the
- * real_num_tx_queues indicates the last queue is in use.
- */
- if (qopt->offset[i] >= dev->real_num_tx_queues ||
- !qopt->count[i] ||
- last > dev->real_num_tx_queues)
- return -EINVAL;
-
- /* Verify that the offset and counts do not overlap */
- for (j = i + 1; j < qopt->num_tc; j++) {
- if (last > qopt->offset[j])
- return -EINVAL;
- }
- }
+ err = mqprio_validate_qopt(dev, qopt,
+ !qopt->hw || caps->validate_queue_counts,
+ false, extack);
+ if (err)
+ return err;
+
+ /* If ndo_setup_tc is not present then hardware doesn't support offload
+ * and we should return an error.
+ */
+ if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
+ return -EINVAL;
return 0;
}
@@ -130,6 +160,67 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
return 0;
}
+static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
+ struct nlattr *opt)
+{
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct nlattr *tb[TCA_MQPRIO_MAX + 1];
+ struct nlattr *attr;
+ int i, rem, err;
+
+ err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
+ sizeof(*qopt));
+ if (err < 0)
+ return err;
+
+ if (!qopt->hw)
+ return -EINVAL;
+
+ if (tb[TCA_MQPRIO_MODE]) {
+ priv->flags |= TC_MQPRIO_F_MODE;
+ priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
+ }
+
+ if (tb[TCA_MQPRIO_SHAPER]) {
+ priv->flags |= TC_MQPRIO_F_SHAPER;
+ priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
+ }
+
+ if (tb[TCA_MQPRIO_MIN_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->min_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MIN_RATE;
+ }
+
+ if (tb[TCA_MQPRIO_MAX_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->max_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MAX_RATE;
+ }
+
+ return 0;
+}
+
static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -139,9 +230,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
struct Qdisc *qdisc;
int i, err = -EOPNOTSUPP;
struct tc_mqprio_qopt *qopt = NULL;
- struct nlattr *tb[TCA_MQPRIO_MAX + 1];
- struct nlattr *attr;
- int rem;
+ struct tc_mqprio_caps caps;
int len;
BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
@@ -160,61 +249,18 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
if (!opt || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
+ qdisc_offload_query_caps(dev, TC_SETUP_QDISC_MQPRIO,
+ &caps, sizeof(caps));
+
qopt = nla_data(opt);
- if (mqprio_parse_opt(dev, qopt))
+ if (mqprio_parse_opt(dev, qopt, &caps, extack))
return -EINVAL;
len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
if (len > 0) {
- err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
- sizeof(*qopt));
- if (err < 0)
+ err = mqprio_parse_nlattr(sch, qopt, opt);
+ if (err)
return err;
-
- if (!qopt->hw)
- return -EINVAL;
-
- if (tb[TCA_MQPRIO_MODE]) {
- priv->flags |= TC_MQPRIO_F_MODE;
- priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
- }
-
- if (tb[TCA_MQPRIO_SHAPER]) {
- priv->flags |= TC_MQPRIO_F_SHAPER;
- priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
- }
-
- if (tb[TCA_MQPRIO_MIN_RATE64]) {
- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
- return -EINVAL;
- i = 0;
- nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
- rem) {
- if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
- return -EINVAL;
- if (i >= qopt->num_tc)
- break;
- priv->min_rate[i] = *(u64 *)nla_data(attr);
- i++;
- }
- priv->flags |= TC_MQPRIO_F_MIN_RATE;
- }
-
- if (tb[TCA_MQPRIO_MAX_RATE64]) {
- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
- return -EINVAL;
- i = 0;
- nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
- rem) {
- if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
- return -EINVAL;
- if (i >= qopt->num_tc)
- break;
- priv->max_rate[i] = *(u64 *)nla_data(attr);
- i++;
- }
- priv->flags |= TC_MQPRIO_F_MAX_RATE;
- }
}
/* pre-allocate qdisc, attachment can't fail */
@@ -241,36 +287,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
* supplied and verified mapping
*/
if (qopt->hw) {
- struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
-
- switch (priv->mode) {
- case TC_MQPRIO_MODE_DCB:
- if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
- return -EINVAL;
- break;
- case TC_MQPRIO_MODE_CHANNEL:
- mqprio.flags = priv->flags;
- if (priv->flags & TC_MQPRIO_F_MODE)
- mqprio.mode = priv->mode;
- if (priv->flags & TC_MQPRIO_F_SHAPER)
- mqprio.shaper = priv->shaper;
- if (priv->flags & TC_MQPRIO_F_MIN_RATE)
- for (i = 0; i < mqprio.qopt.num_tc; i++)
- mqprio.min_rate[i] = priv->min_rate[i];
- if (priv->flags & TC_MQPRIO_F_MAX_RATE)
- for (i = 0; i < mqprio.qopt.num_tc; i++)
- mqprio.max_rate[i] = priv->max_rate[i];
- break;
- default:
- return -EINVAL;
- }
- err = dev->netdev_ops->ndo_setup_tc(dev,
- TC_SETUP_QDISC_MQPRIO,
- &mqprio);
+ err = mqprio_enable_offload(sch, qopt, extack);
if (err)
return err;
-
- priv->hw_offload = mqprio.qopt.hw;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -387,7 +406,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
struct tc_mqprio_qopt opt = { 0 };
struct Qdisc *qdisc;
- unsigned int ntx, tc;
+ unsigned int ntx;
sch->q.qlen = 0;
gnet_stats_basic_sync_init(&sch->bstats);
@@ -411,15 +430,9 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
spin_unlock_bh(qdisc_lock(qdisc));
}
- opt.num_tc = netdev_get_num_tc(dev);
- memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+ mqprio_qopt_reconstruct(dev, &opt);
opt.hw = priv->hw_offload;
- for (tc = 0; tc < netdev_get_num_tc(dev); tc++) {
- opt.count[tc] = dev->tc_to_txq[tc].count;
- opt.offset[tc] = dev->tc_to_txq[tc].offset;
- }
-
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
diff --git a/net/sched/sch_mqprio_lib.c b/net/sched/sch_mqprio_lib.c
new file mode 100644
index 000000000000..c58a533b8ec5
--- /dev/null
+++ b/net/sched/sch_mqprio_lib.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/types.h>
+#include <net/pkt_sched.h>
+
+#include "sch_mqprio_lib.h"
+
+/* Returns true if the intervals [a, b) and [c, d) overlap. */
+static bool intervals_overlap(int a, int b, int c, int d)
+{
+ int left = max(a, c), right = min(b, d);
+
+ return left < right;
+}
+
+static int mqprio_validate_queue_counts(struct net_device *dev,
+ const struct tc_mqprio_qopt *qopt,
+ bool allow_overlapping_txqs,
+ struct netlink_ext_ack *extack)
+{
+ int i, j;
+
+ for (i = 0; i < qopt->num_tc; i++) {
+ unsigned int last = qopt->offset[i] + qopt->count[i];
+
+ if (!qopt->count[i]) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "No queues for TC %d",
+ i);
+ return -EINVAL;
+ }
+
+ /* Verify the queue count is in tx range being equal to the
+ * real_num_tx_queues indicates the last queue is in use.
+ */
+ if (qopt->offset[i] >= dev->real_num_tx_queues ||
+ last > dev->real_num_tx_queues) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Queues %d:%d for TC %d exceed the %d TX queues available",
+ qopt->count[i], qopt->offset[i],
+ i, dev->real_num_tx_queues);
+ return -EINVAL;
+ }
+
+ if (allow_overlapping_txqs)
+ continue;
+
+ /* Verify that the offset and counts do not overlap */
+ for (j = i + 1; j < qopt->num_tc; j++) {
+ if (intervals_overlap(qopt->offset[i], last,
+ qopt->offset[j],
+ qopt->offset[j] +
+ qopt->count[j])) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "TC %d queues %d@%d overlap with TC %d queues %d@%d",
+ i, qopt->count[i], qopt->offset[i],
+ j, qopt->count[j], qopt->offset[j]);
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int mqprio_validate_qopt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
+ bool validate_queue_counts,
+ bool allow_overlapping_txqs,
+ struct netlink_ext_ack *extack)
+{
+ int i, err;
+
+ /* Verify num_tc is not out of max range */
+ if (qopt->num_tc > TC_MAX_QUEUE) {
+ NL_SET_ERR_MSG(extack,
+ "Number of traffic classes is outside valid range");
+ return -EINVAL;
+ }
+
+ /* Verify priority mapping uses valid tcs */
+ for (i = 0; i <= TC_BITMASK; i++) {
+ if (qopt->prio_tc_map[i] >= qopt->num_tc) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid traffic class in priority to traffic class mapping");
+ return -EINVAL;
+ }
+ }
+
+ if (validate_queue_counts) {
+ err = mqprio_validate_queue_counts(dev, qopt,
+ allow_overlapping_txqs,
+ extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mqprio_validate_qopt);
+
+void mqprio_qopt_reconstruct(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+{
+ int tc, num_tc = netdev_get_num_tc(dev);
+
+ qopt->num_tc = num_tc;
+ memcpy(qopt->prio_tc_map, dev->prio_tc_map, sizeof(qopt->prio_tc_map));
+
+ for (tc = 0; tc < num_tc; tc++) {
+ qopt->count[tc] = dev->tc_to_txq[tc].count;
+ qopt->offset[tc] = dev->tc_to_txq[tc].offset;
+ }
+}
+EXPORT_SYMBOL_GPL(mqprio_qopt_reconstruct);
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_mqprio_lib.h b/net/sched/sch_mqprio_lib.h
new file mode 100644
index 000000000000..63f725ab8761
--- /dev/null
+++ b/net/sched/sch_mqprio_lib.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SCH_MQPRIO_LIB_H
+#define __SCH_MQPRIO_LIB_H
+
+#include <linux/types.h>
+
+struct net_device;
+struct netlink_ext_ack;
+struct tc_mqprio_qopt;
+
+int mqprio_validate_qopt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
+ bool validate_queue_counts,
+ bool allow_overlapping_txqs,
+ struct netlink_ext_ack *extack);
+void mqprio_qopt_reconstruct(struct net_device *dev,
+ struct tc_mqprio_qopt *qopt);
+
+#endif
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c322a61eaeea..1c95785932b9 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -26,6 +26,8 @@
#include <net/sock.h>
#include <net/tcp.h>
+#include "sch_mqprio_lib.h"
+
static LIST_HEAD(taprio_list);
#define TAPRIO_ALL_GATES_OPEN -1
@@ -924,7 +926,7 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
struct netlink_ext_ack *extack,
u32 taprio_flags)
{
- int i, j;
+ bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
if (!qopt && !dev->num_tc) {
NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
@@ -937,52 +939,17 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
if (dev->num_tc)
return 0;
- /* Verify num_tc is not out of max range */
- if (qopt->num_tc > TC_MAX_QUEUE) {
- NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
- return -EINVAL;
- }
-
/* taprio imposes that traffic classes map 1:n to tx queues */
if (qopt->num_tc > dev->num_tx_queues) {
NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
return -EINVAL;
}
- /* Verify priority mapping uses valid tcs */
- for (i = 0; i <= TC_BITMASK; i++) {
- if (qopt->prio_tc_map[i] >= qopt->num_tc) {
- NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
- return -EINVAL;
- }
- }
-
- for (i = 0; i < qopt->num_tc; i++) {
- unsigned int last = qopt->offset[i] + qopt->count[i];
-
- /* Verify the queue count is in tx range being equal to the
- * real_num_tx_queues indicates the last queue is in use.
- */
- if (qopt->offset[i] >= dev->num_tx_queues ||
- !qopt->count[i] ||
- last > dev->real_num_tx_queues) {
- NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
- return -EINVAL;
- }
-
- if (TXTIME_ASSIST_IS_ENABLED(taprio_flags))
- continue;
-
- /* Verify that the offset and counts do not overlap */
- for (j = i + 1; j < qopt->num_tc; j++) {
- if (last > qopt->offset[j]) {
- NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
- return -EINVAL;
- }
- }
- }
-
- return 0;
+ /* For some reason, in txtime-assist mode, we allow TXQ ranges for
+ * different TCs to overlap, and just validate the TXQ ranges.
+ */
+ return mqprio_validate_qopt(dev, qopt, true, allow_overlapping_txqs,
+ extack);
}
static int taprio_get_start_time(struct Qdisc *sch,
@@ -1203,7 +1170,8 @@ static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
static void taprio_sched_to_offload(struct net_device *dev,
struct sched_gate_list *sched,
- struct tc_taprio_qopt_offload *offload)
+ struct tc_taprio_qopt_offload *offload,
+ const struct tc_taprio_caps *caps)
{
struct sched_entry *entry;
int i = 0;
@@ -1217,7 +1185,11 @@ static void taprio_sched_to_offload(struct net_device *dev,
e->command = entry->command;
e->interval = entry->interval;
- e->gate_mask = tc_map_to_queue_mask(dev, entry->gate_mask);
+ if (caps->gate_mask_per_txq)
+ e->gate_mask = tc_map_to_queue_mask(dev,
+ entry->gate_mask);
+ else
+ e->gate_mask = entry->gate_mask;
i++;
}
@@ -1261,7 +1233,8 @@ static int taprio_enable_offload(struct net_device *dev,
return -ENOMEM;
}
offload->enable = 1;
- taprio_sched_to_offload(dev, sched, offload);
+ mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
+ taprio_sched_to_offload(dev, sched, offload, &caps);
for (tc = 0; tc < TC_MAX_QUEUE; tc++)
offload->max_sdu[tc] = q->max_sdu[tc];
@@ -1981,18 +1954,11 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct sched_gate_list *oper, *admin;
struct tc_mqprio_qopt opt = { 0 };
struct nlattr *nest, *sched_nest;
- unsigned int i;
oper = rtnl_dereference(q->oper_sched);
admin = rtnl_dereference(q->admin_sched);
- opt.num_tc = netdev_get_num_tc(dev);
- memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
-
- for (i = 0; i < netdev_get_num_tc(dev); i++) {
- opt.count[i] = dev->tc_to_txq[i].count;
- opt.offset[i] = dev->tc_to_txq[i].offset;
- }
+ mqprio_qopt_reconstruct(dev, &opt);
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (!nest)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ca1eba95c293..2f66a2006517 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -196,9 +196,7 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport)
/* When a data chunk is sent, reset the heartbeat interval. */
expires = jiffies + sctp_transport_timeout(transport);
- if ((time_before(transport->hb_timer.expires, expires) ||
- !timer_pending(&transport->hb_timer)) &&
- !mod_timer(&transport->hb_timer,
+ if (!mod_timer(&transport->hb_timer,
expires + get_random_u32_below(transport->rto)))
sctp_transport_hold(transport);
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 1c0fe9ba5358..b163266e581a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -502,7 +502,7 @@ static int smcr_lgr_reg_sndbufs(struct smc_link *link,
return -EINVAL;
/* protect against parallel smcr_link_reg_buf() */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
@@ -510,7 +510,7 @@ static int smcr_lgr_reg_sndbufs(struct smc_link *link,
if (rc)
break;
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
return rc;
}
@@ -519,15 +519,30 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
struct smc_buf_desc *rmb_desc)
{
struct smc_link_group *lgr = link->lgr;
+ bool do_slow = false;
int i, rc = 0;
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (rc)
return rc;
+
+ down_read(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_active(&lgr->lnk[i]))
+ continue;
+ if (!rmb_desc->is_reg_mr[link->link_idx]) {
+ up_read(&lgr->llc_conf_mutex);
+ goto slow_path;
+ }
+ }
+ /* mr register already */
+ goto fast_path;
+slow_path:
+ do_slow = true;
/* protect against parallel smc_llc_cli_rkey_exchange() and
* parallel smcr_link_reg_buf()
*/
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
@@ -535,7 +550,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
if (rc)
goto out;
}
-
+fast_path:
/* exchange confirm_rkey msg with peer */
rc = smc_llc_do_confirm_rkey(link, rmb_desc);
if (rc) {
@@ -544,7 +559,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
}
rmb_desc->is_conf_rkey = true;
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ do_slow ? up_write(&lgr->llc_conf_mutex) : up_read(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
return rc;
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 7642b16c41d1..b330a1fa453e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -854,8 +854,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
- mutex_init(&lgr->sndbufs_lock);
- mutex_init(&lgr->rmbs_lock);
+ init_rwsem(&lgr->sndbufs_lock);
+ init_rwsem(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
INIT_LIST_HEAD(&lgr->sndbufs[i]);
@@ -1098,7 +1098,7 @@ err_out:
static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link_group *lgr)
{
- struct mutex *lock; /* lock buffer list */
+ struct rw_semaphore *lock; /* lock buffer list */
int rc;
if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
@@ -1106,10 +1106,10 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (!rc) {
/* protect against smc_llc_cli_rkey_exchange() */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_read(&lgr->llc_conf_mutex);
smc_llc_do_delete_rkey(lgr, buf_desc);
buf_desc->is_conf_rkey = false;
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_read(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
}
@@ -1118,9 +1118,9 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
/* buf registration failed, reuse not possible */
lock = is_rmb ? &lgr->rmbs_lock :
&lgr->sndbufs_lock;
- mutex_lock(lock);
+ down_write(lock);
list_del(&buf_desc->list);
- mutex_unlock(lock);
+ up_write(lock);
smc_buf_free(lgr, is_rmb, buf_desc);
} else {
@@ -1224,15 +1224,16 @@ static void smcr_buf_unmap_lgr(struct smc_link *lnk)
int i;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
smcr_buf_unmap_link(buf_desc, true, lnk);
- mutex_unlock(&lgr->rmbs_lock);
- mutex_lock(&lgr->sndbufs_lock);
+ up_write(&lgr->rmbs_lock);
+
+ down_write(&lgr->sndbufs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
list)
smcr_buf_unmap_link(buf_desc, false, lnk);
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
}
}
@@ -1377,12 +1378,12 @@ static void smc_lgr_free(struct smc_link_group *lgr)
int i;
if (!lgr->is_smcd) {
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state != SMC_LNK_UNUSED)
smcr_link_clear(&lgr->lnk[i], false);
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
smc_llc_lgr_clear(lgr);
}
@@ -1696,12 +1697,12 @@ static void smcr_link_down(struct smc_link *lnk)
} else {
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
/* another llc task is ongoing */
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
wait_event_timeout(lgr->llc_flow_waiter,
(list_empty(&lgr->list) ||
lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
SMC_LLC_WAIT_TIME);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
}
if (!list_empty(&lgr->list)) {
smc_llc_send_delete_link(to_lnk, del_link_id,
@@ -1761,9 +1762,9 @@ static void smc_link_down_work(struct work_struct *work)
if (list_empty(&lgr->list))
return;
wake_up_all(&lgr->llc_msg_waiter);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
smcr_link_down(link);
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
}
static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
@@ -1990,19 +1991,19 @@ int smc_uncompress_bufsize(u8 compressed)
* buffer size; if not available, return NULL
*/
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
- struct mutex *lock,
+ struct rw_semaphore *lock,
struct list_head *buf_list)
{
struct smc_buf_desc *buf_slot;
- mutex_lock(lock);
+ down_read(lock);
list_for_each_entry(buf_slot, buf_list, list) {
if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
- mutex_unlock(lock);
+ up_read(lock);
return buf_slot;
}
}
- mutex_unlock(lock);
+ up_read(lock);
return NULL;
}
@@ -2111,13 +2112,13 @@ int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
return 0;
}
-static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
+static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
struct list_head *lst, bool is_rmb)
{
struct smc_buf_desc *buf_desc, *bf;
int rc = 0;
- mutex_lock(lock);
+ down_write(lock);
list_for_each_entry_safe(buf_desc, bf, lst, list) {
if (!buf_desc->used)
continue;
@@ -2126,7 +2127,7 @@ static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
goto out;
}
out:
- mutex_unlock(lock);
+ up_write(lock);
return rc;
}
@@ -2159,37 +2160,37 @@ int smcr_buf_reg_lgr(struct smc_link *lnk)
int i, rc = 0;
/* reg all RMBs for a new link */
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
if (!buf_desc->used)
continue;
rc = smcr_link_reg_buf(lnk, buf_desc);
if (rc) {
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
}
}
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
return rc;
/* reg all vzalloced sndbufs for a new link */
- mutex_lock(&lgr->sndbufs_lock);
+ down_write(&lgr->sndbufs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
if (!buf_desc->used || !buf_desc->is_vm)
continue;
rc = smcr_link_reg_buf(lnk, buf_desc);
if (rc) {
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
return rc;
}
}
}
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
return rc;
}
@@ -2247,7 +2248,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
int i, rc = 0, cnt = 0;
/* protect against parallel link reconfiguration */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_read(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
@@ -2260,7 +2261,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
cnt++;
}
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_read(&lgr->llc_conf_mutex);
if (!rc && !cnt)
rc = -EINVAL;
return rc;
@@ -2309,8 +2310,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
int bufsize, bufsize_short;
+ struct rw_semaphore *lock; /* lock buffer list */
bool is_dgraded = false;
- struct mutex *lock; /* lock buffer list */
int sk_buf_size;
if (is_rmb)
@@ -2358,9 +2359,9 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
buf_desc->used = 1;
- mutex_lock(lock);
+ down_write(lock);
list_add(&buf_desc->list, buf_list);
- mutex_unlock(lock);
+ up_write(lock);
break; /* found */
}
@@ -2434,9 +2435,9 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
/* create rmb */
rc = __smc_buf_create(smc, is_smcd, true);
if (rc) {
- mutex_lock(&smc->conn.lgr->sndbufs_lock);
+ down_write(&smc->conn.lgr->sndbufs_lock);
list_del(&smc->conn.sndbuf_desc->list);
- mutex_unlock(&smc->conn.lgr->sndbufs_lock);
+ up_write(&smc->conn.lgr->sndbufs_lock);
smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
smc->conn.sndbuf_desc = NULL;
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 285f9bd8e232..08b457c2d294 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -252,9 +252,9 @@ struct smc_link_group {
unsigned short vlan_id; /* vlan id of link group */
struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
- struct mutex sndbufs_lock; /* protects tx buffers */
+ struct rw_semaphore sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
- struct mutex rmbs_lock; /* protects rx buffers */
+ struct rw_semaphore rmbs_lock; /* protects rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
@@ -298,7 +298,7 @@ struct smc_link_group {
/* queue for llc events */
spinlock_t llc_event_q_lock;
/* protects llc_event_q */
- struct mutex llc_conf_mutex;
+ struct rw_semaphore llc_conf_mutex;
/* protects lgr reconfig. */
struct work_struct llc_add_link_work;
struct work_struct llc_del_link_work;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 524649d0ab65..a0840b8c935b 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -608,7 +608,7 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
prim_lnk_idx = link->link_idx;
lnk_idx = link_new->link_idx;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
ext->num_rkeys = lgr->conns_num;
if (!ext->num_rkeys)
goto out;
@@ -628,7 +628,7 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
}
len += i * sizeof(ext->rt[0]);
out:
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return len;
}
@@ -889,7 +889,7 @@ static int smc_llc_cli_rkey_exchange(struct smc_link *link,
int rc = 0;
int i;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
num_rkeys_send = lgr->conns_num;
buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
do {
@@ -916,7 +916,7 @@ static int smc_llc_cli_rkey_exchange(struct smc_link *link,
break;
} while (num_rkeys_send || num_rkeys_recv);
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
@@ -999,14 +999,14 @@ static void smc_llc_save_add_link_rkeys(struct smc_link *link,
ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
SMC_WR_TX_SIZE);
max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
for (i = 0; i < max; i++) {
smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
ext->rt[i].rmb_key,
ext->rt[i].rmb_vaddr_new,
ext->rt[i].rmb_key_new);
}
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
}
static void smc_llc_save_add_link_info(struct smc_link *link,
@@ -1202,12 +1202,12 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
if (smc_llc_is_local_add_link(&qentry->msg))
smc_llc_cli_add_link_invite(qentry->link, qentry);
else
smc_llc_cli_add_link(qentry->link, qentry);
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
}
static int smc_llc_active_link_count(struct smc_link_group *lgr)
@@ -1313,7 +1313,7 @@ static int smc_llc_srv_rkey_exchange(struct smc_link *link,
int rc = 0;
int i;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
num_rkeys_send = lgr->conns_num;
buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
do {
@@ -1338,7 +1338,7 @@ static int smc_llc_srv_rkey_exchange(struct smc_link *link,
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
} while (num_rkeys_send || num_rkeys_recv);
out:
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
@@ -1509,13 +1509,13 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
rc = smc_llc_srv_add_link(link, qentry);
if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
/* delete any asymmetric link */
smc_llc_delete_asym_link(lgr);
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
kfree(qentry);
}
@@ -1582,7 +1582,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
smc_lgr_terminate_sched(lgr);
goto out;
}
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
/* delete single link */
for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) {
if (lgr->lnk[lnk_idx].link_id != del_llc->link_num)
@@ -1616,7 +1616,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
smc_lgr_terminate_sched(lgr);
}
out_unlock:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
out:
kfree(qentry);
}
@@ -1652,7 +1652,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
int active_links;
int i;
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
lnk = qentry->link;
del_llc = &qentry->msg.delete_link;
@@ -1708,7 +1708,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
smc_llc_add_link_local(lnk);
}
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
kfree(qentry);
}
@@ -2126,7 +2126,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
spin_lock_init(&lgr->llc_flow_lock);
init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter);
- mutex_init(&lgr->llc_conf_mutex);
+ init_rwsem(&lgr->llc_conf_mutex);
lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time);
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index fa137063aaa0..6d0a534b7baa 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2430,7 +2430,7 @@ static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx)
{
struct tls_rec *rec;
- rec = list_first_entry(&ctx->tx_list, struct tls_rec, list);
+ rec = list_first_entry_or_null(&ctx->tx_list, struct tls_rec, list);
if (!rec)
return false;