summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.rst40
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c23
-rw-r--r--drivers/net/netdevsim/fib.c534
-rw-r--r--include/net/ip6_fib.h9
-rw-r--r--include/net/netns/ipv4.h2
-rw-r--r--include/net/netns/ipv6.h1
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/fib_trie.c27
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/route.c44
-rw-r--r--net/ipv6/sysctl_net_ipv6.c9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/fib.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh300
17 files changed, 854 insertions, 182 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 2685ec161060..61a358301f12 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -178,6 +178,26 @@ min_adv_mss - INTEGER
The advertised MSS depends on the first hop route MTU, but will
never be lower than this setting.
+fib_notify_on_flag_change - INTEGER
+ Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/
+ RTM_F_TRAP flags are changed.
+
+ After installing a route to the kernel, user space receives an
+ acknowledgment, which means the route was installed in the kernel,
+ but not necessarily in hardware.
+ It is also possible for a route already installed in hardware to change
+ its action and therefore its flags. For example, a host route that is
+ trapping packets can be "promoted" to perform decapsulation following
+ the installation of an IPinIP/VXLAN tunnel.
+ The notifications will indicate to user-space the state of the route.
+
+ Default: 0 (Do not emit notifications.)
+
+ Possible values:
+
+ - 0 - Do not emit notifications.
+ - 1 - Emit notifications.
+
IP Fragmentation:
ipfrag_high_thresh - LONG INTEGER
@@ -1775,6 +1795,26 @@ nexthop_compat_mode - BOOLEAN
and extraneous notifications.
Default: true (backward compat mode)
+fib_notify_on_flag_change - INTEGER
+ Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/
+ RTM_F_TRAP flags are changed.
+
+ After installing a route to the kernel, user space receives an
+ acknowledgment, which means the route was installed in the kernel,
+ but not necessarily in hardware.
+ It is also possible for a route already installed in hardware to change
+ its action and therefore its flags. For example, a host route that is
+ trapping packets can be "promoted" to perform decapsulation following
+ the installation of an IPinIP/VXLAN tunnel.
+ The notifications will indicate to user-space the state of the route.
+
+ Default: 0 (Do not emit notifications.)
+
+ Possible values:
+
+ - 0 - Do not emit notifications.
+ - 1 - Emit notifications.
+
IPv6 Fragmentation:
ip6frag_high_thresh - INTEGER
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 0ac7014703aa..cf111e73f81e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4988,6 +4988,7 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
}
+#if IS_ENABLED(CONFIG_IPV6)
static void
mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
@@ -5004,10 +5005,18 @@ mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
common);
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
- fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload,
- !should_offload);
+ fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
+ should_offload, !should_offload);
+}
+#else
+static void
+mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
}
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
static void
mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
@@ -5018,8 +5027,16 @@ mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
common);
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
- fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false);
+ fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt,
+ false, false);
+}
+#else
+static void
+mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
}
+#endif
static void
mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index f140bbca98c5..1779146926a5 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -31,7 +31,7 @@
struct nsim_fib_entry {
u64 max;
- u64 num;
+ atomic64_t num;
};
struct nsim_per_fib_data {
@@ -46,10 +46,13 @@ struct nsim_fib_data {
struct nsim_fib_entry nexthops;
struct rhashtable fib_rt_ht;
struct list_head fib_rt_list;
- spinlock_t fib_lock; /* Protects hashtable, list and accounting */
+ struct mutex fib_lock; /* Protects hashtable and list */
struct notifier_block nexthop_nb;
struct rhashtable nexthop_ht;
struct devlink *devlink;
+ struct work_struct fib_event_work;
+ struct list_head fib_event_queue;
+ spinlock_t fib_event_queue_lock; /* Protects fib event queue list */
};
struct nsim_fib_rt_key {
@@ -83,6 +86,22 @@ struct nsim_fib6_rt_nh {
struct fib6_info *rt;
};
+struct nsim_fib6_event {
+ struct fib6_info **rt_arr;
+ unsigned int nrt6;
+};
+
+struct nsim_fib_event {
+ struct list_head list; /* node in fib queue */
+ union {
+ struct fib_entry_notifier_info fen_info;
+ struct nsim_fib6_event fib6_event;
+ };
+ struct nsim_fib_data *data;
+ unsigned long event;
+ int family;
+};
+
static const struct rhashtable_params nsim_fib_rt_ht_params = {
.key_offset = offsetof(struct nsim_fib_rt, key),
.head_offset = offsetof(struct nsim_fib_rt, ht_node),
@@ -128,7 +147,7 @@ u64 nsim_fib_get_val(struct nsim_fib_data *fib_data,
return 0;
}
- return max ? entry->max : entry->num;
+ return max ? entry->max : atomic64_read(&entry->num);
}
static void nsim_fib_set_max(struct nsim_fib_data *fib_data,
@@ -165,14 +184,12 @@ static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add,
int err = 0;
if (add) {
- if (entry->num < entry->max) {
- entry->num++;
- } else {
+ if (!atomic64_add_unless(&entry->num, 1, entry->max)) {
err = -ENOSPC;
NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib rule entries");
}
} else {
- entry->num--;
+ atomic64_dec_if_positive(&entry->num);
}
return err;
@@ -196,20 +213,15 @@ static int nsim_fib_rule_event(struct nsim_fib_data *data,
return err;
}
-static int nsim_fib_account(struct nsim_fib_entry *entry, bool add,
- struct netlink_ext_ack *extack)
+static int nsim_fib_account(struct nsim_fib_entry *entry, bool add)
{
int err = 0;
if (add) {
- if (entry->num < entry->max) {
- entry->num++;
- } else {
+ if (!atomic64_add_unless(&entry->num, 1, entry->max))
err = -ENOSPC;
- NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries");
- }
} else {
- entry->num--;
+ atomic64_dec_if_positive(&entry->num);
}
return err;
@@ -254,7 +266,7 @@ nsim_fib4_rt_create(struct nsim_fib_data *data,
{
struct nsim_fib4_rt *fib4_rt;
- fib4_rt = kzalloc(sizeof(*fib4_rt), GFP_ATOMIC);
+ fib4_rt = kzalloc(sizeof(*fib4_rt), GFP_KERNEL);
if (!fib4_rt)
return NULL;
@@ -311,51 +323,52 @@ static void nsim_fib4_rt_hw_flags_set(struct net *net,
}
static int nsim_fib4_rt_add(struct nsim_fib_data *data,
- struct nsim_fib4_rt *fib4_rt,
- struct netlink_ext_ack *extack)
+ struct nsim_fib4_rt *fib4_rt)
{
struct net *net = devlink_net(data->devlink);
int err;
- err = nsim_fib_account(&data->ipv4.fib, true, extack);
- if (err)
- return err;
-
err = rhashtable_insert_fast(&data->fib_rt_ht,
&fib4_rt->common.ht_node,
nsim_fib_rt_ht_params);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to insert IPv4 route");
+ if (err)
goto err_fib_dismiss;
- }
+ /* Simulate hardware programming latency. */
+ msleep(1);
nsim_fib4_rt_hw_flags_set(net, fib4_rt, true);
return 0;
err_fib_dismiss:
- nsim_fib_account(&data->ipv4.fib, false, extack);
+ /* Drop the accounting that was increased from the notification
+ * context when FIB_EVENT_ENTRY_REPLACE was triggered.
+ */
+ nsim_fib_account(&data->ipv4.fib, false);
return err;
}
static int nsim_fib4_rt_replace(struct nsim_fib_data *data,
struct nsim_fib4_rt *fib4_rt,
- struct nsim_fib4_rt *fib4_rt_old,
- struct netlink_ext_ack *extack)
+ struct nsim_fib4_rt *fib4_rt_old)
{
struct net *net = devlink_net(data->devlink);
int err;
- /* We are replacing a route, so no need to change the accounting. */
+ /* We are replacing a route, so need to remove the accounting which
+ * was increased when FIB_EVENT_ENTRY_REPLACE was triggered.
+ */
+ err = nsim_fib_account(&data->ipv4.fib, false);
+ if (err)
+ return err;
err = rhashtable_replace_fast(&data->fib_rt_ht,
&fib4_rt_old->common.ht_node,
&fib4_rt->common.ht_node,
nsim_fib_rt_ht_params);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to replace IPv4 route");
+ if (err)
return err;
- }
+ msleep(1);
nsim_fib4_rt_hw_flags_set(net, fib4_rt, true);
nsim_fib4_rt_hw_flags_set(net, fib4_rt_old, false);
@@ -367,7 +380,6 @@ static int nsim_fib4_rt_replace(struct nsim_fib_data *data,
static int nsim_fib4_rt_insert(struct nsim_fib_data *data,
struct fib_entry_notifier_info *fen_info)
{
- struct netlink_ext_ack *extack = fen_info->info.extack;
struct nsim_fib4_rt *fib4_rt, *fib4_rt_old;
int err;
@@ -377,9 +389,9 @@ static int nsim_fib4_rt_insert(struct nsim_fib_data *data,
fib4_rt_old = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info);
if (!fib4_rt_old)
- err = nsim_fib4_rt_add(data, fib4_rt, extack);
+ err = nsim_fib4_rt_add(data, fib4_rt);
else
- err = nsim_fib4_rt_replace(data, fib4_rt, fib4_rt_old, extack);
+ err = nsim_fib4_rt_replace(data, fib4_rt, fib4_rt_old);
if (err)
nsim_fib4_rt_destroy(fib4_rt);
@@ -390,7 +402,6 @@ static int nsim_fib4_rt_insert(struct nsim_fib_data *data,
static void nsim_fib4_rt_remove(struct nsim_fib_data *data,
const struct fib_entry_notifier_info *fen_info)
{
- struct netlink_ext_ack *extack = fen_info->info.extack;
struct nsim_fib4_rt *fib4_rt;
fib4_rt = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info);
@@ -399,19 +410,15 @@ static void nsim_fib4_rt_remove(struct nsim_fib_data *data,
rhashtable_remove_fast(&data->fib_rt_ht, &fib4_rt->common.ht_node,
nsim_fib_rt_ht_params);
- nsim_fib_account(&data->ipv4.fib, false, extack);
nsim_fib4_rt_destroy(fib4_rt);
}
static int nsim_fib4_event(struct nsim_fib_data *data,
- struct fib_notifier_info *info,
+ struct fib_entry_notifier_info *fen_info,
unsigned long event)
{
- struct fib_entry_notifier_info *fen_info;
int err = 0;
- fen_info = container_of(info, struct fib_entry_notifier_info, info);
-
switch (event) {
case FIB_EVENT_ENTRY_REPLACE:
err = nsim_fib4_rt_insert(data, fen_info);
@@ -445,7 +452,7 @@ static int nsim_fib6_rt_nh_add(struct nsim_fib6_rt *fib6_rt,
{
struct nsim_fib6_rt_nh *fib6_rt_nh;
- fib6_rt_nh = kzalloc(sizeof(*fib6_rt_nh), GFP_ATOMIC);
+ fib6_rt_nh = kzalloc(sizeof(*fib6_rt_nh), GFP_KERNEL);
if (!fib6_rt_nh)
return -ENOMEM;
@@ -457,6 +464,17 @@ static int nsim_fib6_rt_nh_add(struct nsim_fib6_rt *fib6_rt,
return 0;
}
+#if IS_ENABLED(CONFIG_IPV6)
+static void nsim_rt6_release(struct fib6_info *rt)
+{
+ fib6_info_release(rt);
+}
+#else
+static void nsim_rt6_release(struct fib6_info *rt)
+{
+}
+#endif
+
static void nsim_fib6_rt_nh_del(struct nsim_fib6_rt *fib6_rt,
const struct fib6_info *rt)
{
@@ -468,22 +486,20 @@ static void nsim_fib6_rt_nh_del(struct nsim_fib6_rt *fib6_rt,
fib6_rt->nhs--;
list_del(&fib6_rt_nh->list);
-#if IS_ENABLED(CONFIG_IPV6)
- fib6_info_release(fib6_rt_nh->rt);
-#endif
+ nsim_rt6_release(fib6_rt_nh->rt);
kfree(fib6_rt_nh);
}
static struct nsim_fib6_rt *
nsim_fib6_rt_create(struct nsim_fib_data *data,
- struct fib6_entry_notifier_info *fen6_info)
+ struct fib6_info **rt_arr, unsigned int nrt6)
{
- struct fib6_info *iter, *rt = fen6_info->rt;
+ struct fib6_info *rt = rt_arr[0];
struct nsim_fib6_rt *fib6_rt;
int i = 0;
int err;
- fib6_rt = kzalloc(sizeof(*fib6_rt), GFP_ATOMIC);
+ fib6_rt = kzalloc(sizeof(*fib6_rt), GFP_KERNEL);
if (!fib6_rt)
return ERR_PTR(-ENOMEM);
@@ -497,32 +513,18 @@ nsim_fib6_rt_create(struct nsim_fib_data *data,
*/
INIT_LIST_HEAD(&fib6_rt->nh_list);
- err = nsim_fib6_rt_nh_add(fib6_rt, rt);
- if (err)
- goto err_fib_rt_fini;
-
- if (!fen6_info->nsiblings)
- return fib6_rt;
-
- list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
- if (i == fen6_info->nsiblings)
- break;
-
- err = nsim_fib6_rt_nh_add(fib6_rt, iter);
+ for (i = 0; i < nrt6; i++) {
+ err = nsim_fib6_rt_nh_add(fib6_rt, rt_arr[i]);
if (err)
goto err_fib6_rt_nh_del;
- i++;
}
- WARN_ON_ONCE(i != fen6_info->nsiblings);
return fib6_rt;
err_fib6_rt_nh_del:
- list_for_each_entry_continue_reverse(iter, &rt->fib6_siblings,
- fib6_siblings)
- nsim_fib6_rt_nh_del(fib6_rt, iter);
- nsim_fib6_rt_nh_del(fib6_rt, rt);
-err_fib_rt_fini:
+ for (i--; i >= 0; i--) {
+ nsim_fib6_rt_nh_del(fib6_rt, rt_arr[i]);
+ };
nsim_fib_rt_fini(&fib6_rt->common);
kfree(fib6_rt);
return ERR_PTR(err);
@@ -555,127 +557,125 @@ nsim_fib6_rt_lookup(struct rhashtable *fib_rt_ht, const struct fib6_info *rt)
}
static int nsim_fib6_rt_append(struct nsim_fib_data *data,
- struct fib6_entry_notifier_info *fen6_info)
+ struct nsim_fib6_event *fib6_event)
{
- struct fib6_info *iter, *rt = fen6_info->rt;
+ struct fib6_info *rt = fib6_event->rt_arr[0];
struct nsim_fib6_rt *fib6_rt;
- int i = 0;
- int err;
+ int i, err;
fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt);
if (WARN_ON_ONCE(!fib6_rt))
return -EINVAL;
- err = nsim_fib6_rt_nh_add(fib6_rt, rt);
- if (err)
- return err;
- rt->trap = true;
-
- if (!fen6_info->nsiblings)
- return 0;
-
- list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
- if (i == fen6_info->nsiblings)
- break;
-
- err = nsim_fib6_rt_nh_add(fib6_rt, iter);
+ for (i = 0; i < fib6_event->nrt6; i++) {
+ err = nsim_fib6_rt_nh_add(fib6_rt, fib6_event->rt_arr[i]);
if (err)
goto err_fib6_rt_nh_del;
- iter->trap = true;
- i++;
+
+ fib6_event->rt_arr[i]->trap = true;
}
- WARN_ON_ONCE(i != fen6_info->nsiblings);
return 0;
err_fib6_rt_nh_del:
- list_for_each_entry_continue_reverse(iter, &rt->fib6_siblings,
- fib6_siblings) {
- iter->trap = false;
- nsim_fib6_rt_nh_del(fib6_rt, iter);
+ for (i--; i >= 0; i--) {
+ fib6_event->rt_arr[i]->trap = false;
+ nsim_fib6_rt_nh_del(fib6_rt, fib6_event->rt_arr[i]);
}
- rt->trap = false;
- nsim_fib6_rt_nh_del(fib6_rt, rt);
return err;
}
-static void nsim_fib6_rt_hw_flags_set(const struct nsim_fib6_rt *fib6_rt,
+#if IS_ENABLED(CONFIG_IPV6)
+static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data,
+ const struct nsim_fib6_rt *fib6_rt,
bool trap)
{
+ struct net *net = devlink_net(data->devlink);
struct nsim_fib6_rt_nh *fib6_rt_nh;
list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list)
- fib6_info_hw_flags_set(fib6_rt_nh->rt, false, trap);
+ fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap);
+}
+#else
+static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data,
+ const struct nsim_fib6_rt *fib6_rt,
+ bool trap)
+{
}
+#endif
static int nsim_fib6_rt_add(struct nsim_fib_data *data,
- struct nsim_fib6_rt *fib6_rt,
- struct netlink_ext_ack *extack)
+ struct nsim_fib6_rt *fib6_rt)
{
int err;
- err = nsim_fib_account(&data->ipv6.fib, true, extack);
- if (err)
- return err;
-
err = rhashtable_insert_fast(&data->fib_rt_ht,
&fib6_rt->common.ht_node,
nsim_fib_rt_ht_params);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to insert IPv6 route");
+
+ if (err)
goto err_fib_dismiss;
- }
- nsim_fib6_rt_hw_flags_set(fib6_rt, true);
+ msleep(1);
+ nsim_fib6_rt_hw_flags_set(data, fib6_rt, true);
return 0;
err_fib_dismiss:
- nsim_fib_account(&data->ipv6.fib, false, extack);
+ /* Drop the accounting that was increased from the notification
+ * context when FIB_EVENT_ENTRY_REPLACE was triggered.
+ */
+ nsim_fib_account(&data->ipv6.fib, false);
return err;
}
static int nsim_fib6_rt_replace(struct nsim_fib_data *data,
struct nsim_fib6_rt *fib6_rt,
- struct nsim_fib6_rt *fib6_rt_old,
- struct netlink_ext_ack *extack)
+ struct nsim_fib6_rt *fib6_rt_old)
{
int err;
- /* We are replacing a route, so no need to change the accounting. */
+ /* We are replacing a route, so need to remove the accounting which
+ * was increased when FIB_EVENT_ENTRY_REPLACE was triggered.
+ */
+ err = nsim_fib_account(&data->ipv6.fib, false);
+ if (err)
+ return err;
+
err = rhashtable_replace_fast(&data->fib_rt_ht,
&fib6_rt_old->common.ht_node,
&fib6_rt->common.ht_node,
nsim_fib_rt_ht_params);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to replace IPv6 route");
+
+ if (err)
return err;
- }
- nsim_fib6_rt_hw_flags_set(fib6_rt, true);
+ msleep(1);
+ nsim_fib6_rt_hw_flags_set(data, fib6_rt, true);
- nsim_fib6_rt_hw_flags_set(fib6_rt_old, false);
+ nsim_fib6_rt_hw_flags_set(data, fib6_rt_old, false);
nsim_fib6_rt_destroy(fib6_rt_old);
return 0;
}
static int nsim_fib6_rt_insert(struct nsim_fib_data *data,
- struct fib6_entry_notifier_info *fen6_info)
+ struct nsim_fib6_event *fib6_event)
{
- struct netlink_ext_ack *extack = fen6_info->info.extack;
+ struct fib6_info *rt = fib6_event->rt_arr[0];
struct nsim_fib6_rt *fib6_rt, *fib6_rt_old;
int err;
- fib6_rt = nsim_fib6_rt_create(data, fen6_info);
+ fib6_rt = nsim_fib6_rt_create(data, fib6_event->rt_arr,
+ fib6_event->nrt6);
if (IS_ERR(fib6_rt))
return PTR_ERR(fib6_rt);
- fib6_rt_old = nsim_fib6_rt_lookup(&data->fib_rt_ht, fen6_info->rt);
+ fib6_rt_old = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt);
if (!fib6_rt_old)
- err = nsim_fib6_rt_add(data, fib6_rt, extack);
+ err = nsim_fib6_rt_add(data, fib6_rt);
else
- err = nsim_fib6_rt_replace(data, fib6_rt, fib6_rt_old, extack);
+ err = nsim_fib6_rt_replace(data, fib6_rt, fib6_rt_old);
if (err)
nsim_fib6_rt_destroy(fib6_rt);
@@ -683,59 +683,100 @@ static int nsim_fib6_rt_insert(struct nsim_fib_data *data,
return err;
}
-static void
-nsim_fib6_rt_remove(struct nsim_fib_data *data,
- const struct fib6_entry_notifier_info *fen6_info)
+static void nsim_fib6_rt_remove(struct nsim_fib_data *data,
+ struct nsim_fib6_event *fib6_event)
{
- struct netlink_ext_ack *extack = fen6_info->info.extack;
+ struct fib6_info *rt = fib6_event->rt_arr[0];
struct nsim_fib6_rt *fib6_rt;
+ int i;
/* Multipath routes are first added to the FIB trie and only then
* notified. If we vetoed the addition, we will get a delete
* notification for a route we do not have. Therefore, do not warn if
* route was not found.
*/
- fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, fen6_info->rt);
+ fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt);
if (!fib6_rt)
return;
/* If not all the nexthops are deleted, then only reduce the nexthop
* group.
*/
- if (fen6_info->nsiblings + 1 != fib6_rt->nhs) {
- nsim_fib6_rt_nh_del(fib6_rt, fen6_info->rt);
+ if (fib6_event->nrt6 != fib6_rt->nhs) {
+ for (i = 0; i < fib6_event->nrt6; i++)
+ nsim_fib6_rt_nh_del(fib6_rt, fib6_event->rt_arr[i]);
return;
}
rhashtable_remove_fast(&data->fib_rt_ht, &fib6_rt->common.ht_node,
nsim_fib_rt_ht_params);
- nsim_fib_account(&data->ipv6.fib, false, extack);
nsim_fib6_rt_destroy(fib6_rt);
}
+static int nsim_fib6_event_init(struct nsim_fib6_event *fib6_event,
+ struct fib6_entry_notifier_info *fen6_info)
+{
+ struct fib6_info *rt = fen6_info->rt;
+ struct fib6_info **rt_arr;
+ struct fib6_info *iter;
+ unsigned int nrt6;
+ int i = 0;
+
+ nrt6 = fen6_info->nsiblings + 1;
+
+ rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
+ if (!rt_arr)
+ return -ENOMEM;
+
+ fib6_event->rt_arr = rt_arr;
+ fib6_event->nrt6 = nrt6;
+
+ rt_arr[0] = rt;
+ fib6_info_hold(rt);
+
+ if (!fen6_info->nsiblings)
+ return 0;
+
+ list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
+ if (i == fen6_info->nsiblings)
+ break;
+
+ rt_arr[i + 1] = iter;
+ fib6_info_hold(iter);
+ i++;
+ }
+ WARN_ON_ONCE(i != fen6_info->nsiblings);
+
+ return 0;
+}
+
+static void nsim_fib6_event_fini(struct nsim_fib6_event *fib6_event)
+{
+ int i;
+
+ for (i = 0; i < fib6_event->nrt6; i++)
+ nsim_rt6_release(fib6_event->rt_arr[i]);
+ kfree(fib6_event->rt_arr);
+}
+
static int nsim_fib6_event(struct nsim_fib_data *data,
- struct fib_notifier_info *info,
+ struct nsim_fib6_event *fib6_event,
unsigned long event)
{
- struct fib6_entry_notifier_info *fen6_info;
int err = 0;
- fen6_info = container_of(info, struct fib6_entry_notifier_info, info);
-
- if (fen6_info->rt->fib6_src.plen) {
- NL_SET_ERR_MSG_MOD(info->extack, "IPv6 source-specific route is not supported");
+ if (fib6_event->rt_arr[0]->fib6_src.plen)
return 0;
- }
switch (event) {
case FIB_EVENT_ENTRY_REPLACE:
- err = nsim_fib6_rt_insert(data, fen6_info);
+ err = nsim_fib6_rt_insert(data, fib6_event);
break;
case FIB_EVENT_ENTRY_APPEND:
- err = nsim_fib6_rt_append(data, fen6_info);
+ err = nsim_fib6_rt_append(data, fib6_event);
break;
case FIB_EVENT_ENTRY_DEL:
- nsim_fib6_rt_remove(data, fen6_info);
+ nsim_fib6_rt_remove(data, fib6_event);
break;
default:
break;
@@ -744,51 +785,165 @@ static int nsim_fib6_event(struct nsim_fib_data *data,
return err;
}
-static int nsim_fib_event(struct nsim_fib_data *data,
- struct fib_notifier_info *info, unsigned long event)
+static int nsim_fib_event(struct nsim_fib_event *fib_event)
{
int err = 0;
- switch (info->family) {
+ switch (fib_event->family) {
case AF_INET:
- err = nsim_fib4_event(data, info, event);
+ nsim_fib4_event(fib_event->data, &fib_event->fen_info,
+ fib_event->event);
+ fib_info_put(fib_event->fen_info.fi);
break;
case AF_INET6:
- err = nsim_fib6_event(data, info, event);
+ nsim_fib6_event(fib_event->data, &fib_event->fib6_event,
+ fib_event->event);
+ nsim_fib6_event_fini(&fib_event->fib6_event);
+ break;
+ }
+
+ return err;
+}
+
+static int nsim_fib4_prepare_event(struct fib_notifier_info *info,
+ struct nsim_fib_event *fib_event,
+ unsigned long event)
+{
+ struct nsim_fib_data *data = fib_event->data;
+ struct fib_entry_notifier_info *fen_info;
+ struct netlink_ext_ack *extack;
+ int err = 0;
+
+ fen_info = container_of(info, struct fib_entry_notifier_info,
+ info);
+ fib_event->fen_info = *fen_info;
+ extack = info->extack;
+
+ switch (event) {
+ case FIB_EVENT_ENTRY_REPLACE:
+ err = nsim_fib_account(&data->ipv4.fib, true);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries");
+ return err;
+ }
+ break;
+ case FIB_EVENT_ENTRY_DEL:
+ nsim_fib_account(&data->ipv4.fib, false);
+ break;
+ }
+
+ /* Take reference on fib_info to prevent it from being
+ * freed while event is queued. Release it afterwards.
+ */
+ fib_info_hold(fib_event->fen_info.fi);
+
+ return 0;
+}
+
+static int nsim_fib6_prepare_event(struct fib_notifier_info *info,
+ struct nsim_fib_event *fib_event,
+ unsigned long event)
+{
+ struct nsim_fib_data *data = fib_event->data;
+ struct fib6_entry_notifier_info *fen6_info;
+ struct netlink_ext_ack *extack;
+ int err = 0;
+
+ fen6_info = container_of(info, struct fib6_entry_notifier_info,
+ info);
+
+ err = nsim_fib6_event_init(&fib_event->fib6_event, fen6_info);
+ if (err)
+ return err;
+
+ extack = info->extack;
+ switch (event) {
+ case FIB_EVENT_ENTRY_REPLACE:
+ err = nsim_fib_account(&data->ipv6.fib, true);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries");
+ goto err_fib6_event_fini;
+ }
+ break;
+ case FIB_EVENT_ENTRY_DEL:
+ nsim_fib_account(&data->ipv6.fib, false);
break;
}
+ return 0;
+
+err_fib6_event_fini:
+ nsim_fib6_event_fini(&fib_event->fib6_event);
return err;
}
+static int nsim_fib_event_schedule_work(struct nsim_fib_data *data,
+ struct fib_notifier_info *info,
+ unsigned long event)
+{
+ struct nsim_fib_event *fib_event;
+ int err;
+
+ if (info->family != AF_INET && info->family != AF_INET6)
+ /* netdevsim does not support 'RTNL_FAMILY_IP6MR' and
+ * 'RTNL_FAMILY_IPMR' and should ignore them.
+ */
+ return NOTIFY_DONE;
+
+ fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC);
+ if (!fib_event)
+ return NOTIFY_BAD;
+
+ fib_event->data = data;
+ fib_event->event = event;
+ fib_event->family = info->family;
+
+ switch (info->family) {
+ case AF_INET:
+ err = nsim_fib4_prepare_event(info, fib_event, event);
+ break;
+ case AF_INET6:
+ err = nsim_fib6_prepare_event(info, fib_event, event);
+ break;
+ }
+
+ if (err)
+ goto err_fib_prepare_event;
+
+ /* Enqueue the event and trigger the work */
+ spin_lock_bh(&data->fib_event_queue_lock);
+ list_add_tail(&fib_event->list, &data->fib_event_queue);
+ spin_unlock_bh(&data->fib_event_queue_lock);
+ schedule_work(&data->fib_event_work);
+
+ return NOTIFY_DONE;
+
+err_fib_prepare_event:
+ kfree(fib_event);
+ return NOTIFY_BAD;
+}
+
static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data,
fib_nb);
struct fib_notifier_info *info = ptr;
- int err = 0;
-
- /* IPv6 routes can be added via RAs from softIRQ. */
- spin_lock_bh(&data->fib_lock);
+ int err;
switch (event) {
case FIB_EVENT_RULE_ADD:
case FIB_EVENT_RULE_DEL:
err = nsim_fib_rule_event(data, info,
event == FIB_EVENT_RULE_ADD);
- break;
-
+ return notifier_from_errno(err);
case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_APPEND:
case FIB_EVENT_ENTRY_DEL:
- err = nsim_fib_event(data, info, event);
- break;
+ return nsim_fib_event_schedule_work(data, info, event);
}
- spin_unlock_bh(&data->fib_lock);
-
- return notifier_from_errno(err);
+ return NOTIFY_DONE;
}
static void nsim_fib4_rt_free(struct nsim_fib_rt *fib_rt,
@@ -799,7 +954,7 @@ static void nsim_fib4_rt_free(struct nsim_fib_rt *fib_rt,
fib4_rt = container_of(fib_rt, struct nsim_fib4_rt, common);
nsim_fib4_rt_hw_flags_set(devlink_net(devlink), fib4_rt, false);
- nsim_fib_account(&data->ipv4.fib, false, NULL);
+ nsim_fib_account(&data->ipv4.fib, false);
nsim_fib4_rt_destroy(fib4_rt);
}
@@ -809,8 +964,8 @@ static void nsim_fib6_rt_free(struct nsim_fib_rt *fib_rt,
struct nsim_fib6_rt *fib6_rt;
fib6_rt = container_of(fib_rt, struct nsim_fib6_rt, common);
- nsim_fib6_rt_hw_flags_set(fib6_rt, false);
- nsim_fib_account(&data->ipv6.fib, false, NULL);
+ nsim_fib6_rt_hw_flags_set(data, fib6_rt, false);
+ nsim_fib_account(&data->ipv6.fib, false);
nsim_fib6_rt_destroy(fib6_rt);
}
@@ -838,6 +993,9 @@ static void nsim_fib_dump_inconsistent(struct notifier_block *nb)
fib_nb);
struct nsim_fib_rt *fib_rt, *fib_rt_tmp;
+ /* Flush the work to make sure there is no race with notifications. */
+ flush_work(&data->fib_event_work);
+
/* The notifier block is still not registered, so we do not need to
* take any locks here.
*/
@@ -847,8 +1005,8 @@ static void nsim_fib_dump_inconsistent(struct notifier_block *nb)
nsim_fib_rt_free(fib_rt, data);
}
- data->ipv4.rules.num = 0ULL;
- data->ipv6.rules.num = 0ULL;
+ atomic64_set(&data->ipv4.rules.num, 0ULL);
+ atomic64_set(&data->ipv6.rules.num, 0ULL);
}
static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data,
@@ -894,22 +1052,28 @@ static void nsim_nexthop_destroy(struct nsim_nexthop *nexthop)
static int nsim_nexthop_account(struct nsim_fib_data *data, u64 occ,
bool add, struct netlink_ext_ack *extack)
{
- int err = 0;
+ int i, err = 0;
if (add) {
- if (data->nexthops.num + occ <= data->nexthops.max) {
- data->nexthops.num += occ;
- } else {
- err = -ENOSPC;
- NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported nexthops");
- }
+ for (i = 0; i < occ; i++)
+ if (!atomic64_add_unless(&data->nexthops.num, 1,
+ data->nexthops.max)) {
+ err = -ENOSPC;
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported nexthops");
+ goto err_num_decrease;
+ }
} else {
- if (WARN_ON(occ > data->nexthops.num))
+ if (WARN_ON(occ > atomic64_read(&data->nexthops.num)))
return -EINVAL;
- data->nexthops.num -= occ;
+ atomic64_sub(occ, &data->nexthops.num);
}
return err;
+
+err_num_decrease:
+ atomic64_sub(i, &data->nexthops.num);
+ return err;
+
}
static int nsim_nexthop_add(struct nsim_fib_data *data,
@@ -1102,6 +1266,29 @@ static void nsim_fib_set_max_all(struct nsim_fib_data *data,
}
}
+static void nsim_fib_event_work(struct work_struct *work)
+{
+ struct nsim_fib_data *data = container_of(work, struct nsim_fib_data,
+ fib_event_work);
+ struct nsim_fib_event *fib_event, *next_fib_event;
+
+ LIST_HEAD(fib_event_queue);
+
+ spin_lock_bh(&data->fib_event_queue_lock);
+ list_splice_init(&data->fib_event_queue, &fib_event_queue);
+ spin_unlock_bh(&data->fib_event_queue_lock);
+
+ mutex_lock(&data->fib_lock);
+ list_for_each_entry_safe(fib_event, next_fib_event, &fib_event_queue,
+ list) {
+ nsim_fib_event(fib_event);
+ list_del(&fib_event->list);
+ kfree(fib_event);
+ cond_resched();
+ }
+ mutex_unlock(&data->fib_lock);
+}
+
struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
struct netlink_ext_ack *extack)
{
@@ -1117,12 +1304,16 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
if (err)
goto err_data_free;
- spin_lock_init(&data->fib_lock);
+ mutex_init(&data->fib_lock);
INIT_LIST_HEAD(&data->fib_rt_list);
err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params);
if (err)
goto err_rhashtable_nexthop_destroy;
+ INIT_WORK(&data->fib_event_work, nsim_fib_event_work);
+ INIT_LIST_HEAD(&data->fib_event_queue);
+ spin_lock_init(&data->fib_event_queue_lock);
+
nsim_fib_set_max_all(data, devlink);
data->nexthop_nb.notifier_call = nsim_nexthop_event_nb;
@@ -1166,11 +1357,13 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
err_nexthop_nb_unregister:
unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb);
err_rhashtable_fib_destroy:
+ flush_work(&data->fib_event_work);
rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free,
data);
err_rhashtable_nexthop_destroy:
rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free,
data);
+ mutex_destroy(&data->fib_lock);
err_data_free:
kfree(data);
return ERR_PTR(err);
@@ -1190,10 +1383,13 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
NSIM_RESOURCE_IPV4_FIB);
unregister_fib_notifier(devlink_net(devlink), &data->fib_nb);
unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb);
+ flush_work(&data->fib_event_work);
rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free,
data);
rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free,
data);
+ WARN_ON_ONCE(!list_empty(&data->fib_event_queue));
WARN_ON_ONCE(!list_empty(&data->fib_rt_list));
+ mutex_destroy(&data->fib_lock);
kfree(data);
}
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index ac5ff3c3afb1..1e262b23c68b 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -336,13 +336,6 @@ static inline void fib6_info_release(struct fib6_info *f6i)
call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
}
-static inline void fib6_info_hw_flags_set(struct fib6_info *f6i, bool offload,
- bool trap)
-{
- f6i->offload = offload;
- f6i->trap = trap;
-}
-
enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
@@ -545,6 +538,8 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
{
return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
}
+void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
+ bool offload, bool trap);
#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
struct bpf_iter__ipv6_route {
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 8e4fcac4df72..70a2a085dd1a 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -188,6 +188,8 @@ struct netns_ipv4 {
int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;
+ int sysctl_fib_notify_on_flag_change;
+
#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 5ec054473d81..21c0debbd39e 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -51,6 +51,7 @@ struct netns_sysctl_ipv6 {
int max_hbh_opts_len;
int seg6_flowlabel;
bool skip_notify_on_dev_down;
+ int fib_notify_on_flag_change;
};
struct netns_ipv6 {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b94fa8eb831b..ab42f6404fc6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1871,6 +1871,8 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_igmp_llm_reports = 1;
net->ipv4.sysctl_igmp_qrv = 2;
+ net->ipv4.sysctl_fib_notify_on_flag_change = 0;
+
return 0;
}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 818916b2a04d..aff454ef0fa3 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -39,9 +39,10 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
struct netlink_ext_ack *extack);
bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi);
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- struct fib_rt_info *fri, unsigned int flags);
+ const struct fib_rt_info *fri, unsigned int flags);
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len,
u32 tb_id, const struct nl_info *info, unsigned int nlm_flags);
+size_t fib_nlmsg_size(struct fib_info *fi);
static inline void fib_result_assign(struct fib_result *res,
struct fib_info *fi)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b5400cec4f69..4c38facf91c0 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -452,7 +452,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
return -1;
}
-static inline size_t fib_nlmsg_size(struct fib_info *fi)
+size_t fib_nlmsg_size(struct fib_info *fi)
{
size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(4) /* RTA_TABLE */
@@ -1733,7 +1733,7 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
#endif
int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
- struct fib_rt_info *fri, unsigned int flags)
+ const struct fib_rt_info *fri, unsigned int flags)
{
unsigned int nhs = fib_info_num_path(fri->fi);
struct fib_info *fi = fri->fi;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 28117c05dc35..60559b708158 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1038,6 +1038,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
{
struct fib_alias *fa_match;
+ struct sk_buff *skb;
+ int err;
rcu_read_lock();
@@ -1045,9 +1047,34 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
if (!fa_match)
goto out;
+ if (fa_match->offload == fri->offload && fa_match->trap == fri->trap)
+ goto out;
+
fa_match->offload = fri->offload;
fa_match->trap = fri->trap;
+ if (!net->ipv4.sysctl_fib_notify_on_flag_change)
+ goto out;
+
+ skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
+ if (!skb) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ err = fib_dump_info(skb, 0, 0, RTM_NEWROUTE, fri, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV4_ROUTE, NULL, GFP_ATOMIC);
+ goto out;
+
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_ROUTE, err);
out:
rcu_read_unlock();
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3e5f4f2e705e..e5798b3b59d2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1354,6 +1354,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE
},
+ {
+ .procname = "fib_notify_on_flag_change",
+ .data = &init_net.ipv4.sysctl_fib_notify_on_flag_change,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
{ }
};
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8e9c3e9ea36e..0e9994e0ecd7 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -954,6 +954,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
+ net->ipv6.sysctl.fib_notify_on_flag_change = 0;
atomic_set(&net->ipv6.fib6_sernum, 1);
err = ipv6_init_mibs(net);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 41d8f801b75f..92b400eb8476 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6064,6 +6064,50 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
+void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
+ bool offload, bool trap)
+{
+ struct sk_buff *skb;
+ int err;
+
+ if (f6i->offload == offload && f6i->trap == trap)
+ return;
+
+ f6i->offload = offload;
+ f6i->trap = trap;
+
+ if (!rcu_access_pointer(f6i->fib6_node))
+ /* The route was removed from the tree, do not send
+ * notfication.
+ */
+ return;
+
+ if (!net->ipv6.sysctl.fib_notify_on_flag_change)
+ return;
+
+ skb = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL);
+ if (!skb) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ err = rt6_fill_node(net, skb, f6i, NULL, NULL, NULL, 0, RTM_NEWROUTE, 0,
+ 0, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ROUTE, NULL, GFP_KERNEL);
+ return;
+
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+}
+EXPORT_SYMBOL(fib6_info_hw_flags_set);
+
static int ip6_route_dev_notify(struct notifier_block *this,
unsigned long event, void *ptr)
{
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 5b60a4bdd36a..392ef01e3366 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -160,6 +160,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "fib_notify_on_flag_change",
+ .data = &init_net.ipv6.sysctl.fib_notify_on_flag_change,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
{ }
};
diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib.sh b/tools/testing/selftests/drivers/net/mlxsw/fib.sh
index eab79b9e58cd..dcbf32b99bb6 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/fib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/fib.sh
@@ -225,6 +225,16 @@ ipv6_local_replace()
ip -n $ns link del dev dummy1
}
+fib_notify_on_flag_change_set()
+{
+ local notify=$1; shift
+
+ ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify
+ ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify
+
+ log_info "Set fib_notify_on_flag_change to $notify"
+}
+
setup_prepare()
{
ip netns add testns1
@@ -251,6 +261,10 @@ trap cleanup EXIT
setup_prepare
+fib_notify_on_flag_change_set 1
+tests_run
+
+fib_notify_on_flag_change_set 0
tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
index 2f87c3be76a9..251f228ce63e 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
@@ -302,6 +302,16 @@ ipv6_error_path()
ipv6_error_path_replay
}
+fib_notify_on_flag_change_set()
+{
+ local notify=$1; shift
+
+ ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify
+ ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify
+
+ log_info "Set fib_notify_on_flag_change to $notify"
+}
+
setup_prepare()
{
local netdev
@@ -336,6 +346,10 @@ trap cleanup EXIT
setup_prepare
+fib_notify_on_flag_change_set 1
+tests_run
+
+fib_notify_on_flag_change_set 0
tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
new file mode 100755
index 000000000000..16a9dd43aefc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
@@ -0,0 +1,300 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ ipv4_route_addition_test
+ ipv4_route_deletion_test
+ ipv4_route_replacement_test
+ ipv6_route_addition_test
+ ipv6_route_deletion_test
+ ipv6_route_replacement_test
+"
+
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+check_rt_trap()
+{
+ local outfile=$1; shift
+ local line
+
+ # Make sure that the first notification was emitted without RTM_F_TRAP
+ # flag and the second with RTM_F_TRAP flag
+ head -n 1 $outfile | grep -q "rt_trap"
+ if [[ $? -eq 0 ]]; then
+ return 1
+ fi
+
+ head -n 2 $outfile | tail -n 1 | grep -q "rt_trap"
+}
+
+route_notify_check()
+{
+ local outfile=$1; shift
+ local expected_num_lines=$1; shift
+
+ # check the monitor results
+ lines=`wc -l $outfile | cut "-d " -f1`
+ test $lines -eq $expected_num_lines
+ check_err $? "$expected_num_lines notifications were expected but $lines were received"
+
+ if [[ $expected_num_lines -eq 2 ]]; then
+ check_rt_trap $outfile
+ check_err $? "Wrong RTM_F_TRAP flags in notifications"
+ fi
+}
+
+route_addition_check()
+{
+ local ip=$1; shift
+ local notify=$1; shift
+ local route=$1; shift
+ local expected_num_notifications=$1; shift
+
+ ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify
+
+ local outfile=$(mktemp)
+
+ $IP monitor route &> $outfile &
+ sleep 1
+ $IP route add $route dev dummy1
+ sleep 1
+ kill %% && wait %% &> /dev/null
+
+ route_notify_check $outfile $expected_num_notifications
+ rm -f $outfile
+
+ $IP route del $route dev dummy1
+}
+
+ipv4_route_addition_test()
+{
+ RET=0
+
+ local ip="ipv4"
+ local route=192.0.2.0/24
+
+ # Make sure a single notification will be emitted for the programmed
+ # route.
+ local notify=0
+ local expected_num_notifications=1
+ # route_addition_check will assign value to RET.
+ route_addition_check $ip $notify $route $expected_num_notifications
+
+ # Make sure two notifications will be emitted for the programmed route.
+ notify=1
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications
+
+ log_test "IPv4 route addition"
+}
+
+route_deletion_check()
+{
+ local ip=$1; shift
+ local notify=$1; shift
+ local route=$1; shift
+ local expected_num_notifications=$1; shift
+
+ ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify
+ $IP route add $route dev dummy1
+ sleep 1
+
+ local outfile=$(mktemp)
+
+ $IP monitor route &> $outfile &
+ sleep 1
+ $IP route del $route dev dummy1
+ sleep 1
+ kill %% && wait %% &> /dev/null
+
+ route_notify_check $outfile $expected_num_notifications
+ rm -f $outfile
+}
+
+ipv4_route_deletion_test()
+{
+ RET=0
+
+ local ip="ipv4"
+ local route=192.0.2.0/24
+ local expected_num_notifications=1
+
+ # Make sure a single notification will be emitted for the deleted route,
+ # regardless of fib_notify_on_flag_change value.
+ local notify=0
+ # route_deletion_check will assign value to RET.
+ route_deletion_check $ip $notify $route $expected_num_notifications
+
+ notify=1
+ route_deletion_check $ip $notify $route $expected_num_notifications
+
+ log_test "IPv4 route deletion"
+}
+
+route_replacement_check()
+{
+ local ip=$1; shift
+ local notify=$1; shift
+ local route=$1; shift
+ local expected_num_notifications=$1; shift
+
+ ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify
+ $IP route add $route dev dummy1
+ sleep 1
+
+ local outfile=$(mktemp)
+
+ $IP monitor route &> $outfile &
+ sleep 1
+ $IP route replace $route dev dummy2
+ sleep 1
+ kill %% && wait %% &> /dev/null
+
+ route_notify_check $outfile $expected_num_notifications
+ rm -f $outfile
+
+ $IP route del $route dev dummy2
+}
+
+ipv4_route_replacement_test()
+{
+ RET=0
+
+ local ip="ipv4"
+ local route=192.0.2.0/24
+
+ $IP link add name dummy2 type dummy
+ $IP link set dev dummy2 up
+
+ # Make sure a single notification will be emitted for the new route.
+ local notify=0
+ local expected_num_notifications=1
+ # route_replacement_check will assign value to RET.
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
+ # Make sure two notifications will be emitted for the new route.
+ notify=1
+ expected_num_notifications=2
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
+ $IP link del name dummy2
+
+ log_test "IPv4 route replacement"
+}
+
+ipv6_route_addition_test()
+{
+ RET=0
+
+ local ip="ipv6"
+ local route=2001:db8:1::/64
+
+ # Make sure a single notification will be emitted for the programmed
+ # route.
+ local notify=0
+ local expected_num_notifications=1
+ route_addition_check $ip $notify $route $expected_num_notifications
+
+ # Make sure two notifications will be emitted for the programmed route.
+ notify=1
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications
+
+ log_test "IPv6 route addition"
+}
+
+ipv6_route_deletion_test()
+{
+ RET=0
+
+ local ip="ipv6"
+ local route=2001:db8:1::/64
+ local expected_num_notifications=1
+
+ # Make sure a single notification will be emitted for the deleted route,
+ # regardless of fib_notify_on_flag_change value.
+ local notify=0
+ route_deletion_check $ip $notify $route $expected_num_notifications
+
+ notify=1
+ route_deletion_check $ip $notify $route $expected_num_notifications
+
+ log_test "IPv6 route deletion"
+}
+
+ipv6_route_replacement_test()
+{
+ RET=0
+
+ local ip="ipv6"
+ local route=2001:db8:1::/64
+
+ $IP link add name dummy2 type dummy
+ $IP link set dev dummy2 up
+
+ # Make sure a single notification will be emitted for the new route.
+ local notify=0
+ local expected_num_notifications=1
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
+ # Make sure two notifications will be emitted for the new route.
+ notify=1
+ expected_num_notifications=2
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
+ $IP link del name dummy2
+
+ log_test "IPv6 route replacement"
+}
+
+setup_prepare()
+{
+ modprobe netdevsim &> /dev/null
+ echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+ ip netns add testns1
+
+ if [ $? -ne 0 ]; then
+ echo "Failed to add netns \"testns1\""
+ exit 1
+ fi
+
+ devlink dev reload $DEVLINK_DEV netns testns1
+
+ if [ $? -ne 0 ]; then
+ echo "Failed to reload into netns \"testns1\""
+ exit 1
+ fi
+
+ IP="ip -n testns1"
+
+ $IP link add name dummy1 type dummy
+ $IP link set dev dummy1 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ $IP link del name dummy1
+ ip netns del testns1
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS