Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 233
1 file changed, 170 insertions(+), 63 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 0608605cfc24..2b514d95c528 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -462,7 +462,9 @@ EXPORT_PER_CPU_SYMBOL(softnet_data);
  * PP consumers must pay attention to run APIs in the appropriate context
  * (e.g. NAPI context).
  */
-DEFINE_PER_CPU(struct page_pool *, system_page_pool);
+DEFINE_PER_CPU(struct page_pool_bh, system_page_pool) = {
+	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
 
 #ifdef CONFIG_LOCKDEP
 /*
@@ -828,7 +830,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
 	dev_hold(dev);
 	rcu_read_unlock();
 
-	dev = __netdev_put_lock(dev);
+	dev = __netdev_put_lock(dev, net);
 	if (!dev)
 		return NULL;
 
@@ -1039,10 +1041,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
  * This helper is intended for locking net_device after it has been looked up
  * using a lockless lookup helper. Lock prevents the instance from going away.
  */
-struct net_device *__netdev_put_lock(struct net_device *dev)
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net)
 {
 	netdev_lock(dev);
-	if (dev->reg_state > NETREG_REGISTERED) {
+	if (dev->reg_state > NETREG_REGISTERED ||
+	    dev->moving_ns || !net_eq(dev_net(dev), net)) {
 		netdev_unlock(dev);
 		dev_put(dev);
 		return NULL;
@@ -1051,6 +1054,20 @@ struct net_device *__netdev_put_lock(struct net_device *dev)
 	return dev;
 }
 
+static struct net_device *
+__netdev_put_lock_ops_compat(struct net_device *dev, struct net *net)
+{
+	netdev_lock_ops_compat(dev);
+	if (dev->reg_state > NETREG_REGISTERED ||
+	    dev->moving_ns || !net_eq(dev_net(dev), net)) {
+		netdev_unlock_ops_compat(dev);
+		dev_put(dev);
+		return NULL;
+	}
+	dev_put(dev);
+	return dev;
+}
+
 /**
  * netdev_get_by_index_lock() - find a device by its ifindex
  * @net: the applicable net namespace
@@ -1070,7 +1087,19 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
 	if (!dev)
 		return NULL;
 
-	return __netdev_put_lock(dev);
+	return __netdev_put_lock(dev, net);
+}
+
+struct net_device *
+netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex)
+{
+	struct net_device *dev;
+
+	dev = dev_get_by_index(net, ifindex);
+	if (!dev)
+		return NULL;
+
+	return __netdev_put_lock_ops_compat(dev, net);
 }
 
 struct net_device *
@@ -1090,7 +1119,32 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev,
 		dev_hold(dev);
 		rcu_read_unlock();
 
-		dev = __netdev_put_lock(dev);
+		dev = __netdev_put_lock(dev, net);
 		if (dev)
 			return dev;
 
 		(*index)++;
 	} while (true);
 }
 
+struct net_device *
+netdev_xa_find_lock_ops_compat(struct net *net, struct net_device *dev,
+			       unsigned long *index)
+{
+	if (dev)
+		netdev_unlock_ops_compat(dev);
+
+	do {
+		rcu_read_lock();
+		dev = xa_find(&net->dev_by_index, index, ULONG_MAX, XA_PRESENT);
+		if (!dev) {
+			rcu_read_unlock();
+			return NULL;
+		}
+		dev_hold(dev);
+		rcu_read_unlock();
+
+		dev = __netdev_put_lock_ops_compat(dev, net);
+		if (dev)
+			return dev;
+
+		(*index)++;
+	} while (true);
+}
+
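Note: the hunks above tighten the contract of __netdev_put_lock(): besides rejecting an unregistering device, it now also rejects one that is mid-move to another namespace or already belongs to a different netns than the caller looked it up in. A minimal sketch of the intended caller pattern for these lookup-and-lock helpers (not part of this patch; error handling elided):

	struct net_device *dev;

	/* lockless lookup takes a reference; __netdev_put_lock() converts
	 * that reference into holding the per-device instance lock
	 */
	dev = netdev_get_by_index_lock(net, ifindex);
	if (!dev)
		return -ENODEV;	/* gone, unregistering, or moved netns */

	/* ... operate on dev under its instance lock ... */

	netdev_unlock(dev);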
@@ -1518,16 +1572,10 @@ void netdev_features_change(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_features_change);
 
-/**
- * netdev_state_change - device changes state
- * @dev: device to cause notification
- *
- * Called to indicate a device has changed state. This function calls
- * the notifier chains for netdev_chain and sends a NEWLINK message
- * to the routing socket.
- */
-void netdev_state_change(struct net_device *dev)
+void netif_state_change(struct net_device *dev)
 {
+	netdev_ops_assert_locked_or_invisible(dev);
+
 	if (dev->flags & IFF_UP) {
 		struct netdev_notifier_change_info change_info = {
 			.info.dev = dev,
@@ -1538,7 +1586,6 @@ void netdev_state_change(struct net_device *dev)
 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL);
 	}
 }
-EXPORT_SYMBOL(netdev_state_change);
 
 /**
  * __netdev_notify_peers - notify network peers about existence of @dev,
@@ -3549,9 +3596,10 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
+#ifdef CONFIG_NET_CRC32C
 int skb_crc32c_csum_help(struct sk_buff *skb)
 {
-	__le32 crc32c_csum;
+	u32 crc;
 	int ret = 0, offset, start;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -3579,15 +3627,14 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
 	if (ret)
 		goto out;
 
-	crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
-						  skb->len - start, ~(__u32)0,
-						  crc32c_csum_stub));
-	*(__le32 *)(skb->data + offset) = crc32c_csum;
+	crc = ~skb_crc32c(skb, start, skb->len - start, ~0);
+	*(__le32 *)(skb->data + offset) = cpu_to_le32(crc);
 	skb_reset_csum_not_inet(skb);
 out:
 	return ret;
 }
 EXPORT_SYMBOL(skb_crc32c_csum_help);
+#endif /* CONFIG_NET_CRC32C */
 
 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
@@ -3851,12 +3898,42 @@ sw_checksum:
 }
 EXPORT_SYMBOL(skb_csum_hwoffload_help);
 
+static struct sk_buff *validate_xmit_unreadable_skb(struct sk_buff *skb,
+						    struct net_device *dev)
+{
+	struct skb_shared_info *shinfo;
+	struct net_iov *niov;
+
+	if (likely(skb_frags_readable(skb)))
+		goto out;
+
+	if (!dev->netmem_tx)
+		goto out_free;
+
+	shinfo = skb_shinfo(skb);
+
+	if (shinfo->nr_frags > 0) {
+		niov = netmem_to_net_iov(skb_frag_netmem(&shinfo->frags[0]));
+		if (net_is_devmem_iov(niov) &&
+		    net_devmem_iov_binding(niov)->dev != dev)
+			goto out_free;
+	}
+
+out:
+	return skb;
+
+out_free:
+	kfree_skb(skb);
+	return NULL;
+}
+
 static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
 {
 	netdev_features_t features;
 
-	if (!skb_frags_readable(skb))
-		goto out_kfree_skb;
+	skb = validate_xmit_unreadable_skb(skb, dev);
+	if (unlikely(!skb))
+		goto out_null;
 
 	features = netif_skb_features(skb);
 	skb = validate_xmit_vlan(skb, features);
@@ -4738,6 +4815,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	}
 
 use_local_napi:
+	DEBUG_NET_WARN_ON_ONCE(!list_empty(&napi->poll_list));
 	list_add_tail(&napi->poll_list, &sd->poll_list);
 	WRITE_ONCE(napi->list_owner, smp_processor_id());
 	/* If not called from net_rx_action()
@@ -4953,7 +5031,8 @@ static void rps_trigger_softirq(void *data)
 	struct softnet_data *sd = data;
 
 	____napi_schedule(sd, &sd->backlog);
-	sd->received_rps++;
+	/* Pairs with READ_ONCE() in softnet_seq_show() */
+	WRITE_ONCE(sd->received_rps, sd->received_rps + 1);
 }
 
 #endif /* CONFIG_RPS */
@@ -5038,7 +5117,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
 		rcu_read_lock();
 		fl = rcu_dereference(sd->flow_limit);
 		if (fl) {
-			new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
+			new_flow = hash_32(skb_get_hash(skb), fl->log_buckets);
 			old_flow = fl->history[fl->history_head];
 			fl->history[fl->history_head] = new_flow;
 
@@ -5049,7 +5128,8 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
 				fl->buckets[old_flow]--;
 
 			if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
-				fl->count++;
+				/* Pairs with READ_ONCE() in softnet_seq_show() */
+				WRITE_ONCE(fl->count, fl->count + 1);
 				rcu_read_unlock();
 				return true;
 			}
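Note: skb_flow_limit() now picks its bucket with hash_32() instead of masking the flow hash. A sketch of what that computes (hash_32() is the stock helper from <linux/hash.h>; flow_bucket() is a hypothetical name for illustration):

	#include <linux/hash.h>

	static u32 flow_bucket(u32 flow_hash, unsigned int log_buckets)
	{
		/* hash_32() multiplies by GOLDEN_RATIO_32 and keeps the top
		 * log_buckets bits, so all 32 bits of the flow hash influence
		 * the bucket, unlike `flow_hash & (num_buckets - 1)`, which
		 * only ever looks at the low bits
		 */
		return hash_32(flow_hash, log_buckets);
	}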
@@ -5245,7 +5325,10 @@ netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog)
 	struct sk_buff *skb = *pskb;
 	int err, hroom, troom;
 
-	if (!skb_cow_data_for_xdp(this_cpu_read(system_page_pool), pskb, prog))
+	local_lock_nested_bh(&system_page_pool.bh_lock);
+	err = skb_cow_data_for_xdp(this_cpu_read(system_page_pool.pool), pskb, prog);
+	local_unlock_nested_bh(&system_page_pool.bh_lock);
+	if (!err)
 		return 0;
 
 	/* In case we have to go down the path and also linearize,
@@ -7394,9 +7477,14 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 
 	work = __napi_poll(n, &do_repoll);
 
-	if (do_repoll)
+	if (do_repoll) {
+#if defined(CONFIG_DEBUG_NET)
+		if (unlikely(!napi_is_scheduled(n)))
+			pr_crit("repoll requested for device %s %ps but napi is not scheduled.\n",
+				n->dev->name, n->poll);
+#endif
 		list_add_tail(&n->poll_list, repoll);
-
+	}
 	netpoll_poll_unlock(have);
 
 	return work;
@@ -7522,7 +7610,8 @@ start:
 		 */
 		if (unlikely(budget <= 0 ||
			     time_after_eq(jiffies, time_limit))) {
-			sd->time_squeeze++;
+			/* Pairs with READ_ONCE() in softnet_seq_show() */
+			WRITE_ONCE(sd->time_squeeze, sd->time_squeeze + 1);
 			break;
 		}
 	}
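Note: the WRITE_ONCE() annotations above pair with lockless reads in softnet_seq_show(), which samples these per-CPU counters from process context while softirq code updates them. A sketch of the reader side (softnet_seq_sample() is a hypothetical name; the real reader lives in net/core/net-procfs.c):

	static void softnet_seq_sample(const struct softnet_data *sd)
	{
		/* READ_ONCE()/WRITE_ONCE() forbid the compiler from tearing
		 * or re-reading counters that are written in softirq context
		 * and read here without any lock
		 */
		unsigned int time_squeeze = READ_ONCE(sd->time_squeeze);

		/* ... emit time_squeeze into the seq_file ... */
	}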
@@ -9195,23 +9284,20 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
 			dev_change_rx_flags(dev, IFF_PROMISC);
 	}
 
-	if (notify)
+	if (notify) {
+		/* The ops lock is only required to ensure consistent locking
+		 * for `NETDEV_CHANGE` notifiers. This function is sometimes
+		 * called without the lock, even for devices that are ops
+		 * locked, such as in `dev_uc_sync_multiple` when using
+		 * bonding or teaming.
+		 */
+		netdev_ops_assert_locked(dev);
 		__dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL);
+	}
 	return 0;
 }
 
-/**
- * dev_set_promiscuity - update promiscuity count on a device
- * @dev: device
- * @inc: modifier
- *
- * Add or remove promiscuity from a device. While the count in the device
- * remains above zero the interface remains promiscuous. Once it hits zero
- * the device reverts back to normal filtering operation. A negative inc
- * value is used to drop promiscuity on the device.
- * Return 0 if successful or a negative errno code on error.
- */
-int dev_set_promiscuity(struct net_device *dev, int inc)
+int netif_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned int old_flags = dev->flags;
 	int err;
@@ -9223,7 +9309,6 @@ int dev_set_promiscuity(struct net_device *dev, int inc)
 	dev_set_rx_mode(dev);
 	return err;
 }
-EXPORT_SYMBOL(dev_set_promiscuity);
 
 int netif_set_allmulti(struct net_device *dev, int inc, bool notify)
 {
@@ -9584,7 +9669,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
 }
 EXPORT_SYMBOL(dev_pre_changeaddr_notify);
 
-int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
			  struct netlink_ext_ack *extack)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -9592,15 +9677,15 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 	if (!ops->ndo_set_mac_address)
 		return -EOPNOTSUPP;
-	if (sa->sa_family != dev->type)
+	if (ss->ss_family != dev->type)
 		return -EINVAL;
 	if (!netif_device_present(dev))
 		return -ENODEV;
-	err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
+	err = dev_pre_changeaddr_notify(dev, ss->__data, extack);
 	if (err)
 		return err;
-	if (memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) {
-		err = ops->ndo_set_mac_address(dev, sa);
+	if (memcmp(dev->dev_addr, ss->__data, dev->addr_len)) {
+		err = ops->ndo_set_mac_address(dev, ss);
 		if (err)
 			return err;
 	}
@@ -9612,6 +9697,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 
 DECLARE_RWSEM(dev_addr_sem);
 
+/* "sa" is a true struct sockaddr with limited "sa_data" member. */
 int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
 {
 	size_t size = sizeof(sa->sa_data_min);
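Note: the netif_set_mac_address() hunks above widen the address argument to struct sockaddr_storage, which can carry hardware addresses longer than sockaddr's 14-byte sa_data (e.g. InfiniBand's 20 bytes). A sketch of an in-kernel caller under the new signature (example_set_mac() is hypothetical, for illustration only):

	static int example_set_mac(struct net_device *dev, const u8 *addr,
				   struct netlink_ext_ack *extack)
	{
		struct sockaddr_storage ss = { .ss_family = dev->type };

		/* sockaddr_storage is large enough for any dev->addr_len */
		memcpy(ss.__data, addr, dev->addr_len);
		return netif_set_mac_address(dev, &ss, extack);
	}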
@@ -10460,6 +10546,7 @@ static void netdev_sync_lower_features(struct net_device *upper,
 		if (!(features & feature) && (lower->features & feature)) {
 			netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
				   &feature, lower->name);
+			netdev_lock_ops(lower);
 			lower->wanted_features &= ~feature;
 			__netdev_update_features(lower);
 
@@ -10468,6 +10555,7 @@ static void netdev_sync_lower_features(struct net_device *upper,
					    &feature, lower->name);
 			else
 				netdev_features_change(lower);
+			netdev_unlock_ops(lower);
 		}
 	}
 }
@@ -11064,8 +11152,7 @@ int register_netdevice(struct net_device *dev)
 	 * Prevent userspace races by waiting until the network
 	 * device is fully setup before sending notifications.
 	 */
-	if (!dev->rtnl_link_ops ||
-	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+	if (!(dev->rtnl_link_ops && dev->rtnl_link_initializing))
 		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL);
 
 out:
@@ -11941,15 +12028,24 @@ void unregister_netdevice_many_notify(struct list_head *head,
 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
 	}
 
-	/* If device is running, close it first. */
+	/* If device is running, close it first. Start with ops locked... */
 	list_for_each_entry(dev, head, unreg_list) {
-		list_add_tail(&dev->close_list, &close_head);
-		netdev_lock_ops(dev);
+		if (netdev_need_ops_lock(dev)) {
+			list_add_tail(&dev->close_list, &close_head);
+			netdev_lock(dev);
+		}
+	}
+	dev_close_many(&close_head, true);
+	/* ... now unlock them and go over the rest. */
+	list_for_each_entry(dev, head, unreg_list) {
+		if (netdev_need_ops_lock(dev))
+			netdev_unlock(dev);
+		else
+			list_add_tail(&dev->close_list, &close_head);
 	}
 	dev_close_many(&close_head, true);
 
 	list_for_each_entry(dev, head, unreg_list) {
-		netdev_unlock_ops(dev);
 		/* And unlink it from device chain. */
 		unlist_netdevice(dev);
 		netdev_lock(dev);
@@ -11964,9 +12060,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
 		struct sk_buff *skb = NULL;
 
 		/* Shutdown queueing discipline. */
+		netdev_lock_ops(dev);
 		dev_shutdown(dev);
 		dev_tcx_uninstall(dev);
-		netdev_lock_ops(dev);
 		dev_xdp_uninstall(dev);
 		dev_memory_provider_uninstall(dev);
 		netdev_unlock_ops(dev);
@@ -11979,8 +12075,7 @@
 		 */
 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
-		if (!dev->rtnl_link_ops ||
-		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+		if (!(dev->rtnl_link_ops && dev->rtnl_link_initializing))
 			skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
						     GFP_KERNEL, NULL, 0,
						     portid, nlh);
@@ -12154,12 +12249,18 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 	netif_close(dev);
 	/* And unlink it from device chain */
 	unlist_netdevice(dev);
-	netdev_unlock_ops(dev);
+
+	if (!netdev_need_ops_lock(dev))
+		netdev_lock(dev);
+	dev->moving_ns = true;
+	netdev_unlock(dev);
 
 	synchronize_net();
 
 	/* Shutdown queueing discipline. */
+	netdev_lock_ops(dev);
 	dev_shutdown(dev);
+	netdev_unlock_ops(dev);
 
 	/* Notify protocols, that we are about to destroy
 	 * this device. They should clean all the things.
@@ -12190,7 +12291,9 @@
 	move_netdevice_notifiers_dev_net(dev, net);
 
 	/* Actually switch the network namespace */
+	netdev_lock(dev);
 	dev_net_set(dev, net);
+	netdev_unlock(dev);
 	dev->ifindex = new_ifindex;
 
 	if (new_name[0]) {
@@ -12216,7 +12319,11 @@
 	err = netdev_change_owner(dev, net_old, net);
 	WARN_ON(err);
 
-	netdev_lock_ops(dev);
+	netdev_lock(dev);
+	dev->moving_ns = false;
+	if (!netdev_need_ops_lock(dev))
+		netdev_unlock(dev);
+
 	/* Add the device back in the hashes */
 	list_netdevice(dev);
 
 	/* Notify protocols, that a new device appeared. */
@@ -12627,7 +12734,7 @@ static int net_page_pool_create(int cpuid)
 		return err;
 	}
 
-	per_cpu(system_page_pool, cpuid) = pp_ptr;
+	per_cpu(system_page_pool.pool, cpuid) = pp_ptr;
 #endif
 	return 0;
 }
@@ -12757,13 +12864,13 @@ out:
 	for_each_possible_cpu(i) {
 		struct page_pool *pp_ptr;
 
-		pp_ptr = per_cpu(system_page_pool, i);
+		pp_ptr = per_cpu(system_page_pool, i);
 		if (!pp_ptr)
 			continue;
 
 		xdp_unreg_page_pool(pp_ptr);
 		page_pool_destroy(pp_ptr);
 
-		per_cpu(system_page_pool, i) = NULL;
+		per_cpu(system_page_pool.pool, i) = NULL;
 	}
}
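Note: with system_page_pool wrapped in struct page_pool_bh, every per-CPU access goes through the nested-BH local lock, which becomes a real lock on PREEMPT_RT where softirq context alone no longer guarantees exclusive access. The resulting access pattern, as used in netif_skb_check_for_xdp() above (a sketch, field names as introduced by this patch):

	struct page_pool *pool;

	local_lock_nested_bh(&system_page_pool.bh_lock);
	pool = this_cpu_read(system_page_pool.pool);
	/* ... allocate or recycle pages via pool ... */
	local_unlock_nested_bh(&system_page_pool.bh_lock);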