Diffstat (limited to 'net')
-rw-r--r--  net/core/dev.c       | 31
-rw-r--r--  net/core/skbuff.c    | 51
-rw-r--r--  net/core/sock.c      |  3
-rw-r--r--  net/ipv4/tcp.c       | 25
-rw-r--r--  net/ipv4/tcp_ipv4.c  |  1
-rw-r--r--  net/ipv6/tcp_ipv6.c  |  1
-rw-r--r--  net/tls/tls_sw.c     |  2
7 files changed, 82 insertions, 32 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 4a77ebda4fb1..611bd7197064 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4545,6 +4545,12 @@ static void rps_trigger_softirq(void *data)
 
 #endif /* CONFIG_RPS */
 
+/* Called from hardirq (IPI) context */
+static void trigger_rx_softirq(void *data __always_unused)
+{
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+}
+
 /*
  * Check if this softnet_data structure is another cpu one
  * If yes, queue it to our IPI list and return 1
@@ -6571,6 +6577,28 @@ static int napi_threaded_poll(void *data)
 	return 0;
 }
 
+static void skb_defer_free_flush(struct softnet_data *sd)
+{
+	struct sk_buff *skb, *next;
+	unsigned long flags;
+
+	/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
+	if (!READ_ONCE(sd->defer_list))
+		return;
+
+	spin_lock_irqsave(&sd->defer_lock, flags);
+	skb = sd->defer_list;
+	sd->defer_list = NULL;
+	sd->defer_count = 0;
+	spin_unlock_irqrestore(&sd->defer_lock, flags);
+
+	while (skb != NULL) {
+		next = skb->next;
+		__kfree_skb(skb);
+		skb = next;
+	}
+}
+
 static __latent_entropy void net_rx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -6616,6 +6644,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 
 	net_rps_action_and_irq_enable(sd);
+	skb_defer_free_flush(sd);
 }
 
 struct netdev_adjacent {
@@ -11326,6 +11355,8 @@ static int __init net_dev_init(void)
 		INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
 		sd->cpu = i;
 #endif
+		INIT_CSD(&sd->defer_csd, trigger_rx_softirq, NULL);
+		spin_lock_init(&sd->defer_lock);
 
 		init_gro_hash(&sd->backlog);
 		sd->backlog.poll = process_backlog;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 30b523fa4ad2..028a280fbabd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -204,7 +204,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
 	skb_set_end_offset(skb, size);
 	skb->mac_header = (typeof(skb->mac_header))~0U;
 	skb->transport_header = (typeof(skb->transport_header))~0U;
-
+	skb->alloc_cpu = raw_smp_processor_id();
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
@@ -1037,6 +1037,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	CHECK_SKB_FIELD(napi_id);
 #endif
+	CHECK_SKB_FIELD(alloc_cpu);
 #ifdef CONFIG_XPS
 	CHECK_SKB_FIELD(sender_cpu);
 #endif
@@ -6486,3 +6487,51 @@ free_now:
 }
 EXPORT_SYMBOL(__skb_ext_put);
 #endif /* CONFIG_SKB_EXTENSIONS */
+
+/**
+ * skb_attempt_defer_free - queue skb for remote freeing
+ * @skb: buffer
+ *
+ * Put @skb in a per-cpu list, using the cpu which
+ * allocated the skb/pages to reduce false sharing
+ * and memory zone spinlock contention.
+ */
+void skb_attempt_defer_free(struct sk_buff *skb)
+{
+	int cpu = skb->alloc_cpu;
+	struct softnet_data *sd;
+	unsigned long flags;
+	bool kick;
+
+	if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
+	    !cpu_online(cpu) ||
+	    cpu == raw_smp_processor_id()) {
+		__kfree_skb(skb);
+		return;
+	}
+
+	sd = &per_cpu(softnet_data, cpu);
+	/* We do not send an IPI or any signal.
+	 * Remote cpu will eventually call skb_defer_free_flush()
+	 */
+	spin_lock_irqsave(&sd->defer_lock, flags);
+	skb->next = sd->defer_list;
+	/* Paired with READ_ONCE() in skb_defer_free_flush() */
+	WRITE_ONCE(sd->defer_list, skb);
+	sd->defer_count++;
+
+	/* kick every time queue length reaches 128.
+	 * This should avoid blocking in smp_call_function_single_async().
+	 * This condition should hardly be bit under normal conditions,
+	 * unless cpu suddenly stopped to receive NIC interrupts.
+	 */
+	kick = sd->defer_count == 128;
+
+	spin_unlock_irqrestore(&sd->defer_lock, flags);
+
+	/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
+	 * if we are unlucky enough (this seems very unlikely).
+	 */
+	if (unlikely(kick))
+		smp_call_function_single_async(cpu, &sd->defer_csd);
+}
diff --git a/net/core/sock.c b/net/core/sock.c
index 29abec3eabd8..a0f3989de3d6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2082,9 +2082,6 @@ void sk_destruct(struct sock *sk)
 {
 	bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
 
-	WARN_ON_ONCE(!llist_empty(&sk->defer_list));
-	sk_defer_free_flush(sk);
-
 	if (rcu_access_pointer(sk->sk_reuseport_cb)) {
 		reuseport_detach_sock(sk);
 		use_call_rcu = true;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e20b87b3bf90..db55af9eb37b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -843,7 +843,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 	}
 
 	release_sock(sk);
-	sk_defer_free_flush(sk);
 
 	if (spliced)
 		return spliced;
@@ -1589,20 +1588,6 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		tcp_send_ack(sk);
 }
 
-void __sk_defer_free_flush(struct sock *sk)
-{
-	struct llist_node *head;
-	struct sk_buff *skb, *n;
-
-	head = llist_del_all(&sk->defer_list);
-	llist_for_each_entry_safe(skb, n, head, ll_node) {
-		prefetch(n);
-		skb_mark_not_on_list(skb);
-		__kfree_skb(skb);
-	}
-}
-EXPORT_SYMBOL(__sk_defer_free_flush);
-
 static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_unlink(skb, &sk->sk_receive_queue);
@@ -1610,11 +1595,7 @@ static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
 		sock_rfree(skb);
 		skb->destructor = NULL;
 		skb->sk = NULL;
-		if (!skb_queue_empty(&sk->sk_receive_queue) ||
-		    !llist_empty(&sk->defer_list)) {
-			llist_add(&skb->ll_node, &sk->defer_list);
-			return;
-		}
+		return skb_attempt_defer_free(skb);
 	}
 	__kfree_skb(skb);
 }
@@ -2453,7 +2434,6 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
 			__sk_flush_backlog(sk);
 		} else {
 			tcp_cleanup_rbuf(sk, copied);
-			sk_defer_free_flush(sk);
 			sk_wait_data(sk, &timeo, last);
 		}
 
@@ -2571,7 +2551,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
 	lock_sock(sk);
 	ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags);
 	release_sock(sk);
-	sk_defer_free_flush(sk);
 
 	if (cmsg_flags && ret >= 0) {
 		if (cmsg_flags & TCP_CMSG_TS)
@@ -3096,7 +3075,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 		sk->sk_frag.page = NULL;
 		sk->sk_frag.offset = 0;
 	}
-	sk_defer_free_flush(sk);
 	sk_error_report(sk);
 	return 0;
 }
@@ -4225,7 +4203,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
								  &zc, &len, err);
 		release_sock(sk);
-		sk_defer_free_flush(sk);
 		if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags))
 			goto zerocopy_rcv_cmsg;
 		switch (len) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2c2d42142555..918816ec5dd4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2065,7 +2065,6 @@ process:
 
 	sk_incoming_cpu_update(sk);
 
-	sk_defer_free_flush(sk);
 	bh_lock_sock_nested(sk);
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 54277de7474b..60bdec257ba7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1728,7 +1728,6 @@ process:
 
 	sk_incoming_cpu_update(sk);
 
-	sk_defer_free_flush(sk);
 	bh_lock_sock_nested(sk);
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index ddbe05ec5489..bc54f6c5b1a4 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1911,7 +1911,6 @@ recv_end:
 
 end:
 	release_sock(sk);
-	sk_defer_free_flush(sk);
 	if (psock)
 		sk_psock_put(sk, psock);
 	return copied ? : err;
@@ -1983,7 +1982,6 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
 
 splice_read_end:
 	release_sock(sk);
-	sk_defer_free_flush(sk);
 	return copied ? : err;
 }
 
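The patch above replaces the per-socket llist deferral (sk->defer_list and sk_defer_free_flush()) with per-cpu lists in softnet_data: each skb is handed back to the CPU recorded in the new skb->alloc_cpu field, the owning CPU frees its list from net_rx_action() via skb_defer_free_flush(), and a cross-CPU kick (smp_call_function_single_async() into trigger_rx_softirq()) is sent only when a queue reaches 128 entries. The sketch below models that pattern outside the kernel, as an illustration only: threads stand in for CPUs, a pthread mutex for sd->defer_lock, and a condition variable for the IPI/softirq kick. Every name in it (struct defer_queue, defer_push, defer_flush, KICK_THRESHOLD, owner_thread) is hypothetical and not a kernel API.

/* Userspace sketch of the per-cpu deferred-free pattern, NOT kernel code.
 * Producer threads push freed buffers onto the owning thread's list and
 * only signal it when the list gets unusually long; the owner splices the
 * list out under its lock and frees the buffers locally.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

#define KICK_THRESHOLD 128		/* mirrors "kick = sd->defer_count == 128" */

struct buf {				/* stand-in for struct sk_buff */
	struct buf *next;
	int alloc_cpu;			/* mirrors skb->alloc_cpu */
};

struct defer_queue {			/* stand-in for the new softnet_data fields */
	pthread_mutex_t lock;		/* sd->defer_lock */
	pthread_cond_t kick;		/* the rare IPI / NET_RX_SOFTIRQ kick */
	struct buf *head;		/* sd->defer_list */
	unsigned int count;		/* sd->defer_count */
	bool stop;
};

/* Producer side, like skb_attempt_defer_free(): short critical section, kick rarely. */
static void defer_push(struct defer_queue *q, struct buf *b)
{
	bool kick;

	pthread_mutex_lock(&q->lock);
	b->next = q->head;
	q->head = b;
	kick = (++q->count == KICK_THRESHOLD);
	pthread_mutex_unlock(&q->lock);

	if (kick)			/* unlikely path, like the IPI */
		pthread_cond_signal(&q->kick);
}

/* Owner side, like skb_defer_free_flush(): splice the list out, free it unlocked. */
static void defer_flush(struct defer_queue *q)
{
	struct buf *b, *next;

	pthread_mutex_lock(&q->lock);
	b = q->head;
	q->head = NULL;
	q->count = 0;
	pthread_mutex_unlock(&q->lock);

	for (; b; b = next) {
		next = b->next;
		free(b);
	}
}

/* Owner thread: the analog of net_rx_action() ending with skb_defer_free_flush(sd). */
static void *owner_thread(void *arg)
{
	struct defer_queue *q = arg;

	pthread_mutex_lock(&q->lock);
	while (!q->stop) {
		pthread_cond_wait(&q->kick, &q->lock);
		pthread_mutex_unlock(&q->lock);
		defer_flush(q);
		pthread_mutex_lock(&q->lock);
	}
	pthread_mutex_unlock(&q->lock);
	defer_flush(q);			/* drain whatever is left */
	return NULL;
}

int main(void)
{
	struct defer_queue q = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.kick = PTHREAD_COND_INITIALIZER,
	};
	pthread_t owner;

	pthread_create(&owner, NULL, owner_thread, &q);

	/* "Remote CPU": hand a batch of buffers back to their owner. */
	for (int i = 0; i < 1000; i++) {
		struct buf *b = calloc(1, sizeof(*b));
		b->alloc_cpu = 0;	/* allocated by (and freed on) the owner */
		defer_push(&q, b);
	}

	pthread_mutex_lock(&q.lock);
	q.stop = true;
	pthread_mutex_unlock(&q.lock);
	pthread_cond_signal(&q.kick);
	pthread_join(owner, NULL);
	return 0;
}

The design point this mirrors: the remote freer pays only a short lock-protected push onto the owner's list, the owner splices the whole list out in O(1) and frees it where the memory was allocated, and the expensive cross-CPU notification is reserved for the rare case where the list grows past the threshold.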
