Diffstat (limited to 'net')
-rw-r--r--  net/core/dev.c       | 31
-rw-r--r--  net/core/skbuff.c    | 51
-rw-r--r--  net/core/sock.c      |  3
-rw-r--r--  net/ipv4/tcp.c       | 25
-rw-r--r--  net/ipv4/tcp_ipv4.c  |  1
-rw-r--r--  net/ipv6/tcp_ipv6.c  |  1
-rw-r--r--  net/tls/tls_sw.c     |  2
7 files changed, 82 insertions(+), 32 deletions(-)
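
The diffstat is limited to net/, so the header-side additions this series relies on (the per-cpu deferral fields in struct softnet_data and the new skb->alloc_cpu field) do not appear below. The excerpt that follows is an editorial sketch of what those additions presumably look like; field types, annotations and placement in include/linux/netdevice.h and include/linux/skbuff.h are assumptions inferred from how dev.c and skbuff.c use the fields.

/* Editorial sketch, not part of this diff: fields assumed to be added
 * outside net/, inferred from their use in net/core/dev.c and
 * net/core/skbuff.c below.
 */
struct softnet_data {
	/* ... existing fields ... */
	spinlock_t		defer_lock;	/* protects defer_list/defer_count */
	int			defer_count;	/* number of queued skbs */
	struct sk_buff		*defer_list;	/* skbs queued by remote CPUs */
	call_single_data_t	defer_csd;	/* IPI used to kick NET_RX_SOFTIRQ */
};

struct sk_buff {
	/* ... existing fields ... */
	u16			alloc_cpu;	/* cpu that allocated the skb (type assumed) */
};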
diff --git a/net/core/dev.c b/net/core/dev.c
index 4a77ebda4fb1..611bd7197064 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4545,6 +4545,12 @@ static void rps_trigger_softirq(void *data)
#endif /* CONFIG_RPS */
+/* Called from hardirq (IPI) context */
+static void trigger_rx_softirq(void *data __always_unused)
+{
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+}
+
/*
* Check if this softnet_data structure is another cpu one
* If yes, queue it to our IPI list and return 1
@@ -6571,6 +6577,28 @@ static int napi_threaded_poll(void *data)
return 0;
}
+static void skb_defer_free_flush(struct softnet_data *sd)
+{
+ struct sk_buff *skb, *next;
+ unsigned long flags;
+
+ /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
+ if (!READ_ONCE(sd->defer_list))
+ return;
+
+ spin_lock_irqsave(&sd->defer_lock, flags);
+ skb = sd->defer_list;
+ sd->defer_list = NULL;
+ sd->defer_count = 0;
+ spin_unlock_irqrestore(&sd->defer_lock, flags);
+
+ while (skb != NULL) {
+ next = skb->next;
+ __kfree_skb(skb);
+ skb = next;
+ }
+}
+
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -6616,6 +6644,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
+ skb_defer_free_flush(sd);
}
struct netdev_adjacent {
@@ -11326,6 +11355,8 @@ static int __init net_dev_init(void)
INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
sd->cpu = i;
#endif
+ INIT_CSD(&sd->defer_csd, trigger_rx_softirq, NULL);
+ spin_lock_init(&sd->defer_lock);
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
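
net_dev_init() above prepares sd->defer_csd with INIT_CSD() so that the producer side (skb_attempt_defer_free() in skbuff.c below) can run trigger_rx_softirq() on the owning CPU via smp_call_function_single_async() when a remote defer list grows too long. The toy module below is an editorial sketch of that csd pattern in isolation, not part of the patch; the demo_* identifiers and the choice of target CPU are invented, and CPU hotplug is ignored for brevity.

// SPDX-License-Identifier: GPL-2.0
/* Editorial sketch: initialize a call_single_data_t once with
 * INIT_CSD(), then asynchronously run its callback on a remote CPU.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpumask.h>

static call_single_data_t demo_csd;

/* Runs in hardirq (IPI) context on the target CPU, like
 * trigger_rx_softirq() above, so keep the work minimal.
 */
static void demo_remote_work(void *info)
{
	pr_info("demo csd ran on cpu %d\n", smp_processor_id());
}

static int __init demo_init(void)
{
	/* Arbitrary online CPU; a real user would pick a meaningful one
	 * (the patch uses skb->alloc_cpu) and handle hotplug.
	 */
	int cpu = cpumask_last(cpu_online_mask);

	INIT_CSD(&demo_csd, demo_remote_work, NULL);
	smp_call_function_single_async(cpu, &demo_csd);
	return 0;
}

static void __exit demo_exit(void)
{
	/* The single csd fired at init time has completed long ago. */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");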
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 30b523fa4ad2..028a280fbabd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -204,7 +204,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
skb_set_end_offset(skb, size);
skb->mac_header = (typeof(skb->mac_header))~0U;
skb->transport_header = (typeof(skb->transport_header))~0U;
-
+ skb->alloc_cpu = raw_smp_processor_id();
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
@@ -1037,6 +1037,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_NET_RX_BUSY_POLL
CHECK_SKB_FIELD(napi_id);
#endif
+ CHECK_SKB_FIELD(alloc_cpu);
#ifdef CONFIG_XPS
CHECK_SKB_FIELD(sender_cpu);
#endif
@@ -6486,3 +6487,51 @@ free_now:
}
EXPORT_SYMBOL(__skb_ext_put);
#endif /* CONFIG_SKB_EXTENSIONS */
+
+/**
+ * skb_attempt_defer_free - queue skb for remote freeing
+ * @skb: buffer
+ *
+ * Put @skb in a per-cpu list, using the cpu which
+ * allocated the skb/pages to reduce false sharing
+ * and memory zone spinlock contention.
+ */
+void skb_attempt_defer_free(struct sk_buff *skb)
+{
+ int cpu = skb->alloc_cpu;
+ struct softnet_data *sd;
+ unsigned long flags;
+ bool kick;
+
+ if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
+ !cpu_online(cpu) ||
+ cpu == raw_smp_processor_id()) {
+ __kfree_skb(skb);
+ return;
+ }
+
+ sd = &per_cpu(softnet_data, cpu);
+ /* We usually do not send an IPI or any signal.
+ * The remote cpu will eventually call skb_defer_free_flush()
+ * from net_rx_action().
+ */
+ spin_lock_irqsave(&sd->defer_lock, flags);
+ skb->next = sd->defer_list;
+ /* Paired with READ_ONCE() in skb_defer_free_flush() */
+ WRITE_ONCE(sd->defer_list, skb);
+ sd->defer_count++;
+
+ /* Kick every time the queue length reaches 128.
+ * This should avoid blocking in smp_call_function_single_async().
+ * This condition should rarely be hit under normal conditions,
+ * unless the cpu suddenly stops receiving NIC interrupts.
+ */
+ kick = sd->defer_count == 128;
+
+ spin_unlock_irqrestore(&sd->defer_lock, flags);
+
+ /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
+ * if we were unlucky enough to hit the kick threshold (this seems very unlikely).
+ */
+ if (unlikely(kick))
+ smp_call_function_single_async(cpu, &sd->defer_csd);
+}
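
Taken together with skb_defer_free_flush() in dev.c above, this is the whole protocol: any CPU finishing with an skb pushes it onto the allocating CPU's lock-protected list, and the allocating CPU splices the list under the lock and frees outside it on its next NET_RX_SOFTIRQ run. The user-space model below is an editorial sketch of that pattern only, not kernel code: pthread mutexes stand in for the irq-safe spinlock, plain loads stand in for READ_ONCE()/WRITE_ONCE() (a genuinely concurrent version would need C11 atomics on the lockless fast path), and there is no softirq/IPI side.

/* Editorial user-space model of the deferral protocol; not part of the
 * patch.  Single-threaded driver for brevity.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct buf {				/* models struct sk_buff */
	struct buf *next;
	int alloc_owner;		/* models skb->alloc_cpu */
	char data[256];
};

struct owner {				/* models struct softnet_data */
	pthread_mutex_t defer_lock;
	struct buf *defer_list;
	int defer_count;
} owners[2];

/* Producer side: models skb_attempt_defer_free(). */
static void buf_attempt_defer_free(struct buf *b, int self)
{
	struct owner *o = &owners[b->alloc_owner];

	if (b->alloc_owner == self) {	/* local buffer: free right away */
		free(b);
		return;
	}
	pthread_mutex_lock(&o->defer_lock);
	b->next = o->defer_list;
	o->defer_list = b;
	o->defer_count++;		/* the kernel kicks an IPI at 128 */
	pthread_mutex_unlock(&o->defer_lock);
}

/* Consumer side: models skb_defer_free_flush(), run only by the owner. */
static void buf_defer_free_flush(int self)
{
	struct owner *o = &owners[self];
	struct buf *b, *next;

	if (!o->defer_list)		/* lockless fast path */
		return;

	pthread_mutex_lock(&o->defer_lock);
	b = o->defer_list;		/* splice the whole list ... */
	o->defer_list = NULL;
	o->defer_count = 0;
	pthread_mutex_unlock(&o->defer_lock);

	while (b) {			/* ... and free outside the lock */
		next = b->next;
		free(b);
		b = next;
	}
}

int main(void)
{
	struct buf *b;
	int i;

	for (i = 0; i < 2; i++)
		pthread_mutex_init(&owners[i].defer_lock, NULL);

	/* "CPU 0" allocated the buffer, "CPU 1" finishes with it ... */
	b = calloc(1, sizeof(*b));
	b->alloc_owner = 0;
	buf_attempt_defer_free(b, 1);	/* queued on owner 0's list */

	/* ... and owner 0 flushes later, as net_rx_action() does above. */
	buf_defer_free_flush(0);
	return 0;
}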
diff --git a/net/core/sock.c b/net/core/sock.c
index 29abec3eabd8..a0f3989de3d6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2082,9 +2082,6 @@ void sk_destruct(struct sock *sk)
{
bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
- WARN_ON_ONCE(!llist_empty(&sk->defer_list));
- sk_defer_free_flush(sk);
-
if (rcu_access_pointer(sk->sk_reuseport_cb)) {
reuseport_detach_sock(sk);
use_call_rcu = true;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e20b87b3bf90..db55af9eb37b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -843,7 +843,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
release_sock(sk);
- sk_defer_free_flush(sk);
if (spliced)
return spliced;
@@ -1589,20 +1588,6 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
tcp_send_ack(sk);
}
-void __sk_defer_free_flush(struct sock *sk)
-{
- struct llist_node *head;
- struct sk_buff *skb, *n;
-
- head = llist_del_all(&sk->defer_list);
- llist_for_each_entry_safe(skb, n, head, ll_node) {
- prefetch(n);
- skb_mark_not_on_list(skb);
- __kfree_skb(skb);
- }
-}
-EXPORT_SYMBOL(__sk_defer_free_flush);
-
static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
@@ -1610,11 +1595,7 @@ static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
sock_rfree(skb);
skb->destructor = NULL;
skb->sk = NULL;
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- !llist_empty(&sk->defer_list)) {
- llist_add(&skb->ll_node, &sk->defer_list);
- return;
- }
+ return skb_attempt_defer_free(skb);
}
__kfree_skb(skb);
}
@@ -2453,7 +2434,6 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
__sk_flush_backlog(sk);
} else {
tcp_cleanup_rbuf(sk, copied);
- sk_defer_free_flush(sk);
sk_wait_data(sk, &timeo, last);
}
@@ -2571,7 +2551,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
lock_sock(sk);
ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags);
release_sock(sk);
- sk_defer_free_flush(sk);
if (cmsg_flags && ret >= 0) {
if (cmsg_flags & TCP_CMSG_TS)
@@ -3096,7 +3075,6 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_frag.page = NULL;
sk->sk_frag.offset = 0;
}
- sk_defer_free_flush(sk);
sk_error_report(sk);
return 0;
}
@@ -4225,7 +4203,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
&zc, &len, err);
release_sock(sk);
- sk_defer_free_flush(sk);
if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags))
goto zerocopy_rcv_cmsg;
switch (len) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2c2d42142555..918816ec5dd4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2065,7 +2065,6 @@ process:
sk_incoming_cpu_update(sk);
- sk_defer_free_flush(sk);
bh_lock_sock_nested(sk);
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 54277de7474b..60bdec257ba7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1728,7 +1728,6 @@ process:
sk_incoming_cpu_update(sk);
- sk_defer_free_flush(sk);
bh_lock_sock_nested(sk);
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index ddbe05ec5489..bc54f6c5b1a4 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1911,7 +1911,6 @@ recv_end:
end:
release_sock(sk);
- sk_defer_free_flush(sk);
if (psock)
sk_psock_put(sk, psock);
return copied ? : err;
@@ -1983,7 +1982,6 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
splice_read_end:
release_sock(sk);
- sk_defer_free_flush(sk);
return copied ? : err;
}