summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2025-08-26 12:50:29 +0000
committer Paolo Abeni <pabeni@redhat.com> 2025-08-28 13:14:50 +0200
commit c51613fa276f038bdd18656a57a90ccc5d4e5200 (patch)
tree 114a352c439984a17a8c3c44538cfa57a11f3575
parent cb4d5a6eb600a43c2e3ec7f54e06d07aa33d8062 (diff)
net: add sk->sk_drop_counters
Some sockets suffer from heavy false sharing on sk->sk_drops,
and on fields in the same cache line.

Add sk->sk_drop_counters to:

- Move the drop counter(s) to dedicated cache lines.
- Add basic NUMA awareness to these drop counter(s).

Following patches will use this infrastructure for UDP and RAW sockets.

sk_clone_lock() is not yet ready: it would need to properly
set newsk->sk_drop_counters if we plan to use this for TCP sockets.

v2: used Paolo's suggestion from
    https://lore.kernel.org/netdev/8f09830a-d83d-43c9-b36b-88ba0a23e9b2@redhat.com/

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250826125031.1578842-4-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r-- include/net/sock.h 32
-rw-r--r-- net/core/sock.c 2
2 files changed, 33 insertions, 1 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 9edb42ff0622..73cd3316e288 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -102,6 +102,11 @@ struct net;
typedef __u32 __bitwise __portpair;
typedef __u64 __bitwise __addrpair;
+struct socket_drop_counters {
+ atomic_t drops0 ____cacheline_aligned_in_smp;
+ atomic_t drops1 ____cacheline_aligned_in_smp;
+};
+
/**
* struct sock_common - minimal network layer representation of sockets
* @skc_daddr: Foreign IPv4 addr
@@ -282,6 +287,7 @@ struct sk_filter;
* @sk_err_soft: errors that don't cause failure but are the cause of a
* persistent failure not just 'timed out'
* @sk_drops: raw/udp drops counter
+ * @sk_drop_counters: optional pointer to socket_drop_counters
* @sk_ack_backlog: current listen backlog
* @sk_max_ack_backlog: listen backlog set in listen()
* @sk_uid: user id of owner
@@ -449,6 +455,7 @@ struct sock {
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
#endif
+ struct socket_drop_counters *sk_drop_counters;
__cacheline_group_end(sock_read_rxtx);
__cacheline_group_begin(sock_write_rxtx);
@@ -2684,7 +2691,18 @@ struct sock_skb_cb {
static inline void sk_drops_add(struct sock *sk, int segs)
{
- atomic_add(segs, &sk->sk_drops);
+ struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+ if (sdc) {
+ int n = numa_node_id() % 2;
+
+ if (n)
+ atomic_add(segs, &sdc->drops1);
+ else
+ atomic_add(segs, &sdc->drops0);
+ } else {
+ atomic_add(segs, &sk->sk_drops);
+ }
}
static inline void sk_drops_inc(struct sock *sk)
@@ -2694,11 +2712,23 @@ static inline void sk_drops_inc(struct sock *sk)
static inline int sk_drops_read(const struct sock *sk)
{
+ const struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+ if (sdc) {
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops));
+ return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1);
+ }
return atomic_read(&sk->sk_drops);
}
static inline void sk_drops_reset(struct sock *sk)
{
+ struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+ if (sdc) {
+ atomic_set(&sdc->drops0, 0);
+ atomic_set(&sdc->drops1, 0);
+ }
atomic_set(&sk->sk_drops, 0);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 75368823969a..e66ad1ec3a2d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2505,6 +2505,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
newsk->sk_reserved_mem = 0;
+ DEBUG_NET_WARN_ON_ONCE(newsk->sk_drop_counters);
sk_drops_reset(newsk);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
@@ -4457,6 +4458,7 @@ static int __init sock_struct_check(void)
#ifdef CONFIG_MEMCG
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
#endif
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_drop_counters);
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);