From 7123aaa3a1416529ce461e98108e6b343b294643 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jun 2012 05:03:21 +0000 Subject: af_unix: speedup /proc/net/unix /proc/net/unix has quadratic behavior, and can hold unix_table_lock for a while if high number of unix sockets are alive. (90 ms for 200k sockets...) We already have a hash table, so its quite easy to use it. Problem is unbound sockets are still hashed in a single hash slot (unix_socket_table[UNIX_HASH_TABLE]) This patch also spreads unbound sockets to 256 hash slots, to speedup both /proc/net/unix and unix_diag. Time to read /proc/net/unix with 200k unix sockets : (time dd if=/proc/net/unix of=/dev/null bs=4k) before : 520 secs after : 2 secs Signed-off-by: Eric Dumazet Cc: Steven Whitehouse Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- net/unix/diag.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/unix/diag.c') diff --git a/net/unix/diag.c b/net/unix/diag.c index 47d3002737f5..7e8a24bff34a 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -195,7 +195,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = s_num = cb->args[1]; spin_lock(&unix_table_lock); - for (slot = s_slot; slot <= UNIX_HASH_SIZE; s_num = 0, slot++) { + for (slot = s_slot; + slot < ARRAY_SIZE(unix_socket_table); + s_num = 0, slot++) { struct sock *sk; struct hlist_node *node; @@ -228,7 +230,7 @@ static struct sock *unix_lookup_by_ino(int ino) struct sock *sk; spin_lock(&unix_table_lock); - for (i = 0; i <= UNIX_HASH_SIZE; i++) { + for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { struct hlist_node *node; sk_for_each(sk, node, &unix_socket_table[i]) -- cgit From b61bb01974730e2fd7d36ab4cc848ca6f44cffd4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 26 Jun 2012 21:41:00 -0700 Subject: unix_diag: Move away from NLMSG_PUT(). And use nlmsg_data() while we're here too and remove useless casts. Signed-off-by: David S. Miller --- net/unix/diag.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'net/unix/diag.c') diff --git a/net/unix/diag.c b/net/unix/diag.c index 7e8a24bff34a..977ca317550d 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -126,10 +126,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r struct nlmsghdr *nlh; struct unix_diag_msg *rep; - nlh = NLMSG_PUT(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep)); + nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), 0); + if (!nlh) + goto out_nlmsg_trim; nlh->nlmsg_flags = flags; - rep = NLMSG_DATA(nlh); + rep = nlmsg_data(nlh); rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; @@ -139,32 +141,32 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if ((req->udiag_show & UDIAG_SHOW_NAME) && sk_diag_dump_name(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_VFS) && sk_diag_dump_vfs(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_PEER) && sk_diag_dump_peer(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_ICONS) && sk_diag_dump_icons(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_RQLEN) && sk_diag_show_rqlen(sk, skb)) - goto nlmsg_failure; + goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_MEMINFO) && sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) - goto nlmsg_failure; + goto out_nlmsg_trim; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -nlmsg_failure: +out_nlmsg_trim: nlmsg_trim(skb, b); return -EMSGSIZE; } @@ -189,7 +191,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct unix_diag_req *req; int num, s_num, slot, s_slot; - req = NLMSG_DATA(cb->nlh); + req = nlmsg_data(cb->nlh); s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -309,7 +311,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) }; return netlink_dump_start(sock_diag_nlsk, skb, h, &c); } else - return unix_diag_get_exact(skb, h, (struct unix_diag_req *)NLMSG_DATA(h)); + return unix_diag_get_exact(skb, h, nlmsg_data(h)); } static const struct sock_diag_handler unix_diag_handler = { -- cgit From 4245375db87767aacaad16f07040b5d89a9056c8 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 26 Jun 2012 23:36:10 +0000 Subject: unix_diag: Do not use RTA_PUT() macros Also, no need to trim on nlmsg_put() failure, nothing has been added yet. We also want to use nlmsg_end(), nlmsg_new() and nlmsg_free(). Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/unix/diag.c | 80 ++++++++++++++++++++++++--------------------------------- 1 file changed, 33 insertions(+), 47 deletions(-) (limited to 'net/unix/diag.c') diff --git a/net/unix/diag.c b/net/unix/diag.c index 977ca317550d..a74864eedfcd 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -8,40 +8,31 @@ #include #include -#define UNIX_DIAG_PUT(skb, attrtype, attrlen) \ - RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) - static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { struct unix_address *addr = unix_sk(sk)->addr; - char *s; - - if (addr) { - s = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short)); - memcpy(s, addr->name->sun_path, addr->len - sizeof(short)); - } - return 0; + if (!addr) + return 0; -rtattr_failure: - return -EMSGSIZE; + return nla_put(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short), + addr->name->sun_path); } static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb) { struct dentry *dentry = unix_sk(sk)->path.dentry; - struct unix_diag_vfs *uv; if (dentry) { - uv = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_VFS, sizeof(*uv)); - uv->udiag_vfs_ino = dentry->d_inode->i_ino; - uv->udiag_vfs_dev = dentry->d_sb->s_dev; + struct unix_diag_vfs uv = { + .udiag_vfs_ino = dentry->d_inode->i_ino, + .udiag_vfs_dev = dentry->d_sb->s_dev, + }; + + return nla_put(nlskb, UNIX_DIAG_VFS, sizeof(uv), &uv); } return 0; - -rtattr_failure: - return -EMSGSIZE; } static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb) @@ -56,24 +47,28 @@ static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb) unix_state_unlock(peer); sock_put(peer); - RTA_PUT_U32(nlskb, UNIX_DIAG_PEER, ino); + return nla_put_u32(nlskb, UNIX_DIAG_PEER, ino); } return 0; -rtattr_failure: - return -EMSGSIZE; } static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) { struct sk_buff *skb; + struct nlattr *attr; u32 *buf; int i; if (sk->sk_state == TCP_LISTEN) { spin_lock(&sk->sk_receive_queue.lock); - buf = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_ICONS, - sk->sk_receive_queue.qlen * sizeof(u32)); + + attr = nla_reserve(nlskb, UNIX_DIAG_ICONS, + sk->sk_receive_queue.qlen * sizeof(u32)); + if (!attr) + goto errout; + + buf = nla_data(attr); i = 0; skb_queue_walk(&sk->sk_receive_queue, skb) { struct sock *req, *peer; @@ -94,45 +89,38 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) return 0; -rtattr_failure: +errout: spin_unlock(&sk->sk_receive_queue.lock); return -EMSGSIZE; } static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) { - struct unix_diag_rqlen *rql; - - rql = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_RQLEN, sizeof(*rql)); + struct unix_diag_rqlen rql; if (sk->sk_state == TCP_LISTEN) { - rql->udiag_rqueue = sk->sk_receive_queue.qlen; - rql->udiag_wqueue = sk->sk_max_ack_backlog; + rql.udiag_rqueue = sk->sk_receive_queue.qlen; + rql.udiag_wqueue = sk->sk_max_ack_backlog; } else { - rql->udiag_rqueue = (__u32)unix_inq_len(sk); - rql->udiag_wqueue = (__u32)unix_outq_len(sk); + rql.udiag_rqueue = (u32) unix_inq_len(sk); + rql.udiag_wqueue = (u32) unix_outq_len(sk); } - return 0; - -rtattr_failure: - return -EMSGSIZE; + return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql); } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, u32 pid, u32 seq, u32 flags, int sk_ino) { - unsigned char *b = skb_tail_pointer(skb); struct nlmsghdr *nlh; struct unix_diag_msg *rep; - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), 0); + nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + flags); if (!nlh) - goto out_nlmsg_trim; - nlh->nlmsg_flags = flags; + return -EMSGSIZE; rep = nlmsg_data(nlh); - rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; rep->udiag_state = sk->sk_state; @@ -163,11 +151,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) goto out_nlmsg_trim; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); out_nlmsg_trim: - nlmsg_trim(skb, b); + nlmsg_cancel(skb, nlh); return -EMSGSIZE; } @@ -272,15 +259,14 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, extra_len = 256; again: err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE((sizeof(struct unix_diag_msg) + extra_len)), - GFP_KERNEL); + rep = nlmsg_new(sizeof(struct unix_diag_msg) + extra_len, GFP_KERNEL); if (!rep) goto out; err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { - kfree_skb(rep); + nlmsg_free(rep); extra_len += 256; if (extra_len >= PAGE_SIZE) goto out; -- cgit From 51d7cccf07238f5236c5b9269231a30dd5f8e714 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 16 Jul 2012 04:28:49 +0000 Subject: net: make sock diag per-namespace Before this patch sock_diag works for init_net only and dumps information about sockets from all namespaces. This patch expands sock_diag for all name-spaces. It creates a netlink kernel socket for each netns and filters data during dumping. v2: filter accoding with netns in all places remove an unused variable. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Pavel Emelyanov CC: Eric Dumazet Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Andrew Vagin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/unix/diag.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/unix/diag.c') diff --git a/net/unix/diag.c b/net/unix/diag.c index a74864eedfcd..750b13408449 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -177,6 +177,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct unix_diag_req *req; int num, s_num, slot, s_slot; + struct net *net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); @@ -192,6 +193,8 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = 0; sk_for_each(sk, node, &unix_socket_table[slot]) { + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) @@ -243,6 +246,7 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, struct sock *sk; struct sk_buff *rep; unsigned int extra_len; + struct net *net = sock_net(in_skb->sk); if (req->udiag_ino == 0) goto out_nosk; @@ -273,7 +277,7 @@ again: goto again; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -287,6 +291,7 @@ out_nosk: static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct unix_diag_req); + struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -295,7 +300,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) struct netlink_dump_control c = { .dump = unix_diag_dump, }; - return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } else return unix_diag_get_exact(skb, h, nlmsg_data(h)); } -- cgit