From 8750939b6ad86abc3f53ec8a9683a1cded4a5654 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:11 -0800 Subject: devlink: validate length of param values DEVLINK_ATTR_PARAM_VALUE_DATA may have different types so it's not checked by the normal netlink policy. Make sure the attribute length is what we expect. Fixes: e3b7ca18ad7b ("devlink: Add param set command") Signed-off-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index 5e220809844c..8e44dc5cde73 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3352,34 +3352,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param, struct genl_info *info, union devlink_param_value *value) { + struct nlattr *param_data; int len; - if (param->type != DEVLINK_PARAM_TYPE_BOOL && - !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) + param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]; + + if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data) return -EINVAL; switch (param->type) { case DEVLINK_PARAM_TYPE_U8: - value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u8)) + return -EINVAL; + value->vu8 = nla_get_u8(param_data); break; case DEVLINK_PARAM_TYPE_U16: - value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u16)) + return -EINVAL; + value->vu16 = nla_get_u16(param_data); break; case DEVLINK_PARAM_TYPE_U32: - value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u32)) + return -EINVAL; + value->vu32 = nla_get_u32(param_data); break; case DEVLINK_PARAM_TYPE_STRING: - len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]), - nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); - if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) || + len = strnlen(nla_data(param_data), nla_len(param_data)); + if (len == nla_len(param_data) || len >= __DEVLINK_PARAM_MAX_STRING_VALUE) return -EINVAL; - strcpy(value->vstr, - nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); + strcpy(value->vstr, nla_data(param_data)); break; case DEVLINK_PARAM_TYPE_BOOL: - value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ? - true : false; + if (param_data && nla_len(param_data)) + return -EINVAL; + value->vbool = nla_get_flag(param_data); break; } return 0; -- cgit From ff3b63b8c299b73ac599b120653b47e275407656 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 2 Mar 2020 21:05:12 -0800 Subject: devlink: validate length of region addr/len DEVLINK_ATTR_REGION_CHUNK_ADDR and DEVLINK_ATTR_REGION_CHUNK_LEN lack entries in the netlink policy. Corresponding nla_get_u64()s may read beyond the end of the message. Fixes: 4e54795a27f5 ("devlink: Add support for region snapshot read command") Signed-off-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core') diff --git a/net/core/devlink.c b/net/core/devlink.c index 8e44dc5cde73..b831c5545d6a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5958,6 +5958,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 }, + [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, -- cgit From 018d26fcd12a75fb9b5fe233762aa3f2f0854b88 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Thu, 5 Mar 2020 17:45:57 +0300 Subject: cgroup, netclassid: periodically release file_lock on classid updating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In our production environment we have faced with problem that updating classid in cgroup with heavy tasks cause long freeze of the file tables in this tasks. By heavy tasks we understand tasks with many threads and opened sockets (e.g. balancers). This freeze leads to an increase number of client timeouts. This patch implements following logic to fix this issue: аfter iterating 1000 file descriptors file table lock will be released thus providing a time gap for socket creation/deletion. Now update is non atomic and socket may be skipped using calls: dup2(oldfd, newfd); close(oldfd); But this case is not typical. Moreover before this patch skip is possible too by hiding socket fd in unix socket buffer. New sockets will be allocated with updated classid because cgroup state is updated before start of the file descriptors iteration. So in common cases this patch has no side effects. Signed-off-by: Dmitry Yakunin Reviewed-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- net/core/netclassid_cgroup.c | 47 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) (limited to 'net/core') diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 0642f91c4038..b4c87fe31be2 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } +/* + * To avoid freezing of sockets creation for tasks with big number of threads + * and opened sockets lets release file_lock every 1000 iterated descriptors. + * New sockets will already have been created with new classid. + */ + +struct update_classid_context { + u32 classid; + unsigned int batch; +}; + +#define UPDATE_CLASSID_BATCH 1000 + static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; + struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file, &err); if (sock) { spin_lock(&cgroup_sk_update_lock); - sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, - (unsigned long)v); + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); spin_unlock(&cgroup_sk_update_lock); } + if (--ctx->batch == 0) { + ctx->batch = UPDATE_CLASSID_BATCH; + return n + 1; + } return 0; } +static void update_classid_task(struct task_struct *p, u32 classid) +{ + struct update_classid_context ctx = { + .classid = classid, + .batch = UPDATE_CLASSID_BATCH + }; + unsigned int fd = 0; + + do { + task_lock(p); + fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); + task_unlock(p); + cond_resched(); + } while (fd); +} + static void cgrp_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct task_struct *p; cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)css_cls_state(css)->classid); - task_unlock(p); + update_classid_task(p, css_cls_state(css)->classid); } } @@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, css_task_iter_start(css, 0, &it); while ((p = css_task_iter_next(&it))) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)cs->classid); - task_unlock(p); + update_classid_task(p, cs->classid); cond_resched(); } css_task_iter_end(&it); -- cgit From d752a4986532cb6305dfd5290a614cde8072769d Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 9 Mar 2020 22:16:06 -0700 Subject: net: memcg: late association of sock to memcg If a TCP socket is allocated in IRQ context or cloned from unassociated (i.e. not associated to a memcg) in IRQ context then it will remain unassociated for its whole life. Almost half of the TCPs created on the system are created in IRQ context, so, memory used by such sockets will not be accounted by the memcg. This issue is more widespread in cgroup v1 where network memory accounting is opt-in but it can happen in cgroup v2 if the source socket for the cloning was created in root memcg. To fix the issue, just do the association of the sockets at the accept() time in the process context and then force charge the memory buffer already used and reserved by the socket. Signed-off-by: Shakeel Butt Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index a4c8fac781ff..8f71684305c3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1830,7 +1830,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); - mem_cgroup_sk_alloc(newsk); + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); -- cgit