diff options
Diffstat (limited to 'net')
38 files changed, 523 insertions, 149 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 80e6f3a6864d..1153bbcdff72 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -377,6 +377,22 @@ out: return ret; } +static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx) +{ + /* make sure the fields we don't use are zeroed */ + if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags))) + return -EINVAL; + + /* flags is allowed */ + + if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) + + FIELD_SIZEOF(struct bpf_flow_keys, flags), + sizeof(struct bpf_flow_keys))) + return -EINVAL; + + return 0; +} + int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) @@ -384,9 +400,11 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, u32 size = kattr->test.data_size_in; struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; + struct bpf_flow_keys *user_ctx; struct bpf_flow_keys flow_keys; u64 time_start, time_spent = 0; const struct ethhdr *eth; + unsigned int flags = 0; u32 retval, duration; void *data; int ret; @@ -395,9 +413,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) return -EINVAL; - if (kattr->test.ctx_in || kattr->test.ctx_out) - return -EINVAL; - if (size < ETH_HLEN) return -EINVAL; @@ -410,6 +425,18 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (!repeat) repeat = 1; + user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys)); + if (IS_ERR(user_ctx)) { + kfree(data); + return PTR_ERR(user_ctx); + } + if (user_ctx) { + ret = verify_user_bpf_flow_keys(user_ctx); + if (ret) + goto out; + flags = user_ctx->flags; + } + ctx.flow_keys = &flow_keys; ctx.data = data; ctx.data_end = (__u8 *)data + size; @@ -419,7 +446,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, - size); + size, flags); if (signal_pending(current)) { preempt_enable(); @@ -450,8 +477,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), retval, duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, user_ctx, + sizeof(struct bpf_flow_keys)); out: + kfree(user_ctx); kfree(data); return ret; } diff --git a/net/core/devlink.c b/net/core/devlink.c index e8f0b891f000..d3dbb904bf3b 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -370,14 +370,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) return NULL; } -static void devlink_region_snapshot_del(struct devlink_snapshot *snapshot) -{ - snapshot->region->cur_snapshots--; - list_del(&snapshot->list); - (*snapshot->data_destructor)(snapshot->data); - kfree(snapshot); -} - #define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) #define DEVLINK_NL_FLAG_NEED_PORT BIT(1) #define DEVLINK_NL_FLAG_NEED_SB BIT(2) @@ -3595,6 +3587,16 @@ out_free_msg: nlmsg_free(msg); } +static void devlink_region_snapshot_del(struct devlink_region *region, + struct devlink_snapshot *snapshot) +{ + devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); + region->cur_snapshots--; + list_del(&snapshot->list); + (*snapshot->data_destructor)(snapshot->data); + kfree(snapshot); +} + static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb, struct genl_info *info) { @@ -3690,8 +3692,7 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb, if (!snapshot) return -EINVAL; - devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); - devlink_region_snapshot_del(snapshot); + devlink_region_snapshot_del(region, snapshot); return 0; } @@ -6743,7 +6744,7 @@ void devlink_region_destroy(struct devlink_region *region) /* Free all snapshots of region */ list_for_each_entry_safe(snapshot, ts, ®ion->snapshot_list, list) - devlink_region_snapshot_del(snapshot); + devlink_region_snapshot_del(region, snapshot); list_del(®ion->list); @@ -6938,11 +6939,10 @@ int devlink_compat_switch_id_get(struct net_device *dev, { struct devlink_port *devlink_port; - /* RTNL mutex is held here which ensures that devlink_port - * instance cannot disappear in the middle. No need to take + /* Caller must hold RTNL mutex or reference to dev, which ensures that + * devlink_port instance cannot disappear in the middle. No need to take * any devlink lock as only permanent values are accessed. */ - ASSERT_RTNL(); devlink_port = netdev_to_devlink_port(dev); if (!devlink_port || !devlink_port->attrs.switch_port) return -EOPNOTSUPP; diff --git a/net/core/filter.c b/net/core/filter.c index 7878f918b8c0..0c1059cdad3d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3517,7 +3517,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, int err; switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: { + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: { struct bpf_dtab_netdev *dst = fwd; err = dev_map_enqueue(dst, xdp, dev_rx); @@ -3554,6 +3555,7 @@ void xdp_do_flush_map(void) if (map) { switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: __dev_map_flush(map); break; case BPF_MAP_TYPE_CPUMAP: @@ -3574,6 +3576,8 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: return __dev_map_lookup_elem(map, index); + case BPF_MAP_TYPE_DEVMAP_HASH: + return __dev_map_hash_lookup_elem(map, index); case BPF_MAP_TYPE_CPUMAP: return __cpu_map_lookup_elem(map, index); case BPF_MAP_TYPE_XSKMAP: @@ -3655,7 +3659,8 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); - if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + if (map->map_type == BPF_MAP_TYPE_DEVMAP || + map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { struct bpf_dtab_netdev *dst = fwd; err = dev_map_generic_redirect(dst, skb, xdp_prog); @@ -5850,6 +5855,75 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, + struct tcphdr *, th, u32, th_len) +{ +#ifdef CONFIG_SYN_COOKIES + u32 cookie; + u16 mss; + + if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + return -EINVAL; + + if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) + return -EINVAL; + + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + return -ENOENT; + + if (!th->syn || th->ack || th->fin || th->rst) + return -EINVAL; + + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + + /* Both struct iphdr and struct ipv6hdr have the version field at the + * same offset so we can cast to the shorter header (struct iphdr). + */ + switch (((struct iphdr *)iph)->version) { + case 4: + if (sk->sk_family == AF_INET6 && sk->sk_ipv6only) + return -EINVAL; + + mss = tcp_v4_get_syncookie(sk, iph, th, &cookie); + break; + +#if IS_BUILTIN(CONFIG_IPV6) + case 6: + if (unlikely(iph_len < sizeof(struct ipv6hdr))) + return -EINVAL; + + if (sk->sk_family != AF_INET6) + return -EINVAL; + + mss = tcp_v6_get_syncookie(sk, iph, th, &cookie); + break; +#endif /* CONFIG_IPV6 */ + + default: + return -EPROTONOSUPPORT; + } + if (mss <= 0) + return -ENOENT; + + return cookie | ((u64)mss << 32); +#else + return -EOPNOTSUPP; +#endif /* CONFIG_SYN_COOKIES */ +} + +static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { + .func = bpf_tcp_gen_syncookie, + .gpl_only = true, /* __cookie_v*_init_sequence() is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5999,6 +6073,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; default: return bpf_base_func_proto(func_id); } @@ -6019,6 +6095,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; #ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_cgroup_id: return &bpf_skb_cgroup_id_proto; @@ -6135,6 +6213,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_skb_ecn_set_ce: return &bpf_skb_ecn_set_ce_proto; + case BPF_FUNC_tcp_gen_syncookie: + return &bpf_tcp_gen_syncookie_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6174,6 +6254,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_skc_lookup_tcp_proto; case BPF_FUNC_tcp_check_syncookie: return &bpf_tcp_check_syncookie_proto; + case BPF_FUNC_tcp_gen_syncookie: + return &bpf_tcp_gen_syncookie_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6267,6 +6349,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_map_proto; case BPF_FUNC_sk_redirect_hash: return &bpf_sk_redirect_hash_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sk_lookup_tcp_proto; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3e6fedb57bc1..9741b593ea53 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -737,6 +737,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; + struct flow_dissector_key_tags *key_tags; key_control = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CONTROL, @@ -781,10 +782,18 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL)) { + key_tags = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL, + target_container); + key_tags->flow_label = ntohl(flow_keys->flow_label); + } } bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, - __be16 proto, int nhoff, int hlen) + __be16 proto, int nhoff, int hlen, unsigned int flags) { struct bpf_flow_keys *flow_keys = ctx->flow_keys; u32 result; @@ -795,6 +804,14 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, flow_keys->nhoff = nhoff; flow_keys->thoff = flow_keys->nhoff; + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG != + (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG); + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL != + (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP != + (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP); + flow_keys->flags = flags; + preempt_disable(); result = BPF_PROG_RUN(prog, ctx); preempt_enable(); @@ -914,7 +931,7 @@ bool __skb_flow_dissect(const struct net *net, } ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, - hlen); + hlen, flags); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); rcu_read_unlock(); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 3272dc7a8c81..5bc65587f1c4 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -61,7 +61,7 @@ static int page_pool_init(struct page_pool *pool, struct page_pool *page_pool_create(const struct page_pool_params *params) { struct page_pool *pool; - int err = 0; + int err; pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid); if (!pool) @@ -82,12 +82,9 @@ EXPORT_SYMBOL(page_pool_create); static struct page *__page_pool_get_cached(struct page_pool *pool) { struct ptr_ring *r = &pool->ring; + bool refill = false; struct page *page; - /* Quicker fallback, avoid locks when ring is empty */ - if (__ptr_ring_empty(r)) - return NULL; - /* Test for safe-context, caller should provide this guarantee */ if (likely(in_serving_softirq())) { if (likely(pool->alloc.count)) { @@ -95,27 +92,23 @@ static struct page *__page_pool_get_cached(struct page_pool *pool) page = pool->alloc.cache[--pool->alloc.count]; return page; } - /* Slower-path: Alloc array empty, time to refill - * - * Open-coded bulk ptr_ring consumer. - * - * Discussion: the ring consumer lock is not really - * needed due to the softirq/NAPI protection, but - * later need the ability to reclaim pages on the - * ring. Thus, keeping the locks. - */ - spin_lock(&r->consumer_lock); - while ((page = __ptr_ring_consume(r))) { - if (pool->alloc.count == PP_ALLOC_CACHE_REFILL) - break; - pool->alloc.cache[pool->alloc.count++] = page; - } - spin_unlock(&r->consumer_lock); - return page; + refill = true; } - /* Slow-path: Get page from locked ring queue */ - page = ptr_ring_consume(&pool->ring); + /* Quicker fallback, avoid locks when ring is empty */ + if (__ptr_ring_empty(r)) + return NULL; + + /* Slow-path: Get page from locked ring queue, + * refill alloc array if requested. + */ + spin_lock(&r->consumer_lock); + page = __ptr_ring_consume(r); + if (refill) + pool->alloc.count = __ptr_ring_consume_batched(r, + pool->alloc.cache, + PP_ALLOC_CACHE_REFILL); + spin_unlock(&r->consumer_lock); return page; } diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 0e70f3f65f6f..748dc3ce58d3 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -36,8 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) opts.options |= XT_SYNPROXY_OPT_ECN; opts.options &= info->options; - opts.mss_encode = opts.mss; - opts.mss = info->mss; + opts.mss_encode = opts.mss_option; + opts.mss_option = info->mss; if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, &opts); else diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c21e8a22fb3b..706cbb3b2986 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3782,6 +3782,49 @@ static void smc_parse_options(const struct tcphdr *th, #endif } +/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped + * value on success. + */ +static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) +{ + const unsigned char *ptr = (const unsigned char *)(th + 1); + int length = (th->doff * 4) - sizeof(struct tcphdr); + u16 mss = 0; + + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return mss; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + if (length < 2) + return mss; + opsize = *ptr++; + if (opsize < 2) /* "silly options" */ + return mss; + if (opsize > length) + return mss; /* fail on partial options */ + if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) { + u16 in_mss = get_unaligned_be16(ptr); + + if (in_mss) { + if (user_mss && user_mss < in_mss) + in_mss = user_mss; + mss = in_mss; + } + } + ptr += opsize - 2; + length -= opsize; + } + } + return mss; +} + /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when * the fast version below fails. @@ -6422,9 +6465,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc); /* * Return true if a syncookie should be sent */ -static bool tcp_syn_flood_action(const struct sock *sk, - const struct sk_buff *skb, - const char *proto) +static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; @@ -6444,7 +6485,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, net->ipv4.sysctl_tcp_syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, ntohs(tcp_hdr(skb)->dest), msg); + proto, sk->sk_num, msg); return want_cookie; } @@ -6466,6 +6507,36 @@ static void tcp_reqsk_record_syn(const struct sock *sk, } } +/* If a SYN cookie is required and supported, returns a clamped MSS value to be + * used for SYN cookie generation. + */ +u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct tcphdr *th) +{ + struct tcp_sock *tp = tcp_sk(sk); + u16 mss; + + if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + !inet_csk_reqsk_queue_is_full(sk)) + return 0; + + if (!tcp_syn_flood_action(sk, rsk_ops->slab_name)) + return 0; + + if (sk_acceptq_is_full(sk)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + return 0; + } + + mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss); + if (!mss) + mss = af_ops->mss_clamp; + + return mss; +} +EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss); + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -6487,7 +6558,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, */ if ((net->ipv4.sysctl_tcp_syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); + want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e0a372676329..fd394ad179a0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1515,6 +1515,21 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, + struct tcphdr *th, u32 *cookie) +{ + u16 mss = 0; +#ifdef CONFIG_SYN_COOKIES + mss = tcp_get_syncookie_mss(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, th); + if (mss) { + *cookie = __cookie_v4_init_sequence(iph, th, &mss); + tcp_synq_overflow(sk); + } +#endif + return mss; +} + /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 5cdb4a69d277..fd1f52a21bf1 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -36,8 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) opts.options |= XT_SYNPROXY_OPT_ECN; opts.options &= info->options; - opts.mss_encode = opts.mss; - opts.mss = info->mss; + opts.mss_encode = opts.mss_option; + opts.mss_option = info->mss; if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, &opts); else diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5da069e91cac..87f44d3250ee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1063,6 +1063,21 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, + struct tcphdr *th, u32 *cookie) +{ + u16 mss = 0; +#ifdef CONFIG_SYN_COOKIES + mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, th); + if (mss) { + *cookie = __cookie_v6_init_sequence(iph, th, &mss); + tcp_synq_overflow(sk); + } +#endif + return mss; +} + static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 0feb77fa9edc..d098d87bc331 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -7,7 +7,7 @@ #include <linux/rcupdate.h> #include <linux/jhash.h> #include <linux/types.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> +#include <linux/netfilter/ipset/ip_set.h> #define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c) #define ipset_dereference_protected(p, set) \ @@ -953,7 +953,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0])); #endif key = HKEY(d, h->initval, t->htable_bits); - n = rcu_dereference_bh(hbucket(t, key)); + n = rcu_dereference_bh(hbucket(t, key)); if (!n) continue; for (i = 0; i < n->pos; i++) { diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 6f9ead6319e0..67ac50104e6f 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -288,7 +288,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (n && !(SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(n, set)))) - n = NULL; + n = NULL; e = kzalloc(set->dsize, GFP_ATOMIC); if (!e) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 46f06f92ab8f..8b80ab794a92 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -617,7 +617,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; - union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; + union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 145c8c992927..8b48e7ce1c2c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1737,12 +1737,18 @@ proc_do_defense_mode(struct ctl_table *table, int write, int val = *valp; int rc; - rc = proc_dointvec(table, write, buffer, lenp, ppos); + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); if (write && (*valp != val)) { - if ((*valp < 0) || (*valp > 3)) { - /* Restore the correct value */ - *valp = val; + if (val < 0 || val > 3) { + rc = -EINVAL; } else { + *valp = val; update_defense_level(ipvs); } } @@ -1756,33 +1762,20 @@ proc_do_sync_threshold(struct ctl_table *table, int write, int *valp = table->data; int val[2]; int rc; + struct ctl_table tmp = { + .data = &val, + .maxlen = table->maxlen, + .mode = table->mode, + }; - /* backup the value first */ memcpy(val, valp, sizeof(val)); - - rc = proc_dointvec(table, write, buffer, lenp, ppos); - if (write && (valp[0] < 0 || valp[1] < 0 || - (valp[0] >= valp[1] && valp[1]))) { - /* Restore the correct value */ - memcpy(valp, val, sizeof(val)); - } - return rc; -} - -static int -proc_do_sync_mode(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int *valp = table->data; - int val = *valp; - int rc; - - rc = proc_dointvec(table, write, buffer, lenp, ppos); - if (write && (*valp != val)) { - if ((*valp < 0) || (*valp > 1)) { - /* Restore the correct value */ - *valp = val; - } + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); + if (write) { + if (val[0] < 0 || val[1] < 0 || + (val[0] >= val[1] && val[1])) + rc = -EINVAL; + else + memcpy(valp, val, sizeof(val)); } return rc; } @@ -1795,12 +1788,18 @@ proc_do_sync_ports(struct ctl_table *table, int write, int val = *valp; int rc; - rc = proc_dointvec(table, write, buffer, lenp, ppos); + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); if (write && (*valp != val)) { - if (*valp < 1 || !is_power_of_2(*valp)) { - /* Restore the correct value */ + if (val < 1 || !is_power_of_2(val)) + rc = -EINVAL; + else *valp = val; - } } return rc; } @@ -1860,7 +1859,9 @@ static struct ctl_table vs_vars[] = { .procname = "sync_version", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_do_sync_mode, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "sync_ports", diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c index 94d9d349ebb0..da0280cec506 100644 --- a/net/netfilter/ipvs/ip_vs_mh.c +++ b/net/netfilter/ipvs/ip_vs_mh.c @@ -174,8 +174,8 @@ static int ip_vs_mh_populate(struct ip_vs_mh_state *s, return 0; } - table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE), - sizeof(unsigned long), GFP_KERNEL); + table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE), + sizeof(unsigned long), GFP_KERNEL); if (!table) return -ENOMEM; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 000d961b97e4..32b028853a7c 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -710,7 +710,7 @@ static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd sizeof(tcp_timeouts)); if (!pd->timeout_table) return -ENOMEM; - pd->tcp_state_table = tcp_states; + pd->tcp_state_table = tcp_states; return 0; } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 0ecb3e289ef2..c57d2348c505 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -162,7 +162,7 @@ static int try_rfc959(const char *data, size_t dlen, if (length == 0) return 0; - cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | + cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]); cmd->u.tcp.port = htons((array[4] << 8) | array[5]); return length; diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 74b8113f7aeb..d1c6b2a2e7bd 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -11,7 +11,7 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_labels.h> -static spinlock_t nf_connlabels_lock; +static __read_mostly DEFINE_SPINLOCK(nf_connlabels_lock); static int replace_u32(u32 *address, u32 mask, u32 new) { @@ -89,7 +89,6 @@ int nf_conntrack_labels_init(void) { BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX); - spin_lock_init(&nf_connlabels_lock); return nf_ct_extend_register(&labels_extend); } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 85c1f8c213b0..1926fd56df56 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1227,7 +1227,7 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, - [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, + [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; #define TCP_NLATTR_SIZE ( \ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e0d392cb3075..d97f4ea47cf3 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -511,8 +511,6 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) /* Log invalid packets of a given protocol */ static int log_invalid_proto_min __read_mostly; static int log_invalid_proto_max __read_mostly = 255; -static int zero; -static int one = 1; /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user __read_mostly; @@ -629,8 +627,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_LOG_INVALID] = { .procname = "nf_conntrack_log_invalid", @@ -654,8 +652,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_HELPER] = { .procname = "nf_conntrack_helper", @@ -663,8 +661,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #ifdef CONFIG_NF_CONNTRACK_EVENTS [NF_SYSCTL_CT_EVENTS] = { @@ -673,8 +671,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP @@ -684,8 +682,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = { @@ -759,16 +757,16 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = { .procname = "nf_conntrack_tcp_be_liberal", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", @@ -904,8 +902,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_NF_CT_PROTO_GRE diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 7ac733ebd060..0a59c14b5177 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -722,7 +722,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, return ret; } -const struct nf_hook_ops nf_nat_ipv4_ops[] = { +static const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv4_in, @@ -961,7 +961,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, return ret; } -const struct nf_hook_ops nf_nat_ipv6_ops[] = { +static const struct nf_hook_ops nf_nat_ipv6_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv6_in, diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index c769462a839e..b0930d4aba22 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -56,7 +56,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, switch (opcode) { case TCPOPT_MSS: if (opsize == TCPOLEN_MSS) { - opts->mss = get_unaligned_be16(ptr); + opts->mss_option = get_unaligned_be16(ptr); opts->options |= NF_SYNPROXY_OPT_MSS; } break; @@ -115,7 +115,7 @@ synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts) if (options & NF_SYNPROXY_OPT_MSS) *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | - opts->mss); + opts->mss_option); if (options & NF_SYNPROXY_OPT_TIMESTAMP) { if (options & NF_SYNPROXY_OPT_SACK_PERM) @@ -642,7 +642,7 @@ synproxy_recv_client_ack(struct net *net, } this_cpu_inc(snet->stats->cookie_valid); - opts->mss = mss; + opts->mss_option = mss; opts->options |= NF_SYNPROXY_OPT_MSS; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) @@ -1060,7 +1060,7 @@ synproxy_recv_client_ack_ipv6(struct net *net, } this_cpu_inc(snet->stats->cookie_valid); - opts->mss = mss; + opts->mss_option = mss; opts->options |= NF_SYNPROXY_OPT_MSS; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6dee4f9a944c..d69e1863e536 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -651,7 +651,7 @@ nfulnl_log_packet(struct net *net, /* FIXME: do we want to make the size calculation conditional based on * what is actually present? way more branches and checks, but more * memory efficient... */ - size = nlmsg_total_size(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -668,7 +668,7 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { - size += nla_total_size(skb->dev->hard_header_len) + size += nla_total_size(skb->dev->hard_header_len) + nla_total_size(sizeof(u_int16_t)) /* hwtype */ + nla_total_size(sizeof(u_int16_t)); /* hwlen */ } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index b6a7ce622c72..feabdfb22920 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -394,7 +394,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, char *secdata = NULL; u32 seclen = 0; - size = nlmsg_total_size(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -453,7 +453,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } if (queue->flags & NFQA_CFG_F_UID_GID) { - size += (nla_total_size(sizeof(u_int32_t)) /* uid */ + size += (nla_total_size(sizeof(u_int32_t)) /* uid */ + nla_total_size(sizeof(u_int32_t))); /* gid */ } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index b310b637b550..974300178fa9 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -13,6 +13,7 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_offload.h> struct nft_bitwise { enum nft_registers sreg:8; @@ -126,12 +127,30 @@ nla_put_failure: return -1; } +static struct nft_data zero; + +static int nft_bitwise_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + + if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) || + priv->sreg != priv->dreg) + return -EOPNOTSUPP; + + memcpy(&ctx->regs[priv->dreg].mask, &priv->mask, sizeof(priv->mask)); + + return 0; +} + static const struct nft_expr_ops nft_bitwise_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), .eval = nft_bitwise_eval, .init = nft_bitwise_init, .dump = nft_bitwise_dump, + .offload = nft_bitwise_offload, }; struct nft_expr_type nft_bitwise_type __read_mostly = { diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index ca2ae4b95a8d..c7f0ef73d939 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -125,17 +125,13 @@ static int nft_immediate_validate(const struct nft_ctx *ctx, return 0; } -static int nft_immediate_offload(struct nft_offload_ctx *ctx, - struct nft_flow_rule *flow, - const struct nft_expr *expr) +static int nft_immediate_offload_verdict(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_immediate_expr *priv) { - const struct nft_immediate_expr *priv = nft_expr_priv(expr); struct flow_action_entry *entry; const struct nft_data *data; - if (priv->dreg != NFT_REG_VERDICT) - return -EOPNOTSUPP; - entry = &flow->rule->action.entries[ctx->num_actions++]; data = &priv->data; @@ -153,6 +149,20 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx, return 0; } +static int nft_immediate_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + return nft_immediate_offload_verdict(ctx, flow, priv); + + memcpy(&ctx->regs[priv->dreg].data, &priv->data, sizeof(priv->data)); + + return 0; +} + static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index b5aeccdddb22..087a056e34d1 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -10,7 +10,7 @@ #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> struct nft_bitmap_elem { struct list_head head; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 6e8d20c03e3d..c490451fcebf 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -16,7 +16,7 @@ #include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> /* We target a hash table size of 4, element hint is 75% of final size */ #define NFT_RHASH_ELEMENT_HINT 3 diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 419d58ef802b..57123259452f 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -13,7 +13,7 @@ #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> struct nft_rbtree { struct rb_root root; diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 928e661d1517..db4c23f5dfcb 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -31,8 +31,8 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts, opts->options |= NF_SYNPROXY_OPT_ECN; opts->options &= priv->info.options; - opts->mss_encode = opts->mss; - opts->mss = info->mss; + opts->mss_encode = opts->mss_option; + opts->mss_option = info->mss; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, opts); else diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 9cec9eae556a..f56d3ed93e56 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -283,7 +283,7 @@ static int __init idletimer_tg_init(void) idletimer_tg_kobj = &idletimer_tg_device->kobj; - err = xt_register_target(&idletimer_tg); + err = xt_register_target(&idletimer_tg); if (err < 0) { pr_debug("couldn't register xt target\n"); goto out_dev; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index ecbfa291fb70..731bc2cafae4 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -14,7 +14,6 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <uapi/linux/netfilter/xt_set.h> MODULE_LICENSE("GPL"); diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 2b969f99ef13..2977137c28eb 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -705,7 +705,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern); + sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern); if (!sk) return -ENOMEM; @@ -741,6 +741,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, spin_lock_bh(&rds_sock_lock); list_for_each_entry(rs, &rds_sock_list, rs_item) { + /* This option only supports IPv4 sockets. */ + if (!ipv6_addr_v4mapped(&rs->rs_bound_addr)) + continue; + read_lock(&rs->rs_recv_lock); /* XXX too lazy to maintain counts.. */ @@ -762,21 +766,60 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, lens->each = sizeof(struct rds_info_message); } +#if IS_ENABLED(CONFIG_IPV6) +static void rds6_sock_inc_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds_incoming *inc; + unsigned int total = 0; + struct rds_sock *rs; + + len /= sizeof(struct rds6_info_message); + + spin_lock_bh(&rds_sock_lock); + + list_for_each_entry(rs, &rds_sock_list, rs_item) { + read_lock(&rs->rs_recv_lock); + + list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { + total++; + if (total <= len) + rds6_inc_info_copy(inc, iter, &inc->i_saddr, + &rs->rs_bound_addr, 1); + } + + read_unlock(&rs->rs_recv_lock); + } + + spin_unlock_bh(&rds_sock_lock); + + lens->nr = total; + lens->each = sizeof(struct rds6_info_message); +} +#endif + static void rds_sock_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { struct rds_info_socket sinfo; + unsigned int cnt = 0; struct rds_sock *rs; len /= sizeof(struct rds_info_socket); spin_lock_bh(&rds_sock_lock); - if (len < rds_sock_count) + if (len < rds_sock_count) { + cnt = rds_sock_count; goto out; + } list_for_each_entry(rs, &rds_sock_list, rs_item) { + /* This option only supports IPv4 sockets. */ + if (!ipv6_addr_v4mapped(&rs->rs_bound_addr)) + continue; sinfo.sndbuf = rds_sk_sndbuf(rs); sinfo.rcvbuf = rds_sk_rcvbuf(rs); sinfo.bound_addr = rs->rs_bound_addr_v4; @@ -786,15 +829,51 @@ static void rds_sock_info(struct socket *sock, unsigned int len, sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); rds_info_copy(iter, &sinfo, sizeof(sinfo)); + cnt++; } out: - lens->nr = rds_sock_count; + lens->nr = cnt; lens->each = sizeof(struct rds_info_socket); spin_unlock_bh(&rds_sock_lock); } +#if IS_ENABLED(CONFIG_IPV6) +static void rds6_sock_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds6_info_socket sinfo6; + struct rds_sock *rs; + + len /= sizeof(struct rds6_info_socket); + + spin_lock_bh(&rds_sock_lock); + + if (len < rds_sock_count) + goto out; + + list_for_each_entry(rs, &rds_sock_list, rs_item) { + sinfo6.sndbuf = rds_sk_sndbuf(rs); + sinfo6.rcvbuf = rds_sk_rcvbuf(rs); + sinfo6.bound_addr = rs->rs_bound_addr; + sinfo6.connected_addr = rs->rs_conn_addr; + sinfo6.bound_port = rs->rs_bound_port; + sinfo6.connected_port = rs->rs_conn_port; + sinfo6.inum = sock_i_ino(rds_rs_to_sk(rs)); + + rds_info_copy(iter, &sinfo6, sizeof(sinfo6)); + } + + out: + lens->nr = rds_sock_count; + lens->each = sizeof(struct rds6_info_socket); + + spin_unlock_bh(&rds_sock_lock); +} +#endif + static void rds_exit(void) { sock_unregister(rds_family_ops.family); @@ -808,6 +887,10 @@ static void rds_exit(void) rds_bind_lock_destroy(); rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info); rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); +#if IS_ENABLED(CONFIG_IPV6) + rds_info_deregister_func(RDS6_INFO_SOCKETS, rds6_sock_info); + rds_info_deregister_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info); +#endif } module_exit(rds_exit); @@ -845,6 +928,10 @@ static int rds_init(void) rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info); rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); +#if IS_ENABLED(CONFIG_IPV6) + rds_info_register_func(RDS6_INFO_SOCKETS, rds6_sock_info); + rds_info_register_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info); +#endif goto out; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 3cae88cbdaa0..1a8a4a760b84 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -385,6 +385,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) unsigned int posted = 0; int ret = 0; bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM); + bool must_wake = false; u32 pos; /* the goal here is to just make sure that someone, somewhere @@ -405,6 +406,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) recv = &ic->i_recvs[pos]; ret = rds_ib_recv_refill_one(conn, recv, gfp); if (ret) { + must_wake = true; break; } @@ -423,6 +425,11 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) } posted++; + + if ((posted > 128 && need_resched()) || posted > 8192) { + must_wake = true; + break; + } } /* We're doing flow control - update the window. */ @@ -445,10 +452,13 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) * if we should requeue. */ if (rds_conn_up(conn) && - ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) || + (must_wake || + (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) || rds_ib_ring_empty(&ic->i_recv_ring))) { queue_delayed_work(rds_wq, &conn->c_recv_w, 1); } + if (can_wait) + cond_resched(); } /* diff --git a/net/rds/rds.h b/net/rds/rds.h index f0066d168499..ad605fd61655 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -717,7 +717,7 @@ struct rds_statistics { uint64_t s_cong_send_blocked; uint64_t s_recv_bytes_added_to_socket; uint64_t s_recv_bytes_removed_from_socket; - + uint64_t s_send_stuck_rm; }; /* af_rds.c */ diff --git a/net/rds/send.c b/net/rds/send.c index 031b1e97a466..9ce552abf9e9 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -145,6 +145,7 @@ int rds_send_xmit(struct rds_conn_path *cp) LIST_HEAD(to_be_dropped); int batch_count; unsigned long send_gen = 0; + int same_rm = 0; restart: batch_count = 0; @@ -200,6 +201,17 @@ restart: rm = cp->cp_xmit_rm; + if (!rm) { + same_rm = 0; + } else { + same_rm++; + if (same_rm >= 4096) { + rds_stats_inc(s_send_stuck_rm); + ret = -EAGAIN; + break; + } + } + /* * If between sending messages, we can send a pending congestion * map update. diff --git a/net/rds/stats.c b/net/rds/stats.c index 73be187d389e..9e87da43c004 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -76,6 +76,9 @@ static const char *const rds_stat_names[] = { "cong_update_received", "cong_send_error", "cong_send_blocked", + "recv_bytes_added_to_sock", + "recv_bytes_freed_fromsock", + "send_stuck_rm", }; void rds_stats_info_copy(struct rds_info_iterator *iter, diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 83de74ca729a..a0607969f8c0 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -14,6 +14,7 @@ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/idr.h> +#include <linux/highmem.h> #include "xdp_umem.h" #include "xsk_queue.h" @@ -164,6 +165,14 @@ void xdp_umem_clear_dev(struct xdp_umem *umem) umem->zc = false; } +static void xdp_umem_unmap_pages(struct xdp_umem *umem) +{ + unsigned int i; + + for (i = 0; i < umem->npgs; i++) + kunmap(umem->pgs[i]); +} + static void xdp_umem_unpin_pages(struct xdp_umem *umem) { unsigned int i; @@ -207,6 +216,7 @@ static void xdp_umem_release(struct xdp_umem *umem) xsk_reuseq_destroy(umem); + xdp_umem_unmap_pages(umem); xdp_umem_unpin_pages(umem); kfree(umem->pages); @@ -369,7 +379,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) } for (i = 0; i < umem->npgs; i++) - umem->pages[i].addr = page_address(umem->pgs[i]); + umem->pages[i].addr = kmap(umem->pgs[i]); return 0; |
