summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/trans_xen.c8
-rw-r--r--net/Kconfig.debug2
-rw-r--r--net/ax25/af_ax25.c60
-rw-r--r--net/ax25/ax25_dev.c1
-rw-r--r--net/ax25/ax25_subr.c2
-rw-r--r--net/bluetooth/hci_core.c4
-rw-r--r--net/bluetooth/hci_request.c2
-rw-r--r--net/bluetooth/hci_sync.c6
-rw-r--r--net/bluetooth/mgmt.c37
-rw-r--r--net/ceph/crush/mapper.c5
-rw-r--r--net/core/flow_offload.c6
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/dccp/proto.c33
-rw-r--r--net/ipv4/inet_connection_sock.c247
-rw-r--r--net/ipv4/inet_hashtables.c203
-rw-r--r--net/ipv4/ip_gre.c11
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_output.c4
-rw-r--r--net/ipv4/xfrm4_protocol.c1
-rw-r--r--net/ipv6/addrconf.c6
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/ndisc.c42
-rw-r--r--net/ipv6/ping.c8
-rw-r--r--net/ipv6/seg6_hmac.c1
-rw-r--r--net/ipv6/seg6_local.c1
-rw-r--r--net/key/af_key.c10
-rw-r--r--net/l2tp/l2tp_ip6.c5
-rw-r--r--net/mac80211/chan.c7
-rw-r--r--net/netfilter/nf_tables_api.c158
-rw-r--r--net/netfilter/nf_tables_offload.c23
-rw-r--r--net/netfilter/nfnetlink.c24
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c5
-rw-r--r--net/netfilter/nft_flow_offload.c6
-rw-r--r--net/netfilter/nft_limit.c2
-rw-r--r--net/netfilter/nft_nat.c3
-rw-r--r--net/nfc/core.c4
-rw-r--r--net/openvswitch/actions.c6
-rw-r--r--net/openvswitch/conntrack.c4
-rw-r--r--net/packet/af_packet.c6
-rw-r--r--net/sched/act_ct.c2
-rw-r--r--net/smc/af_smc.c1
-rw-r--r--net/smc/smc_cdc.c2
-rw-r--r--net/sunrpc/xdr.c37
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c4
-rw-r--r--net/tipc/bearer.c3
-rw-r--r--net/tls/tls_main.c8
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/xdp/xsk.c5
-rw-r--r--net/xdp/xsk_queue.h8
-rw-r--r--net/xfrm/xfrm_output.c3
52 files changed, 422 insertions, 643 deletions
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 77883b6788cd..833cd3792c51 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -279,13 +279,13 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
grant_ref_t ref;
ref = priv->rings[i].intf->ref[j];
- gnttab_end_foreign_access(ref, 0);
+ gnttab_end_foreign_access(ref, NULL);
}
free_pages_exact(priv->rings[i].data.in,
1UL << (priv->rings[i].intf->ring_order +
XEN_PAGE_SHIFT));
}
- gnttab_end_foreign_access(priv->rings[i].ref, 0);
+ gnttab_end_foreign_access(priv->rings[i].ref, NULL);
free_page((unsigned long)priv->rings[i].intf);
}
kfree(priv->rings);
@@ -353,10 +353,10 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
out:
if (bytes) {
for (i--; i >= 0; i--)
- gnttab_end_foreign_access(ring->intf->ref[i], 0);
+ gnttab_end_foreign_access(ring->intf->ref[i], NULL);
free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT));
}
- gnttab_end_foreign_access(ring->ref, 0);
+ gnttab_end_foreign_access(ring->ref, NULL);
free_page((unsigned long)ring->intf);
return ret;
}
diff --git a/net/Kconfig.debug b/net/Kconfig.debug
index a5781cf63b16..e6ae11cc2fb7 100644
--- a/net/Kconfig.debug
+++ b/net/Kconfig.debug
@@ -20,7 +20,7 @@ config NET_NS_REFCNT_TRACKER
config DEBUG_NET
bool "Add generic networking debug"
- depends on DEBUG_KERNEL
+ depends on DEBUG_KERNEL && NET
help
Enable extra sanity checks in networking.
This is mostly used by fuzzers, but is safe to select.
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 116481e4da82..4c7030ed8d33 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -62,12 +62,12 @@ static void ax25_free_sock(struct sock *sk)
*/
static void ax25_cb_del(ax25_cb *ax25)
{
+ spin_lock_bh(&ax25_list_lock);
if (!hlist_unhashed(&ax25->ax25_node)) {
- spin_lock_bh(&ax25_list_lock);
hlist_del_init(&ax25->ax25_node);
- spin_unlock_bh(&ax25_list_lock);
ax25_cb_put(ax25);
}
+ spin_unlock_bh(&ax25_list_lock);
}
/*
@@ -81,6 +81,7 @@ static void ax25_kill_by_device(struct net_device *dev)
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
return;
+ ax25_dev->device_up = false;
spin_lock_bh(&ax25_list_lock);
again:
@@ -91,6 +92,7 @@ again:
spin_unlock_bh(&ax25_list_lock);
ax25_disconnect(s, ENETUNREACH);
s->ax25_dev = NULL;
+ ax25_cb_del(s);
spin_lock_bh(&ax25_list_lock);
goto again;
}
@@ -103,6 +105,7 @@ again:
dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
ax25_dev_put(ax25_dev);
}
+ ax25_cb_del(s);
release_sock(sk);
spin_lock_bh(&ax25_list_lock);
sock_put(sk);
@@ -995,9 +998,11 @@ static int ax25_release(struct socket *sock)
if (sk->sk_type == SOCK_SEQPACKET) {
switch (ax25->state) {
case AX25_STATE_0:
- release_sock(sk);
- ax25_disconnect(ax25, 0);
- lock_sock(sk);
+ if (!sock_flag(ax25->sk, SOCK_DEAD)) {
+ release_sock(sk);
+ ax25_disconnect(ax25, 0);
+ lock_sock(sk);
+ }
ax25_destroy_socket(ax25);
break;
@@ -1053,11 +1058,13 @@ static int ax25_release(struct socket *sock)
ax25_destroy_socket(ax25);
}
if (ax25_dev) {
- del_timer_sync(&ax25->timer);
- del_timer_sync(&ax25->t1timer);
- del_timer_sync(&ax25->t2timer);
- del_timer_sync(&ax25->t3timer);
- del_timer_sync(&ax25->idletimer);
+ if (!ax25_dev->device_up) {
+ del_timer_sync(&ax25->timer);
+ del_timer_sync(&ax25->t1timer);
+ del_timer_sync(&ax25->t2timer);
+ del_timer_sync(&ax25->t3timer);
+ del_timer_sync(&ax25->idletimer);
+ }
dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
ax25_dev_put(ax25_dev);
}
@@ -1654,9 +1661,12 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
struct sock *sk = sock->sk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
+ struct sk_buff_head *sk_queue;
int copied;
int err = 0;
+ int off = 0;
+ long timeo;
lock_sock(sk);
/*
@@ -1668,10 +1678,29 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
goto out;
}
- /* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags, &err);
- if (skb == NULL)
- goto out;
+ /* We need support for non-blocking reads. */
+ sk_queue = &sk->sk_receive_queue;
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
+ /* If no packet is available, release_sock(sk) and try again. */
+ if (!skb) {
+ if (err != -EAGAIN)
+ goto out;
+ release_sock(sk);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
+ &timeo, last)) {
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
+ &err, &last);
+ if (skb)
+ break;
+
+ if (err != -EAGAIN)
+ goto done;
+ }
+ if (!skb)
+ goto done;
+ lock_sock(sk);
+ }
if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
@@ -1718,6 +1747,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
out:
release_sock(sk);
+done:
return err;
}
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index b80fccbac62a..95a76d571c44 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -62,6 +62,7 @@ void ax25_dev_device_up(struct net_device *dev)
ax25_dev->dev = dev;
dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC);
ax25_dev->forward = NULL;
+ ax25_dev->device_up = true;
ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE;
ax25_dev->values[AX25_VALUES_AXDEFMODE] = AX25_DEF_AXDEFMODE;
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 3a476e4f6cd0..9ff98f46dc6b 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -268,7 +268,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
del_timer_sync(&ax25->t3timer);
del_timer_sync(&ax25->idletimer);
} else {
- if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
+ if (ax25->sk && !sock_flag(ax25->sk, SOCK_DESTROY))
ax25_stop_heartbeat(ax25);
ax25_stop_t1timer(ax25);
ax25_stop_t2timer(ax25);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 5abb2ca5b129..59a5c1341c26 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2153,7 +2153,7 @@ int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr,
bacpy(&entry->bdaddr, bdaddr);
entry->bdaddr_type = type;
- bitmap_from_u64(entry->flags, flags);
+ entry->flags = flags;
list_add(&entry->list, list);
@@ -2634,7 +2634,7 @@ int hci_register_dev(struct hci_dev *hdev)
* callback.
*/
if (hdev->wakeup)
- set_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, hdev->conn_flags);
+ hdev->conn_flags |= HCI_CONN_FLAG_REMOTE_WAKEUP;
hci_sock_dev_event(hdev, HCI_DEV_REG);
hci_dev_hold(hdev);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 635cc5fb451e..38ecaf9264ee 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -482,7 +482,7 @@ static int add_to_accept_list(struct hci_request *req,
/* During suspend, only wakeable devices can be in accept list */
if (hdev->suspended &&
- !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags))
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
return 0;
*num_entries += 1;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 4d2203c5f1bb..286d6767f017 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1637,7 +1637,7 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
* indicates that LL Privacy has been enabled and
* HCI_OP_LE_SET_PRIVACY_MODE is supported.
*/
- if (!test_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, params->flags))
+ if (!(params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY))
return 0;
irk = hci_find_irk_by_addr(hdev, &params->addr, params->addr_type);
@@ -1666,7 +1666,7 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
/* During suspend, only wakeable devices can be in acceptlist */
if (hdev->suspended &&
- !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags))
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
return 0;
/* Select filter policy to accept all advertising */
@@ -4888,7 +4888,7 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev)
hci_clear_event_filter_sync(hdev);
list_for_each_entry(b, &hdev->accept_list, list) {
- if (!test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, b->flags))
+ if (!(b->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
continue;
bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 74937a834648..ae758ab1b558 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4013,10 +4013,11 @@ static int exp_ll_privacy_feature_changed(bool enabled, struct hci_dev *hdev,
memcpy(ev.uuid, rpa_resolution_uuid, 16);
ev.flags = cpu_to_le32((enabled ? BIT(0) : 0) | BIT(1));
+ // Do we need to be atomic with the conn_flags?
if (enabled && privacy_mode_capable(hdev))
- set_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, hdev->conn_flags);
+ hdev->conn_flags |= HCI_CONN_FLAG_DEVICE_PRIVACY;
else
- clear_bit(HCI_CONN_FLAG_DEVICE_PRIVACY, hdev->conn_flags);
+ hdev->conn_flags &= ~HCI_CONN_FLAG_DEVICE_PRIVACY;
return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, hdev,
&ev, sizeof(ev),
@@ -4435,8 +4436,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
- bitmap_to_arr32(&supported_flags, hdev->conn_flags,
- __HCI_CONN_NUM_FLAGS);
+ supported_flags = hdev->conn_flags;
memset(&rp, 0, sizeof(rp));
@@ -4447,8 +4447,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
if (!br_params)
goto done;
- bitmap_to_arr32(&current_flags, br_params->flags,
- __HCI_CONN_NUM_FLAGS);
+ current_flags = br_params->flags;
} else {
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
le_addr_type(cp->addr.type));
@@ -4456,8 +4455,7 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
if (!params)
goto done;
- bitmap_to_arr32(&current_flags, params->flags,
- __HCI_CONN_NUM_FLAGS);
+ current_flags = params->flags;
}
bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
@@ -4502,8 +4500,8 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
&cp->addr.bdaddr, cp->addr.type,
__le32_to_cpu(current_flags));
- bitmap_to_arr32(&supported_flags, hdev->conn_flags,
- __HCI_CONN_NUM_FLAGS);
+ // We should take hci_dev_lock() early, I think.. conn_flags can change
+ supported_flags = hdev->conn_flags;
if ((supported_flags | current_flags) != supported_flags) {
bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)",
@@ -4519,7 +4517,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
cp->addr.type);
if (br_params) {
- bitmap_from_u64(br_params->flags, current_flags);
+ br_params->flags = current_flags;
status = MGMT_STATUS_SUCCESS;
} else {
bt_dev_warn(hdev, "No such BR/EDR device %pMR (0x%x)",
@@ -4529,15 +4527,11 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
le_addr_type(cp->addr.type));
if (params) {
- DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS);
-
- bitmap_from_u64(flags, current_flags);
-
/* Devices using RPAs can only be programmed in the
* acceptlist LL Privacy has been enable otherwise they
* cannot mark HCI_CONN_FLAG_REMOTE_WAKEUP.
*/
- if (test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, flags) &&
+ if ((current_flags & HCI_CONN_FLAG_REMOTE_WAKEUP) &&
!use_ll_privacy(hdev) &&
hci_find_irk_by_addr(hdev, &params->addr,
params->addr_type)) {
@@ -4546,14 +4540,13 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- bitmap_from_u64(params->flags, current_flags);
+ params->flags = current_flags;
status = MGMT_STATUS_SUCCESS;
/* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY
* has been set.
*/
- if (test_bit(HCI_CONN_FLAG_DEVICE_PRIVACY,
- params->flags))
+ if (params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY)
hci_update_passive_scan(hdev);
} else {
bt_dev_warn(hdev, "No such LE device %pMR (0x%x)",
@@ -7154,8 +7147,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
addr_type);
if (params)
- bitmap_to_arr32(&current_flags, params->flags,
- __HCI_CONN_NUM_FLAGS);
+ current_flags = params->flags;
}
err = hci_cmd_sync_queue(hdev, add_device_sync, NULL, NULL);
@@ -7164,8 +7156,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
added:
device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action);
- bitmap_to_arr32(&supported_flags, hdev->conn_flags,
- __HCI_CONN_NUM_FLAGS);
+ supported_flags = hdev->conn_flags;
device_flags_changed(NULL, hdev, &cp->addr.bdaddr, cp->addr.type,
supported_flags, current_flags);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 7057f8db4f99..1daf95e17d67 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -906,7 +906,6 @@ int crush_do_rule(const struct crush_map *map,
int recurse_to_leaf;
int wsize = 0;
int osize;
- int *tmp;
const struct crush_rule *rule;
__u32 step;
int i, j;
@@ -1073,9 +1072,7 @@ int crush_do_rule(const struct crush_map *map,
memcpy(o, c, osize*sizeof(*o));
/* swap o and w arrays */
- tmp = o;
- o = w;
- w = tmp;
+ swap(o, w);
wsize = osize;
break;
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 73f68d4625f3..929f6379a279 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -595,3 +595,9 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
return (bo && list_empty(&bo->cb_list)) ? -EOPNOTSUPP : count;
}
EXPORT_SYMBOL(flow_indr_dev_setup_offload);
+
+bool flow_indr_dev_exists(void)
+{
+ return !list_empty(&flow_block_indr_dev_list);
+}
+EXPORT_SYMBOL(flow_indr_dev_exists);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 47b6c1f0fdbb..54625287ee5b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1579,7 +1579,7 @@ static void neigh_managed_work(struct work_struct *work)
list_for_each_entry(neigh, &tbl->managed_list, managed_list)
neigh_event_send_probe(neigh, NULL, false);
queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
- NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
+ max(NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME), HZ));
write_unlock_bh(&tbl->lock);
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 2e78458900f2..eb8e128e43e8 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1120,12 +1120,6 @@ static int __init dccp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
- dccp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("dccp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
- if (!dccp_hashinfo.bind2_bucket_cachep)
- goto out_free_bind_bucket_cachep;
/*
* Size and allocate the main established and bind bucket
@@ -1156,7 +1150,7 @@ static int __init dccp_init(void)
if (!dccp_hashinfo.ehash) {
DCCP_CRIT("Failed to allocate DCCP established hash table");
- goto out_free_bind2_bucket_cachep;
+ goto out_free_bind_bucket_cachep;
}
for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
@@ -1182,23 +1176,14 @@ static int __init dccp_init(void)
goto out_free_dccp_locks;
}
- dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *)
- __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
-
- if (!dccp_hashinfo.bhash2) {
- DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
- goto out_free_dccp_bhash;
- }
-
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
}
rc = dccp_mib_init();
if (rc)
- goto out_free_dccp_bhash2;
+ goto out_free_dccp_bhash;
rc = dccp_ackvec_init();
if (rc)
@@ -1222,38 +1207,30 @@ out_ackvec_exit:
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
-out_free_dccp_bhash2:
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
-out_free_bind2_bucket_cachep:
- kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
dccp_hashinfo.bhash = NULL;
- dccp_hashinfo.bhash2 = NULL;
dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
- dccp_hashinfo.bind2_bucket_cachep = NULL;
return rc;
}
static void __exit dccp_fini(void)
{
- int bhash_order = get_order(dccp_hashinfo.bhash_size *
- sizeof(struct inet_bind_hashbucket));
-
ccid_cleanup_builtins();
dccp_mib_exit();
- free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
+ free_pages((unsigned long)dccp_hashinfo.bhash,
+ get_order(dccp_hashinfo.bhash_size *
+ sizeof(struct inet_bind_hashbucket)));
free_pages((unsigned long)dccp_hashinfo.ehash,
get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c0b7e6c21360..53f5f956d948 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,32 +117,6 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
-static bool use_bhash2_on_bind(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- int addr_type;
-
- if (sk->sk_family == AF_INET6) {
- addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- return addr_type != IPV6_ADDR_ANY &&
- addr_type != IPV6_ADDR_MAPPED;
- }
-#endif
- return sk->sk_rcv_saddr != htonl(INADDR_ANY);
-}
-
-static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net,
- int port)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return ipv6_portaddr_hash(net, &nulladdr, port);
-#endif
- return ipv4_portaddr_hash(net, 0, port);
-}
-
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
@@ -156,71 +130,16 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
-static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2,
- kuid_t sk_uid, bool relax,
- bool reuseport_cb_ok, bool reuseport_ok)
-{
- int bound_dev_if2;
-
- if (sk == sk2)
- return false;
-
- bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
-
- if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
- sk->sk_bound_dev_if == bound_dev_if2) {
- if (sk->sk_reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN) {
- if (!relax || (!reuseport_ok && sk->sk_reuseport &&
- sk2->sk_reuseport && reuseport_cb_ok &&
- (sk2->sk_state == TCP_TIME_WAIT ||
- uid_eq(sk_uid, sock_i_uid(sk2)))))
- return true;
- } else if (!reuseport_ok || !sk->sk_reuseport ||
- !sk2->sk_reuseport || !reuseport_cb_ok ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(sk_uid, sock_i_uid(sk2)))) {
- return true;
- }
- }
- return false;
-}
-
-static bool check_bhash2_conflict(const struct sock *sk,
- struct inet_bind2_bucket *tb2, kuid_t sk_uid,
- bool relax, bool reuseport_cb_ok,
- bool reuseport_ok)
-{
- struct sock *sk2;
-
- sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- continue;
-
- if (bind_conflict_exist(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
- return true;
- }
- return false;
-}
-
-/* This should be called only when the corresponding inet_bind_bucket spinlock
- * is held
- */
-static int inet_csk_bind_conflict(const struct sock *sk, int port,
- struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, /* may be null */
+static int inet_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb,
bool relax, bool reuseport_ok)
{
- struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- kuid_t uid = sock_i_uid((struct sock *)sk);
- struct sock_reuseport *reuseport_cb;
- struct inet_bind2_hashbucket *head2;
- bool reuseport_cb_ok;
struct sock *sk2;
- struct net *net;
- int l3mdev;
- u32 hash;
+ bool reuseport_cb_ok;
+ bool reuse = sk->sk_reuse;
+ bool reuseport = !!sk->sk_reuseport;
+ struct sock_reuseport *reuseport_cb;
+ kuid_t uid = sock_i_uid((struct sock *)sk);
rcu_read_lock();
reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
@@ -231,42 +150,40 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
/*
* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
- * in tb->owners and tb2->owners list belong
- * to the same net
+ * in tb->owners list belong to the same net - the
+ * one this bucket belongs to.
*/
- if (!use_bhash2_on_bind(sk)) {
- sk_for_each_bound(sk2, &tb->owners)
- if (bind_conflict_exist(sk, sk2, uid, relax,
- reuseport_cb_ok, reuseport_ok) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- return true;
+ sk_for_each_bound(sk2, &tb->owners) {
+ int bound_dev_if2;
- return false;
+ if (sk == sk2)
+ continue;
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+ if ((!sk->sk_bound_dev_if ||
+ !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2)) {
+ if (reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ if ((!relax ||
+ (!reuseport_ok &&
+ reuseport && sk2->sk_reuseport &&
+ reuseport_cb_ok &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(uid, sock_i_uid(sk2))))) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ } else if (!reuseport_ok ||
+ !reuseport || !sk2->sk_reuseport ||
+ !reuseport_cb_ok ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(uid, sock_i_uid(sk2)))) {
+ if (inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ }
+ }
}
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- net = sock_net(sk);
-
- /* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or
- * INADDR_ANY (if ipv4) socket.
- */
- hash = get_bhash2_nulladdr_hash(sk, net, port);
- head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-
- l3mdev = inet_sk_bound_l3mdev(sk);
- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk))
- break;
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- return false;
+ return sk2 != NULL;
}
/*
@@ -274,20 +191,16 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
* inet_bind_hashbucket lock held.
*/
static struct inet_bind_hashbucket *
-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret,
- struct inet_bind2_bucket **tb2_ret,
- struct inet_bind2_hashbucket **head2_ret, int *port_ret)
+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- struct inet_bind2_hashbucket *head2;
+ int port = 0;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
+ bool relax = false;
int i, low, high, attempt_half;
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
u32 remaining, offset;
- bool relax = false;
- int port = 0;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
@@ -326,12 +239,10 @@ other_parity_scan:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
- if (!inet_csk_bind_conflict(sk, port, tb, tb2,
- relax, false))
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
+ if (!inet_csk_bind_conflict(sk, tb, relax, false))
goto success;
goto next_port;
}
@@ -361,8 +272,6 @@ next_port:
success:
*port_ret = port;
*tb_ret = tb;
- *tb2_ret = tb2;
- *head2_ret = head2;
return head;
}
@@ -458,81 +367,54 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- bool bhash_created = false, bhash2_created = false;
- struct inet_bind2_bucket *tb2 = NULL;
- struct inet_bind2_hashbucket *head2;
- struct inet_bind_bucket *tb = NULL;
+ int ret = 1, port = snum;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
- int ret = 1, port = snum;
- bool found_port = false;
+ struct inet_bind_bucket *tb = NULL;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
if (!port) {
- head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
+ head = inet_csk_find_open_port(sk, &tb, &port);
if (!head)
return ret;
- if (tb && tb2)
- goto success;
- found_port = true;
- } else {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev))
- break;
-
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
- }
-
- if (!tb) {
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
- head, port, l3mdev);
if (!tb)
- goto fail_unlock;
- bhash_created = true;
- }
-
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
- net, head2, port, l3mdev, sk);
- if (!tb2)
- goto fail_unlock;
- bhash2_created = true;
+ goto tb_not_found;
+ goto success;
}
-
- /* If we had to find an open port, we already checked for conflicts */
- if (!found_port && !hlist_empty(&tb->owners)) {
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port)
+ goto tb_found;
+tb_not_found:
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ net, head, port, l3mdev);
+ if (!tb)
+ goto fail_unlock;
+tb_found:
+ if (!hlist_empty(&tb->owners)) {
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
if ((tb->fastreuse > 0 && reuse) ||
sk_reuseport_match(tb, sk))
goto success;
- if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true))
+ if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
}
success:
inet_csk_update_fastreuse(tb, sk);
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
- WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
ret = 0;
fail_unlock:
- if (ret) {
- if (bhash_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- if (bhash2_created)
- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
- tb2);
- }
spin_unlock_bh(&head->lock);
return ret;
}
@@ -1079,7 +961,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
- newicsk->icsk_bind2_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e8de5e699b3f..b9d995b5ce24 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -81,41 +81,6 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
return tb;
}
-struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
- struct net *net,
- struct inet_bind2_hashbucket *head,
- const unsigned short port,
- int l3mdev,
- const struct sock *sk)
-{
- struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
-
- if (tb) {
- write_pnet(&tb->ib_net, net);
- tb->l3mdev = l3mdev;
- tb->port = port;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- else
-#endif
- tb->rcv_saddr = sk->sk_rcv_saddr;
- INIT_HLIST_HEAD(&tb->owners);
- hlist_add_head(&tb->node, &head->chain);
- }
- return tb;
-}
-
-static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return ipv6_addr_equal(&tb2->v6_rcv_saddr,
- &sk->sk_v6_rcv_saddr);
-#endif
- return tb2->rcv_saddr == sk->sk_rcv_saddr;
-}
-
/*
* Caller must hold hashbucket lock for this tb with local BH disabled
*/
@@ -127,25 +92,12 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
}
}
-/* Caller must hold the lock for the corresponding hashbucket in the bhash table
- * with local BH disabled
- */
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
-{
- if (hlist_empty(&tb->owners)) {
- __hlist_del(&tb->node);
- kmem_cache_free(cachep, tb);
- }
-}
-
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, const unsigned short snum)
+ const unsigned short snum)
{
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
- sk_add_bind2_node(sk, &tb2->owners);
- inet_csk(sk)->icsk_bind2_hash = tb2;
}
/*
@@ -157,7 +109,6 @@ static void __inet_put_port(struct sock *sk)
const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
spin_lock(&head->lock);
@@ -166,13 +117,6 @@ static void __inet_put_port(struct sock *sk)
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-
- if (inet_csk(sk)->icsk_bind2_hash) {
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- __sk_del_bind2_node(sk);
- inet_csk(sk)->icsk_bind2_hash = NULL;
- inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
- }
spin_unlock(&head->lock);
}
@@ -189,19 +133,14 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
unsigned short port = inet_sk(child)->inet_num;
const int bhash = inet_bhashfn(sock_net(sk), port,
- table->bhash_size);
+ table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
- struct inet_bind2_hashbucket *head_bhash2;
- bool created_inet_bind_bucket = false;
- struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
int l3mdev;
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- if (unlikely(!tb || !tb2)) {
+ if (unlikely(!tb)) {
spin_unlock(&head->lock);
return -ENOENT;
}
@@ -214,45 +153,25 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
* as that of the child socket. We have to look up or
* create a new bind bucket for the child here. */
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev))
+ if (net_eq(ib_net(tb), sock_net(sk)) &&
+ tb->l3mdev == l3mdev && tb->port == port)
break;
}
if (!tb) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
- net, head, port, l3mdev);
+ sock_net(sk), head, port,
+ l3mdev);
if (!tb) {
spin_unlock(&head->lock);
return -ENOMEM;
}
- created_inet_bind_bucket = true;
}
inet_csk_update_fastreuse(tb, child);
-
- goto bhash2_find;
- } else if (!bind2_bucket_addr_match(tb2, child)) {
- l3mdev = inet_sk_bound_l3mdev(sk);
-
-bhash2_find:
- tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child,
- &head_bhash2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
- net, head_bhash2, port,
- l3mdev, child);
- if (!tb2)
- goto error;
- }
}
- inet_bind_hash(child, tb, tb2, port);
+ inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
return 0;
-
-error:
- if (created_inet_bind_bucket)
- inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
- spin_unlock(&head->lock);
- return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
@@ -756,76 +675,6 @@ void inet_unhash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_unhash);
-static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb,
- struct net *net, unsigned short port,
- int l3mdev, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
-}
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
- struct net *net, const unsigned short port,
- int l3mdev, const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
-}
-
-static struct inet_bind2_hashbucket *
-inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk,
- const struct net *net, unsigned short port)
-{
- u32 hash;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
- else
-#endif
- hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
- return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-}
-
-/* This should only be called when the spinlock for the socket's corresponding
- * bind_hashbucket is held
- */
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
- const unsigned short port, int l3mdev, struct sock *sk,
- struct inet_bind2_hashbucket **head)
-{
- struct inet_bind2_bucket *bhash2 = NULL;
- struct inet_bind2_hashbucket *h;
-
- h = inet_bhashfn_portaddr(hinfo, sk, net, port);
- inet_bind_bucket_for_each(bhash2, &h->chain) {
- if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
- break;
- }
-
- if (head)
- *head = h;
-
- return bhash2;
-}
-
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
@@ -846,13 +695,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_timewait_sock *tw = NULL;
- struct inet_bind2_hashbucket *head2;
struct inet_bind_hashbucket *head;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
- bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
int l3mdev;
@@ -909,7 +755,8 @@ other_parity_scan:
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
@@ -927,7 +774,6 @@ other_parity_scan:
spin_unlock_bh(&head->lock);
return -ENOMEM;
}
- tb_created = true;
tb->fastreuse = -1;
tb->fastreuseport = -1;
goto ok;
@@ -943,17 +789,6 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- /* Find the corresponding tb2 bucket since we need to
- * add the socket to the bhash2 table as well
- */
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
- head2, port, l3mdev, sk);
- if (!tb2)
- goto error;
- }
-
/* Here we want to add a little bit of randomness to the next source
* port that will be chosen. We use a max() with a random here so that
* on low contention the randomness is maximal and on high contention
@@ -963,7 +798,7 @@ ok:
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
@@ -975,12 +810,6 @@ ok:
inet_twsk_deschedule_put(tw);
local_bh_enable();
return 0;
-
-error:
- if (tb_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- spin_unlock_bh(&head->lock);
- return -ENOMEM;
}
/*
@@ -1026,10 +855,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
init_hashinfo_lhash2(h);
/* this one is used for source ports of outgoing connections */
- table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
- sizeof(*table_perturb), GFP_KERNEL);
- if (!table_perturb)
- panic("TCP: failed to alloc table_perturb");
+ table_perturb = alloc_large_system_hash("Table-perturb",
+ sizeof(*table_perturb),
+ INET_TABLE_PERTURB_SIZE,
+ 0, 0, NULL, NULL,
+ INET_TABLE_PERTURB_SIZE,
+ INET_TABLE_PERTURB_SIZE);
}
int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7e474a85deaf..3b9cd487075a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -629,21 +629,20 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
}
if (dev->header_ops) {
- const int pull_len = tunnel->hlen + sizeof(struct iphdr);
-
if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
- if (pull_len > skb_transport_offset(skb))
- goto free_skb;
-
/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
* to gre header.
*/
- skb_pull(skb, pull_len);
+ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
skb_reset_mac_header(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_start(skb) < skb->data)
+ goto free_skb;
} else {
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9984d23a7f3e..028513d3e2a2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4604,12 +4604,6 @@ void __init tcp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
- tcp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("tcp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC |
- SLAB_ACCOUNT,
- NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
@@ -4632,9 +4626,8 @@ void __init tcp_init(void)
if (inet_ehash_locks_alloc(&tcp_hashinfo))
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
- alloc_large_system_hash("TCP bind bhash tables",
- sizeof(struct inet_bind_hashbucket) +
- sizeof(struct inet_bind2_hashbucket),
+ alloc_large_system_hash("TCP bind",
+ sizeof(struct inet_bind_hashbucket),
tcp_hashinfo.ehash_mask + 1,
17, /* one slot per 128 KB of memory */
0,
@@ -4643,12 +4636,9 @@ void __init tcp_init(void)
0,
64 * 1024);
tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
- tcp_hashinfo.bhash2 =
- (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size);
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3231af73e430..2e2a9ece9af2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2706,12 +2706,15 @@ static void tcp_mtup_probe_success(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ u64 val;
- /* FIXME: breaks with very large cwnd */
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) *
- tcp_mss_to_mtu(sk, tp->mss_cache) /
- icsk->icsk_mtup.probe_size);
+
+ val = (u64)tcp_snd_cwnd(tp) * tcp_mss_to_mtu(sk, tp->mss_cache);
+ do_div(val, icsk->icsk_mtup.probe_size);
+ DEBUG_NET_WARN_ON_ONCE((u32)val != val);
+ tcp_snd_cwnd_set(tp, max_t(u32, 1U, val));
+
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dac2650f3863..fe8f23b95d32 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1207,8 +1207,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key->l3index = l3index;
key->flags = flags;
memcpy(&key->addr, addr,
- (family == AF_INET6) ? sizeof(struct in6_addr) :
- sizeof(struct in_addr));
+ (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) ? sizeof(struct in6_addr) :
+ sizeof(struct in_addr));
hlist_add_head_rcu(&key->node, &md5sig->head);
return 0;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4b2284ed4a2..1c054431e358 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -4115,8 +4115,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
NULL);
if (!res) {
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
if (unlikely(tcp_passive_fastopen(sk)))
tcp_sk(sk)->total_retrans++;
trace_tcp_retransmit_synack(sk, req);
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 2fe5860c21d6..b146ce88c5d0 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -304,4 +304,3 @@ void __init xfrm4_protocol_init(void)
{
xfrm_input_register_afinfo(&xfrm4_input_afinfo);
}
-EXPORT_SYMBOL(xfrm4_protocol_init);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ca0aa744593e..1b1932502e9e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5586,7 +5586,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
- array[DEVCONF_ACCEPT_UNSOLICITED_NA] = cnf->accept_unsolicited_na;
+ array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
}
static inline size_t inet6_ifla6_size(void)
@@ -7038,8 +7038,8 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra2 = (void *)SYSCTL_ONE,
},
{
- .procname = "accept_unsolicited_na",
- .data = &ipv6_devconf.accept_unsolicited_na,
+ .procname = "accept_untracked_na",
+ .data = &ipv6_devconf.accept_untracked_na,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4081b12a01ff..77e3f5970ce4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1450,7 +1450,7 @@ static int __ip6_append_data(struct sock *sk,
struct page_frag *pfrag,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
unsigned int flags, struct ipcm6_cookie *ipc6)
{
struct sk_buff *skb, *skb_prev = NULL;
@@ -1798,7 +1798,7 @@ error:
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags)
{
@@ -1995,7 +1995,7 @@ EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct rt6_info *rt,
unsigned int flags, struct inet_cork_full *cork)
{
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 254addad0dd3..b0dfe97ea4ee 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -979,7 +979,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
struct inet6_dev *idev = __in6_dev_get(dev);
struct inet6_ifaddr *ifp;
struct neighbour *neigh;
- bool create_neigh;
+ u8 new_state;
if (skb->len < sizeof(struct nd_msg)) {
ND_PRINTK(2, warn, "NA: packet too short\n");
@@ -1000,7 +1000,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
/* For some 802.11 wireless deployments (and possibly other networks),
* there will be a NA proxy and unsolicitd packets are attacks
* and thus should not be accepted.
- * drop_unsolicited_na takes precedence over accept_unsolicited_na
+ * drop_unsolicited_na takes precedence over accept_untracked_na
*/
if (!msg->icmph.icmp6_solicited && idev &&
idev->cnf.drop_unsolicited_na)
@@ -1041,25 +1041,33 @@ static void ndisc_recv_na(struct sk_buff *skb)
in6_ifa_put(ifp);
return;
}
+
+ neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
+
/* RFC 9131 updates original Neighbour Discovery RFC 4861.
- * An unsolicited NA can now create a neighbour cache entry
- * on routers if it has Target LL Address option.
+ * NAs with Target LL Address option without a corresponding
+ * entry in the neighbour cache can now create a STALE neighbour
+ * cache entry on routers.
+ *
+ * entry accept fwding solicited behaviour
+ * ------- ------ ------ --------- ----------------------
+ * present X X 0 Set state to STALE
+ * present X X 1 Set state to REACHABLE
+ * absent 0 X X Do nothing
+ * absent 1 0 X Do nothing
+ * absent 1 1 X Add a new STALE entry
*
- * drop accept fwding behaviour
- * ---- ------ ------ ----------------------------------------------
- * 1 X X Drop NA packet and don't pass up the stack
- * 0 0 X Pass NA packet up the stack, don't update NC
- * 0 1 0 Pass NA packet up the stack, don't update NC
- * 0 1 1 Pass NA packet up the stack, and add a STALE
- * NC entry
* Note that we don't do a (daddr == all-routers-mcast) check.
*/
- create_neigh = !msg->icmph.icmp6_solicited && lladdr &&
- idev && idev->cnf.forwarding &&
- idev->cnf.accept_unsolicited_na;
- neigh = __neigh_lookup(&nd_tbl, &msg->target, dev, create_neigh);
+ new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE;
+ if (!neigh && lladdr &&
+ idev && idev->cnf.forwarding &&
+ idev->cnf.accept_untracked_na) {
+ neigh = neigh_create(&nd_tbl, &msg->target, dev);
+ new_state = NUD_STALE;
+ }
- if (neigh) {
+ if (neigh && !IS_ERR(neigh)) {
u8 old_flags = neigh->flags;
struct net *net = dev_net(dev);
@@ -1079,7 +1087,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
}
ndisc_update(dev, neigh, lladdr,
- msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
+ new_state,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
(msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ff033d16549e..ecf3a553a0dc 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -101,6 +101,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.sockc.tsflags = sk->sk_tsflags;
ipc6.sockc.mark = sk->sk_mark;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+
if (msg->msg_controllen) {
struct ipv6_txoptions opt = {};
@@ -112,17 +115,14 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return err;
/* Changes to txoptions and flow info are not implemented, yet.
- * Drop the options, fl6 is wiped below.
+ * Drop the options.
*/
ipc6.opt = NULL;
}
- memset(&fl6, 0, sizeof(fl6));
-
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
- fl6.flowi6_oif = oif;
fl6.flowi6_mark = ipc6.sockc.mark;
fl6.flowi6_uid = sk->sk_uid;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 29bc4e7c3046..6de01185cc68 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -399,7 +399,6 @@ int __init seg6_hmac_init(void)
{
return seg6_hmac_init_algo();
}
-EXPORT_SYMBOL(seg6_hmac_init);
int __net_init seg6_hmac_net_init(struct net *net)
{
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 9fbe243a0e81..98a34287439c 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -218,6 +218,7 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
struct flowi6 fl6;
int dev_flags = 0;
+ memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_iif = skb->dev->ifindex;
fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
fl6.saddr = hdr->saddr;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 11e1a3a3e442..fb16d7c4e1b8 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2826,10 +2826,12 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
void *ext_hdrs[SADB_EXT_MAX];
int err;
- err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
- BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
- if (err)
- return err;
+ /* Non-zero return value of pfkey_broadcast() does not always signal
+ * an error and even on an actual error we may still want to process
+ * the message so rather ignore the return value.
+ */
+ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+ BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
memset(ext_hdrs, 0, sizeof(ext_hdrs));
err = parse_exthdrs(skb, hdr, ext_hdrs);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index c6ff8bf9b55f..9dbd801ddb98 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -504,14 +504,15 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
int transhdrlen = 4; /* zero session-id */
- int ulen = len + transhdrlen;
+ int ulen;
int err;
/* Rough check on arithmetic overflow,
* better check is made in ip6_append_data().
*/
- if (len > INT_MAX)
+ if (len > INT_MAX - transhdrlen)
return -EMSGSIZE;
+ ulen = len + transhdrlen;
/* Mirror BSD error message compatibility */
if (msg->msg_flags & MSG_OOB)
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e3452445b363..d8246e00a10b 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1749,12 +1749,9 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata)
if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) {
if (old_ctx)
- err = ieee80211_vif_use_reserved_reassign(sdata);
- else
- err = ieee80211_vif_use_reserved_assign(sdata);
+ return ieee80211_vif_use_reserved_reassign(sdata);
- if (err)
- return err;
+ return ieee80211_vif_use_reserved_assign(sdata);
}
/*
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 12fc9cda4a2c..51144fc66889 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -222,12 +222,18 @@ err_register:
}
static void nft_netdev_unregister_hooks(struct net *net,
- struct list_head *hook_list)
+ struct list_head *hook_list,
+ bool release_netdev)
{
- struct nft_hook *hook;
+ struct nft_hook *hook, *next;
- list_for_each_entry(hook, hook_list, list)
+ list_for_each_entry_safe(hook, next, hook_list, list) {
nf_unregister_net_hook(net, &hook->ops);
+ if (release_netdev) {
+ list_del(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
}
static int nf_tables_register_hook(struct net *net,
@@ -253,9 +259,10 @@ static int nf_tables_register_hook(struct net *net,
return nf_register_net_hook(net, &basechain->ops);
}
-static void nf_tables_unregister_hook(struct net *net,
- const struct nft_table *table,
- struct nft_chain *chain)
+static void __nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain,
+ bool release_netdev)
{
struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
@@ -270,11 +277,19 @@ static void nf_tables_unregister_hook(struct net *net,
return basechain->type->ops_unregister(net, ops);
if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
- nft_netdev_unregister_hooks(net, &basechain->hook_list);
+ nft_netdev_unregister_hooks(net, &basechain->hook_list,
+ release_netdev);
else
nf_unregister_net_hook(net, &basechain->ops);
}
+static void nf_tables_unregister_hook(struct net *net,
+ const struct nft_table *table,
+ struct nft_chain *chain)
+{
+ return __nf_tables_unregister_hook(net, table, chain, false);
+}
+
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -529,6 +544,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
+ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
nft_trans_flowtable(trans) = flowtable;
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -1899,7 +1915,6 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
goto err_hook_dev;
}
hook->ops.dev = dev;
- hook->inactive = false;
return hook;
@@ -2151,7 +2166,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
chain->flags |= NFT_CHAIN_BASE | flags;
basechain->policy = NF_ACCEPT;
if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
- nft_chain_offload_priority(basechain) < 0)
+ !nft_chain_offload_support(basechain))
return -EOPNOTSUPP;
flow_block_init(&basechain->flow_block);
@@ -2873,27 +2888,31 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
err = nf_tables_expr_parse(ctx, nla, &expr_info);
if (err < 0)
- goto err1;
+ goto err_expr_parse;
+
+ err = -EOPNOTSUPP;
+ if (!(expr_info.ops->type->flags & NFT_EXPR_STATEFUL))
+ goto err_expr_stateful;
err = -ENOMEM;
expr = kzalloc(expr_info.ops->size, GFP_KERNEL_ACCOUNT);
if (expr == NULL)
- goto err2;
+ goto err_expr_stateful;
err = nf_tables_newexpr(ctx, &expr_info, expr);
if (err < 0)
- goto err3;
+ goto err_expr_new;
return expr;
-err3:
+err_expr_new:
kfree(expr);
-err2:
+err_expr_stateful:
owner = expr_info.ops->type->owner;
if (expr_info.ops->type->release_ops)
expr_info.ops->type->release_ops(expr_info.ops);
module_put(owner);
-err1:
+err_expr_parse:
return ERR_PTR(err);
}
@@ -4242,6 +4261,9 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
u32 len;
int err;
+ if (desc->field_count >= ARRAY_SIZE(desc->field_len))
+ return -E2BIG;
+
err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr,
nft_concat_policy, NULL);
if (err < 0)
@@ -4251,9 +4273,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
return -EINVAL;
len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN]));
-
- if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT)
- return -E2BIG;
+ if (!len || len > U8_MAX)
+ return -EINVAL;
desc->field_len[desc->field_count++] = len;
@@ -4264,7 +4285,8 @@ static int nft_set_desc_concat(struct nft_set_desc *desc,
const struct nlattr *nla)
{
struct nlattr *attr;
- int rem, err;
+ u32 num_regs = 0;
+ int rem, err, i;
nla_for_each_nested(attr, nla, rem) {
if (nla_type(attr) != NFTA_LIST_ELEM)
@@ -4275,6 +4297,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc,
return err;
}
+ for (i = 0; i < desc->field_count; i++)
+ num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32));
+
+ if (num_regs > NFT_REG32_COUNT)
+ return -E2BIG;
+
return 0;
}
@@ -5344,8 +5372,10 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_get_set_elem(&ctx, set, attr);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
break;
+ }
}
return err;
@@ -5413,9 +5443,6 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
return expr;
err = -EOPNOTSUPP;
- if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL))
- goto err_set_elem_expr;
-
if (expr->ops->type->flags & NFT_EXPR_GC) {
if (set->flags & NFT_SET_TIMEOUT)
goto err_set_elem_expr;
@@ -6125,8 +6152,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
return err;
+ }
}
if (nft_net->validate_state == NFT_VALIDATE_DO)
@@ -6396,8 +6425,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_del_setelem(&ctx, set, attr);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
break;
+ }
}
return err;
}
@@ -7291,13 +7322,25 @@ static void nft_unregister_flowtable_hook(struct net *net,
FLOW_BLOCK_UNBIND);
}
-static void nft_unregister_flowtable_net_hooks(struct net *net,
- struct list_head *hook_list)
+static void __nft_unregister_flowtable_net_hooks(struct net *net,
+ struct list_head *hook_list,
+ bool release_netdev)
{
- struct nft_hook *hook;
+ struct nft_hook *hook, *next;
- list_for_each_entry(hook, hook_list, list)
+ list_for_each_entry_safe(hook, next, hook_list, list) {
nf_unregister_net_hook(net, &hook->ops);
+ if (release_netdev) {
+ list_del(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
+}
+
+static void nft_unregister_flowtable_net_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ __nft_unregister_flowtable_net_hooks(net, hook_list, false);
}
static int nft_register_flowtable_net_hooks(struct net *net,
@@ -7390,11 +7433,15 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
if (nla[NFTA_FLOWTABLE_FLAGS]) {
flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
- if (flags & ~NFT_FLOWTABLE_MASK)
- return -EOPNOTSUPP;
+ if (flags & ~NFT_FLOWTABLE_MASK) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^
- (flags & NFT_FLOWTABLE_HW_OFFLOAD))
- return -EOPNOTSUPP;
+ (flags & NFT_FLOWTABLE_HW_OFFLOAD)) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
} else {
flags = flowtable->data.flags;
}
@@ -7575,6 +7622,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
{
const struct nlattr * const *nla = ctx->nla;
struct nft_flowtable_hook flowtable_hook;
+ LIST_HEAD(flowtable_del_list);
struct nft_hook *this, *hook;
struct nft_trans *trans;
int err;
@@ -7590,7 +7638,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
err = -ENOENT;
goto err_flowtable_del_hook;
}
- hook->inactive = true;
+ list_move(&hook->list, &flowtable_del_list);
}
trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
@@ -7603,6 +7651,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
nft_trans_flowtable(trans) = flowtable;
nft_trans_flowtable_update(trans) = true;
INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans));
nft_flowtable_hook_release(&flowtable_hook);
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -7610,13 +7659,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
return 0;
err_flowtable_del_hook:
- list_for_each_entry(this, &flowtable_hook.list, list) {
- hook = nft_hook_list_find(&flowtable->hook_list, this);
- if (!hook)
- break;
-
- hook->inactive = false;
- }
+ list_splice(&flowtable_del_list, &flowtable->hook_list);
nft_flowtable_hook_release(&flowtable_hook);
return err;
@@ -8286,6 +8329,9 @@ static void nft_commit_release(struct nft_trans *trans)
nf_tables_chain_destroy(&trans->ctx);
break;
case NFT_MSG_DELRULE:
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_DELSET:
@@ -8520,17 +8566,6 @@ void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
-static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
- struct list_head *hook_list)
-{
- struct nft_hook *hook, *next;
-
- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- if (hook->inactive)
- list_move(&hook->list, hook_list);
- }
-}
-
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -8785,6 +8820,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
NFT_MSG_NEWRULE);
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
nft_trans_destroy(trans);
break;
case NFT_MSG_DELRULE:
@@ -8875,8 +8913,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- nft_flowtable_hooks_del(nft_trans_flowtable(trans),
- &nft_trans_flowtable_hooks(trans));
nf_tables_flowtable_notify(&trans->ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
@@ -8957,7 +8993,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
- struct nft_hook *hook;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
@@ -9088,8 +9123,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- list_for_each_entry(hook, &nft_trans_flowtable(trans)->hook_list, list)
- hook->inactive = false;
+ list_splice(&nft_trans_flowtable_hooks(trans),
+ &nft_trans_flowtable(trans)->hook_list);
} else {
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
@@ -9739,9 +9774,10 @@ static void __nft_release_hook(struct net *net, struct nft_table *table)
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hook(net, table, chain);
+ __nf_tables_unregister_hook(net, table, chain, true);
list_for_each_entry(flowtable, &table->flowtables, list)
- nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list);
+ __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list,
+ true);
}
static void __nft_release_hooks(struct net *net)
@@ -9880,7 +9916,11 @@ static int __net_init nf_tables_init_net(struct net *net)
static void __net_exit nf_tables_pre_exit_net(struct net *net)
{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ mutex_lock(&nft_net->commit_mutex);
__nft_release_hooks(net);
+ mutex_unlock(&nft_net->commit_mutex);
}
static void __net_exit nf_tables_exit_net(struct net *net)
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 2d36952b1392..910ef881c3b8 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -208,7 +208,7 @@ static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
return 0;
}
-int nft_chain_offload_priority(struct nft_base_chain *basechain)
+static int nft_chain_offload_priority(const struct nft_base_chain *basechain)
{
if (basechain->ops.priority <= 0 ||
basechain->ops.priority > USHRT_MAX)
@@ -217,6 +217,27 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain)
return 0;
}
+bool nft_chain_offload_support(const struct nft_base_chain *basechain)
+{
+ struct net_device *dev;
+ struct nft_hook *hook;
+
+ if (nft_chain_offload_priority(basechain) < 0)
+ return false;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.pf != NFPROTO_NETDEV ||
+ hook->ops.hooknum != NF_NETDEV_INGRESS)
+ return false;
+
+ dev = hook->ops.dev;
+ if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists())
+ return false;
+ }
+
+ return true;
+}
+
static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
const struct nft_base_chain *basechain,
const struct nft_rule *rule,
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index ad3bbe34ca88..2f7c477fc9e7 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -45,7 +45,6 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket");
static unsigned int nfnetlink_pernet_id __read_mostly;
struct nfnl_net {
- unsigned int ctnetlink_listeners;
struct sock *nfnl;
};
@@ -673,18 +672,8 @@ static int nfnetlink_bind(struct net *net, int group)
#ifdef CONFIG_NF_CONNTRACK_EVENTS
if (type == NFNL_SUBSYS_CTNETLINK) {
- struct nfnl_net *nfnlnet = nfnl_pernet(net);
-
nfnl_lock(NFNL_SUBSYS_CTNETLINK);
-
- if (WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == UINT_MAX)) {
- nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
- return -EOVERFLOW;
- }
-
- nfnlnet->ctnetlink_listeners++;
- if (nfnlnet->ctnetlink_listeners == 1)
- WRITE_ONCE(net->ct.ctnetlink_has_listener, true);
+ WRITE_ONCE(net->ct.ctnetlink_has_listener, true);
nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
}
#endif
@@ -694,15 +683,12 @@ static int nfnetlink_bind(struct net *net, int group)
static void nfnetlink_unbind(struct net *net, int group)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- int type = nfnl_group2type[group];
-
- if (type == NFNL_SUBSYS_CTNETLINK) {
- struct nfnl_net *nfnlnet = nfnl_pernet(net);
+ if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX)
+ return;
+ if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) {
nfnl_lock(NFNL_SUBSYS_CTNETLINK);
- WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == 0);
- nfnlnet->ctnetlink_listeners--;
- if (nfnlnet->ctnetlink_listeners == 0)
+ if (!nfnetlink_has_listeners(net, group))
WRITE_ONCE(net->ct.ctnetlink_has_listener, false);
nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index f069c24c6146..af15102bc696 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -35,12 +35,13 @@ static unsigned int nfct_timeout_id __read_mostly;
struct ctnl_timeout {
struct list_head head;
+ struct list_head free_head;
struct rcu_head rcu_head;
refcount_t refcnt;
char name[CTNL_TIMEOUT_NAME_MAX];
- struct nf_ct_timeout timeout;
- struct list_head free_head;
+ /* must be at the end */
+ struct nf_ct_timeout timeout;
};
struct nfct_timeout_pernet {
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index a16cf47199b7..a25c88bc8b75 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -232,19 +232,21 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
- fl.u.ip4.saddr = ct->tuplehash[dir].tuple.dst.u3.ip;
+ fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos);
fl.u.ip4.flowi4_mark = pkt->skb->mark;
+ fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
break;
case NFPROTO_IPV6:
fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
- fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6;
+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.src.u3.in6;
fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
fl.u.ip6.flowi6_iif = this_dst->dev->ifindex;
fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
fl.u.ip6.flowi6_mark = pkt->skb->mark;
+ fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
break;
}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 04ea8b9bf202..981addb2d051 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -213,6 +213,8 @@ static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src
struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
+ priv_dst->cost = priv_src->cost;
+
return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
}
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 4394df4bc99b..e5fd6995e4bf 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -335,7 +335,8 @@ static void nft_nat_inet_eval(const struct nft_expr *expr,
{
const struct nft_nat *priv = nft_expr_priv(expr);
- if (priv->family == nft_pf(pkt))
+ if (priv->family == nft_pf(pkt) ||
+ priv->family == NFPROTO_INET)
nft_nat_eval(expr, regs, pkt);
}
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 6ff3e10ff8e3..eb2c0959e5b6 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -975,7 +975,7 @@ static void nfc_release(struct device *d)
kfree(se);
}
- ida_simple_remove(&nfc_index_ida, dev->idx);
+ ida_free(&nfc_index_ida, dev->idx);
kfree(dev);
}
@@ -1066,7 +1066,7 @@ struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
if (!dev)
return NULL;
- rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL);
+ rc = ida_alloc(&nfc_index_ida, GFP_KERNEL);
if (rc < 0)
goto err_free_dev;
dev->idx = rc;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 1b5d73079dc9..868db4669a29 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -373,6 +373,7 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
update_ip_l4_checksum(skb, nh, *addr, new_addr);
csum_replace4(&nh->check, *addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
*addr = new_addr;
}
@@ -420,6 +421,7 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
update_ipv6_checksum(skb, l4_proto, addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
memcpy(addr, new_addr, sizeof(__be32[4]));
}
@@ -660,6 +662,7 @@ static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
{
+ ovs_ct_clear(skb, NULL);
inet_proto_csum_replace2(check, skb, *port, new_port, false);
*port = new_port;
}
@@ -699,6 +702,7 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
uh->dest = dst;
flow_key->tp.src = src;
flow_key->tp.dst = dst;
+ ovs_ct_clear(skb, NULL);
}
skb_clear_hash(skb);
@@ -761,6 +765,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
+
flow_key->tp.src = sh->source;
flow_key->tp.dst = sh->dest;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 4a947c13c813..4e70df91d0f2 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1342,7 +1342,9 @@ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
nf_ct_put(ct);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
- ovs_ct_fill_key(skb, key, false);
+
+ if (key)
+ ovs_ct_fill_key(skb, key, false);
return 0;
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 677f9cfa9660..ca6e92a22923 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1935,8 +1935,10 @@ static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
/* Move network header to the right position for VLAN tagged packets */
if (likely(skb->dev->type == ARPHRD_ETHER) &&
eth_type_vlan(skb->protocol) &&
- __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
- skb_set_network_header(skb, depth);
+ __vlan_get_protocol(skb, skb->protocol, &depth) != 0) {
+ if (pskb_may_pull(skb, depth))
+ skb_set_network_header(skb, depth);
+ }
skb_probe_transport_header(skb);
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 8af9d6e5ba61..e013253b10d1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -548,7 +548,7 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb,
break;
#endif
default:
- return -1;
+ return false;
}
if (ip6h->hop_limit <= 1)
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index a201bf29af98..433bb5a7df31 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2161,6 +2161,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
not_found:
ini->smcr_version &= ~SMC_V2;
+ ini->smcrv2.ib_dev_v2 = NULL;
ini->check_smcrv2 = false;
}
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 5c731f27996e..53f63bfbaf5f 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -82,7 +82,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
/* abnormal termination */
if (!rc)
smc_wr_tx_put_slot(link,
- (struct smc_wr_tx_pend_priv *)pend);
+ (struct smc_wr_tx_pend_priv *)(*pend));
rc = -EPIPE;
}
return rc;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index df194cc07035..f87a2d8f23a7 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -919,7 +919,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
- * xdr_commit_encode - Ensure all data is written to buffer
+ * __xdr_commit_encode - Ensure all data is written to buffer
* @xdr: pointer to xdr_stream
*
* We handle encoding across page boundaries by giving the caller a
@@ -931,26 +931,29 @@ EXPORT_SYMBOL_GPL(xdr_init_encode);
* required at the end of encoding, or any other time when the xdr_buf
* data might be read.
*/
-inline void xdr_commit_encode(struct xdr_stream *xdr)
+void __xdr_commit_encode(struct xdr_stream *xdr)
{
- int shift = xdr->scratch.iov_len;
+ size_t shift = xdr->scratch.iov_len;
void *page;
- if (shift == 0)
- return;
page = page_address(*xdr->page_ptr);
memcpy(xdr->scratch.iov_base, page, shift);
memmove(page, page + shift, (void *)xdr->p - page);
xdr_reset_scratch_buffer(xdr);
}
-EXPORT_SYMBOL_GPL(xdr_commit_encode);
+EXPORT_SYMBOL_GPL(__xdr_commit_encode);
-static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
- size_t nbytes)
+/*
+ * The buffer space to be reserved crosses the boundary between
+ * xdr->buf->head and xdr->buf->pages, or between two pages
+ * in xdr->buf->pages.
+ */
+static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
+ size_t nbytes)
{
- __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
+ void *p;
if (nbytes > PAGE_SIZE)
goto out_overflow; /* Bigger buffers require special handling */
@@ -964,6 +967,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
xdr->buf->page_len += frag1bytes;
xdr->page_ptr++;
xdr->iov = NULL;
+
/*
* If the last encode didn't end exactly on a page boundary, the
* next one will straddle boundaries. Encode into the next
@@ -972,14 +976,19 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
* space at the end of the previous buffer:
*/
xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes);
- p = page_address(*xdr->page_ptr);
+
/*
- * Note this is where the next encode will start after we've
- * shifted this one back:
+ * xdr->p is where the next encode will start after
+ * xdr_commit_encode() has shifted this one back:
*/
- xdr->p = (void *)p + frag2bytes;
+ p = page_address(*xdr->page_ptr);
+ xdr->p = p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
- xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ if (space_left - nbytes >= PAGE_SIZE)
+ xdr->end = p + PAGE_SIZE;
+ else
+ xdr->end = p + space_left - frag1bytes;
+
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 5f0155fdefc7..11cf7c646644 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -478,10 +478,10 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
unsigned int write_len;
u64 offset;
- seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
- if (!seg)
+ if (info->wi_seg_no >= info->wi_chunk->ch_segcount)
goto out_overflow;
+ seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
write_len = min(remaining, seg->rs_length - info->wi_seg_off);
if (!write_len)
goto out_overflow;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6d39ca05f249..932c87b98eca 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char *name,
u32 i;
if (!bearer_name_validate(name, &b_names)) {
- errstr = "illegal name";
NL_SET_ERR_MSG(extack, "Illegal name");
- goto rejected;
+ return res;
}
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b91ddc110786..da176411c1b5 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -544,7 +544,7 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
rc = do_tls_getsockopt_conf(sk, optval, optlen,
optname == TLS_TX);
break;
- case TLS_TX_ZEROCOPY_SENDFILE:
+ case TLS_TX_ZEROCOPY_RO:
rc = do_tls_getsockopt_tx_zc(sk, optval, optlen);
break;
default:
@@ -731,7 +731,7 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval,
optname == TLS_TX);
release_sock(sk);
break;
- case TLS_TX_ZEROCOPY_SENDFILE:
+ case TLS_TX_ZEROCOPY_RO:
lock_sock(sk);
rc = do_tls_setsockopt_tx_zc(sk, optval, optlen);
release_sock(sk);
@@ -970,7 +970,7 @@ static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
goto nla_failure;
if (ctx->tx_conf == TLS_HW && ctx->zerocopy_sendfile) {
- err = nla_put_flag(skb, TLS_INFO_ZC_SENDFILE);
+ err = nla_put_flag(skb, TLS_INFO_ZC_RO_TX);
if (err)
goto nla_failure;
}
@@ -994,7 +994,7 @@ static size_t tls_get_info_size(const struct sock *sk)
nla_total_size(sizeof(u16)) + /* TLS_INFO_CIPHER */
nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */
nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */
- nla_total_size(0) + /* TLS_INFO_ZC_SENDFILE */
+ nla_total_size(0) + /* TLS_INFO_ZC_RO_TX */
0;
return size;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 654dcef7cfb3..2206e6f8902d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -490,7 +490,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
* -ECONNREFUSED. Otherwise, if we haven't queued any skbs
* to other and its full, we will hang waiting for POLLOUT.
*/
- if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
+ if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
return 1;
if (connected)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index e0a4526ab66b..19ac872a6624 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -373,7 +373,8 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
goto out;
}
- nb_pkts = xskq_cons_peek_desc_batch(xs->tx, pool, max_entries);
+ max_entries = xskq_cons_nb_entries(xs->tx, max_entries);
+ nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, max_entries);
if (!nb_pkts) {
xs->tx->queue_empty_descs++;
goto out;
@@ -389,7 +390,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
if (!nb_pkts)
goto out;
- xskq_cons_release_n(xs->tx, nb_pkts);
+ xskq_cons_release_n(xs->tx, max_entries);
__xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index a794410989cc..fb20bf7207cf 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -282,14 +282,6 @@ static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
return xskq_cons_read_desc(q, desc, pool);
}
-static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
- u32 max)
-{
- u32 entries = xskq_cons_nb_entries(q, max);
-
- return xskq_cons_read_desc_batch(q, pool, entries);
-}
-
/* To improve performance in the xskq_cons_release functions, only update local state here.
* Reflect this to global state when we get new entries from the ring in
* xskq_cons_get_entries() and whenever Rx or Tx processing are completed in the NAPI loop.
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index d4935b3b9983..555ab35cd119 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -273,6 +273,7 @@ static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
*/
static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
{
+ bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU);
struct dst_entry *dst = skb_dst(skb);
struct iphdr *top_iph;
int flags;
@@ -303,7 +304,7 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
if (flags & XFRM_STATE_NOECN)
IP_ECN_clear(top_iph);
- top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
+ top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));