928 files changed, 33934 insertions, 50044 deletions
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
index a67caee11929..53cf446ce2e3 100644
--- a/net/6lowpan/6lowpan_i.h
+++ b/net/6lowpan/6lowpan_i.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __6LOWPAN_I_H
 #define __6LOWPAN_I_H
 
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
index 12d131ab2324..2247b96dbc75 100644
--- a/net/6lowpan/Makefile
+++ b/net/6lowpan/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_6LOWPAN) += 6lowpan.o
 
 6lowpan-y := core.o iphc.o nhc.o ndisc.o
diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h
index 803041400136..67951c40734b 100644
--- a/net/6lowpan/nhc.h
+++ b/net/6lowpan/nhc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __6LOWPAN_NHC_H
 #define __6LOWPAN_NHC_H
 
diff --git a/net/802/Makefile b/net/802/Makefile
index 37e654d6615e..19406a87bdaa 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux 802.x protocol layers.
 #
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
index 7bc8db08d7ef..9b703454b93e 100644
--- a/net/8021q/Makefile
+++ b/net/8021q/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux VLAN layer.
 #
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9649579b5b9f..8dfdd94e430f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -138,7 +138,7 @@ int vlan_check_real_dev(struct net_device *real_dev,
 	return 0;
 }
 
-int register_vlan_dev(struct net_device *dev)
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
@@ -174,7 +174,7 @@ int register_vlan_dev(struct net_device *dev)
 	if (err < 0)
 		goto out_uninit_mvrp;
 
-	err = netdev_upper_dev_link(real_dev, dev);
+	err = netdev_upper_dev_link(real_dev, dev, extack);
 	if (err)
 		goto out_unregister_netdev;
 
@@ -270,7 +270,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	vlan->flags = VLAN_FLAG_REORDER_HDR;
 
 	new_dev->rtnl_link_ops = &vlan_link_ops;
-	err = register_vlan_dev(new_dev);
+	err = register_vlan_dev(new_dev, NULL);
 	if (err < 0)
 		goto out_free_newdev;
 
@@ -328,6 +328,9 @@ static void vlan_transfer_features(struct net_device *dev,
 	vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
 #endif
 
+	vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+	vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
+
 	netdev_update_features(vlandev);
 }
 
@@ -376,6 +379,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			dev->name);
 		vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
 	}
+	if (event == NETDEV_DOWN &&
+	    (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+		vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
 
 	vlan_info = rtnl_dereference(dev->vlan_info);
 	if (!vlan_info)
@@ -423,9 +429,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		struct net_device *tmp;
 		LIST_HEAD(close_list);
 
-		if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
-			vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
-
 		/* Put all VLANs for this dev in the down state too.  */
 		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index df8bd65dd370..a8ba51030b75 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __BEN_VLAN_802_1Q_INC__
 #define __BEN_VLAN_802_1Q_INC__
 
@@ -107,7 +108,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 int vlan_check_real_dev(struct net_device *real_dev,
 			__be16 protocol, u16 vlan_id);
 void vlan_setup(struct net_device *dev);
-int register_vlan_dev(struct net_device *dev);
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
 bool vlan_dev_inherit_address(struct net_device *dev,
 			      struct net_device *real_dev);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index e2ed69850489..64aa9f755e1d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
@@ -21,6 +22,12 @@ bool vlan_do_receive(struct sk_buff **skbp)
 	if (unlikely(!skb))
 		return false;
 
+	if (unlikely(!(vlan_dev->flags & IFF_UP))) {
+		kfree_skb(skb);
+		*skbp = NULL;
+		return false;
+	}
+
 	skb->dev = vlan_dev;
 	if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) {
 		/* Our lower layer thinks this is not local, let's make sure.
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 5e831de3103e..6689c0b272a7 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -143,6 +143,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	vlan->vlan_proto = proto;
 	vlan->vlan_id	 = nla_get_u16(data[IFLA_VLAN_ID]);
 	vlan->real_dev	 = real_dev;
+	dev->priv_flags |= (real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
 	vlan->flags	 = VLAN_FLAG_REORDER_HDR;
 
 	err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
@@ -160,7 +161,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		return err;
 
-	return register_vlan_dev(dev);
+	return register_vlan_dev(dev, extack);
 }
 
 static inline size_t vlan_qos_map_size(unsigned int n)
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
index 8838a2e92eb6..48cd4b4784e8 100644
--- a/net/8021q/vlanproc.h
+++ b/net/8021q/vlanproc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __BEN_VLAN_PROC_INC__
 #define __BEN_VLAN_PROC_INC__
 
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 697ea7caf466..c0486cfc85d9 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_NET_9P) := 9pnet.o
 obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o
 obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index ddfa86648f95..903a190319b9 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -272,6 +272,7 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)
 {
 	int ret;
 	struct p9_trans_fd *ts = NULL;
+	loff_t pos;
 
 	if (client && client->status != Disconnected)
 		ts = client->trans;
@@ -282,7 +283,8 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)
 	if (!(ts->rd->f_flags & O_NONBLOCK))
 		p9_debug(P9_DEBUG_ERROR, "blocking read ...\n");
 
-	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);
+	pos = ts->rd->f_pos;
+	ret = kernel_read(ts->rd, v, len, &pos);
 	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
 		client->status = Disconnected;
 	return ret;
@@ -420,8 +422,7 @@ error:
 
 static int p9_fd_write(struct p9_client *client, void *v, int len)
 {
-	int ret;
-	mm_segment_t oldfs;
+	ssize_t ret;
 	struct p9_trans_fd *ts = NULL;
 
 	if (client && client->status != Disconnected)
@@ -433,12 +434,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
 	if (!(ts->wr->f_flags & O_NONBLOCK))
 		p9_debug(P9_DEBUG_ERROR, "blocking write ...\n");
 
-	oldfs = get_fs();
-	set_fs(get_ds());
-	/* The cast to a user pointer is valid due to the set_fs() */
-	ret = vfs_write(ts->wr, (__force void __user *)v, len, &ts->wr->f_pos);
-	set_fs(oldfs);
-
+	ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos);
 	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
 		client->status = Disconnected;
 	return ret;
diff --git a/net/Kconfig b/net/Kconfig
index 7d57ef34b79c..9dba2715919d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -166,13 +166,6 @@ menuconfig NETFILTER
 
 if NETFILTER
 
-config NETFILTER_DEBUG
-	bool "Network packet filtering debugging"
-	depends on NETFILTER
-	help
-	  You can say Y here if you want to get additional messages useful in
-	  debugging the netfilter code.
-
 config NETFILTER_ADVANCED
 	bool "Advanced netfilter configuration"
 	depends on NETFILTER
@@ -235,6 +228,7 @@ source "net/openvswitch/Kconfig"
 source "net/vmw_vsock/Kconfig"
 source "net/netlink/Kconfig"
 source "net/mpls/Kconfig"
+source "net/nsh/Kconfig"
 source "net/hsr/Kconfig"
 source "net/switchdev/Kconfig"
 source "net/l3mdev/Kconfig"
@@ -301,6 +295,18 @@ config BPF_JIT
 	  /proc/sys/net/core/bpf_jit_harden   (optional)
 	  /proc/sys/net/core/bpf_jit_kallsyms (optional)
 
+config BPF_STREAM_PARSER
+	bool "enable BPF STREAM_PARSER"
+	depends on BPF_SYSCALL
+	select STREAM_PARSER
+	---help---
+	 Enabling this allows a stream parser to be used with
+	 BPF_MAP_TYPE_SOCKMAP.
+
+	 BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
+	 It can be used to enforce socket policy, implement socket redirects,
+	 etc.
+
 config NET_FLOW_LIMIT
 	bool
 	depends on RPS
@@ -364,7 +370,6 @@ endmenu
 
 source "net/ax25/Kconfig"
 source "net/can/Kconfig"
-source "net/irda/Kconfig"
 source "net/bluetooth/Kconfig"
 source "net/rxrpc/Kconfig"
 source "net/kcm/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index bed80fa398b7..14fede520840 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the linux networking.
 #
@@ -31,7 +32,6 @@ obj-$(CONFIG_NETROM)		+= netrom/
 obj-$(CONFIG_ROSE)		+= rose/
 obj-$(CONFIG_AX25)		+= ax25/
 obj-$(CONFIG_CAN)		+= can/
-obj-$(CONFIG_IRDA)		+= irda/
 obj-$(CONFIG_BT)		+= bluetooth/
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
 obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
@@ -76,6 +76,7 @@ obj-$(CONFIG_NET_IFE)		+= ife/
 obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/
 obj-$(CONFIG_VSOCKETS)	+= vmw_vsock/
 obj-$(CONFIG_MPLS)		+= mpls/
+obj-$(CONFIG_NET_NSH)		+= nsh/
 obj-$(CONFIG_HSR)		+= hsr/
 ifneq ($(CONFIG_NET_SWITCHDEV),)
 obj-y				+= switchdev/
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
index e4158b8b926d..284c8e585533 100644
--- a/net/appletalk/dev.c
+++ b/net/appletalk/dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Moved here from drivers/net/net_init.c, which is:
  *	Written 1993,1994,1995 by Donald Becker.
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index ebb864361f7a..c744a853fa5f 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sysctl_net_atalk.c: sysctl interface to net AppleTalk subsystem.
  *
diff --git a/net/atm/Makefile b/net/atm/Makefile
index cc50bd1ff1de..bfec0f2d83b5 100644
--- a/net/atm/Makefile
+++ b/net/atm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the ATM Protocol Families.
 #
diff --git a/net/atm/addr.c b/net/atm/addr.c
index dcda35c66f15..0530b63f509a 100644
--- a/net/atm/addr.c
+++ b/net/atm/addr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* net/atm/addr.c - Local ATM address registry */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/addr.h b/net/atm/addr.h
index 6837e9e7eb13..da3f848411a0 100644
--- a/net/atm/addr.h
+++ b/net/atm/addr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* net/atm/addr.h - Local ATM address registry */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c
index 876fbe83e2e4..a30b83c1cb3f 100644
--- a/net/atm/atm_misc.c
+++ b/net/atm/atm_misc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* net/atm/atm_misc.c - Various functions for use by ATM drivers */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL ICA */
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 350bf62b2ae3..5d2fed9f5710 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* ATM driver model support. */
 
 #include <linux/kernel.h>
diff --git a/net/atm/clip.c b/net/atm/clip.c
index f271a7bcf5b2..d4f6029d5109 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -153,7 +153,7 @@ static int neigh_check_cb(struct neighbour *n)
 	return 1;
 }
 
-static void idle_timer_check(unsigned long dummy)
+static void idle_timer_check(struct timer_list *unused)
 {
 	write_lock(&arp_tbl.lock);
 	__neigh_for_each_release(&arp_tbl, neigh_check_cb);
@@ -617,7 +617,7 @@ static void atmarpd_close(struct atm_vcc *vcc)
 	module_put(THIS_MODULE);
 }
 
-static struct atmdev_ops atmarpd_dev_ops = {
+static const struct atmdev_ops atmarpd_dev_ops = {
 	.close = atmarpd_close
 };
 
@@ -887,7 +887,7 @@ static int __init atm_clip_init(void)
 	register_netdevice_notifier(&clip_dev_notifier);
 	register_inetaddr_notifier(&clip_inet_notifier);
 
-	setup_timer(&idle_timer, idle_timer_check, 0);
+	timer_setup(&idle_timer, idle_timer_check, 0);
 
 #ifdef CONFIG_PROC_FS
 	{
diff --git a/net/atm/common.h b/net/atm/common.h
index 959436b87182..d9d583712a91 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* net/atm/common.h - ATM sockets (common part for PVC and SVC) */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index bbd3b639992e..2ff0e5e470e3 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* ATM ioctl handling */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 093fe8707731..c976196da3ea 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -486,7 +486,7 @@ static void lec_atm_close(struct atm_vcc *vcc)
 	module_put(THIS_MODULE);
 }
 
-static struct atmdev_ops lecdev_ops = {
+static const struct atmdev_ops lecdev_ops = {
 	.close = lec_atm_close,
 	.send = lec_atm_send
 };
@@ -1232,7 +1232,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
 #define LEC_ARP_REFRESH_INTERVAL (3*HZ)
 
 static void lec_arp_check_expire(struct work_struct *work);
-static void lec_arp_expire_arp(unsigned long data);
+static void lec_arp_expire_arp(struct timer_list *t);
 
 /*
  * Arp table funcs
@@ -1559,8 +1559,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
 	}
 	ether_addr_copy(to_return->mac_addr, mac_addr);
 	INIT_HLIST_NODE(&to_return->next);
-	setup_timer(&to_return->timer, lec_arp_expire_arp,
-			(unsigned long)to_return);
+	timer_setup(&to_return->timer, lec_arp_expire_arp, 0);
 	to_return->last_used = jiffies;
 	to_return->priv = priv;
 	skb_queue_head_init(&to_return->tx_wait);
@@ -1569,11 +1568,11 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
 }
 
 /* Arp sent timer expired */
-static void lec_arp_expire_arp(unsigned long data)
+static void lec_arp_expire_arp(struct timer_list *t)
 {
 	struct lec_arp_table *entry;
 
-	entry = (struct lec_arp_table *)data;
+	entry = from_timer(entry, t, timer);
 
 	pr_debug("\n");
 	if (entry->status == ESI_ARP_PENDING) {
@@ -1591,10 +1590,10 @@ static void lec_arp_expire_arp(unsigned long data)
 }
 
 /* Unknown/unused vcc expire, remove associated entry */
-static void lec_arp_expire_vcc(unsigned long data)
+static void lec_arp_expire_vcc(struct timer_list *t)
 {
 	unsigned long flags;
-	struct lec_arp_table *to_remove = (struct lec_arp_table *)data;
+	struct lec_arp_table *to_remove = from_timer(to_remove, t, timer);
 	struct lec_priv *priv = to_remove->priv;
 
 	del_timer(&to_remove->timer);
@@ -1799,7 +1798,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
 		else
 			send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL);
 		entry->timer.expires = jiffies + (1 * HZ);
-		entry->timer.function = lec_arp_expire_arp;
+		entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_arp;
 		add_timer(&entry->timer);
 		found = priv->mcast_vcc;
 	}
@@ -1999,7 +1998,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
 		entry->old_recv_push = old_push;
 		entry->status = ESI_UNKNOWN;
 		entry->timer.expires = jiffies + priv->vcc_timeout_period;
-		entry->timer.function = lec_arp_expire_vcc;
+		entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc;
 		hlist_add_head(&entry->next, &priv->lec_no_forward);
 		add_timer(&entry->timer);
 		dump_arp_table(priv);
@@ -2083,7 +2082,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
 	entry->status = ESI_UNKNOWN;
 	hlist_add_head(&entry->next, &priv->lec_arp_empty_ones);
 	entry->timer.expires = jiffies + priv->vcc_timeout_period;
-	entry->timer.function = lec_arp_expire_vcc;
+	entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc;
 	add_timer(&entry->timer);
 	pr_debug("After vcc was added\n");
 	dump_arp_table(priv);
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 4149db1b7885..be0e2667bd8c 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Lan Emulation client header file
  *
diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h
index d923f53812a3..1205d8792d28 100644
--- a/net/atm/lec_arpc.h
+++ b/net/atm/lec_arpc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Lec arp cache
  *
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 680a4b9095a1..e882d8b5db05 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -95,7 +95,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
 static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
 			       unsigned long event, void *dev);
 static void mpc_timer_refresh(void);
-static void mpc_cache_check(unsigned long checking_time);
+static void mpc_cache_check(struct timer_list *unused);
 
 static struct llc_snap_hdr llc_snap_mpoa_ctrl = {
 	0xaa, 0xaa, 0x03,
@@ -121,7 +121,7 @@ static struct notifier_block mpoa_notifier = {
 
 struct mpoa_client *mpcs = NULL; /* FIXME */
 static struct atm_mpoa_qos *qos_head = NULL;
-static DEFINE_TIMER(mpc_timer, NULL, 0, 0);
+static DEFINE_TIMER(mpc_timer, NULL);
 
 
 static struct mpoa_client *find_mpc_by_itfnum(int itf)
@@ -779,7 +779,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
 	netif_rx(new_skb);
 }
 
-static struct atmdev_ops mpc_ops = { /* only send is required */
+static const struct atmdev_ops mpc_ops = { /* only send is required */
 	.close	= mpoad_close,
 	.send	= msg_from_mpoad
 };
@@ -799,7 +799,6 @@ static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg)
 	int err;
 
 	if (mpcs == NULL) {
-		init_timer(&mpc_timer);
 		mpc_timer_refresh();
 
 		/* This lets us now how our LECs are doing */
@@ -1408,15 +1407,17 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action)
 	msg_to_mpoad(msg, mpc);
 }
 
+static unsigned long checking_time;
+
 static void mpc_timer_refresh(void)
 {
 	mpc_timer.expires = jiffies + (MPC_P2 * HZ);
-	mpc_timer.data = mpc_timer.expires;
-	mpc_timer.function = mpc_cache_check;
+	checking_time = mpc_timer.expires;
+	mpc_timer.function = (TIMER_FUNC_TYPE)mpc_cache_check;
 	add_timer(&mpc_timer);
 }
 
-static void mpc_cache_check(unsigned long checking_time)
+static void mpc_cache_check(struct timer_list *unused)
 {
 	struct mpoa_client *mpc = mpcs;
 	static unsigned long previous_resolving_check_time;
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index cfc7b745aa91..454abd07651a 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _MPC_H_
 #define _MPC_H_
 
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index 4ccaa16b1eb1..e01450bb32d6 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/types.h>
 #include <linux/atmmpc.h>
 #include <linux/slab.h>
diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h
index 30fe34841ced..6a266669ebf4 100644
--- a/net/atm/mpoa_caches.h
+++ b/net/atm/mpoa_caches.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef MPOA_CACHES_H
 #define MPOA_CACHES_H
 
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 2df34eb5d65f..8a0c17e1c203 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 4caca2a90ec4..642f9272ab95 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* net/atm/proc.c - ATM /proc interface
  *
  * Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA
diff --git a/net/atm/protocols.h b/net/atm/protocols.h
index acdfc856222d..18d4d008bac3 100644
--- a/net/atm/protocols.h
+++ b/net/atm/protocols.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* net/atm/protocols.h - ATM protocol handler entry points */
 
 /* Written 1995-1997 by Werner Almesberger, EPFL LRC */
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 040207ec399f..e1140b3bdcaa 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* net/atm/pvc.c - ATM PVC sockets */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 821c0797553d..ee10e8d46185 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* net/atm/raw.c - Raw AAL0 and AAL5 transports */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 918244757b7d..bada395ecdb1 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* net/atm/resources.c - Statically allocated resources */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 521431e30507..048232e4d4c6 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* net/atm/resources.h - ATM-related resources */
 
 /* Written 1995-1998 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 983c3a21a133..6c11cdf4dd4c 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* net/atm/signaling.c - ATM signaling */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
@@ -217,7 +218,7 @@ static void sigd_close(struct atm_vcc *vcc)
 	read_unlock(&vcc_sklist_lock);
 }
 
-static struct atmdev_ops sigd_dev_ops = {
+static const struct atmdev_ops sigd_dev_ops = {
 	.close = sigd_close,
 	.send =	sigd_send
 };
diff --git a/net/atm/signaling.h b/net/atm/signaling.h
index 08b2a69cc572..2df8220f7ab5 100644
--- a/net/atm/signaling.h
+++ b/net/atm/signaling.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* net/atm/signaling.h - ATM signaling */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 5589de7086af..c458adcbc177 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* net/atm/svc.c - ATM SVC sockets */
 
 /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/ax25/Makefile b/net/ax25/Makefile
index 43c46d2cafb6..2e53affc8568 100644
--- a/net/ax25/Makefile
+++ b/net/ax25/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux AX.25 layer.
 #
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index f3f9d18891de..06eac1f50c5e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -268,9 +268,9 @@ void ax25_destroy_socket(ax25_cb *);
 /*
  *	Handler for deferred kills.
  */
-static void ax25_destroy_timer(unsigned long data)
+static void ax25_destroy_timer(struct timer_list *t)
 {
-	ax25_cb *ax25=(ax25_cb *)data;
+	ax25_cb *ax25 = from_timer(ax25, t, dtimer);
 	struct sock *sk;
 
 	sk=ax25->sk;
@@ -326,8 +326,7 @@ void ax25_destroy_socket(ax25_cb *ax25)
 	if (ax25->sk != NULL) {
 		if (sk_has_allocations(ax25->sk)) {
 			/* Defer: outstanding buffers */
-			setup_timer(&ax25->dtimer, ax25_destroy_timer,
-					(unsigned long)ax25);
+			timer_setup(&ax25->dtimer, ax25_destroy_timer, 0);
 			ax25->dtimer.expires  = jiffies + 2 * HZ;
 			add_timer(&ax25->dtimer);
 		} else {
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 5fb2104b7304..e9d11313d45b 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -29,7 +29,7 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 
-static void ax25_ds_timeout(unsigned long);
+static void ax25_ds_timeout(struct timer_list *);
 
 /*
  *	Add DAMA slave timeout timer to timer list.
@@ -41,8 +41,7 @@ static void ax25_ds_timeout(unsigned long);
 
 void ax25_ds_setup_timer(ax25_dev *ax25_dev)
 {
-	setup_timer(&ax25_dev->dama.slave_timer, ax25_ds_timeout,
-		    (unsigned long)ax25_dev);
+	timer_setup(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 0);
 }
 
 void ax25_ds_del_timer(ax25_dev *ax25_dev)
@@ -66,9 +65,9 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev)
  *	Silently discard all (slave) connections in case our master forgot us...
  */
 
-static void ax25_ds_timeout(unsigned long arg)
+static void ax25_ds_timeout(struct timer_list *t)
 {
-	ax25_dev *ax25_dev = (struct ax25_dev *) arg;
+	ax25_dev *ax25_dev = from_timer(ax25_dev, t, dama.slave_timer);
 	ax25_cb *ax25;
 
 	if (ax25_dev == NULL || !ax25_dev->dama.slave)
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index 23a6f38a80bf..c47b7ee1e4da 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -33,20 +33,19 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 
-static void ax25_heartbeat_expiry(unsigned long);
-static void ax25_t1timer_expiry(unsigned long);
-static void ax25_t2timer_expiry(unsigned long);
-static void ax25_t3timer_expiry(unsigned long);
-static void ax25_idletimer_expiry(unsigned long);
+static void ax25_heartbeat_expiry(struct timer_list *);
+static void ax25_t1timer_expiry(struct timer_list *);
+static void ax25_t2timer_expiry(struct timer_list *);
+static void ax25_t3timer_expiry(struct timer_list *);
+static void ax25_idletimer_expiry(struct timer_list *);
 
 void ax25_setup_timers(ax25_cb *ax25)
 {
-	setup_timer(&ax25->timer, ax25_heartbeat_expiry, (unsigned long)ax25);
-	setup_timer(&ax25->t1timer, ax25_t1timer_expiry, (unsigned long)ax25);
-	setup_timer(&ax25->t2timer, ax25_t2timer_expiry, (unsigned long)ax25);
-	setup_timer(&ax25->t3timer, ax25_t3timer_expiry, (unsigned long)ax25);
-	setup_timer(&ax25->idletimer, ax25_idletimer_expiry,
-		    (unsigned long)ax25);
+	timer_setup(&ax25->timer, ax25_heartbeat_expiry, 0);
+	timer_setup(&ax25->t1timer, ax25_t1timer_expiry, 0);
+	timer_setup(&ax25->t2timer, ax25_t2timer_expiry, 0);
+	timer_setup(&ax25->t3timer, ax25_t3timer_expiry, 0);
+	timer_setup(&ax25->idletimer, ax25_idletimer_expiry, 0);
 }
 
 void ax25_start_heartbeat(ax25_cb *ax25)
@@ -120,10 +119,10 @@ unsigned long ax25_display_timer(struct timer_list *timer)
 
 EXPORT_SYMBOL(ax25_display_timer);
 
-static void ax25_heartbeat_expiry(unsigned long param)
+static void ax25_heartbeat_expiry(struct timer_list *t)
 {
 	int proto = AX25_PROTO_STD_SIMPLEX;
-	ax25_cb *ax25 = (ax25_cb *)param;
+	ax25_cb *ax25 = from_timer(ax25, t, timer);
 
 	if (ax25->ax25_dev)
 		proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL];
@@ -145,9 +144,9 @@ static void ax25_heartbeat_expiry(unsigned long param)
 	}
 }
 
-static void ax25_t1timer_expiry(unsigned long param)
+static void ax25_t1timer_expiry(struct timer_list *t)
 {
-	ax25_cb *ax25 = (ax25_cb *)param;
+	ax25_cb *ax25 = from_timer(ax25, t, t1timer);
 
 	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
 	case AX25_PROTO_STD_SIMPLEX:
@@ -164,9 +163,9 @@ static void ax25_t1timer_expiry(unsigned long param)
 	}
 }
 
-static void ax25_t2timer_expiry(unsigned long param)
+static void ax25_t2timer_expiry(struct timer_list *t)
 {
-	ax25_cb *ax25 = (ax25_cb *)param;
+	ax25_cb *ax25 = from_timer(ax25, t, t2timer);
 
 	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
 	case AX25_PROTO_STD_SIMPLEX:
@@ -183,9 +182,9 @@ static void ax25_t2timer_expiry(unsigned long param)
 	}
 }
 
-static void ax25_t3timer_expiry(unsigned long param)
+static void ax25_t3timer_expiry(struct timer_list *t)
 {
-	ax25_cb *ax25 = (ax25_cb *)param;
+	ax25_cb *ax25 = from_timer(ax25, t, t3timer);
 
 	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
 	case AX25_PROTO_STD_SIMPLEX:
@@ -204,9 +203,9 @@ static void ax25_t3timer_expiry(unsigned long param)
 	}
 }
 
-static void ax25_idletimer_expiry(unsigned long param)
+static void ax25_idletimer_expiry(struct timer_list *t)
 {
-	ax25_cb *ax25 = (ax25_cb *)param;
+	ax25_cb *ax25 = from_timer(ax25, t, idletimer);
 
 	switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
 	case AX25_PROTO_STD_SIMPLEX:
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a3501173e200..1b659ab652fb 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -729,11 +729,9 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
 				    const unsigned char *packet_buff,
 				    int packet_len, bool direct_link)
 {
-	unsigned char *skb_buff;
 	unsigned long new_direct_link_flag;
 
-	skb_buff = skb_put_data(forw_packet_aggr->skb, packet_buff,
-				packet_len);
+	skb_put_data(forw_packet_aggr->skb, packet_buff, packet_len);
 	forw_packet_aggr->packet_len += packet_len;
 	forw_packet_aggr->num_packets++;
 
@@ -918,8 +916,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
 	u16 tvlv_len = 0;
 	unsigned long send_time;
 
-	if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
-	    (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+	if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+	    hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
 		return;
 
 	/* the interface gets activated here to avoid race conditions between
@@ -1266,7 +1264,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	 * drops as they can't send and receive at the same time.
 	 */
 	tq_iface_penalty = BATADV_TQ_MAX_VALUE;
-	if (if_outgoing && (if_incoming == if_outgoing) &&
+	if (if_outgoing && if_incoming == if_outgoing &&
 	    batadv_is_wifi_hardif(if_outgoing))
 		tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
 						      bat_priv);
@@ -1281,7 +1279,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 	batadv_ogm_packet->tq = combined_tq;
 
 	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-		   "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
+		   "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
 		   orig_node->orig, orig_neigh_node->orig, total_count,
 		   neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty,
 		   batadv_ogm_packet->tq, if_incoming->net_dev->name,
@@ -1371,7 +1369,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
 				ret = BATADV_NEIGH_DUP;
 		} else {
 			set_mark = 0;
-			if (is_dup && (ret != BATADV_NEIGH_DUP))
+			if (is_dup && ret != BATADV_NEIGH_DUP)
 				ret = BATADV_ORIG_DUP;
 		}
 
@@ -1517,7 +1515,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
 	/* drop packet if sender is not a direct neighbor and if we
 	 * don't route towards it
 	 */
-	if (!is_single_hop_neigh && (!orig_neigh_router)) {
+	if (!is_single_hop_neigh && !orig_neigh_router) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Drop packet: OGM via unknown neighbor!\n");
 		goto out_neigh;
@@ -1537,7 +1535,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
 	sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno);
 	similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl;
 
-	if (is_bidirect && ((dup_status == BATADV_NO_DUP) ||
+	if (is_bidirect && (dup_status == BATADV_NO_DUP ||
 			    (sameseq && similar_ttl))) {
 		batadv_iv_ogm_orig_update(bat_priv, orig_node,
 					  orig_ifinfo, ethhdr,
@@ -1555,8 +1553,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
 		/* OGMs from secondary interfaces should only scheduled once
 		 * per interface where it has been received, not multiple times
 		 */
-		if ((ogm_packet->ttl <= 2) &&
-		    (if_incoming != if_outgoing)) {
+		if (ogm_packet->ttl <= 2 &&
+		    if_incoming != if_outgoing) {
 			batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 				   "Drop packet: OGM from secondary interface and wrong outgoing interface\n");
 			goto out_neigh;
@@ -1592,7 +1590,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
 			      if_incoming, if_outgoing);
 
 out_neigh:
-	if ((orig_neigh_node) && (!is_single_hop_neigh))
+	if (orig_neigh_node && !is_single_hop_neigh)
 		batadv_orig_node_put(orig_neigh_node);
 out:
 	if (router_ifinfo)
@@ -2525,9 +2523,9 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
 			tmp_gw_factor *= 100 * 100;
 			tmp_gw_factor >>= 18;
 
-			if ((tmp_gw_factor > max_gw_factor) ||
-			    ((tmp_gw_factor == max_gw_factor) &&
-			     (tq_avg > max_tq))) {
+			if (tmp_gw_factor > max_gw_factor ||
+			    (tmp_gw_factor == max_gw_factor &&
+			     tq_avg > max_tq)) {
 				if (curr_gw)
 					batadv_gw_node_put(curr_gw);
 				curr_gw = gw_node;
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 4e2724c5b33d..341ceab8338d 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -19,7 +19,6 @@
 #include "main.h"
 
 #include <linux/atomic.h>
-#include <linux/bug.h>
 #include <linux/cache.h>
 #include <linux/errno.h>
 #include <linux/if_ether.h>
@@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
 	int ret = 0;
 
 	ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
-	if (WARN_ON(!ifinfo1))
+	if (!ifinfo1)
 		goto err_ifinfo1;
 
 	ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
-	if (WARN_ON(!ifinfo2))
+	if (!ifinfo2)
 		goto err_ifinfo2;
 
 	ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput;
@@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1,
 	bool ret = false;
 
 	ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
-	if (WARN_ON(!ifinfo1))
+	if (!ifinfo1)
 		goto err_ifinfo1;
 
 	ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
-	if (WARN_ON(!ifinfo2))
+	if (!ifinfo2)
 		goto err_ifinfo2;
 
 	threshold = ifinfo1->bat_v.throughput / 4;
@@ -767,7 +766,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
 		if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
 			goto next;
 
-		if (curr_gw && (bw <= max_bw))
+		if (curr_gw && bw <= max_bw)
 			goto next;
 
 		if (curr_gw)
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index bd1064d98e16..1de992c58b35 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -134,7 +134,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
 			hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
 
 		throughput = link_settings.base.speed;
-		if (throughput && (throughput != SPEED_UNKNOWN))
+		if (throughput && throughput != SPEED_UNKNOWN)
 			return throughput * 10;
 	}
 
@@ -263,8 +263,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
 		goto out;
 
 	/* we are in the process of shutting this interface down */
-	if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
-	    (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+	if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+	    hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
 		goto out;
 
 	/* the interface was enabled but may not be ready yet */
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 1e3dc374bfde..c251445a42a0 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -137,7 +137,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
 	struct batadv_priv *bat_priv;
 	struct batadv_ogm2_packet *ogm_packet;
 	struct sk_buff *skb, *skb_tmp;
-	unsigned char *ogm_buff, *pkt_buff;
+	unsigned char *ogm_buff;
 	int ogm_buff_len;
 	u16 tvlv_len = 0;
 	int ret;
@@ -166,7 +166,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
 		goto reschedule;
 
 	skb_reserve(skb, ETH_HLEN);
-	pkt_buff = skb_put_data(skb, ogm_buff, ogm_buff_len);
+	skb_put_data(skb, ogm_buff, ogm_buff_len);
 
 	ogm_packet = (struct batadv_ogm2_packet *)skb->data;
 	ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno));
@@ -200,7 +200,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
 				type = "unknown";
 			}
 
-			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselve on %s surpressed: %s\n",
+			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselves on %s suppressed: %s\n",
 				   hard_iface->net_dev->name, type);
 
 			batadv_hardif_put(hard_iface);
@@ -304,8 +304,8 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
 	 * due to the store & forward characteristics of WIFI.
 	 * Very low throughput values are the exception.
 	 */
-	if ((throughput > 10) &&
-	    (if_incoming == if_outgoing) &&
+	if (throughput > 10 &&
+	    if_incoming == if_outgoing &&
 	    !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX))
 		return throughput / 2;
 
@@ -455,7 +455,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
 	/* drop packets with old seqnos, however accept the first packet after
 	 * a host has been rebooted.
 	 */
-	if ((seq_diff < 0) && !protection_started)
+	if (seq_diff < 0 && !protection_started)
 		goto out;
 
 	neigh_node->last_seen = jiffies;
@@ -568,8 +568,8 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
 		router_throughput = router_ifinfo->bat_v.throughput;
 		neigh_throughput = neigh_ifinfo->bat_v.throughput;
 
-		if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
-		    (router_throughput >= neigh_throughput))
+		if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF &&
+		    router_throughput >= neigh_throughput)
 			goto out;
 	}
 
@@ -621,7 +621,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
 		return;
 
 	/* only unknown & newer OGMs contain TVLVs we are interested in */
-	if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT))
+	if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT)
 		batadv_tvlv_containers_process(bat_priv, true, orig_node,
 					       NULL, NULL,
 					       (unsigned char *)(ogm2 + 1),
@@ -683,18 +683,18 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
 	ogm_throughput = ntohl(ogm_packet->throughput);
 
 	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-		   "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, troughput %u, TTL %u, V %u, tvlv_len %u)\n",
+		   "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, throughput %u, TTL %u, V %u, tvlv_len %u)\n",
 		   ethhdr->h_source, if_incoming->net_dev->name,
 		   if_incoming->net_dev->dev_addr, ogm_packet->orig,
 		   ntohl(ogm_packet->seqno), ogm_throughput, ogm_packet->ttl,
 		   ogm_packet->version, ntohs(ogm_packet->tvlv_len));
 
-	/* If the troughput metric is 0, immediately drop the packet. No need to
-	 * create orig_node / neigh_node for an unusable route.
+	/* If the throughput metric is 0, immediately drop the packet. No need
+	 * to create orig_node / neigh_node for an unusable route.
 	 */
 	if (ogm_throughput == 0) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-			   "Drop packet: originator packet with troughput metric of 0\n");
+			   "Drop packet: originator packet with throughput metric of 0\n");
 		return;
 	}
 
@@ -762,7 +762,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
 				type = "unknown";
 			}
 
-			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s surpressed: %s\n",
+			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s suppressed: %s\n",
 				   ogm_packet->orig, hard_iface->net_dev->name,
 				   type);
 
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 6930d6b50f99..760c0de72582 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -492,8 +492,8 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
 	/* this is an hash collision with the temporary selected node. Choose
 	 * the one with the lowest address
 	 */
-	if ((tmp_max == max) && max_orig_node &&
-	    (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0))
+	if (tmp_max == max && max_orig_node &&
+	    batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
 		goto out;
 
 	ret = true;
@@ -834,7 +834,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
 			last_seen_msecs = last_seen_msecs % 60000;
 			last_seen_secs = last_seen_msecs / 1000;
 
-			seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n",
+			seq_printf(seq, " * %15pI4 %pM %4i %6i:%02i\n",
 				   &dat_entry->ip, dat_entry->mac_addr,
 				   batadv_print_vid(dat_entry->vid),
 				   last_seen_mins, last_seen_secs);
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index de9955d5224d..10d521f0b17f 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -248,12 +248,12 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
 		}
 	}
 
-	if ((curr_gw) && (!next_gw)) {
+	if (curr_gw && !next_gw) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Removing selected gateway - no gateway in range\n");
 		batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL,
 				    NULL);
-	} else if ((!curr_gw) && (next_gw)) {
+	} else if (!curr_gw && next_gw) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n",
 			   next_gw->orig_node->orig,
@@ -411,8 +411,8 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
 		goto out;
 	}
 
-	if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) &&
-	    (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)))
+	if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) &&
+	    gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))
 		goto out;
 
 	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 33940c5c74a8..2c26039c23fc 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -56,8 +56,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
 		if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
 			bw_unit_type = BATADV_BW_UNIT_MBIT;
 
-		if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) ||
-		    (bw_unit_type == BATADV_BW_UNIT_MBIT))
+		if (strncasecmp(tmp_ptr, "kbit", 4) == 0 ||
+		    bw_unit_type == BATADV_BW_UNIT_MBIT)
 			*tmp_ptr = '\0';
 	}
 
@@ -190,7 +190,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
 	if (!up_new)
 		up_new = 1;
 
-	if ((down_curr == down_new) && (up_curr == up_new))
+	if (down_curr == down_new && up_curr == up_new)
 		return count;
 
 	batadv_gw_reselect(bat_priv);
@@ -224,16 +224,16 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
 	/* only fetch the tvlv value if the handler wasn't called via the
 	 * CIFNOTFND flag and if there is data to fetch
 	 */
-	if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) ||
-	    (tvlv_value_len < sizeof(gateway))) {
+	if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND ||
+	    tvlv_value_len < sizeof(gateway)) {
 		gateway.bandwidth_down = 0;
 		gateway.bandwidth_up = 0;
 	} else {
 		gateway_ptr = tvlv_value;
 		gateway.bandwidth_down = gateway_ptr->bandwidth_down;
 		gateway.bandwidth_up = gateway_ptr->bandwidth_up;
-		if ((gateway.bandwidth_down == 0) ||
-		    (gateway.bandwidth_up == 0)) {
+		if (gateway.bandwidth_down == 0 ||
+		    gateway.bandwidth_up == 0) {
 			gateway.bandwidth_down = 0;
 			gateway.bandwidth_up = 0;
 		}
@@ -242,8 +242,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
 	batadv_gw_node_update(bat_priv, orig, &gateway);
 
 	/* restart gateway selection */
-	if ((gateway.bandwidth_down != 0) &&
-	    (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT))
+	if (gateway.bandwidth_down != 0 &&
+	    atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)
 		batadv_gw_check_election(bat_priv, orig);
 }
 
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index e348f76ea8c1..4e3d5340ad96 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -504,8 +504,8 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
-		if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
-		    (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+		if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+		    hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
 			continue;
 
 		if (hard_iface->net_dev == net_dev)
@@ -568,8 +568,8 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
-		if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
-		    (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+		if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+		    hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
 			continue;
 
 		if (hard_iface->soft_iface != soft_iface)
@@ -654,8 +654,8 @@ out:
 static void
 batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
 {
-	if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
-	    (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+	if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+	    hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
 		return;
 
 	hard_iface->if_status = BATADV_IF_INACTIVE;
@@ -738,7 +738,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 	bat_priv = netdev_priv(hard_iface->soft_iface);
 
 	ret = netdev_master_upper_dev_link(hard_iface->net_dev,
-					   soft_iface, NULL, NULL);
+					   soft_iface, NULL, NULL, NULL);
 	if (ret)
 		goto err_dev;
 
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 8ead292886d1..bded31121d12 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -132,10 +132,10 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf,
 	size_t packet_len;
 	int error;
 
-	if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0))
+	if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0)
 		return -EAGAIN;
 
-	if ((!buf) || (count < sizeof(struct batadv_icmp_packet)))
+	if (!buf || count < sizeof(struct batadv_icmp_packet))
 		return -EINVAL;
 
 	if (!access_ok(VERIFY_WRITE, buf, count))
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fb381fb26a66..4daed7ad46f2 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -73,8 +73,8 @@
  * list traversals just rcu-locked
  */
 struct list_head batadv_hardif_list;
-static int (*batadv_rx_handler[256])(struct sk_buff *,
-				     struct batadv_hard_iface *);
+static int (*batadv_rx_handler[256])(struct sk_buff *skb,
+				     struct batadv_hard_iface *recv_if);
 
 unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
@@ -540,12 +540,12 @@ batadv_recv_handler_register(u8 packet_type,
 			     int (*recv_handler)(struct sk_buff *,
 						 struct batadv_hard_iface *))
 {
-	int (*curr)(struct sk_buff *,
-		    struct batadv_hard_iface *);
+	int (*curr)(struct sk_buff *skb,
+		    struct batadv_hard_iface *recv_if);
 	curr = batadv_rx_handler[packet_type];
 
-	if ((curr != batadv_recv_unhandled_packet) &&
-	    (curr != batadv_recv_unhandled_unicast_packet))
+	if (curr != batadv_recv_unhandled_packet &&
+	    curr != batadv_recv_unhandled_unicast_packet)
 		return -EBUSY;
 
 	batadv_rx_handler[packet_type] = recv_handler;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 2be8f1f46529..edb2f239d04d 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.2"
+#define BATADV_SOURCE_VERSION "2017.4"
 #endif
 
 /* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d327670641ac..e553a8770a89 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1126,7 +1126,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
 	bool orig_initialized;
 
 	if (orig_mcast_enabled && tvlv_value &&
-	    (tvlv_value_len >= sizeof(mcast_flags)))
+	    tvlv_value_len >= sizeof(mcast_flags))
 		mcast_flags = *(u8 *)tvlv_value;
 
 	spin_lock_bh(&orig->mcast_handler_lock);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 8e2a4b205257..2967b86c13da 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1062,9 +1062,9 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
 			continue;
 
 		/* don't purge if the interface is not (going) down */
-		if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
-		    (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
-		    (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+		if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+		    if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+		    if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
 			continue;
 
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1106,9 +1106,9 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
 			continue;
 
 		/* don't purge if the interface is not (going) down */
-		if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
-		    (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
-		    (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+		if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+		    if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+		    if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
 			continue;
 
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1155,13 +1155,13 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
 		last_seen = neigh_node->last_seen;
 		if_incoming = neigh_node->if_incoming;
 
-		if ((batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT)) ||
-		    (if_incoming->if_status == BATADV_IF_INACTIVE) ||
-		    (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
-		    (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) {
-			if ((if_incoming->if_status == BATADV_IF_INACTIVE) ||
-			    (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
-			    (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED))
+		if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) ||
+		    if_incoming->if_status == BATADV_IF_INACTIVE ||
+		    if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+		    if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) {
+			if (if_incoming->if_status == BATADV_IF_INACTIVE ||
+			    if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+			    if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)
 				batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 					   "neighbor purge: originator %pM, neighbor: %pM, iface: %s\n",
 					   orig_node->orig, neigh_node->addr,
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index f10e3ff26f9d..40d9bf3e5bfe 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -93,14 +93,14 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
 	batadv_orig_ifinfo_put(orig_ifinfo);
 
 	/* route deleted */
-	if ((curr_router) && (!neigh_node)) {
+	if (curr_router && !neigh_node) {
 		batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
 			   "Deleting route towards: %pM\n", orig_node->orig);
 		batadv_tt_global_del_orig(bat_priv, orig_node, -1,
 					  "Deleted route towards originator");
 
 	/* route added */
-	} else if ((!curr_router) && (neigh_node)) {
+	} else if (!curr_router && neigh_node) {
 		batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
 			   "Adding route towards: %pM (via %pM)\n",
 			   orig_node->orig, neigh_node->addr);
@@ -381,7 +381,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 	/* add record route information if not full */
 	if ((icmph->msg_type == BATADV_ECHO_REPLY ||
 	     icmph->msg_type == BATADV_ECHO_REQUEST) &&
-	    (skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
+	    skb->len >= sizeof(struct batadv_icmp_packet_rr)) {
 		if (skb_linearize(skb) < 0)
 			goto free_skb;
 
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index d239a9d72ac3..7895323fd2a7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -142,7 +142,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
 #ifdef CONFIG_BATMAN_ADV_BATMAN_V
 	hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr);
 
-	if ((hardif_neigh) && (ret != NET_XMIT_DROP))
+	if (hardif_neigh && ret != NET_XMIT_DROP)
 		hardif_neigh->bat_v.last_unicast_tx = jiffies;
 
 	if (hardif_neigh)
@@ -615,8 +615,8 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list,
 		 * we delete only packets belonging to the given interface
 		 */
 		if (hard_iface &&
-		    (forw_packet->if_incoming != hard_iface) &&
-		    (forw_packet->if_outgoing != hard_iface))
+		    forw_packet->if_incoming != hard_iface &&
+		    forw_packet->if_outgoing != hard_iface)
 			continue;
 
 		hlist_del(&forw_packet->list);
@@ -911,7 +911,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
 				type = "unknown";
 			}
 
-			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s surpressed: %s\n",
+			batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n",
 				   bcast_packet->orig,
 				   hard_iface->net_dev->name, type);
 
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 10f7edfb176e..9f673cdfecf8 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -69,8 +69,8 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
 	int result;
 
 	/* TODO: We must check if we can release all references to non-payload
-	 * data using skb_header_release in our skbs to allow skb_cow_header to
-	 * work optimally. This means that those skbs are not allowed to read
+	 * data using __skb_header_release in our skbs to allow skb_cow_header
+	 * to work optimally. This means that those skbs are not allowed to read
 	 * or write any data which is before the current position of skb->data
 	 * after that call and thus allow other skbs with the same data buffer
 	 * to write freely in that area.
@@ -160,7 +160,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
 {
 	/* check ranges */
-	if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev)))
+	if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
 		return -EINVAL;
 
 	dev->mtu = new_mtu;
@@ -863,11 +863,13 @@ free_bat_counters:
  * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
  * @dev: batadv_soft_interface used as master interface
  * @slave_dev: net_device which should become the slave interface
+ * @extack: extended ACK report struct
  *
  * Return: 0 if successful or error otherwise.
  */
 static int batadv_softif_slave_add(struct net_device *dev,
-				   struct net_device *slave_dev)
+				   struct net_device *slave_dev,
+				   struct netlink_ext_ack *extack)
 {
 	struct batadv_hard_iface *hard_iface;
 	struct net *net = dev_net(dev);
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 0ae8b30e4eaa..aa187fd42475 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -925,8 +925,8 @@ static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
 	if (hard_iface->if_status == status_tmp)
 		goto out;
 
-	if ((hard_iface->soft_iface) &&
-	    (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
+	if (hard_iface->soft_iface &&
+	    strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)
 		goto out;
 
 	if (status_tmp == BATADV_IF_NOT_IN_USE) {
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index bfe8effe9238..4b90033f35a8 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1206,7 +1206,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
 
 	/* send the ack */
 	r = batadv_send_skb_to_orig(skb, orig_node, NULL);
-	if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
+	if (unlikely(r < 0) || r == NET_XMIT_DROP) {
 		ret = BATADV_TP_REASON_DST_UNREACHABLE;
 		goto out;
 	}
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index ab3b654b05cc..4e2576fc0c59 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -273,9 +273,6 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
 			   struct lowpan_peer *peer)
 {
 	const u8 *saddr;
-	struct lowpan_btle_dev *dev;
-
-	dev = lowpan_btle_dev(netdev);
 
 	saddr = peer->lladdr;
 
@@ -618,12 +615,8 @@ static void ifup(struct net_device *netdev)
 
 static void ifdown(struct net_device *netdev)
 {
-	int err;
-
 	rtnl_lock();
-	err = dev_close(netdev);
-	if (err < 0)
-		BT_INFO("iface %s cannot be closed (%d)", netdev->name, err);
+	dev_close(netdev);
 	rtnl_unlock();
 }
 
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 68f951b3e85a..db82a40875e8 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -45,6 +45,11 @@ config BT_BREDR
 	bool "Bluetooth Classic (BR/EDR) features"
 	depends on BT
 	default y
+	help
+	  Bluetooth Classic includes support for Basic Rate (BR)
+	  available with Bluetooth version 1.0b or later and support
+	  for Enhanced Data Rate (EDR) available with Bluetooth
+	  version 2.0 or later.
 
 source "net/bluetooth/rfcomm/Kconfig"
 
@@ -58,11 +63,18 @@ config BT_HS
 	bool "Bluetooth High Speed (HS) features"
 	depends on BT_BREDR
 	default y
+	help
+	  Bluetooth High Speed includes support for off-loading
+	  Bluetooth connections via 802.11 (wifi) physical layer
+	  available with Bluetooth version 3.0 or later.
 
 config BT_LE
 	bool "Bluetooth Low Energy (LE) features"
 	depends on BT
 	default y
+	help
+	  Bluetooth Low Energy includes support for the low-energy physical
+	  layer available with Bluetooth version 4.0 or later.
 
 config BT_6LOWPAN
 	tristate "Bluetooth 6LoWPAN support"
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 5d0a113e2e40..fda41c0b4781 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux Bluetooth subsystem.
 #
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index aad994edd3bb..51c2cf2d8923 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -573,7 +573,7 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
 	hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK,
 				       &mgr->l2cap_conn->hcon->dst);
 	if (!hcon) {
-		BT_ERR("No phys link exist");
+		bt_dev_err(hdev, "no phys link exist");
 		rsp.status = A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS;
 		goto clean;
 	}
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index ebcab5bbadd7..78bec8df8525 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -187,7 +187,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
 
 	/* Legacy key */
 	if (conn->key_type < 3) {
-		BT_ERR("Legacy key type %d", conn->key_type);
+		bt_dev_err(hdev, "legacy key type %d", conn->key_type);
 		return -EACCES;
 	}
 
@@ -207,7 +207,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
 	/* Derive Generic AMP Link Key (gamp) */
 	err = hmac_sha256(keybuf, HCI_AMP_LINK_KEY_SIZE, "gamp", 4, gamp_key);
 	if (err) {
-		BT_ERR("Could not derive Generic AMP Key: err %d", err);
+		bt_dev_err(hdev, "could not derive Generic AMP Key: err %d", err);
 		return err;
 	}
 
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
index c7b1a9aee579..2155ce802877 100644
--- a/net/bluetooth/ecdh_helper.c
+++ b/net/bluetooth/ecdh_helper.c
@@ -23,7 +23,6 @@
 #include "ecdh_helper.h"
 
 #include <linux/scatterlist.h>
-#include <crypto/kpp.h>
 #include <crypto/ecdh.h>
 
 struct ecdh_completion {
@@ -50,55 +49,35 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
 		out[i] = __swab64(in[ndigits - 1 - i]);
 }
 
-bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
-			 u8 secret[32])
+/* compute_ecdh_secret() - function assumes that the private key was
+ *                         already set.
+ * @tfm:          KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key:   pair's ecc public key.
+ * @secret:       memory where the ecdh computed shared secret will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
+			u8 secret[32])
 {
-	struct crypto_kpp *tfm;
 	struct kpp_request *req;
-	struct ecdh p;
+	u8 *tmp;
 	struct ecdh_completion result;
 	struct scatterlist src, dst;
-	u8 *tmp, *buf;
-	unsigned int buf_len;
-	int err = -ENOMEM;
+	int err;
 
 	tmp = kmalloc(64, GFP_KERNEL);
 	if (!tmp)
-		return false;
+		return -ENOMEM;
 
-	tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
-	if (IS_ERR(tfm)) {
-		pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
-		       PTR_ERR(tfm));
+	req = kpp_request_alloc(tfm, GFP_KERNEL);
+	if (!req) {
+		err = -ENOMEM;
 		goto free_tmp;
 	}
 
-	req = kpp_request_alloc(tfm, GFP_KERNEL);
-	if (!req)
-		goto free_kpp;
-
 	init_completion(&result.completion);
 
-	/* Security Manager Protocol holds digits in litte-endian order
-	 * while ECC API expect big-endian data
-	 */
-	swap_digits((u64 *)private_key, (u64 *)tmp, 4);
-	p.key = (char *)tmp;
-	p.key_size = 32;
-	/* Set curve_id */
-	p.curve_id = ECC_CURVE_NIST_P256;
-	buf_len = crypto_ecdh_key_len(&p);
-	buf = kmalloc(buf_len, GFP_KERNEL);
-	if (!buf)
-		goto free_req;
-
-	crypto_ecdh_encode_key(buf, buf_len, &p);
-
-	/* Set A private Key */
-	err = crypto_kpp_set_secret(tfm, (void *)buf, buf_len);
-	if (err)
-		goto free_all;
-
 	swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */
 	swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */
 
@@ -123,104 +102,129 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
 	memcpy(secret, tmp, 32);
 
 free_all:
-	kzfree(buf);
-free_req:
 	kpp_request_free(req);
-free_kpp:
-	crypto_free_kpp(tfm);
 free_tmp:
-	kfree(tmp);
-	return (err == 0);
+	kzfree(tmp);
+	return err;
 }
 
-bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32])
+/* set_ecdh_privkey() - set or generate ecc private key.
+ *
+ * Function generates an ecc private key in the crypto subsystem when receiving
+ * a NULL private key or sets the received key when not NULL.
+ *
+ * @tfm:           KPP tfm handle allocated with crypto_alloc_kpp().
+ * @private_key:   user's ecc private key. When not NULL, the key is expected
+ *                 in little endian format.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32])
+{
+	u8 *buf, *tmp = NULL;
+	unsigned int buf_len;
+	int err;
+	struct ecdh p = {0};
+
+	p.curve_id = ECC_CURVE_NIST_P256;
+
+	if (private_key) {
+		tmp = kmalloc(32, GFP_KERNEL);
+		if (!tmp)
+			return -ENOMEM;
+		swap_digits((u64 *)private_key, (u64 *)tmp, 4);
+		p.key = tmp;
+		p.key_size = 32;
+	}
+
+	buf_len = crypto_ecdh_key_len(&p);
+	buf = kmalloc(buf_len, GFP_KERNEL);
+	if (!buf) {
+		err = -ENOMEM;
+		goto free_tmp;
+	}
+
+	err = crypto_ecdh_encode_key(buf, buf_len, &p);
+	if (err)
+		goto free_all;
+
+	err = crypto_kpp_set_secret(tfm, buf, buf_len);
+	/* fall through */
+free_all:
+	kzfree(buf);
+free_tmp:
+	kzfree(tmp);
+	return err;
+}
+
+/* generate_ecdh_public_key() - function assumes that the private key was
+ *                              already set.
+ *
+ * @tfm:          KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key:   memory where the computed ecc public key will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64])
 {
-	struct crypto_kpp *tfm;
 	struct kpp_request *req;
-	struct ecdh p;
+	u8 *tmp;
 	struct ecdh_completion result;
 	struct scatterlist dst;
-	u8 *tmp, *buf;
-	unsigned int buf_len;
-	int err = -ENOMEM;
-	const unsigned short max_tries = 16;
-	unsigned short tries = 0;
+	int err;
 
 	tmp = kmalloc(64, GFP_KERNEL);
 	if (!tmp)
-		return false;
+		return -ENOMEM;
 
-	tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
-	if (IS_ERR(tfm)) {
-		pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
-		       PTR_ERR(tfm));
+	req = kpp_request_alloc(tfm, GFP_KERNEL);
+	if (!req) {
+		err = -ENOMEM;
 		goto free_tmp;
 	}
 
-	req = kpp_request_alloc(tfm, GFP_KERNEL);
-	if (!req)
-		goto free_kpp;
-
 	init_completion(&result.completion);
+	sg_init_one(&dst, tmp, 64);
+	kpp_request_set_input(req, NULL, 0);
+	kpp_request_set_output(req, &dst, 64);
+	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				 ecdh_complete, &result);
 
-	/* Set curve_id */
-	p.curve_id = ECC_CURVE_NIST_P256;
-	p.key_size = 32;
-	buf_len = crypto_ecdh_key_len(&p);
-	buf = kmalloc(buf_len, GFP_KERNEL);
-	if (!buf)
-		goto free_req;
-
-	do {
-		if (tries++ >= max_tries)
-			goto free_all;
-
-		/* Set private Key */
-		p.key = (char *)private_key;
-		crypto_ecdh_encode_key(buf, buf_len, &p);
-		err = crypto_kpp_set_secret(tfm, buf, buf_len);
-		if (err)
-			goto free_all;
-
-		sg_init_one(&dst, tmp, 64);
-		kpp_request_set_input(req, NULL, 0);
-		kpp_request_set_output(req, &dst, 64);
-		kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-					 ecdh_complete, &result);
-
-		err = crypto_kpp_generate_public_key(req);
-
-		if (err == -EINPROGRESS) {
-			wait_for_completion(&result.completion);
-			err = result.err;
-		}
-
-		/* Private key is not valid. Regenerate */
-		if (err == -EINVAL)
-			continue;
-
-		if (err < 0)
-			goto free_all;
-		else
-			break;
-
-	} while (true);
-
-	/* Keys are handed back in little endian as expected by Security
-	 * Manager Protocol
+	err = crypto_kpp_generate_public_key(req);
+	if (err == -EINPROGRESS) {
+		wait_for_completion(&result.completion);
+		err = result.err;
+	}
+	if (err < 0)
+		goto free_all;
+
+	/* The public key is handed back in little endian as expected by
+	 * the Security Manager Protocol.
 	 */
 	swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */
 	swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */
-	swap_digits((u64 *)private_key, (u64 *)tmp, 4);
-	memcpy(private_key, tmp, 32);
 
 free_all:
-	kzfree(buf);
-free_req:
 	kpp_request_free(req);
-free_kpp:
-	crypto_free_kpp(tfm);
 free_tmp:
 	kfree(tmp);
-	return (err == 0);
+	return err;
+}
+
+/* generate_ecdh_keys() - generate ecc key pair.
+ *
+ * @tfm:          KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key:   memory where the computed ecc public key will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64])
+{
+	int err;
+
+	err = set_ecdh_privkey(tfm, NULL);
+	if (err)
+		return err;
+
+	return generate_ecdh_public_key(tfm, public_key);
 }
diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h
index 7a423faf76e5..a6f8d03d4aaf 100644
--- a/net/bluetooth/ecdh_helper.h
+++ b/net/bluetooth/ecdh_helper.h
@@ -20,8 +20,11 @@
  * COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
  * SOFTWARE IS DISCLAIMED.
  */
+#include <crypto/kpp.h>
 #include <linux/types.h>
 
-bool compute_ecdh_secret(const u8 pub_a[64], const u8 priv_b[32],
-			 u8 secret[32]);
-bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]);
+int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64],
+			u8 secret[32]);
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key);
+int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]);
+int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index dc59eae54717..a9682534c377 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -729,8 +729,8 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 		goto done;
 	}
 
-	BT_ERR("HCI request failed to create LE connection: status 0x%2.2x",
-	       status);
+	bt_dev_err(hdev, "request failed to create LE connection: "
+		   "status 0x%2.2x", status);
 
 	if (!conn)
 		goto done;
@@ -907,7 +907,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 		 */
 		if (hci_dev_test_flag(hdev, HCI_LE_SCAN) &&
 		    hdev->le_scan_type == LE_SCAN_ACTIVE) {
-			skb_queue_purge(&req.cmd_q);
+			hci_req_purge(&req);
 			hci_conn_del(conn);
 			return ERR_PTR(-EBUSY);
 		}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 6bc679cd3481..40d260f2bea5 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -267,7 +267,7 @@ static int hci_init1_req(struct hci_request *req, unsigned long opt)
 		amp_init1(req);
 		break;
 	default:
-		BT_ERR("Unknown device type %d", hdev->dev_type);
+		bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type);
 		break;
 	}
 
@@ -2150,8 +2150,7 @@ static void hci_error_reset(struct work_struct *work)
 	if (hdev->hw_error)
 		hdev->hw_error(hdev, hdev->hw_error_code);
 	else
-		BT_ERR("%s hardware error 0x%2.2x", hdev->name,
-		       hdev->hw_error_code);
+		bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
 
 	if (hci_dev_do_close(hdev))
 		return;
@@ -2524,9 +2523,9 @@ static void hci_cmd_timeout(struct work_struct *work)
 		struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
 		u16 opcode = __le16_to_cpu(sent->opcode);
 
-		BT_ERR("%s command 0x%4.4x tx timeout", hdev->name, opcode);
+		bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode);
 	} else {
-		BT_ERR("%s command tx timeout", hdev->name);
+		bt_dev_err(hdev, "command tx timeout");
 	}
 
 	atomic_set(&hdev->cmd_cnt, 1);
@@ -2858,7 +2857,7 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
 
 	params = kzalloc(sizeof(*params), GFP_KERNEL);
 	if (!params) {
-		BT_ERR("Out of memory");
+		bt_dev_err(hdev, "out of memory");
 		return NULL;
 	}
 
@@ -3393,7 +3392,7 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
 
 	err = hdev->send(hdev, skb);
 	if (err < 0) {
-		BT_ERR("%s sending frame failed (%d)", hdev->name, err);
+		bt_dev_err(hdev, "sending frame failed (%d)", err);
 		kfree_skb(skb);
 	}
 }
@@ -3408,7 +3407,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
 
 	skb = hci_prepare_cmd(hdev, opcode, plen, param);
 	if (!skb) {
-		BT_ERR("%s no memory for command", hdev->name);
+		bt_dev_err(hdev, "no memory for command");
 		return -ENOMEM;
 	}
 
@@ -3493,7 +3492,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
 		hci_add_acl_hdr(skb, chan->handle, flags);
 		break;
 	default:
-		BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type);
+		bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
 		return;
 	}
 
@@ -3618,7 +3617,7 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type,
 			break;
 		default:
 			cnt = 0;
-			BT_ERR("Unknown link type");
+			bt_dev_err(hdev, "unknown link type %d", conn->type);
 		}
 
 		q = cnt / num;
@@ -3635,15 +3634,15 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
 	struct hci_conn_hash *h = &hdev->conn_hash;
 	struct hci_conn *c;
 
-	BT_ERR("%s link tx timeout", hdev->name);
+	bt_dev_err(hdev, "link tx timeout");
 
 	rcu_read_lock();
 
 	/* Kill stalled connections */
 	list_for_each_entry_rcu(c, &h->list, list) {
 		if (c->type == type && c->sent) {
-			BT_ERR("%s killing stalled connection %pMR",
-			       hdev->name, &c->dst);
+			bt_dev_err(hdev, "killing stalled connection %pMR",
+				   &c->dst);
 			hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
 		}
 	}
@@ -3724,7 +3723,7 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
 		break;
 	default:
 		cnt = 0;
-		BT_ERR("Unknown link type");
+		bt_dev_err(hdev, "unknown link type %d", chan->conn->type);
 	}
 
 	q = cnt / num;
@@ -4066,8 +4065,8 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		l2cap_recv_acldata(conn, skb, flags);
 		return;
 	} else {
-		BT_ERR("%s ACL packet for unknown connection handle %d",
-		       hdev->name, handle);
+		bt_dev_err(hdev, "ACL packet for unknown connection handle %d",
+			   handle);
 	}
 
 	kfree_skb(skb);
@@ -4097,8 +4096,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		sco_recv_scodata(conn, skb);
 		return;
 	} else {
-		BT_ERR("%s SCO packet for unknown connection handle %d",
-		       hdev->name, handle);
+		bt_dev_err(hdev, "SCO packet for unknown connection handle %d",
+			   handle);
 	}
 
 	kfree_skb(skb);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0b4dba08a14e..cd3bbb766c24 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1188,7 +1188,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 		break;
 
 	default:
-		BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable);
+		bt_dev_err(hdev, "use of reserved LE_Scan_Enable param %d",
+			   cp->enable);
 		break;
 	}
 
@@ -1485,7 +1486,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
 			conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr,
 					    HCI_ROLE_MASTER);
 			if (!conn)
-				BT_ERR("No memory for new connection");
+				bt_dev_err(hdev, "no memory for new connection");
 		}
 	}
 
@@ -2269,7 +2270,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
 				    HCI_ROLE_SLAVE);
 		if (!conn) {
-			BT_ERR("No memory for new connection");
+			bt_dev_err(hdev, "no memory for new connection");
 			hci_dev_unlock(hdev);
 			return;
 		}
@@ -2431,7 +2432,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 
 		if (!hci_conn_ssp_enabled(conn) &&
 		    test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
-			BT_INFO("re-auth of legacy device is not possible.");
+			bt_dev_info(hdev, "re-auth of legacy device is not possible.");
 		} else {
 			set_bit(HCI_CONN_AUTH, &conn->flags);
 			conn->sec_level = conn->pending_sec_level;
@@ -2535,8 +2536,7 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
 	BT_DBG("%s status 0x%02x", hdev->name, status);
 
 	if (!skb || skb->len < sizeof(*rp)) {
-		BT_ERR("%s invalid HCI Read Encryption Key Size response",
-		       hdev->name);
+		bt_dev_err(hdev, "invalid read key size response");
 		return;
 	}
 
@@ -2554,8 +2554,8 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
 	 * supported.
 	 */
 	if (rp->status) {
-		BT_ERR("%s failed to read key size for handle %u", hdev->name,
-		       handle);
+		bt_dev_err(hdev, "failed to read key size for handle %u",
+			   handle);
 		conn->enc_key_size = HCI_LINK_KEY_SIZE;
 	} else {
 		conn->enc_key_size = rp->key_size;
@@ -2664,7 +2664,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp);
 
 		if (hci_req_run_skb(&req, read_enc_key_size_complete)) {
-			BT_ERR("Sending HCI Read Encryption Key Size failed");
+			bt_dev_err(hdev, "sending read key size failed");
 			conn->enc_key_size = HCI_LINK_KEY_SIZE;
 			goto notify;
 		}
@@ -3197,7 +3197,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	int i;
 
 	if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) {
-		BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode);
+		bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
 		return;
 	}
 
@@ -3249,7 +3249,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
 			break;
 
 		default:
-			BT_ERR("Unknown type %d conn %p", conn->type, conn);
+			bt_dev_err(hdev, "unknown type %d conn %p",
+				   conn->type, conn);
 			break;
 		}
 	}
@@ -3271,7 +3272,7 @@ static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev,
 			return chan->conn;
 		break;
 	default:
-		BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type);
+		bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
 		break;
 	}
 
@@ -3284,7 +3285,7 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	int i;
 
 	if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) {
-		BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode);
+		bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
 		return;
 	}
 
@@ -3320,7 +3321,8 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb)
 			break;
 
 		default:
-			BT_ERR("Unknown type %d conn %p", conn->type, conn);
+			bt_dev_err(hdev, "unknown type %d conn %p",
+				   conn->type, conn);
 			break;
 		}
 	}
@@ -4479,7 +4481,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	if (!conn) {
 		conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role);
 		if (!conn) {
-			BT_ERR("No memory for new connection");
+			bt_dev_err(hdev, "no memory for new connection");
 			goto unlock;
 		}
 
@@ -4749,8 +4751,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 	case LE_ADV_SCAN_RSP:
 		break;
 	default:
-		BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x",
-				   type);
+		bt_dev_err_ratelimited(hdev, "unknown advertising packet "
+				       "type: 0x%02x", type);
 		return;
 	}
 
@@ -4769,8 +4771,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 
 	/* Adjust for actual length */
 	if (len != real_len) {
-		BT_ERR_RATELIMITED("%s advertising data length corrected",
-				   hdev->name);
+		bt_dev_err_ratelimited(hdev, "advertising data len corrected");
 		len = real_len;
 	}
 
@@ -5192,7 +5193,7 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
 		return false;
 
 	if (skb->len < sizeof(*hdr)) {
-		BT_ERR("Too short HCI event");
+		bt_dev_err(hdev, "too short HCI event");
 		return false;
 	}
 
@@ -5206,12 +5207,13 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
 	}
 
 	if (hdr->evt != HCI_EV_CMD_COMPLETE) {
-		BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt);
+		bt_dev_err(hdev, "last event is not cmd complete (0x%2.2x)",
+			   hdr->evt);
 		return false;
 	}
 
 	if (skb->len < sizeof(*ev)) {
-		BT_ERR("Too short cmd_complete event");
+		bt_dev_err(hdev, "too short cmd_complete event");
 		return false;
 	}
 
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b73ac149de34..abc0f3224dd1 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -41,6 +41,11 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
 	req->err = 0;
 }
 
+void hci_req_purge(struct hci_request *req)
+{
+	skb_queue_purge(&req->cmd_q);
+}
+
 static int req_run(struct hci_request *req, hci_req_complete_t complete,
 		   hci_req_complete_skb_t complete_skb)
 {
@@ -331,8 +336,8 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
 
 	skb = hci_prepare_cmd(hdev, opcode, plen, param);
 	if (!skb) {
-		BT_ERR("%s no memory for command (opcode 0x%4.4x)",
-		       hdev->name, opcode);
+		bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)",
+			   opcode);
 		req->err = -ENOMEM;
 		return;
 	}
@@ -1421,7 +1426,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
 
 		err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
 		if (err < 0) {
-			BT_ERR("%s failed to generate new RPA", hdev->name);
+			bt_dev_err(hdev, "failed to generate new RPA");
 			return err;
 		}
 
@@ -1783,7 +1788,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
 
 	err = hci_req_run(&req, abort_conn_complete);
 	if (err && err != -ENODATA) {
-		BT_ERR("Failed to run HCI request: err %d", err);
+		bt_dev_err(conn->hdev, "failed to run HCI request: err %d", err);
 		return err;
 	}
 
@@ -1867,7 +1872,8 @@ static void le_scan_disable_work(struct work_struct *work)
 
 	hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status);
 	if (status) {
-		BT_ERR("Failed to disable LE scan: status 0x%02x", status);
+		bt_dev_err(hdev, "failed to disable LE scan: status 0x%02x",
+			   status);
 		return;
 	}
 
@@ -1898,7 +1904,7 @@ static void le_scan_disable_work(struct work_struct *work)
 	hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN,
 		     HCI_CMD_TIMEOUT, &status);
 	if (status) {
-		BT_ERR("Inquiry failed: status 0x%02x", status);
+		bt_dev_err(hdev, "inquiry failed: status 0x%02x", status);
 		goto discov_stopped;
 	}
 
@@ -1940,7 +1946,8 @@ static void le_scan_restart_work(struct work_struct *work)
 
 	hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
 	if (status) {
-		BT_ERR("Failed to restart LE scan: status %d", status);
+		bt_dev_err(hdev, "failed to restart LE scan: status %d",
+			   status);
 		return;
 	}
 
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index dde77bd59f91..702beb140d9f 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -36,6 +36,7 @@ struct hci_request {
 };
 
 void hci_req_init(struct hci_request *req, struct hci_dev *hdev);
+void hci_req_purge(struct hci_request *req);
 int hci_req_run(struct hci_request *req, hci_req_complete_t complete);
 int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete);
 void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 65d734c165bd..923e9a271872 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -251,15 +251,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 }
 
 /* Send frame to sockets with specific channel */
-void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
-			 int flag, struct sock *skip_sk)
+static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+				  int flag, struct sock *skip_sk)
 {
 	struct sock *sk;
 
 	BT_DBG("channel %u len %d", channel, skb->len);
 
-	read_lock(&hci_sk_list.lock);
-
 	sk_for_each(sk, &hci_sk_list.head) {
 		struct sk_buff *nskb;
 
@@ -285,6 +283,13 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
 			kfree_skb(nskb);
 	}
 
+}
+
+void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+			 int flag, struct sock *skip_sk)
+{
+	read_lock(&hci_sk_list.lock);
+	__hci_send_to_channel(channel, skb, flag, skip_sk);
 	read_unlock(&hci_sk_list.lock);
 }
 
@@ -388,8 +393,8 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event,
 		hdr->index = index;
 		hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
 
-		hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
-				    HCI_SOCK_TRUSTED, NULL);
+		__hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+				      HCI_SOCK_TRUSTED, NULL);
 		kfree_skb(skb);
 	}
 
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index ca7a35ebaefb..9874844a95a9 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Bluetooth HCI driver model support. */
 
 #include <linux/module.h>
@@ -13,7 +14,7 @@ static void bt_link_release(struct device *dev)
 	kfree(conn);
 }
 
-static struct device_type bt_link = {
+static const struct device_type bt_link = {
 	.name    = "link",
 	.release = bt_link_release,
 };
@@ -50,7 +51,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
 	dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
 
 	if (device_add(&conn->dev) < 0) {
-		BT_ERR("Failed to register connection device");
+		bt_dev_err(hdev, "failed to register connection device");
 		return;
 	}
 
@@ -86,7 +87,7 @@ static void bt_host_release(struct device *dev)
 	module_put(THIS_MODULE);
 }
 
-static struct device_type bt_host = {
+static const struct device_type bt_host = {
 	.name    = "host",
 	.release = bt_host_release,
 };
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 002743ea509c..8112893037bd 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -734,7 +734,7 @@ static void hidp_stop(struct hid_device *hid)
 	hid->claimed = 0;
 }
 
-static struct hid_ll_driver hidp_hid_driver = {
+struct hid_ll_driver hidp_hid_driver = {
 	.parse = hidp_parse,
 	.start = hidp_start,
 	.stop = hidp_stop,
@@ -743,6 +743,7 @@ static struct hid_ll_driver hidp_hid_driver = {
 	.raw_request = hidp_raw_request,
 	.output_report = hidp_output_report,
 };
+EXPORT_SYMBOL_GPL(hidp_hid_driver);
 
 /* This function sets up the hid device. It does not add it
    to the HID system. That is done in hidp_add_connection(). */
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 303c779bfe38..43ba91c440bc 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -58,7 +58,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 				       u8 code, u8 ident, u16 dlen, void *data);
 static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len,
 			   void *data);
-static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data);
+static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size);
 static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err);
 
 static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
@@ -1473,7 +1473,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 
 			set_bit(CONF_REQ_SENT, &chan->conf_state);
 			l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
-				       l2cap_build_conf_req(chan, buf), buf);
+				       l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
 			chan->num_conf_req++;
 		}
 
@@ -2987,12 +2987,15 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen,
 	return len;
 }
 
-static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
+static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val, size_t size)
 {
 	struct l2cap_conf_opt *opt = *ptr;
 
 	BT_DBG("type 0x%2.2x len %u val 0x%lx", type, len, val);
 
+	if (size < L2CAP_CONF_OPT_SIZE + len)
+		return;
+
 	opt->type = type;
 	opt->len  = len;
 
@@ -3017,7 +3020,7 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
 	*ptr += L2CAP_CONF_OPT_SIZE + len;
 }
 
-static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan)
+static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan, size_t size)
 {
 	struct l2cap_conf_efs efs;
 
@@ -3045,7 +3048,7 @@ static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan)
 	}
 
 	l2cap_add_conf_opt(ptr, L2CAP_CONF_EFS, sizeof(efs),
-			   (unsigned long) &efs);
+			   (unsigned long) &efs, size);
 }
 
 static void l2cap_ack_timeout(struct work_struct *work)
@@ -3191,11 +3194,12 @@ static inline void l2cap_txwin_setup(struct l2cap_chan *chan)
 	chan->ack_win = chan->tx_win;
 }
 
-static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data)
+static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size)
 {
 	struct l2cap_conf_req *req = data;
 	struct l2cap_conf_rfc rfc = { .mode = chan->mode };
 	void *ptr = req->data;
+	void *endptr = data + data_size;
 	u16 size;
 
 	BT_DBG("chan %p", chan);
@@ -3220,7 +3224,7 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data)
 
 done:
 	if (chan->imtu != L2CAP_DEFAULT_MTU)
-		l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu);
+		l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr);
 
 	switch (chan->mode) {
 	case L2CAP_MODE_BASIC:
@@ -3239,7 +3243,7 @@ done:
 		rfc.max_pdu_size    = 0;
 
 		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
-				   (unsigned long) &rfc);
+				   (unsigned long) &rfc, endptr - ptr);
 		break;
 
 	case L2CAP_MODE_ERTM:
@@ -3259,21 +3263,21 @@ done:
 				       L2CAP_DEFAULT_TX_WINDOW);
 
 		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
-				   (unsigned long) &rfc);
+				   (unsigned long) &rfc, endptr - ptr);
 
 		if (test_bit(FLAG_EFS_ENABLE, &chan->flags))
-			l2cap_add_opt_efs(&ptr, chan);
+			l2cap_add_opt_efs(&ptr, chan, endptr - ptr);
 
 		if (test_bit(FLAG_EXT_CTRL, &chan->flags))
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2,
-					   chan->tx_win);
+					   chan->tx_win, endptr - ptr);
 
 		if (chan->conn->feat_mask & L2CAP_FEAT_FCS)
 			if (chan->fcs == L2CAP_FCS_NONE ||
 			    test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) {
 				chan->fcs = L2CAP_FCS_NONE;
 				l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1,
-						   chan->fcs);
+						   chan->fcs, endptr - ptr);
 			}
 		break;
 
@@ -3291,17 +3295,17 @@ done:
 		rfc.max_pdu_size = cpu_to_le16(size);
 
 		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
-				   (unsigned long) &rfc);
+				   (unsigned long) &rfc, endptr - ptr);
 
 		if (test_bit(FLAG_EFS_ENABLE, &chan->flags))
-			l2cap_add_opt_efs(&ptr, chan);
+			l2cap_add_opt_efs(&ptr, chan, endptr - ptr);
 
 		if (chan->conn->feat_mask & L2CAP_FEAT_FCS)
 			if (chan->fcs == L2CAP_FCS_NONE ||
 			    test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) {
 				chan->fcs = L2CAP_FCS_NONE;
 				l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1,
-						   chan->fcs);
+						   chan->fcs, endptr - ptr);
 			}
 		break;
 	}
@@ -3312,10 +3316,11 @@ done:
 	return ptr - data;
 }
 
-static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data)
+static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data_size)
 {
 	struct l2cap_conf_rsp *rsp = data;
 	void *ptr = rsp->data;
+	void *endptr = data + data_size;
 	void *req = chan->conf_req;
 	int len = chan->conf_len;
 	int type, hint, olen;
@@ -3417,7 +3422,7 @@ done:
 			return -ECONNREFUSED;
 
 		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
-				   (unsigned long) &rfc);
+				   (unsigned long) &rfc, endptr - ptr);
 	}
 
 	if (result == L2CAP_CONF_SUCCESS) {
@@ -3430,7 +3435,7 @@ done:
 			chan->omtu = mtu;
 			set_bit(CONF_MTU_DONE, &chan->conf_state);
 		}
-		l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu);
+		l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu, endptr - ptr);
 
 		if (remote_efs) {
 			if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
@@ -3444,7 +3449,7 @@ done:
 
 				l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS,
 						   sizeof(efs),
-						   (unsigned long) &efs);
+						   (unsigned long) &efs, endptr - ptr);
 			} else {
 				/* Send PENDING Conf Rsp */
 				result = L2CAP_CONF_PENDING;
@@ -3477,7 +3482,7 @@ done:
 			set_bit(CONF_MODE_DONE, &chan->conf_state);
 
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
-					   sizeof(rfc), (unsigned long) &rfc);
+					   sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
 
 			if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
 				chan->remote_id = efs.id;
@@ -3491,7 +3496,7 @@ done:
 					le32_to_cpu(efs.sdu_itime);
 				l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS,
 						   sizeof(efs),
-						   (unsigned long) &efs);
+						   (unsigned long) &efs, endptr - ptr);
 			}
 			break;
 
@@ -3505,7 +3510,7 @@ done:
 			set_bit(CONF_MODE_DONE, &chan->conf_state);
 
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
-					   (unsigned long) &rfc);
+					   (unsigned long) &rfc, endptr - ptr);
 
 			break;
 
@@ -3527,10 +3532,11 @@ done:
 }
 
 static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
-				void *data, u16 *result)
+				void *data, size_t size, u16 *result)
 {
 	struct l2cap_conf_req *req = data;
 	void *ptr = req->data;
+	void *endptr = data + size;
 	int type, olen;
 	unsigned long val;
 	struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
@@ -3548,13 +3554,13 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
 				chan->imtu = L2CAP_DEFAULT_MIN_MTU;
 			} else
 				chan->imtu = val;
-			l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu);
+			l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr);
 			break;
 
 		case L2CAP_CONF_FLUSH_TO:
 			chan->flush_to = val;
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO,
-					   2, chan->flush_to);
+					   2, chan->flush_to, endptr - ptr);
 			break;
 
 		case L2CAP_CONF_RFC:
@@ -3568,13 +3574,13 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
 			chan->fcs = 0;
 
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
-					   sizeof(rfc), (unsigned long) &rfc);
+					   sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
 			break;
 
 		case L2CAP_CONF_EWS:
 			chan->ack_win = min_t(u16, val, chan->ack_win);
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2,
-					   chan->tx_win);
+					   chan->tx_win, endptr - ptr);
 			break;
 
 		case L2CAP_CONF_EFS:
@@ -3587,7 +3593,7 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
 				return -ECONNREFUSED;
 
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
-					   (unsigned long) &efs);
+					   (unsigned long) &efs, endptr - ptr);
 			break;
 
 		case L2CAP_CONF_FCS:
@@ -3692,7 +3698,7 @@ void __l2cap_connect_rsp_defer(struct l2cap_chan *chan)
 		return;
 
 	l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
-		       l2cap_build_conf_req(chan, buf), buf);
+		       l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
 	chan->num_conf_req++;
 }
 
@@ -3900,7 +3906,7 @@ sendresp:
 		u8 buf[128];
 		set_bit(CONF_REQ_SENT, &chan->conf_state);
 		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
-			       l2cap_build_conf_req(chan, buf), buf);
+			       l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
 		chan->num_conf_req++;
 	}
 
@@ -3978,7 +3984,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
 			break;
 
 		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
-			       l2cap_build_conf_req(chan, req), req);
+			       l2cap_build_conf_req(chan, req, sizeof(req)), req);
 		chan->num_conf_req++;
 		break;
 
@@ -4090,7 +4096,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
 	}
 
 	/* Complete config. */
-	len = l2cap_parse_conf_req(chan, rsp);
+	len = l2cap_parse_conf_req(chan, rsp, sizeof(rsp));
 	if (len < 0) {
 		l2cap_send_disconn_req(chan, ECONNRESET);
 		goto unlock;
@@ -4124,7 +4130,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
 	if (!test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) {
 		u8 buf[64];
 		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
-			       l2cap_build_conf_req(chan, buf), buf);
+			       l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
 		chan->num_conf_req++;
 	}
 
@@ -4184,7 +4190,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
 			char buf[64];
 
 			len = l2cap_parse_conf_rsp(chan, rsp->data, len,
-						   buf, &result);
+						   buf, sizeof(buf), &result);
 			if (len < 0) {
 				l2cap_send_disconn_req(chan, ECONNRESET);
 				goto done;
@@ -4214,7 +4220,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
 			/* throw out any old stored conf requests */
 			result = L2CAP_CONF_SUCCESS;
 			len = l2cap_parse_conf_rsp(chan, rsp->data, len,
-						   req, &result);
+						   req, sizeof(req), &result);
 			if (len < 0) {
 				l2cap_send_disconn_req(chan, ECONNRESET);
 				goto done;
@@ -4791,7 +4797,7 @@ static void l2cap_do_create(struct l2cap_chan *chan, int result,
 			set_bit(CONF_REQ_SENT, &chan->conf_state);
 			l2cap_send_cmd(chan->conn, l2cap_get_ident(chan->conn),
 				       L2CAP_CONF_REQ,
-				       l2cap_build_conf_req(chan, buf), buf);
+				       l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
 			chan->num_conf_req++;
 		}
 	}
@@ -7465,7 +7471,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 				set_bit(CONF_REQ_SENT, &chan->conf_state);
 				l2cap_send_cmd(conn, l2cap_get_ident(conn),
 					       L2CAP_CONF_REQ,
-					       l2cap_build_conf_req(chan, buf),
+					       l2cap_build_conf_req(chan, buf, sizeof(buf)),
 					       buf);
 				chan->num_conf_req++;
 			}
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index aa4cf64e32a6..63e65d9b4b24 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -30,10 +30,10 @@
 
 #include <net/bluetooth/bluetooth.h>
 
-void baswap(bdaddr_t *dst, bdaddr_t *src)
+void baswap(bdaddr_t *dst, const bdaddr_t *src)
 {
-	unsigned char *d = (unsigned char *) dst;
-	unsigned char *s = (unsigned char *) src;
+	const unsigned char *s = (const unsigned char *)src;
+	unsigned char *d = (unsigned char *)dst;
 	unsigned int i;
 
 	for (i = 0; i < 6; i++)
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 1fba2a03f8ae..6e9fc86d8daf 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2159,8 +2159,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	key_count = __le16_to_cpu(cp->key_count);
 	if (key_count > max_key_count) {
-		BT_ERR("load_link_keys: too big key_count value %u",
-		       key_count);
+		bt_dev_err(hdev, "load_link_keys: too big key_count value %u",
+			   key_count);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -2168,8 +2168,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
 	expected_len = sizeof(*cp) + key_count *
 					sizeof(struct mgmt_link_key_info);
 	if (expected_len != len) {
-		BT_ERR("load_link_keys: expected %u bytes, got %u bytes",
-		       expected_len, len);
+		bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes",
+			   expected_len, len);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -2561,7 +2561,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data,
 
 		memcpy(&ncp.addr, &cp->addr, sizeof(ncp.addr));
 
-		BT_ERR("PIN code is not 16 bytes long");
+		bt_dev_err(hdev, "PIN code is not 16 bytes long");
 
 		err = send_pin_code_neg_reply(sk, hdev, &ncp);
 		if (err >= 0)
@@ -3391,7 +3391,8 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev,
 					MGMT_OP_ADD_REMOTE_OOB_DATA,
 					status, &cp->addr, sizeof(cp->addr));
 	} else {
-		BT_ERR("add_remote_oob_data: invalid length of %u bytes", len);
+		bt_dev_err(hdev, "add_remote_oob_data: invalid len of %u bytes",
+			   len);
 		err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA,
 				      MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -3604,8 +3605,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
 
 	uuid_count = __le16_to_cpu(cp->uuid_count);
 	if (uuid_count > max_uuid_count) {
-		BT_ERR("service_discovery: too big uuid_count value %u",
-		       uuid_count);
+		bt_dev_err(hdev, "service_discovery: too big uuid_count value %u",
+			   uuid_count);
 		err = mgmt_cmd_complete(sk, hdev->id,
 					MGMT_OP_START_SERVICE_DISCOVERY,
 					MGMT_STATUS_INVALID_PARAMS, &cp->type,
@@ -3615,8 +3616,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
 
 	expected_len = sizeof(*cp) + uuid_count * 16;
 	if (expected_len != len) {
-		BT_ERR("service_discovery: expected %u bytes, got %u bytes",
-		       expected_len, len);
+		bt_dev_err(hdev, "service_discovery: expected %u bytes, got %u bytes",
+			   expected_len, len);
 		err = mgmt_cmd_complete(sk, hdev->id,
 					MGMT_OP_START_SERVICE_DISCOVERY,
 					MGMT_STATUS_INVALID_PARAMS, &cp->type,
@@ -3943,7 +3944,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
 		err = hci_req_run(&req, enable_advertising_instance);
 
 	if (err)
-		BT_ERR("Failed to re-configure advertising");
+		bt_dev_err(hdev, "failed to re-configure advertising");
 
 unlock:
 	hci_dev_unlock(hdev);
@@ -4664,15 +4665,16 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 
 	irk_count = __le16_to_cpu(cp->irk_count);
 	if (irk_count > max_irk_count) {
-		BT_ERR("load_irks: too big irk_count value %u", irk_count);
+		bt_dev_err(hdev, "load_irks: too big irk_count value %u",
+			   irk_count);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
 
 	expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info);
 	if (expected_len != len) {
-		BT_ERR("load_irks: expected %u bytes, got %u bytes",
-		       expected_len, len);
+		bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes",
+			   expected_len, len);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -4745,7 +4747,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 
 	key_count = __le16_to_cpu(cp->key_count);
 	if (key_count > max_key_count) {
-		BT_ERR("load_ltks: too big key_count value %u", key_count);
+		bt_dev_err(hdev, "load_ltks: too big key_count value %u",
+			   key_count);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -4753,8 +4756,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 	expected_len = sizeof(*cp) + key_count *
 					sizeof(struct mgmt_ltk_info);
 	if (expected_len != len) {
-		BT_ERR("load_keys: expected %u bytes, got %u bytes",
-		       expected_len, len);
+		bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes",
+			   expected_len, len);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -4873,14 +4876,15 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status,
 	}
 
 	if (!cp) {
-		BT_ERR("invalid sent_cmd in conn_info response");
+		bt_dev_err(hdev, "invalid sent_cmd in conn_info response");
 		goto unlock;
 	}
 
 	handle = __le16_to_cpu(cp->handle);
 	conn = hci_conn_hash_lookup_handle(hdev, handle);
 	if (!conn) {
-		BT_ERR("unknown handle (%d) in conn_info response", handle);
+		bt_dev_err(hdev, "unknown handle (%d) in conn_info response",
+			   handle);
 		goto unlock;
 	}
 
@@ -5477,8 +5481,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	param_count = __le16_to_cpu(cp->param_count);
 	if (param_count > max_param_count) {
-		BT_ERR("load_conn_param: too big param_count value %u",
-		       param_count);
+		bt_dev_err(hdev, "load_conn_param: too big param_count value %u",
+			   param_count);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -5486,8 +5490,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 	expected_len = sizeof(*cp) + param_count *
 					sizeof(struct mgmt_conn_param);
 	if (expected_len != len) {
-		BT_ERR("load_conn_param: expected %u bytes, got %u bytes",
-		       expected_len, len);
+		bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes",
+			   expected_len, len);
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
@@ -5512,7 +5516,7 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 		} else if (param->addr.type == BDADDR_LE_RANDOM) {
 			addr_type = ADDR_LE_DEV_RANDOM;
 		} else {
-			BT_ERR("Ignoring invalid connection parameters");
+			bt_dev_err(hdev, "ignoring invalid connection parameters");
 			continue;
 		}
 
@@ -5525,14 +5529,14 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 		       min, max, latency, timeout);
 
 		if (hci_check_conn_params(min, max, latency, timeout) < 0) {
-			BT_ERR("Ignoring invalid connection parameters");
+			bt_dev_err(hdev, "ignoring invalid connection parameters");
 			continue;
 		}
 
 		hci_param = hci_conn_params_add(hdev, &param->addr.bdaddr,
 						addr_type);
 		if (!hci_param) {
-			BT_ERR("Failed to add connection parameters");
+			bt_dev_err(hdev, "failed to add connection parameters");
 			continue;
 		}
 
@@ -6383,6 +6387,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
 	if (skb_queue_empty(&req.cmd_q) ||
 	    !hdev_is_powered(hdev) ||
 	    hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
+		hci_req_purge(&req);
 		rp.instance = cp->instance;
 		err = mgmt_cmd_complete(sk, hdev->id,
 					MGMT_OP_REMOVE_ADVERTISING,
diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c
index ee92c925ecc5..03e3c89c3046 100644
--- a/net/bluetooth/selftest.c
+++ b/net/bluetooth/selftest.c
@@ -138,12 +138,12 @@ static const u8 dhkey_3[32] __initconst = {
 	0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70,
 };
 
-static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
-				   const u8 pub_a[64], const u8 pub_b[64],
-				   const u8 dhkey[32])
+static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32],
+				   const u8 priv_b[32], const u8 pub_a[64],
+				   const u8 pub_b[64], const u8 dhkey[32])
 {
 	u8 *tmp, *dhkey_a, *dhkey_b;
-	int ret = 0;
+	int ret;
 
 	tmp = kmalloc(64, GFP_KERNEL);
 	if (!tmp)
@@ -152,19 +152,32 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
 	dhkey_a = &tmp[0];
 	dhkey_b = &tmp[32];
 
-	compute_ecdh_secret(pub_b, priv_a, dhkey_a);
-	compute_ecdh_secret(pub_a, priv_b, dhkey_b);
+	ret = set_ecdh_privkey(tfm, priv_a);
+	if (ret)
+		goto out;
+
+	ret = compute_ecdh_secret(tfm, pub_b, dhkey_a);
+	if (ret)
+		goto out;
 
 	if (memcmp(dhkey_a, dhkey, 32)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
+	ret = set_ecdh_privkey(tfm, priv_b);
+	if (ret)
+		goto out;
+
+	ret = compute_ecdh_secret(tfm, pub_a, dhkey_b);
+	if (ret)
+		goto out;
+
 	if (memcmp(dhkey_b, dhkey, 32))
 		ret = -EINVAL;
-
+	/* fall through */
 out:
-	kfree(dhkey_a);
+	kfree(tmp);
 	return ret;
 }
 
@@ -185,30 +198,47 @@ static const struct file_operations test_ecdh_fops = {
 
 static int __init test_ecdh(void)
 {
+	struct crypto_kpp *tfm;
 	ktime_t calltime, delta, rettime;
-	unsigned long long duration;
+	unsigned long long duration = 0;
 	int err;
 
 	calltime = ktime_get();
 
-	err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1);
+	/* Shared by all samples; free it on every exit below. */
+	tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+	if (IS_ERR(tfm)) {
+		BT_ERR("Unable to create ECDH crypto context");
+		err = PTR_ERR(tfm);
+		goto done;
+	}
+
+	err = test_ecdh_sample(tfm, priv_a_1, priv_b_1, pub_a_1, pub_b_1,
+			       dhkey_1);
 	if (err) {
 		BT_ERR("ECDH sample 1 failed");
+		crypto_free_kpp(tfm);
 		goto done;
 	}
 
-	err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2);
+	err = test_ecdh_sample(tfm, priv_a_2, priv_b_2, pub_a_2, pub_b_2,
+			       dhkey_2);
 	if (err) {
 		BT_ERR("ECDH sample 2 failed");
+		crypto_free_kpp(tfm);
 		goto done;
 	}
 
-	err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3);
+	err = test_ecdh_sample(tfm, priv_a_3, priv_a_3, pub_a_3, pub_a_3,
+			       dhkey_3);
 	if (err) {
 		BT_ERR("ECDH sample 3 failed");
+		crypto_free_kpp(tfm);
 		goto done;
 	}
 
+	crypto_free_kpp(tfm);
+
 	rettime = ktime_get();
 	delta = ktime_sub(rettime, calltime);
 	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a0ef89772c36..01117ae84f1d 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -26,6 +26,7 @@
 #include <crypto/algapi.h>
 #include <crypto/b128ops.h>
 #include <crypto/hash.h>
+#include <crypto/kpp.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -83,7 +84,6 @@ enum {
 struct smp_dev {
 	/* Secure Connections OOB data */
 	u8			local_pk[64];
-	u8			local_sk[32];
 	u8			local_rand[16];
 	bool			debug_key;
 
@@ -92,6 +92,7 @@ struct smp_dev {
 
 	struct crypto_cipher	*tfm_aes;
 	struct crypto_shash	*tfm_cmac;
+	struct crypto_kpp	*tfm_ecdh;
 };
 
 struct smp_chan {
@@ -124,13 +125,13 @@ struct smp_chan {
 
 	/* Secure Connections variables */
 	u8			local_pk[64];
-	u8			local_sk[32];
 	u8			remote_pk[64];
 	u8			dhkey[32];
 	u8			mackey[16];
 
 	struct crypto_cipher	*tfm_aes;
 	struct crypto_shash	*tfm_cmac;
+	struct crypto_kpp	*tfm_ecdh;
 };
 
 /* These debug key values are defined in the SMP section of the core
@@ -565,22 +566,22 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
 
 	if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
 		BT_DBG("Using debug keys");
+		err = set_ecdh_privkey(smp->tfm_ecdh, debug_sk);
+		if (err)
+			return err;
 		memcpy(smp->local_pk, debug_pk, 64);
-		memcpy(smp->local_sk, debug_sk, 32);
 		smp->debug_key = true;
 	} else {
 		while (true) {
-			/* Seed private key with random number */
-			get_random_bytes(smp->local_sk, 32);
-
-			/* Generate local key pair for Secure Connections */
-			if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
-				return -EIO;
+			/* Generate key pair for Secure Connections */
+			err = generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk);
+			if (err)
+				return err;
 
 			/* This is unlikely, but we need to check that
 			 * we didn't accidentially generate a debug key.
 			 */
-			if (crypto_memneq(smp->local_sk, debug_sk, 32))
+			if (crypto_memneq(smp->local_pk, debug_pk, 64))
 				break;
 		}
 		smp->debug_key = false;
@@ -588,7 +589,6 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
 
 	SMP_DBG("OOB Public Key X: %32phN", smp->local_pk);
 	SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32);
-	SMP_DBG("OOB Private Key:  %32phN", smp->local_sk);
 
 	get_random_bytes(smp->local_rand, 16);
 
@@ -771,6 +771,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
 
 	crypto_free_cipher(smp->tfm_aes);
 	crypto_free_shash(smp->tfm_cmac);
+	crypto_free_kpp(smp->tfm_ecdh);
 
 	/* Ensure that we don't leave any debug key around if debug key
 	 * support hasn't been explicitly enabled.
@@ -995,7 +996,8 @@ static u8 smp_random(struct smp_chan *smp)
 		return SMP_UNSPECIFIED;
 
 	if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) {
-		BT_ERR("Pairing failed (confirmation values mismatch)");
+		bt_dev_err(hcon->hdev, "pairing failed "
+			   "(confirmation values mismatch)");
 		return SMP_CONFIRM_FAILED;
 	}
 
@@ -1209,7 +1211,7 @@ static void sc_generate_ltk(struct smp_chan *smp)
 
 	key = hci_find_link_key(hdev, &hcon->dst);
 	if (!key) {
-		BT_ERR("%s No Link Key found to generate LTK", hdev->name);
+		bt_dev_err(hdev, "no Link Key found to generate LTK");
 		return;
 	}
 
@@ -1391,16 +1393,19 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
 	smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(smp->tfm_aes)) {
 		BT_ERR("Unable to create AES crypto context");
-		kzfree(smp);
-		return NULL;
+		goto zfree_smp;
 	}
 
 	smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(smp->tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		crypto_free_cipher(smp->tfm_aes);
-		kzfree(smp);
-		return NULL;
+		goto free_cipher;
+	}
+
+	smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+	if (IS_ERR(smp->tfm_ecdh)) {
+		BT_ERR("Unable to create ECDH crypto context");
+		goto free_shash;
 	}
 
 	smp->conn = conn;
@@ -1413,6 +1418,14 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
 	hci_conn_hold(conn->hcon);
 
 	return smp;
+
+free_shash:
+	crypto_free_shash(smp->tfm_cmac);
+free_cipher:
+	crypto_free_cipher(smp->tfm_aes);
+zfree_smp:
+	kzfree(smp);
+	return NULL;
 }
 
 static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16])
@@ -1883,7 +1896,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
 		smp_dev = chan->data;
 
 		memcpy(smp->local_pk, smp_dev->local_pk, 64);
-		memcpy(smp->local_sk, smp_dev->local_sk, 32);
 		memcpy(smp->lr, smp_dev->local_rand, 16);
 
 		if (smp_dev->debug_key)
@@ -1894,22 +1906,20 @@ static u8 sc_send_public_key(struct smp_chan *smp)
 
 	if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
 		BT_DBG("Using debug keys");
+		if (set_ecdh_privkey(smp->tfm_ecdh, debug_sk))
+			return SMP_UNSPECIFIED;
 		memcpy(smp->local_pk, debug_pk, 64);
-		memcpy(smp->local_sk, debug_sk, 32);
 		set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
 	} else {
 		while (true) {
-			/* Seed private key with random number */
-			get_random_bytes(smp->local_sk, 32);
-
-			/* Generate local key pair for Secure Connections */
-			if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
+			/* Generate key pair for Secure Connections */
+			if (generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk))
 				return SMP_UNSPECIFIED;
 
 			/* This is unlikely, but we need to check that
 			 * we didn't accidentially generate a debug key.
 			 */
-			if (crypto_memneq(smp->local_sk, debug_sk, 32))
+			if (crypto_memneq(smp->local_pk, debug_pk, 64))
 				break;
 		}
 	}
@@ -1917,7 +1927,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
 done:
 	SMP_DBG("Local Public Key X: %32phN", smp->local_pk);
 	SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32);
-	SMP_DBG("Local Private Key:  %32phN", smp->local_sk);
 
 	smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk);
 
@@ -2059,11 +2068,11 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
 		return SMP_UNSPECIFIED;
 
 	if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) {
-		BT_ERR("Refusing SMP SC -> legacy fallback in SC-only mode");
+		bt_dev_err(hdev, "refusing legacy fallback in SC-only mode");
 		return SMP_UNSPECIFIED;
 	}
 
-	BT_ERR("Trying to fall back to legacy SMP");
+	bt_dev_err(hdev, "trying to fall back to legacy SMP");
 
 	req = (void *) &smp->preq[1];
 	rsp = (void *) &smp->prsp[1];
@@ -2074,7 +2083,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
 	auth = req->auth_req & AUTH_REQ_MASK(hdev);
 
 	if (tk_request(conn, 0, auth, rsp->io_capability, req->io_capability)) {
-		BT_ERR("Failed to fall back to legacy SMP");
+		bt_dev_err(hdev, "failed to fall back to legacy SMP");
 		return SMP_UNSPECIFIED;
 	}
 
@@ -2347,7 +2356,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
 
 	chan = conn->smp;
 	if (!chan) {
-		BT_ERR("SMP security requested but not available");
+		bt_dev_err(hcon->hdev, "security requested but not available");
 		return 1;
 	}
 
@@ -2540,7 +2549,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
 	 */
 	if (!bacmp(&info->bdaddr, BDADDR_ANY) ||
 	    !hci_is_identity_address(&info->bdaddr, info->addr_type)) {
-		BT_ERR("Ignoring IRK with no identity address");
+		bt_dev_err(hcon->hdev, "ignoring IRK with no identity address");
 		goto distribute;
 	}
 
@@ -2645,6 +2654,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
 	struct l2cap_chan *chan = conn->smp;
 	struct smp_chan *smp = chan->data;
 	struct hci_dev *hdev = hcon->hdev;
+	struct crypto_kpp *tfm_ecdh;
 	struct smp_cmd_pairing_confirm cfm;
 	int err;
 
@@ -2677,7 +2687,26 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
 	SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk);
 	SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32);
 
-	if (!compute_ecdh_secret(smp->remote_pk, smp->local_sk, smp->dhkey))
+	/* Compute the shared secret on the same crypto tfm on which the private
+	 * key was set/generated. For local OOB data that tfm belongs to the
+	 * controller-wide smp_dev hanging off hdev->smp_data; chan->data of
+	 * this connection's channel is the struct smp_chan, not a smp_dev.
+	 */
+	if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) {
+		struct l2cap_chan *hchan = hdev->smp_data;
+		struct smp_dev *smp_dev;
+
+		if (!hchan || !hchan->data)
+			return SMP_UNSPECIFIED;
+
+		smp_dev = hchan->data;
+
+		tfm_ecdh = smp_dev->tfm_ecdh;
+	} else {
+		tfm_ecdh = smp->tfm_ecdh;
+	}
+
+	if (compute_ecdh_secret(tfm_ecdh, smp->remote_pk, smp->dhkey))
 		return SMP_UNSPECIFIED;
 
 	SMP_DBG("DHKey %32phN", smp->dhkey);
@@ -2933,8 +2954,8 @@ done:
 	return err;
 
 drop:
-	BT_ERR("%s unexpected SMP command 0x%02x from %pMR", hcon->hdev->name,
-	       code, &hcon->dst);
+	bt_dev_err(hcon->hdev, "unexpected SMP command 0x%02x from %pMR",
+		   code, &hcon->dst);
 	kfree_skb(skb);
 	return 0;
 }
@@ -3001,8 +3022,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
 
 	smp = smp_chan_create(conn);
 	if (!smp) {
-		BT_ERR("%s unable to create SMP context for BR/EDR",
-		       hdev->name);
+		bt_dev_err(hdev, "unable to create SMP context for BR/EDR");
 		return;
 	}
 
@@ -3169,6 +3189,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
 	struct smp_dev *smp;
 	struct crypto_cipher *tfm_aes;
 	struct crypto_shash *tfm_cmac;
+	struct crypto_kpp *tfm_ecdh;
 
 	if (cid == L2CAP_CID_SMP_BREDR) {
 		smp = NULL;
@@ -3194,8 +3215,18 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
 		return ERR_CAST(tfm_cmac);
 	}
 
+	tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+	if (IS_ERR(tfm_ecdh)) {
+		BT_ERR("Unable to create ECDH crypto context");
+		crypto_free_shash(tfm_cmac);
+		crypto_free_cipher(tfm_aes);
+		kzfree(smp);
+		return ERR_CAST(tfm_ecdh);
+	}
+
 	smp->tfm_aes = tfm_aes;
 	smp->tfm_cmac = tfm_cmac;
+	smp->tfm_ecdh = tfm_ecdh;
 	smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
 	smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
 
@@ -3205,6 +3236,7 @@ create_chan:
 		if (smp) {
 			crypto_free_cipher(smp->tfm_aes);
 			crypto_free_shash(smp->tfm_cmac);
+			crypto_free_kpp(smp->tfm_ecdh);
 			kzfree(smp);
 		}
 		return ERR_PTR(-ENOMEM);
@@ -3252,6 +3284,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
 		chan->data = NULL;
 		crypto_free_cipher(smp->tfm_aes);
 		crypto_free_shash(smp->tfm_cmac);
+		crypto_free_kpp(smp->tfm_ecdh);
 		kzfree(smp);
 	}
 
@@ -3490,25 +3523,18 @@ void smp_unregister(struct hci_dev *hdev)
 
 #if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
 
-static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
+static int __init test_debug_key(struct crypto_kpp *tfm_ecdh)
 {
-	int i;
-
-	for (i = 0; i < ndigits; i++)
-		out[i] = __swab64(in[ndigits - 1 - i]);
-}
-
-static int __init test_debug_key(void)
-{
-	u8 pk[64], sk[32];
-
-	swap_digits((u64 *)debug_sk, (u64 *)sk, 4);
+	u8 pk[64];
+	int err;
 
-	if (!generate_ecdh_keys(pk, sk))
-		return -EINVAL;
+	err = set_ecdh_privkey(tfm_ecdh, debug_sk);
+	if (err)
+		return err;
 
-	if (crypto_memneq(sk, debug_sk, 32))
-		return -EINVAL;
+	err = generate_ecdh_public_key(tfm_ecdh, pk);
+	if (err)
+		return err;
 
 	if (crypto_memneq(pk, debug_pk, 64))
 		return -EINVAL;
@@ -3763,7 +3789,8 @@ static const struct file_operations test_smp_fops = {
 };
 
 static int __init run_selftests(struct crypto_cipher *tfm_aes,
-				struct crypto_shash *tfm_cmac)
+				struct crypto_shash *tfm_cmac,
+				struct crypto_kpp *tfm_ecdh)
 {
 	ktime_t calltime, delta, rettime;
 	unsigned long long duration;
@@ -3771,7 +3798,7 @@ static int __init run_selftests(struct crypto_cipher *tfm_aes,
 
 	calltime = ktime_get();
 
-	err = test_debug_key();
+	err = test_debug_key(tfm_ecdh);
 	if (err) {
 		BT_ERR("debug_key test failed");
 		goto done;
@@ -3848,6 +3875,7 @@ int __init bt_selftest_smp(void)
 {
 	struct crypto_cipher *tfm_aes;
 	struct crypto_shash *tfm_cmac;
+	struct crypto_kpp *tfm_ecdh;
 	int err;
 
 	tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
@@ -3863,10 +3891,19 @@ int __init bt_selftest_smp(void)
 		return PTR_ERR(tfm_cmac);
 	}
 
-	err = run_selftests(tfm_aes, tfm_cmac);
+	tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+	if (IS_ERR(tfm_ecdh)) {
+		BT_ERR("Unable to create ECDH crypto context");
+		crypto_free_shash(tfm_cmac);
+		crypto_free_cipher(tfm_aes);
+		return PTR_ERR(tfm_ecdh);
+	}
+
+	err = run_selftests(tfm_aes, tfm_cmac, tfm_ecdh);
 
 	crypto_free_shash(tfm_cmac);
 	crypto_free_cipher(tfm_aes);
+	crypto_free_kpp(tfm_ecdh);
 
 	return err;
 }
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6be41a44d688..a86e6687026e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -133,7 +133,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	if (is_l2)
 		__skb_push(skb, ETH_HLEN);
 	if (is_direct_pkt_access)
-		bpf_compute_data_end(skb);
+		bpf_compute_data_pointers(skb);
 	retval = bpf_test_run(prog, skb, repeat, &duration);
 	if (!is_l2)
 		__skb_push(skb, ETH_HLEN);
@@ -162,6 +162,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 
 	xdp.data_hard_start = data;
 	xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
+	xdp.data_meta = xdp.data;
 	xdp.data_end = xdp.data + size;
 
 	retval = bpf_test_run(prog, &xdp, repeat, &duration);
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 40b1ede527ca..ac9ef337f0fa 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the IEEE 802.1d ethernet bridging layer.
 #
@@ -7,7 +8,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o
 bridge-y	:= br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
 			br_ioctl.o br_stp.o br_stp_bpdu.o \
 			br_stp_if.o br_stp_timer.o br_netlink.o \
-			br_netlink_tunnel.o
+			br_netlink_tunnel.o br_arp_nd_proxy.o
 
 bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
 
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1407d1ba7577..6bf06e756df2 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -112,7 +112,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	/* Events that may cause spanning tree to refresh */
 	if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
 	    event == NETDEV_CHANGE || event == NETDEV_DOWN)
-		br_ifinfo_notify(RTM_NEWLINK, p);
+		br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 
 	return NOTIFY_DONE;
 }
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
new file mode 100644
index 000000000000..2cf7716254be
--- /dev/null
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -0,0 +1,469 @@
+/*
+ *  Handle bridge arp/nd proxy/suppress
+ *
+ *  Copyright (C) 2017 Cumulus Networks
+ *  Copyright (c) 2017 Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ *  Authors:
+ *	Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/neighbour.h>
+#include <net/arp.h>
+#include <linux/if_vlan.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_checksum.h>
+#endif
+
+#include "br_private.h"
+
+/* Cache in @br whether any of its ports has BR_NEIGH_SUPPRESS set. */
+void br_recalculate_neigh_suppress_enabled(struct net_bridge *br)
+{
+	struct net_bridge_port *port;
+	bool enabled = false;
+
+	list_for_each_entry(port, &br->port_list, list) {
+		enabled = !!(port->flags & BR_NEIGH_SUPPRESS);
+		if (enabled)
+			break;
+	}
+
+	br->neigh_suppress_enabled = enabled;
+}
+
+#if IS_ENABLED(CONFIG_INET)
+/* Emit an ARPOP_REPLY built from the given addresses on @dev.  Without a
+ * vlan tag arp_send() does everything.  Otherwise the reply comes from
+ * arp_create(), is tagged unless its vid equals the pvid, and goes out via
+ * arp_xmit() when @p is set or back in via netif_rx_ni() when it is NULL.
+ */
+static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
+			struct net_device *dev, __be32 dest_ip, __be32 src_ip,
+			const unsigned char *dest_hw,
+			const unsigned char *src_hw,
+			const unsigned char *target_hw,
+			__be16 vlan_proto, u16 vlan_tci)
+{
+	struct net_bridge_vlan_group *vg;
+	struct sk_buff *reply;
+
+	netdev_dbg(dev, "arp send dev %s dst %pI4 dst_hw %pM src %pI4 src_hw %pM\n",
+		   dev->name, &dest_ip, dest_hw, &src_ip, src_hw);
+
+	if (!vlan_tci) {
+		arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
+			 dest_hw, src_hw, target_hw);
+		return;
+	}
+
+	reply = arp_create(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
+			   dest_hw, src_hw, target_hw);
+	if (!reply)
+		return;
+
+	vg = p ? nbp_vlan_group_rcu(p) : br_vlan_group_rcu(br);
+
+	/* vlan_tci is non-zero here; only the pvid goes out untagged */
+	if (br_get_pvid(vg) != (vlan_tci & VLAN_VID_MASK))
+		__vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci);
+
+	if (p) {
+		arp_xmit(reply);
+		return;
+	}
+
+	skb_reset_mac_header(reply);
+	__skb_pull(reply, skb_network_offset(reply));
+	reply->ip_summed = CHECKSUM_UNNECESSARY;
+	reply->pkt_type = PACKET_HOST;
+
+	netif_rx_ni(reply);
+}
+
+/* netdev_walk_all_upper_dev_rcu() callback: 1 when inet_confirm_addr()
+ * returns the __be32 behind @data for @dev, 0 otherwise.
+ */
+static int br_chk_addr_ip(struct net_device *dev, void *data)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	__be32 ip = *(__be32 *)data;
+	__be32 confirmed = 0;
+
+	if (in_dev)
+		confirmed = inet_confirm_addr(dev_net(dev), in_dev, 0, ip,
+					      RT_SCOPE_HOST);
+
+	/* without an in_device this compares @ip against 0 */
+	return confirmed == ip ? 1 : 0;
+}
+
+/* True when @ip is found on @dev itself or, failing that, on any of the
+ * upper devices visited by netdev_walk_all_upper_dev_rcu().
+ */
+static bool br_is_local_ip(struct net_device *dev, __be32 ip)
+{
+	/* own address first, then the ones configured on upper devs */
+	if (br_chk_addr_ip(dev, &ip))
+		return true;
+
+	return netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip) != 0;
+}
+
+/* Handle an ARP/RARP frame seen by the bridge: decide whether it has to be
+ * kept away from proxy/neigh-suppress ports (proxyarp_replied) and, for a
+ * request whose target is a known valid neighbour, answer on its behalf
+ * through br_arp_send().  @p is the ingress port, NULL for local xmit.
+ */
+void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
+			      u16 vid, struct net_bridge_port *p)
+{
+	struct net_device *brdev = br->dev;
+	struct net_device *vlandev = brdev;
+	struct net_bridge_fdb_entry *f;
+	struct neighbour *n;
+	struct arphdr *parp;
+	u8 *arpptr, *sha;
+	__be32 sip, tip;
+
+	BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
+	if (brdev->flags & IFF_NOARP)
+		return;
+	if (!pskb_may_pull(skb, arp_hdr_len(brdev)))
+		return;
+
+	parp = arp_hdr(skb);
+
+	if (parp->ar_pro != htons(ETH_P_IP) ||
+	    parp->ar_hln != brdev->addr_len ||
+	    parp->ar_pln != 4)
+		return;
+
+	/* payload layout: sha, sip, tha, tip */
+	sha = (u8 *)parp + sizeof(struct arphdr);
+	arpptr = sha + brdev->addr_len;
+	memcpy(&sip, arpptr, sizeof(sip));
+	arpptr += sizeof(sip) + brdev->addr_len;
+	memcpy(&tip, arpptr, sizeof(tip));
+
+	if (ipv4_is_loopback(tip) ||
+	    ipv4_is_multicast(tip))
+		return;
+
+	if (br->neigh_suppress_enabled) {
+		if (p && (p->flags & BR_NEIGH_SUPPRESS))
+			return;
+		if (ipv4_is_zeronet(sip) || sip == tip) {
+			/* prevent flooding to neigh suppress ports */
+			BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+			return;
+		}
+	}
+
+	if (parp->ar_op != htons(ARPOP_REQUEST))
+		return;
+
+	if (vid != 0) {
+		vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
+						   vid);
+		if (!vlandev)
+			return;
+	}
+
+	if (br->neigh_suppress_enabled && br_is_local_ip(vlandev, tip)) {
+		/* the target is one of our own addresses: no proxy reply
+		 * and no forwarding to neigh suppress ports
+		 */
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+		return;
+	}
+
+	/* answer only for a valid neighbour whose mac has an fdb entry */
+	n = neigh_lookup(&arp_tbl, &tip, vlandev);
+	if (!n)
+		return;
+	if (!(n->nud_state & NUD_VALID))
+		goto out;
+
+	f = br_fdb_find_rcu(br, n->ha, vid);
+	if (!f)
+		goto out;
+
+	if ((p && (p->flags & BR_PROXYARP)) ||
+	    (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI |
+					 BR_NEIGH_SUPPRESS)))) {
+		if (vid)
+			br_arp_send(br, p, skb->dev, sip, tip, sha, n->ha, sha,
+				    skb->vlan_proto, skb_vlan_tag_get(skb));
+		else
+			br_arp_send(br, p, skb->dev, sip, tip, sha, n->ha, sha,
+				    0, 0);
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+	} else if (br->neigh_suppress_enabled) {
+		/* no reply sent, but the mac is known: still report the
+		 * request as answered
+		 */
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+	}
+
+out:
+	neigh_release(n);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+/* Read the nd_msg that follows the IPv6 header of @skb through
+ * skb_header_pointer(), with @msg as its buffer.  Return it only for a
+ * code-0 neighbour solicitation or advertisement, NULL otherwise.
+ */
+struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *msg)
+{
+	int off = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+	struct nd_msg *nd = skb_header_pointer(skb, off, sizeof(*msg), msg);
+
+	if (!nd || nd->icmph.icmp6_code != 0)
+		return NULL;
+	if (nd->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+	    nd->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)
+		return nd;
+	return NULL;
+}
+
+/* Answer the solicitation @ns carried by @request with an advertisement
+ * for @ns->target on behalf of @n: transmitted when @p is set, fed to
+ * netif_rx_ni() otherwise.  A malformed NS option list gets no reply.
+ */
+static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
+		       struct sk_buff *request, struct neighbour *n,
+		       __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns)
+{
+	struct net_device *dev = request->dev;
+	struct net_bridge_vlan_group *vg;
+	struct sk_buff *reply;
+	struct nd_msg *na;
+	struct ipv6hdr *pip6;
+	int na_olen = 8; /* opt hdr + ETH_ALEN for target */
+	int i, len, olen, ns_olen;
+	u8 *daddr;
+
+	if (!dev)
+		return;
+
+	len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
+		sizeof(*na) + na_olen + dev->needed_tailroom;
+
+	reply = alloc_skb(len, GFP_ATOMIC);
+	if (!reply)
+		return;
+
+	reply->protocol = htons(ETH_P_IPV6);
+	reply->dev = dev;
+	skb_reserve(reply, LL_RESERVED_SPACE(dev));
+	skb_push(reply, sizeof(struct ethhdr));
+	skb_set_mac_header(reply, 0);
+
+	daddr = eth_hdr(request)->h_source;
+
+	/* Use the NS source link-layer option if present; a zero-length or
+	 * overrunning option would loop forever or be read out of bounds.
+	 */
+	ns_olen = request->len - (skb_network_offset(request) +
+				  sizeof(struct ipv6hdr)) - sizeof(*ns);
+	for (i = 0; i < ns_olen - 1; i += olen) {
+		olen = ns->opt[i + 1] << 3;
+		if (!olen || i + olen > ns_olen) {
+			kfree_skb(reply);
+			return;
+		}
+		if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
+			daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
+			break;
+		}
+	}
+
+	/* Ethernet header */
+	ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
+	ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
+	eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
+
+	skb_pull(reply, sizeof(struct ethhdr));
+	skb_set_network_header(reply, 0);
+	skb_put(reply, sizeof(struct ipv6hdr));
+
+	/* IPv6 header */
+	pip6 = ipv6_hdr(reply);
+	memset(pip6, 0, sizeof(struct ipv6hdr));
+	pip6->version = 6;
+	pip6->priority = ipv6_hdr(request)->priority;
+	pip6->nexthdr = IPPROTO_ICMPV6;
+	pip6->hop_limit = 255;
+	pip6->daddr = ipv6_hdr(request)->saddr;
+	pip6->saddr = *(struct in6_addr *)n->primary_key;
+
+	skb_pull(reply, sizeof(struct ipv6hdr));
+	skb_set_transport_header(reply, 0);
+
+	na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen);
+
+	/* Neighbor Advertisement */
+	memset(na, 0, sizeof(*na) + na_olen);
+	na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+	na->icmph.icmp6_router = 0; /* XXX: should be 1 ? */
+	na->icmph.icmp6_override = 1;
+	na->icmph.icmp6_solicited = 1;
+	na->target = ns->target;
+	ether_addr_copy(&na->opt[2], n->ha);
+	na->opt[0] = ND_OPT_TARGET_LL_ADDR;
+	na->opt[1] = na_olen >> 3;
+
+	na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
+						&pip6->daddr,
+						sizeof(*na) + na_olen,
+						IPPROTO_ICMPV6,
+						csum_partial(na, sizeof(*na) + na_olen, 0));
+
+	pip6->payload_len = htons(sizeof(*na) + na_olen);
+
+	skb_push(reply, sizeof(struct ipv6hdr));
+	skb_push(reply, sizeof(struct ethhdr));
+
+	reply->ip_summed = CHECKSUM_UNNECESSARY;
+
+	/* tag the reply unless there is no tag or the vid is the pvid */
+	vg = p ? nbp_vlan_group_rcu(p) : br_vlan_group_rcu(br);
+	if (vlan_tci && br_get_pvid(vg) != (vlan_tci & VLAN_VID_MASK))
+		__vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci);
+
+	netdev_dbg(dev, "nd send dev %s dst %pI6 dst_hw %pM src %pI6 src_hw %pM\n",
+		   dev->name, &pip6->daddr, daddr, &pip6->saddr, n->ha);
+
+	if (p) {
+		dev_queue_xmit(reply);
+	} else {
+		skb_reset_mac_header(reply);
+		__skb_pull(reply, skb_network_offset(reply));
+		reply->pkt_type = PACKET_HOST;
+		netif_rx_ni(reply);
+	}
+}
+
+/* netdev_walk_all_upper_dev_rcu() callback: 1 when ipv6_chk_addr() finds
+ * the in6_addr behind @data on @dev, 0 otherwise.
+ */
+static int br_chk_addr_ip6(struct net_device *dev, void *data)
+{
+	struct in6_addr *target = data;
+
+	return ipv6_chk_addr(dev_net(dev), target, dev, 0) ? 1 : 0;
+}
+
+/* True when @addr is found on @dev itself or, failing that, on any of
+ * the upper devices visited by netdev_walk_all_upper_dev_rcu().  Both
+ * checks go through br_chk_addr_ip6().
+ */
+static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr)
+{
+	/* own address first, then the ones configured on upper devs */
+	if (br_chk_addr_ip6(dev, addr))
+		return true;
+
+	return netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr) != 0;
+}
+
+/* Neighbour-discovery counterpart of the ARP handling.  Flags @skb as
+ * proxyarp_replied when it must be kept away from BR_NEIGH_SUPPRESS
+ * ports, and answers a solicitation through br_nd_send() when the fdb
+ * entry for the target's mac points at a port with BR_NEIGH_SUPPRESS.
+ *
+ * @p is the ingress port, NULL when the bridge device itself transmits.
+ */
+void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
+		       u16 vid, struct net_bridge_port *p, struct nd_msg *msg)
+{
+	struct net_device *vlandev = br->dev;
+	struct in6_addr *saddr, *daddr;
+	struct net_bridge_fdb_entry *f;
+	struct ipv6hdr *iphdr;
+	struct neighbour *n;
+
+	BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
+	if (p && (p->flags & BR_NEIGH_SUPPRESS))
+		return;
+
+	if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) {
+		/* unsolicited advertisements: prevent flooding to neigh
+		 * suppress ports
+		 */
+		if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
+		    !msg->icmph.icmp6_solicited)
+			BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+		return;
+	}
+
+	iphdr = ipv6_hdr(skb);
+	saddr = &iphdr->saddr;
+	daddr = &iphdr->daddr;
+
+	if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) {
+		/* prevent flooding to neigh suppress ports */
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+		return;
+	}
+
+	if (vid != 0) {
+		/* build neigh table lookup on the vlan device */
+		vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
+						   vid);
+		if (!vlandev)
+			return;
+	}
+
+	if (br_is_local_ip6(vlandev, &msg->target)) {
+		/* the target is one of our own addresses: no proxy reply
+		 * and no forwarding to neigh suppress ports
+		 */
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+		return;
+	}
+
+	/* answer only for a valid neighbour whose mac has an fdb entry */
+	n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev);
+	if (!n)
+		return;
+	if (!(n->nud_state & NUD_VALID))
+		goto out;
+
+	f = br_fdb_find_rcu(br, n->ha, vid);
+	if (!f)
+		goto out;
+
+	if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) {
+		if (vid == 0)
+			br_nd_send(br, p, skb, n, 0, 0, msg);
+		else
+			br_nd_send(br, p, skb, n, skb->vlan_proto,
+				   skb_vlan_tag_get(skb), msg);
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+	} else if (br->neigh_suppress_enabled) {
+		/* no reply sent, but the mac is known: still tell
+		 * NEIGH_SUPPRESS ports that the request was answered
+		 */
+		BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+	}
+
+out:
+	neigh_release(n);
+}
+#endif
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 861ae2a165f4..af5b8c87f590 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
 	const struct nf_br_ops *nf_ops;
 	const unsigned char *dest;
+	struct ethhdr *eth;
 	u16 vid = 0;
 
 	rcu_read_lock();
@@ -53,14 +54,34 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	brstats->tx_bytes += skb->len;
 	u64_stats_update_end(&brstats->syncp);
 
+	br_switchdev_frame_unmark(skb);
 	BR_INPUT_SKB_CB(skb)->brdev = dev;
 
 	skb_reset_mac_header(skb);
+	eth = eth_hdr(skb);
 	skb_pull(skb, ETH_HLEN);
 
 	if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
 		goto out;
 
+	if (IS_ENABLED(CONFIG_INET) &&
+	    (eth->h_proto == htons(ETH_P_ARP) ||
+	     eth->h_proto == htons(ETH_P_RARP)) &&
+	    br->neigh_suppress_enabled) {
+		br_do_proxy_suppress_arp(skb, br, vid, NULL);
+	} else if (IS_ENABLED(CONFIG_IPV6) &&
+		   skb->protocol == htons(ETH_P_IPV6) &&
+		   br->neigh_suppress_enabled &&
+		   pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+				 sizeof(struct nd_msg)) &&
+		   ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+			struct nd_msg *msg, _msg;
+
+			msg = br_is_nd_neigh_msg(skb, &_msg);
+			if (msg)
+				br_do_suppress_nd(skb, br, vid, NULL, msg);
+	}
+
 	dest = eth_hdr(skb)->h_dest;
 	if (is_broadcast_ether_addr(dest)) {
 		br_flood(br, skb, BR_PKT_BROADCAST, false, true);
@@ -319,12 +340,13 @@ void br_netpoll_disable(struct net_bridge_port *p)
 
 #endif
 
-static int br_add_slave(struct net_device *dev, struct net_device *slave_dev)
+static int br_add_slave(struct net_device *dev, struct net_device *slave_dev,
+			struct netlink_ext_ack *extack)
 
 {
 	struct net_bridge *br = netdev_priv(dev);
 
-	return br_add_if(br, slave_dev);
+	return br_add_if(br, slave_dev, extack);
 }
 
 static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
@@ -399,7 +421,7 @@ void br_dev_setup(struct net_device *dev)
 	br->bridge_id.prio[0] = 0x80;
 	br->bridge_id.prio[1] = 0x00;
 
-	ether_addr_copy(br->group_addr, eth_reserved_addr_base);
+	ether_addr_copy(br->group_addr, eth_stp_addr);
 
 	br->stp_enabled = BR_NO_STP;
 	br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index a5e4a736a984..4ea5c8bbe286 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -25,6 +25,7 @@
 #include <asm/unaligned.h>
 #include <linux/if_vlan.h>
 #include <net/switchdev.h>
+#include <trace/events/bridge.h>
 #include "br_private.h"
 
 static struct kmem_cache *br_fdb_cache __read_mostly;
@@ -169,29 +170,13 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
 	}
 }
 
-static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
-{
-	struct switchdev_obj_port_fdb fdb = {
-		.obj = {
-			.orig_dev = f->dst->dev,
-			.id = SWITCHDEV_OBJ_ID_PORT_FDB,
-			.flags = SWITCHDEV_F_DEFER,
-		},
-		.vid = f->vlan_id,
-	};
-
-	ether_addr_copy(fdb.addr, f->addr.addr);
-	switchdev_port_obj_del(f->dst->dev, &fdb.obj);
-}
-
 static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
 {
+	trace_fdb_delete(br, f);
+
 	if (f->is_static)
 		fdb_del_hw_addr(br, f->addr.addr);
 
-	if (f->added_by_external_learn)
-		fdb_del_external_learn(f);
-
 	hlist_del_init_rcu(&f->hlist);
 	fdb_notify(br, f, RTM_DELNEIGH);
 	call_rcu(&f->rcu, fdb_rcu_free);
@@ -598,8 +583,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 				fdb->updated = now;
 			if (unlikely(added_by_user))
 				fdb->added_by_user = 1;
-			if (unlikely(fdb_modified))
+			if (unlikely(fdb_modified)) {
+				trace_br_fdb_update(br, source, addr, vid, added_by_user);
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
+			}
 		}
 	} else {
 		spin_lock(&br->hash_lock);
@@ -608,6 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			if (fdb) {
 				if (unlikely(added_by_user))
 					fdb->added_by_user = 1;
+				trace_br_fdb_update(br, source, addr, vid, added_by_user);
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
 			}
 		}
@@ -888,6 +876,8 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	struct net_bridge *br = NULL;
 	int err = 0;
 
+	trace_br_fdb_add(ndm, dev, addr, vid, nlh_flags);
+
 	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
 		pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
 		return -EINVAL;
@@ -1084,6 +1074,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 	bool modified = false;
 	int err = 0;
 
+	trace_br_fdb_external_learn_add(br, p, addr, vid);
+
 	spin_lock_bh(&br->hash_lock);
 
 	head = &br->hash[br_mac_hash(addr, vid)];
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 48fb17417fac..b4eed113d2ec 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -204,7 +204,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
 		/* Do not flood to ports that enable proxy ARP */
 		if (p->flags & BR_PROXYARP)
 			continue;
-		if ((p->flags & BR_PROXYARP_WIFI) &&
+		if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
 		    BR_INPUT_SKB_CB(skb)->proxyarp_replied)
 			continue;
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3aef22931ab..9ba4ed65c52b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -271,7 +271,7 @@ static void del_nbp(struct net_bridge_port *p)
 	br_stp_disable_port(p);
 	spin_unlock_bh(&br->lock);
 
-	br_ifinfo_notify(RTM_DELLINK, p);
+	br_ifinfo_notify(RTM_DELLINK, NULL, p);
 
 	list_del_rcu(&p->list);
 	if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
@@ -310,6 +310,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
 		del_nbp(p);
 	}
 
+	br_recalculate_neigh_suppress_enabled(br);
+
 	br_fdb_delete_by_port(br, NULL, 0, 1);
 
 	cancel_delayed_work_sync(&br->gc_work);
@@ -480,7 +482,8 @@ netdev_features_t br_features_recompute(struct net_bridge *br,
 }
 
 /* called with RTNL */
-int br_add_if(struct net_bridge *br, struct net_device *dev)
+int br_add_if(struct net_bridge *br, struct net_device *dev,
+	      struct netlink_ext_ack *extack)
 {
 	struct net_bridge_port *p;
 	int err = 0;
@@ -500,16 +503,22 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		return -EINVAL;
 
 	/* No bridging of bridges */
-	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
+	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
+		NL_SET_ERR_MSG(extack,
+			       "Can not enslave a bridge to a bridge");
 		return -ELOOP;
+	}
 
 	/* Device is already being bridged */
 	if (br_port_exists(dev))
 		return -EBUSY;
 
 	/* No bridging devices that dislike that (e.g. wireless) */
-	if (dev->priv_flags & IFF_DONT_BRIDGE)
+	if (dev->priv_flags & IFF_DONT_BRIDGE) {
+		NL_SET_ERR_MSG(extack,
+			       "Device does not allow enslaving to a bridge");
 		return -EOPNOTSUPP;
+	}
 
 	p = new_nbp(br, dev);
 	if (IS_ERR(p))
@@ -540,7 +549,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	dev->priv_flags |= IFF_BRIDGE_PORT;
 
-	err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL);
+	err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
 	if (err)
 		goto err5;
 
@@ -580,7 +589,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		br_stp_enable_port(p);
 	spin_unlock_bh(&br->lock);
 
-	br_ifinfo_notify(RTM_NEWLINK, p);
+	br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 
 	if (changed_addr)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
@@ -653,4 +662,7 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
 
 	if (mask & BR_AUTO_MASK)
 		nbp_update_port_count(br);
+
+	if (mask & BR_NEIGH_SUPPRESS)
+		br_recalculate_neigh_suppress_enabled(br);
 }
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7637f58c1226..7f98a7d25866 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -71,62 +71,6 @@ static int br_pass_frame_up(struct sk_buff *skb)
 		       br_netif_receive_skb);
 }
 
-static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
-			    u16 vid, struct net_bridge_port *p)
-{
-	struct net_device *dev = br->dev;
-	struct neighbour *n;
-	struct arphdr *parp;
-	u8 *arpptr, *sha;
-	__be32 sip, tip;
-
-	BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
-
-	if ((dev->flags & IFF_NOARP) ||
-	    !pskb_may_pull(skb, arp_hdr_len(dev)))
-		return;
-
-	parp = arp_hdr(skb);
-
-	if (parp->ar_pro != htons(ETH_P_IP) ||
-	    parp->ar_op != htons(ARPOP_REQUEST) ||
-	    parp->ar_hln != dev->addr_len ||
-	    parp->ar_pln != 4)
-		return;
-
-	arpptr = (u8 *)parp + sizeof(struct arphdr);
-	sha = arpptr;
-	arpptr += dev->addr_len;	/* sha */
-	memcpy(&sip, arpptr, sizeof(sip));
-	arpptr += sizeof(sip);
-	arpptr += dev->addr_len;	/* tha */
-	memcpy(&tip, arpptr, sizeof(tip));
-
-	if (ipv4_is_loopback(tip) ||
-	    ipv4_is_multicast(tip))
-		return;
-
-	n = neigh_lookup(&arp_tbl, &tip, dev);
-	if (n) {
-		struct net_bridge_fdb_entry *f;
-
-		if (!(n->nud_state & NUD_VALID)) {
-			neigh_release(n);
-			return;
-		}
-
-		f = br_fdb_find_rcu(br, n->ha, vid);
-		if (f && ((p->flags & BR_PROXYARP) ||
-			  (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) {
-			arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
-				 sha, n->ha, sha);
-			BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
-		}
-
-		neigh_release(n);
-	}
-}
-
 /* note: already called with rcu_read_lock */
 int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
@@ -171,15 +115,29 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 
 	BR_INPUT_SKB_CB(skb)->brdev = br->dev;
 
-	if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
-		br_do_proxy_arp(skb, br, vid, p);
+	if (IS_ENABLED(CONFIG_INET) &&
+	    (skb->protocol == htons(ETH_P_ARP) ||
+	     skb->protocol == htons(ETH_P_RARP))) {
+		br_do_proxy_suppress_arp(skb, br, vid, p);
+	} else if (IS_ENABLED(CONFIG_IPV6) &&
+		   skb->protocol == htons(ETH_P_IPV6) &&
+		   br->neigh_suppress_enabled &&
+		   pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+				 sizeof(struct nd_msg)) &&
+		   ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+			struct nd_msg *msg, _msg;
+
+			msg = br_is_nd_neigh_msg(skb, &_msg);
+			if (msg)
+				br_do_suppress_nd(skb, br, vid, p, msg);
+	}
 
 	switch (pkt_type) {
 	case BR_PKT_MULTICAST:
 		mdst = br_mdb_get(br, skb, vid);
 		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
 		    br_multicast_querier_exists(br, eth_hdr(skb))) {
-			if ((mdst && mdst->mglist) ||
+			if ((mdst && mdst->host_joined) ||
 			    br_multicast_is_router(br)) {
 				local_rcv = true;
 				br->dev->stats.multicast++;
@@ -289,6 +247,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 		 *
 		 * Others reserved for future standardization
 		 */
+		fwd_mask |= p->group_fwd_mask;
 		switch (dest[5]) {
 		case 0x00:	/* Bridge Group Address */
 			/* If STP is turned off,
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 7970f8540cbb..73b957fd639d 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -98,7 +98,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
 		return -EINVAL;
 
 	if (isadd)
-		ret = br_add_if(br, dev);
+		ret = br_add_if(br, dev, NULL);
 	else
 		ret = br_del_if(br, dev);
 
@@ -293,7 +293,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
 	if (!ret) {
 		if (p)
-			br_ifinfo_notify(RTM_NEWLINK, p);
+			br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 		else
 			netdev_state_change(br->dev);
 	}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index a0b11e7d67d9..b0f4c734900b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/err.h>
 #include <linux/igmp.h>
 #include <linux/kernel.h>
@@ -291,6 +292,46 @@ err:
 	kfree(priv);
 }
 
+/* Build a SWITCHDEV_OBJ_ID_HOST_MDB object for @entry, with @dev as its
+ * orig_dev, and add it to (RTM_NEWMDB) or delete it from (RTM_DELMDB)
+ * @lower_dev.  Any other @type leaves @lower_dev untouched.
+ */
+static void br_mdb_switchdev_host_port(struct net_device *dev,
+				       struct net_device *lower_dev,
+				       struct br_mdb_entry *entry, int type)
+{
+	struct switchdev_obj_port_mdb mdb = {
+		.obj = {
+			.orig_dev = dev,
+			.id = SWITCHDEV_OBJ_ID_HOST_MDB,
+			.flags = SWITCHDEV_F_DEFER,
+		},
+		.vid = entry->vid,
+	};
+
+	if (entry->addr.proto == htons(ETH_P_IP))
+		ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
+#endif
+
+	if (type == RTM_NEWMDB)
+		switchdev_port_obj_add(lower_dev, &mdb.obj);
+	else if (type == RTM_DELMDB)
+		switchdev_port_obj_del(lower_dev, &mdb.obj);
+}
+
+static void br_mdb_switchdev_host(struct net_device *dev,
+				  struct br_mdb_entry *entry, int type)
+{
+	struct net_device *lower;	/* current lower device of @dev */
+	struct list_head *pos;		/* iteration cursor */
+
+	netdev_for_each_lower_dev(dev, lower, pos)
+		br_mdb_switchdev_host_port(dev, lower, entry, type);
+}
+
 static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
 			    struct br_mdb_entry *entry, int type)
 {
@@ -316,7 +357,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
 #endif
 
 	mdb.obj.orig_dev = port_dev;
-	if (port_dev && type == RTM_NEWMDB) {
+	if (p && port_dev && type == RTM_NEWMDB) {
 		complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
 		if (complete_info) {
 			complete_info->port = p;
@@ -326,10 +367,13 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
 			if (switchdev_port_obj_add(port_dev, &mdb.obj))
 				kfree(complete_info);
 		}
-	} else if (port_dev && type == RTM_DELMDB) {
+	} else if (p && port_dev && type == RTM_DELMDB) {
 		switchdev_port_obj_del(port_dev, &mdb.obj);
 	}
 
+	if (!p)
+		br_mdb_switchdev_host(dev, entry, type);
+
 	skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
 	if (!skb)
 		goto errout;
@@ -352,7 +396,10 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
 	struct br_mdb_entry entry;
 
 	memset(&entry, 0, sizeof(entry));
-	entry.ifindex = port->dev->ifindex;
+	if (port)
+		entry.ifindex = port->dev->ifindex;
+	else
+		entry.ifindex = dev->ifindex;
 	entry.addr.proto = group->proto;
 	entry.addr.u.ip4 = group->u.ip4;
 #if IS_ENABLED(CONFIG_IPV6)
@@ -654,7 +701,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
 		call_rcu_bh(&p->rcu, br_multicast_free_pg);
 		err = 0;
 
-		if (!mp->ports && !mp->mglist &&
+		if (!mp->ports && !mp->host_joined &&
 		    netif_running(br->dev))
 			mod_timer(&mp->timer, jiffies);
 		break;
@@ -713,9 +760,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 void br_mdb_init(void)
 {
-	rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
-	rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL);
-	rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL);
+	rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0);
+	rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0);
+	rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0);
 }
 
 void br_mdb_uninit(void)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 8dc5c8d69bcd..cb4729539b82 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -239,9 +239,9 @@ static void br_multicast_free_group(struct rcu_head *head)
 	kfree(mp);
 }
 
-static void br_multicast_group_expired(unsigned long data)
+static void br_multicast_group_expired(struct timer_list *t)
 {
-	struct net_bridge_mdb_entry *mp = (void *)data;
+	struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
 	struct net_bridge *br = mp->br;
 	struct net_bridge_mdb_htable *mdb;
 
@@ -249,7 +249,8 @@ static void br_multicast_group_expired(unsigned long data)
 	if (!netif_running(br->dev) || timer_pending(&mp->timer))
 		goto out;
 
-	mp->mglist = false;
+	mp->host_joined = false;
+	br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
 
 	if (mp->ports)
 		goto out;
@@ -292,7 +293,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
 			      p->flags);
 		call_rcu_bh(&p->rcu, br_multicast_free_pg);
 
-		if (!mp->ports && !mp->mglist &&
+		if (!mp->ports && !mp->host_joined &&
 		    netif_running(br->dev))
 			mod_timer(&mp->timer, jiffies);
 
@@ -302,9 +303,9 @@ static void br_multicast_del_pg(struct net_bridge *br,
 	WARN_ON(1);
 }
 
-static void br_multicast_port_group_expired(unsigned long data)
+static void br_multicast_port_group_expired(struct timer_list *t)
 {
-	struct net_bridge_port_group *pg = (void *)data;
+	struct net_bridge_port_group *pg = from_timer(pg, t, timer);
 	struct net_bridge *br = pg->port->br;
 
 	spin_lock(&br->multicast_lock);
@@ -701,8 +702,7 @@ rehash:
 
 	mp->br = br;
 	mp->addr = *group;
-	setup_timer(&mp->timer, br_multicast_group_expired,
-		    (unsigned long)mp);
+	timer_setup(&mp->timer, br_multicast_group_expired, 0);
 
 	hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
 	mdb->size++;
@@ -729,8 +729,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
 	p->flags = flags;
 	rcu_assign_pointer(p->next, next);
 	hlist_add_head(&p->mglist, &port->mglist);
-	setup_timer(&p->timer, br_multicast_port_group_expired,
-		    (unsigned long)p);
+	timer_setup(&p->timer, br_multicast_port_group_expired, 0);
 
 	if (src)
 		memcpy(p->eth_addr, src, ETH_ALEN);
@@ -775,7 +774,10 @@ static int br_multicast_add_group(struct net_bridge *br,
 		goto err;
 
 	if (!port) {
-		mp->mglist = true;
+		if (!mp->host_joined) {
+			mp->host_joined = true;
+			br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
+		}
 		mod_timer(&mp->timer, now + br->multicast_membership_interval);
 		goto out;
 	}
@@ -843,9 +845,10 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 }
 #endif
 
-static void br_multicast_router_expired(unsigned long data)
+static void br_multicast_router_expired(struct timer_list *t)
 {
-	struct net_bridge_port *port = (void *)data;
+	struct net_bridge_port *port =
+			from_timer(port, t, multicast_router_timer);
 	struct net_bridge *br = port->br;
 
 	spin_lock(&br->multicast_lock);
@@ -859,8 +862,32 @@ out:
 	spin_unlock(&br->multicast_lock);
 }
 
-static void br_multicast_local_router_expired(unsigned long data)
+/* Set SWITCHDEV_ATTR_ID_BRIDGE_MROUTER to @is_mc_router on @p's device. */
+static void br_mc_router_state_change(struct net_bridge *p, bool is_mc_router)
+{
+	struct switchdev_attr mrouter_attr = {
+		.id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
+		.flags = SWITCHDEV_F_DEFER,
+		.orig_dev = p->dev,
+		.u.mrouter = is_mc_router,
+	};
+
+	switchdev_port_attr_set(p->dev, &mrouter_attr);
+}
+
+/* Expiry handler of br->multicast_router_timer: signal "not a multicast
+ * router" unless the type is disabled/permanent or the timer is pending.
+ */
+static void br_multicast_local_router_expired(struct timer_list *t)
 {
+	struct net_bridge *br = from_timer(br, t, multicast_router_timer);
+
+	spin_lock(&br->multicast_lock);
+	if (br->multicast_router != MDB_RTR_TYPE_DISABLED &&
+	    br->multicast_router != MDB_RTR_TYPE_PERM &&
+	    !timer_pending(&br->multicast_router_timer))
+		br_mc_router_state_change(br, false);
+	spin_unlock(&br->multicast_lock);
 }
 
 static void br_multicast_querier_expired(struct net_bridge *br,
@@ -876,17 +903,17 @@ out:
 	spin_unlock(&br->multicast_lock);
 }
 
-static void br_ip4_multicast_querier_expired(unsigned long data)
+static void br_ip4_multicast_querier_expired(struct timer_list *t)
 {
-	struct net_bridge *br = (void *)data;
+	struct net_bridge *br = from_timer(br, t, ip4_other_query.timer);
 
 	br_multicast_querier_expired(br, &br->ip4_own_query);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_querier_expired(unsigned long data)
+static void br_ip6_multicast_querier_expired(struct timer_list *t)
 {
-	struct net_bridge *br = (void *)data;
+	struct net_bridge *br = from_timer(br, t, ip6_other_query.timer);
 
 	br_multicast_querier_expired(br, &br->ip6_own_query);
 }
@@ -987,17 +1014,17 @@ out:
 	spin_unlock(&br->multicast_lock);
 }
 
-static void br_ip4_multicast_port_query_expired(unsigned long data)
+static void br_ip4_multicast_port_query_expired(struct timer_list *t)
 {
-	struct net_bridge_port *port = (void *)data;
+	struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer);
 
 	br_multicast_port_query_expired(port, &port->ip4_own_query);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_port_query_expired(unsigned long data)
+static void br_ip6_multicast_port_query_expired(struct timer_list *t)
 {
-	struct net_bridge_port *port = (void *)data;
+	struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer);
 
 	br_multicast_port_query_expired(port, &port->ip6_own_query);
 }
@@ -1019,13 +1046,13 @@ int br_multicast_add_port(struct net_bridge_port *port)
 {
 	port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
 
-	setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
-		    (unsigned long)port);
-	setup_timer(&port->ip4_own_query.timer,
-		    br_ip4_multicast_port_query_expired, (unsigned long)port);
+	timer_setup(&port->multicast_router_timer,
+		    br_multicast_router_expired, 0);
+	timer_setup(&port->ip4_own_query.timer,
+		    br_ip4_multicast_port_query_expired, 0);
 #if IS_ENABLED(CONFIG_IPV6)
-	setup_timer(&port->ip6_own_query.timer,
-		    br_ip6_multicast_port_query_expired, (unsigned long)port);
+	timer_setup(&port->ip6_own_query.timer,
+		    br_ip6_multicast_port_query_expired, 0);
 #endif
 	br_mc_disabled_update(port->dev, port->br->multicast_disabled);
 
@@ -1364,9 +1391,12 @@ static void br_multicast_mark_router(struct net_bridge *br,
 	unsigned long now = jiffies;
 
 	if (!port) {
-		if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY)
+		if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
+			if (!timer_pending(&br->multicast_router_timer))
+				br_mc_router_state_change(br, true);
 			mod_timer(&br->multicast_router_timer,
 				  now + br->multicast_querier_interval);
+		}
 		return;
 	}
 
@@ -1451,7 +1481,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 
 	max_delay *= br->multicast_last_member_count;
 
-	if (mp->mglist &&
+	if (mp->host_joined &&
 	    (timer_pending(&mp->timer) ?
 	     time_after(mp->timer.expires, now + max_delay) :
 	     try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1535,7 +1565,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 		goto out;
 
 	max_delay *= br->multicast_last_member_count;
-	if (mp->mglist &&
+	if (mp->host_joined &&
 	    (timer_pending(&mp->timer) ?
 	     time_after(mp->timer.expires, now + max_delay) :
 	     try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1596,7 +1626,7 @@ br_multicast_leave_group(struct net_bridge *br,
 			br_mdb_notify(br->dev, port, group, RTM_DELMDB,
 				      p->flags);
 
-			if (!mp->ports && !mp->mglist &&
+			if (!mp->ports && !mp->host_joined &&
 			    netif_running(br->dev))
 				mod_timer(&mp->timer, jiffies);
 		}
@@ -1636,7 +1666,7 @@ br_multicast_leave_group(struct net_bridge *br,
 		     br->multicast_last_member_interval;
 
 	if (!port) {
-		if (mp->mglist &&
+		if (mp->host_joined &&
 		    (timer_pending(&mp->timer) ?
 		     time_after(mp->timer.expires, time) :
 		     try_to_del_timer_sync(&mp->timer) >= 0)) {
@@ -1906,17 +1936,17 @@ static void br_multicast_query_expired(struct net_bridge *br,
 	spin_unlock(&br->multicast_lock);
 }
 
-static void br_ip4_multicast_query_expired(unsigned long data)
+static void br_ip4_multicast_query_expired(struct timer_list *t)
 {
-	struct net_bridge *br = (void *)data;
+	struct net_bridge *br = from_timer(br, t, ip4_own_query.timer);
 
 	br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_query_expired(unsigned long data)
+static void br_ip6_multicast_query_expired(struct timer_list *t)
 {
-	struct net_bridge *br = (void *)data;
+	struct net_bridge *br = from_timer(br, t, ip6_own_query.timer);
 
 	br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
 }
@@ -1951,17 +1981,17 @@ void br_multicast_init(struct net_bridge *br)
 	br->has_ipv6_addr = 1;
 
 	spin_lock_init(&br->multicast_lock);
-	setup_timer(&br->multicast_router_timer,
+	timer_setup(&br->multicast_router_timer,
 		    br_multicast_local_router_expired, 0);
-	setup_timer(&br->ip4_other_query.timer,
-		    br_ip4_multicast_querier_expired, (unsigned long)br);
-	setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired,
-		    (unsigned long)br);
+	timer_setup(&br->ip4_other_query.timer,
+		    br_ip4_multicast_querier_expired, 0);
+	timer_setup(&br->ip4_own_query.timer,
+		    br_ip4_multicast_query_expired, 0);
 #if IS_ENABLED(CONFIG_IPV6)
-	setup_timer(&br->ip6_other_query.timer,
-		    br_ip6_multicast_querier_expired, (unsigned long)br);
-	setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired,
-		    (unsigned long)br);
+	timer_setup(&br->ip6_other_query.timer,
+		    br_ip6_multicast_querier_expired, 0);
+	timer_setup(&br->ip6_own_query.timer,
+		    br_ip6_multicast_query_expired, 0);
 #endif
 }
 
@@ -2042,9 +2072,14 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
 	switch (val) {
 	case MDB_RTR_TYPE_DISABLED:
 	case MDB_RTR_TYPE_PERM:
+		br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
 		del_timer(&br->multicast_router_timer);
-		/* fall through */
+		br->multicast_router = val;
+		err = 0;
+		break;
 	case MDB_RTR_TYPE_TEMP_QUERY:
+		if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
+			br_mc_router_state_change(br, false);
 		br->multicast_router = val;
 		err = 0;
 		break;
@@ -2184,6 +2219,18 @@ bool br_multicast_enabled(const struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(br_multicast_enabled);
 
+bool br_multicast_router(const struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	bool is_router;
+
+	spin_lock_bh(&br->multicast_lock);
+	is_router = br_multicast_is_router(br);
+	spin_unlock_bh(&br->multicast_lock);
+	return is_router;
+}
+EXPORT_SYMBOL_GPL(br_multicast_router);
+
 int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
 {
 	unsigned long max_delay;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2261e5194c82..c2eea1b8737a 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -887,7 +887,7 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable);
 
 /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
  * br_dev_queue_push_xmit is called afterwards */
-static struct nf_hook_ops br_nf_ops[] __read_mostly = {
+static const struct nf_hook_ops br_nf_ops[] = {
 	{
 		.hook = br_nf_pre_routing,
 		.pf = NFPROTO_BRIDGE,
@@ -985,22 +985,25 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
 		      int (*okfn)(struct net *, struct sock *,
 				  struct sk_buff *))
 {
-	struct nf_hook_entry *elem;
+	const struct nf_hook_entries *e;
 	struct nf_hook_state state;
+	struct nf_hook_ops **ops;
+	unsigned int i;
 	int ret;
 
-	for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
-	     elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF;
-	     elem = rcu_dereference(elem->next))
-		;
-
-	if (!elem)
+	e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+	if (!e)
 		return okfn(net, sk, skb);
 
+	ops = nf_hook_entries_get_hook_ops(e);
+	for (i = 0; i < e->num_hook_entries &&
+	      ops[i]->priority <= NF_BR_PRI_BRNF; i++)
+		;
+
 	nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
 			   sk, net, okfn);
 
-	ret = nf_hook_slow(skb, &state, elem);
+	ret = nf_hook_slow(skb, &state, e, i);
 	if (ret == 1)
 		ret = okfn(net, sk, skb);
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 3bc890716c89..d0ef0a8e8831 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -138,6 +138,7 @@ static inline size_t br_port_info_size(void)
 		+ nla_total_size(1)	/* IFLA_BRPORT_PROXYARP */
 		+ nla_total_size(1)	/* IFLA_BRPORT_PROXYARP_WIFI */
 		+ nla_total_size(1)	/* IFLA_BRPORT_VLAN_TUNNEL */
+		+ nla_total_size(1)	/* IFLA_BRPORT_NEIGH_SUPPRESS */
 		+ nla_total_size(sizeof(struct ifla_bridge_id))	/* IFLA_BRPORT_ROOT_ID */
 		+ nla_total_size(sizeof(struct ifla_bridge_id))	/* IFLA_BRPORT_BRIDGE_ID */
 		+ nla_total_size(sizeof(u16))	/* IFLA_BRPORT_DESIGNATED_PORT */
@@ -152,6 +153,7 @@ static inline size_t br_port_info_size(void)
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 		+ nla_total_size(sizeof(u8))	/* IFLA_BRPORT_MULTICAST_ROUTER */
 #endif
+		+ nla_total_size(sizeof(u16))	/* IFLA_BRPORT_GROUP_FWD_MASK */
 		+ 0;
 }
 
@@ -208,7 +210,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
 		       p->topology_change_ack) ||
 	    nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) ||
 	    nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags &
-							BR_VLAN_TUNNEL)))
+							BR_VLAN_TUNNEL)) ||
+	    nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) ||
+	    nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS,
+		       !!(p->flags & BR_NEIGH_SUPPRESS)))
 		return -EMSGSIZE;
 
 	timerval = br_timer_value(&p->message_age_timer);
@@ -356,14 +361,14 @@ nla_put_failure:
  * Contains port and master info as well as carrier and bridge state.
  */
 static int br_fill_ifinfo(struct sk_buff *skb,
-			  struct net_bridge_port *port,
+			  const struct net_bridge_port *port,
 			  u32 pid, u32 seq, int event, unsigned int flags,
 			  u32 filter_mask, const struct net_device *dev)
 {
+	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
 	struct net_bridge *br;
 	struct ifinfomsg *hdr;
 	struct nlmsghdr *nlh;
-	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
 
 	if (port)
 		br = port->br;
@@ -449,28 +454,36 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-/*
- * Notify listeners of a change in port information
- */
-void br_ifinfo_notify(int event, struct net_bridge_port *port)
+/* Notify listeners of a change in bridge or port information */
+void br_ifinfo_notify(int event, const struct net_bridge *br,
+		      const struct net_bridge_port *port)
 {
-	struct net *net;
+	u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
+	struct net_device *dev;
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
-	u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
+	struct net *net;
+	u16 port_no = 0;
 
-	if (!port)
+	if (WARN_ON(!port && !br))
 		return;
 
-	net = dev_net(port->dev);
-	br_debug(port->br, "port %u(%s) event %d\n",
-		 (unsigned int)port->port_no, port->dev->name, event);
+	if (port) {
+		dev = port->dev;
+		br = port->br;
+		port_no = port->port_no;
+	} else {
+		dev = br->dev;
+	}
 
-	skb = nlmsg_new(br_nlmsg_size(port->dev, filter), GFP_ATOMIC);
+	net = dev_net(dev);
+	br_debug(br, "port %u(%s) event %d\n", port_no, dev->name, event);
+
+	skb = nlmsg_new(br_nlmsg_size(dev, filter), GFP_ATOMIC);
 	if (skb == NULL)
 		goto errout;
 
-	err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, port->dev);
+	err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, dev);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -483,7 +496,6 @@ errout:
 	rtnl_set_sk_err(net, RTNLGRP_LINK, err);
 }
 
-
 /*
  * Dump information about all ports, in response to GETLINK
  */
@@ -501,8 +513,9 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 }
 
 static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
-			int cmd, struct bridge_vlan_info *vinfo)
+			int cmd, struct bridge_vlan_info *vinfo, bool *changed)
 {
+	bool curr_change;
 	int err = 0;
 
 	switch (cmd) {
@@ -511,22 +524,27 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
 			/* if the MASTER flag is set this will act on the global
 			 * per-VLAN entry as well
 			 */
-			err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
-			if (err)
-				break;
+			err = nbp_vlan_add(p, vinfo->vid, vinfo->flags,
+					   &curr_change);
 		} else {
 			vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY;
-			err = br_vlan_add(br, vinfo->vid, vinfo->flags);
+			err = br_vlan_add(br, vinfo->vid, vinfo->flags,
+					  &curr_change);
 		}
+		if (curr_change)
+			*changed = true;
 		break;
 
 	case RTM_DELLINK:
 		if (p) {
-			nbp_vlan_delete(p, vinfo->vid);
-			if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
-				br_vlan_delete(p->br, vinfo->vid);
-		} else {
-			br_vlan_delete(br, vinfo->vid);
+			if (!nbp_vlan_delete(p, vinfo->vid))
+				*changed = true;
+
+			if ((vinfo->flags & BRIDGE_VLAN_INFO_MASTER) &&
+			    !br_vlan_delete(p->br, vinfo->vid))
+				*changed = true;
+		} else if (!br_vlan_delete(br, vinfo->vid)) {
+			*changed = true;
 		}
 		break;
 	}
@@ -537,7 +555,8 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
 static int br_process_vlan_info(struct net_bridge *br,
 				struct net_bridge_port *p, int cmd,
 				struct bridge_vlan_info *vinfo_curr,
-				struct bridge_vlan_info **vinfo_last)
+				struct bridge_vlan_info **vinfo_last,
+				bool *changed)
 {
 	if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK)
 		return -EINVAL;
@@ -567,22 +586,22 @@ static int br_process_vlan_info(struct net_bridge *br,
 		       sizeof(struct bridge_vlan_info));
 		for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) {
 			tmp_vinfo.vid = v;
-			err = br_vlan_info(br, p, cmd, &tmp_vinfo);
+			err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed);
 			if (err)
 				break;
 		}
 		*vinfo_last = NULL;
 
-		return 0;
+		return err;
 	}
 
-	return br_vlan_info(br, p, cmd, vinfo_curr);
+	return br_vlan_info(br, p, cmd, vinfo_curr, changed);
 }
 
 static int br_afspec(struct net_bridge *br,
 		     struct net_bridge_port *p,
 		     struct nlattr *af_spec,
-		     int cmd)
+		     int cmd, bool *changed)
 {
 	struct bridge_vlan_info *vinfo_curr = NULL;
 	struct bridge_vlan_info *vinfo_last = NULL;
@@ -602,7 +621,8 @@ static int br_afspec(struct net_bridge *br,
 				return err;
 			err = br_process_vlan_tunnel_info(br, p, cmd,
 							  &tinfo_curr,
-							  &tinfo_last);
+							  &tinfo_last,
+							  changed);
 			if (err)
 				return err;
 			break;
@@ -611,7 +631,7 @@ static int br_afspec(struct net_bridge *br,
 				return -EINVAL;
 			vinfo_curr = nla_data(attr);
 			err = br_process_vlan_info(br, p, cmd, vinfo_curr,
-						   &vinfo_last);
+						   &vinfo_last, changed);
 			if (err)
 				return err;
 			break;
@@ -637,6 +657,9 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
 	[IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
 	[IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
 	[IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
+	[IFLA_BRPORT_VLAN_TUNNEL] = { .type = NLA_U8 },
+	[IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
+	[IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
 };
 
 /* Change the state of the port and notify spanning tree */
@@ -773,6 +796,20 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
 			return err;
 	}
 #endif
+
+	if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
+		u16 fwd_mask = nla_get_u16(tb[IFLA_BRPORT_GROUP_FWD_MASK]);
+
+		if (fwd_mask & BR_GROUPFWD_MACPAUSE)
+			return -EINVAL;
+		p->group_fwd_mask = fwd_mask;
+	}
+
+	err = br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS,
+			       BR_NEIGH_SUPPRESS);
+	if (err)
+		return err;
+
 	br_port_flags_change(p, old_flags ^ p->flags);
 	return 0;
 }
@@ -780,10 +817,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
 /* Change state and parameters on port. */
 int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
 {
+	struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
+	struct nlattr *tb[IFLA_BRPORT_MAX + 1];
+	struct net_bridge_port *p;
 	struct nlattr *protinfo;
 	struct nlattr *afspec;
-	struct net_bridge_port *p;
-	struct nlattr *tb[IFLA_BRPORT_MAX + 1];
+	bool changed = false;
 	int err = 0;
 
 	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
@@ -819,15 +858,14 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
 		}
 		if (err)
 			goto out;
+		changed = true;
 	}
 
-	if (afspec) {
-		err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
-				afspec, RTM_SETLINK);
-	}
+	if (afspec)
+		err = br_afspec(br, p, afspec, RTM_SETLINK, &changed);
 
-	if (err == 0)
-		br_ifinfo_notify(RTM_NEWLINK, p);
+	if (changed)
+		br_ifinfo_notify(RTM_NEWLINK, br, p);
 out:
 	return err;
 }
@@ -835,8 +873,10 @@ out:
 /* Delete port information */
 int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
 {
-	struct nlattr *afspec;
+	struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
 	struct net_bridge_port *p;
+	struct nlattr *afspec;
+	bool changed = false;
 	int err = 0;
 
 	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
@@ -848,13 +888,12 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
 	if (!p && !(dev->priv_flags & IFF_EBRIDGE))
 		return -EINVAL;
 
-	err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
-			afspec, RTM_DELLINK);
-	if (err == 0)
+	err = br_afspec(br, p, afspec, RTM_DELLINK, &changed);
+	if (changed)
 		/* Send RTM_NEWLINK because userspace
 		 * expects RTM_NEWLINK for vlan dels
 		 */
-		br_ifinfo_notify(RTM_NEWLINK, p);
+		br_ifinfo_notify(RTM_NEWLINK, br, p);
 
 	return err;
 }
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 3712c7f0e00c..da8cb99fd259 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -198,7 +198,7 @@ static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX +
 };
 
 static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd,
-			       u16 vid, u32 tun_id)
+			       u16 vid, u32 tun_id, bool *changed)
 {
 	int err = 0;
 
@@ -208,9 +208,12 @@ static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd,
 	switch (cmd) {
 	case RTM_SETLINK:
 		err = nbp_vlan_tunnel_info_add(p, vid, tun_id);
+		if (!err)
+			*changed = true;
 		break;
 	case RTM_DELLINK:
-		nbp_vlan_tunnel_info_delete(p, vid);
+		if (!nbp_vlan_tunnel_info_delete(p, vid))
+			*changed = true;
 		break;
 	}
 
@@ -254,7 +257,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr,
 int br_process_vlan_tunnel_info(struct net_bridge *br,
 				struct net_bridge_port *p, int cmd,
 				struct vtunnel_info *tinfo_curr,
-				struct vtunnel_info *tinfo_last)
+				struct vtunnel_info *tinfo_last,
+				bool *changed)
 {
 	int err;
 
@@ -272,7 +276,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
 			return -EINVAL;
 		t = tinfo_last->tunid;
 		for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) {
-			err = br_vlan_tunnel_info(p, cmd, v, t);
+			err = br_vlan_tunnel_info(p, cmd, v, t, changed);
 			if (err)
 				return err;
 			t++;
@@ -283,7 +287,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
 		if (tinfo_last->flags)
 			return -EINVAL;
 		err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid,
-					  tinfo_curr->tunid);
+					  tinfo_curr->tunid, changed);
 		if (err)
 			return err;
 		memset(tinfo_last, 0, sizeof(struct vtunnel_info));
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index fd9ee73e0a6d..1312b8d20ec3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -36,7 +36,14 @@
 /* Control of forwarding link local multicast */
 #define BR_GROUPFWD_DEFAULT	0
 /* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */
-#define BR_GROUPFWD_RESTRICTED	0x0007u
+enum {
+	BR_GROUPFWD_STP		= BIT(0),
+	BR_GROUPFWD_MACPAUSE	= BIT(1),
+	BR_GROUPFWD_LACP	= BIT(2),
+};
+
+#define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \
+				BR_GROUPFWD_LACP)
 /* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
 #define BR_GROUPFWD_8021AD	0xB801u
 
@@ -202,7 +209,7 @@ struct net_bridge_mdb_entry
 	struct rcu_head			rcu;
 	struct timer_list		timer;
 	struct br_ip			addr;
-	bool				mglist;
+	bool				host_joined;
 };
 
 struct net_bridge_mdb_htable
@@ -268,6 +275,7 @@ struct net_bridge_port {
 #ifdef CONFIG_NET_SWITCHDEV
 	int				offload_fwd_mark;
 #endif
+	u16				group_fwd_mask;
 };
 
 #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
@@ -396,6 +404,7 @@ struct net_bridge {
 #ifdef CONFIG_NET_SWITCHDEV
 	int offload_fwd_mark;
 #endif
+	bool				neigh_suppress_enabled;
 };
 
 struct br_input_skb_cb {
@@ -558,7 +567,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
 void br_port_carrier_check(struct net_bridge_port *p);
 int br_add_bridge(struct net *net, const char *name);
 int br_del_bridge(struct net *net, const char *name);
-int br_add_if(struct net_bridge *br, struct net_device *dev);
+int br_add_if(struct net_bridge *br, struct net_device *dev,
+	      struct netlink_ext_ack *extack);
 int br_del_if(struct net_bridge *br, struct net_device *dev);
 int br_min_mtu(const struct net_bridge *br);
 netdev_features_t br_features_recompute(struct net_bridge *br,
@@ -793,7 +803,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
 			       const struct net_bridge_port *port,
 			       struct net_bridge_vlan_group *vg,
 			       struct sk_buff *skb);
-int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
+		bool *changed);
 int br_vlan_delete(struct net_bridge *br, u16 vid);
 void br_vlan_flush(struct net_bridge *br);
 struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
@@ -806,7 +817,8 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
 int br_vlan_init(struct net_bridge *br);
 int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
 int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
-int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
+int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
+		 bool *changed);
 int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
 void nbp_vlan_flush(struct net_bridge_port *port);
 int nbp_vlan_init(struct net_bridge_port *port);
@@ -893,8 +905,10 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
 	return skb;
 }
 
-static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
+static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
+			      bool *changed)
 {
+	*changed = false;
 	return -EOPNOTSUPP;
 }
 
@@ -916,8 +930,10 @@ static inline int br_vlan_init(struct net_bridge *br)
 	return 0;
 }
 
-static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
+static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
+			       bool *changed)
 {
+	*changed = false;
 	return -EOPNOTSUPP;
 }
 
@@ -1055,7 +1071,8 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr)
 extern struct rtnl_link_ops br_link_ops;
 int br_netlink_init(void);
 void br_netlink_fini(void);
-void br_ifinfo_notify(int event, struct net_bridge_port *port);
+void br_ifinfo_notify(int event, const struct net_bridge *br,
+		      const struct net_bridge_port *port);
 int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
 int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
 int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
@@ -1091,6 +1108,11 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
 			       unsigned long mask);
 void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
 			     int type);
+
+static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
+{
+	skb->offload_fwd_mark = 0;
+}
 #else
 static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
 {
@@ -1119,6 +1141,17 @@ static inline void
 br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 {
 }
+
+static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
+{
+}
 #endif /* CONFIG_NET_SWITCHDEV */
 
+/* br_arp_nd_proxy.c */
+void br_recalculate_neigh_suppress_enabled(struct net_bridge *br);
+void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
+			      u16 vid, struct net_bridge_port *p);
+void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
+		       u16 vid, struct net_bridge_port *p, struct nd_msg *msg);
+struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m);
 #endif
diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h
index 4a447a378ab3..a259471bfd78 100644
--- a/net/bridge/br_private_tunnel.h
+++ b/net/bridge/br_private_tunnel.h
@@ -26,7 +26,8 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
 				struct net_bridge_port *p,
 				int cmd,
 				struct vtunnel_info *tinfo_curr,
-				struct vtunnel_info *tinfo_last);
+				struct vtunnel_info *tinfo_last,
+				bool *changed);
 int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg);
 int br_fill_vlan_tunnel_info(struct sk_buff *skb,
 			     struct net_bridge_vlan_group *vg);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 8f56c2d1f1a7..b6941961a876 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -123,7 +123,7 @@ static void br_root_port_block(const struct net_bridge *br,
 		  (unsigned int) p->port_no, p->dev->name);
 
 	br_set_state(p, BR_STATE_LISTENING);
-	br_ifinfo_notify(RTM_NEWLINK, p);
+	br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 
 	if (br->forward_delay > 0)
 		mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
@@ -403,7 +403,7 @@ static void br_make_blocking(struct net_bridge_port *p)
 			br_topology_change_detection(p->br);
 
 		br_set_state(p, BR_STATE_BLOCKING);
-		br_ifinfo_notify(RTM_NEWLINK, p);
+		br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 
 		del_timer(&p->forward_delay_timer);
 	}
@@ -426,7 +426,7 @@ static void br_make_forwarding(struct net_bridge_port *p)
 	else
 		br_set_state(p, BR_STATE_LEARNING);
 
-	br_ifinfo_notify(RTM_NEWLINK, p);
+	br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 
 	if (br->forward_delay != 0)
 		mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 89110319ef0f..808e2b914015 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -96,7 +96,7 @@ void br_stp_enable_port(struct net_bridge_port *p)
 {
 	br_init_port(p);
 	br_port_state_selection(p->br);
-	br_ifinfo_notify(RTM_NEWLINK, p);
+	br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 }
 
 /* called under bridge lock */
@@ -111,7 +111,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
 	p->topology_change_ack = 0;
 	p->config_pending = 0;
 
-	br_ifinfo_notify(RTM_NEWLINK, p);
+	br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 
 	del_timer(&p->message_age_timer);
 	del_timer(&p->forward_delay_timer);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 60b6fe277a8b..e7739de5f0e1 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -31,9 +31,9 @@ static int br_is_designated_for_some_port(const struct net_bridge *br)
 	return 0;
 }
 
-static void br_hello_timer_expired(unsigned long arg)
+static void br_hello_timer_expired(struct timer_list *t)
 {
-	struct net_bridge *br = (struct net_bridge *)arg;
+	struct net_bridge *br = from_timer(br, t, hello_timer);
 
 	br_debug(br, "hello timer expired\n");
 	spin_lock(&br->lock);
@@ -47,9 +47,9 @@ static void br_hello_timer_expired(unsigned long arg)
 	spin_unlock(&br->lock);
 }
 
-static void br_message_age_timer_expired(unsigned long arg)
+static void br_message_age_timer_expired(struct timer_list *t)
 {
-	struct net_bridge_port *p = (struct net_bridge_port *) arg;
+	struct net_bridge_port *p = from_timer(p, t, message_age_timer);
 	struct net_bridge *br = p->br;
 	const bridge_id *id = &p->designated_bridge;
 	int was_root;
@@ -80,9 +80,9 @@ static void br_message_age_timer_expired(unsigned long arg)
 	spin_unlock(&br->lock);
 }
 
-static void br_forward_delay_timer_expired(unsigned long arg)
+static void br_forward_delay_timer_expired(struct timer_list *t)
 {
-	struct net_bridge_port *p = (struct net_bridge_port *) arg;
+	struct net_bridge_port *p = from_timer(p, t, forward_delay_timer);
 	struct net_bridge *br = p->br;
 
 	br_debug(br, "port %u(%s) forward delay timer\n",
@@ -99,14 +99,14 @@ static void br_forward_delay_timer_expired(unsigned long arg)
 		netif_carrier_on(br->dev);
 	}
 	rcu_read_lock();
-	br_ifinfo_notify(RTM_NEWLINK, p);
+	br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 	rcu_read_unlock();
 	spin_unlock(&br->lock);
 }
 
-static void br_tcn_timer_expired(unsigned long arg)
+static void br_tcn_timer_expired(struct timer_list *t)
 {
-	struct net_bridge *br = (struct net_bridge *) arg;
+	struct net_bridge *br = from_timer(br, t, tcn_timer);
 
 	br_debug(br, "tcn timer expired\n");
 	spin_lock(&br->lock);
@@ -118,9 +118,9 @@ static void br_tcn_timer_expired(unsigned long arg)
 	spin_unlock(&br->lock);
 }
 
-static void br_topology_change_timer_expired(unsigned long arg)
+static void br_topology_change_timer_expired(struct timer_list *t)
 {
-	struct net_bridge *br = (struct net_bridge *) arg;
+	struct net_bridge *br = from_timer(br, t, topology_change_timer);
 
 	br_debug(br, "topo change timer expired\n");
 	spin_lock(&br->lock);
@@ -129,9 +129,9 @@ static void br_topology_change_timer_expired(unsigned long arg)
 	spin_unlock(&br->lock);
 }
 
-static void br_hold_timer_expired(unsigned long arg)
+static void br_hold_timer_expired(struct timer_list *t)
 {
-	struct net_bridge_port *p = (struct net_bridge_port *) arg;
+	struct net_bridge_port *p = from_timer(p, t, hold_timer);
 
 	br_debug(p->br, "port %u(%s) hold timer expired\n",
 		 (unsigned int) p->port_no, p->dev->name);
@@ -144,27 +144,17 @@ static void br_hold_timer_expired(unsigned long arg)
 
 void br_stp_timer_init(struct net_bridge *br)
 {
-	setup_timer(&br->hello_timer, br_hello_timer_expired,
-		      (unsigned long) br);
-
-	setup_timer(&br->tcn_timer, br_tcn_timer_expired,
-		      (unsigned long) br);
-
-	setup_timer(&br->topology_change_timer,
-		      br_topology_change_timer_expired,
-		      (unsigned long) br);
+	timer_setup(&br->hello_timer, br_hello_timer_expired, 0);
+	timer_setup(&br->tcn_timer, br_tcn_timer_expired, 0);
+	timer_setup(&br->topology_change_timer,
+		    br_topology_change_timer_expired, 0);
 }
 
 void br_stp_port_timer_init(struct net_bridge_port *p)
 {
-	setup_timer(&p->message_age_timer, br_message_age_timer_expired,
-		      (unsigned long) p);
-
-	setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired,
-		      (unsigned long) p);
-
-	setup_timer(&p->hold_timer, br_hold_timer_expired,
-		      (unsigned long) p);
+	timer_setup(&p->message_age_timer, br_message_age_timer_expired, 0);
+	timer_setup(&p->forward_delay_timer, br_forward_delay_timer_expired, 0);
+	timer_setup(&p->hold_timer, br_hold_timer_expired, 0);
 }
 
 /* Report ticks left (in USER_HZ) used for API */
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 181a44d0f1da..9700e0f3307b 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
@@ -115,7 +116,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
 void
 br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 {
-	if (!fdb->added_by_user)
+	if (!fdb->added_by_user || !fdb->dst)
 		return;
 
 	switch (type) {
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 5d5d413a6cf8..0254c35b2bf0 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -165,6 +165,23 @@ static int store_flush(struct net_bridge_port *p, unsigned long v)
 }
 static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
 
+static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
+{
+	return sprintf(buf, "%#x\n", p->group_fwd_mask);
+}
+
+static int store_group_fwd_mask(struct net_bridge_port *p,
+				unsigned long v)
+{
+	if (v & BR_GROUPFWD_MACPAUSE)
+		return -EINVAL;
+	p->group_fwd_mask = v;
+
+	return 0;
+}
+static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
+		   store_group_fwd_mask);
+
 BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
 BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
 BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
@@ -174,6 +191,7 @@ BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
 BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
 BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
 BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD);
+BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS);
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -223,6 +241,8 @@ static const struct brport_attribute *brport_attrs[] = {
 	&brport_attr_proxyarp_wifi,
 	&brport_attr_multicast_flood,
 	&brport_attr_broadcast_flood,
+	&brport_attr_group_fwd_mask,
+	&brport_attr_neigh_suppress,
 	NULL
 };
 
@@ -260,7 +280,7 @@ static ssize_t brport_store(struct kobject *kobj,
 			ret = brport_attr->store(p, val);
 			spin_unlock_bh(&p->br->lock);
 			if (!ret) {
-				br_ifinfo_notify(RTM_NEWLINK, p);
+				br_ifinfo_notify(RTM_NEWLINK, NULL, p);
 				ret = count;
 			}
 		}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 233a30040c91..51935270c651 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -32,27 +32,34 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
 	return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
 }
 
-static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
 {
 	if (vg->pvid == vid)
-		return;
+		return false;
 
 	smp_wmb();
 	vg->pvid = vid;
+
+	return true;
 }
 
-static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
 {
 	if (vg->pvid != vid)
-		return;
+		return false;
 
 	smp_wmb();
 	vg->pvid = 0;
+
+	return true;
 }
 
-static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
+/* return true if anything changed, false otherwise */
+static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
 {
 	struct net_bridge_vlan_group *vg;
+	u16 old_flags = v->flags;
+	bool ret;
 
 	if (br_vlan_is_master(v))
 		vg = br_vlan_group(v->br);
@@ -60,14 +67,16 @@ static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
 		vg = nbp_vlan_group(v->port);
 
 	if (flags & BRIDGE_VLAN_INFO_PVID)
-		__vlan_add_pvid(vg, v->vid);
+		ret = __vlan_add_pvid(vg, v->vid);
 	else
-		__vlan_delete_pvid(vg, v->vid);
+		ret = __vlan_delete_pvid(vg, v->vid);
 
 	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
 		v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
 	else
 		v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;
+
+	return ret || !!(old_flags ^ v->flags);
 }
 
 static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
@@ -151,8 +160,10 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
 	vg = br_vlan_group(br);
 	masterv = br_vlan_find(vg, vid);
 	if (!masterv) {
+		bool changed;
+
 		/* missing global ctx, create it now */
-		if (br_vlan_add(br, vid, 0))
+		if (br_vlan_add(br, vid, 0, &changed))
 			return NULL;
 		masterv = br_vlan_find(vg, vid);
 		if (WARN_ON(!masterv))
@@ -232,8 +243,11 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
 
 		/* need to work on the master vlan too */
 		if (flags & BRIDGE_VLAN_INFO_MASTER) {
-			err = br_vlan_add(br, v->vid, flags |
-						      BRIDGE_VLAN_INFO_BRENTRY);
+			bool changed;
+
+			err = br_vlan_add(br, v->vid,
+					  flags | BRIDGE_VLAN_INFO_BRENTRY,
+					  &changed);
 			if (err)
 				goto out_filt;
 		}
@@ -550,8 +564,9 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
 
 /* Must be protected by RTNL.
  * Must be called with vid in range from 1 to 4094 inclusive.
+ * *changed is set to true only if the vlan was created or updated
  */
-int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
 {
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_vlan *vlan;
@@ -559,6 +574,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
 
 	ASSERT_RTNL();
 
+	*changed = false;
 	vg = br_vlan_group(br);
 	vlan = br_vlan_find(vg, vid);
 	if (vlan) {
@@ -576,8 +592,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
 			refcount_inc(&vlan->refcnt);
 			vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
 			vg->num_vlans++;
+			*changed = true;
 		}
-		__vlan_add_flags(vlan, flags);
+		if (__vlan_add_flags(vlan, flags))
+			*changed = true;
+
 		return 0;
 	}
 
@@ -600,6 +619,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
 	if (ret) {
 		free_percpu(vlan->stats);
 		kfree(vlan);
+	} else {
+		*changed = true;
 	}
 
 	return ret;
@@ -824,9 +845,10 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
 	const struct net_bridge_vlan *pvent;
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_port *p;
+	unsigned long *changed;
+	bool vlchange;
 	u16 old_pvid;
 	int err = 0;
-	unsigned long *changed;
 
 	if (!pvid) {
 		br_vlan_disable_default_pvid(br);
@@ -850,7 +872,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
 		err = br_vlan_add(br, pvid,
 				  BRIDGE_VLAN_INFO_PVID |
 				  BRIDGE_VLAN_INFO_UNTAGGED |
-				  BRIDGE_VLAN_INFO_BRENTRY);
+				  BRIDGE_VLAN_INFO_BRENTRY,
+				  &vlchange);
 		if (err)
 			goto out;
 		br_vlan_delete(br, old_pvid);
@@ -869,7 +892,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
 
 		err = nbp_vlan_add(p, pvid,
 				   BRIDGE_VLAN_INFO_PVID |
-				   BRIDGE_VLAN_INFO_UNTAGGED);
+				   BRIDGE_VLAN_INFO_UNTAGGED,
+				   &vlchange);
 		if (err)
 			goto err_port;
 		nbp_vlan_delete(p, old_pvid);
@@ -890,7 +914,8 @@ err_port:
 		if (old_pvid)
 			nbp_vlan_add(p, old_pvid,
 				     BRIDGE_VLAN_INFO_PVID |
-				     BRIDGE_VLAN_INFO_UNTAGGED);
+				     BRIDGE_VLAN_INFO_UNTAGGED,
+				     &vlchange);
 		nbp_vlan_delete(p, pvid);
 	}
 
@@ -899,7 +924,8 @@ err_port:
 			br_vlan_add(br, old_pvid,
 				    BRIDGE_VLAN_INFO_PVID |
 				    BRIDGE_VLAN_INFO_UNTAGGED |
-				    BRIDGE_VLAN_INFO_BRENTRY);
+				    BRIDGE_VLAN_INFO_BRENTRY,
+				    &vlchange);
 		br_vlan_delete(br, pvid);
 	}
 	goto out;
@@ -931,6 +957,7 @@ int br_vlan_init(struct net_bridge *br)
 {
 	struct net_bridge_vlan_group *vg;
 	int ret = -ENOMEM;
+	bool changed;
 
 	vg = kzalloc(sizeof(*vg), GFP_KERNEL);
 	if (!vg)
@@ -947,7 +974,7 @@ int br_vlan_init(struct net_bridge *br)
 	rcu_assign_pointer(br->vlgrp, vg);
 	ret = br_vlan_add(br, 1,
 			  BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
-			  BRIDGE_VLAN_INFO_BRENTRY);
+			  BRIDGE_VLAN_INFO_BRENTRY, &changed);
 	if (ret)
 		goto err_vlan_add;
 
@@ -992,9 +1019,12 @@ int nbp_vlan_init(struct net_bridge_port *p)
 	INIT_LIST_HEAD(&vg->vlan_list);
 	rcu_assign_pointer(p->vlgrp, vg);
 	if (p->br->default_pvid) {
+		bool changed;
+
 		ret = nbp_vlan_add(p, p->br->default_pvid,
 				   BRIDGE_VLAN_INFO_PVID |
-				   BRIDGE_VLAN_INFO_UNTAGGED);
+				   BRIDGE_VLAN_INFO_UNTAGGED,
+				   &changed);
 		if (ret)
 			goto err_vlan_add;
 	}
@@ -1016,8 +1046,10 @@ err_vlan_enabled:
 
 /* Must be protected by RTNL.
  * Must be called with vid in range from 1 to 4094 inclusive.
+ * *changed is set to true only if the vlan was created or updated
  */
-int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
+int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
+		 bool *changed)
 {
 	struct switchdev_obj_port_vlan v = {
 		.obj.orig_dev = port->dev,
@@ -1031,13 +1063,15 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
 
 	ASSERT_RTNL();
 
+	*changed = false;
 	vlan = br_vlan_find(nbp_vlan_group(port), vid);
 	if (vlan) {
 		/* Pass the flags to the hardware bridge */
 		ret = switchdev_port_obj_add(port->dev, &v.obj);
 		if (ret && ret != -EOPNOTSUPP)
 			return ret;
-		__vlan_add_flags(vlan, flags);
+		*changed = __vlan_add_flags(vlan, flags);
+
 		return 0;
 	}
 
@@ -1050,6 +1084,8 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
 	ret = __vlan_add(vlan, flags);
 	if (ret)
 		kfree(vlan);
+	else
+		*changed = true;
 
 	return ret;
 }
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index be4d0cea78ce..2f28e16de6c7 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the netfilter modules for Link Layer filtering on a bridge.
 #
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index d06968bdf5ec..2b46c50abce0 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -64,14 +64,14 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			if (NF_INVF(info, EBT_IP_DPORT,
 				    dst < info->dport[0] ||
 				    dst > info->dport[1]))
-			return false;
+				return false;
 		}
 		if (info->bitmask & EBT_IP_SPORT) {
 			u32 src = ntohs(pptr->src);
 			if (NF_INVF(info, EBT_IP_SPORT,
 				    src < info->sport[0] ||
 				    src > info->sport[1]))
-			return false;
+				return false;
 		}
 	}
 	return true;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 4617491be41e..2a5a52a53ec4 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -89,7 +89,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			if (NF_INVF(info, EBT_IP6_SPORT,
 				    src < info->sport[0] ||
 				    src > info->sport[1]))
-			return false;
+				return false;
 		}
 		if ((info->bitmask & EBT_IP6_ICMP6) &&
 		    NF_INVF(info, EBT_IP6_ICMP6,
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 2585b100ebbb..276b60262981 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -65,8 +65,8 @@ static int ebt_broute(struct sk_buff *skb)
 
 static int __net_init broute_net_init(struct net *net)
 {
-	net->xt.broute_table = ebt_register_table(net, &broute_table, NULL);
-	return PTR_ERR_OR_ZERO(net->xt.broute_table);
+	return ebt_register_table(net, &broute_table, NULL,
+				  &net->xt.broute_table);
 }
 
 static void __net_exit broute_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index f22ef7c21913..c41da5fac84f 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -70,7 +70,7 @@ ebt_out_hook(void *priv, struct sk_buff *skb,
 	return ebt_do_table(skb, state, state->net->xt.frame_filter);
 }
 
-static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
+static const struct nf_hook_ops ebt_ops_filter[] = {
 	{
 		.hook		= ebt_in_hook,
 		.pf		= NFPROTO_BRIDGE,
@@ -93,8 +93,8 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
 
 static int __net_init frame_filter_net_init(struct net *net)
 {
-	net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter);
-	return PTR_ERR_OR_ZERO(net->xt.frame_filter);
+	return ebt_register_table(net, &frame_filter, ebt_ops_filter,
+				  &net->xt.frame_filter);
 }
 
 static void __net_exit frame_filter_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 2f7a4f314406..08df7406ecb3 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -48,7 +48,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
 	return 0;
 }
 
-static struct ebt_table frame_nat = {
+static const struct ebt_table frame_nat = {
 	.name		= "nat",
 	.table		= &initial_table,
 	.valid_hooks	= NAT_VALID_HOOKS,
@@ -70,7 +70,7 @@ ebt_nat_out(void *priv, struct sk_buff *skb,
 	return ebt_do_table(skb, state, state->net->xt.frame_nat);
 }
 
-static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
+static const struct nf_hook_ops ebt_ops_nat[] = {
 	{
 		.hook		= ebt_nat_out,
 		.pf		= NFPROTO_BRIDGE,
@@ -93,8 +93,8 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
 
 static int __net_init frame_nat_net_init(struct net *net)
 {
-	net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat);
-	return PTR_ERR_OR_ZERO(net->xt.frame_nat);
+	return ebt_register_table(net, &frame_nat, ebt_ops_nat,
+				  &net->xt.frame_nat);
 }
 
 static void __net_exit frame_nat_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9c6e619f452b..37817d25b63d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -252,13 +252,11 @@ unsigned int ebt_do_table(struct sk_buff *skb,
 		}
 		if (verdict == EBT_RETURN) {
 letsreturn:
-#ifdef CONFIG_NETFILTER_DEBUG
-			if (sp == 0) {
-				BUGPRINT("RETURN on base chain");
+			if (WARN(sp == 0, "RETURN on base chain")) {
 				/* act like this is EBT_CONTINUE */
 				goto letscontinue;
 			}
-#endif
+
 			sp--;
 			/* put all the local variables right */
 			i = cs[sp].n;
@@ -271,26 +269,24 @@ letsreturn:
 		}
 		if (verdict == EBT_CONTINUE)
 			goto letscontinue;
-#ifdef CONFIG_NETFILTER_DEBUG
-		if (verdict < 0) {
-			BUGPRINT("bogus standard verdict\n");
+
+		if (WARN(verdict < 0, "bogus standard verdict\n")) {
 			read_unlock_bh(&table->lock);
 			return NF_DROP;
 		}
-#endif
+
 		/* jump to a udc */
 		cs[sp].n = i + 1;
 		cs[sp].chaininfo = chaininfo;
 		cs[sp].e = ebt_next_entry(point);
 		i = 0;
 		chaininfo = (struct ebt_entries *) (base + verdict);
-#ifdef CONFIG_NETFILTER_DEBUG
-		if (chaininfo->distinguisher) {
-			BUGPRINT("jump to non-chain\n");
+
+		if (WARN(chaininfo->distinguisher, "jump to non-chain\n")) {
 			read_unlock_bh(&table->lock);
 			return NF_DROP;
 		}
-#endif
+
 		nentries = chaininfo->nentries;
 		point = (struct ebt_entry *)chaininfo->data;
 		counter_base = cb_base + chaininfo->counter_offset;
@@ -1069,15 +1065,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
 
 #ifdef CONFIG_AUDIT
 	if (audit_enabled) {
-		struct audit_buffer *ab;
-
-		ab = audit_log_start(current->audit_context, GFP_KERNEL,
-				     AUDIT_NETFILTER_CFG);
-		if (ab) {
-			audit_log_format(ab, "table=%s family=%u entries=%u",
-					 repl->name, AF_BRIDGE, repl->nentries);
-			audit_log_end(ab);
-		}
+		audit_log(current->audit_context, GFP_KERNEL,
+			  AUDIT_NETFILTER_CFG,
+			  "table=%s family=%u entries=%u",
+			  repl->name, AF_BRIDGE, repl->nentries);
 	}
 #endif
 	return ret;
@@ -1178,9 +1169,8 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
 	kfree(table);
 }
 
-struct ebt_table *
-ebt_register_table(struct net *net, const struct ebt_table *input_table,
-		   const struct nf_hook_ops *ops)
+int ebt_register_table(struct net *net, const struct ebt_table *input_table,
+		       const struct nf_hook_ops *ops, struct ebt_table **res)
 {
 	struct ebt_table_info *newinfo;
 	struct ebt_table *t, *table;
@@ -1192,7 +1182,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table,
 	    repl->entries == NULL || repl->entries_size == 0 ||
 	    repl->counters != NULL || input_table->private != NULL) {
 		BUGPRINT("Bad table data for ebt_register_table!!!\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	/* Don't add one table to multiple lists. */
@@ -1261,16 +1251,18 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table,
 	list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]);
 	mutex_unlock(&ebt_mutex);
 
+	WRITE_ONCE(*res, table);
+
 	if (!ops)
-		return table;
+		return 0;
 
 	ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
 	if (ret) {
 		__ebt_unregister_table(net, table);
-		return ERR_PTR(ret);
+		*res = NULL;
 	}
 
-	return table;
+	return ret;
 free_unlock:
 	mutex_unlock(&ebt_mutex);
 free_chainstack:
@@ -1285,7 +1277,7 @@ free_newinfo:
 free_table:
 	kfree(table);
 out:
-	return ERR_PTR(ret);
+	return ret;
 }
 
 void ebt_unregister_table(struct net *net, struct ebt_table *table,
@@ -2120,9 +2112,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
 	for (i = 0, j = 1 ; j < 4 ; j++, i++) {
 		struct compat_ebt_entry_mwt *match32;
 		unsigned int size;
-		char *buf = buf_start;
+		char *buf = buf_start + offsets[i];
 
-		buf = buf_start + offsets[i];
 		if (offsets[i] > offsets[j])
 			return -EINVAL;
 
diff --git a/net/caif/Makefile b/net/caif/Makefile
index cc2b51154d03..4f6c0517cdfb 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ccflags-$(CONFIG_CAIF_DEBUG)     :=      -DDEBUG
 
 caif-y := caif_dev.o \
diff --git a/net/can/Makefile b/net/can/Makefile
index 10936754e3f2..1242bbbfe57f 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 #  Makefile for the Linux Controller Area Network core.
 #
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 88edac0f3e36..003b2d6d655f 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -78,7 +78,7 @@ MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
 static struct kmem_cache *rcv_cache __read_mostly;
 
 /* table of registered CAN protocols */
-static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
+static const struct can_proto __rcu *proto_tab[CAN_NPROTO] __read_mostly;
 static DEFINE_MUTEX(proto_tab_lock);
 
 static atomic_t skbcounter = ATOMIC_INIT(0);
@@ -788,7 +788,7 @@ int can_proto_register(const struct can_proto *cp)
 
 	mutex_lock(&proto_tab_lock);
 
-	if (proto_tab[proto]) {
+	if (rcu_access_pointer(proto_tab[proto])) {
 		pr_err("can: protocol %d already registered\n", proto);
 		err = -EBUSY;
 	} else
@@ -812,7 +812,7 @@ void can_proto_unregister(const struct can_proto *cp)
 	int proto = cp->protocol;
 
 	mutex_lock(&proto_tab_lock);
-	BUG_ON(proto_tab[proto] != cp);
+	BUG_ON(rcu_access_pointer(proto_tab[proto]) != cp);
 	RCU_INIT_POINTER(proto_tab[proto], NULL);
 	mutex_unlock(&proto_tab_lock);
 
@@ -875,15 +875,20 @@ static int can_pernet_init(struct net *net)
 	spin_lock_init(&net->can.can_rcvlists_lock);
 	net->can.can_rx_alldev_list =
 		kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
-
+	if (!net->can.can_rx_alldev_list)
+		goto out;
 	net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
+	if (!net->can.can_stats)
+		goto out_free_alldev_list;
 	net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL);
+	if (!net->can.can_pstats)
+		goto out_free_can_stats;
 
 	if (IS_ENABLED(CONFIG_PROC_FS)) {
 		/* the statistics are updated every second (timer triggered) */
 		if (stats_timer) {
-			setup_timer(&net->can.can_stattimer, can_stat_update,
-				    (unsigned long)net);
+			timer_setup(&net->can.can_stattimer, can_stat_update,
+				    0);
 			mod_timer(&net->can.can_stattimer,
 				  round_jiffies(jiffies + HZ));
 		}
@@ -892,6 +897,13 @@ static int can_pernet_init(struct net *net)
 	}
 
 	return 0;
+
+ out_free_can_stats:
+	kfree(net->can.can_stats);
+ out_free_alldev_list:
+	kfree(net->can.can_rx_alldev_list);
+ out:
+	return -ENOMEM;
 }
 
 static void can_pernet_exit(struct net *net)
diff --git a/net/can/af_can.h b/net/can/af_can.h
index d0ef45bb2a72..eca6463c6213 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -113,6 +113,6 @@ struct s_pstats {
 /* function prototypes for the CAN networklayer procfs (proc.c) */
 void can_init_proc(struct net *net);
 void can_remove_proc(struct net *net);
-void can_stat_update(unsigned long data);
+void can_stat_update(struct timer_list *t);
 
 #endif /* AF_CAN_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 47a8748d953a..13690334efa3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1493,13 +1493,14 @@ static int bcm_init(struct sock *sk)
 static int bcm_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
-	struct net *net = sock_net(sk);
+	struct net *net;
 	struct bcm_sock *bo;
 	struct bcm_op *op, *next;
 
-	if (sk == NULL)
+	if (!sk)
 		return 0;
 
+	net = sock_net(sk);
 	bo = bcm_sk(sk);
 
 	/* remove bcm_ops, timer, rx_unregister(), etc. */
diff --git a/net/can/gw.c b/net/can/gw.c
index 29748d844c3f..73a02af4b5d7 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1031,15 +1031,15 @@ static __init int cgw_module_init(void)
 	notifier.notifier_call = cgw_notifier;
 	register_netdevice_notifier(&notifier);
 
-	if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) {
+	if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) {
 		unregister_netdevice_notifier(&notifier);
 		kmem_cache_destroy(cgw_cache);
 		return -ENOBUFS;
 	}
 
 	/* Only the first call to __rtnl_register can fail */
-	__rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL);
-	__rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL);
+	__rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0);
+	__rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0);
 
 	return 0;
 }
diff --git a/net/can/proc.c b/net/can/proc.c
index 83045f00c63c..d979b3dc49a6 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -115,9 +115,9 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
 	return rate;
 }
 
-void can_stat_update(unsigned long data)
+void can_stat_update(struct timer_list *t)
 {
-	struct net *net = (struct net *)data;
+	struct net *net = from_timer(net, t, can.can_stattimer);
 	struct s_stats *can_stats = net->can.can_stats;
 	unsigned long j = jiffies; /* snapshot */
 
@@ -221,7 +221,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
 
 	seq_putc(m, '\n');
 
-	if (net->can.can_stattimer.function == can_stat_update) {
+	if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) {
 		seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
 				can_stats->total_rx_match_ratio);
 
@@ -291,7 +291,7 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v)
 
 	user_reset = 1;
 
-	if (net->can.can_stattimer.function == can_stat_update) {
+	if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) {
 		seq_printf(m, "Scheduled statistic reset #%ld.\n",
 				can_pstats->stats_reset + 1);
 	} else {
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 6a5180903e7b..b4bded4b5396 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for CEPH filesystem.
 #
diff --git a/net/ceph/armor.c b/net/ceph/armor.c
index 1fc1ee11dfa2..0db8065928df 100644
--- a/net/ceph/armor.c
+++ b/net/ceph/armor.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/errno.h>
 
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 48bb8d95195b..dbde2b3c3c15 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ceph/ceph_debug.h>
 
 #include <linux/module.h>
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index df45e467c81f..41d2a0c72236 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/ceph/ceph_debug.h>
 
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index 62021535ae4a..860ed9875791 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _FS_CEPH_AUTH_NONE_H
 #define _FS_CEPH_AUTH_NONE_H
 
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 8757fb87dab8..2f4a1baf5f52 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/ceph/ceph_debug.h>
 
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 48e9ad41bd2a..454cb54568af 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _FS_CEPH_AUTH_X_H
 #define _FS_CEPH_AUTH_X_H
 
diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
index 671d30576c4f..32c13d763b9a 100644
--- a/net/ceph/auth_x_protocol.h
+++ b/net/ceph/auth_x_protocol.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __FS_CEPH_AUTH_X_PROTOCOL
 #define __FS_CEPH_AUTH_X_PROTOCOL
 
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index add5f921a0ff..5622763ad402 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/ceph/ceph_debug.h>
 
diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
index dcbe67ff3e2b..756a2dc10d27 100644
--- a/net/ceph/ceph_fs.c
+++ b/net/ceph/ceph_fs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Some non-inline ceph helpers
  */
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c
index 19b7d8aa915c..10e01494993c 100644
--- a/net/ceph/ceph_strings.c
+++ b/net/ceph/ceph_strings.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Ceph string constants
  */
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 08ada893f01e..8d2032b2f225 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ceph/ceph_debug.h>
 
 #include <linux/types.h>
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 4b428f46a8ca..3d70244bc1b6 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #ifdef __KERNEL__
 # include <linux/slab.h>
 # include <linux/crush/crush.h>
diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c
index ed123af49eba..e5cc603cdb17 100644
--- a/net/ceph/crush/hash.c
+++ b/net/ceph/crush/hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #ifdef __KERNEL__
 # include <linux/crush/hash.h>
 #else
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 46008d5ac504..489610ac1cdd 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/ceph/ceph_debug.h>
 
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 58d83aa7740f..bb45c7d43739 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _FS_CEPH_CRYPTO_H
 #define _FS_CEPH_CRYPTO_H
 
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index fa5233e0d01c..1eef6806aa1a 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ceph/ceph_debug.h>
 
 #include <linux/device.h>
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a67298c7e0cd..ad93342c90d7 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ceph/ceph_debug.h>
 
 #include <linux/crc32c.h>
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 875675765531..9ae1bab8c05d 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ceph/ceph_debug.h>
 
 #include <linux/module.h>
@@ -676,7 +677,8 @@ bad:
 /*
  * Do a synchronous statfs().
  */
-int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
+int ceph_monc_do_statfs(struct ceph_mon_client *monc, u64 data_pool,
+			struct ceph_statfs *buf)
 {
 	struct ceph_mon_generic_request *req;
 	struct ceph_mon_statfs *h;
@@ -696,6 +698,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
 		goto out;
 
 	req->u.st = buf;
+	req->request->hdr.version = cpu_to_le16(2);
 
 	mutex_lock(&monc->mutex);
 	register_generic_request(req);
@@ -705,6 +708,8 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
 	h->monhdr.session_mon = cpu_to_le16(-1);
 	h->monhdr.session_mon_tid = 0;
 	h->fsid = monc->monmap->fsid;
+	h->contains_data_pool = (data_pool != CEPH_NOPOOL);
+	h->data_pool = cpu_to_le64(data_pool);
 	send_generic_request(monc, req);
 	mutex_unlock(&monc->mutex);
 
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index aaed59a47b1d..72571535883f 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ceph/ceph_debug.h>
 
 #include <linux/err.h>
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index dcfbdd74dfd1..2814dba5902d 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/ceph/ceph_debug.h>
 
@@ -863,8 +864,6 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
 		dst->cls.method_len = src->cls.method_len;
 		dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
 		break;
-	case CEPH_OSD_OP_STARTSYNC:
-		break;
 	case CEPH_OSD_OP_WATCH:
 		dst->watch.cookie = cpu_to_le64(src->watch.cookie);
 		dst->watch.ver = cpu_to_le64(0);
@@ -916,9 +915,6 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
  * if the file was recently truncated, we include information about its
  * old and new size so that the object can be updated appropriately.  (we
  * avoid synchronously deleting truncated objects because it's slow.)
- *
- * if @do_sync, include a 'startsync' command so that the osd will flush
- * data quickly.
  */
 struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 					       struct ceph_file_layout *layout,
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f358d0bfa76b..0da27c66349a 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/ceph/ceph_debug.h>
 
@@ -2445,19 +2446,34 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
 
 	pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
 	if (pg) {
-		for (i = 0; i < raw->size; i++) {
-			for (j = 0; j < pg->pg_upmap_items.len; j++) {
-				int from = pg->pg_upmap_items.from_to[j][0];
-				int to = pg->pg_upmap_items.from_to[j][1];
-
-				if (from == raw->osds[i]) {
-					if (!(to != CRUSH_ITEM_NONE &&
-					      to < osdmap->max_osd &&
-					      osdmap->osd_weight[to] == 0))
-						raw->osds[i] = to;
+		/*
+		 * Note: this approach does not allow a bidirectional swap,
+		 * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
+		 */
+		for (i = 0; i < pg->pg_upmap_items.len; i++) {
+			int from = pg->pg_upmap_items.from_to[i][0];
+			int to = pg->pg_upmap_items.from_to[i][1];
+			int pos = -1;
+			bool exists = false;
+
+			/* make sure replacement doesn't already appear */
+			for (j = 0; j < raw->size; j++) {
+				int osd = raw->osds[j];
+
+				if (osd == to) {
+					exists = true;
 					break;
 				}
+				/* ignore mapping if target is marked out */
+				if (osd == from && pos < 0 &&
+				    !(to != CRUSH_ITEM_NONE &&
+				      to < osdmap->max_osd &&
+				      osdmap->osd_weight[to] == 0)) {
+					pos = j;
+				}
 			}
+			if (!exists && pos >= 0)
+				raw->osds[pos] = to;
 		}
 	}
 }
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index ce09f73be759..2ea0564771d2 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/module.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 1a7c9a79a53c..a3d0adc828e6 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ceph/ceph_debug.h>
 
 #include <linux/module.h>
@@ -24,9 +25,9 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
 		return ERR_PTR(-ENOMEM);
 
 	while (got < num_pages) {
-		rc = get_user_pages_unlocked(
+		rc = get_user_pages_fast(
 		    (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
-		    num_pages - got, pages + got, write_page ? FOLL_WRITE : 0);
+		    num_pages - got, write_page, pages + got);
 		if (rc < 0)
 			break;
 		BUG_ON(rc == 0);
diff --git a/net/ceph/string_table.c b/net/ceph/string_table.c
index 22fb96efcf34..3191d9d160a2 100644
--- a/net/ceph/string_table.c
+++ b/net/ceph/string_table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/slab.h>
 #include <linux/gfp.h>
 #include <linux/string.h>
diff --git a/net/compat.c b/net/compat.c
index 6ded6c821d7a..22381719718c 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -185,6 +185,13 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 		ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
 	}
 
+	/*
+	 * Check that the length of the messages copied in is the same as
+	 * what we got from the first loop.
+	 */
+	if ((char *)kcmsg - (char *)kcmsg_base != kcmlen)
+		goto Einval;
+
 	/* Ok, looks like we made it.  Hook it up and return success. */
 	kmsg->msg_control = kcmsg_base;
 	kmsg->msg_controllen = kcmlen;
diff --git a/net/core/Makefile b/net/core/Makefile
index 79f9479e9658..1fd0a9c88b1b 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux networking core.
 #
@@ -9,9 +10,9 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
+			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
+			fib_notifier.o
 
-obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee5647bd91b3..522873ed120b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *	SUCS NET3:
  *
@@ -169,20 +170,26 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
 					  int *peeked, int *off, int *err,
 					  struct sk_buff **last)
 {
+	bool peek_at_off = false;
 	struct sk_buff *skb;
-	int _off = *off;
+	int _off = 0;
+
+	if (unlikely(flags & MSG_PEEK && *off >= 0)) {
+		peek_at_off = true;
+		_off = *off;
+	}
 
 	*last = queue->prev;
 	skb_queue_walk(queue, skb) {
 		if (flags & MSG_PEEK) {
-			if (_off >= skb->len && (skb->len || _off ||
-						 skb->peeked)) {
+			if (peek_at_off && _off >= skb->len &&
+			    (_off || skb->peeked)) {
 				_off -= skb->len;
 				continue;
 			}
 			if (!skb->len) {
 				skb = skb_set_peeked(skb);
-				if (unlikely(IS_ERR(skb))) {
+				if (IS_ERR(skb)) {
 					*err = PTR_ERR(skb);
 					return NULL;
 				}
@@ -356,7 +363,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
 	if (flags & MSG_PEEK) {
 		err = -ENOENT;
 		spin_lock_bh(&sk_queue->lock);
-		if (skb == skb_peek(sk_queue)) {
+		if (skb->next) {
 			__skb_unlink(skb, sk_queue);
 			refcount_dec(&skb->users);
 			if (destructor)
@@ -573,27 +580,12 @@ fault:
 }
 EXPORT_SYMBOL(skb_copy_datagram_from_iter);
 
-/**
- *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
- *	@skb: buffer to copy
- *	@from: the source to copy from
- *
- *	The function will first copy up to headlen, and then pin the userspace
- *	pages and build frags through them.
- *
- *	Returns 0, -EFAULT or -EMSGSIZE.
- */
-int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+			    struct iov_iter *from, size_t length)
 {
-	int len = iov_iter_count(from);
-	int copy = min_t(int, skb_headlen(skb), len);
-	int frag = 0;
+	int frag = skb_shinfo(skb)->nr_frags;
 
-	/* copy up to skb headlen */
-	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
-		return -EFAULT;
-
-	while (iov_iter_count(from)) {
+	while (length && iov_iter_count(from)) {
 		struct page *pages[MAX_SKB_FRAGS];
 		size_t start;
 		ssize_t copied;
@@ -603,18 +595,24 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 		if (frag == MAX_SKB_FRAGS)
 			return -EMSGSIZE;
 
-		copied = iov_iter_get_pages(from, pages, ~0U,
+		copied = iov_iter_get_pages(from, pages, length,
 					    MAX_SKB_FRAGS - frag, &start);
 		if (copied < 0)
 			return -EFAULT;
 
 		iov_iter_advance(from, copied);
+		length -= copied;
 
 		truesize = PAGE_ALIGN(copied + start);
 		skb->data_len += copied;
 		skb->len += copied;
 		skb->truesize += truesize;
-		refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+		if (sk && sk->sk_type == SOCK_STREAM) {
+			sk->sk_wmem_queued += truesize;
+			sk_mem_charge(sk, truesize);
+		} else {
+			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+		}
 		while (copied) {
 			int size = min_t(int, copied, PAGE_SIZE - start);
 			skb_fill_page_desc(skb, frag++, pages[n], start, size);
@@ -625,6 +623,28 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(__zerocopy_sg_from_iter);
+
+/**
+ *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
+ *	@skb: buffer to copy
+ *	@from: the source to copy from
+ *
+ *	The function will first copy up to headlen, and then pin the userspace
+ *	pages and build frags through them.
+ *
+ *	Returns 0, -EFAULT or -EMSGSIZE.
+ */
+int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+{
+	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));
+
+	/* copy up to skb headlen */
+	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
+		return -EFAULT;
+
+	return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
+}
 EXPORT_SYMBOL(zerocopy_sg_from_iter);
 
 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
diff --git a/net/core/dev.c b/net/core/dev.c
index ce15a06d5558..8ee29f4f5fa9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -144,6 +144,8 @@
 #include <linux/netfilter_ingress.h>
 #include <linux/crash_dump.h>
 #include <linux/sctp.h>
+#include <net/udp_tunnel.h>
+#include <linux/net_namespace.h>
 
 #include "net-sysfs.h"
 
@@ -161,7 +163,6 @@ static struct list_head offload_base __read_mostly;
 
 static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
-					 struct net_device *dev,
 					 struct netdev_notifier_info *info);
 static struct napi_struct *napi_by_id(unsigned int napi_id);
 
@@ -187,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id);
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
+static DEFINE_MUTEX(ifalias_mutex);
+
 /* protects napi_hash addition/deletion and napi_gen_id */
 static DEFINE_SPINLOCK(napi_hash_lock);
 
@@ -1061,7 +1064,10 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 	unsigned long *inuse;
 	struct net_device *d;
 
-	p = strnchr(name, IFNAMSIZ-1, '%');
+	if (!dev_valid_name(name))
+		return -EINVAL;
+
+	p = strchr(name, '%');
 	if (p) {
 		/*
 		 * Verify the string as this thing may have come from
@@ -1092,8 +1098,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 		free_page((unsigned long) inuse);
 	}
 
-	if (buf != name)
-		snprintf(buf, IFNAMSIZ, name, i);
+	snprintf(buf, IFNAMSIZ, name, i);
 	if (!__dev_get_by_name(net, buf))
 		return i;
 
@@ -1101,7 +1106,21 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 	 * when the name is long and there isn't enough space left
 	 * for the digits, or if all bits are used.
 	 */
-	return -ENFILE;
+	return p ? -ENFILE : -EEXIST;
+}
+
+static int dev_alloc_name_ns(struct net *net,
+			     struct net_device *dev,
+			     const char *name)
+{
+	char buf[IFNAMSIZ];
+	int ret;
+
+	BUG_ON(!net);
+	ret = __dev_alloc_name(net, name, buf);
+	if (ret >= 0)
+		strlcpy(dev->name, buf, IFNAMSIZ);
+	return ret;
 }
 
 /**
@@ -1120,50 +1139,16 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 
 int dev_alloc_name(struct net_device *dev, const char *name)
 {
-	char buf[IFNAMSIZ];
-	struct net *net;
-	int ret;
-
-	BUG_ON(!dev_net(dev));
-	net = dev_net(dev);
-	ret = __dev_alloc_name(net, name, buf);
-	if (ret >= 0)
-		strlcpy(dev->name, buf, IFNAMSIZ);
-	return ret;
+	return dev_alloc_name_ns(dev_net(dev), dev, name);
 }
 EXPORT_SYMBOL(dev_alloc_name);
 
-static int dev_alloc_name_ns(struct net *net,
-			     struct net_device *dev,
-			     const char *name)
-{
-	char buf[IFNAMSIZ];
-	int ret;
-
-	ret = __dev_alloc_name(net, name, buf);
-	if (ret >= 0)
-		strlcpy(dev->name, buf, IFNAMSIZ);
-	return ret;
-}
-
-static int dev_get_valid_name(struct net *net,
-			      struct net_device *dev,
-			      const char *name)
+int dev_get_valid_name(struct net *net, struct net_device *dev,
+		       const char *name)
 {
-	BUG_ON(!net);
-
-	if (!dev_valid_name(name))
-		return -EINVAL;
-
-	if (strchr(name, '%'))
-		return dev_alloc_name_ns(net, dev, name);
-	else if (__dev_get_by_name(net, name))
-		return -EEXIST;
-	else if (dev->name != name)
-		strlcpy(dev->name, name, IFNAMSIZ);
-
-	return 0;
+	return dev_alloc_name_ns(net, dev, name);
 }
+EXPORT_SYMBOL(dev_get_valid_name);
 
 /**
  *	dev_change_name - change name of a device
@@ -1264,29 +1249,53 @@ rollback:
  */
 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
 {
-	char *new_ifalias;
-
-	ASSERT_RTNL();
+	struct dev_ifalias *new_alias = NULL;
 
 	if (len >= IFALIASZ)
 		return -EINVAL;
 
-	if (!len) {
-		kfree(dev->ifalias);
-		dev->ifalias = NULL;
-		return 0;
+	if (len) {
+		new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
+		if (!new_alias)
+			return -ENOMEM;
+
+		memcpy(new_alias->ifalias, alias, len);
+		new_alias->ifalias[len] = 0;
 	}
 
-	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
-	if (!new_ifalias)
-		return -ENOMEM;
-	dev->ifalias = new_ifalias;
-	memcpy(dev->ifalias, alias, len);
-	dev->ifalias[len] = 0;
+	mutex_lock(&ifalias_mutex);
+	rcu_swap_protected(dev->ifalias, new_alias,
+			   mutex_is_locked(&ifalias_mutex));
+	mutex_unlock(&ifalias_mutex);
+
+	if (new_alias)
+		kfree_rcu(new_alias, rcuhead);
 
 	return len;
 }
 
+/**
+ *	dev_get_alias - get ifalias of a device
+ *	@dev: device
+ *	@name: buffer to store name of ifalias
+ *	@len: size of buffer
+ *
+ *	Get ifalias for a device.  Caller must make sure dev cannot go
+ *	away, e.g. by holding the RCU read lock or a reference to the device.
+ */
+int dev_get_alias(const struct net_device *dev, char *name, size_t len)
+{
+	const struct dev_ifalias *alias;
+	int ret = 0;
+
+	rcu_read_lock();
+	alias = rcu_dereference(dev->ifalias);
+	if (alias)
+		ret = snprintf(name, len, "%s", alias->ifalias);
+	rcu_read_unlock();
+
+	return ret;
+}
 
 /**
  *	netdev_features_change - device changes features
@@ -1311,10 +1320,11 @@ EXPORT_SYMBOL(netdev_features_change);
 void netdev_state_change(struct net_device *dev)
 {
 	if (dev->flags & IFF_UP) {
-		struct netdev_notifier_change_info change_info;
+		struct netdev_notifier_change_info change_info = {
+			.info.dev = dev,
+		};
 
-		change_info.flags_changed = 0;
-		call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+		call_netdevice_notifiers_info(NETDEV_CHANGE,
 					      &change_info.info);
 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
 	}
@@ -1413,7 +1423,7 @@ int dev_open(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_open);
 
-static int __dev_close_many(struct list_head *head)
+static void __dev_close_many(struct list_head *head)
 {
 	struct net_device *dev;
 
@@ -1455,23 +1465,18 @@ static int __dev_close_many(struct list_head *head)
 		dev->flags &= ~IFF_UP;
 		netpoll_poll_enable(dev);
 	}
-
-	return 0;
 }
 
-static int __dev_close(struct net_device *dev)
+static void __dev_close(struct net_device *dev)
 {
-	int retval;
 	LIST_HEAD(single);
 
 	list_add(&dev->close_list, &single);
-	retval = __dev_close_many(&single);
+	__dev_close_many(&single);
 	list_del(&single);
-
-	return retval;
 }
 
-int dev_close_many(struct list_head *head, bool unlink)
+void dev_close_many(struct list_head *head, bool unlink)
 {
 	struct net_device *dev, *tmp;
 
@@ -1488,8 +1493,6 @@ int dev_close_many(struct list_head *head, bool unlink)
 		if (unlink)
 			list_del_init(&dev->close_list);
 	}
-
-	return 0;
 }
 EXPORT_SYMBOL(dev_close_many);
 
@@ -1502,7 +1505,7 @@ EXPORT_SYMBOL(dev_close_many);
  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
  *	chain.
  */
-int dev_close(struct net_device *dev)
+void dev_close(struct net_device *dev)
 {
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
@@ -1511,7 +1514,6 @@ int dev_close(struct net_device *dev)
 		dev_close_many(&single, true);
 		list_del(&single);
 	}
-	return 0;
 }
 EXPORT_SYMBOL(dev_close);
 
@@ -1543,9 +1545,10 @@ EXPORT_SYMBOL(dev_disable_lro);
 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
 				   struct net_device *dev)
 {
-	struct netdev_notifier_info info;
+	struct netdev_notifier_info info = {
+		.dev = dev,
+	};
 
-	netdev_notifier_info_init(&info, dev);
 	return nb->notifier_call(nb, val, &info);
 }
 
@@ -1670,11 +1673,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
  */
 
 static int call_netdevice_notifiers_info(unsigned long val,
-					 struct net_device *dev,
 					 struct netdev_notifier_info *info)
 {
 	ASSERT_RTNL();
-	netdev_notifier_info_init(info, dev);
 	return raw_notifier_call_chain(&netdev_chain, val, info);
 }
 
@@ -1689,9 +1690,11 @@ static int call_netdevice_notifiers_info(unsigned long val,
 
 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 {
-	struct netdev_notifier_info info;
+	struct netdev_notifier_info info = {
+		.dev = dev,
+	};
 
-	return call_netdevice_notifiers_info(val, dev, &info);
+	return call_netdevice_notifiers_info(val, &info);
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
@@ -1860,7 +1863,7 @@ static inline int deliver_skb(struct sk_buff *skb,
 			      struct packet_type *pt_prev,
 			      struct net_device *orig_dev)
 {
-	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 		return -ENOMEM;
 	refcount_inc(&skb->users);
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@ -1955,8 +1958,12 @@ again:
 		goto again;
 	}
 out_unlock:
-	if (pt_prev)
-		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+	if (pt_prev) {
+		if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC))
+			pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+		else
+			kfree_skb(skb2);
+	}
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
@@ -2015,6 +2022,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 
 	return 0;
 }
+EXPORT_SYMBOL(netdev_txq_to_tc);
 
 #ifdef CONFIG_XPS
 static DEFINE_MUTEX(xps_map_mutex);
@@ -2738,8 +2746,7 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
 {
 	if (tx_path)
-		return skb->ip_summed != CHECKSUM_PARTIAL &&
-		       skb->ip_summed != CHECKSUM_UNNECESSARY;
+		return skb->ip_summed != CHECKSUM_PARTIAL;
 
 	return skb->ip_summed == CHECKSUM_NONE;
 }
@@ -3249,22 +3256,22 @@ EXPORT_SYMBOL(dev_loopback_xmit);
 static struct sk_buff *
 sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 {
-	struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+	struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
 	struct tcf_result cl_res;
 
-	if (!cl)
+	if (!miniq)
 		return skb;
 
 	/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
-	qdisc_bstats_cpu_update(cl->q, skb);
+	mini_qdisc_bstats_cpu_update(miniq, skb);
 
-	switch (tcf_classify(skb, cl, &cl_res, false)) {
+	switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
 	case TC_ACT_OK:
 	case TC_ACT_RECLASSIFY:
 		skb->tc_index = TC_H_MIN(cl_res.classid);
 		break;
 	case TC_ACT_SHOT:
-		qdisc_qstats_cpu_drop(cl->q);
+		mini_qdisc_qstats_cpu_drop(miniq);
 		*ret = NET_XMIT_DROP;
 		kfree_skb(skb);
 		return NULL;
@@ -3729,7 +3736,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
 	if (flow_table && flow_id <= flow_table->mask) {
 		rflow = &flow_table->flows[flow_id];
-		cpu = ACCESS_ONCE(rflow->cpu);
+		cpu = READ_ONCE(rflow->cpu);
 		if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
 		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
 			   rflow->last_qtail) <
@@ -3865,6 +3872,142 @@ drop:
 	return NET_RX_DROP;
 }
 
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+				     struct bpf_prog *xdp_prog)
+{
+	u32 metalen, act = XDP_DROP;
+	struct xdp_buff xdp;
+	void *orig_data;
+	int hlen, off;
+	u32 mac_len;
+
+	/* Reinjected packets coming from act_mirred or similar should
+	 * not get XDP generic processing.
+	 */
+	if (skb_cloned(skb))
+		return XDP_PASS;
+
+	/* XDP packets must be linear and must have sufficient headroom
+	 * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+	 * native XDP provides, thus we need to do it here as well.
+	 */
+	if (skb_is_nonlinear(skb) ||
+	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+		int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+		int troom = skb->tail + skb->data_len - skb->end;
+
+		/* In case we have to go down the path and also linearize,
+		 * then let's do the pskb_expand_head() work just once here.
+		 */
+		if (pskb_expand_head(skb,
+				     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+				     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+			goto do_drop;
+		if (troom > 0 && __skb_linearize(skb))
+			goto do_drop;
+	}
+
+	/* The XDP program wants to see the packet starting at the MAC
+	 * header.
+	 */
+	mac_len = skb->data - skb_mac_header(skb);
+	hlen = skb_headlen(skb) + mac_len;
+	xdp.data = skb->data - mac_len;
+	xdp.data_meta = xdp.data;
+	xdp.data_end = xdp.data + hlen;
+	xdp.data_hard_start = skb->data - skb_headroom(skb);
+	orig_data = xdp.data;
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+	off = xdp.data - orig_data;
+	if (off > 0)
+		__skb_pull(skb, off);
+	else if (off < 0)
+		__skb_push(skb, -off);
+	skb->mac_header += off;
+
+	switch (act) {
+	case XDP_REDIRECT:
+	case XDP_TX:
+		__skb_push(skb, mac_len);
+		break;
+	case XDP_PASS:
+		metalen = xdp.data - xdp.data_meta;
+		if (metalen)
+			skb_metadata_set(skb, metalen);
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(skb->dev, xdp_prog, act);
+		/* fall through */
+	case XDP_DROP:
+	do_drop:
+		kfree_skb(skb);
+		break;
+	}
+
+	return act;
+}
+
+/* When doing generic XDP we have to bypass the qdisc layer and the
+ * network taps in order to match in-driver-XDP behavior.
+ */
+void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
+{
+	struct net_device *dev = skb->dev;
+	struct netdev_queue *txq;
+	bool free_skb = true;
+	int cpu, rc;
+
+	txq = netdev_pick_tx(dev, skb, NULL);
+	cpu = smp_processor_id();
+	HARD_TX_LOCK(dev, txq, cpu);
+	if (!netif_xmit_stopped(txq)) {
+		rc = netdev_start_xmit(skb, dev, txq, 0);
+		if (dev_xmit_complete(rc))
+			free_skb = false;
+	}
+	HARD_TX_UNLOCK(dev, txq);
+	if (free_skb) {
+		trace_xdp_exception(dev, xdp_prog, XDP_TX);
+		kfree_skb(skb);
+	}
+}
+EXPORT_SYMBOL_GPL(generic_xdp_tx);
+
+static struct static_key generic_xdp_needed __read_mostly;
+
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
+{
+	if (xdp_prog) {
+		u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+		int err;
+
+		if (act != XDP_PASS) {
+			switch (act) {
+			case XDP_REDIRECT:
+				err = xdp_do_generic_redirect(skb->dev, skb,
+							      xdp_prog);
+				if (err)
+					goto out_redir;
+			/* fallthru to submit skb */
+			case XDP_TX:
+				generic_xdp_tx(skb, xdp_prog);
+				break;
+			}
+			return XDP_DROP;
+		}
+	}
+	return XDP_PASS;
+out_redir:
+	kfree_skb(skb);
+	return XDP_DROP;
+}
+EXPORT_SYMBOL_GPL(do_xdp_generic);
+
 static int netif_rx_internal(struct sk_buff *skb)
 {
 	int ret;
@@ -3872,6 +4015,24 @@ static int netif_rx_internal(struct sk_buff *skb)
 	net_timestamp_check(netdev_tstamp_prequeue, skb);
 
 	trace_netif_rx(skb);
+
+	if (static_key_false(&generic_xdp_needed)) {
+		int ret;
+
+		preempt_disable();
+		rcu_read_lock();
+		ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+		rcu_read_unlock();
+		preempt_enable();
+
+		/* Consider XDP consuming the packet a success from
+		 * the netdev point of view; we do not want to count
+		 * this as an error.
+		 */
+		if (ret != XDP_PASS)
+			return NET_RX_SUCCESS;
+	}
+
 #ifdef CONFIG_RPS
 	if (static_key_false(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4010,7 +4171,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 		   struct net_device *orig_dev)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+	struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
 	struct tcf_result cl_res;
 
 	/* If there's at least one ingress present somewhere (so
@@ -4018,8 +4179,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 	 * that are not configured with an ingress qdisc will bail
 	 * out here.
 	 */
-	if (!cl)
+	if (!miniq)
 		return skb;
+
 	if (*pt_prev) {
 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
@@ -4027,15 +4189,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 
 	qdisc_skb_cb(skb)->pkt_len = skb->len;
 	skb->tc_at_ingress = 1;
-	qdisc_bstats_cpu_update(cl->q, skb);
+	mini_qdisc_bstats_cpu_update(miniq, skb);
 
-	switch (tcf_classify(skb, cl, &cl_res, false)) {
+	switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
 	case TC_ACT_OK:
 	case TC_ACT_RECLASSIFY:
 		skb->tc_index = TC_H_MIN(cl_res.classid);
 		break;
 	case TC_ACT_SHOT:
-		qdisc_qstats_cpu_drop(cl->q);
+		mini_qdisc_qstats_cpu_drop(miniq);
 		kfree_skb(skb);
 		return NULL;
 	case TC_ACT_STOLEN:
@@ -4292,7 +4454,7 @@ skip_classify:
 	}
 
 	if (pt_prev) {
-		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+		if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 			goto drop;
 		else
 			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@ -4313,6 +4475,33 @@ out:
 	return ret;
 }
 
+/**
+ *	netif_receive_skb_core - special purpose version of netif_receive_skb
+ *	@skb: buffer to process
+ *
+ *	More direct receive version of netif_receive_skb().  It should
+ *	only be used by callers that have a need to skip RPS and Generic XDP.
+ *	Caller must also take care of handling the (page_is_)pfmemalloc case.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ *
+ *	Return values (usually ignored):
+ *	NET_RX_SUCCESS: no congestion
+ *	NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb_core(struct sk_buff *skb)
+{
+	int ret;
+
+	rcu_read_lock();
+	ret = __netif_receive_skb_core(skb, false);
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(netif_receive_skb_core);
+
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	int ret;
@@ -4338,9 +4527,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	return ret;
 }
 
-static struct static_key generic_xdp_needed __read_mostly;
-
-static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
 	struct bpf_prog *new = xdp->prog;
@@ -4373,89 +4560,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
 	return ret;
 }
 
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
-				     struct bpf_prog *xdp_prog)
-{
-	struct xdp_buff xdp;
-	u32 act = XDP_DROP;
-	void *orig_data;
-	int hlen, off;
-	u32 mac_len;
-
-	/* Reinjected packets coming from act_mirred or similar should
-	 * not get XDP generic processing.
-	 */
-	if (skb_cloned(skb))
-		return XDP_PASS;
-
-	if (skb_linearize(skb))
-		goto do_drop;
-
-	/* The XDP program wants to see the packet starting at the MAC
-	 * header.
-	 */
-	mac_len = skb->data - skb_mac_header(skb);
-	hlen = skb_headlen(skb) + mac_len;
-	xdp.data = skb->data - mac_len;
-	xdp.data_end = xdp.data + hlen;
-	xdp.data_hard_start = skb->data - skb_headroom(skb);
-	orig_data = xdp.data;
-
-	act = bpf_prog_run_xdp(xdp_prog, &xdp);
-
-	off = xdp.data - orig_data;
-	if (off > 0)
-		__skb_pull(skb, off);
-	else if (off < 0)
-		__skb_push(skb, -off);
-
-	switch (act) {
-	case XDP_TX:
-		__skb_push(skb, mac_len);
-		/* fall through */
-	case XDP_PASS:
-		break;
-
-	default:
-		bpf_warn_invalid_xdp_action(act);
-		/* fall through */
-	case XDP_ABORTED:
-		trace_xdp_exception(skb->dev, xdp_prog, act);
-		/* fall through */
-	case XDP_DROP:
-	do_drop:
-		kfree_skb(skb);
-		break;
-	}
-
-	return act;
-}
-
-/* When doing generic XDP we have to bypass the qdisc layer and the
- * network taps in order to match in-driver-XDP behavior.
- */
-static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
-{
-	struct net_device *dev = skb->dev;
-	struct netdev_queue *txq;
-	bool free_skb = true;
-	int cpu, rc;
-
-	txq = netdev_pick_tx(dev, skb, NULL);
-	cpu = smp_processor_id();
-	HARD_TX_LOCK(dev, txq, cpu);
-	if (!netif_xmit_stopped(txq)) {
-		rc = netdev_start_xmit(skb, dev, txq, 0);
-		if (dev_xmit_complete(rc))
-			free_skb = false;
-	}
-	HARD_TX_UNLOCK(dev, txq);
-	if (free_skb) {
-		trace_xdp_exception(dev, xdp_prog, XDP_TX);
-		kfree_skb(skb);
-	}
-}
-
 static int netif_receive_skb_internal(struct sk_buff *skb)
 {
 	int ret;
@@ -4465,23 +4569,20 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 	if (skb_defer_rx_timestamp(skb))
 		return NET_RX_SUCCESS;
 
-	rcu_read_lock();
-
 	if (static_key_false(&generic_xdp_needed)) {
-		struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+		int ret;
 
-		if (xdp_prog) {
-			u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+		preempt_disable();
+		rcu_read_lock();
+		ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+		rcu_read_unlock();
+		preempt_enable();
 
-			if (act != XDP_PASS) {
-				rcu_read_unlock();
-				if (act == XDP_TX)
-					generic_xdp_tx(skb, xdp_prog);
-				return NET_RX_DROP;
-			}
-		}
+		if (ret != XDP_PASS)
+			return NET_RX_DROP;
 	}
 
+	rcu_read_lock();
 #ifdef CONFIG_RPS
 	if (static_key_false(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4653,6 +4754,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
 		diffs |= p->vlan_tci ^ skb->vlan_tci;
 		diffs |= skb_metadata_dst_cmp(p, skb);
+		diffs |= skb_metadata_differs(p, skb);
 		if (maclen == ETH_HLEN)
 			diffs |= compare_ether_header(skb_mac_header(p),
 						      skb_mac_header(skb));
@@ -5289,6 +5391,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
 	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
 	 */
 	rc = napi->poll(napi, BUSY_POLL_BUDGET);
+	trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
 	netpoll_poll_unlock(have_poll_lock);
 	if (rc == BUSY_POLL_BUDGET)
 		__napi_schedule(napi);
@@ -5667,12 +5770,13 @@ EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
  * Find out if a device is linked to an upper device and return true in case
  * it is. The caller must hold the RTNL lock.
  */
-static bool netdev_has_any_upper_dev(struct net_device *dev)
+bool netdev_has_any_upper_dev(struct net_device *dev)
 {
 	ASSERT_RTNL();
 
 	return !list_empty(&dev->adj_list.upper);
 }
+EXPORT_SYMBOL(netdev_has_any_upper_dev);
 
 /**
  * netdev_master_upper_dev_get - Get master upper device
@@ -6184,9 +6288,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
 
 static int __netdev_upper_dev_link(struct net_device *dev,
 				   struct net_device *upper_dev, bool master,
-				   void *upper_priv, void *upper_info)
-{
-	struct netdev_notifier_changeupper_info changeupper_info;
+				   void *upper_priv, void *upper_info,
+				   struct netlink_ext_ack *extack)
+{
+	struct netdev_notifier_changeupper_info changeupper_info = {
+		.info = {
+			.dev = dev,
+			.extack = extack,
+		},
+		.upper_dev = upper_dev,
+		.master = master,
+		.linking = true,
+		.upper_info = upper_info,
+	};
 	int ret = 0;
 
 	ASSERT_RTNL();
@@ -6204,12 +6318,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (master && netdev_master_upper_dev_get(dev))
 		return -EBUSY;
 
-	changeupper_info.upper_dev = upper_dev;
-	changeupper_info.master = master;
-	changeupper_info.linking = true;
-	changeupper_info.upper_info = upper_info;
-
-	ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+	ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
 					    &changeupper_info.info);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -6220,7 +6329,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (ret)
 		return ret;
 
-	ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+	ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
 					    &changeupper_info.info);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -6245,9 +6354,11 @@ rollback:
  * returns zero.
  */
 int netdev_upper_dev_link(struct net_device *dev,
-			  struct net_device *upper_dev)
+			  struct net_device *upper_dev,
+			  struct netlink_ext_ack *extack)
 {
-	return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
+	return __netdev_upper_dev_link(dev, upper_dev, false,
+				       NULL, NULL, extack);
 }
 EXPORT_SYMBOL(netdev_upper_dev_link);
 
@@ -6266,10 +6377,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
  */
 int netdev_master_upper_dev_link(struct net_device *dev,
 				 struct net_device *upper_dev,
-				 void *upper_priv, void *upper_info)
+				 void *upper_priv, void *upper_info,
+				 struct netlink_ext_ack *extack)
 {
 	return __netdev_upper_dev_link(dev, upper_dev, true,
-				       upper_priv, upper_info);
+				       upper_priv, upper_info, extack);
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_link);
 
@@ -6284,20 +6396,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
 void netdev_upper_dev_unlink(struct net_device *dev,
 			     struct net_device *upper_dev)
 {
-	struct netdev_notifier_changeupper_info changeupper_info;
+	struct netdev_notifier_changeupper_info changeupper_info = {
+		.info = {
+			.dev = dev,
+		},
+		.upper_dev = upper_dev,
+		.linking = false,
+	};
 
 	ASSERT_RTNL();
 
-	changeupper_info.upper_dev = upper_dev;
 	changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
-	changeupper_info.linking = false;
 
-	call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+	call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
 				      &changeupper_info.info);
 
 	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
 
-	call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+	call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
 				      &changeupper_info.info);
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6313,11 +6429,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
 void netdev_bonding_info_change(struct net_device *dev,
 				struct netdev_bonding_info *bonding_info)
 {
-	struct netdev_notifier_bonding_info	info;
+	struct netdev_notifier_bonding_info info = {
+		.info.dev = dev,
+	};
 
 	memcpy(&info.bonding_info, bonding_info,
 	       sizeof(struct netdev_bonding_info));
-	call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+	call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
 				      &info.info);
 }
 EXPORT_SYMBOL(netdev_bonding_info_change);
@@ -6443,11 +6561,13 @@ EXPORT_SYMBOL(dev_get_nest_level);
 void netdev_lower_state_changed(struct net_device *lower_dev,
 				void *lower_state_info)
 {
-	struct netdev_notifier_changelowerstate_info changelowerstate_info;
+	struct netdev_notifier_changelowerstate_info changelowerstate_info = {
+		.info.dev = lower_dev,
+	};
 
 	ASSERT_RTNL();
 	changelowerstate_info.lower_state_info = lower_state_info;
-	call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+	call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
 				      &changelowerstate_info.info);
 }
 EXPORT_SYMBOL(netdev_lower_state_changed);
@@ -6689,8 +6809,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
 	 */
 
 	ret = 0;
-	if ((old_flags ^ flags) & IFF_UP)
-		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
+	if ((old_flags ^ flags) & IFF_UP) {
+		if (old_flags & IFF_UP)
+			__dev_close(dev);
+		else
+			ret = __dev_open(dev);
+	}
 
 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
@@ -6734,11 +6858,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
 
 	if (dev->flags & IFF_UP &&
 	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
-		struct netdev_notifier_change_info change_info;
+		struct netdev_notifier_change_info change_info = {
+			.info = {
+				.dev = dev,
+			},
+			.flags_changed = changes,
+		};
 
-		change_info.flags_changed = changes;
-		call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
-					      &change_info.info);
+		call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
 	}
 }
 
@@ -6945,26 +7072,26 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
-u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id)
+u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
 {
-	struct netdev_xdp xdp;
+	struct netdev_bpf xdp;
 
 	memset(&xdp, 0, sizeof(xdp));
 	xdp.command = XDP_QUERY_PROG;
 
 	/* Query must always succeed. */
-	WARN_ON(xdp_op(dev, &xdp) < 0);
+	WARN_ON(bpf_op(dev, &xdp) < 0);
 	if (prog_id)
 		*prog_id = xdp.prog_id;
 
 	return xdp.prog_attached;
 }
 
-static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
 			   struct netlink_ext_ack *extack, u32 flags,
 			   struct bpf_prog *prog)
 {
-	struct netdev_xdp xdp;
+	struct netdev_bpf xdp;
 
 	memset(&xdp, 0, sizeof(xdp));
 	if (flags & XDP_FLAGS_HW_MODE)
@@ -6975,7 +7102,7 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
 	xdp.flags = flags;
 	xdp.prog = prog;
 
-	return xdp_op(dev, &xdp);
+	return bpf_op(dev, &xdp);
 }
 
 /**
@@ -6992,32 +7119,36 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	struct bpf_prog *prog = NULL;
-	xdp_op_t xdp_op, xdp_chk;
+	bpf_op_t bpf_op, bpf_chk;
 	int err;
 
 	ASSERT_RTNL();
 
-	xdp_op = xdp_chk = ops->ndo_xdp;
-	if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
+	bpf_op = bpf_chk = ops->ndo_bpf;
+	if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
 		return -EOPNOTSUPP;
-	if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
-		xdp_op = generic_xdp_install;
-	if (xdp_op == xdp_chk)
-		xdp_chk = generic_xdp_install;
+	if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
+		bpf_op = generic_xdp_install;
+	if (bpf_op == bpf_chk)
+		bpf_chk = generic_xdp_install;
 
 	if (fd >= 0) {
-		if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL))
+		if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
 			return -EEXIST;
 		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-		    __dev_xdp_attached(dev, xdp_op, NULL))
+		    __dev_xdp_attached(dev, bpf_op, NULL))
 			return -EBUSY;
 
-		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+		if (bpf_op == ops->ndo_bpf)
+			prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+						     dev);
+		else
+			prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
 		if (IS_ERR(prog))
 			return PTR_ERR(prog);
 	}
 
-	err = dev_xdp_install(dev, xdp_op, extack, flags, prog);
+	err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
 	if (err < 0 && prog)
 		bpf_prog_put(prog);
 
@@ -7109,7 +7240,7 @@ static void rollback_registered_many(struct list_head *head)
 		if (!dev->rtnl_link_ops ||
 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
 			skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
-						     GFP_KERNEL);
+						     GFP_KERNEL, NULL);
 
 		/*
 		 *	Flush the unicast and multicast chains
@@ -7235,24 +7366,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 		features &= ~NETIF_F_GSO;
 	}
 
-	/* UFO needs SG and checksumming */
-	if (features & NETIF_F_UFO) {
-		/* maybe split UFO into V4 and V6? */
-		if (!(features & NETIF_F_HW_CSUM) &&
-		    ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
-		     (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
-			netdev_dbg(dev,
-				"Dropping NETIF_F_UFO since no checksum offload features.\n");
-			features &= ~NETIF_F_UFO;
-		}
-
-		if (!(features & NETIF_F_SG)) {
-			netdev_dbg(dev,
-				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
-			features &= ~NETIF_F_UFO;
-		}
-	}
-
 	/* GSO partial features require GSO partial be set */
 	if ((features & dev->gso_partial_features) &&
 	    !(features & NETIF_F_GSO_PARTIAL)) {
@@ -7313,8 +7426,27 @@ sync_lower:
 	netdev_for_each_lower_dev(dev, lower, iter)
 		netdev_sync_lower_features(dev, lower, features);
 
-	if (!err)
+	if (!err) {
+		netdev_features_t diff = features ^ dev->features;
+
+		if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
+			/* udp_tunnel_{get,drop}_rx_info both need
+			 * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the
+			 * device, or they won't do anything.
+			 * Thus we need to update dev->features
+			 * *before* calling udp_tunnel_get_rx_info,
+			 * but *after* calling udp_tunnel_drop_rx_info.
+			 */
+			if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
+				dev->features = features;
+				udp_tunnel_get_rx_info(dev);
+			} else {
+				udp_tunnel_drop_rx_info(dev);
+			}
+		}
+
 		dev->features = features;
+	}
 
 	return err < 0 ? 0 : 1;
 }
@@ -7516,6 +7648,12 @@ int register_netdevice(struct net_device *dev)
 	 */
 	dev->hw_features |= NETIF_F_SOFT_FEATURES;
 	dev->features |= NETIF_F_SOFT_FEATURES;
+
+	if (dev->netdev_ops->ndo_udp_tunnel_add) {
+		dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
+		dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
+	}
+
 	dev->wanted_features = dev->features & dev->hw_features;
 
 	if (!(dev->flags & IFF_LOOPBACK))
@@ -7939,7 +8077,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 		unsigned int txqs, unsigned int rxqs)
 {
 	struct net_device *dev;
-	size_t alloc_size;
+	unsigned int alloc_size;
 	struct net_device *p;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -8189,7 +8327,7 @@ EXPORT_SYMBOL(unregister_netdev);
 
 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
 {
-	int err;
+	int err, new_nsid;
 
 	ASSERT_RTNL();
 
@@ -8245,7 +8383,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	rcu_barrier();
 	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
-	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+	if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
+		new_nsid = peernet2id_alloc(dev_net(dev), net);
+	else
+		new_nsid = peernet2id(dev_net(dev), net);
+	rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
 
 	/*
 	 *	Flush the unicast and multicast chains
@@ -8507,6 +8649,8 @@ static void __net_exit netdev_exit(struct net *net)
 {
 	kfree(net->dev_name_head);
 	kfree(net->dev_index_head);
+	if (net != &init_net)
+		WARN_ON_ONCE(!list_empty(&net->dev_base_head));
 }
 
 static struct pernet_operations __net_initdata netdev_net_ops = {
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 709a4e6fb447..7e690d0ccd05 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kmod.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -303,7 +304,18 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 	case SIOCSIFTXQLEN:
 		if (ifr->ifr_qlen < 0)
 			return -EINVAL;
-		dev->tx_queue_len = ifr->ifr_qlen;
+		if (dev->tx_queue_len ^ ifr->ifr_qlen) {
+			unsigned int orig_len = dev->tx_queue_len;
+
+			dev->tx_queue_len = ifr->ifr_qlen;
+			err = call_netdevice_notifiers(
+					NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+			err = notifier_to_errno(err);
+			if (err) {
+				dev->tx_queue_len = orig_len;
+				return err;
+			}
+		}
 		return 0;
 
 	case SIOCSIFNAME:
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a0adfc31a3fe..7d430c1d9c3e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -29,6 +29,57 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/devlink.h>
 
+static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
+	{
+		.name = "destination mac",
+		.id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+		.bitwidth = 48,
+	},
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
+	.name = "ethernet",
+	.id = DEVLINK_DPIPE_HEADER_ETHERNET,
+	.fields = devlink_dpipe_fields_ethernet,
+	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
+	.global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ethernet);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
+	{
+		.name = "destination ip",
+		.id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+		.bitwidth = 32,
+	},
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
+	.name = "ipv4",
+	.id = DEVLINK_DPIPE_HEADER_IPV4,
+	.fields = devlink_dpipe_fields_ipv4,
+	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
+	.global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ipv4);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
+	{
+		.name = "destination ip",
+		.id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+		.bitwidth = 128,
+	},
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
+	.name = "ipv6",
+	.id = DEVLINK_DPIPE_HEADER_IPV6,
+	.fields = devlink_dpipe_fields_ipv6,
+	.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
+	.global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
 
 static LIST_HEAD(devlink_list);
@@ -1613,13 +1664,15 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
 				   struct devlink_dpipe_table *table)
 {
 	struct nlattr *table_attr;
+	u64 table_size;
 
+	table_size = table->table_ops->size_get(table->priv);
 	table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE);
 	if (!table_attr)
 		return -EMSGSIZE;
 
 	if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
-	    nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size,
+	    nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
 			      DEVLINK_ATTR_PAD))
 		goto nla_put_failure;
 	if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
@@ -1960,6 +2013,28 @@ int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
 }
 EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
 
+void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
+
+{
+	unsigned int value_count, value_index;
+	struct devlink_dpipe_value *value;
+
+	value = entry->action_values;
+	value_count = entry->action_values_count;
+	for (value_index = 0; value_index < value_count; value_index++) {
+		kfree(value[value_index].value);
+		kfree(value[value_index].mask);
+	}
+
+	value = entry->match_values;
+	value_count = entry->match_values_count;
+	for (value_index = 0; value_index < value_count; value_index++) {
+		kfree(value[value_index].value);
+		kfree(value[value_index].mask);
+	}
+}
+EXPORT_SYMBOL(devlink_dpipe_entry_clear);
+
 static int devlink_dpipe_entries_fill(struct genl_info *info,
 				      enum devlink_command cmd, int flags,
 				      struct devlink_dpipe_table *table)
@@ -2684,20 +2759,21 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
  *	@table_name: table name
  *	@table_ops: table ops
  *	@priv: priv
- *	@size: size
  *	@counter_control_extern: external control for counters
  */
 int devlink_dpipe_table_register(struct devlink *devlink,
 				 const char *table_name,
 				 struct devlink_dpipe_table_ops *table_ops,
-				 void *priv, u64 size,
-				 bool counter_control_extern)
+				 void *priv, bool counter_control_extern)
 {
 	struct devlink_dpipe_table *table;
 
 	if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
 		return -EEXIST;
 
+	if (WARN_ON(!table_ops->size_get))
+		return -EINVAL;
+
 	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (!table)
 		return -ENOMEM;
@@ -2705,7 +2781,6 @@ int devlink_dpipe_table_register(struct devlink *devlink,
 	table->name = table_name;
 	table->table_ops = table_ops;
 	table->priv = priv;
-	table->size = size;
 	table->counter_control_extern = counter_control_extern;
 
 	mutex_lock(&devlink_mutex);
diff --git a/net/core/dst.c b/net/core/dst.c
index 00aa972ad1a1..662a2d4a3d19 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -55,7 +55,7 @@ const struct dst_metrics dst_default_metrics = {
 	 * We really want to avoid false sharing on this variable, and catch
 	 * any writes on it.
 	 */
-	.refcnt = ATOMIC_INIT(1),
+	.refcnt = REFCOUNT_INIT(1),
 };
 
 void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -213,7 +213,7 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
 		struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
 		unsigned long prev, new;
 
-		atomic_set(&p->refcnt, 1);
+		refcount_set(&p->refcnt, 1);
 		memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
 
 		new = (unsigned long) p;
@@ -225,7 +225,7 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
 			if (prev & DST_METRICS_READ_ONLY)
 				p = NULL;
 		} else if (prev & DST_METRICS_REFCOUNTED) {
-			if (atomic_dec_and_test(&old_p->refcnt))
+			if (refcount_dec_and_test(&old_p->refcnt))
 				kfree(old_p);
 		}
 	}
@@ -299,7 +299,8 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc);
 void metadata_dst_free(struct metadata_dst *md_dst)
 {
 #ifdef CONFIG_DST_CACHE
-	dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
+	if (md_dst->type == METADATA_IP_TUNNEL)
+		dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
 #endif
 	kfree(md_dst);
 }
@@ -321,3 +322,19 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
 	return md_dst;
 }
 EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
+
+void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
+{
+#ifdef CONFIG_DST_CACHE
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
+
+		if (one_md_dst->type == METADATA_IP_TUNNEL)
+			dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
+	}
+#endif
+	free_percpu(md_dst);
+}
+EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 674b6c9cec18..f8fcf450a36e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -76,7 +76,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_LRO_BIT] =              "rx-lro",
 
 	[NETIF_F_TSO_BIT] =              "tx-tcp-segmentation",
-	[NETIF_F_UFO_BIT] =              "tx-udp-fragmentation",
 	[NETIF_F_GSO_ROBUST_BIT] =       "tx-gso-robust",
 	[NETIF_F_TSO_ECN_BIT] =          "tx-tcp-ecn-segmentation",
 	[NETIF_F_TSO_MANGLEID_BIT] =	 "tx-tcp-mangleid-segmentation",
@@ -106,6 +105,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_HW_TC_BIT] =		 "hw-tc-offload",
 	[NETIF_F_HW_ESP_BIT] =		 "esp-hw-offload",
 	[NETIF_F_HW_ESP_TX_CSUM_BIT] =	 "esp-tx-csum-hw-offload",
+	[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] =	 "rx-udp_tunnel-port-offload",
 };
 
 static const char
@@ -299,9 +299,6 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
 	case ETHTOOL_GTSO:
 	case ETHTOOL_STSO:
 		return NETIF_F_ALL_TSO;
-	case ETHTOOL_GUFO:
-	case ETHTOOL_SUFO:
-		return NETIF_F_UFO;
 	case ETHTOOL_GGSO:
 	case ETHTOOL_SGSO:
 		return NETIF_F_GSO;
@@ -406,6 +403,22 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
 	return 0;
 }
 
+/* AND src's supported and advertising link-mode masks into dst's, in place. */
+void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst,
+				  struct ethtool_link_ksettings *src)
+{
+	unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
+	unsigned int idx = 0;
+
+	for (; idx < size; idx++) {
+		dst->link_modes.supported[idx] &=
+			src->link_modes.supported[idx];
+		dst->link_modes.advertising[idx] &=
+			src->link_modes.advertising[idx];
+	}
+}
+EXPORT_SYMBOL(ethtool_intersect_link_masks);
+
 void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
 					     u32 legacy_u32)
 {
@@ -439,7 +452,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
 EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
 
 /* return false if legacy contained non-0 deprecated fields
- * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated
+ * maxtxpkt/maxrxpkt. The rest of ksettings is always updated.
  */
 static bool
 convert_legacy_settings_to_link_ksettings(
@@ -454,8 +467,7 @@ convert_legacy_settings_to_link_ksettings(
 	 * deprecated legacy fields, and they should not use
 	 * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
 	 */
-	if (legacy_settings->transceiver ||
-	    legacy_settings->maxtxpkt ||
+	if (legacy_settings->maxtxpkt ||
 	    legacy_settings->maxrxpkt)
 		retval = false;
 
@@ -528,6 +540,8 @@ convert_link_ksettings_to_legacy_settings(
 		= link_ksettings->base.eth_tp_mdix;
 	legacy_settings->eth_tp_mdix_ctrl
 		= link_ksettings->base.eth_tp_mdix_ctrl;
+	legacy_settings->transceiver
+		= link_ksettings->base.transceiver;
 	return retval;
 }
 
@@ -2515,6 +2529,33 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
 	return ret;
 }
 
+static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM };
+
+	if (!dev->ethtool_ops->get_fecparam)
+		return -EOPNOTSUPP;
+
+	dev->ethtool_ops->get_fecparam(dev, &fecparam);
+
+	if (copy_to_user(useraddr, &fecparam, sizeof(fecparam)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_fecparam fecparam;
+
+	if (!dev->ethtool_ops->set_fecparam)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&fecparam, useraddr, sizeof(fecparam)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_fecparam(dev, &fecparam);
+}
+
 /* The main entry point in this file.  Called from net/core/dev_ioctl.c */
 
 int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -2555,7 +2596,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GPHYSTATS:
 	case ETHTOOL_GTSO:
 	case ETHTOOL_GPERMADDR:
-	case ETHTOOL_GUFO:
 	case ETHTOOL_GGSO:
 	case ETHTOOL_GGRO:
 	case ETHTOOL_GFLAGS:
@@ -2574,6 +2614,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GTUNABLE:
 	case ETHTOOL_PHY_GTUNABLE:
 	case ETHTOOL_GLINKSETTINGS:
+	case ETHTOOL_GFECPARAM:
 		break;
 	default:
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -2723,7 +2764,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCSUM:
 	case ETHTOOL_GSG:
 	case ETHTOOL_GTSO:
-	case ETHTOOL_GUFO:
 	case ETHTOOL_GGSO:
 	case ETHTOOL_GGRO:
 		rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
@@ -2732,7 +2772,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXCSUM:
 	case ETHTOOL_SSG:
 	case ETHTOOL_STSO:
-	case ETHTOOL_SUFO:
 	case ETHTOOL_SGSO:
 	case ETHTOOL_SGRO:
 		rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
@@ -2785,6 +2824,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_PHY_STUNABLE:
 		rc = set_phy_tunable(dev, useraddr);
 		break;
+	case ETHTOOL_GFECPARAM:
+		rc = ethtool_get_fecparam(dev, useraddr);
+		break;
+	case ETHTOOL_SFECPARAM:
+		rc = ethtool_set_fecparam(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
new file mode 100644
index 000000000000..0c048bdeb016
--- /dev/null
+++ b/net/core/fib_notifier.c
@@ -0,0 +1,181 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+		      enum fib_event_type event_type,
+		      struct fib_notifier_info *info)
+{
+	info->net = net;
+	return nb->notifier_call(nb, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifier);
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+		       struct fib_notifier_info *info)
+{
+	info->net = net;
+	return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifiers);
+
+static unsigned int fib_seq_sum(void)
+{
+	struct fib_notifier_ops *ops;
+	unsigned int fib_seq = 0;
+	struct net *net;
+
+	rtnl_lock();
+	for_each_net(net) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+			if (!try_module_get(ops->owner))
+				continue;
+			fib_seq += ops->fib_seq_read(net);
+			module_put(ops->owner);
+		}
+		rcu_read_unlock();
+	}
+	rtnl_unlock();
+
+	return fib_seq;
+}
+
+static int fib_net_dump(struct net *net, struct notifier_block *nb)
+{
+	struct fib_notifier_ops *ops;
+
+	list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+		int err;
+
+		if (!try_module_get(ops->owner))
+			continue;
+		err = ops->fib_dump(net, nb);
+		module_put(ops->owner);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+				   void (*cb)(struct notifier_block *nb),
+				   unsigned int fib_seq)
+{
+	atomic_notifier_chain_register(&fib_chain, nb);
+	if (fib_seq == fib_seq_sum())
+		return true;
+	atomic_notifier_chain_unregister(&fib_chain, nb);
+	if (cb)
+		cb(nb);
+	return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+			  void (*cb)(struct notifier_block *nb))
+{
+	int retries = 0;
+	int err;
+
+	do {
+		unsigned int fib_seq = fib_seq_sum();
+		struct net *net;
+
+		rcu_read_lock();
+		for_each_net_rcu(net) {
+			err = fib_net_dump(net, nb);
+			if (err)
+				goto err_fib_net_dump;
+		}
+		rcu_read_unlock();
+
+		if (fib_dump_is_consistent(nb, cb, fib_seq))
+			return 0;
+	} while (++retries < FIB_DUMP_MAX_RETRIES);
+
+	return -EBUSY;
+
+err_fib_net_dump:
+	rcu_read_unlock();
+	return err;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
+
+static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
+				       struct net *net)
+{
+	struct fib_notifier_ops *o;
+
+	list_for_each_entry(o, &net->fib_notifier_ops, list)
+		if (ops->family == o->family)
+			return -EEXIST;
+	list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
+	return 0;
+}
+
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net)
+{
+	struct fib_notifier_ops *ops;
+	int err;
+
+	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return ERR_PTR(-ENOMEM);
+
+	err = __fib_notifier_ops_register(ops, net);
+	if (err)
+		goto err_register;
+
+	return ops;
+
+err_register:
+	kfree(ops);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(fib_notifier_ops_register);
+
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops)
+{
+	list_del_rcu(&ops->list);
+	kfree_rcu(ops, rcu);
+}
+EXPORT_SYMBOL(fib_notifier_ops_unregister);
+
+static int __net_init fib_notifier_net_init(struct net *net)
+{
+	INIT_LIST_HEAD(&net->fib_notifier_ops);
+	return 0;
+}
+
+static void __net_exit fib_notifier_net_exit(struct net *net)
+{
+	WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops));
+}
+
+static struct pernet_operations fib_notifier_net_ops = {
+	.init = fib_notifier_net_init,
+	.exit = fib_notifier_net_exit,
+};
+
+static int __init fib_notifier_init(void)
+{
+	return register_pernet_subsys(&fib_notifier_net_ops);
+}
+
+subsys_initcall(fib_notifier_init);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fdcb1bcd2afa..98e1066c3d55 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,6 +299,69 @@ out:
 }
 EXPORT_SYMBOL_GPL(fib_rules_lookup);
 
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+				  enum fib_event_type event_type,
+				  struct fib_rule *rule, int family)
+{
+	struct fib_rule_notifier_info info = {
+		.info.family = family,
+		.rule = rule,
+	};
+
+	return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib_rule_notifiers(struct net *net,
+				   enum fib_event_type event_type,
+				   struct fib_rule *rule,
+				   struct fib_rules_ops *ops,
+				   struct netlink_ext_ack *extack)
+{
+	struct fib_rule_notifier_info info = {
+		.info.family = ops->family,
+		.info.extack = extack,
+		.rule = rule,
+	};
+
+	ops->fib_rules_seq++;
+	return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
+{
+	struct fib_rules_ops *ops;
+	struct fib_rule *rule;
+
+	ops = lookup_rules_ops(net, family);
+	if (!ops)
+		return -EAFNOSUPPORT;
+	list_for_each_entry_rcu(rule, &ops->rules_list, list)
+		call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
+				       family);
+	rules_ops_put(ops);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+unsigned int fib_rules_seq_read(struct net *net, int family)
+{
+	unsigned int fib_rules_seq;
+	struct fib_rules_ops *ops;
+
+	ASSERT_RTNL();
+
+	ops = lookup_rules_ops(net, family);
+	if (!ops)
+		return 0;
+	fib_rules_seq = ops->fib_rules_seq;
+	rules_ops_put(ops);
+
+	return fib_rules_seq;
+}
+EXPORT_SYMBOL_GPL(fib_rules_seq_read);
+
 static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
 			    struct fib_rules_ops *ops)
 {
@@ -548,6 +611,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (rule->tun_id)
 		ip_tunnel_need_metadata();
 
+	call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
 	flush_route_cache(ops);
 	rules_ops_put(ops);
@@ -687,6 +751,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 			}
 		}
 
+		call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
+					NULL);
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).portid);
 		fib_rule_put(rule);
@@ -956,16 +1022,22 @@ static int __net_init fib_rules_net_init(struct net *net)
 	return 0;
 }
 
+static void __net_exit fib_rules_net_exit(struct net *net)
+{
+	WARN_ON_ONCE(!list_empty(&net->rules_ops));
+}
+
 static struct pernet_operations fib_rules_net_ops = {
 	.init = fib_rules_net_init,
+	.exit = fib_rules_net_exit,
 };
 
 static int __init fib_rules_init(void)
 {
 	int err;
-	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0);
 
 	err = register_pernet_subsys(&fib_rules_net_ops);
 	if (err < 0)
diff --git a/net/core/filter.c b/net/core/filter.c
index 6280a602604c..1afa17935954 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -43,6 +43,7 @@
 #include <linux/timer.h>
 #include <linux/uaccess.h>
 #include <asm/unaligned.h>
+#include <asm/cmpxchg.h>
 #include <linux/filter.h>
 #include <linux/ratelimit.h>
 #include <linux/seccomp.h>
@@ -55,6 +56,7 @@
 #include <net/sock_reuseport.h>
 #include <net/busy_poll.h>
 #include <net/tcp.h>
+#include <linux/bpf_trace.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -513,14 +515,27 @@ do_pass:
 				break;
 			}
 
-			/* Convert JEQ into JNE when 'jump_true' is next insn. */
-			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
-				insn->code = BPF_JMP | BPF_JNE | bpf_src;
+			/* Invert JEQ/JGT/JGE when 'jump_true' is next insn. */
+			if (fp->jt == 0) {
+				switch (BPF_OP(fp->code)) {
+				case BPF_JEQ:
+					insn->code = BPF_JMP | BPF_JNE | bpf_src;
+					break;
+				case BPF_JGT:
+					insn->code = BPF_JMP | BPF_JLE | bpf_src;
+					break;
+				case BPF_JGE:
+					insn->code = BPF_JMP | BPF_JLT | bpf_src;
+					break;
+				default:
+					goto jmp_rest;
+				}
+
 				target = i + fp->jf + 1;
 				BPF_EMIT_JMP;
 				break;
 			}
-
+jmp_rest:
 			/* Other jumps are mapped into two insns: Jxx and JA. */
 			target = i + fp->jt + 1;
 			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
@@ -975,10 +990,14 @@ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
 
 bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
 {
-	bool ret = __sk_filter_charge(sk, fp);
-	if (ret)
-		refcount_inc(&fp->refcnt);
-	return ret;
+	if (!refcount_inc_not_zero(&fp->refcnt))
+		return false;
+
+	if (!__sk_filter_charge(sk, fp)) {
+		sk_filter_release(fp);
+		return false;
+	}
+	return true;
 }
 
 static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
@@ -1388,7 +1407,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
 {
 	int err = __bpf_try_make_writable(skb, write_len);
 
-	bpf_compute_data_end(skb);
+	bpf_compute_data_pointers(skb);
 	return err;
 }
 
@@ -1778,6 +1797,9 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 struct redirect_info {
 	u32 ifindex;
 	u32 flags;
+	struct bpf_map *map;
+	struct bpf_map *map_to_flush;
+	unsigned long   map_owner;
 };
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -1818,6 +1840,47 @@ static const struct bpf_func_proto bpf_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
+	   struct bpf_map *, map, u32, key, u64, flags)
+{
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+	/* If the user passes invalid input (any flags set), drop the packet. */
+	if (unlikely(flags))
+		return SK_DROP;
+
+	tcb->bpf.key = key;
+	tcb->bpf.flags = flags;
+	tcb->bpf.map = map;
+
+	return SK_PASS;
+}
+
+struct sock *do_sk_redirect_map(struct sk_buff *skb)
+{
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+	struct sock *sk = NULL;
+
+	if (tcb->bpf.map) {
+		sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
+
+		tcb->bpf.key = 0;
+		tcb->bpf.map = NULL;
+	}
+
+	return sk;
+}
+
+static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
+	.func           = bpf_sk_redirect_map,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -1906,7 +1969,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
 	ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
 	bpf_pull_mac_rcsum(skb);
 
-	bpf_compute_data_end(skb);
+	bpf_compute_data_pointers(skb);
 	return ret;
 }
 
@@ -1928,7 +1991,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
 	ret = skb_vlan_pop(skb);
 	bpf_pull_mac_rcsum(skb);
 
-	bpf_compute_data_end(skb);
+	bpf_compute_data_pointers(skb);
 	return ret;
 }
 
@@ -2024,8 +2087,8 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 		return ret;
 
 	if (skb_is_gso(skb)) {
-		/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
-		 * be changed into SKB_GSO_TCPV6.
+		/* SKB_GSO_TCPV4 needs to be changed into
+		 * SKB_GSO_TCPV6.
 		 */
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
 			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
@@ -2060,8 +2123,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 		return ret;
 
 	if (skb_is_gso(skb)) {
-		/* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
-		 * be changed into SKB_GSO_TCPV4.
+		/* SKB_GSO_TCPV6 needs to be changed into
+		 * SKB_GSO_TCPV4.
 		 */
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
 			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
@@ -2122,7 +2185,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
 	 * need to be verified first.
 	 */
 	ret = bpf_skb_proto_xlat(skb, proto);
-	bpf_compute_data_end(skb);
+	bpf_compute_data_pointers(skb);
 	return ret;
 }
 
@@ -2247,7 +2310,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
 	ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
 		       bpf_skb_net_grow(skb, len_diff_abs);
 
-	bpf_compute_data_end(skb);
+	bpf_compute_data_pointers(skb);
 	return ret;
 }
 
@@ -2338,7 +2401,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
 			skb_gso_reset(skb);
 	}
 
-	bpf_compute_data_end(skb);
+	bpf_compute_data_pointers(skb);
 	return ret;
 }
 
@@ -2378,7 +2441,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
 		skb_reset_mac_header(skb);
 	}
 
-	bpf_compute_data_end(skb);
+	bpf_compute_data_pointers(skb);
 	return 0;
 }
 
@@ -2391,14 +2454,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+/* Bytes between data_meta and data; 0 when metadata is unsupported. */
+static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
+{
+	return !xdp_data_meta_unsupported(xdp) ? xdp->data - xdp->data_meta : 0;
+}
+
 BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
 {
+	unsigned long metalen = xdp_get_metalen(xdp);
+	void *data_start = xdp->data_hard_start + metalen;
 	void *data = xdp->data + offset;
 
-	if (unlikely(data < xdp->data_hard_start ||
+	if (unlikely(data < data_start ||
 		     data > xdp->data_end - ETH_HLEN))
 		return -EINVAL;
 
+	if (metalen)
+		memmove(xdp->data_meta + offset,
+			xdp->data_meta, metalen);
+	xdp->data_meta += offset;
 	xdp->data = data;
 
 	return 0;
@@ -2412,6 +2487,326 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
+{
+	void *new_meta = xdp->data_meta + offset;
+	unsigned long new_len = xdp->data - new_meta;
+
+	if (xdp_data_meta_unsupported(xdp))
+		return -ENOTSUPP;
+	/* The metadata start must stay within [data_hard_start, data]. */
+	if (unlikely(new_meta < xdp->data_hard_start || new_meta > xdp->data))
+		return -EINVAL;
+	/* Length: a multiple of sizeof(__u32) and no more than 32 bytes. */
+	if (unlikely(new_len > 32 || (new_len & (sizeof(__u32) - 1))))
+		return -EACCES;
+
+	xdp->data_meta = new_meta;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
+	.func		= bpf_xdp_adjust_meta,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,		/* 0 or a negative errno */
+	.arg1_type	= ARG_PTR_TO_CTX,	/* xdp */
+	.arg2_type	= ARG_ANYTHING,		/* offset */
+};
+
+/* Transmit @xdp on @dev and flush right away; @map/@index are unused. */
+static int __bpf_tx_xdp(struct net_device *dev,
+			struct bpf_map *map,
+			struct xdp_buff *xdp,
+			u32 index)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	int err;
+
+	if (!ops->ndo_xdp_xmit)
+		return -EOPNOTSUPP;
+
+	err = ops->ndo_xdp_xmit(dev, xdp);
+	if (!err)
+		ops->ndo_xdp_flush(dev);
+	return err;
+}
+
+/* Send @xdp to map entry @fwd; on success call the *_insert_ctx() hook. */
+static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
+			    struct bpf_map *map,
+			    struct xdp_buff *xdp,
+			    u32 index)
+{
+	struct net_device *tx_dev;
+	int ret = 0;
+
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_DEVMAP:
+		tx_dev = fwd;
+		if (!tx_dev->netdev_ops->ndo_xdp_xmit)
+			return -EOPNOTSUPP;
+		ret = tx_dev->netdev_ops->ndo_xdp_xmit(tx_dev, xdp);
+		if (!ret)
+			__dev_map_insert_ctx(map, index);
+		break;
+	case BPF_MAP_TYPE_CPUMAP:
+		ret = cpu_map_enqueue(fwd, xdp, dev_rx);
+		if (!ret)
+			__cpu_map_insert_ctx(map, index);
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+/* Flush the map recorded in this CPU's redirect_info, then forget it. */
+void xdp_do_flush_map(void)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_map *pending = ri->map_to_flush;
+
+	ri->map_to_flush = NULL;
+	if (!pending)
+		return;
+	switch (pending->map_type) {
+	case BPF_MAP_TYPE_DEVMAP:
+		__dev_map_flush(pending);
+		break;
+	case BPF_MAP_TYPE_CPUMAP:
+		__cpu_map_flush(pending);
+		break;
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+
+/* Resolve @index in a DEVMAP or CPUMAP; NULL for any other map type. */
+static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
+{
+	void *entry = NULL;
+
+	if (map->map_type == BPF_MAP_TYPE_DEVMAP)
+		entry = __dev_map_lookup_elem(map, index);
+	else if (map->map_type == BPF_MAP_TYPE_CPUMAP)
+		entry = __cpu_map_lookup_elem(map, index);
+	return entry;
+}
+
+static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
+				   unsigned long aux)
+{
+	return (unsigned long)xdp_prog->aux != aux; /* not this prog's aux */
+}
+
+static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
+			       struct bpf_prog *xdp_prog)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	unsigned long map_owner = ri->map_owner;
+	struct bpf_map *map = ri->map;
+	u32 index = ri->ifindex;
+	void *fwd = NULL;
+	int err;
+
+	ri->ifindex = 0;
+	ri->map = NULL;
+	ri->map_owner = 0;
+	/* Reject a request whose recorded owner is not xdp_prog->aux. */
+	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
+		err = -EFAULT;
+		map = NULL;
+		goto err;
+	}
+
+	fwd = __xdp_map_lookup_elem(map, index);
+	if (!fwd) {
+		err = -EINVAL;
+		goto err;
+	}
+	if (ri->map_to_flush && ri->map_to_flush != map)
+		xdp_do_flush_map();
+	/* Any other map pending on this CPU has been flushed above. */
+	err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
+	if (unlikely(err))
+		goto err;
+	/* Remember the map so xdp_do_flush_map() can flush it later. */
+	ri->map_to_flush = map;
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	return 0;
+err:
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+	return err;
+}
+
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+		    struct bpf_prog *xdp_prog)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct net_device *fwd;
+	u32 index = ri->ifindex;
+	int err;
+
+	if (ri->map)
+		return xdp_do_redirect_map(dev, xdp, xdp_prog);
+	/* No map: treat ri->ifindex as a device index under dev_net(dev). */
+	fwd = dev_get_by_index_rcu(dev_net(dev), index);
+	ri->ifindex = 0;
+	if (unlikely(!fwd)) {
+		err = -EINVAL;
+		goto err;
+	}
+	/* Direct device path: there is no map, hence NULL and 0. */
+	err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
+	if (unlikely(err))
+		goto err;
+
+	_trace_xdp_redirect(dev, xdp_prog, index);
+	return 0;
+err:
+	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(xdp_do_redirect);
+
+/* Check that @fwd is up and that @skb fits mtu + hard header + VLAN_HLEN. */
+static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
+{
+	unsigned int max_len;
+
+	if (unlikely(!(fwd->flags & IFF_UP)))
+		return -ENETDOWN;
+
+	max_len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
+	if (skb->len > max_len)
+		return -EMSGSIZE;
+	return 0;
+}
+
+/* Map flavour of xdp_do_generic_redirect(); DEVMAP entries only. */
+static int xdp_do_generic_redirect_map(struct net_device *dev,
+				       struct sk_buff *skb,
+				       struct bpf_prog *xdp_prog)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	unsigned long map_owner = ri->map_owner;
+	struct bpf_map *map = ri->map;
+	struct net_device *fwd = NULL;
+	u32 index = ri->ifindex;
+	int err = 0;
+
+	ri->ifindex = 0;
+	ri->map = NULL;
+	ri->map_owner = 0;
+
+	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
+		err = -EFAULT;
+		map = NULL;
+		goto err;
+	}
+	fwd = __xdp_map_lookup_elem(map, index);
+	if (unlikely(!fwd)) {
+		err = -EINVAL;
+		goto err;
+	}
+	if (map->map_type != BPF_MAP_TYPE_DEVMAP) {
+		/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
+		err = -EBADRQC;
+		goto err;
+	}
+	err = __xdp_generic_ok_fwd_dev(skb, fwd);
+	if (unlikely(err))
+		goto err;
+	skb->dev = fwd;
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	return 0;
+err:
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+	return err;
+}
+
+/* Generic (skb) XDP redirect: point @skb at the requested device. */
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
+			    struct bpf_prog *xdp_prog)
+{
+	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	u32 index = ri->ifindex;
+	struct net_device *fwd;
+	int err = 0;
+
+	if (ri->map)
+		return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
+
+	ri->ifindex = 0;
+	fwd = dev_get_by_index_rcu(dev_net(dev), index);
+	if (unlikely(!fwd)) {
+		err = -EINVAL;
+		goto err;
+	}
+	err = __xdp_generic_ok_fwd_dev(skb, fwd);
+	if (unlikely(err))
+		goto err;
+	skb->dev = fwd;
+	_trace_xdp_redirect(dev, xdp_prog, index);
+	return 0;
+err:
+	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
+
+BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
+{
+	struct redirect_info *info = this_cpu_ptr(&redirect_info);
+
+	/* No flag bits are accepted. */
+	if (unlikely(flags))
+		return XDP_ABORTED;
+	/* Record an ifindex-only request (no map) in the per-CPU state. */
+	info->map_owner = 0;
+	info->map = NULL;
+	info->flags = flags;
+	info->ifindex = ifindex;
+	return XDP_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_xdp_redirect_proto = {
+	.func           = bpf_xdp_redirect,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,	/* XDP_REDIRECT or XDP_ABORTED */
+	.arg1_type      = ARG_ANYTHING,	/* ifindex */
+	.arg2_type      = ARG_ANYTHING,	/* flags */
+};
+
+BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
+	   unsigned long, map_owner)
+{
+	struct redirect_info *info = this_cpu_ptr(&redirect_info);
+
+	/* No flag bits are accepted. */
+	if (unlikely(flags))
+		return XDP_ABORTED;
+	/* Record the map, index and owner in the per-CPU redirect state. */
+	info->map_owner = map_owner;
+	info->map = map;
+	info->flags = flags;
+	info->ifindex = ifindex;
+	return XDP_REDIRECT;
+}
+
+/* Note, arg4 is hidden from users and populated by the verifier
+ * with the right pointer.
+ */
+static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
+	.func           = bpf_xdp_redirect_map,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,		/* XDP_REDIRECT or XDP_ABORTED */
+	.arg1_type      = ARG_CONST_MAP_PTR,	/* map */
+	.arg2_type      = ARG_ANYTHING,		/* ifindex */
+	.arg3_type      = ARG_ANYTHING,		/* flags */
+};
+
 bool bpf_helper_changes_pkt_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
@@ -2425,7 +2820,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_clone_redirect ||
 	    func == bpf_l3_csum_replace ||
 	    func == bpf_l4_csum_replace ||
-	    func == bpf_xdp_adjust_head)
+	    func == bpf_xdp_adjust_head ||
+	    func == bpf_xdp_adjust_meta)
 		return true;
 
 	return false;
@@ -2676,14 +3072,15 @@ static const struct bpf_func_proto *
 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 {
 	if (!md_dst) {
-		/* Race is not possible, since it's called from verifier
-		 * that is holding verifier mutex.
-		 */
-		md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
-						   METADATA_IP_TUNNEL,
-						   GFP_KERNEL);
-		if (!md_dst)
+		struct metadata_dst __percpu *tmp;
+
+		tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
+						METADATA_IP_TUNNEL,
+						GFP_KERNEL);
+		if (!tmp)
 			return NULL;
+		if (cmpxchg(&md_dst, NULL, tmp))
+			metadata_dst_free_percpu(tmp);
 	}
 
 	switch (which) {
@@ -2836,15 +3233,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 		   sk->sk_prot->setsockopt == tcp_setsockopt) {
 		if (optname == TCP_CONGESTION) {
 			char name[TCP_CA_NAME_MAX];
+			bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
 
 			strncpy(name, optval, min_t(long, optlen,
 						    TCP_CA_NAME_MAX-1));
 			name[TCP_CA_NAME_MAX-1] = 0;
-			ret = tcp_set_congestion_control(sk, name, false);
-			if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
-				/* replacing an existing ca */
-				tcp_reinit_congestion_control(sk,
-					inet_csk(sk)->icsk_ca_ops);
+			ret = tcp_set_congestion_control(sk, name, false, reinit);
 		} else {
 			struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2872,7 +3266,6 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 				ret = -EINVAL;
 			}
 		}
-		ret = -EINVAL;
 #endif
 	} else {
 		ret = -EINVAL;
@@ -2882,7 +3275,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 
 static const struct bpf_func_proto bpf_setsockopt_proto = {
 	.func		= bpf_setsockopt,
-	.gpl_only	= true,
+	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
 	.arg2_type	= ARG_ANYTHING,
@@ -2891,6 +3284,47 @@ static const struct bpf_func_proto bpf_setsockopt_proto = {
 	.arg5_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
+	   int, level, int, optname, char *, optval, int, optlen)
+{
+	struct sock *sk = bpf_sock->sk;
+
+	if (!sk_fullsock(sk))
+		goto err_clear;
+	/* With CONFIG_INET, only SOL_TCP/TCP_CONGESTION is served. */
+#ifdef CONFIG_INET
+	if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
+		if (optname == TCP_CONGESTION) {
+			struct inet_connection_sock *icsk = inet_csk(sk);
+
+			if (!icsk->icsk_ca_ops || optlen <= 1)
+				goto err_clear;
+			strncpy(optval, icsk->icsk_ca_ops->name, optlen);
+			optval[optlen - 1] = 0; /* strncpy may not terminate */
+		} else {
+			goto err_clear;
+		}
+	} else {
+		goto err_clear;
+	}
+	return 0;
+#endif
+err_clear:
+	memset(optval, 0, optlen);	/* hand back a zeroed buffer */
+	return -EINVAL;
+}
+
+static const struct bpf_func_proto bpf_getsockopt_proto = {
+	.func		= bpf_getsockopt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,		/* 0 or -EINVAL */
+	.arg1_type	= ARG_PTR_TO_CTX,	/* bpf_sock */
+	.arg2_type	= ARG_ANYTHING,		/* level */
+	.arg3_type	= ARG_ANYTHING,		/* optname */
+	.arg4_type	= ARG_PTR_TO_UNINIT_MEM, /* optval */
+	.arg5_type	= ARG_CONST_SIZE,	/* optlen */
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -2920,6 +3354,20 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 }
 
 static const struct bpf_func_proto *
+sock_filter_func_proto(enum bpf_func_id func_id)
+{
+	/* Sockets of the inet and inet6 families are created from
+	 * process context, so a valid uid/gid is always available
+	 * to this helper.
+	 */
+	if (func_id == BPF_FUNC_get_current_uid_gid)
+		return &bpf_get_current_uid_gid_proto;
+
+	/* Everything else is resolved by bpf_base_func_proto(). */
+	return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -3011,6 +3459,12 @@ xdp_func_proto(enum bpf_func_id func_id)
 		return &bpf_get_smp_processor_id_proto;
 	case BPF_FUNC_xdp_adjust_head:
 		return &bpf_xdp_adjust_head_proto;
+	case BPF_FUNC_xdp_adjust_meta:
+		return &bpf_xdp_adjust_meta_proto;
+	case BPF_FUNC_redirect:
+		return &bpf_xdp_redirect_proto;
+	case BPF_FUNC_redirect_map:
+		return &bpf_xdp_redirect_map_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -3049,6 +3503,34 @@ static const struct bpf_func_proto *
 	switch (func_id) {
 	case BPF_FUNC_setsockopt:
 		return &bpf_setsockopt_proto;
+	case BPF_FUNC_getsockopt:
+		return &bpf_getsockopt_proto;
+	case BPF_FUNC_sock_map_update:
+		return &bpf_sock_map_update_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
+static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_store_bytes:
+		return &bpf_skb_store_bytes_proto;
+	case BPF_FUNC_skb_load_bytes:
+		return &bpf_skb_load_bytes_proto;
+	case BPF_FUNC_skb_pull_data:
+		return &bpf_skb_pull_data_proto;
+	case BPF_FUNC_skb_change_tail:
+		return &bpf_skb_change_tail_proto;
+	case BPF_FUNC_skb_change_head:
+		return &bpf_skb_change_head_proto;
+	case BPF_FUNC_get_socket_cookie:
+		return &bpf_get_socket_cookie_proto;
+	case BPF_FUNC_get_socket_uid:
+		return &bpf_get_socket_uid_proto;
+	case BPF_FUNC_sk_redirect_map:
+		return &bpf_sk_redirect_map_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -3106,7 +3588,12 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (off + size > offsetofend(struct __sk_buff, cb[4]))
 			return false;
 		break;
+	case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
+	case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
+	case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
+	case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
 	case bpf_ctx_range(struct __sk_buff, data):
+	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, data_end):
 		if (size != size_default)
 			return false;
@@ -3133,7 +3620,9 @@ static bool sk_filter_is_valid_access(int off, int size,
 	switch (off) {
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data):
+	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, data_end):
+	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 		return false;
 	}
 
@@ -3155,6 +3644,8 @@ static bool lwt_is_valid_access(int off, int size,
 {
 	switch (off) {
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
+	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+	case bpf_ctx_range(struct __sk_buff, data_meta):
 		return false;
 	}
 
@@ -3188,6 +3679,8 @@ static bool sock_filter_is_valid_access(int off, int size,
 	if (type == BPF_WRITE) {
 		switch (off) {
 		case offsetof(struct bpf_sock, bound_dev_if):
+		case offsetof(struct bpf_sock, mark):
+		case offsetof(struct bpf_sock, priority):
 			break;
 		default:
 			return false;
@@ -3205,8 +3698,8 @@ static bool sock_filter_is_valid_access(int off, int size,
 	return true;
 }
 
-static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
-			       const struct bpf_prog *prog)
+static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
+				const struct bpf_prog *prog, int drop_verdict)
 {
 	struct bpf_insn *insn = insn_buf;
 
@@ -3233,7 +3726,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
 	 * return TC_ACT_SHOT;
 	 */
 	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
-	*insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT);
+	*insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
 	*insn++ = BPF_EXIT_INSN();
 
 	/* restore: */
@@ -3244,6 +3737,12 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
 	return insn - insn_buf;
 }
 
+static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
+			       const struct bpf_prog *prog)
+{
+	return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
+}
+
 static bool tc_cls_act_is_valid_access(int off, int size,
 				       enum bpf_access_type type,
 				       struct bpf_insn_access_aux *info)
@@ -3265,9 +3764,14 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data):
 		info->reg_type = PTR_TO_PACKET;
 		break;
+	case bpf_ctx_range(struct __sk_buff, data_meta):
+		info->reg_type = PTR_TO_PACKET_META;
+		break;
 	case bpf_ctx_range(struct __sk_buff, data_end):
 		info->reg_type = PTR_TO_PACKET_END;
 		break;
+	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+		return false;
 	}
 
 	return bpf_skb_is_valid_access(off, size, type, info);
@@ -3296,6 +3800,9 @@ static bool xdp_is_valid_access(int off, int size,
 	case offsetof(struct xdp_md, data):
 		info->reg_type = PTR_TO_PACKET;
 		break;
+	case offsetof(struct xdp_md, data_meta):
+		info->reg_type = PTR_TO_PACKET_META;
+		break;
 	case offsetof(struct xdp_md, data_end):
 		info->reg_type = PTR_TO_PACKET_END;
 		break;
@@ -3306,7 +3813,11 @@ static bool xdp_is_valid_access(int off, int size,
 
 void bpf_warn_invalid_xdp_action(u32 act)
 {
-	WARN_ONCE(1, "Illegal XDP return value %u, expect packet loss\n", act);
+	const u32 act_max = XDP_REDIRECT;
+
+	WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
+		  act > act_max ? "Illegal" : "Driver unsupported",
+		  act);
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
@@ -3340,6 +3851,46 @@ static bool sock_ops_is_valid_access(int off, int size,
 	return __is_valid_sock_ops_access(off, size);
 }
 
+static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
+			   const struct bpf_prog *prog)
+{
+	return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
+}
+
+static bool sk_skb_is_valid_access(int off, int size,
+				   enum bpf_access_type type,
+				   struct bpf_insn_access_aux *info)
+{
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
+	case bpf_ctx_range(struct __sk_buff, data_meta):
+		return false;
+	}
+	/* Writes are limited to tc_index and priority. */
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case bpf_ctx_range(struct __sk_buff, tc_index):
+		case bpf_ctx_range(struct __sk_buff, priority):
+			break;
+		default:
+			return false;
+		}
+	}
+	/* mark is rejected too; data/data_end get packet pointer types. */
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, mark):
+		return false;
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+	/* The remaining checks are done by bpf_skb_is_valid_access(). */
+	return bpf_skb_is_valid_access(off, size, type, info);
+}
+
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				  const struct bpf_insn *si,
 				  struct bpf_insn *insn_buf,
@@ -3485,6 +4036,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				      offsetof(struct sk_buff, data));
 		break;
 
+	case offsetof(struct __sk_buff, data_meta):
+		off  = si->off;
+		off -= offsetof(struct __sk_buff, data_meta);
+		off += offsetof(struct sk_buff, cb);
+		off += offsetof(struct bpf_skb_data_end, data_meta);
+		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+				      si->src_reg, off);
+		break;
+
 	case offsetof(struct __sk_buff, data_end):
 		off  = si->off;
 		off -= offsetof(struct __sk_buff, data_end);
@@ -3525,6 +4085,106 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
 #endif
 		break;
+	case offsetof(struct __sk_buff, family):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct sock_common,
+						     skc_family,
+						     2, target_size));
+		break;
+	case offsetof(struct __sk_buff, remote_ip4):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct sock_common,
+						     skc_daddr,
+						     4, target_size));
+		break;
+	case offsetof(struct __sk_buff, local_ip4):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+					  skc_rcv_saddr) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct sock_common,
+						     skc_rcv_saddr,
+						     4, target_size));
+		break;
+	case offsetof(struct __sk_buff, remote_ip6[0]) ...
+	     offsetof(struct __sk_buff, remote_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+					  skc_v6_daddr.s6_addr32[0]) != 4);
+
+		off = si->off;
+		off -= offsetof(struct __sk_buff, remote_ip6[0]);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common,
+					       skc_v6_daddr.s6_addr32[0]) +
+				      off);
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+		break;
+	case offsetof(struct __sk_buff, local_ip6[0]) ...
+	     offsetof(struct __sk_buff, local_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);
+
+		off = si->off;
+		off -= offsetof(struct __sk_buff, local_ip6[0]);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common,
+					       skc_v6_rcv_saddr.s6_addr32[0]) +
+				      off);
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+		break;
+
+	case offsetof(struct __sk_buff, remote_port):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct sock_common,
+						     skc_dport,
+						     2, target_size));
+#ifndef __BIG_ENDIAN_BITFIELD
+		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
+#endif
+		break;
+
+	case offsetof(struct __sk_buff, local_port):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct sock_common,
+						     skc_num, 2, target_size));
+		break;
 	}
 
 	return insn - insn_buf;
@@ -3549,6 +4209,28 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 				      offsetof(struct sock, sk_bound_dev_if));
 		break;
 
+	case offsetof(struct bpf_sock, mark):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
+					offsetof(struct sock, sk_mark));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      offsetof(struct sock, sk_mark));
+		break;
+
+	case offsetof(struct bpf_sock, priority):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
+					offsetof(struct sock, sk_priority));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      offsetof(struct sock, sk_priority));
+		break;
+
 	case offsetof(struct bpf_sock, family):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
 
@@ -3611,6 +4293,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 				      si->dst_reg, si->src_reg,
 				      offsetof(struct xdp_buff, data));
 		break;
+	case offsetof(struct xdp_md, data_meta):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct xdp_buff, data_meta));
+		break;
 	case offsetof(struct xdp_md, data_end):
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
 				      si->dst_reg, si->src_reg,
@@ -3754,61 +4441,120 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
-const struct bpf_verifier_ops sk_filter_prog_ops = {
+static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
+				     const struct bpf_insn *si,
+				     struct bpf_insn *insn_buf,
+				     struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *next = insn_buf;
+	int cb_off;
+
+	/* Every field except data_end uses bpf_convert_ctx_access(). */
+	if (si->off != offsetof(struct __sk_buff, data_end))
+		return bpf_convert_ctx_access(type, si, insn_buf, prog,
+					      target_size);
+
+	/* data_end is read through the tcp_skb_cb layout of skb->cb,
+	 * so emit a pointer-sized load from that offset.
+	 */
+	cb_off  = si->off;
+	cb_off -= offsetof(struct __sk_buff, data_end);
+	cb_off += offsetof(struct sk_buff, cb);
+	cb_off += offsetof(struct tcp_skb_cb, bpf.data_end);
+	*next++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+			      si->src_reg, cb_off);
+	return next - insn_buf;
+}
+
+const struct bpf_verifier_ops sk_filter_verifier_ops = {
 	.get_func_proto		= sk_filter_func_proto,
 	.is_valid_access	= sk_filter_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
 };
 
-const struct bpf_verifier_ops tc_cls_act_prog_ops = {
+const struct bpf_prog_ops sk_filter_prog_ops = {
+};
+
+const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
 	.get_func_proto		= tc_cls_act_func_proto,
 	.is_valid_access	= tc_cls_act_is_valid_access,
 	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
 	.gen_prologue		= tc_cls_act_prologue,
+};
+
+const struct bpf_prog_ops tc_cls_act_prog_ops = {
 	.test_run		= bpf_prog_test_run_skb,
 };
 
-const struct bpf_verifier_ops xdp_prog_ops = {
+const struct bpf_verifier_ops xdp_verifier_ops = {
 	.get_func_proto		= xdp_func_proto,
 	.is_valid_access	= xdp_is_valid_access,
 	.convert_ctx_access	= xdp_convert_ctx_access,
+};
+
+const struct bpf_prog_ops xdp_prog_ops = {
 	.test_run		= bpf_prog_test_run_xdp,
 };
 
-const struct bpf_verifier_ops cg_skb_prog_ops = {
+const struct bpf_verifier_ops cg_skb_verifier_ops = {
 	.get_func_proto		= sk_filter_func_proto,
 	.is_valid_access	= sk_filter_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_skb_prog_ops = {
 	.test_run		= bpf_prog_test_run_skb,
 };
 
-const struct bpf_verifier_ops lwt_inout_prog_ops = {
+const struct bpf_verifier_ops lwt_inout_verifier_ops = {
 	.get_func_proto		= lwt_inout_func_proto,
 	.is_valid_access	= lwt_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops lwt_inout_prog_ops = {
 	.test_run		= bpf_prog_test_run_skb,
 };
 
-const struct bpf_verifier_ops lwt_xmit_prog_ops = {
+const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
 	.get_func_proto		= lwt_xmit_func_proto,
 	.is_valid_access	= lwt_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
 	.gen_prologue		= tc_cls_act_prologue,
+};
+
+const struct bpf_prog_ops lwt_xmit_prog_ops = {
 	.test_run		= bpf_prog_test_run_skb,
 };
 
-const struct bpf_verifier_ops cg_sock_prog_ops = {
-	.get_func_proto		= bpf_base_func_proto,
+const struct bpf_verifier_ops cg_sock_verifier_ops = {
+	.get_func_proto		= sock_filter_func_proto,
 	.is_valid_access	= sock_filter_is_valid_access,
 	.convert_ctx_access	= sock_filter_convert_ctx_access,
 };
 
-const struct bpf_verifier_ops sock_ops_prog_ops = {
+const struct bpf_prog_ops cg_sock_prog_ops = {
+};
+
+const struct bpf_verifier_ops sock_ops_verifier_ops = {
 	.get_func_proto		= sock_ops_func_proto,
 	.is_valid_access	= sock_ops_is_valid_access,
 	.convert_ctx_access	= sock_ops_convert_ctx_access,
 };
 
+const struct bpf_prog_ops sock_ops_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_skb_verifier_ops = {
+	.get_func_proto		= sk_skb_func_proto,
+	.is_valid_access	= sk_skb_is_valid_access,
+	.convert_ctx_access	= sk_skb_convert_ctx_access,
+	.gen_prologue		= sk_skb_prologue,	/* drop verdict: SK_DROP */
+};
+
+const struct bpf_prog_ops sk_skb_prog_ops = {
+};
+
 int sk_detach_filter(struct sock *sk)
 {
 	int ret = -ENOENT;
diff --git a/net/core/flow.c b/net/core/flow.c
deleted file mode 100644
index f7f5d1932a27..000000000000
--- a/net/core/flow.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/* flow.c: Generic flow cache.
- *
- * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/percpu.h>
-#include <linux/bitops.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/mutex.h>
-#include <net/flow.h>
-#include <linux/atomic.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-
-struct flow_cache_entry {
-	union {
-		struct hlist_node	hlist;
-		struct list_head	gc_list;
-	} u;
-	struct net			*net;
-	u16				family;
-	u8				dir;
-	u32				genid;
-	struct flowi			key;
-	struct flow_cache_object	*object;
-};
-
-struct flow_flush_info {
-	struct flow_cache		*cache;
-	atomic_t			cpuleft;
-	struct completion		completion;
-};
-
-static struct kmem_cache *flow_cachep __read_mostly;
-
-#define flow_cache_hash_size(cache)	(1U << (cache)->hash_shift)
-#define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
-
-static void flow_cache_new_hashrnd(unsigned long arg)
-{
-	struct flow_cache *fc = (void *) arg;
-	int i;
-
-	for_each_possible_cpu(i)
-		per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
-
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-}
-
-static int flow_entry_valid(struct flow_cache_entry *fle,
-				struct netns_xfrm *xfrm)
-{
-	if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
-		return 0;
-	if (fle->object && !fle->object->ops->check(fle->object))
-		return 0;
-	return 1;
-}
-
-static void flow_entry_kill(struct flow_cache_entry *fle,
-				struct netns_xfrm *xfrm)
-{
-	if (fle->object)
-		fle->object->ops->delete(fle->object);
-	kmem_cache_free(flow_cachep, fle);
-}
-
-static void flow_cache_gc_task(struct work_struct *work)
-{
-	struct list_head gc_list;
-	struct flow_cache_entry *fce, *n;
-	struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
-						flow_cache_gc_work);
-
-	INIT_LIST_HEAD(&gc_list);
-	spin_lock_bh(&xfrm->flow_cache_gc_lock);
-	list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
-	spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-
-	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
-		flow_entry_kill(fce, xfrm);
-		atomic_dec(&xfrm->flow_cache_gc_count);
-	}
-}
-
-static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
-				     unsigned int deleted,
-				     struct list_head *gc_list,
-				     struct netns_xfrm *xfrm)
-{
-	if (deleted) {
-		atomic_add(deleted, &xfrm->flow_cache_gc_count);
-		fcp->hash_count -= deleted;
-		spin_lock_bh(&xfrm->flow_cache_gc_lock);
-		list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
-		spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-		schedule_work(&xfrm->flow_cache_gc_work);
-	}
-}
-
-static void __flow_cache_shrink(struct flow_cache *fc,
-				struct flow_cache_percpu *fcp,
-				unsigned int shrink_to)
-{
-	struct flow_cache_entry *fle;
-	struct hlist_node *tmp;
-	LIST_HEAD(gc_list);
-	unsigned int deleted = 0;
-	struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
-						flow_cache_global);
-	unsigned int i;
-
-	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		unsigned int saved = 0;
-
-		hlist_for_each_entry_safe(fle, tmp,
-					  &fcp->hash_table[i], u.hlist) {
-			if (saved < shrink_to &&
-			    flow_entry_valid(fle, xfrm)) {
-				saved++;
-			} else {
-				deleted++;
-				hlist_del(&fle->u.hlist);
-				list_add_tail(&fle->u.gc_list, &gc_list);
-			}
-		}
-	}
-
-	flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-}
-
-static void flow_cache_shrink(struct flow_cache *fc,
-			      struct flow_cache_percpu *fcp)
-{
-	unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
-
-	__flow_cache_shrink(fc, fcp, shrink_to);
-}
-
-static void flow_new_hash_rnd(struct flow_cache *fc,
-			      struct flow_cache_percpu *fcp)
-{
-	get_random_bytes(&fcp->hash_rnd, sizeof(u32));
-	fcp->hash_rnd_recalc = 0;
-	__flow_cache_shrink(fc, fcp, 0);
-}
-
-static u32 flow_hash_code(struct flow_cache *fc,
-			  struct flow_cache_percpu *fcp,
-			  const struct flowi *key,
-			  unsigned int keysize)
-{
-	const u32 *k = (const u32 *) key;
-	const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
-
-	return jhash2(k, length, fcp->hash_rnd)
-		& (flow_cache_hash_size(fc) - 1);
-}
-
-/* I hear what you're saying, use memcmp.  But memcmp cannot make
- * important assumptions that we can here, such as alignment.
- */
-static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
-			    unsigned int keysize)
-{
-	const flow_compare_t *k1, *k1_lim, *k2;
-
-	k1 = (const flow_compare_t *) key1;
-	k1_lim = k1 + keysize;
-
-	k2 = (const flow_compare_t *) key2;
-
-	do {
-		if (*k1++ != *k2++)
-			return 1;
-	} while (k1 < k1_lim);
-
-	return 0;
-}
-
-struct flow_cache_object *
-flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
-		  flow_resolve_t resolver, void *ctx)
-{
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-	struct flow_cache_percpu *fcp;
-	struct flow_cache_entry *fle, *tfle;
-	struct flow_cache_object *flo;
-	unsigned int keysize;
-	unsigned int hash;
-
-	local_bh_disable();
-	fcp = this_cpu_ptr(fc->percpu);
-
-	fle = NULL;
-	flo = NULL;
-
-	keysize = flow_key_size(family);
-	if (!keysize)
-		goto nocache;
-
-	/* Packet really early in init?  Making flow_cache_init a
-	 * pre-smp initcall would solve this.  --RR */
-	if (!fcp->hash_table)
-		goto nocache;
-
-	if (fcp->hash_rnd_recalc)
-		flow_new_hash_rnd(fc, fcp);
-
-	hash = flow_hash_code(fc, fcp, key, keysize);
-	hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) {
-		if (tfle->net == net &&
-		    tfle->family == family &&
-		    tfle->dir == dir &&
-		    flow_key_compare(key, &tfle->key, keysize) == 0) {
-			fle = tfle;
-			break;
-		}
-	}
-
-	if (unlikely(!fle)) {
-		if (fcp->hash_count > fc->high_watermark)
-			flow_cache_shrink(fc, fcp);
-
-		if (atomic_read(&net->xfrm.flow_cache_gc_count) >
-		    2 * num_online_cpus() * fc->high_watermark) {
-			flo = ERR_PTR(-ENOBUFS);
-			goto ret_object;
-		}
-
-		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
-		if (fle) {
-			fle->net = net;
-			fle->family = family;
-			fle->dir = dir;
-			memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
-			fle->object = NULL;
-			hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
-			fcp->hash_count++;
-		}
-	} else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
-		flo = fle->object;
-		if (!flo)
-			goto ret_object;
-		flo = flo->ops->get(flo);
-		if (flo)
-			goto ret_object;
-	} else if (fle->object) {
-	        flo = fle->object;
-	        flo->ops->delete(flo);
-	        fle->object = NULL;
-	}
-
-nocache:
-	flo = NULL;
-	if (fle) {
-		flo = fle->object;
-		fle->object = NULL;
-	}
-	flo = resolver(net, key, family, dir, flo, ctx);
-	if (fle) {
-		fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
-		if (!IS_ERR(flo))
-			fle->object = flo;
-		else
-			fle->genid--;
-	} else {
-		if (!IS_ERR_OR_NULL(flo))
-			flo->ops->delete(flo);
-	}
-ret_object:
-	local_bh_enable();
-	return flo;
-}
-EXPORT_SYMBOL(flow_cache_lookup);
-
-static void flow_cache_flush_tasklet(unsigned long data)
-{
-	struct flow_flush_info *info = (void *)data;
-	struct flow_cache *fc = info->cache;
-	struct flow_cache_percpu *fcp;
-	struct flow_cache_entry *fle;
-	struct hlist_node *tmp;
-	LIST_HEAD(gc_list);
-	unsigned int deleted = 0;
-	struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
-						flow_cache_global);
-	unsigned int i;
-
-	fcp = this_cpu_ptr(fc->percpu);
-	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		hlist_for_each_entry_safe(fle, tmp,
-					  &fcp->hash_table[i], u.hlist) {
-			if (flow_entry_valid(fle, xfrm))
-				continue;
-
-			deleted++;
-			hlist_del(&fle->u.hlist);
-			list_add_tail(&fle->u.gc_list, &gc_list);
-		}
-	}
-
-	flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-
-	if (atomic_dec_and_test(&info->cpuleft))
-		complete(&info->completion);
-}
-
-/*
- * Return whether a cpu needs flushing.  Conservatively, we assume
- * the presence of any entries means the core may require flushing,
- * since the flow_cache_ops.check() function may assume it's running
- * on the same core as the per-cpu cache component.
- */
-static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
-{
-	struct flow_cache_percpu *fcp;
-	unsigned int i;
-
-	fcp = per_cpu_ptr(fc->percpu, cpu);
-	for (i = 0; i < flow_cache_hash_size(fc); i++)
-		if (!hlist_empty(&fcp->hash_table[i]))
-			return 0;
-	return 1;
-}
-
-static void flow_cache_flush_per_cpu(void *data)
-{
-	struct flow_flush_info *info = data;
-	struct tasklet_struct *tasklet;
-
-	tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet;
-	tasklet->data = (unsigned long)info;
-	tasklet_schedule(tasklet);
-}
-
-void flow_cache_flush(struct net *net)
-{
-	struct flow_flush_info info;
-	cpumask_var_t mask;
-	int i, self;
-
-	/* Track which cpus need flushing to avoid disturbing all cores. */
-	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
-		return;
-	cpumask_clear(mask);
-
-	/* Don't want cpus going down or up during this. */
-	get_online_cpus();
-	mutex_lock(&net->xfrm.flow_flush_sem);
-	info.cache = &net->xfrm.flow_cache_global;
-	for_each_online_cpu(i)
-		if (!flow_cache_percpu_empty(info.cache, i))
-			cpumask_set_cpu(i, mask);
-	atomic_set(&info.cpuleft, cpumask_weight(mask));
-	if (atomic_read(&info.cpuleft) == 0)
-		goto done;
-
-	init_completion(&info.completion);
-
-	local_bh_disable();
-	self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
-	on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
-	if (self)
-		flow_cache_flush_tasklet((unsigned long)&info);
-	local_bh_enable();
-
-	wait_for_completion(&info.completion);
-
-done:
-	mutex_unlock(&net->xfrm.flow_flush_sem);
-	put_online_cpus();
-	free_cpumask_var(mask);
-}
-
-static void flow_cache_flush_task(struct work_struct *work)
-{
-	struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
-						flow_cache_flush_work);
-	struct net *net = container_of(xfrm, struct net, xfrm);
-
-	flow_cache_flush(net);
-}
-
-void flow_cache_flush_deferred(struct net *net)
-{
-	schedule_work(&net->xfrm.flow_cache_flush_work);
-}
-
-static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
-{
-	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-	unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
-
-	if (!fcp->hash_table) {
-		fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
-		if (!fcp->hash_table) {
-			pr_err("NET: failed to allocate flow cache sz %u\n", sz);
-			return -ENOMEM;
-		}
-		fcp->hash_rnd_recalc = 1;
-		fcp->hash_count = 0;
-		tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
-	}
-	return 0;
-}
-
-static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
-{
-	struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-
-	return flow_cache_cpu_prepare(fc, cpu);
-}
-
-static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
-{
-	struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-
-	__flow_cache_shrink(fc, fcp, 0);
-	return 0;
-}
-
-int flow_cache_init(struct net *net)
-{
-	int i;
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
-	if (!flow_cachep)
-		flow_cachep = kmem_cache_create("flow_cache",
-						sizeof(struct flow_cache_entry),
-						0, SLAB_PANIC, NULL);
-	spin_lock_init(&net->xfrm.flow_cache_gc_lock);
-	INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
-	INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
-	INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
-	mutex_init(&net->xfrm.flow_flush_sem);
-	atomic_set(&net->xfrm.flow_cache_gc_count, 0);
-
-	fc->hash_shift = 10;
-	fc->low_watermark = 2 * flow_cache_hash_size(fc);
-	fc->high_watermark = 4 * flow_cache_hash_size(fc);
-
-	fc->percpu = alloc_percpu(struct flow_cache_percpu);
-	if (!fc->percpu)
-		return -ENOMEM;
-
-	if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
-		goto err;
-
-	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
-		    (unsigned long) fc);
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-
-	return 0;
-
-err:
-	for_each_possible_cpu(i) {
-		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
-		kfree(fcp->hash_table);
-		fcp->hash_table = NULL;
-	}
-
-	free_percpu(fc->percpu);
-	fc->percpu = NULL;
-
-	return -ENOMEM;
-}
-EXPORT_SYMBOL(flow_cache_init);
-
-void flow_cache_fini(struct net *net)
-{
-	int i;
-	struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
-	del_timer_sync(&fc->rnd_timer);
-
-	cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
-
-	for_each_possible_cpu(i) {
-		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
-		kfree(fcp->hash_table);
-		fcp->hash_table = NULL;
-	}
-
-	free_percpu(fc->percpu);
-	fc->percpu = NULL;
-}
-EXPORT_SYMBOL(flow_cache_fini);
-
-void __init flow_cache_hp_init(void)
-{
-	int ret;
-
-	ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
-				      "net/flow:prepare",
-				      flow_cache_cpu_up_prep,
-				      flow_cache_cpu_dead);
-	WARN_ON(ret < 0);
-}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index fc5fc4594c90..15ce30063765 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -4,10 +4,13 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/if_vlan.h>
+#include <net/dsa.h>
+#include <net/dst_metadata.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/gre.h>
 #include <net/pptp.h>
+#include <net/tipc.h>
 #include <linux/igmp.h>
 #include <linux/icmp.h>
 #include <linux/sctp.h>
@@ -114,11 +117,101 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
 }
 EXPORT_SYMBOL(__skb_flow_get_ports);
 
-enum flow_dissect_ret {
-	FLOW_DISSECT_RET_OUT_GOOD,
-	FLOW_DISSECT_RET_OUT_BAD,
-	FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
-};
+static void
+skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
+				   struct flow_dissector *flow_dissector,
+				   void *target_container)
+{	/* Store @type as addr_type of the ENC_CONTROL key, if that key is used. */
+	struct flow_dissector_key_control *ctrl;
+
+	if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL))
+		return;	/* ENC_CONTROL not requested: nothing to record */
+
+	ctrl = skb_flow_dissector_target(flow_dissector,
+					 FLOW_DISSECTOR_KEY_ENC_CONTROL,
+					 target_container);
+	ctrl->addr_type = type;	/* address key id chosen by the caller */
+}
+
+static void
+__skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+			       struct flow_dissector *flow_dissector,
+			       void *target_container)
+{	/* Copy the skb's tunnel info key into whichever ENC_* keys are in use. */
+	struct ip_tunnel_info *info;
+	struct ip_tunnel_key *key;
+
+	/* Bail out early unless at least one ENC_* key was requested. */
+	if (!dissector_uses_key(flow_dissector,
+				FLOW_DISSECTOR_KEY_ENC_KEYID) &&
+	    !dissector_uses_key(flow_dissector,
+				FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
+	    !dissector_uses_key(flow_dissector,
+				FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
+	    !dissector_uses_key(flow_dissector,
+				FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
+	    !dissector_uses_key(flow_dissector,
+				FLOW_DISSECTOR_KEY_ENC_PORTS))
+		return;
+
+	info = skb_tunnel_info(skb);	/* NOTE(review): skb is not NULL-checked */
+	if (!info)
+		return;	/* no tunnel info on this skb */
+
+	key = &info->key;
+
+	switch (ip_tunnel_info_af(info)) {
+	case AF_INET:
+		skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						   flow_dissector,
+						   target_container);
+		if (dissector_uses_key(flow_dissector,
+				       FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+			struct flow_dissector_key_ipv4_addrs *ipv4;
+
+			ipv4 = skb_flow_dissector_target(flow_dissector,
+							 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+							 target_container);
+			ipv4->src = key->u.ipv4.src;
+			ipv4->dst = key->u.ipv4.dst;
+		}
+		break;
+	case AF_INET6:
+		skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						   flow_dissector,
+						   target_container);
+		if (dissector_uses_key(flow_dissector,
+				       FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
+			struct flow_dissector_key_ipv6_addrs *ipv6;
+
+			ipv6 = skb_flow_dissector_target(flow_dissector,
+							 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+							 target_container);
+			ipv6->src = key->u.ipv6.src;
+			ipv6->dst = key->u.ipv6.dst;
+		}
+		break;
+	}	/* no default: other families leave the ENC address keys untouched */
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_dissector_key_keyid *keyid;
+
+		keyid = skb_flow_dissector_target(flow_dissector,
+						  FLOW_DISSECTOR_KEY_ENC_KEYID,
+						  target_container);
+		keyid->keyid = tunnel_id_to_key32(key->tun_id);
+	}
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+		struct flow_dissector_key_ports *tp;
+
+		tp = skb_flow_dissector_target(flow_dissector,
+					       FLOW_DISSECTOR_KEY_ENC_PORTS,
+					       target_container);
+		tp->src = key->tp_src;
+		tp->dst = key->tp_dst;
+	}
+}
 
 static enum flow_dissect_ret
 __skb_flow_dissect_mpls(const struct sk_buff *skb,
@@ -340,7 +433,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
 	if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
 		return FLOW_DISSECT_RET_OUT_GOOD;
 
-	return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+	return FLOW_DISSECT_RET_PROTO_AGAIN;
 }
 
 static void
@@ -401,6 +494,18 @@ __skb_flow_dissect_ipv6(const struct sk_buff *skb,
 	key_ip->ttl = iph->hop_limit;
 }
 
+/* Cap on protocol headers parsed per __skb_flow_dissect() call;
+ * checked by skb_flow_dissect_allowed() before each *_AGAIN re-dispatch.
+ */
+#define MAX_FLOW_DISSECT_HDRS	15
+
+static bool skb_flow_dissect_allowed(int *num_hdrs)
+{	/* Bump *num_hdrs; true while it is <= MAX_FLOW_DISSECT_HDRS. */
+	++*num_hdrs;	/* incremented even when the answer is false */
+
+	return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
+}
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -430,7 +535,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_icmp *key_icmp;
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
+	enum flow_dissect_ret fdret;
 	bool skip_vlan = false;
+	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
 
@@ -440,6 +547,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 			 skb->vlan_proto : skb->protocol;
 		nhoff = skb_network_offset(skb);
 		hlen = skb_headlen(skb);
+#if IS_ENABLED(CONFIG_NET_DSA)
+		if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) {
+			const struct dsa_device_ops *ops;
+			int offset;
+
+			ops = skb->dev->dsa_ptr->tag_ops;
+			if (ops->flow_dissect &&
+			    !ops->flow_dissect(skb, &proto, &offset)) {
+				hlen -= offset;
+				nhoff += offset;
+			}
+		}
+#endif
 	}
 
 	/* It is ensured by skb_flow_dissector_init() that control key will
@@ -456,6 +576,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 					      FLOW_DISSECTOR_KEY_BASIC,
 					      target_container);
 
+	__skb_flow_dissect_tunnel_info(skb, flow_dissector,
+				       target_container);
+
 	if (dissector_uses_key(flow_dissector,
 			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
 		struct ethhdr *eth = eth_hdr(skb);
@@ -468,14 +591,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	}
 
 proto_again:
+	fdret = FLOW_DISSECT_RET_CONTINUE;
+
 	switch (proto) {
 	case htons(ETH_P_IP): {
 		const struct iphdr *iph;
 		struct iphdr _iph;
-ip:
+
 		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
-		if (!iph || iph->ihl < 5)
-			goto out_bad;
+		if (!iph || iph->ihl < 5) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
+
 		nhoff += iph->ihl * 4;
 
 		ip_proto = iph->protocol;
@@ -495,19 +623,25 @@ ip:
 			key_control->flags |= FLOW_DIS_IS_FRAGMENT;
 
 			if (iph->frag_off & htons(IP_OFFSET)) {
-				goto out_good;
+				fdret = FLOW_DISSECT_RET_OUT_GOOD;
+				break;
 			} else {
 				key_control->flags |= FLOW_DIS_FIRST_FRAG;
-				if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
-					goto out_good;
+				if (!(flags &
+				      FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) {
+					fdret = FLOW_DISSECT_RET_OUT_GOOD;
+					break;
+				}
 			}
 		}
 
 		__skb_flow_dissect_ipv4(skb, flow_dissector,
 					target_container, data, iph);
 
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
-			goto out_good;
+		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+			break;
+		}
 
 		break;
 	}
@@ -515,10 +649,11 @@ ip:
 		const struct ipv6hdr *iph;
 		struct ipv6hdr _iph;
 
-ipv6:
 		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
-		if (!iph)
-			goto out_bad;
+		if (!iph) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		ip_proto = iph->nexthdr;
 		nhoff += sizeof(struct ipv6hdr);
@@ -547,15 +682,17 @@ ipv6:
 								     target_container);
 				key_tags->flow_label = ntohl(flow_label);
 			}
-			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
-				goto out_good;
+			if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) {
+				fdret = FLOW_DISSECT_RET_OUT_GOOD;
+				break;
+			}
 		}
 
 		__skb_flow_dissect_ipv6(skb, flow_dissector,
 					target_container, data, iph);
 
 		if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
-			goto out_good;
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
 
 		break;
 	}
@@ -571,12 +708,17 @@ ipv6:
 		if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
 			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
 						    data, hlen, &_vlan);
-			if (!vlan)
-				goto out_bad;
+			if (!vlan) {
+				fdret = FLOW_DISSECT_RET_OUT_BAD;
+				break;
+			}
+
 			proto = vlan->h_vlan_encapsulated_proto;
 			nhoff += sizeof(*vlan);
-			if (skip_vlan)
-				goto proto_again;
+			if (skip_vlan) {
+				fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+				break;
+			}
 		}
 
 		skip_vlan = true;
@@ -599,7 +741,8 @@ ipv6:
 			}
 		}
 
-		goto proto_again;
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
 	}
 	case htons(ETH_P_PPP_SES): {
 		struct {
@@ -607,86 +750,104 @@ ipv6:
 			__be16 proto;
 		} *hdr, _hdr;
 		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
-		if (!hdr)
-			goto out_bad;
+		if (!hdr) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
+
 		proto = hdr->proto;
 		nhoff += PPPOE_SES_HLEN;
 		switch (proto) {
 		case htons(PPP_IP):
-			goto ip;
+			proto = htons(ETH_P_IP);
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+			break;
 		case htons(PPP_IPV6):
-			goto ipv6;
+			proto = htons(ETH_P_IPV6);
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+			break;
 		default:
-			goto out_bad;
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
 		}
+		break;
 	}
 	case htons(ETH_P_TIPC): {
-		struct {
-			__be32 pre[3];
-			__be32 srcnode;
-		} *hdr, _hdr;
-		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
-		if (!hdr)
-			goto out_bad;
+		struct tipc_basic_hdr *hdr, _hdr;
+
+		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
+					   data, hlen, &_hdr);
+		if (!hdr) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		if (dissector_uses_key(flow_dissector,
-				       FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
+				       FLOW_DISSECTOR_KEY_TIPC)) {
 			key_addrs = skb_flow_dissector_target(flow_dissector,
-							      FLOW_DISSECTOR_KEY_TIPC_ADDRS,
+							      FLOW_DISSECTOR_KEY_TIPC,
 							      target_container);
-			key_addrs->tipcaddrs.srcnode = hdr->srcnode;
-			key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
+			key_addrs->tipckey.key = tipc_hdr_rps_key(hdr);
+			key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC;
 		}
-		goto out_good;
+		fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		break;
 	}
 
 	case htons(ETH_P_MPLS_UC):
 	case htons(ETH_P_MPLS_MC):
-mpls:
-		switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+		fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
 						target_container, data,
-						nhoff, hlen)) {
-		case FLOW_DISSECT_RET_OUT_GOOD:
-			goto out_good;
-		case FLOW_DISSECT_RET_OUT_BAD:
-		default:
-			goto out_bad;
-		}
+						nhoff, hlen);
+		break;
 	case htons(ETH_P_FCOE):
-		if ((hlen - nhoff) < FCOE_HEADER_LEN)
-			goto out_bad;
+		if ((hlen - nhoff) < FCOE_HEADER_LEN) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		nhoff += FCOE_HEADER_LEN;
-		goto out_good;
+		fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		break;
 
 	case htons(ETH_P_ARP):
 	case htons(ETH_P_RARP):
-		switch (__skb_flow_dissect_arp(skb, flow_dissector,
+		fdret = __skb_flow_dissect_arp(skb, flow_dissector,
 					       target_container, data,
-					       nhoff, hlen)) {
-		case FLOW_DISSECT_RET_OUT_GOOD:
-			goto out_good;
-		case FLOW_DISSECT_RET_OUT_BAD:
-		default:
-			goto out_bad;
-		}
+					       nhoff, hlen);
+		break;
+
+	default:
+		fdret = FLOW_DISSECT_RET_OUT_BAD;
+		break;
+	}
+
+	/* Process result of proto processing */
+	switch (fdret) {
+	case FLOW_DISSECT_RET_OUT_GOOD:
+		goto out_good;
+	case FLOW_DISSECT_RET_PROTO_AGAIN:
+		if (skb_flow_dissect_allowed(&num_hdrs))
+			goto proto_again;
+		goto out_good;
+	case FLOW_DISSECT_RET_CONTINUE:
+	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+		break;
+	case FLOW_DISSECT_RET_OUT_BAD:
 	default:
 		goto out_bad;
 	}
 
 ip_proto_again:
+	fdret = FLOW_DISSECT_RET_CONTINUE;
+
 	switch (ip_proto) {
 	case IPPROTO_GRE:
-		switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+		fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
 					       target_container, data,
-					       &proto, &nhoff, &hlen, flags)) {
-		case FLOW_DISSECT_RET_OUT_GOOD:
-			goto out_good;
-		case FLOW_DISSECT_RET_OUT_BAD:
-			goto out_bad;
-		case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
-			goto proto_again;
-		}
+					       &proto, &nhoff, &hlen, flags);
+		break;
+
 	case NEXTHDR_HOP:
 	case NEXTHDR_ROUTING:
 	case NEXTHDR_DEST: {
@@ -697,13 +858,16 @@ ip_proto_again:
 
 		opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
 					      data, hlen, &_opthdr);
-		if (!opthdr)
-			goto out_bad;
+		if (!opthdr) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		ip_proto = opthdr[0];
 		nhoff += (opthdr[1] + 1) << 3;
 
-		goto ip_proto_again;
+		fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
+		break;
 	}
 	case NEXTHDR_FRAGMENT: {
 		struct frag_hdr _fh, *fh;
@@ -714,8 +878,10 @@ ip_proto_again:
 		fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
 					  data, hlen, &_fh);
 
-		if (!fh)
-			goto out_bad;
+		if (!fh) {
+			fdret = FLOW_DISSECT_RET_OUT_BAD;
+			break;
+		}
 
 		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
 
@@ -724,34 +890,50 @@ ip_proto_again:
 
 		if (!(fh->frag_off & htons(IP6_OFFSET))) {
 			key_control->flags |= FLOW_DIS_FIRST_FRAG;
-			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
-				goto ip_proto_again;
+			if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) {
+				fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
+				break;
+			}
 		}
-		goto out_good;
+
+		fdret = FLOW_DISSECT_RET_OUT_GOOD;
+		break;
 	}
 	case IPPROTO_IPIP:
 		proto = htons(ETH_P_IP);
 
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
-			goto out_good;
+		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+			break;
+		}
+
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
 
-		goto ip;
 	case IPPROTO_IPV6:
 		proto = htons(ETH_P_IPV6);
 
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;
-		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
-			goto out_good;
+		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
+			fdret = FLOW_DISSECT_RET_OUT_GOOD;
+			break;
+		}
+
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
+
 
-		goto ipv6;
 	case IPPROTO_MPLS:
 		proto = htons(ETH_P_MPLS_UC);
-		goto mpls;
+		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+		break;
+
 	case IPPROTO_TCP:
 		__skb_flow_dissect_tcp(skb, flow_dissector, target_container,
 				       data, nhoff, hlen);
 		break;
+
 	default:
 		break;
 	}
@@ -773,6 +955,24 @@ ip_proto_again:
 		key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
 	}
 
+	/* Process result of IP proto processing */
+	switch (fdret) {
+	case FLOW_DISSECT_RET_PROTO_AGAIN:
+		if (skb_flow_dissect_allowed(&num_hdrs))
+			goto proto_again;
+		break;
+	case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+		if (skb_flow_dissect_allowed(&num_hdrs))
+			goto ip_proto_again;
+		break;
+	case FLOW_DISSECT_RET_OUT_GOOD:
+	case FLOW_DISSECT_RET_CONTINUE:
+		break;
+	case FLOW_DISSECT_RET_OUT_BAD:
+	default:
+		goto out_bad;
+	}
+
 out_good:
 	ret = true;
 
@@ -824,8 +1024,8 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
 	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
 		diff -= sizeof(flow->addrs.v6addrs);
 		break;
-	case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
-		diff -= sizeof(flow->addrs.tipcaddrs);
+	case FLOW_DISSECTOR_KEY_TIPC:
+		diff -= sizeof(flow->addrs.tipckey);
 		break;
 	}
 	return (sizeof(*flow) - diff) / sizeof(u32);
@@ -839,8 +1039,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow)
 	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
 		return (__force __be32)ipv6_addr_hash(
 			&flow->addrs.v6addrs.src);
-	case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
-		return flow->addrs.tipcaddrs.srcnode;
+	case FLOW_DISSECTOR_KEY_TIPC:
+		return flow->addrs.tipckey.key;
 	default:
 		return 0;
 	}
@@ -998,51 +1198,6 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
 }
 EXPORT_SYMBOL(skb_get_hash_perturb);
 
-__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
-{
-	struct flow_keys keys;
-
-	memset(&keys, 0, sizeof(keys));
-
-	memcpy(&keys.addrs.v6addrs.src, &fl6->saddr,
-	       sizeof(keys.addrs.v6addrs.src));
-	memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr,
-	       sizeof(keys.addrs.v6addrs.dst));
-	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-	keys.ports.src = fl6->fl6_sport;
-	keys.ports.dst = fl6->fl6_dport;
-	keys.keyid.keyid = fl6->fl6_gre_key;
-	keys.tags.flow_label = (__force u32)fl6->flowlabel;
-	keys.basic.ip_proto = fl6->flowi6_proto;
-
-	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
-			  flow_keys_have_l4(&keys));
-
-	return skb->hash;
-}
-EXPORT_SYMBOL(__skb_get_hash_flowi6);
-
-__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
-{
-	struct flow_keys keys;
-
-	memset(&keys, 0, sizeof(keys));
-
-	keys.addrs.v4addrs.src = fl4->saddr;
-	keys.addrs.v4addrs.dst = fl4->daddr;
-	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-	keys.ports.src = fl4->fl4_sport;
-	keys.ports.dst = fl4->fl4_dport;
-	keys.keyid.keyid = fl4->fl4_gre_key;
-	keys.basic.ip_proto = fl4->flowi4_proto;
-
-	__skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
-			  flow_keys_have_l4(&keys));
-
-	return skb->hash;
-}
-EXPORT_SYMBOL(__skb_get_hash_flowi4);
-
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 		   const struct flow_keys *keys, int hlen)
 {
@@ -1166,8 +1321,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
 		.offset = offsetof(struct flow_keys, addrs.v6addrs),
 	},
 	{
-		.key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
-		.offset = offsetof(struct flow_keys, addrs.tipcaddrs),
+		.key_id = FLOW_DISSECTOR_KEY_TIPC,
+		.offset = offsetof(struct flow_keys, addrs.tipckey),
 	},
 	{
 		.key_id = FLOW_DISSECTOR_KEY_PORTS,
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 0385dece1f6f..7c1ffd6f9501 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -83,10 +83,10 @@ static void est_timer(unsigned long arg)
 	u64 rate, brate;
 
 	est_fetch_counters(est, &b);
-	brate = (b.bytes - est->last_bytes) << (8 - est->ewma_log);
+	brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log);
 	brate -= (est->avbps >> est->ewma_log);
 
-	rate = (u64)(b.packets - est->last_packets) << (8 - est->ewma_log);
+	rate = (u64)(b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
 	rate -= (est->avpps >> est->ewma_log);
 
 	write_seqcount_begin(&est->seq);
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 814e58a3ce8b..4b54e5f107c6 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 1307731ddfe4..e7e626fb87bb 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
 	 */
 	preempt_disable();
 	rcu_read_lock();
-	bpf_compute_data_end(skb);
+	bpf_compute_data_pointers(skb);
 	ret = bpf_prog_run_save_cb(lwt->prog, skb);
 	rcu_read_unlock();
 
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index d9cb3532f1dd..0b171756453c 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -44,6 +44,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
 		return "SEG6";
 	case LWTUNNEL_ENCAP_BPF:
 		return "BPF";
+	case LWTUNNEL_ENCAP_SEG6_LOCAL:
+		return "SEG6LOCAL";
 	case LWTUNNEL_ENCAP_IP6:
 	case LWTUNNEL_ENCAP_IP:
 	case LWTUNNEL_ENCAP_NONE:
@@ -65,7 +67,7 @@ struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
 
 	return lws;
 }
-EXPORT_SYMBOL(lwtunnel_state_alloc);
+EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);
 
 static const struct lwtunnel_encap_ops __rcu *
 		lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
@@ -80,7 +82,7 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
 			&lwtun_encaps[num],
 			NULL, ops) ? 0 : -1;
 }
-EXPORT_SYMBOL(lwtunnel_encap_add_ops);
+EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);
 
 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
 			   unsigned int encap_type)
@@ -99,7 +101,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
 
 	return ret;
 }
-EXPORT_SYMBOL(lwtunnel_encap_del_ops);
+EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);
 
 int lwtunnel_build_state(u16 encap_type,
 			 struct nlattr *encap, unsigned int family,
@@ -138,7 +140,7 @@ int lwtunnel_build_state(u16 encap_type,
 
 	return ret;
 }
-EXPORT_SYMBOL(lwtunnel_build_state);
+EXPORT_SYMBOL_GPL(lwtunnel_build_state);
 
 int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
 {
@@ -175,7 +177,7 @@ int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
 
 	return ret;
 }
-EXPORT_SYMBOL(lwtunnel_valid_encap_type);
+EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);
 
 int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
 				   struct netlink_ext_ack *extack)
@@ -205,7 +207,7 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
 
 	return 0;
 }
-EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr);
+EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);
 
 void lwtstate_free(struct lwtunnel_state *lws)
 {
@@ -219,7 +221,7 @@ void lwtstate_free(struct lwtunnel_state *lws)
 	}
 	module_put(ops->owner);
 }
-EXPORT_SYMBOL(lwtstate_free);
+EXPORT_SYMBOL_GPL(lwtstate_free);
 
 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
 {
@@ -259,7 +261,7 @@ nla_put_failure:
 
 	return (ret == -EOPNOTSUPP ? 0 : ret);
 }
-EXPORT_SYMBOL(lwtunnel_fill_encap);
+EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);
 
 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
 {
@@ -281,7 +283,7 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
 
 	return ret;
 }
-EXPORT_SYMBOL(lwtunnel_get_encap_size);
+EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);
 
 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
 {
@@ -309,7 +311,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
 
 	return ret;
 }
-EXPORT_SYMBOL(lwtunnel_cmp_encap);
+EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);
 
 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
@@ -343,7 +345,7 @@ drop:
 
 	return ret;
 }
-EXPORT_SYMBOL(lwtunnel_output);
+EXPORT_SYMBOL_GPL(lwtunnel_output);
 
 int lwtunnel_xmit(struct sk_buff *skb)
 {
@@ -378,7 +380,7 @@ drop:
 
 	return ret;
 }
-EXPORT_SYMBOL(lwtunnel_xmit);
+EXPORT_SYMBOL_GPL(lwtunnel_xmit);
 
 int lwtunnel_input(struct sk_buff *skb)
 {
@@ -412,4 +414,4 @@ drop:
 
 	return ret;
 }
-EXPORT_SYMBOL(lwtunnel_input);
+EXPORT_SYMBOL_GPL(lwtunnel_input);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d0713627deb6..6ea3a1a7f36a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
 				     const void *pkey)
 {
 	struct neighbour *n;
-	int key_len = tbl->key_len;
+	unsigned int key_len = tbl->key_len;
 	u32 hash_val;
 	struct neigh_hash_table *nht;
 
@@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
 				 struct net_device *dev, bool want_ref)
 {
 	u32 hash_val;
-	int key_len = tbl->key_len;
+	unsigned int key_len = tbl->key_len;
 	int error;
 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
 	struct neigh_hash_table *nht;
@@ -572,7 +572,7 @@ out_neigh_release:
 }
 EXPORT_SYMBOL(__neigh_create);
 
-static u32 pneigh_hash(const void *pkey, int key_len)
+static u32 pneigh_hash(const void *pkey, unsigned int key_len)
 {
 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
 	hash_val ^= (hash_val >> 16);
@@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len)
 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
 					      struct net *net,
 					      const void *pkey,
-					      int key_len,
+					      unsigned int key_len,
 					      struct net_device *dev)
 {
 	while (n) {
@@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
 		struct net *net, const void *pkey, struct net_device *dev)
 {
-	int key_len = tbl->key_len;
+	unsigned int key_len = tbl->key_len;
 	u32 hash_val = pneigh_hash(pkey, key_len);
 
 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
@@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
 				    struct net_device *dev, int creat)
 {
 	struct pneigh_entry *n;
-	int key_len = tbl->key_len;
+	unsigned int key_len = tbl->key_len;
 	u32 hash_val = pneigh_hash(pkey, key_len);
 
 	read_lock_bh(&tbl->lock);
@@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
 		  struct net_device *dev)
 {
 	struct pneigh_entry *n, **np;
-	int key_len = tbl->key_len;
+	unsigned int key_len = tbl->key_len;
 	u32 hash_val = pneigh_hash(pkey, key_len);
 
 	write_lock_bh(&tbl->lock);
@@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (tbl == NULL)
 		return -EAFNOSUPPORT;
 
-	if (nla_len(dst_attr) < tbl->key_len)
+	if (nla_len(dst_attr) < (int)tbl->key_len)
 		goto out;
 
 	if (ndm->ndm_flags & NTF_PROXY) {
@@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (tbl == NULL)
 		return -EAFNOSUPPORT;
 
-	if (nla_len(tb[NDA_DST]) < tbl->key_len)
+	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
 		goto out;
 	dst = nla_data(tb[NDA_DST]);
 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
@@ -3261,13 +3261,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
 
 static int __init neigh_init(void)
 {
-	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
 
 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
-		      NULL);
-	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
+		      0);
+	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
 
 	return 0;
 }
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 4847964931df..615ccab55f38 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b4f9922b6f23..799b75268291 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -97,7 +97,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
 		return restart_syscall();
 
 	if (dev_isalive(netdev)) {
-		if ((ret = (*set)(netdev, new)) == 0)
+		ret = (*set)(netdev, new);
+		if (ret == 0)
 			ret = len;
 	}
 	rtnl_unlock();
@@ -160,6 +161,7 @@ static ssize_t broadcast_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
 	struct net_device *ndev = to_net_dev(dev);
+
 	if (dev_isalive(ndev))
 		return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
 	return -EINVAL;
@@ -170,7 +172,7 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier)
 {
 	if (!netif_running(dev))
 		return -EINVAL;
-	return dev_change_carrier(dev, (bool) new_carrier);
+	return dev_change_carrier(dev, (bool)new_carrier);
 }
 
 static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
@@ -183,9 +185,10 @@ static ssize_t carrier_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	struct net_device *netdev = to_net_dev(dev);
-	if (netif_running(netdev)) {
+
+	if (netif_running(netdev))
 		return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
-	}
+
 	return -EINVAL;
 }
 static DEVICE_ATTR_RW(carrier);
@@ -290,6 +293,7 @@ static ssize_t carrier_changes_show(struct device *dev,
 				    char *buf)
 {
 	struct net_device *netdev = to_net_dev(dev);
+
 	return sprintf(buf, fmt_dec,
 		       atomic_read(&netdev->carrier_changes));
 }
@@ -299,7 +303,7 @@ static DEVICE_ATTR_RO(carrier_changes);
 
 static int change_mtu(struct net_device *dev, unsigned long new_mtu)
 {
-	return dev_set_mtu(dev, (int) new_mtu);
+	return dev_set_mtu(dev, (int)new_mtu);
 }
 
 static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
@@ -311,7 +315,7 @@ NETDEVICE_SHOW_RW(mtu, fmt_dec);
 
 static int change_flags(struct net_device *dev, unsigned long new_flags)
 {
-	return dev_change_flags(dev, (unsigned int) new_flags);
+	return dev_change_flags(dev, (unsigned int)new_flags);
 }
 
 static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
@@ -362,8 +366,8 @@ static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
 }
 
 static ssize_t gro_flush_timeout_store(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf, size_t len)
+				       struct device_attribute *attr,
+				       const char *buf, size_t len)
 {
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -378,7 +382,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
 	struct net_device *netdev = to_net_dev(dev);
 	struct net *net = dev_net(netdev);
 	size_t count = len;
-	ssize_t ret;
+	ssize_t ret = 0;
 
 	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
@@ -389,30 +393,37 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
 
 	if (!rtnl_trylock())
 		return restart_syscall();
-	ret = dev_set_alias(netdev, buf, count);
+
+	if (dev_isalive(netdev)) {
+		ret = dev_set_alias(netdev, buf, count);
+		if (ret < 0)
+			goto err;
+		ret = len;
+		netdev_state_change(netdev);
+	}
+err:
 	rtnl_unlock();
 
-	return ret < 0 ? ret : len;
+	return ret;
 }
 
 static ssize_t ifalias_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	const struct net_device *netdev = to_net_dev(dev);
+	char tmp[IFALIASZ];
 	ssize_t ret = 0;
 
-	if (!rtnl_trylock())
-		return restart_syscall();
-	if (netdev->ifalias)
-		ret = sprintf(buf, "%s\n", netdev->ifalias);
-	rtnl_unlock();
+	ret = dev_get_alias(netdev, tmp, sizeof(tmp));
+	if (ret > 0)
+		ret = sprintf(buf, "%s\n", tmp);
 	return ret;
 }
 static DEVICE_ATTR_RW(ifalias);
 
 static int change_group(struct net_device *dev, unsigned long new_group)
 {
-	dev_set_group(dev, (int) new_group);
+	dev_set_group(dev, (int)new_group);
 	return 0;
 }
 
@@ -426,7 +437,7 @@ static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
 
 static int change_proto_down(struct net_device *dev, unsigned long proto_down)
 {
-	return dev_change_proto_down(dev, (bool) proto_down);
+	return dev_change_proto_down(dev, (bool)proto_down);
 }
 
 static ssize_t proto_down_store(struct device *dev,
@@ -508,7 +519,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(phys_switch_id);
 
-static struct attribute *net_class_attrs[] = {
+static struct attribute *net_class_attrs[] __ro_after_init = {
 	&dev_attr_netdev_group.attr,
 	&dev_attr_type.attr,
 	&dev_attr_dev_id.attr,
@@ -549,14 +560,14 @@ static ssize_t netstat_show(const struct device *d,
 	ssize_t ret = -EINVAL;
 
 	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
-			offset % sizeof(u64) != 0);
+		offset % sizeof(u64) != 0);
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev)) {
 		struct rtnl_link_stats64 temp;
 		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
 
-		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
+		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
 	}
 	read_unlock(&dev_base_lock);
 	return ret;
@@ -565,7 +576,7 @@ static ssize_t netstat_show(const struct device *d,
 /* generate a read-only statistics attribute */
 #define NETSTAT_ENTRY(name)						\
 static ssize_t name##_show(struct device *d,				\
-			   struct device_attribute *attr, char *buf) 	\
+			   struct device_attribute *attr, char *buf)	\
 {									\
 	return netstat_show(d, attr, buf,				\
 			    offsetof(struct rtnl_link_stats64, name));	\
@@ -597,7 +608,7 @@ NETSTAT_ENTRY(rx_compressed);
 NETSTAT_ENTRY(tx_compressed);
 NETSTAT_ENTRY(rx_nohandler);
 
-static struct attribute *netstat_attrs[] = {
+static struct attribute *netstat_attrs[] __ro_after_init = {
 	&dev_attr_rx_packets.attr,
 	&dev_attr_tx_packets.attr,
 	&dev_attr_rx_bytes.attr,
@@ -625,7 +636,6 @@ static struct attribute *netstat_attrs[] = {
 	NULL
 };
 
-
 static const struct attribute_group netstat_group = {
 	.name  = "statistics",
 	.attrs  = netstat_attrs,
@@ -647,33 +657,33 @@ static const struct attribute_group wireless_group = {
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_SYSFS
-#define to_rx_queue_attr(_attr) container_of(_attr,		\
-    struct rx_queue_attribute, attr)
+#define to_rx_queue_attr(_attr) \
+	container_of(_attr, struct rx_queue_attribute, attr)
 
 #define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
 
 static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
 				  char *buf)
 {
-	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
 
 	if (!attribute->show)
 		return -EIO;
 
-	return attribute->show(queue, attribute, buf);
+	return attribute->show(queue, buf);
 }
 
 static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
 				   const char *buf, size_t count)
 {
-	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
 
 	if (!attribute->store)
 		return -EIO;
 
-	return attribute->store(queue, attribute, buf, count);
+	return attribute->store(queue, buf, count);
 }
 
 static const struct sysfs_ops rx_queue_sysfs_ops = {
@@ -682,8 +692,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = {
 };
 
 #ifdef CONFIG_RPS
-static ssize_t show_rps_map(struct netdev_rx_queue *queue,
-			    struct rx_queue_attribute *attribute, char *buf)
+static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
 {
 	struct rps_map *map;
 	cpumask_var_t mask;
@@ -706,8 +715,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
 }
 
 static ssize_t store_rps_map(struct netdev_rx_queue *queue,
-		      struct rx_queue_attribute *attribute,
-		      const char *buf, size_t len)
+			     const char *buf, size_t len)
 {
 	struct rps_map *old_map, *map;
 	cpumask_var_t mask;
@@ -727,8 +735,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	}
 
 	map = kzalloc(max_t(unsigned int,
-	    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
-	    GFP_KERNEL);
+			    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+		      GFP_KERNEL);
 	if (!map) {
 		free_cpumask_var(mask);
 		return -ENOMEM;
@@ -738,9 +746,9 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	for_each_cpu_and(cpu, mask, cpu_online_mask)
 		map->cpus[i++] = cpu;
 
-	if (i)
+	if (i) {
 		map->len = i;
-	else {
+	} else {
 		kfree(map);
 		map = NULL;
 	}
@@ -765,7 +773,6 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 }
 
 static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
-					   struct rx_queue_attribute *attr,
 					   char *buf)
 {
 	struct rps_dev_flow_table *flow_table;
@@ -788,8 +795,7 @@ static void rps_dev_flow_table_release(struct rcu_head *rcu)
 }
 
 static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
-				     struct rx_queue_attribute *attr,
-				     const char *buf, size_t len)
+					    const char *buf, size_t len)
 {
 	unsigned long mask, count;
 	struct rps_dev_flow_table *table, *old_table;
@@ -831,8 +837,9 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 		table->mask = mask;
 		for (count = 0; count <= mask; count++)
 			table->flows[count].cpu = RPS_NO_CPU;
-	} else
+	} else {
 		table = NULL;
+	}
 
 	spin_lock(&rps_dev_flow_lock);
 	old_table = rcu_dereference_protected(queue->rps_flow_table,
@@ -846,16 +853,15 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 	return len;
 }
 
-static struct rx_queue_attribute rps_cpus_attribute =
-	__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
-
+static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
+	= __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
 
-static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
-	__ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
-	    show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
+	= __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+		 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
 #endif /* CONFIG_RPS */
 
-static struct attribute *rx_queue_default_attrs[] = {
+static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
 #ifdef CONFIG_RPS
 	&rps_cpus_attribute.attr,
 	&rps_dev_flow_table_cnt_attribute.attr,
@@ -870,7 +876,6 @@ static void rx_queue_release(struct kobject *kobj)
 	struct rps_map *map;
 	struct rps_dev_flow_table *flow_table;
 
-
 	map = rcu_dereference_protected(queue->rps_map, 1);
 	if (map) {
 		RCU_INIT_POINTER(queue->rps_map, NULL);
@@ -900,7 +905,7 @@ static const void *rx_queue_namespace(struct kobject *kobj)
 	return ns;
 }
 
-static struct kobj_type rx_queue_ktype = {
+static struct kobj_type rx_queue_ktype __ro_after_init = {
 	.sysfs_ops = &rx_queue_sysfs_ops,
 	.release = rx_queue_release,
 	.default_attrs = rx_queue_default_attrs,
@@ -915,23 +920,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
 
 	kobj->kset = dev->queues_kset;
 	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
-	    "rx-%u", index);
+				     "rx-%u", index);
 	if (error)
-		goto exit;
+		return error;
 
 	if (dev->sysfs_rx_queue_group) {
 		error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
-		if (error)
-			goto exit;
+		if (error) {
+			kobject_put(kobj);
+			return error;
+		}
 	}
 
 	kobject_uevent(kobj, KOBJ_ADD);
 	dev_hold(queue->dev);
 
 	return error;
-exit:
-	kobject_put(kobj);
-	return error;
 }
 #endif /* CONFIG_SYSFS */
 
@@ -976,39 +980,40 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
  */
 struct netdev_queue_attribute {
 	struct attribute attr;
-	ssize_t (*show)(struct netdev_queue *queue,
-	    struct netdev_queue_attribute *attr, char *buf);
+	ssize_t (*show)(struct netdev_queue *queue, char *buf);
 	ssize_t (*store)(struct netdev_queue *queue,
-	    struct netdev_queue_attribute *attr, const char *buf, size_t len);
+			 const char *buf, size_t len);
 };
-#define to_netdev_queue_attr(_attr) container_of(_attr,		\
-    struct netdev_queue_attribute, attr)
+#define to_netdev_queue_attr(_attr) \
+	container_of(_attr, struct netdev_queue_attribute, attr)
 
 #define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
 
 static ssize_t netdev_queue_attr_show(struct kobject *kobj,
 				      struct attribute *attr, char *buf)
 {
-	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	const struct netdev_queue_attribute *attribute
+		= to_netdev_queue_attr(attr);
 	struct netdev_queue *queue = to_netdev_queue(kobj);
 
 	if (!attribute->show)
 		return -EIO;
 
-	return attribute->show(queue, attribute, buf);
+	return attribute->show(queue, buf);
 }
 
 static ssize_t netdev_queue_attr_store(struct kobject *kobj,
 				       struct attribute *attr,
 				       const char *buf, size_t count)
 {
-	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	const struct netdev_queue_attribute *attribute
+		= to_netdev_queue_attr(attr);
 	struct netdev_queue *queue = to_netdev_queue(kobj);
 
 	if (!attribute->store)
 		return -EIO;
 
-	return attribute->store(queue, attribute, buf, count);
+	return attribute->store(queue, buf, count);
 }
 
 static const struct sysfs_ops netdev_queue_sysfs_ops = {
@@ -1016,9 +1021,7 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
 	.store = netdev_queue_attr_store,
 };
 
-static ssize_t show_trans_timeout(struct netdev_queue *queue,
-				  struct netdev_queue_attribute *attribute,
-				  char *buf)
+static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
 {
 	unsigned long trans_timeout;
 
@@ -1040,8 +1043,7 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 	return i;
 }
 
-static ssize_t show_traffic_class(struct netdev_queue *queue,
-				  struct netdev_queue_attribute *attribute,
+static ssize_t traffic_class_show(struct netdev_queue *queue,
 				  char *buf)
 {
 	struct net_device *dev = queue->dev;
@@ -1055,16 +1057,14 @@ static ssize_t show_traffic_class(struct netdev_queue *queue,
 }
 
 #ifdef CONFIG_XPS
-static ssize_t show_tx_maxrate(struct netdev_queue *queue,
-			       struct netdev_queue_attribute *attribute,
+static ssize_t tx_maxrate_show(struct netdev_queue *queue,
 			       char *buf)
 {
 	return sprintf(buf, "%lu\n", queue->tx_maxrate);
 }
 
-static ssize_t set_tx_maxrate(struct netdev_queue *queue,
-			      struct netdev_queue_attribute *attribute,
-			      const char *buf, size_t len)
+static ssize_t tx_maxrate_store(struct netdev_queue *queue,
+				const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
 	int err, index = get_netdev_queue_index(queue);
@@ -1089,16 +1089,15 @@ static ssize_t set_tx_maxrate(struct netdev_queue *queue,
 	return err;
 }
 
-static struct netdev_queue_attribute queue_tx_maxrate =
-	__ATTR(tx_maxrate, S_IRUGO | S_IWUSR,
-	       show_tx_maxrate, set_tx_maxrate);
+static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init
+	= __ATTR_RW(tx_maxrate);
 #endif
 
-static struct netdev_queue_attribute queue_trans_timeout =
-	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+static struct netdev_queue_attribute queue_trans_timeout __ro_after_init
+	= __ATTR_RO(tx_timeout);
 
-static struct netdev_queue_attribute queue_traffic_class =
-	__ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL);
+static struct netdev_queue_attribute queue_traffic_class __ro_after_init
+	= __ATTR_RO(traffic_class);
 
 #ifdef CONFIG_BQL
 /*
@@ -1115,9 +1114,9 @@ static ssize_t bql_set(const char *buf, const size_t count,
 	unsigned int value;
 	int err;
 
-	if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
+	if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) {
 		value = DQL_MAX_LIMIT;
-	else {
+	} else {
 		err = kstrtouint(buf, 10, &value);
 		if (err < 0)
 			return err;
@@ -1131,7 +1130,6 @@ static ssize_t bql_set(const char *buf, const size_t count,
 }
 
 static ssize_t bql_show_hold_time(struct netdev_queue *queue,
-				  struct netdev_queue_attribute *attr,
 				  char *buf)
 {
 	struct dql *dql = &queue->dql;
@@ -1140,7 +1138,6 @@ static ssize_t bql_show_hold_time(struct netdev_queue *queue,
 }
 
 static ssize_t bql_set_hold_time(struct netdev_queue *queue,
-				 struct netdev_queue_attribute *attribute,
 				 const char *buf, size_t len)
 {
 	struct dql *dql = &queue->dql;
@@ -1156,12 +1153,11 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue,
 	return len;
 }
 
-static struct netdev_queue_attribute bql_hold_time_attribute =
-	__ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
-	    bql_set_hold_time);
+static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
+	= __ATTR(hold_time, S_IRUGO | S_IWUSR,
+		 bql_show_hold_time, bql_set_hold_time);
 
 static ssize_t bql_show_inflight(struct netdev_queue *queue,
-				 struct netdev_queue_attribute *attr,
 				 char *buf)
 {
 	struct dql *dql = &queue->dql;
@@ -1169,33 +1165,31 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
 	return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
 }
 
-static struct netdev_queue_attribute bql_inflight_attribute =
+static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
 	__ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
 
 #define BQL_ATTR(NAME, FIELD)						\
 static ssize_t bql_show_ ## NAME(struct netdev_queue *queue,		\
-				 struct netdev_queue_attribute *attr,	\
 				 char *buf)				\
 {									\
 	return bql_show(buf, queue->dql.FIELD);				\
 }									\
 									\
 static ssize_t bql_set_ ## NAME(struct netdev_queue *queue,		\
-				struct netdev_queue_attribute *attr,	\
 				const char *buf, size_t len)		\
 {									\
 	return bql_set(buf, len, &queue->dql.FIELD);			\
 }									\
 									\
-static struct netdev_queue_attribute bql_ ## NAME ## _attribute =	\
-	__ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME,		\
-	    bql_set_ ## NAME);
+static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
+	= __ATTR(NAME, S_IRUGO | S_IWUSR,				\
+		 bql_show_ ## NAME, bql_set_ ## NAME)
 
-BQL_ATTR(limit, limit)
-BQL_ATTR(limit_max, max_limit)
-BQL_ATTR(limit_min, min_limit)
+BQL_ATTR(limit, limit);
+BQL_ATTR(limit_max, max_limit);
+BQL_ATTR(limit_min, min_limit);
 
-static struct attribute *dql_attrs[] = {
+static struct attribute *dql_attrs[] __ro_after_init = {
 	&bql_limit_attribute.attr,
 	&bql_limit_max_attribute.attr,
 	&bql_limit_min_attribute.attr,
@@ -1211,8 +1205,8 @@ static const struct attribute_group dql_group = {
 #endif /* CONFIG_BQL */
 
 #ifdef CONFIG_XPS
-static ssize_t show_xps_map(struct netdev_queue *queue,
-			    struct netdev_queue_attribute *attribute, char *buf)
+static ssize_t xps_cpus_show(struct netdev_queue *queue,
+			     char *buf)
 {
 	struct net_device *dev = queue->dev;
 	int cpu, len, num_tc = 1, tc = 0;
@@ -1258,9 +1252,8 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
 	return len < PAGE_SIZE ? len : -EINVAL;
 }
 
-static ssize_t store_xps_map(struct netdev_queue *queue,
-		      struct netdev_queue_attribute *attribute,
-		      const char *buf, size_t len)
+static ssize_t xps_cpus_store(struct netdev_queue *queue,
+			      const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
 	unsigned long index;
@@ -1288,11 +1281,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 	return err ? : len;
 }
 
-static struct netdev_queue_attribute xps_cpus_attribute =
-    __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
+	= __ATTR_RW(xps_cpus);
 #endif /* CONFIG_XPS */
 
-static struct attribute *netdev_queue_default_attrs[] = {
+static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
 	&queue_trans_timeout.attr,
 	&queue_traffic_class.attr,
 #ifdef CONFIG_XPS
@@ -1322,7 +1315,7 @@ static const void *netdev_queue_namespace(struct kobject *kobj)
 	return ns;
 }
 
-static struct kobj_type netdev_queue_ktype = {
+static struct kobj_type netdev_queue_ktype __ro_after_init = {
 	.sysfs_ops = &netdev_queue_sysfs_ops,
 	.release = netdev_queue_release,
 	.default_attrs = netdev_queue_default_attrs,
@@ -1337,23 +1330,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
 
 	kobj->kset = dev->queues_kset;
 	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
-	    "tx-%u", index);
+				     "tx-%u", index);
 	if (error)
-		goto exit;
+		return error;
 
 #ifdef CONFIG_BQL
 	error = sysfs_create_group(kobj, &dql_group);
-	if (error)
-		goto exit;
+	if (error) {
+		kobject_put(kobj);
+		return error;
+	}
 #endif
 
 	kobject_uevent(kobj, KOBJ_ADD);
 	dev_hold(queue->dev);
 
 	return 0;
-exit:
-	kobject_put(kobj);
-	return error;
 }
 #endif /* CONFIG_SYSFS */
 
@@ -1395,7 +1387,7 @@ static int register_queue_kobjects(struct net_device *dev)
 
 #ifdef CONFIG_SYSFS
 	dev->queues_kset = kset_create_and_add("queues",
-	    NULL, &dev->dev.kobj);
+					       NULL, &dev->dev.kobj);
 	if (!dev->queues_kset)
 		return -ENOMEM;
 	real_rx = dev->real_num_rx_queues;
@@ -1463,7 +1455,7 @@ static const void *net_netlink_ns(struct sock *sk)
 	return sock_net(sk);
 }
 
-struct kobj_ns_type_operations net_ns_type_operations = {
+const struct kobj_ns_type_operations net_ns_type_operations = {
 	.type = KOBJ_NS_TYPE_NET,
 	.current_may_mount = net_current_may_mount,
 	.grab_current_ns = net_grab_current_ns,
@@ -1485,7 +1477,8 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
 
 	/* pass ifindex to uevent.
 	 * ifindex is useful as it won't change (interface name may change)
-	 * and is what RtNetlink uses natively. */
+	 * and is what RtNetlink uses natively.
+	 */
 	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
 
 exit:
@@ -1502,7 +1495,10 @@ static void netdev_release(struct device *d)
 
 	BUG_ON(dev->reg_state != NETREG_RELEASED);
 
-	kfree(dev->ifalias);
+	/* no need to wait for rcu grace period:
+	 * device is dead and about to be freed.
+	 */
+	kfree(rcu_access_pointer(dev->ifalias));
 	netdev_freemem(dev);
 }
 
@@ -1513,7 +1509,7 @@ static const void *net_namespace(struct device *d)
 	return dev_net(dev);
 }
 
-static struct class net_class = {
+static struct class net_class __ro_after_init = {
 	.name = "net",
 	.dev_release = netdev_release,
 	.dev_groups = net_class_groups,
@@ -1560,7 +1556,7 @@ EXPORT_SYMBOL(of_find_net_device_by_node);
  */
 void netdev_unregister_kobject(struct net_device *ndev)
 {
-	struct device *dev = &(ndev->dev);
+	struct device *dev = &ndev->dev;
 
 	if (!atomic_read(&dev_net(ndev)->count))
 		dev_set_uevent_suppress(dev, 1);
@@ -1577,7 +1573,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
 /* Create sysfs entries for network device. */
 int netdev_register_kobject(struct net_device *ndev)
 {
-	struct device *dev = &(ndev->dev);
+	struct device *dev = &ndev->dev;
 	const struct attribute_group **groups = ndev->sysfs_groups;
 	int error = 0;
 
@@ -1620,14 +1616,14 @@ int netdev_register_kobject(struct net_device *ndev)
 	return error;
 }
 
-int netdev_class_create_file_ns(struct class_attribute *class_attr,
+int netdev_class_create_file_ns(const struct class_attribute *class_attr,
 				const void *ns)
 {
 	return class_create_file_ns(&net_class, class_attr, ns);
 }
 EXPORT_SYMBOL(netdev_class_create_file_ns);
 
-void netdev_class_remove_file_ns(struct class_attribute *class_attr,
+void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
 				 const void *ns)
 {
 	class_remove_file_ns(&net_class, class_attr, ns);
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 2745a1b51e03..006876c7b78d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __NET_SYSFS_H__
 #define __NET_SYSFS_H__
 
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 92da5e4ceb4f..380934580fa1 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * consolidates trace point definitions
  *
@@ -31,12 +32,23 @@
 #include <trace/events/napi.h>
 #include <trace/events/sock.h>
 #include <trace/events/udp.h>
+#include <trace/events/tcp.h>
 #include <trace/events/fib.h>
+#include <trace/events/qdisc.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <trace/events/fib6.h>
 EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
 #endif
+#if IS_ENABLED(CONFIG_BRIDGE)
+#include <trace/events/bridge.h>
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add);
+EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
+#endif
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 8726d051f31d..b797832565d3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -234,6 +234,7 @@ int peernet2id_alloc(struct net *net, struct net *peer)
 		rtnl_net_notifyid(net, RTM_NEWNSID, id);
 	return id;
 }
+EXPORT_SYMBOL_GPL(peernet2id_alloc);
 
 /* This function returns, if assigned, the id of a peer netns. */
 int peernet2id(struct net *net, struct net *peer)
@@ -855,9 +856,10 @@ static int __init net_ns_init(void)
 
 	register_pernet_subsys(&net_ns_ops);
 
-	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
+		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
-		      NULL);
+		      RTNL_FLAG_DOIT_UNLOCKED);
 
 	return 0;
 }
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 029a61ac6cdd..5e4f04004a49 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -100,7 +100,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
 
 	cs->classid = (u32)value;
 
-	css_task_iter_start(css, &it);
+	css_task_iter_start(css, 0, &it);
 	while ((p = css_task_iter_next(&it))) {
 		task_lock(p);
 		iterate_fd(p->files, 0, update_classid_sock,
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 912731bed7b7..57557a6a950c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -334,7 +334,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	/* It is up to the caller to keep npinfo alive. */
 	struct netpoll_info *npinfo;
 
-	WARN_ON_ONCE(!irqs_disabled());
+	lockdep_assert_irqs_disabled();
 
 	npinfo = rcu_dereference_bh(np->dev->npinfo);
 	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6e1e10ff433a..f95a15086225 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2165,7 +2165,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 						+ pkt_dev->pkt_overhead;
 		}
 
-		for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		for (i = 0; i < sizeof(struct in6_addr); i++)
 			if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
 				set = 1;
 				break;
@@ -2711,7 +2711,7 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
 static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
 				int datalen)
 {
-	struct timeval timestamp;
+	struct timespec64 timestamp;
 	struct pktgen_hdr *pgh;
 
 	pgh = skb_put(skb, sizeof(*pgh));
@@ -2773,9 +2773,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
 		pgh->tv_sec = 0;
 		pgh->tv_usec = 0;
 	} else {
-		do_gettimeofday(&timestamp);
+		/*
+		 * pgh->tv_sec wraps in y2106 when interpreted as unsigned
+		 * as done by wireshark, or y2038 when interpreted as signed.
+		 * This is probably harmless, but if anyone wants to improve
+		 * it, we could introduce a variant that puts 64-bit nanoseconds
+		 * into the respective header bytes.
+		 * This would also be slightly faster to read.
+		 */
+		ktime_get_real_ts64(&timestamp);
 		pgh->tv_sec = htonl(timestamp.tv_sec);
-		pgh->tv_usec = htonl(timestamp.tv_usec);
+		pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC);
 	}
 }
 
@@ -3377,7 +3385,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
 
 static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 {
-	unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
+	unsigned int burst = READ_ONCE(pkt_dev->burst);
 	struct net_device *odev = pkt_dev->odev;
 	struct netdev_queue *txq;
 	struct sk_buff *skb;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9201e3621351..dabba2a91fc8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -62,7 +62,7 @@
 struct rtnl_link {
 	rtnl_doit_func		doit;
 	rtnl_dumpit_func	dumpit;
-	rtnl_calcit_func 	calcit;
+	unsigned int		flags;
 };
 
 static DEFINE_MUTEX(rtnl_mutex);
@@ -127,7 +127,8 @@ bool lockdep_rtnl_is_held(void)
 EXPORT_SYMBOL(lockdep_rtnl_is_held);
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 
-static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
+static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
+static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1];
 
 static inline int rtm_msgindex(int msgtype)
 {
@@ -143,58 +144,13 @@ static inline int rtm_msgindex(int msgtype)
 	return msgindex;
 }
 
-static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
-{
-	struct rtnl_link *tab;
-
-	if (protocol <= RTNL_FAMILY_MAX)
-		tab = rtnl_msg_handlers[protocol];
-	else
-		tab = NULL;
-
-	if (tab == NULL || tab[msgindex].doit == NULL)
-		tab = rtnl_msg_handlers[PF_UNSPEC];
-
-	return tab[msgindex].doit;
-}
-
-static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
-{
-	struct rtnl_link *tab;
-
-	if (protocol <= RTNL_FAMILY_MAX)
-		tab = rtnl_msg_handlers[protocol];
-	else
-		tab = NULL;
-
-	if (tab == NULL || tab[msgindex].dumpit == NULL)
-		tab = rtnl_msg_handlers[PF_UNSPEC];
-
-	return tab[msgindex].dumpit;
-}
-
-static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
-{
-	struct rtnl_link *tab;
-
-	if (protocol <= RTNL_FAMILY_MAX)
-		tab = rtnl_msg_handlers[protocol];
-	else
-		tab = NULL;
-
-	if (tab == NULL || tab[msgindex].calcit == NULL)
-		tab = rtnl_msg_handlers[PF_UNSPEC];
-
-	return tab[msgindex].calcit;
-}
-
 /**
  * __rtnl_register - Register a rtnetlink message type
  * @protocol: Protocol family or PF_UNSPEC
  * @msgtype: rtnetlink message type
  * @doit: Function pointer called for each request message
  * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @calcit: Function pointer to calc size of dump message
+ * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
  *
  * Registers the specified function pointers (at least one of them has
  * to be non-NULL) to be called whenever a request message for the
@@ -208,7 +164,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
  */
 int __rtnl_register(int protocol, int msgtype,
 		    rtnl_doit_func doit, rtnl_dumpit_func dumpit,
-		    rtnl_calcit_func calcit)
+		    unsigned int flags)
 {
 	struct rtnl_link *tab;
 	int msgindex;
@@ -216,23 +172,20 @@ int __rtnl_register(int protocol, int msgtype,
 	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
-	tab = rtnl_msg_handlers[protocol];
+	tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]);
 	if (tab == NULL) {
 		tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
 		if (tab == NULL)
 			return -ENOBUFS;
 
-		rtnl_msg_handlers[protocol] = tab;
+		rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
 	}
 
 	if (doit)
 		tab[msgindex].doit = doit;
-
 	if (dumpit)
 		tab[msgindex].dumpit = dumpit;
-
-	if (calcit)
-		tab[msgindex].calcit = calcit;
+	tab[msgindex].flags |= flags;
 
 	return 0;
 }
@@ -249,9 +202,9 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
  */
 void rtnl_register(int protocol, int msgtype,
 		   rtnl_doit_func doit, rtnl_dumpit_func dumpit,
-		   rtnl_calcit_func calcit)
+		   unsigned int flags)
 {
-	if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0)
+	if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0)
 		panic("Unable to register rtnetlink message handler, "
 		      "protocol = %d, message type = %d\n",
 		      protocol, msgtype);
@@ -267,17 +220,23 @@ EXPORT_SYMBOL_GPL(rtnl_register);
  */
 int rtnl_unregister(int protocol, int msgtype)
 {
+	struct rtnl_link *handlers;
 	int msgindex;
 
 	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
-	if (rtnl_msg_handlers[protocol] == NULL)
+	rtnl_lock();
+	handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+	if (!handlers) {
+		rtnl_unlock();
 		return -ENOENT;
+	}
 
-	rtnl_msg_handlers[protocol][msgindex].doit = NULL;
-	rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
-	rtnl_msg_handlers[protocol][msgindex].calcit = NULL;
+	handlers[msgindex].doit = NULL;
+	handlers[msgindex].dumpit = NULL;
+	handlers[msgindex].flags = 0;
+	rtnl_unlock();
 
 	return 0;
 }
@@ -292,10 +251,20 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
  */
 void rtnl_unregister_all(int protocol)
 {
+	struct rtnl_link *handlers;
+
 	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 
-	kfree(rtnl_msg_handlers[protocol]);
-	rtnl_msg_handlers[protocol] = NULL;
+	rtnl_lock();
+	handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+	RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
+	rtnl_unlock();
+
+	synchronize_net();
+
+	while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1)
+		schedule();
+	kfree(handlers);
 }
 EXPORT_SYMBOL_GPL(rtnl_unregister_all);
 
@@ -433,16 +402,24 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
 {
 	struct net_device *master_dev;
 	const struct rtnl_link_ops *ops;
+	size_t size = 0;
 
-	master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
+	rcu_read_lock();
+
+	master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
 	if (!master_dev)
-		return 0;
+		goto out;
+
 	ops = master_dev->rtnl_link_ops;
 	if (!ops || !ops->get_slave_size)
-		return 0;
+		goto out;
 	/* IFLA_INFO_SLAVE_DATA + nested data */
-	return nla_total_size(sizeof(struct nlattr)) +
+	size = nla_total_size(sizeof(struct nlattr)) +
 	       ops->get_slave_size(master_dev, dev);
+
+out:
+	rcu_read_unlock();
+	return size;
 }
 
 static size_t rtnl_link_get_size(const struct net_device *dev)
@@ -476,7 +453,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
 {
 	const struct rtnl_af_ops *ops;
 
-	list_for_each_entry(ops, &rtnl_af_ops, list) {
+	list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
 		if (ops->family == family)
 			return ops;
 	}
@@ -493,32 +470,22 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
 void rtnl_af_register(struct rtnl_af_ops *ops)
 {
 	rtnl_lock();
-	list_add_tail(&ops->list, &rtnl_af_ops);
+	list_add_tail_rcu(&ops->list, &rtnl_af_ops);
 	rtnl_unlock();
 }
 EXPORT_SYMBOL_GPL(rtnl_af_register);
 
 /**
- * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
- * @ops: struct rtnl_af_ops * to unregister
- *
- * The caller must hold the rtnl_mutex.
- */
-void __rtnl_af_unregister(struct rtnl_af_ops *ops)
-{
-	list_del(&ops->list);
-}
-EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
-
-/**
  * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
  * @ops: struct rtnl_af_ops * to unregister
  */
 void rtnl_af_unregister(struct rtnl_af_ops *ops)
 {
 	rtnl_lock();
-	__rtnl_af_unregister(ops);
+	list_del_rcu(&ops->list);
 	rtnl_unlock();
+
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(rtnl_af_unregister);
 
@@ -531,13 +498,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
 	/* IFLA_AF_SPEC */
 	size = nla_total_size(sizeof(struct nlattr));
 
-	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
 		if (af_ops->get_link_af_size) {
 			/* AF_* + nested data */
 			size += nla_total_size(sizeof(struct nlattr)) +
 				af_ops->get_link_af_size(dev, ext_filter_mask);
 		}
 	}
+	rcu_read_unlock();
 
 	return size;
 }
@@ -545,11 +514,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
 static bool rtnl_have_link_slave_info(const struct net_device *dev)
 {
 	struct net_device *master_dev;
+	bool ret = false;
 
-	master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
+	rcu_read_lock();
+
+	master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
 	if (master_dev && master_dev->rtnl_link_ops)
-		return true;
-	return false;
+		ret = true;
+	rcu_read_unlock();
+	return ret;
 }
 
 static int rtnl_link_slave_info_fill(struct sk_buff *skb,
@@ -946,8 +919,10 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
 	       + rtnl_xdp_size() /* IFLA_XDP */
 	       + nla_total_size(4)  /* IFLA_EVENT */
-	       + nla_total_size(1); /* IFLA_PROTO_DOWN */
-
+	       + nla_total_size(4)  /* IFLA_NEW_NETNSID */
+	       + nla_total_size(1)  /* IFLA_PROTO_DOWN */
+	       + nla_total_size(4)  /* IFLA_IF_NETNSID */
+	       + 0;
 }
 
 static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1234,6 +1209,36 @@ nla_put_vfinfo_failure:
 	return -EMSGSIZE;
 }
 
+static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
+					   struct net_device *dev,
+					   u32 ext_filter_mask)
+{
+	struct nlattr *vfinfo;
+	int i, num_vfs;
+
+	if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0))
+		return 0;
+
+	num_vfs = dev_num_vf(dev->dev.parent);
+	if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs))
+		return -EMSGSIZE;
+
+	if (!dev->netdev_ops->ndo_get_vf_config)
+		return 0;
+
+	vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+	if (!vfinfo)
+		return -EMSGSIZE;
+
+	for (i = 0; i < num_vfs; i++) {
+		if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
+			return -EMSGSIZE;
+	}
+
+	nla_nest_end(skb, vfinfo);
+	return 0;
+}
+
 static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 {
 	struct rtnl_link_ifmap map;
@@ -1265,10 +1270,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
 		*prog_id = generic_xdp_prog->aux->id;
 		return XDP_ATTACHED_SKB;
 	}
-	if (!ops->ndo_xdp)
+	if (!ops->ndo_bpf)
 		return XDP_ATTACHED_NONE;
 
-	return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id);
+	return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id);
 }
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1330,16 +1335,108 @@ static u32 rtnl_get_event(unsigned long event)
 	return rtnl_event_type;
 }
 
-static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct net_device *upper_dev;
+	int ret = 0;
+
+	rcu_read_lock();
+
+	upper_dev = netdev_master_upper_dev_get_rcu(dev);
+	if (upper_dev)
+		ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+
+	rcu_read_unlock();
+	return ret;
+}
+
+static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev)
+{
+	int ifindex = dev_get_iflink(dev);
+
+	if (dev->ifindex == ifindex)
+		return 0;
+
+	return nla_put_u32(skb, IFLA_LINK, ifindex);
+}
+
+static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
+					      struct net_device *dev)
+{
+	char buf[IFALIASZ];
+	int ret;
+
+	ret = dev_get_alias(dev, buf, sizeof(buf));
+	return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0;
+}
+
+static int rtnl_fill_link_netnsid(struct sk_buff *skb,
+				  const struct net_device *dev,
+				  struct net *src_net)
+{
+	if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
+		struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+		if (!net_eq(dev_net(dev), link_net)) {
+			int id = peernet2id_alloc(src_net, link_net);
+
+			if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+				return -EMSGSIZE;
+		}
+	}
+
+	return 0;
+}
+
+static int rtnl_fill_link_af(struct sk_buff *skb,
+			     const struct net_device *dev,
+			     u32 ext_filter_mask)
+{
+	const struct rtnl_af_ops *af_ops;
+	struct nlattr *af_spec;
+
+	af_spec = nla_nest_start(skb, IFLA_AF_SPEC);
+	if (!af_spec)
+		return -EMSGSIZE;
+
+	list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
+		struct nlattr *af;
+		int err;
+
+		if (!af_ops->fill_link_af)
+			continue;
+
+		af = nla_nest_start(skb, af_ops->family);
+		if (!af)
+			return -EMSGSIZE;
+
+		err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
+		/*
+		 * Caller may return ENODATA to indicate that there
+		 * was no data to be dumped. This is not an error, it
+		 * means we should trim the attribute header and
+		 * continue.
+		 */
+		if (err == -ENODATA)
+			nla_nest_cancel(skb, af);
+		else if (err < 0)
+			return -EMSGSIZE;
+
+		nla_nest_end(skb, af);
+	}
+
+	nla_nest_end(skb, af_spec);
+	return 0;
+}
+
+static int rtnl_fill_ifinfo(struct sk_buff *skb,
+			    struct net_device *dev, struct net *src_net,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags, u32 ext_filter_mask,
-			    u32 event)
+			    u32 event, int *new_nsid, int tgt_netnsid)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	struct nlattr *af_spec;
-	struct rtnl_af_ops *af_ops;
-	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
 
 	ASSERT_RTNL();
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1354,6 +1451,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	ifm->ifi_flags = dev_get_flags(dev);
 	ifm->ifi_change = change;
 
+	if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid))
+		goto nla_put_failure;
+
 	if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
 	    nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
 	    nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1368,15 +1468,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 #ifdef CONFIG_RPS
 	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
 #endif
-	    (dev->ifindex != dev_get_iflink(dev) &&
-	     nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
-	    (upper_dev &&
-	     nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
+	    nla_put_iflink(skb, dev) ||
+	    put_master_ifindex(skb, dev) ||
 	    nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
 	    (dev->qdisc &&
 	     nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
-	    (dev->ifalias &&
-	     nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
+	    nla_put_ifalias(skb, dev) ||
 	    nla_put_u32(skb, IFLA_CARRIER_CHANGES,
 			atomic_read(&dev->carrier_changes)) ||
 	    nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
@@ -1408,27 +1505,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (rtnl_fill_stats(skb, dev))
 		goto nla_put_failure;
 
-	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
-	    nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
+	if (rtnl_fill_vf(skb, dev, ext_filter_mask))
 		goto nla_put_failure;
 
-	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
-	    ext_filter_mask & RTEXT_FILTER_VF) {
-		int i;
-		struct nlattr *vfinfo;
-		int num_vfs = dev_num_vf(dev->dev.parent);
-
-		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
-		if (!vfinfo)
-			goto nla_put_failure;
-		for (i = 0; i < num_vfs; i++) {
-			if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
-				goto nla_put_failure;
-		}
-
-		nla_nest_end(skb, vfinfo);
-	}
-
 	if (rtnl_port_fill(skb, dev, ext_filter_mask))
 		goto nla_put_failure;
 
@@ -1440,51 +1519,23 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			goto nla_put_failure;
 	}
 
-	if (dev->rtnl_link_ops &&
-	    dev->rtnl_link_ops->get_link_net) {
-		struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
-
-		if (!net_eq(dev_net(dev), link_net)) {
-			int id = peernet2id_alloc(dev_net(dev), link_net);
-
-			if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
-				goto nla_put_failure;
-		}
-	}
-
-	if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+	if (rtnl_fill_link_netnsid(skb, dev, src_net))
 		goto nla_put_failure;
 
-	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
-		if (af_ops->fill_link_af) {
-			struct nlattr *af;
-			int err;
-
-			if (!(af = nla_nest_start(skb, af_ops->family)))
-				goto nla_put_failure;
-
-			err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
-
-			/*
-			 * Caller may return ENODATA to indicate that there
-			 * was no data to be dumped. This is not an error, it
-			 * means we should trim the attribute header and
-			 * continue.
-			 */
-			if (err == -ENODATA)
-				nla_nest_cancel(skb, af);
-			else if (err < 0)
-				goto nla_put_failure;
-
-			nla_nest_end(skb, af);
-		}
-	}
+	if (new_nsid &&
+	    nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
+		goto nla_put_failure;
 
-	nla_nest_end(skb, af_spec);
+	rcu_read_lock();
+	if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
+		goto nla_put_failure_rcu;
+	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
 	return 0;
 
+nla_put_failure_rcu:
+	rcu_read_unlock();
 nla_put_failure:
 	nlmsg_cancel(skb, nlh);
 	return -EMSGSIZE;
@@ -1506,7 +1557,10 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
 	[IFLA_NET_NS_FD]	= { .type = NLA_U32 },
-	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
+	/* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to
+	 * allow 0-length string (needed to remove an alias).
+	 */
+	[IFLA_IFALIAS]	        = { .type = NLA_BINARY, .len = IFALIASZ - 1 },
 	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
 	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
@@ -1523,6 +1577,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_XDP]		= { .type = NLA_NESTED },
 	[IFLA_EVENT]		= { .type = NLA_U32 },
 	[IFLA_GROUP]		= { .type = NLA_U32 },
+	[IFLA_IF_NETNSID]	= { .type = NLA_S32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1626,9 +1681,28 @@ static bool link_dump_filtered(struct net_device *dev,
 	return false;
 }
 
+static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+{
+	struct net *net;
+
+	net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+	if (!net)
+		return ERR_PTR(-EINVAL);
+
+	/* For now, the caller is required to have CAP_NET_ADMIN in
+	 * the user namespace owning the target net ns.
+	 */
+	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+		put_net(net);
+		return ERR_PTR(-EACCES);
+	}
+	return net;
+}
+
 static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
+	struct net *tgt_net = net;
 	int h, s_h;
 	int idx = 0, s_idx;
 	struct net_device *dev;
@@ -1638,14 +1712,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	const struct rtnl_link_ops *kind_ops = NULL;
 	unsigned int flags = NLM_F_MULTI;
 	int master_idx = 0;
+	int netnsid = -1;
 	int err;
 	int hdrlen;
 
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
 
-	cb->seq = net->dev_base_seq;
-
 	/* A hack to preserve kernel<->userspace interface.
 	 * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
 	 * However, before Linux v3.9 the code here assumed rtgenmsg and that's
@@ -1658,6 +1731,15 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
 			ifla_policy, NULL) >= 0) {
+		if (tb[IFLA_IF_NETNSID]) {
+			netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+			tgt_net = get_target_net(skb, netnsid);
+			if (IS_ERR(tgt_net)) {
+				tgt_net = net;
+				netnsid = -1;
+			}
+		}
+
 		if (tb[IFLA_EXT_MASK])
 			ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
 
@@ -1673,17 +1755,19 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
-		head = &net->dev_index_head[h];
+		head = &tgt_net->dev_index_head[h];
 		hlist_for_each_entry(dev, head, index_hlist) {
 			if (link_dump_filtered(dev, master_idx, kind_ops))
 				goto cont;
 			if (idx < s_idx)
 				goto cont;
-			err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+			err = rtnl_fill_ifinfo(skb, dev, net,
+					       RTM_NEWLINK,
 					       NETLINK_CB(cb->skb).portid,
 					       cb->nlh->nlmsg_seq, 0,
 					       flags,
-					       ext_filter_mask, 0);
+					       ext_filter_mask, 0, NULL,
+					       netnsid);
 
 			if (err < 0) {
 				if (likely(skb->len))
@@ -1691,8 +1775,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 
 				goto out_err;
 			}
-
-			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
 			idx++;
 		}
@@ -1702,6 +1784,10 @@ out:
 out_err:
 	cb->args[1] = idx;
 	cb->args[0] = h;
+	cb->seq = net->dev_base_seq;
+	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+	if (netnsid >= 0)
+		put_net(tgt_net);
 
 	return err;
 }
@@ -1748,17 +1834,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
 			const struct rtnl_af_ops *af_ops;
 
-			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+			rcu_read_lock();
+			af_ops = rtnl_af_lookup(nla_type(af));
+			if (!af_ops) {
+				rcu_read_unlock();
 				return -EAFNOSUPPORT;
+			}
 
-			if (!af_ops->set_link_af)
+			if (!af_ops->set_link_af) {
+				rcu_read_unlock();
 				return -EOPNOTSUPP;
+			}
 
 			if (af_ops->validate_link_af) {
 				err = af_ops->validate_link_af(dev, af);
-				if (err < 0)
+				if (err < 0) {
+					rcu_read_unlock();
 					return err;
+				}
 			}
+
+			rcu_read_unlock();
 		}
 	}
 
@@ -1934,7 +2030,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
 	return err;
 }
 
-static int do_set_master(struct net_device *dev, int ifindex)
+static int do_set_master(struct net_device *dev, int ifindex,
+			 struct netlink_ext_ack *extack)
 {
 	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
 	const struct net_device_ops *ops;
@@ -1959,7 +2056,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
 			return -EINVAL;
 		ops = upper_dev->netdev_ops;
 		if (ops->ndo_add_slave) {
-			err = ops->ndo_add_slave(upper_dev, dev);
+			err = ops->ndo_add_slave(upper_dev, dev, extack);
 			if (err)
 				return err;
 		} else {
@@ -2092,7 +2189,7 @@ static int do_setlink(const struct sk_buff *skb,
 	}
 
 	if (tb[IFLA_MASTER]) {
-		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
 		if (err)
 			goto errout;
 		status |= DO_SETLINK_MODIFIED;
@@ -2118,7 +2215,7 @@ static int do_setlink(const struct sk_buff *skb,
 				dev->tx_queue_len = orig_len;
 				goto errout;
 			}
-			status |= DO_SETLINK_NOTIFY;
+			status |= DO_SETLINK_MODIFIED;
 		}
 	}
 
@@ -2215,13 +2312,17 @@ static int do_setlink(const struct sk_buff *skb,
 		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
 			const struct rtnl_af_ops *af_ops;
 
-			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
-				BUG();
+			rcu_read_lock();
+
+			BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
 
 			err = af_ops->set_link_af(dev, af);
-			if (err < 0)
+			if (err < 0) {
+				rcu_read_unlock();
 				goto errout;
+			}
 
+			rcu_read_unlock();
 			status |= DO_SETLINK_NOTIFY;
 		}
 	}
@@ -2273,7 +2374,7 @@ static int do_setlink(const struct sk_buff *skb,
 
 errout:
 	if (status & DO_SETLINK_MODIFIED) {
-		if (status & DO_SETLINK_NOTIFY)
+		if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
 			netdev_state_change(dev);
 
 		if (err < 0)
@@ -2299,6 +2400,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err < 0)
 		goto errout;
 
+	if (tb[IFLA_IF_NETNSID])
+		return -EOPNOTSUPP;
+
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 	else
@@ -2393,6 +2497,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err < 0)
 		return err;
 
+	if (tb[IFLA_IF_NETNSID])
+		return -EOPNOTSUPP;
+
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 
@@ -2524,6 +2631,9 @@ replay:
 	if (err < 0)
 		return err;
 
+	if (tb[IFLA_IF_NETNSID])
+		return -EOPNOTSUPP;
+
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 	else
@@ -2601,12 +2711,6 @@ replay:
 					return err;
 				slave_data = slave_attr;
 			}
-			if (m_ops->slave_validate) {
-				err = m_ops->slave_validate(tb, slave_data,
-							    extack);
-				if (err < 0)
-					return err;
-			}
 		}
 
 		if (dev) {
@@ -2736,7 +2840,8 @@ replay:
 				goto out_unregister;
 		}
 		if (tb[IFLA_MASTER]) {
-			err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+			err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
+					    extack);
 			if (err)
 				goto out_unregister;
 		}
@@ -2762,11 +2867,13 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 			struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
+	struct net *tgt_net = net;
 	struct ifinfomsg *ifm;
 	char ifname[IFNAMSIZ];
 	struct nlattr *tb[IFLA_MAX+1];
 	struct net_device *dev = NULL;
 	struct sk_buff *nskb;
+	int netnsid = -1;
 	int err;
 	u32 ext_filter_mask = 0;
 
@@ -2774,35 +2881,50 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err < 0)
 		return err;
 
+	if (tb[IFLA_IF_NETNSID]) {
+		netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+		tgt_net = get_target_net(skb, netnsid);
+		if (IS_ERR(tgt_net))
+			return PTR_ERR(tgt_net);
+	}
+
 	if (tb[IFLA_IFNAME])
 		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
 
 	if (tb[IFLA_EXT_MASK])
 		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
 
+	err = -EINVAL;
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifi_index > 0)
-		dev = __dev_get_by_index(net, ifm->ifi_index);
+		dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
 	else if (tb[IFLA_IFNAME])
-		dev = __dev_get_by_name(net, ifname);
+		dev = __dev_get_by_name(tgt_net, ifname);
 	else
-		return -EINVAL;
+		goto out;
 
+	err = -ENODEV;
 	if (dev == NULL)
-		return -ENODEV;
+		goto out;
 
+	err = -ENOBUFS;
 	nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
 	if (nskb == NULL)
-		return -ENOBUFS;
+		goto out;
 
-	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
-			       nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
+	err = rtnl_fill_ifinfo(nskb, dev, net,
+			       RTM_NEWLINK, NETLINK_CB(skb).portid,
+			       nlh->nlmsg_seq, 0, 0, ext_filter_mask,
+			       0, NULL, netnsid);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(nskb);
 	} else
 		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
+out:
+	if (netnsid >= 0)
+		put_net(tgt_net);
 
 	return err;
 }
@@ -2831,11 +2953,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
 	 * traverse the list of net devices and compute the minimum
 	 * buffer size based upon the filter mask.
 	 */
-	list_for_each_entry(dev, &net->dev_base_head, dev_list) {
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
 		min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
 					     if_nlmsg_size(dev,
 						           ext_filter_mask));
 	}
+	rcu_read_unlock();
 
 	return nlmsg_total_size(min_ifinfo_dump_size);
 }
@@ -2847,19 +2971,29 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (s_idx == 0)
 		s_idx = 1;
+
 	for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
 		int type = cb->nlh->nlmsg_type-RTM_BASE;
+		struct rtnl_link *handlers;
+		rtnl_dumpit_func dumpit;
+
 		if (idx < s_idx || idx == PF_PACKET)
 			continue;
-		if (rtnl_msg_handlers[idx] == NULL ||
-		    rtnl_msg_handlers[idx][type].dumpit == NULL)
+
+		handlers = rtnl_dereference(rtnl_msg_handlers[idx]);
+		if (!handlers)
+			continue;
+
+		dumpit = READ_ONCE(handlers[type].dumpit);
+		if (!dumpit)
 			continue;
+
 		if (idx > s_idx) {
 			memset(&cb->args[0], 0, sizeof(cb->args));
 			cb->prev_seq = 0;
 			cb->seq = 0;
 		}
-		if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
+		if (dumpit(skb, cb))
 			break;
 	}
 	cb->family = idx;
@@ -2869,7 +3003,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 
 struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
 				       unsigned int change,
-				       u32 event, gfp_t flags)
+				       u32 event, gfp_t flags, int *new_nsid)
 {
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
@@ -2880,7 +3014,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
 	if (skb == NULL)
 		goto errout;
 
-	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
+	err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
+			       type, 0, 0, change, 0, 0, event,
+			       new_nsid, -1);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -2903,14 +3039,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
 
 static void rtmsg_ifinfo_event(int type, struct net_device *dev,
 			       unsigned int change, u32 event,
-			       gfp_t flags)
+			       gfp_t flags, int *new_nsid)
 {
 	struct sk_buff *skb;
 
 	if (dev->reg_state != NETREG_REGISTERED)
 		return;
 
-	skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
+	skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid);
 	if (skb)
 		rtmsg_ifinfo_send(skb, dev, flags);
 }
@@ -2918,9 +3054,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
 void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
 		  gfp_t flags)
 {
-	rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
+	rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL);
+}
+
+void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+			 gfp_t flags, int *new_nsid)
+{
+	rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
+			   new_nsid);
 }
-EXPORT_SYMBOL(rtmsg_ifinfo);
 
 static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
 				   struct net_device *dev,
@@ -3027,21 +3169,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
 }
 EXPORT_SYMBOL(ndo_dflt_fdb_add);
 
-static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
+static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid,
+			 struct netlink_ext_ack *extack)
 {
 	u16 vid = 0;
 
 	if (vlan_attr) {
 		if (nla_len(vlan_attr) != sizeof(u16)) {
-			pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n");
+			NL_SET_ERR_MSG(extack, "invalid vlan attribute size");
 			return -EINVAL;
 		}
 
 		vid = nla_get_u16(vlan_attr);
 
 		if (!vid || vid >= VLAN_VID_MASK) {
-			pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n",
-				vid);
+			NL_SET_ERR_MSG(extack, "invalid vlan id");
 			return -EINVAL;
 		}
 	}
@@ -3066,24 +3208,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex == 0) {
-		pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n");
+		NL_SET_ERR_MSG(extack, "invalid ifindex");
 		return -EINVAL;
 	}
 
 	dev = __dev_get_by_index(net, ndm->ndm_ifindex);
 	if (dev == NULL) {
-		pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n");
+		NL_SET_ERR_MSG(extack, "unknown ifindex");
 		return -ENODEV;
 	}
 
 	if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
-		pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n");
+		NL_SET_ERR_MSG(extack, "invalid address");
 		return -EINVAL;
 	}
 
 	addr = nla_data(tb[NDA_LLADDR]);
 
-	err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+	err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
 	if (err)
 		return err;
 
@@ -3170,24 +3312,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex == 0) {
-		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n");
+		NL_SET_ERR_MSG(extack, "invalid ifindex");
 		return -EINVAL;
 	}
 
 	dev = __dev_get_by_index(net, ndm->ndm_ifindex);
 	if (dev == NULL) {
-		pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n");
+		NL_SET_ERR_MSG(extack, "unknown ifindex");
 		return -ENODEV;
 	}
 
 	if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
-		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
+		NL_SET_ERR_MSG(extack, "invalid address");
 		return -EINVAL;
 	}
 
 	addr = nla_data(tb[NDA_LLADDR]);
 
-	err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+	err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
 	if (err)
 		return err;
 
@@ -3627,7 +3769,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	dev = __dev_get_by_index(net, ifm->ifi_index);
 	if (!dev) {
-		pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+		NL_SET_ERR_MSG(extack, "unknown ifindex");
 		return -ENODEV;
 	}
 
@@ -3702,7 +3844,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	dev = __dev_get_by_index(net, ifm->ifi_index);
 	if (!dev) {
-		pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+		NL_SET_ERR_MSG(extack, "unknown ifindex");
 		return -ENODEV;
 	}
 
@@ -3867,6 +4009,9 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 		return -EMSGSIZE;
 
 	ifsm = nlmsg_data(nlh);
+	ifsm->family = PF_UNSPEC;
+	ifsm->pad1 = 0;
+	ifsm->pad2 = 0;
 	ifsm->ifindex = dev->ifindex;
 	ifsm->filter_mask = filter_mask;
 
@@ -3950,25 +4095,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 		if (!attr)
 			goto nla_put_failure;
 
-		list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
 			if (af_ops->fill_stats_af) {
 				struct nlattr *af;
 				int err;
 
 				af = nla_nest_start(skb, af_ops->family);
-				if (!af)
+				if (!af) {
+					rcu_read_unlock();
 					goto nla_put_failure;
-
+				}
 				err = af_ops->fill_stats_af(skb, dev);
 
-				if (err == -ENODATA)
+				if (err == -ENODATA) {
 					nla_nest_cancel(skb, af);
-				else if (err < 0)
+				} else if (err < 0) {
+					rcu_read_unlock();
 					goto nla_put_failure;
+				}
 
 				nla_nest_end(skb, af);
 			}
 		}
+		rcu_read_unlock();
 
 		nla_nest_end(skb, attr);
 
@@ -4037,7 +4187,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
 		/* for IFLA_STATS_AF_SPEC */
 		size += nla_total_size(0);
 
-		list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
 			if (af_ops->get_stats_af_size) {
 				size += nla_total_size(
 					af_ops->get_stats_af_size(dev));
@@ -4046,6 +4197,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
 				size += nla_total_size(0);
 			}
 		}
+		rcu_read_unlock();
 	}
 
 	return size;
@@ -4162,11 +4314,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 			     struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
+	struct rtnl_link *handlers;
+	int err = -EOPNOTSUPP;
 	rtnl_doit_func doit;
+	unsigned int flags;
 	int kind;
 	int family;
 	int type;
-	int err;
 
 	type = nlh->nlmsg_type;
 	if (type > RTM_MAX)
@@ -4184,20 +4338,40 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
+	if (family >= ARRAY_SIZE(rtnl_msg_handlers))
+		family = PF_UNSPEC;
+
+	rcu_read_lock();
+	handlers = rcu_dereference(rtnl_msg_handlers[family]);
+	if (!handlers) {
+		family = PF_UNSPEC;
+		handlers = rcu_dereference(rtnl_msg_handlers[family]);
+	}
+
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
 		struct sock *rtnl;
 		rtnl_dumpit_func dumpit;
-		rtnl_calcit_func calcit;
 		u16 min_dump_alloc = 0;
 
-		dumpit = rtnl_get_dumpit(family, type);
-		if (dumpit == NULL)
-			return -EOPNOTSUPP;
-		calcit = rtnl_get_calcit(family, type);
-		if (calcit)
-			min_dump_alloc = calcit(skb, nlh);
+		dumpit = READ_ONCE(handlers[type].dumpit);
+		if (!dumpit) {
+			family = PF_UNSPEC;
+			handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]);
+			if (!handlers)
+				goto err_unlock;
+
+			dumpit = READ_ONCE(handlers[type].dumpit);
+			if (!dumpit)
+				goto err_unlock;
+		}
+
+		refcount_inc(&rtnl_msg_handlers_ref[family]);
+
+		if (type == RTM_GETLINK - RTM_BASE)
+			min_dump_alloc = rtnl_calcit(skb, nlh);
+
+		rcu_read_unlock();
 
-		__rtnl_unlock();
 		rtnl = net->rtnl;
 		{
 			struct netlink_dump_control c = {
@@ -4206,22 +4380,47 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 			};
 			err = netlink_dump_start(rtnl, skb, nlh, &c);
 		}
-		rtnl_lock();
+		refcount_dec(&rtnl_msg_handlers_ref[family]);
 		return err;
 	}
 
-	doit = rtnl_get_doit(family, type);
-	if (doit == NULL)
-		return -EOPNOTSUPP;
+	doit = READ_ONCE(handlers[type].doit);
+	if (!doit) {
+		family = PF_UNSPEC;
+		handlers = rcu_dereference(rtnl_msg_handlers[family]);
+	}
+
+	flags = READ_ONCE(handlers[type].flags);
+	if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
+		refcount_inc(&rtnl_msg_handlers_ref[family]);
+		doit = READ_ONCE(handlers[type].doit);
+		rcu_read_unlock();
+		if (doit)
+			err = doit(skb, nlh, extack);
+		refcount_dec(&rtnl_msg_handlers_ref[family]);
+		return err;
+	}
+
+	rcu_read_unlock();
+
+	rtnl_lock();
+	handlers = rtnl_dereference(rtnl_msg_handlers[family]);
+	if (handlers) {
+		doit = READ_ONCE(handlers[type].doit);
+		if (doit)
+			err = doit(skb, nlh, extack);
+	}
+	rtnl_unlock();
+	return err;
 
-	return doit(skb, nlh, extack);
+err_unlock:
+	rcu_read_unlock();
+	return -EOPNOTSUPP;
 }
 
 static void rtnetlink_rcv(struct sk_buff *skb)
 {
-	rtnl_lock();
 	netlink_rcv_skb(skb, &rtnetlink_rcv_msg);
-	rtnl_unlock();
 }
 
 static int rtnetlink_bind(struct net *net, int group)
@@ -4242,15 +4441,20 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 
 	switch (event) {
 	case NETDEV_REBOOT:
+	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGEADDR:
 	case NETDEV_CHANGENAME:
 	case NETDEV_FEAT_CHANGE:
 	case NETDEV_BONDING_FAILOVER:
+	case NETDEV_POST_TYPE_CHANGE:
 	case NETDEV_NOTIFY_PEERS:
+	case NETDEV_CHANGEUPPER:
 	case NETDEV_RESEND_IGMP:
 	case NETDEV_CHANGEINFODATA:
+	case NETDEV_CHANGELOWERSTATE:
+	case NETDEV_CHANGE_TX_QUEUE_LEN:
 		rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
-				   GFP_KERNEL);
+				   GFP_KERNEL, NULL);
 		break;
 	default:
 		break;
@@ -4294,29 +4498,34 @@ static struct pernet_operations rtnetlink_net_ops = {
 
 void __init rtnetlink_init(void)
 {
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++)
+		refcount_set(&rtnl_msg_handlers_ref[i], 1);
+
 	if (register_pernet_subsys(&rtnetlink_net_ops))
 		panic("rtnetlink_init: cannot initialize rtnetlink\n");
 
 	register_netdevice_notifier(&rtnetlink_dev_notifier);
 
 	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
-		      rtnl_dump_ifinfo, rtnl_calcit);
-	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL);
+		      rtnl_dump_ifinfo, 0);
+	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
 
-	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);
 
-	rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
-	rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
-	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
+	rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
+	rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
+	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0);
 
-	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
-	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
-	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
+	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
+	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
+	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0);
 
 	rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
-		      NULL);
+		      0);
 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f990eb8b30a9..6b0ff396fa9d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -41,7 +41,6 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/kmemcheck.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/in.h>
@@ -158,31 +157,6 @@ out:
  *
  */
 
-struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
-{
-	struct sk_buff *skb;
-
-	/* Get the HEAD */
-	skb = kmem_cache_alloc_node(skbuff_head_cache,
-				    gfp_mask & ~__GFP_DMA, node);
-	if (!skb)
-		goto out;
-
-	/*
-	 * Only clear those fields we need to clear, not those that we will
-	 * actually initialise below. Hence, don't put any more fields after
-	 * the tail pointer in struct sk_buff!
-	 */
-	memset(skb, 0, offsetof(struct sk_buff, tail));
-	skb->head = NULL;
-	skb->truesize = sizeof(struct sk_buff);
-	refcount_set(&skb->users, 1);
-
-	skb->mac_header = (typeof(skb->mac_header))~0U;
-out:
-	return skb;
-}
-
 /**
  *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
@@ -259,14 +233,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	if (flags & SKB_ALLOC_FCLONE) {
 		struct sk_buff_fclones *fclones;
 
 		fclones = container_of(skb, struct sk_buff_fclones, skb1);
 
-		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
 		skb->fclone = SKB_FCLONE_ORIG;
 		refcount_set(&fclones->fclone_ref, 1);
 
@@ -326,7 +298,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	return skb;
 }
@@ -382,7 +353,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  */
 void *netdev_alloc_frag(unsigned int fragsz)
 {
-	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+	return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
 }
 EXPORT_SYMBOL(netdev_alloc_frag);
 
@@ -395,7 +366,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 
 void *napi_alloc_frag(unsigned int fragsz)
 {
-	return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC);
 }
 EXPORT_SYMBOL(napi_alloc_frag);
 
@@ -592,21 +563,10 @@ static void skb_release_data(struct sk_buff *skb)
 	for (i = 0; i < shinfo->nr_frags; i++)
 		__skb_frag_unref(&shinfo->frags[i]);
 
-	/*
-	 * If skb buf is from userspace, we need to notify the caller
-	 * the lower device DMA has done;
-	 */
-	if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		struct ubuf_info *uarg;
-
-		uarg = shinfo->destructor_arg;
-		if (uarg->callback)
-			uarg->callback(uarg, true);
-	}
-
 	if (shinfo->frag_list)
 		kfree_skb_list(shinfo->frag_list);
 
+	skb_zcopy_clear(skb, true);
 	skb_free_head(skb);
 }
 
@@ -720,14 +680,7 @@ EXPORT_SYMBOL(kfree_skb_list);
  */
 void skb_tx_error(struct sk_buff *skb)
 {
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		struct ubuf_info *uarg;
-
-		uarg = skb_shinfo(skb)->destructor_arg;
-		if (uarg->callback)
-			uarg->callback(uarg, false);
-		skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
-	}
+	skb_zcopy_clear(skb, true);
 }
 EXPORT_SYMBOL(skb_tx_error);
 
@@ -753,17 +706,13 @@ EXPORT_SYMBOL(consume_skb);
  *	consume_stateless_skb - free an skbuff, assuming it is stateless
  *	@skb: buffer to free
  *
- *	Works like consume_skb(), but this variant assumes that all the head
- *	states have been already dropped.
+ *	Like consume_skb(), but this variant assumes that this is the last
+ *	skb reference and all the head states have already been dropped.
  */
-void consume_stateless_skb(struct sk_buff *skb)
+void __consume_stateless_skb(struct sk_buff *skb)
 {
-	if (!skb_unref(skb))
-		return;
-
 	trace_consume_skb(skb);
-	if (likely(skb->head))
-		skb_release_data(skb);
+	skb_release_data(skb);
 	kfree_skbmem(skb);
 }
 
@@ -941,6 +890,271 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
+static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+{
+	unsigned long max_pg, num_pg, new_pg, old_pg;
+	struct user_struct *user;
+
+	if (capable(CAP_IPC_LOCK) || !size)
+		return 0;
+
+	num_pg = (size >> PAGE_SHIFT) + 2;	/* worst case */
+	max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	user = mmp->user ? : current_user();
+
+	do {
+		old_pg = atomic_long_read(&user->locked_vm);
+		new_pg = old_pg + num_pg;
+		if (new_pg > max_pg)
+			return -ENOBUFS;
+	} while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) !=
+		 old_pg);
+
+	if (!mmp->user) {
+		mmp->user = get_uid(user);
+		mmp->num_pg = num_pg;
+	} else {
+		mmp->num_pg += num_pg;
+	}
+
+	return 0;
+}
+
+static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+{
+	if (mmp->user) {
+		atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
+		free_uid(mmp->user);
+	}
+}
+
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
+{
+	struct ubuf_info *uarg;
+	struct sk_buff *skb;
+
+	WARN_ON_ONCE(!in_task());
+
+	if (!sock_flag(sk, SOCK_ZEROCOPY))
+		return NULL;
+
+	skb = sock_omalloc(sk, 0, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+
+	BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
+	uarg = (void *)skb->cb;
+	uarg->mmp.user = NULL;
+
+	if (mm_account_pinned_pages(&uarg->mmp, size)) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	uarg->callback = sock_zerocopy_callback;
+	uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
+	uarg->len = 1;
+	uarg->bytelen = size;
+	uarg->zerocopy = 1;
+	refcount_set(&uarg->refcnt, 1);
+	sock_hold(sk);
+
+	return uarg;
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);
+
+static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
+{
+	return container_of((void *)uarg, struct sk_buff, cb);
+}
+
+struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
+					struct ubuf_info *uarg)
+{
+	if (uarg) {
+		const u32 byte_limit = 1 << 19;		/* limit to a few TSO */
+		u32 bytelen, next;
+
+		/* realloc only when socket is locked (TCP, UDP cork),
+		 * so uarg->len and sk_zckey access is serialized
+		 */
+		if (!sock_owned_by_user(sk)) {
+			WARN_ON_ONCE(1);
+			return NULL;
+		}
+
+		bytelen = uarg->bytelen + size;
+		if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
+			/* TCP can create new skb to attach new uarg */
+			if (sk->sk_type == SOCK_STREAM)
+				goto new_alloc;
+			return NULL;
+		}
+
+		next = (u32)atomic_read(&sk->sk_zckey);
+		if ((u32)(uarg->id + uarg->len) == next) {
+			if (mm_account_pinned_pages(&uarg->mmp, size))
+				return NULL;
+			uarg->len++;
+			uarg->bytelen = bytelen;
+			atomic_set(&sk->sk_zckey, ++next);
+			sock_zerocopy_get(uarg);
+			return uarg;
+		}
+	}
+
+new_alloc:
+	return sock_zerocopy_alloc(sk, size);
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);
+
+static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
+{
+	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+	u32 old_lo, old_hi;
+	u64 sum_len;
+
+	old_lo = serr->ee.ee_info;
+	old_hi = serr->ee.ee_data;
+	sum_len = old_hi - old_lo + 1ULL + len;
+
+	if (sum_len >= (1ULL << 32))
+		return false;
+
+	if (lo != old_hi + 1)
+		return false;
+
+	serr->ee.ee_data += len;
+	return true;
+}
+
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
+{
+	struct sk_buff *tail, *skb = skb_from_uarg(uarg);
+	struct sock_exterr_skb *serr;
+	struct sock *sk = skb->sk;
+	struct sk_buff_head *q;
+	unsigned long flags;
+	u32 lo, hi;
+	u16 len;
+
+	mm_unaccount_pinned_pages(&uarg->mmp);
+
+	/* if !len, there was only 1 call, and it was aborted
+	 * so do not queue a completion notification
+	 */
+	if (!uarg->len || sock_flag(sk, SOCK_DEAD))
+		goto release;
+
+	len = uarg->len;
+	lo = uarg->id;
+	hi = uarg->id + len - 1;
+
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = 0;
+	serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
+	serr->ee.ee_data = hi;
+	serr->ee.ee_info = lo;
+	if (!success)
+		serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
+
+	q = &sk->sk_error_queue;
+	spin_lock_irqsave(&q->lock, flags);
+	tail = skb_peek_tail(q);
+	if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY ||
+	    !skb_zerocopy_notify_extend(tail, lo, len)) {
+		__skb_queue_tail(q, skb);
+		skb = NULL;
+	}
+	spin_unlock_irqrestore(&q->lock, flags);
+
+	sk->sk_error_report(sk);
+
+release:
+	consume_skb(skb);
+	sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
+
+void sock_zerocopy_put(struct ubuf_info *uarg)
+{
+	if (uarg && refcount_dec_and_test(&uarg->refcnt)) {
+		if (uarg->callback)
+			uarg->callback(uarg, uarg->zerocopy);
+		else
+			consume_skb(skb_from_uarg(uarg));
+	}
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_put);
+
+void sock_zerocopy_put_abort(struct ubuf_info *uarg)
+{
+	if (uarg) {
+		struct sock *sk = skb_from_uarg(uarg)->sk;
+
+		atomic_dec(&sk->sk_zckey);
+		uarg->len--;
+
+		sock_zerocopy_put(uarg);
+	}
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
+
+extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+				   struct iov_iter *from, size_t length);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+			     struct msghdr *msg, int len,
+			     struct ubuf_info *uarg)
+{
+	struct ubuf_info *orig_uarg = skb_zcopy(skb);
+	struct iov_iter orig_iter = msg->msg_iter;
+	int err, orig_len = skb->len;
+
+	/* An skb can only point to one uarg. This edge case happens when
+	 * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
+	 */
+	if (orig_uarg && uarg != orig_uarg)
+		return -EEXIST;
+
+	err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
+	if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
+		struct sock *save_sk = skb->sk;
+
+		/* Streams do not free skb on error. Reset to prev state. */
+		msg->msg_iter = orig_iter;
+		skb->sk = sk;
+		___pskb_trim(skb, orig_len);
+		skb->sk = save_sk;
+		return err;
+	}
+
+	skb_zcopy_set(skb, uarg);
+	return skb->len - orig_len;
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
+
+static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
+			      gfp_t gfp_mask)
+{
+	if (skb_zcopy(orig)) {
+		if (skb_zcopy(nskb)) {
+			/* !gfp_mask callers are verified to !skb_zcopy(nskb) */
+			if (!gfp_mask) {
+				WARN_ON_ONCE(1);
+				return -ENOMEM;
+			}
+			if (skb_uarg(nskb) == skb_uarg(orig))
+				return 0;
+			if (skb_copy_ubufs(nskb, GFP_ATOMIC))
+				return -EIO;
+		}
+		skb_zcopy_set(nskb, skb_uarg(orig));
+	}
+	return 0;
+}
+
 /**
  *	skb_copy_ubufs	-	copy userspace skb frags buffers to kernel
  *	@skb: the skb to modify
@@ -958,15 +1172,19 @@ EXPORT_SYMBOL_GPL(skb_morph);
  */
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	int i;
 	int num_frags = skb_shinfo(skb)->nr_frags;
 	struct page *page, *head = NULL;
-	struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
+	int i, new_frags;
+	u32 d_off;
 
-	for (i = 0; i < num_frags; i++) {
-		u8 *vaddr;
-		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+	if (!num_frags)
+		return 0;
 
+	if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
+		return -EINVAL;
+
+	new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	for (i = 0; i < new_frags; i++) {
 		page = alloc_page(gfp_mask);
 		if (!page) {
 			while (head) {
@@ -976,28 +1194,51 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 			}
 			return -ENOMEM;
 		}
-		vaddr = kmap_atomic(skb_frag_page(f));
-		memcpy(page_address(page),
-		       vaddr + f->page_offset, skb_frag_size(f));
-		kunmap_atomic(vaddr);
 		set_page_private(page, (unsigned long)head);
 		head = page;
 	}
 
+	page = head;
+	d_off = 0;
+	for (i = 0; i < num_frags; i++) {
+		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+		u32 p_off, p_len, copied;
+		struct page *p;
+		u8 *vaddr;
+
+		skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
+				      p, p_off, p_len, copied) {
+			u32 copy, done = 0;
+			vaddr = kmap_atomic(p);
+
+			while (done < p_len) {
+				if (d_off == PAGE_SIZE) {
+					d_off = 0;
+					page = (struct page *)page_private(page);
+				}
+				copy = min_t(u32, PAGE_SIZE - d_off, p_len - done);
+				memcpy(page_address(page) + d_off,
+				       vaddr + p_off + done, copy);
+				done += copy;
+				d_off += copy;
+			}
+			kunmap_atomic(vaddr);
+		}
+	}
+
 	/* skb frags release userspace buffers */
 	for (i = 0; i < num_frags; i++)
 		skb_frag_unref(skb, i);
 
-	uarg->callback(uarg, false);
-
 	/* skb frags point to kernel buffers */
-	for (i = num_frags - 1; i >= 0; i--) {
-		__skb_fill_page_desc(skb, i, head, 0,
-				     skb_shinfo(skb)->frags[i].size);
+	for (i = 0; i < new_frags - 1; i++) {
+		__skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
 		head = (struct page *)page_private(head);
 	}
+	__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
+	skb_shinfo(skb)->nr_frags = new_frags;
 
-	skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+	skb_zcopy_clear(skb, false);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(skb_copy_ubufs);
@@ -1038,7 +1279,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		if (!n)
 			return NULL;
 
-		kmemcheck_annotate_bitfield(n, flags1);
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
 
@@ -1109,8 +1349,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 	/* Set the tail pointer and length */
 	skb_put(n, skb->len);
 
-	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
-		BUG();
+	BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
 
 	copy_skb_header(n, skb);
 	return n;
@@ -1158,7 +1397,8 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
 	if (skb_shinfo(skb)->nr_frags) {
 		int i;
 
-		if (skb_orphan_frags(skb, gfp_mask)) {
+		if (skb_orphan_frags(skb, gfp_mask) ||
+		    skb_zerocopy_clone(n, skb, gfp_mask)) {
 			kfree_skb(n);
 			n = NULL;
 			goto out;
@@ -1207,8 +1447,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	BUG_ON(nhead < 0);
 
-	if (skb_shared(skb))
-		BUG();
+	BUG_ON(skb_shared(skb));
 
 	size = SKB_DATA_ALIGN(size);
 
@@ -1235,9 +1474,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	 * be since all we did is relocate the values
 	 */
 	if (skb_cloned(skb)) {
-		/* copy this zero copy skb frags */
 		if (skb_orphan_frags(skb, gfp_mask))
 			goto nofrags;
+		if (skb_zcopy(skb))
+			refcount_inc(&skb_uarg(skb)->refcnt);
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 			skb_frag_ref(skb, i);
 
@@ -1266,6 +1506,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->nohdr    = 0;
 	atomic_set(&skb_shinfo(skb)->dataref, 1);
 
+	skb_metadata_clear(skb);
+
 	/* It is not generally safe to change skb->truesize.
 	 * For the moment, we really care of rx path, or
 	 * when skb is orphaned (not attached to a socket).
@@ -1350,9 +1592,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 		head_copy_off = newheadroom - head_copy_len;
 
 	/* Copy the linear header and data. */
-	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
-			  skb->len + head_copy_len))
-		BUG();
+	BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+			     skb->len + head_copy_len));
 
 	copy_skb_header(n, skb);
 
@@ -1363,18 +1604,20 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 EXPORT_SYMBOL(skb_copy_expand);
 
 /**
- *	skb_pad			-	zero pad the tail of an skb
+ *	__skb_pad		-	zero pad the tail of an skb
  *	@skb: buffer to pad
  *	@pad: space to pad
+ *	@free_on_error: free buffer on error
  *
  *	Ensure that a buffer is followed by a padding area that is zero
  *	filled. Used by network drivers which may DMA or transfer data
  *	beyond the buffer end onto the wire.
  *
- *	May return error in out of memory cases. The skb is freed on error.
+ *	May return error in out of memory cases. The skb is freed on error
+ *	if @free_on_error is true.
  */
 
-int skb_pad(struct sk_buff *skb, int pad)
+int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error)
 {
 	int err;
 	int ntail;
@@ -1403,10 +1646,11 @@ int skb_pad(struct sk_buff *skb, int pad)
 	return 0;
 
 free_skb:
-	kfree_skb(skb);
+	if (free_on_error)
+		kfree_skb(skb);
 	return err;
 }
-EXPORT_SYMBOL(skb_pad);
+EXPORT_SYMBOL(__skb_pad);
 
 /**
  *	pskb_put - add data to the tail of a potentially fragmented buffer
@@ -1630,8 +1874,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
 			return NULL;
 	}
 
-	if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
-		BUG();
+	BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
+			     skb_tail_pointer(skb), delta));
 
 	/* Optimization: no fragments, no reasons to preestimate
 	 * size of pulled pages. Superb.
@@ -1650,7 +1894,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
 	}
 
 	/* If we need update frag list, we are in troubles.
-	 * Certainly, it possible to add an offset to skb data,
+	 * Certainly, it is possible to add an offset to skb data,
 	 * but taking into account that pulling is expected to
 	 * be very rare operation, it is worth to fight against
 	 * further bloating skb head and crucify ourselves here instead.
@@ -1719,6 +1963,8 @@ pull_pages:
 			if (eat) {
 				skb_shinfo(skb)->frags[k].page_offset += eat;
 				skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+				if (!i)
+					goto end;
 				eat = 0;
 			}
 			k++;
@@ -1726,9 +1972,13 @@ pull_pages:
 	}
 	skb_shinfo(skb)->nr_frags = k;
 
+end:
 	skb->tail     += delta;
 	skb->data_len -= delta;
 
+	if (!skb->data_len)
+		skb_zcopy_clear(skb, false);
+
 	return skb_tail_pointer(skb);
 }
 EXPORT_SYMBOL(__pskb_pull_tail);
@@ -1776,16 +2026,20 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 
 		end = start + skb_frag_size(f);
 		if ((copy = end - offset) > 0) {
+			u32 p_off, p_len, copied;
+			struct page *p;
 			u8 *vaddr;
 
 			if (copy > len)
 				copy = len;
 
-			vaddr = kmap_atomic(skb_frag_page(f));
-			memcpy(to,
-			       vaddr + f->page_offset + offset - start,
-			       copy);
-			kunmap_atomic(vaddr);
+			skb_frag_foreach_page(f,
+					      f->page_offset + offset - start,
+					      copy, p, p_off, p_len, copied) {
+				vaddr = kmap_atomic(p);
+				memcpy(to + copied, vaddr + p_off, p_len);
+				kunmap_atomic(vaddr);
+			}
 
 			if ((len -= copy) == 0)
 				return 0;
@@ -2005,6 +2259,107 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 }
 EXPORT_SYMBOL_GPL(skb_splice_bits);
 
+/* Send skb data on a socket. Socket must be locked. */
+int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
+			 int len)
+{
+	unsigned int orig_len = len;
+	struct sk_buff *head = skb;
+	unsigned short fragidx;
+	int slen, ret;
+
+do_frag_list:
+
+	/* Deal with head data */
+	while (offset < skb_headlen(skb) && len) {
+		struct kvec kv;
+		struct msghdr msg;
+
+		slen = min_t(int, len, skb_headlen(skb) - offset);
+		kv.iov_base = skb->data + offset;
+		kv.iov_len = slen;
+		memset(&msg, 0, sizeof(msg));
+
+		ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
+		if (ret <= 0)
+			goto error;
+
+		offset += ret;
+		len -= ret;
+	}
+
+	/* Was all the data in the skb head? */
+	if (!len)
+		goto out;
+
+	/* Make offset relative to start of frags */
+	offset -= skb_headlen(skb);
+
+	/* Find where we are in the frags[] array */
+	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+
+		if (offset < frag->size)
+			break;
+
+		offset -= frag->size;
+	}
+
+	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+
+		slen = min_t(size_t, len, frag->size - offset);
+
+		while (slen) {
+			ret = kernel_sendpage_locked(sk, frag->page.p,
+						     frag->page_offset + offset,
+						     slen, MSG_DONTWAIT);
+			if (ret <= 0)
+				goto error;
+
+			len -= ret;
+			offset += ret;
+			slen -= ret;
+		}
+
+		offset = 0;
+	}
+
+	if (len) {
+		/* Process any frag lists */
+
+		if (skb == head) {
+			if (skb_has_frag_list(skb)) {
+				skb = skb_shinfo(skb)->frag_list;
+				goto do_frag_list;
+			}
+		} else if (skb->next) {
+			skb = skb->next;
+			goto do_frag_list;
+		}
+	}
+
+out:
+	return orig_len - len;
+
+error:
+	return orig_len == len ? ret : orig_len - len;
+}
+EXPORT_SYMBOL_GPL(skb_send_sock_locked);
+
+/* Send skb data on a socket. */
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
+{
+	int ret = 0;
+
+	lock_sock(sk);
+	ret = skb_send_sock_locked(sk, skb, offset, len);
+	release_sock(sk);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(skb_send_sock);
+
 /**
  *	skb_store_bits - store bits from kernel buffer to skb
  *	@skb: destination buffer
@@ -2044,15 +2399,20 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 
 		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
+			u32 p_off, p_len, copied;
+			struct page *p;
 			u8 *vaddr;
 
 			if (copy > len)
 				copy = len;
 
-			vaddr = kmap_atomic(skb_frag_page(frag));
-			memcpy(vaddr + frag->page_offset + offset - start,
-			       from, copy);
-			kunmap_atomic(vaddr);
+			skb_frag_foreach_page(frag,
+					      frag->page_offset + offset - start,
+					      copy, p, p_off, p_len, copied) {
+				vaddr = kmap_atomic(p);
+				memcpy(vaddr + p_off, from + copied, p_len);
+				kunmap_atomic(vaddr);
+			}
 
 			if ((len -= copy) == 0)
 				return 0;
@@ -2117,20 +2477,27 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 
 		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
+			u32 p_off, p_len, copied;
+			struct page *p;
 			__wsum csum2;
 			u8 *vaddr;
 
 			if (copy > len)
 				copy = len;
-			vaddr = kmap_atomic(skb_frag_page(frag));
-			csum2 = ops->update(vaddr + frag->page_offset +
-					    offset - start, copy, 0);
-			kunmap_atomic(vaddr);
-			csum = ops->combine(csum, csum2, pos, copy);
+
+			skb_frag_foreach_page(frag,
+					      frag->page_offset + offset - start,
+					      copy, p, p_off, p_len, copied) {
+				vaddr = kmap_atomic(p);
+				csum2 = ops->update(vaddr + p_off, p_len, 0);
+				kunmap_atomic(vaddr);
+				csum = ops->combine(csum, csum2, pos, p_len);
+				pos += p_len;
+			}
+
 			if (!(len -= copy))
 				return csum;
 			offset += copy;
-			pos    += copy;
 		}
 		start = end;
 	}
@@ -2203,24 +2570,31 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 
 		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
 		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			u32 p_off, p_len, copied;
+			struct page *p;
 			__wsum csum2;
 			u8 *vaddr;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 			if (copy > len)
 				copy = len;
-			vaddr = kmap_atomic(skb_frag_page(frag));
-			csum2 = csum_partial_copy_nocheck(vaddr +
-							  frag->page_offset +
-							  offset - start, to,
-							  copy, 0);
-			kunmap_atomic(vaddr);
-			csum = csum_block_add(csum, csum2, pos);
+
+			skb_frag_foreach_page(frag,
+					      frag->page_offset + offset - start,
+					      copy, p, p_off, p_len, copied) {
+				vaddr = kmap_atomic(p);
+				csum2 = csum_partial_copy_nocheck(vaddr + p_off,
+								  to + copied,
+								  p_len, 0);
+				kunmap_atomic(vaddr);
+				csum = csum_block_add(csum, csum2, pos);
+				pos += p_len;
+			}
+
 			if (!(len -= copy))
 				return csum;
 			offset += copy;
 			to     += copy;
-			pos    += copy;
 		}
 		start = end;
 	}
@@ -2360,6 +2734,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
 		skb_tx_error(from);
 		return -ENOMEM;
 	}
+	skb_zerocopy_clone(to, from, GFP_ATOMIC);
 
 	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
 		if (!len)
@@ -2471,12 +2846,15 @@ EXPORT_SYMBOL(skb_queue_purge);
  */
 void skb_rbtree_purge(struct rb_root *root)
 {
-	struct sk_buff *skb, *next;
+	struct rb_node *p = rb_first(root);
 
-	rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
-		kfree_skb(skb);
+	while (p) {
+		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
 
-	*root = RB_ROOT;
+		p = rb_next(p);
+		rb_erase(&skb->rbnode, root);
+		kfree_skb(skb);
+	}
 }
 
 /**
@@ -2657,6 +3035,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 
 	skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags &
 				      SKBTX_SHARED_FRAG;
+	skb_zerocopy_clone(skb1, skb, 0);
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
 	else		/* Second chunk has no header, nothing to copy. */
@@ -2700,6 +3079,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 
 	if (skb_headlen(skb))
 		return 0;
+	if (skb_zcopy(tgt) || skb_zcopy(skb))
+		return 0;
 
 	todo = shiftlen;
 	from = 0;
@@ -3273,6 +3654,8 @@ normal:
 
 		skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
 					      SKBTX_SHARED_FRAG;
+		if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
+			goto err;
 
 		while (pos < offset + len) {
 			if (i >= nfrags) {
@@ -4380,6 +4763,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		      bool *fragstolen, int *delta_truesize)
 {
+	struct skb_shared_info *to_shinfo, *from_shinfo;
 	int i, delta, len = from->len;
 
 	*fragstolen = false;
@@ -4394,15 +4778,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		return true;
 	}
 
-	if (skb_has_frag_list(to) || skb_has_frag_list(from))
+	to_shinfo = skb_shinfo(to);
+	from_shinfo = skb_shinfo(from);
+	if (to_shinfo->frag_list || from_shinfo->frag_list)
+		return false;
+	if (skb_zcopy(to) || skb_zcopy(from))
 		return false;
 
 	if (skb_headlen(from) != 0) {
 		struct page *page;
 		unsigned int offset;
 
-		if (skb_shinfo(to)->nr_frags +
-		    skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+		if (to_shinfo->nr_frags +
+		    from_shinfo->nr_frags >= MAX_SKB_FRAGS)
 			return false;
 
 		if (skb_head_is_locked(from))
@@ -4413,12 +4801,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		page = virt_to_head_page(from->head);
 		offset = from->data - (unsigned char *)page_address(page);
 
-		skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+		skb_fill_page_desc(to, to_shinfo->nr_frags,
 				   page, offset, skb_headlen(from));
 		*fragstolen = true;
 	} else {
-		if (skb_shinfo(to)->nr_frags +
-		    skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
+		if (to_shinfo->nr_frags +
+		    from_shinfo->nr_frags > MAX_SKB_FRAGS)
 			return false;
 
 		delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
@@ -4426,19 +4814,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 
 	WARN_ON_ONCE(delta < len);
 
-	memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
-	       skb_shinfo(from)->frags,
-	       skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
-	skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+	memcpy(to_shinfo->frags + to_shinfo->nr_frags,
+	       from_shinfo->frags,
+	       from_shinfo->nr_frags * sizeof(skb_frag_t));
+	to_shinfo->nr_frags += from_shinfo->nr_frags;
 
 	if (!skb_cloned(from))
-		skb_shinfo(from)->nr_frags = 0;
+		from_shinfo->nr_frags = 0;
 
 	/* if the skb is not cloned this does nothing
 	 * since we set nr_frags to 0.
 	 */
-	for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
-		skb_frag_ref(from, i);
+	for (i = 0; i < from_shinfo->nr_frags; i++)
+		__skb_frag_ref(&from_shinfo->frags[i]);
 
 	to->truesize += delta;
 	to->len += len;
@@ -4476,6 +4864,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	if (!xnet)
 		return;
 
+	ipvs_reset(skb);
 	skb_orphan(skb);
 	skb->mark = 0;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index ac2a404c73eb..c0b5b2f17412 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -307,16 +307,6 @@ static struct lock_class_key af_wlock_keys[AF_MAX];
 static struct lock_class_key af_elock_keys[AF_MAX];
 static struct lock_class_key af_kern_callback_keys[AF_MAX];
 
-/* Take into consideration the size of the struct sk_buff overhead in the
- * determination of these values, since that is non-constant across
- * platforms.  This makes socket queueing behavior and performance
- * not depend upon such differences.
- */
-#define _SK_MEM_PACKETS		256
-#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
-#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-
 /* Run time adjustable parameters. */
 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
 EXPORT_SYMBOL(sysctl_wmem_max);
@@ -1055,6 +1045,20 @@ set_rcvbuf:
 		if (val == 1)
 			dst_negative_advice(sk);
 		break;
+
+	case SO_ZEROCOPY:
+		if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+			ret = -ENOTSUPP;
+		else if (sk->sk_protocol != IPPROTO_TCP)
+			ret = -ENOTSUPP;
+		else if (sk->sk_state != TCP_CLOSE)
+			ret = -EBUSY;
+		else if (val < 0 || val > 1)
+			ret = -EINVAL;
+		else
+			sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1383,6 +1387,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val64 = sock_gen_cookie(sk);
 		break;
 
+	case SO_ZEROCOPY:
+		v.val = sock_flag(sk, SOCK_ZEROCOPY);
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -1461,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 		sk = kmalloc(prot->obj_size, priority);
 
 	if (sk != NULL) {
-		kmemcheck_annotate_bitfield(sk, flags);
-
 		if (security_sk_alloc(sk, family, priority))
 			goto out_free;
 
@@ -1646,6 +1652,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 
 		sock_copy(newsk, sk);
 
+		newsk->sk_prot_creator = sk->sk_prot;
+
 		/* SANITY */
 		if (likely(newsk->sk_net_refcnt))
 			get_net(sock_net(newsk));
@@ -1667,19 +1675,28 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_dst_pending_confirm = 0;
 		newsk->sk_wmem_queued	= 0;
 		newsk->sk_forward_alloc = 0;
+
+		/* sk->sk_memcg will be populated at accept() time */
+		newsk->sk_memcg = NULL;
+
 		atomic_set(&newsk->sk_drops, 0);
 		newsk->sk_send_head	= NULL;
 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+		atomic_set(&newsk->sk_zckey, 0);
 
 		sock_reset_flag(newsk, SOCK_DONE);
+		cgroup_sk_alloc(&newsk->sk_cgrp_data);
 
-		filter = rcu_dereference_protected(newsk->sk_filter, 1);
+		rcu_read_lock();
+		filter = rcu_dereference(sk->sk_filter);
 		if (filter != NULL)
 			/* though it's an empty new sock, the charging may fail
 			 * if sysctl_optmem_max was changed between creation of
 			 * original socket and cloning
 			 */
 			is_charged = sk_filter_charge(newsk, filter);
+		RCU_INIT_POINTER(newsk->sk_filter, filter);
+		rcu_read_unlock();
 
 		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
 			/* We need to make sure that we don't uncharge the new
@@ -1700,9 +1717,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_incoming_cpu = raw_smp_processor_id();
 		atomic64_set(&newsk->sk_cookie, 0);
 
-		mem_cgroup_sk_alloc(newsk);
-		cgroup_sk_alloc(&newsk->sk_cgrp_data);
-
 		/*
 		 * Before updating sk_refcnt, we must commit prior changes to memory
 		 * (Documentation/RCU/rculist_nulls.txt for details)
@@ -1757,7 +1771,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
 	if (sk_can_gso(sk)) {
-		if (dst->header_len) {
+		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 		} else {
 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
@@ -1923,6 +1937,33 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 }
 EXPORT_SYMBOL(sock_wmalloc);
 
+static void sock_ofree(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_omem_alloc);
+}
+
+struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+			     gfp_t priority)
+{
+	struct sk_buff *skb;
+
+	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
+	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
+	    sysctl_optmem_max)
+		return NULL;
+
+	skb = alloc_skb(size, priority);
+	if (!skb)
+		return NULL;
+
+	atomic_add(skb->truesize, &sk->sk_omem_alloc);
+	skb->sk = sk;
+	skb->destructor = sock_ofree;
+	return skb;
+}
+
 /*
  * Allocate a memory block from the socket's option memory buffer.
  */
@@ -2303,16 +2344,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 
 	/* guarantee minimum buffer size under pressure */
 	if (kind == SK_MEM_RECV) {
-		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+		if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
 			return 1;
 
 	} else { /* SK_MEM_SEND */
+		int wmem0 = sk_get_wmem0(sk, prot);
+
 		if (sk->sk_type == SOCK_STREAM) {
-			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+			if (sk->sk_wmem_queued < wmem0)
 				return 1;
-		} else if (refcount_read(&sk->sk_wmem_alloc) <
-			   prot->sysctl_wmem[0])
+		} else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
 				return 1;
+		}
 	}
 
 	if (sk_has_memory_pressure(sk)) {
@@ -2408,9 +2451,6 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
 
 int sk_set_peek_off(struct sock *sk, int val)
 {
-	if (val < 0)
-		return -EINVAL;
-
 	sk->sk_peek_off = val;
 	return 0;
 }
@@ -2500,6 +2540,12 @@ int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 }
 EXPORT_SYMBOL(sock_no_sendmsg);
 
+int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
+{
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_sendmsg_locked);
+
 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
 		    int flags)
 {
@@ -2528,6 +2574,22 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
 }
 EXPORT_SYMBOL(sock_no_sendpage);
 
+ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+				int offset, size_t size, int flags)
+{
+	ssize_t res;
+	struct msghdr msg = {.msg_flags = flags};
+	struct kvec iov;
+	char *kaddr = kmap(page);
+
+	iov.iov_base = kaddr + offset;
+	iov.iov_len = size;
+	res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
+	kunmap(page);
+	return res;
+}
+EXPORT_SYMBOL(sock_no_sendpage_locked);
+
 /*
  *	Default Socket Callbacks
  */
@@ -2623,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk_init_common(sk);
 	sk->sk_send_head	=	NULL;
 
-	init_timer(&sk->sk_timer);
+	timer_setup(&sk->sk_timer, NULL, 0);
 
 	sk->sk_allocation	=	GFP_KERNEL;
 	sk->sk_rcvbuf		=	sysctl_rmem_default;
@@ -2673,6 +2735,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
 
 	sk->sk_stamp = SK_DEFAULT_STAMP;
+	atomic_set(&sk->sk_zckey, 0);
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id		=	0;
@@ -2681,6 +2744,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sk->sk_max_pacing_rate = ~0U;
 	sk->sk_pacing_rate = ~0U;
+	sk->sk_pacing_shift = 10;
 	sk->sk_incoming_cpu = -1;
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
@@ -2979,7 +3043,6 @@ struct prot_inuse {
 
 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
 
-#ifdef CONFIG_NET_NS
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
 {
 	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
@@ -3023,27 +3086,6 @@ static __init int net_inuse_init(void)
 }
 
 core_initcall(net_inuse_init);
-#else
-static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
-
-void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
-{
-	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
-
-int sock_prot_inuse_get(struct net *net, struct proto *prot)
-{
-	int cpu, idx = prot->inuse_idx;
-	int res = 0;
-
-	for_each_possible_cpu(cpu)
-		res += per_cpu(prot_inuse, cpu).val[idx];
-
-	return res >= 0 ? res : 0;
-}
-EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
-#endif
 
 static void assign_proto_idx(struct proto *prot)
 {
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index eed1ebf7f29d..5eeb1d20cc38 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * To speed up listener socket lookup, create an array to store all sockets
  * listening on the same port.  This allows a decision to be made after finding
@@ -36,9 +37,14 @@ int reuseport_alloc(struct sock *sk)
 	 * soft irq of receive path or setsockopt from process context
 	 */
 	spin_lock_bh(&reuseport_lock);
-	WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
-					    lockdep_is_held(&reuseport_lock)),
-		  "multiple allocations for the same socket");
+
+	/* Allocation attempts can occur concurrently via the setsockopt path
+	 * and the bind/hash path.  Nothing to do when we lose the race.
+	 */
+	if (rcu_dereference_protected(sk->sk_reuseport_cb,
+				      lockdep_is_held(&reuseport_lock)))
+		goto out;
+
 	reuse = __reuseport_alloc(INIT_SOCKS);
 	if (!reuse) {
 		spin_unlock_bh(&reuseport_lock);
@@ -49,6 +55,7 @@ int reuseport_alloc(struct sock *sk)
 	reuse->num_socks = 1;
 	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
+out:
 	spin_unlock_bh(&reuseport_lock);
 
 	return 0;
diff --git a/net/core/stream.c b/net/core/stream.c
index 20231dbb1da0..1cff9c6270c6 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *     SUCS NET3:
  *
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b7cd9aafe99e..cbc3dde4cfcc 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* -*- linux-c -*-
  * sysctl_net_core.c: sysctl interface to net core subsystem.
  *
diff --git a/net/core/tso.c b/net/core/tso.c
index 5dca7ce8ee9f..43f4eba61933 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/export.h>
 #include <linux/if_vlan.h>
 #include <net/ip.h>
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 733f523707ac..bae7d78aa068 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1938,8 +1938,8 @@ static int __init dcbnl_init(void)
 {
 	INIT_LIST_HEAD(&dcb_app_list);
 
-	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
 
 	return 0;
 }
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 5c8362b037ed..2e7b56097bc4 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
 
 dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e1295d5f2c56..1c75cd1255f6 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -126,10 +126,10 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
 						  DCCPF_SEQ_WMAX));
 }
 
-static void ccid2_hc_tx_rto_expire(unsigned long data)
+static void ccid2_hc_tx_rto_expire(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
-	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+	struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
+	struct sock *sk = hc->sk;
 	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
 
 	bh_lock_sock(sk);
@@ -733,8 +733,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	hc->tx_rpdupack  = -1;
 	hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
 	hc->tx_cwnd_used = 0;
-	setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
-			(unsigned long)sk);
+	hc->sk		 = sk;
+	timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0);
 	INIT_LIST_HEAD(&hc->tx_av_chunks);
 	return 0;
 }
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 6e50ef2898fb..1af0116dc6ce 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -85,6 +85,7 @@ struct ccid2_hc_tx_sock {
 				tx_rto;
 	u64			tx_rtt_seq:48;
 	struct timer_list	tx_rtotimer;
+	struct sock		*sk;
 
 	/* Congestion Window validation (optional, RFC 2861) */
 	u32			tx_cwnd_used,
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 119c04317d48..8b5ba6dffac7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -195,10 +195,10 @@ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc,
 	}
 }
 
-static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
+static void ccid3_hc_tx_no_feedback_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
-	struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
+	struct ccid3_hc_tx_sock *hc = from_timer(hc, t, tx_no_feedback_timer);
+	struct sock *sk = hc->sk;
 	unsigned long t_nfb = USEC_PER_SEC / 5;
 
 	bh_lock_sock(sk);
@@ -505,8 +505,9 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
 
 	hc->tx_state = TFRC_SSTATE_NO_SENT;
 	hc->tx_hist  = NULL;
-	setup_timer(&hc->tx_no_feedback_timer,
-			ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
+	hc->sk	     = sk;
+	timer_setup(&hc->tx_no_feedback_timer,
+		    ccid3_hc_tx_no_feedback_timer, 0);
 	return 0;
 }
 
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 1a9933c29672..813d91c6e1e2 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -106,6 +106,7 @@ struct ccid3_hc_tx_sock {
 	u8				tx_last_win_count;
 	ktime_t				tx_t_last_win_count;
 	struct timer_list		tx_no_feedback_timer;
+	struct sock			*sk;
 	ktime_t				tx_t_ld;
 	ktime_t				tx_t_nom;
 	struct tfrc_tx_hist_entry	*tx_hist;
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 08df7a3acb3d..876e18592d71 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -149,10 +149,8 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
 {
 	const u8 idx_a = tfrc_rx_hist_index(h, a),
 		 idx_b = tfrc_rx_hist_index(h, b);
-	struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
 
-	h->ring[idx_a] = h->ring[idx_b];
-	h->ring[idx_b] = tmp;
+	swap(h->ring[idx_a], h->ring[idx_b]);
 }
 
 /*
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 62b5828acde0..d7f265e1f50c 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * TFRC library initialisation
  *
diff --git a/net/dccp/input.c b/net/dccp/input.c
index fa6be9750bb4..d28d46bff6ab 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -534,6 +534,7 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
 	case DCCP_PKT_DATA:
 		if (sk->sk_state == DCCP_RESPOND)
 			break;
+		/* fall through */
 	case DCCP_PKT_DATAACK:
 	case DCCP_PKT_ACK:
 		/*
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 1b202f16531f..e65fcb45c3f6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 	sk = __inet_lookup_established(net, &dccp_hashinfo,
 				       iph->daddr, dh->dccph_dport,
 				       iph->saddr, ntohs(dh->dccph_sport),
-				       inet_iif(skb));
+				       inet_iif(skb), 0);
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 		return;
@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
 	sk_daddr_set(newsk, ireq->ir_rmt_addr);
 	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
 	newinet->inet_saddr	= ireq->ir_loc_addr;
-	newinet->inet_opt	= ireq->opt;
-	ireq->opt	   = NULL;
+	RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
 	newinet->mc_index  = inet_iif(skb);
 	newinet->mc_ttl	   = ip_hdr(skb)->ttl;
 	newinet->inet_id   = jiffies;
@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
 	if (__inet_inherit_port(sk, newsk) < 0)
 		goto put_and_exit;
 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-
+	if (*own_req)
+		ireq->ireq_opt = NULL;
+	else
+		newinet->inet_opt = NULL;
 	return newsk;
 
 exit_overflow:
@@ -441,6 +443,7 @@ exit:
 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
 	return NULL;
 put_and_exit:
+	newinet->inet_opt = NULL;
 	inet_csk_prepare_forced_close(newsk);
 	dccp_done(newsk);
 	goto exit;
@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
 							      ireq->ir_rmt_addr);
 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 					    ireq->ir_rmt_addr,
-					    ireq->opt);
+					    ireq_opt_deref(ireq));
 		err = net_xmit_eval(err);
 	}
 
@@ -548,7 +551,7 @@ out:
 static void dccp_v4_reqsk_destructor(struct request_sock *req)
 {
 	dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
-	kfree(inet_rsk(req)->opt);
+	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
 }
 
 void dccp_syn_ack_timeout(const struct request_sock *req)
@@ -804,7 +807,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 
 lookup:
 	sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
-			       dh->dccph_sport, dh->dccph_dport, &refcounted);
+			       dh->dccph_sport, dh->dccph_dport, 0, &refcounted);
 	if (!sk) {
 		dccp_pr_debug("failed to look up flow ID in table and "
 			      "get corresponding socket\n");
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1b58eac8aad3..5df7857fc0f3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -16,6 +16,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/xfrm.h>
+#include <linux/string.h>
 
 #include <net/addrconf.h>
 #include <net/inet_common.h>
@@ -30,6 +31,7 @@
 #include <net/ip6_checksum.h>
 #include <net/xfrm.h>
 #include <net/secure_seq.h>
+#include <net/sock.h>
 
 #include "dccp.h"
 #include "ipv6.h"
@@ -89,7 +91,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	sk = __inet6_lookup_established(net, &dccp_hashinfo,
 					&hdr->daddr, dh->dccph_dport,
 					&hdr->saddr, ntohs(dh->dccph_sport),
-					inet6_iif(skb));
+					inet6_iif(skb), 0);
 
 	if (!sk) {
 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
@@ -597,19 +599,13 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 					       --ANK (980728)
 	 */
 	if (np->rxopt.all)
-	/*
-	 * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below
-	 *        (wrt ipv6_pktopions) and net/ipv6/tcp_ipv6.c for an example.
-	 */
 		opt_skb = skb_clone(skb, GFP_ATOMIC);
 
 	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
 		if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
 			goto reset;
-		if (opt_skb) {
-			/* XXX This is where we would goto ipv6_pktoptions. */
-			__kfree_skb(opt_skb);
-		}
+		if (opt_skb)
+			goto ipv6_pktoptions;
 		return 0;
 	}
 
@@ -640,10 +636,8 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
 		goto reset;
-	if (opt_skb) {
-		/* XXX This is where we would goto ipv6_pktoptions. */
-		__kfree_skb(opt_skb);
-	}
+	if (opt_skb)
+		goto ipv6_pktoptions;
 	return 0;
 
 reset:
@@ -653,6 +647,35 @@ discard:
 		__kfree_skb(opt_skb);
 	kfree_skb(skb);
 	return 0;
+
+/* Handle the IPV6_PKTOPTIONS skb in a similar
+ * way to how it is done in net/ipv6/tcp_ipv6.c
+ */
+ipv6_pktoptions:
+	if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) {
+		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
+			np->mcast_oif = inet6_iif(opt_skb);
+		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
+			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
+			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
+		if (np->repflow)
+			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
+		if (ipv6_opt_accepted(sk, opt_skb,
+				      &DCCP_SKB_CB(opt_skb)->header.h6)) {
+			skb_set_owner_r(opt_skb, sk);
+			memmove(IP6CB(opt_skb),
+				&DCCP_SKB_CB(opt_skb)->header.h6,
+				sizeof(struct inet6_skb_parm));
+			opt_skb = xchg(&np->pktoptions, opt_skb);
+		} else {
+			__kfree_skb(opt_skb);
+			opt_skb = xchg(&np->pktoptions, NULL);
+		}
+	}
+
+	kfree_skb(opt_skb);
+	return 0;
 }
 
 static int dccp_v6_rcv(struct sk_buff *skb)
@@ -687,7 +710,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
 lookup:
 	sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
 			        dh->dccph_sport, dh->dccph_dport,
-				inet6_iif(skb), &refcounted);
+				inet6_iif(skb), 0, &refcounted);
 	if (!sk) {
 		dccp_pr_debug("failed to look up flow ID in table and "
 			      "get corresponding socket\n");
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 51cdfc3bd8ca..4e40db017e19 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -227,8 +227,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			 * Ack vectors are processed by the TX CCID if it is
 			 * interested. The RX CCID need not parse Ack Vectors,
 			 * since it is only interested in clearing old state.
-			 * Fall through.
 			 */
+			/* fall through */
 		case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
 			if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
 						     pkt_type, opt, value, len))
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 86bc40ba6ba5..b68168fcc06a 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -24,6 +24,7 @@
 #include <net/checksum.h>
 
 #include <net/inet_sock.h>
+#include <net/inet_common.h>
 #include <net/sock.h>
 #include <net/xfrm.h>
 
@@ -170,6 +171,15 @@ const char *dccp_packet_name(const int type)
 
 EXPORT_SYMBOL_GPL(dccp_packet_name);
 
+static void dccp_sk_destruct(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+	dp->dccps_hc_tx_ccid = NULL;
+	inet_sock_destruct(sk);
+}
+
 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
@@ -179,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
 	sk->sk_state		= DCCP_CLOSED;
 	sk->sk_write_space	= dccp_write_space;
+	sk->sk_destruct		= dccp_sk_destruct;
 	icsk->icsk_sync_mss	= dccp_sync_mss;
 	dp->dccps_mss_cache	= 536;
 	dp->dccps_rate_last	= jiffies;
@@ -219,8 +230,7 @@ void dccp_destroy_sock(struct sock *sk)
 		dp->dccps_hc_rx_ackvec = NULL;
 	}
 	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
-	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
-	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+	dp->dccps_hc_rx_ccid = NULL;
 
 	/* clean up feature negotiation state */
 	dccp_feat_list_purge(&dp->dccps_featneg);
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3a2c34027758..b50a8732ff43 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -125,10 +125,11 @@ static void dccp_retransmit_timer(struct sock *sk)
 		__sk_dst_reset(sk);
 }
 
-static void dccp_write_timer(unsigned long data)
+static void dccp_write_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
-	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_connection_sock *icsk =
+			from_timer(icsk, t, icsk_retransmit_timer);
+	struct sock *sk = &icsk->icsk_inet.sk;
 	int event = 0;
 
 	bh_lock_sock(sk);
@@ -161,19 +162,20 @@ out:
 	sock_put(sk);
 }
 
-static void dccp_keepalive_timer(unsigned long data)
+static void dccp_keepalive_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 
 	pr_err("dccp should not use a keepalive timer !\n");
 	sock_put(sk);
 }
 
 /* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
-static void dccp_delack_timer(unsigned long data)
+static void dccp_delack_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
-	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_connection_sock *icsk =
+			from_timer(icsk, t, icsk_delack_timer);
+	struct sock *sk = &icsk->icsk_inet.sk;
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -232,10 +234,13 @@ static void dccp_write_xmitlet(unsigned long data)
 	bh_unlock_sock(sk);
 }
 
-static void dccp_write_xmit_timer(unsigned long data)
+static void dccp_write_xmit_timer(struct timer_list *t)
 {
-	dccp_write_xmitlet(data);
-	sock_put((struct sock *)data);
+	struct dccp_sock *dp = from_timer(dp, t, dccps_xmit_timer);
+	struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk;
+
+	dccp_write_xmitlet((unsigned long)sk);
+	sock_put(sk);
 }
 
 void dccp_init_xmit_timers(struct sock *sk)
@@ -243,8 +248,7 @@ void dccp_init_xmit_timers(struct sock *sk)
 	struct dccp_sock *dp = dccp_sk(sk);
 
 	tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
-	setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
-							     (unsigned long)sk);
+	timer_setup(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 0);
 	inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
 				  &dccp_keepalive_timer);
 }
diff --git a/net/decnet/Makefile b/net/decnet/Makefile
index e44003af71f6..9e38122d942b 100644
--- a/net/decnet/Makefile
+++ b/net/decnet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_DECNET) += decnet.o
 
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 73a0399dc7a2..518cea17b811 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -533,10 +533,6 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
 	scp->keepalive = 10 * HZ;
 	scp->keepalive_fxn = dn_keepalive;
 
-	init_timer(&scp->delack_timer);
-	scp->delack_pending = 0;
-	scp->delack_fxn = dn_nsp_delayed_ack;
-
 	dn_start_slow_timer(sk);
 out:
 	return sk;
@@ -634,10 +630,12 @@ static void dn_destroy_sock(struct sock *sk)
 		goto disc_reject;
 	case DN_RUN:
 		scp->state = DN_DI;
+		/* fall through */
 	case DN_DI:
 	case DN_DR:
 disc_reject:
 		dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
+		/* fall through */
 	case DN_NC:
 	case DN_NR:
 	case DN_RJ:
@@ -651,6 +649,7 @@ disc_reject:
 		break;
 	default:
 		printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
+		/* fall through */
 	case DN_O:
 		dn_stop_slow_timer(sk);
 
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index fa0110b57ca1..9153247dad28 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
  *              operating system.  DECnet is implemented using the  BSD Socket
@@ -1038,14 +1039,14 @@ static void dn_eth_down(struct net_device *dev)
 
 static void dn_dev_set_timer(struct net_device *dev);
 
-static void dn_dev_timer_func(unsigned long arg)
+static void dn_dev_timer_func(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)arg;
-	struct dn_dev *dn_db;
+	struct dn_dev *dn_db = from_timer(dn_db, t, timer);
+	struct net_device *dev;
 	struct dn_ifaddr *ifa;
 
 	rcu_read_lock();
-	dn_db = rcu_dereference(dev->dn_ptr);
+	dev = dn_db->dev;
 	if (dn_db->t3 <= dn_db->parms.t2) {
 		if (dn_db->parms.timer3) {
 			for (ifa = rcu_dereference(dn_db->ifa_list);
@@ -1070,8 +1071,6 @@ static void dn_dev_set_timer(struct net_device *dev)
 	if (dn_db->parms.t2 > dn_db->parms.t3)
 		dn_db->parms.t2 = dn_db->parms.t3;
 
-	dn_db->timer.data = (unsigned long)dev;
-	dn_db->timer.function = dn_dev_timer_func;
 	dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
 
 	add_timer(&dn_db->timer);
@@ -1100,7 +1099,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
 
 	rcu_assign_pointer(dev->dn_ptr, dn_db);
 	dn_db->dev = dev;
-	init_timer(&dn_db->timer);
+	timer_setup(&dn_db->timer, dn_dev_timer_func, 0);
 
 	dn_db->uptime = jiffies;
 
@@ -1419,9 +1418,9 @@ void __init dn_dev_init(void)
 
 	dn_dev_devices_on();
 
-	rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, NULL);
-	rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL);
-	rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL);
+	rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0);
+	rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0);
+	rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0);
 
 	proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
 
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index f9f6fb3f3c5b..b37a1b833c77 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
  *              operating system.  DECnet is implemented using the  BSD Socket
@@ -791,8 +792,8 @@ void __init dn_fib_init(void)
 
 	register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
 
-	rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, NULL);
-	rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0);
+	rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0);
 }
 
 
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 21dedf6fd0f7..528119a5618e 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
  *              operating system.  DECnet is implemented using the  BSD Socket
@@ -94,7 +95,7 @@ struct neigh_table dn_neigh_table = {
 			[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
 			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
 			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
-			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64*1024,
+			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
 			[NEIGH_VAR_PROXY_QLEN] = 0,
 			[NEIGH_VAR_ANYCAST_DELAY] = 0,
 			[NEIGH_VAR_PROXY_DELAY] = 0,
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 7ac086d5c0c0..1b2120645730 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -776,12 +776,8 @@ static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
 	 * Swap src & dst and look up in the normal way.
 	 */
 	if (unlikely(cb->rt_flags & DN_RT_F_RTS)) {
-		__le16 tmp = cb->dst_port;
-		cb->dst_port = cb->src_port;
-		cb->src_port = tmp;
-		tmp = cb->dst;
-		cb->dst = cb->src;
-		cb->src = tmp;
+		swap(cb->dst_port, cb->src_port);
+		swap(cb->dst, cb->src);
 	}
 
 	/*
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 66f035e476ea..56a52a004c56 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -313,11 +313,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c
 	ackcrs |= 0x8000;
 
 	/* If this is an "other data/ack" message, swap acknum and ackcrs */
-	if (other) {
-		unsigned short tmp = acknum;
-		acknum = ackcrs;
-		ackcrs = tmp;
-	}
+	if (other)
+		swap(acknum, ackcrs);
 
 	/* Set "cross subchannel" bit in ackcrs */
 	ackcrs |= 0x2000;
@@ -491,17 +488,6 @@ void dn_send_conn_ack (struct sock *sk)
 	dn_nsp_send(skb);
 }
 
-void dn_nsp_delayed_ack(struct sock *sk)
-{
-	struct dn_scp *scp = DN_SK(sk);
-
-	if (scp->ackxmt_oth != scp->numoth_rcv)
-		dn_nsp_send_oth_ack(sk);
-
-	if (scp->ackxmt_dat != scp->numdat_rcv)
-		dn_nsp_send_data_ack(sk);
-}
-
 static int dn_nsp_retrans_conn_conf(struct sock *sk)
 {
 	struct dn_scp *scp = DN_SK(sk);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index bcbe548f8854..b36dceab0dc1 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -131,7 +131,7 @@ static struct dn_rt_hash_bucket *dn_rt_hash_table;
 static unsigned int dn_rt_hash_mask;
 
 static struct timer_list dn_route_timer;
-static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0);
+static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
 int decnet_dst_gc_interval = 2;
 
 static struct dst_ops dn_dst_ops = {
@@ -338,7 +338,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
 					   dn_rt_hash_table[hash].chain);
 			rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
 
-			dst_use(&rth->dst, now);
+			dst_hold_and_use(&rth->dst, now);
 			spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 
 			dst_release_immediate(&rt->dst);
@@ -351,7 +351,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
 	rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
 	rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
 
-	dst_use(&rt->dst, now);
+	dst_hold_and_use(&rt->dst, now);
 	spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 	*rp = rt;
 	return 0;
@@ -1258,7 +1258,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *
 			    (flp->flowidn_mark == rt->fld.flowidn_mark) &&
 			    dn_is_output_route(rt) &&
 			    (rt->fld.flowidn_oif == flp->flowidn_oif)) {
-				dst_use(&rt->dst, jiffies);
+				dst_hold_and_use(&rt->dst, jiffies);
 				rcu_read_unlock_bh();
 				*pprt = &rt->dst;
 				return 0;
@@ -1535,7 +1535,7 @@ static int dn_route_input(struct sk_buff *skb)
 		    (rt->fld.flowidn_oif == 0) &&
 		    (rt->fld.flowidn_mark == skb->mark) &&
 		    (rt->fld.flowidn_iif == cb->iif)) {
-			dst_use(&rt->dst, jiffies);
+			dst_hold_and_use(&rt->dst, jiffies);
 			rcu_read_unlock();
 			skb_dst_set(skb, (struct dst_entry *)rt);
 			return 0;
@@ -1922,10 +1922,10 @@ void __init dn_route_init(void)
 
 #ifdef CONFIG_DECNET_ROUTER
 	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
-		      dn_fib_dump, NULL);
+		      dn_fib_dump, 0);
 #else
 	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
-		      dn_cache_dump, NULL);
+		      dn_cache_dump, 0);
 #endif
 }
 
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 295bbd6a56f2..c795c3f509c9 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 232675480756..f0710b5d037d 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
  *              operating system.  DECnet is implemented using the  BSD Socket
@@ -155,6 +156,7 @@ static void dn_rehash_zone(struct dn_zone *dz)
 	default:
 		printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n",
 		       old_divisor);
+		/* fall through */
 	case 256:
 		new_divisor = 1024;
 		new_hashmask = 0x3FF;
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
index 1d330fd43dc7..f430daed24a0 100644
--- a/net/decnet/dn_timer.c
+++ b/net/decnet/dn_timer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
  *              operating system.  DECnet is implemented using the  BSD Socket
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index aa8ffecc46a4..ab395e55cd78 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -115,7 +115,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 	RCV_SKB_FAIL(-EINVAL);
 }
 
-static struct nf_hook_ops dnrmg_ops __read_mostly = {
+static const struct nf_hook_ops dnrmg_ops = {
 	.hook		= dnrmg_hook,
 	.pf		= NFPROTO_DECNET,
 	.hooknum	= NF_DN_ROUTE,
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 6c7da6c29bf0..55bf64a22b59 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * DECnet       An implementation of the DECnet protocol suite for the LINUX
  *              operating system.  DECnet is implemented using the  BSD Socket
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 8737412c7b27..e1d4d898a007 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -224,7 +224,7 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data)
 static void dns_resolver_describe(const struct key *key, struct seq_file *m)
 {
 	seq_puts(m, key->description);
-	if (key_is_instantiated(key)) {
+	if (key_is_positive(key)) {
 		int err = PTR_ERR(key->payload.data[dns_key_error]);
 
 		if (err)
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index cc5f8f971689..03c3bdf25468 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -7,6 +7,7 @@ config HAVE_NET_DSA
 config NET_DSA
 	tristate "Distributed Switch Architecture"
 	depends on HAVE_NET_DSA && MAY_USE_DEVLINK
+	depends on BRIDGE || BRIDGE=n
 	select NET_SWITCHDEV
 	select PHYLIB
 	---help---
@@ -19,6 +20,9 @@ if NET_DSA
 config NET_DSA_TAG_BRCM
 	bool
 
+config NET_DSA_TAG_BRCM_PREPEND
+	bool
+
 config NET_DSA_TAG_DSA
 	bool
 
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index fcce25da937c..0e13c1f95d13 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,9 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
 # the core
 obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o
+dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
 
 # tagging formats
 dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
+dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o
 dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
 dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
 dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 416ac4ef9ba9..6a9d0f50fbee 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/notifier.h>
 #include <linux/of.h>
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
@@ -43,6 +44,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
 #ifdef CONFIG_NET_DSA_TAG_BRCM
 	[DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
 #endif
+#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
+	[DSA_TAG_PROTO_BRCM_PREPEND] = &brcm_prepend_netdev_ops,
+#endif
 #ifdef CONFIG_NET_DSA_TAG_DSA
 	[DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
 #endif
@@ -67,37 +71,6 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
 	[DSA_TAG_PROTO_NONE] = &none_ops,
 };
 
-int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
-		      struct dsa_port *dport, int port)
-{
-	struct device_node *port_dn = dport->dn;
-	struct phy_device *phydev;
-	int ret, mode;
-
-	if (of_phy_is_fixed_link(port_dn)) {
-		ret = of_phy_register_fixed_link(port_dn);
-		if (ret) {
-			dev_err(dev, "failed to register fixed PHY\n");
-			return ret;
-		}
-		phydev = of_phy_find_device(port_dn);
-
-		mode = of_get_phy_mode(port_dn);
-		if (mode < 0)
-			mode = PHY_INTERFACE_MODE_NA;
-		phydev->interface = mode;
-
-		genphy_config_init(phydev);
-		genphy_read_status(phydev);
-		if (ds->ops->adjust_link)
-			ds->ops->adjust_link(ds, port, phydev);
-
-		put_device(&phydev->mdio.dev);
-	}
-
-	return 0;
-}
-
 const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
 {
 	const struct dsa_device_ops *ops;
@@ -112,42 +85,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
 	return ops;
 }
 
-int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp)
-{
-	struct dsa_switch *ds = cpu_dp->ds;
-	struct net_device *master;
-	struct ethtool_ops *cpu_ops;
-
-	master = cpu_dp->netdev;
-
-	cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL);
-	if (!cpu_ops)
-		return -ENOMEM;
-
-	memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops,
-	       sizeof(struct ethtool_ops));
-	cpu_dp->orig_ethtool_ops = master->ethtool_ops;
-	memcpy(cpu_ops, &cpu_dp->ethtool_ops,
-	       sizeof(struct ethtool_ops));
-	dsa_cpu_port_ethtool_init(cpu_ops);
-	master->ethtool_ops = cpu_ops;
-
-	return 0;
-}
-
-void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp)
-{
-	cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops;
-}
-
-void dsa_cpu_dsa_destroy(struct dsa_port *port)
-{
-	struct device_node *port_dn = port->dn;
-
-	if (of_phy_is_fixed_link(port_dn))
-		of_phy_deregister_fixed_link(port_dn);
-}
-
 static int dev_is_class(struct device *dev, void *class)
 {
 	if (dev->class != NULL && !strcmp(dev->class->name, class))
@@ -186,12 +123,14 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
 EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
 
 static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
-			  struct packet_type *pt, struct net_device *orig_dev)
+			  struct packet_type *pt, struct net_device *unused)
 {
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
 	struct sk_buff *nskb = NULL;
+	struct pcpu_sw_netstats *s;
+	struct dsa_slave_priv *p;
 
-	if (unlikely(dst == NULL)) {
+	if (unlikely(!cpu_dp)) {
 		kfree_skb(skb);
 		return 0;
 	}
@@ -200,19 +139,23 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (!skb)
 		return 0;
 
-	nskb = dst->rcv(skb, dev, pt, orig_dev);
+	nskb = cpu_dp->rcv(skb, dev, pt);
 	if (!nskb) {
 		kfree_skb(skb);
 		return 0;
 	}
 
 	skb = nskb;
+	p = netdev_priv(skb->dev);
 	skb_push(skb, ETH_HLEN);
 	skb->pkt_type = PACKET_HOST;
 	skb->protocol = eth_type_trans(skb, skb->dev);
 
-	skb->dev->stats.rx_packets++;
-	skb->dev->stats.rx_bytes += skb->len;
+	s = this_cpu_ptr(p->stats64);
+	u64_stats_update_begin(&s->syncp);
+	s->rx_packets++;
+	s->rx_bytes += skb->len;
+	u64_stats_update_end(&s->syncp);
 
 	netif_receive_skb(skb);
 
@@ -220,6 +163,11 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 }
 
 #ifdef CONFIG_PM_SLEEP
+static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
+{
+	return dsa_is_user_port(ds, p) && ds->ports[p].slave;
+}
+
 int dsa_switch_suspend(struct dsa_switch *ds)
 {
 	int i, ret = 0;
@@ -229,7 +177,7 @@ int dsa_switch_suspend(struct dsa_switch *ds)
 		if (!dsa_is_port_initialized(ds, i))
 			continue;
 
-		ret = dsa_slave_suspend(ds->ports[i].netdev);
+		ret = dsa_slave_suspend(ds->ports[i].slave);
 		if (ret)
 			return ret;
 	}
@@ -256,7 +204,7 @@ int dsa_switch_resume(struct dsa_switch *ds)
 		if (!dsa_is_port_initialized(ds, i))
 			continue;
 
-		ret = dsa_slave_resume(ds->ports[i].netdev);
+		ret = dsa_slave_resume(ds->ports[i].slave);
 		if (ret)
 			return ret;
 	}
@@ -271,10 +219,44 @@ static struct packet_type dsa_pack_type __read_mostly = {
 	.func	= dsa_switch_rcv,
 };
 
+static struct workqueue_struct *dsa_owq;
+
+bool dsa_schedule_work(struct work_struct *work)
+{
+	return queue_work(dsa_owq, work);
+}
+
+static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain);
+
+int register_dsa_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&dsa_notif_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_dsa_notifier);
+
+int unregister_dsa_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&dsa_notif_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_dsa_notifier);
+
+int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+		       struct dsa_notifier_info *info)
+{
+	info->dev = dev;
+	return atomic_notifier_call_chain(&dsa_notif_chain, val, info);
+}
+EXPORT_SYMBOL_GPL(call_dsa_notifiers);
+
 static int __init dsa_init_module(void)
 {
 	int rc;
 
+	dsa_owq = alloc_ordered_workqueue("dsa_ordered",
+					  WQ_MEM_RECLAIM);
+	if (!dsa_owq)
+		return -ENOMEM;
+
 	rc = dsa_slave_register_notifier();
 	if (rc)
 		return rc;
@@ -294,6 +276,7 @@ static void __exit dsa_cleanup_module(void)
 	dsa_slave_unregister_notifier();
 	dev_remove_pack(&dsa_pack_type);
 	dsa_legacy_unregister();
+	destroy_workqueue(dsa_owq);
 }
 module_exit(dsa_cleanup_module);
 
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index c442051d5a55..44e3fb7dec8c 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -21,293 +21,297 @@
 
 #include "dsa_priv.h"
 
-static LIST_HEAD(dsa_switch_trees);
+static LIST_HEAD(dsa_tree_list);
 static DEFINE_MUTEX(dsa2_mutex);
 
 static const struct devlink_ops dsa_devlink_ops = {
 };
 
-static struct dsa_switch_tree *dsa_get_dst(u32 tree)
+static struct dsa_switch_tree *dsa_tree_find(int index)
 {
 	struct dsa_switch_tree *dst;
 
-	list_for_each_entry(dst, &dsa_switch_trees, list)
-		if (dst->tree == tree) {
-			kref_get(&dst->refcount);
+	list_for_each_entry(dst, &dsa_tree_list, list)
+		if (dst->index == index)
 			return dst;
-		}
+
 	return NULL;
 }
 
-static void dsa_free_dst(struct kref *ref)
+static struct dsa_switch_tree *dsa_tree_alloc(int index)
 {
-	struct dsa_switch_tree *dst = container_of(ref, struct dsa_switch_tree,
-						   refcount);
+	struct dsa_switch_tree *dst;
 
-	list_del(&dst->list);
-	kfree(dst);
+	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+	if (!dst)
+		return NULL;
+
+	dst->index = index;
+
+	INIT_LIST_HEAD(&dst->list);
+	list_add_tail(&dst->list, &dsa_tree_list);
+
+	/* Initialize the reference counter to the number of switches, not 1 */
+	kref_init(&dst->refcount);
+	refcount_set(&dst->refcount.refcount, 0);
+
+	return dst;
 }
 
-static void dsa_put_dst(struct dsa_switch_tree *dst)
+static void dsa_tree_free(struct dsa_switch_tree *dst)
 {
-	kref_put(&dst->refcount, dsa_free_dst);
+	list_del(&dst->list);
+	kfree(dst);
 }
 
-static struct dsa_switch_tree *dsa_add_dst(u32 tree)
+static struct dsa_switch_tree *dsa_tree_touch(int index)
 {
 	struct dsa_switch_tree *dst;
 
-	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+	dst = dsa_tree_find(index);
 	if (!dst)
-		return NULL;
-	dst->tree = tree;
-	INIT_LIST_HEAD(&dst->list);
-	list_add_tail(&dsa_switch_trees, &dst->list);
-	kref_init(&dst->refcount);
+		dst = dsa_tree_alloc(index);
 
 	return dst;
 }
 
-static void dsa_dst_add_ds(struct dsa_switch_tree *dst,
-			   struct dsa_switch *ds, u32 index)
+static void dsa_tree_get(struct dsa_switch_tree *dst)
 {
 	kref_get(&dst->refcount);
-	dst->ds[index] = ds;
 }
 
-static void dsa_dst_del_ds(struct dsa_switch_tree *dst,
-			   struct dsa_switch *ds, u32 index)
+static void dsa_tree_release(struct kref *ref)
 {
-	dst->ds[index] = NULL;
-	kref_put(&dst->refcount, dsa_free_dst);
+	struct dsa_switch_tree *dst;
+
+	dst = container_of(ref, struct dsa_switch_tree, refcount);
+
+	dsa_tree_free(dst);
 }
 
-/* For platform data configurations, we need to have a valid name argument to
- * differentiate a disabled port from an enabled one
- */
-static bool dsa_port_is_valid(struct dsa_port *port)
+static void dsa_tree_put(struct dsa_switch_tree *dst)
 {
-	return !!(port->dn || port->name);
+	kref_put(&dst->refcount, dsa_tree_release);
 }
 
 static bool dsa_port_is_dsa(struct dsa_port *port)
 {
-	if (port->name && !strcmp(port->name, "dsa"))
-		return true;
-	else
-		return !!of_parse_phandle(port->dn, "link", 0);
+	return port->type == DSA_PORT_TYPE_DSA;
 }
 
 static bool dsa_port_is_cpu(struct dsa_port *port)
 {
-	if (port->name && !strcmp(port->name, "cpu"))
-		return true;
-	else
-		return !!of_parse_phandle(port->dn, "ethernet", 0);
+	return port->type == DSA_PORT_TYPE_CPU;
 }
 
-static bool dsa_ds_find_port_dn(struct dsa_switch *ds,
-				struct device_node *port)
+static bool dsa_port_is_user(struct dsa_port *dp)
 {
-	u32 index;
-
-	for (index = 0; index < ds->num_ports; index++)
-		if (ds->ports[index].dn == port)
-			return true;
-	return false;
+	return dp->type == DSA_PORT_TYPE_USER;
 }
 
-static struct dsa_switch *dsa_dst_find_port_dn(struct dsa_switch_tree *dst,
-					       struct device_node *port)
+static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
+						   struct device_node *dn)
 {
 	struct dsa_switch *ds;
-	u32 index;
+	struct dsa_port *dp;
+	int device, port;
 
-	for (index = 0; index < DSA_MAX_SWITCHES; index++) {
-		ds = dst->ds[index];
+	for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+		ds = dst->ds[device];
 		if (!ds)
 			continue;
 
-		if (dsa_ds_find_port_dn(ds, port))
-			return ds;
+		for (port = 0; port < ds->num_ports; port++) {
+			dp = &ds->ports[port];
+
+			if (dp->dn == dn)
+				return dp;
+		}
 	}
 
 	return NULL;
 }
 
-static int dsa_port_complete(struct dsa_switch_tree *dst,
-			     struct dsa_switch *src_ds,
-			     struct dsa_port *port,
-			     u32 src_port)
+static bool dsa_port_setup_routing_table(struct dsa_port *dp)
 {
-	struct device_node *link;
-	int index;
-	struct dsa_switch *dst_ds;
-
-	for (index = 0;; index++) {
-		link = of_parse_phandle(port->dn, "link", index);
-		if (!link)
-			break;
-
-		dst_ds = dsa_dst_find_port_dn(dst, link);
-		of_node_put(link);
+	struct dsa_switch *ds = dp->ds;
+	struct dsa_switch_tree *dst = ds->dst;
+	struct device_node *dn = dp->dn;
+	struct of_phandle_iterator it;
+	struct dsa_port *link_dp;
+	int err;
 
-		if (!dst_ds)
-			return 1;
+	of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
+		link_dp = dsa_tree_find_port_by_node(dst, it.node);
+		if (!link_dp) {
+			of_node_put(it.node);
+			return false;
+		}
 
-		src_ds->rtable[dst_ds->index] = src_port;
+		ds->rtable[link_dp->ds->index] = dp->index;
 	}
 
-	return 0;
+	return true;
 }
 
-/* A switch is complete if all the DSA ports phandles point to ports
- * known in the tree. A return value of 1 means the tree is not
- * complete. This is not an error condition. A value of 0 is
- * success.
- */
-static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
 {
-	struct dsa_port *port;
-	u32 index;
-	int err;
-
-	for (index = 0; index < ds->num_ports; index++) {
-		port = &ds->ports[index];
-		if (!dsa_port_is_valid(port))
-			continue;
+	bool complete = true;
+	struct dsa_port *dp;
+	int i;
 
-		if (!dsa_port_is_dsa(port))
-			continue;
+	for (i = 0; i < DSA_MAX_SWITCHES; i++)
+		ds->rtable[i] = DSA_RTABLE_NONE;
 
-		err = dsa_port_complete(dst, ds, port, index);
-		if (err != 0)
-			return err;
+	for (i = 0; i < ds->num_ports; i++) {
+		dp = &ds->ports[i];
 
-		ds->dsa_port_mask |= BIT(index);
+		if (dsa_port_is_dsa(dp)) {
+			complete = dsa_port_setup_routing_table(dp);
+			if (!complete)
+				break;
+		}
 	}
 
-	return 0;
+	return complete;
 }
 
-/* A tree is complete if all the DSA ports phandles point to ports
- * known in the tree. A return value of 1 means the tree is not
- * complete. This is not an error condition. A value of 0 is
- * success.
- */
-static int dsa_dst_complete(struct dsa_switch_tree *dst)
+static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
 {
 	struct dsa_switch *ds;
-	u32 index;
-	int err;
+	bool complete = true;
+	int device;
 
-	for (index = 0; index < DSA_MAX_SWITCHES; index++) {
-		ds = dst->ds[index];
+	for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+		ds = dst->ds[device];
 		if (!ds)
 			continue;
 
-		err = dsa_ds_complete(dst, ds);
-		if (err != 0)
-			return err;
+		complete = dsa_switch_setup_routing_table(ds);
+		if (!complete)
+			break;
 	}
 
-	return 0;
+	return complete;
 }
 
-static int dsa_dsa_port_apply(struct dsa_port *port)
+static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
 {
-	struct dsa_switch *ds = port->ds;
-	int err;
+	struct dsa_switch *ds;
+	struct dsa_port *dp;
+	int device, port;
 
-	err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
-	if (err) {
-		dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
-			 port->index, err);
-		return err;
-	}
+	for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+		ds = dst->ds[device];
+		if (!ds)
+			continue;
 
-	memset(&port->devlink_port, 0, sizeof(port->devlink_port));
+		for (port = 0; port < ds->num_ports; port++) {
+			dp = &ds->ports[port];
 
-	return devlink_port_register(ds->devlink, &port->devlink_port,
-				     port->index);
-}
+			if (dsa_port_is_cpu(dp))
+				return dp;
+		}
+	}
 
-static void dsa_dsa_port_unapply(struct dsa_port *port)
-{
-	devlink_port_unregister(&port->devlink_port);
-	dsa_cpu_dsa_destroy(port);
+	return NULL;
 }
 
-static int dsa_cpu_port_apply(struct dsa_port *port)
+static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
 {
-	struct dsa_switch *ds = port->ds;
-	int err;
+	struct dsa_switch *ds;
+	struct dsa_port *dp;
+	int device, port;
 
-	err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
-	if (err) {
-		dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
-			 port->index, err);
-		return err;
+	/* DSA currently only supports a single CPU port */
+	dst->cpu_dp = dsa_tree_find_first_cpu(dst);
+	if (!dst->cpu_dp) {
+		pr_warn("Tree has no master device\n");
+		return -EINVAL;
 	}
 
-	memset(&port->devlink_port, 0, sizeof(port->devlink_port));
-	err = devlink_port_register(ds->devlink, &port->devlink_port,
-				    port->index);
-	return err;
+	/* Assign the default CPU port to all ports of the fabric */
+	for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+		ds = dst->ds[device];
+		if (!ds)
+			continue;
+
+		for (port = 0; port < ds->num_ports; port++) {
+			dp = &ds->ports[port];
+
+			if (dsa_port_is_user(dp))
+				dp->cpu_dp = dst->cpu_dp;
+		}
+	}
+
+	return 0;
 }
 
-static void dsa_cpu_port_unapply(struct dsa_port *port)
+static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
 {
-	devlink_port_unregister(&port->devlink_port);
-	dsa_cpu_dsa_destroy(port);
-	port->ds->cpu_port_mask &= ~BIT(port->index);
-
+	/* DSA currently only supports a single CPU port */
+	dst->cpu_dp = NULL;
 }
 
-static int dsa_user_port_apply(struct dsa_port *port)
+static int dsa_port_setup(struct dsa_port *dp)
 {
-	struct dsa_switch *ds = port->ds;
-	const char *name = port->name;
+	struct dsa_switch *ds = dp->ds;
 	int err;
 
-	if (port->dn)
-		name = of_get_property(port->dn, "label", NULL);
-	if (!name)
-		name = "eth%d";
+	memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
 
-	err = dsa_slave_create(ds, ds->dev, port->index, name);
-	if (err) {
-		dev_warn(ds->dev, "Failed to create slave %d: %d\n",
-			 port->index, err);
-		port->netdev = NULL;
-		return err;
-	}
-
-	memset(&port->devlink_port, 0, sizeof(port->devlink_port));
-	err = devlink_port_register(ds->devlink, &port->devlink_port,
-				    port->index);
+	err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index);
 	if (err)
 		return err;
 
-	devlink_port_type_eth_set(&port->devlink_port, port->netdev);
+	switch (dp->type) {
+	case DSA_PORT_TYPE_UNUSED:
+		break;
+	case DSA_PORT_TYPE_CPU:
+	case DSA_PORT_TYPE_DSA:
+		err = dsa_port_fixed_link_register_of(dp);
+		if (err) {
+			dev_err(ds->dev, "failed to register fixed link for port %d.%d\n",
+				ds->index, dp->index);
+			return err;
+		}
+
+		break;
+	case DSA_PORT_TYPE_USER:
+		err = dsa_slave_create(dp);
+		if (err)
+			dev_err(ds->dev, "failed to create slave for port %d.%d\n",
+				ds->index, dp->index);
+		else
+			devlink_port_type_eth_set(&dp->devlink_port, dp->slave);
+		break;
+	}
 
 	return 0;
 }
 
-static void dsa_user_port_unapply(struct dsa_port *port)
+static void dsa_port_teardown(struct dsa_port *dp)
 {
-	devlink_port_unregister(&port->devlink_port);
-	if (port->netdev) {
-		dsa_slave_destroy(port->netdev);
-		port->netdev = NULL;
-		port->ds->enabled_port_mask &= ~(1 << port->index);
+	devlink_port_unregister(&dp->devlink_port);
+
+	switch (dp->type) {
+	case DSA_PORT_TYPE_UNUSED:
+		break;
+	case DSA_PORT_TYPE_CPU:
+	case DSA_PORT_TYPE_DSA:
+		dsa_port_fixed_link_unregister_of(dp);
+		break;
+	case DSA_PORT_TYPE_USER:
+		if (dp->slave) {
+			dsa_slave_destroy(dp->slave);
+			dp->slave = NULL;
+		}
+		break;
 	}
 }
 
-static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static int dsa_switch_setup(struct dsa_switch *ds)
 {
-	struct dsa_port *port;
-	u32 index;
 	int err;
 
 	/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
@@ -315,7 +319,7 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
 	 * the slave MDIO bus driver rely on these values for probing PHY
 	 * devices or not
 	 */
-	ds->phys_mii_mask = ds->enabled_port_mask;
+	ds->phys_mii_mask |= dsa_user_ports(ds);
 
 	/* Add the switch to devlink before calling setup, so that setup can
 	 * add dpipe tables
@@ -336,12 +340,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
 	if (err)
 		return err;
 
-	if (ds->ops->set_addr) {
-		err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr);
-		if (err < 0)
-			return err;
-	}
-
 	if (!ds->slave_mii_bus && ds->ops->phy_read) {
 		ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
 		if (!ds->slave_mii_bus)
@@ -354,56 +352,11 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
 			return err;
 	}
 
-	for (index = 0; index < ds->num_ports; index++) {
-		port = &ds->ports[index];
-		if (!dsa_port_is_valid(port))
-			continue;
-
-		if (dsa_port_is_dsa(port)) {
-			err = dsa_dsa_port_apply(port);
-			if (err)
-				return err;
-			continue;
-		}
-
-		if (dsa_port_is_cpu(port)) {
-			err = dsa_cpu_port_apply(port);
-			if (err)
-				return err;
-			continue;
-		}
-
-		err = dsa_user_port_apply(port);
-		if (err)
-			continue;
-	}
-
 	return 0;
 }
 
-static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static void dsa_switch_teardown(struct dsa_switch *ds)
 {
-	struct dsa_port *port;
-	u32 index;
-
-	for (index = 0; index < ds->num_ports; index++) {
-		port = &ds->ports[index];
-		if (!dsa_port_is_valid(port))
-			continue;
-
-		if (dsa_port_is_dsa(port)) {
-			dsa_dsa_port_unapply(port);
-			continue;
-		}
-
-		if (dsa_port_is_cpu(port)) {
-			dsa_cpu_port_unapply(port);
-			continue;
-		}
-
-		dsa_user_port_unapply(port);
-	}
-
 	if (ds->slave_mii_bus && ds->ops->phy_read)
 		mdiobus_unregister(ds->slave_mii_bus);
 
@@ -417,198 +370,228 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
 
 }
 
-static int dsa_dst_apply(struct dsa_switch_tree *dst)
+static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
 {
 	struct dsa_switch *ds;
-	u32 index;
+	struct dsa_port *dp;
+	int device, port;
 	int err;
 
-	for (index = 0; index < DSA_MAX_SWITCHES; index++) {
-		ds = dst->ds[index];
+	for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+		ds = dst->ds[device];
 		if (!ds)
 			continue;
 
-		err = dsa_ds_apply(dst, ds);
+		err = dsa_switch_setup(ds);
 		if (err)
 			return err;
-	}
 
-	if (dst->cpu_dp) {
-		err = dsa_cpu_port_ethtool_setup(dst->cpu_dp);
-		if (err)
-			return err;
-	}
+		for (port = 0; port < ds->num_ports; port++) {
+			dp = &ds->ports[port];
 
-	/* If we use a tagging format that doesn't have an ethertype
-	 * field, make sure that all packets from this point on get
-	 * sent to the tag format's receive function.
-	 */
-	wmb();
-	dst->cpu_dp->netdev->dsa_ptr = dst;
-	dst->applied = true;
+			err = dsa_port_setup(dp);
+			if (err)
+				return err;
+		}
+	}
 
 	return 0;
 }
 
-static void dsa_dst_unapply(struct dsa_switch_tree *dst)
+static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
 {
 	struct dsa_switch *ds;
-	u32 index;
-
-	if (!dst->applied)
-		return;
-
-	dst->cpu_dp->netdev->dsa_ptr = NULL;
-
-	/* If we used a tagging format that doesn't have an ethertype
-	 * field, make sure that all packets from this point get sent
-	 * without the tag and go through the regular receive path.
-	 */
-	wmb();
+	struct dsa_port *dp;
+	int device, port;
 
-	for (index = 0; index < DSA_MAX_SWITCHES; index++) {
-		ds = dst->ds[index];
+	for (device = 0; device < DSA_MAX_SWITCHES; device++) {
+		ds = dst->ds[device];
 		if (!ds)
 			continue;
 
-		dsa_ds_unapply(dst, ds);
-	}
+		for (port = 0; port < ds->num_ports; port++) {
+			dp = &ds->ports[port];
+
+			dsa_port_teardown(dp);
+		}
 
-	if (dst->cpu_dp) {
-		dsa_cpu_port_ethtool_restore(dst->cpu_dp);
-		dst->cpu_dp = NULL;
+		dsa_switch_teardown(ds);
 	}
+}
+
+static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
+{
+	struct dsa_port *cpu_dp = dst->cpu_dp;
+	struct net_device *master = cpu_dp->master;
 
-	pr_info("DSA: tree %d unapplied\n", dst->tree);
-	dst->applied = false;
+	/* DSA currently supports a single pair of CPU port and master device */
+	return dsa_master_setup(master, cpu_dp);
 }
 
-static int dsa_cpu_parse(struct dsa_port *port, u32 index,
-			 struct dsa_switch_tree *dst,
-			 struct dsa_switch *ds)
+static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
 {
-	enum dsa_tag_protocol tag_protocol;
-	struct net_device *ethernet_dev;
-	struct device_node *ethernet;
+	struct dsa_port *cpu_dp = dst->cpu_dp;
+	struct net_device *master = cpu_dp->master;
 
-	if (port->dn) {
-		ethernet = of_parse_phandle(port->dn, "ethernet", 0);
-		if (!ethernet)
-			return -EINVAL;
-		ethernet_dev = of_find_net_device_by_node(ethernet);
-	} else {
-		ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]);
-		dev_put(ethernet_dev);
-	}
+	dsa_master_teardown(master);
+}
 
-	if (!ethernet_dev)
-		return -EPROBE_DEFER;
+static int dsa_tree_setup(struct dsa_switch_tree *dst)
+{
+	bool complete;
+	int err;
 
-	if (!dst->cpu_dp) {
-		dst->cpu_dp = port;
-		dst->cpu_dp->netdev = ethernet_dev;
+	if (dst->setup) {
+		pr_err("DSA: tree %d already setup! Disjoint trees?\n",
+		       dst->index);
+		return -EEXIST;
 	}
 
-	/* Initialize cpu_port_mask now for drv->setup()
-	 * to have access to a correct value, just like what
-	 * net/dsa/dsa.c::dsa_switch_setup_one does.
-	 */
-	ds->cpu_port_mask |= BIT(index);
+	complete = dsa_tree_setup_routing_table(dst);
+	if (!complete)
+		return 0;
 
-	tag_protocol = ds->ops->get_tag_protocol(ds);
-	dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
-	if (IS_ERR(dst->tag_ops)) {
-		dev_warn(ds->dev, "No tagger for this switch\n");
-		ds->cpu_port_mask &= ~BIT(index);
-		return PTR_ERR(dst->tag_ops);
-	}
+	err = dsa_tree_setup_default_cpu(dst);
+	if (err)
+		return err;
 
-	dst->rcv = dst->tag_ops->rcv;
+	err = dsa_tree_setup_switches(dst);
+	if (err)
+		return err;
+
+	err = dsa_tree_setup_master(dst);
+	if (err)
+		return err;
+
+	dst->setup = true;
+
+	pr_info("DSA: tree %d setup\n", dst->index);
 
 	return 0;
 }
 
-static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds)
+static void dsa_tree_teardown(struct dsa_switch_tree *dst)
 {
-	struct dsa_port *port;
-	u32 index;
+	if (!dst->setup)
+		return;
+
+	dsa_tree_teardown_master(dst);
+
+	dsa_tree_teardown_switches(dst);
+
+	dsa_tree_teardown_default_cpu(dst);
+
+	pr_info("DSA: tree %d torn down\n", dst->index);
+
+	dst->setup = false;
+}
+
+static void dsa_tree_remove_switch(struct dsa_switch_tree *dst,
+				   unsigned int index)
+{
+	dsa_tree_teardown(dst);
+
+	dst->ds[index] = NULL;
+	dsa_tree_put(dst);
+}
+
+static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
+			       struct dsa_switch *ds)
+{
+	unsigned int index = ds->index;
 	int err;
 
-	for (index = 0; index < ds->num_ports; index++) {
-		port = &ds->ports[index];
-		if (!dsa_port_is_valid(port) ||
-		    dsa_port_is_dsa(port))
-			continue;
+	if (dst->ds[index])
+		return -EBUSY;
 
-		if (dsa_port_is_cpu(port)) {
-			err = dsa_cpu_parse(port, index, dst, ds);
-			if (err)
-				return err;
-		} else {
-			/* Initialize enabled_port_mask now for drv->setup()
-			 * to have access to a correct value, just like what
-			 * net/dsa/dsa.c::dsa_switch_setup_one does.
-			 */
-			ds->enabled_port_mask |= BIT(index);
-		}
+	dsa_tree_get(dst);
+	dst->ds[index] = ds;
 
-	}
+	err = dsa_tree_setup(dst);
+	if (err)
+		dsa_tree_remove_switch(dst, index);
 
-	pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index);
+	return err;
+}
+
+static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
+{
+	if (!name)
+		name = "eth%d";
+
+	dp->type = DSA_PORT_TYPE_USER;
+	dp->name = name;
 
 	return 0;
 }
 
-static int dsa_dst_parse(struct dsa_switch_tree *dst)
+static int dsa_port_parse_dsa(struct dsa_port *dp)
 {
-	struct dsa_switch *ds;
-	struct dsa_port *dp;
-	u32 index;
-	int port;
-	int err;
+	dp->type = DSA_PORT_TYPE_DSA;
 
-	for (index = 0; index < DSA_MAX_SWITCHES; index++) {
-		ds = dst->ds[index];
-		if (!ds)
-			continue;
+	return 0;
+}
 
-		err = dsa_ds_parse(dst, ds);
-		if (err)
-			return err;
-	}
+static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
+{
+	struct dsa_switch *ds = dp->ds;
+	struct dsa_switch_tree *dst = ds->dst;
+	const struct dsa_device_ops *tag_ops;
+	enum dsa_tag_protocol tag_protocol;
 
-	if (!dst->cpu_dp->netdev) {
-		pr_warn("Tree has no master device\n");
-		return -EINVAL;
+	tag_protocol = ds->ops->get_tag_protocol(ds, dp->index);
+	tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+	if (IS_ERR(tag_ops)) {
+		dev_warn(ds->dev, "No tagger for this switch\n");
+		return PTR_ERR(tag_ops);
 	}
 
-	/* Assign the default CPU port to all ports of the fabric */
-	for (index = 0; index < DSA_MAX_SWITCHES; index++) {
-		ds = dst->ds[index];
-		if (!ds)
-			continue;
+	dp->type = DSA_PORT_TYPE_CPU;
+	dp->rcv = tag_ops->rcv;
+	dp->tag_ops = tag_ops;
+	dp->master = master;
+	dp->dst = dst;
 
-		for (port = 0; port < ds->num_ports; port++) {
-			dp = &ds->ports[port];
-			if (!dsa_port_is_valid(dp) ||
-			    dsa_port_is_dsa(dp) ||
-			    dsa_port_is_cpu(dp))
-				continue;
+	return 0;
+}
 
-			dp->cpu_dp = dst->cpu_dp;
-		}
+static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
+{
+	struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
+	const char *name = of_get_property(dn, "label", NULL);
+	bool link = of_property_read_bool(dn, "link");
+
+	dp->dn = dn;
+
+	if (ethernet) {
+		struct net_device *master;
+
+		/* The phandle reference is only needed for this lookup */
+		master = of_find_net_device_by_node(ethernet);
+		of_node_put(ethernet);
+
+		return master ? dsa_port_parse_cpu(dp, master) : -EPROBE_DEFER;
 	}
 
-	pr_info("DSA: tree %d parsed\n", dst->tree);
+	if (link)
+		return dsa_port_parse_dsa(dp);
 
-	return 0;
+	return dsa_port_parse_user(dp, name);
 }
 
-static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
+static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
+				     struct device_node *dn)
 {
-	struct device_node *port;
-	int err;
+	struct device_node *ports, *port;
+	struct dsa_port *dp;
 	u32 reg;
+	int err;
+
+	ports = of_get_child_by_name(dn, "ports");
+	if (!ports) {
+		dev_err(ds->dev, "no ports child node found\n");
+		return -EINVAL;
+	}
 
 	for_each_available_child_of_node(ports, port) {
 		err = of_property_read_u32(port, "reg", &reg);
@@ -618,174 +601,140 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
 		if (reg >= ds->num_ports)
 			return -EINVAL;
 
-		ds->ports[reg].dn = port;
+		dp = &ds->ports[reg];
+
+		err = dsa_port_parse_of(dp, port);
+		if (err)
+			return err;
 	}
 
 	return 0;
 }
 
-static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds)
+static int dsa_switch_parse_member_of(struct dsa_switch *ds,
+				      struct device_node *dn)
 {
-	bool valid_name_found = false;
-	unsigned int i;
+	u32 m[2] = { 0, 0 };
+	int sz;
 
-	for (i = 0; i < DSA_MAX_PORTS; i++) {
-		if (!cd->port_names[i])
-			continue;
+	/* Don't error out if this optional property isn't found */
+	sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2);
+	if (sz < 0 && sz != -EINVAL)
+		return sz;
 
-		ds->ports[i].name = cd->port_names[i];
-		valid_name_found = true;
-	}
-
-	if (!valid_name_found && i == DSA_MAX_PORTS)
+	ds->index = m[1];
+	if (ds->index >= DSA_MAX_SWITCHES)
 		return -EINVAL;
 
+	ds->dst = dsa_tree_touch(m[0]);
+	if (!ds->dst)
+		return -ENOMEM;
+
 	return 0;
 }
 
-static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index)
+static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
 {
 	int err;
 
-	*tree = *index = 0;
-
-	err = of_property_read_u32_index(np, "dsa,member", 0, tree);
-	if (err) {
-		/* Does not exist, but it is optional */
-		if (err == -EINVAL)
-			return 0;
-		return err;
-	}
-
-	err = of_property_read_u32_index(np, "dsa,member", 1, index);
+	err = dsa_switch_parse_member_of(ds, dn);
 	if (err)
 		return err;
 
-	if (*index >= DSA_MAX_SWITCHES)
-		return -EINVAL;
-
-	return 0;
+	return dsa_switch_parse_ports_of(ds, dn);
 }
 
-static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index)
+static int dsa_port_parse(struct dsa_port *dp, const char *name,
+			  struct device *dev)
 {
-	if (!pd)
-		return -ENODEV;
+	if (!strcmp(name, "cpu")) {
+		struct net_device *master;
 
-	/* We do not support complex trees with dsa_chip_data */
-	*tree = 0;
-	*index = 0;
+		master = dsa_dev_to_net_device(dev);
+		if (!master)
+			return -EPROBE_DEFER;
 
-	return 0;
-}
-
-static struct device_node *dsa_get_ports(struct dsa_switch *ds,
-					 struct device_node *np)
-{
-	struct device_node *ports;
+		dev_put(master);
 
-	ports = of_get_child_by_name(np, "ports");
-	if (!ports) {
-		dev_err(ds->dev, "no ports child node found\n");
-		return ERR_PTR(-EINVAL);
+		return dsa_port_parse_cpu(dp, master);
 	}
 
-	return ports;
+	if (!strcmp(name, "dsa"))
+		return dsa_port_parse_dsa(dp);
+
+	return dsa_port_parse_user(dp, name);
 }
 
-static int _dsa_register_switch(struct dsa_switch *ds)
+static int dsa_switch_parse_ports(struct dsa_switch *ds,
+				  struct dsa_chip_data *cd)
 {
-	struct dsa_chip_data *pdata = ds->dev->platform_data;
-	struct device_node *np = ds->dev->of_node;
-	struct dsa_switch_tree *dst;
-	struct device_node *ports;
-	u32 tree, index;
-	int i, err;
-
-	if (np) {
-		err = dsa_parse_member_dn(np, &tree, &index);
-		if (err)
-			return err;
+	bool valid_name_found = false;
+	struct dsa_port *dp;
+	struct device *dev;
+	const char *name;
+	unsigned int i;
+	int err;
 
-		ports = dsa_get_ports(ds, np);
-		if (IS_ERR(ports))
-			return PTR_ERR(ports);
+	for (i = 0; i < DSA_MAX_PORTS; i++) {
+		name = cd->port_names[i];
+		dev = cd->netdev[i];
+		dp = &ds->ports[i];
 
-		err = dsa_parse_ports_dn(ports, ds);
-		if (err)
-			return err;
-	} else {
-		err = dsa_parse_member(pdata, &tree, &index);
-		if (err)
-			return err;
+		if (!name)
+			continue;
 
-		err = dsa_parse_ports(pdata, ds);
+		err = dsa_port_parse(dp, name, dev);
 		if (err)
 			return err;
-	}
 
-	dst = dsa_get_dst(tree);
-	if (!dst) {
-		dst = dsa_add_dst(tree);
-		if (!dst)
-			return -ENOMEM;
-	}
-
-	if (dst->ds[index]) {
-		err = -EBUSY;
-		goto out;
+		valid_name_found = true;
 	}
 
-	ds->dst = dst;
-	ds->index = index;
-	ds->cd = pdata;
-
-	/* Initialize the routing table */
-	for (i = 0; i < DSA_MAX_SWITCHES; ++i)
-		ds->rtable[i] = DSA_RTABLE_NONE;
+	if (!valid_name_found && i == DSA_MAX_PORTS)
+		return -EINVAL;
 
-	dsa_dst_add_ds(dst, ds, index);
+	return 0;
+}
 
-	err = dsa_dst_complete(dst);
-	if (err < 0)
-		goto out_del_dst;
+static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
+{
+	ds->cd = cd;
 
-	if (err == 1) {
-		/* Not all switches registered yet */
-		err = 0;
-		goto out;
-	}
+	/* We don't support interconnected switches nor multiple trees via
+	 * platform data, so this is the unique switch of the tree.
+	 */
+	ds->index = 0;
+	ds->dst = dsa_tree_touch(0);
+	if (!ds->dst)
+		return -ENOMEM;
 
-	if (dst->applied) {
-		pr_info("DSA: Disjoint trees?\n");
-		return -EINVAL;
-	}
+	return dsa_switch_parse_ports(ds, cd);
+}
 
-	err = dsa_dst_parse(dst);
-	if (err) {
-		if (err == -EPROBE_DEFER) {
-			dsa_dst_del_ds(dst, ds, ds->index);
-			return err;
-		}
+static int dsa_switch_add(struct dsa_switch *ds)
+{
+	struct dsa_switch_tree *dst = ds->dst;
 
-		goto out_del_dst;
-	}
+	return dsa_tree_add_switch(dst, ds);
+}
 
-	err = dsa_dst_apply(dst);
-	if (err) {
-		dsa_dst_unapply(dst);
-		goto out_del_dst;
-	}
+static int dsa_switch_probe(struct dsa_switch *ds)
+{
+	struct dsa_chip_data *pdata = ds->dev->platform_data;
+	struct device_node *np = ds->dev->of_node;
+	int err;
 
-	dsa_put_dst(dst);
-	return 0;
+	if (np)
+		err = dsa_switch_parse_of(ds, np);
+	else if (pdata)
+		err = dsa_switch_parse(ds, pdata);
+	else
+		err = -ENODEV;
 
-out_del_dst:
-	dsa_dst_del_ds(dst, ds, ds->index);
-out:
-	dsa_put_dst(dst);
+	if (err)
+		return err;
 
-	return err;
+	return dsa_switch_add(ds);
 }
 
 struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
@@ -815,26 +764,25 @@ int dsa_register_switch(struct dsa_switch *ds)
 	int err;
 
 	mutex_lock(&dsa2_mutex);
-	err = _dsa_register_switch(ds);
+	err = dsa_switch_probe(ds);
 	mutex_unlock(&dsa2_mutex);
 
 	return err;
 }
 EXPORT_SYMBOL_GPL(dsa_register_switch);
 
-static void _dsa_unregister_switch(struct dsa_switch *ds)
+static void dsa_switch_remove(struct dsa_switch *ds)
 {
 	struct dsa_switch_tree *dst = ds->dst;
+	unsigned int index = ds->index;
 
-	dsa_dst_unapply(dst);
-
-	dsa_dst_del_ds(dst, ds, ds->index);
+	dsa_tree_remove_switch(dst, index);
 }
 
 void dsa_unregister_switch(struct dsa_switch *ds)
 {
 	mutex_lock(&dsa2_mutex);
-	_dsa_unregister_switch(ds);
+	dsa_switch_remove(ds);
 	mutex_unlock(&dsa2_mutex);
 }
 EXPORT_SYMBOL_GPL(dsa_unregister_switch);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 55982cc39b24..7d036696e8c4 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -43,10 +43,10 @@ struct dsa_notifier_bridge_info {
 
 /* DSA_NOTIFIER_FDB_* */
 struct dsa_notifier_fdb_info {
-	const struct switchdev_obj_port_fdb *fdb;
-	struct switchdev_trans *trans;
 	int sw_index;
 	int port;
+	const unsigned char *addr;
+	u16 vid;
 };
 
 /* DSA_NOTIFIER_MDB_* */
@@ -65,18 +65,13 @@ struct dsa_notifier_vlan_info {
 	int port;
 };
 
-struct dsa_device_ops {
-	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
-	struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
-			       struct packet_type *pt,
-			       struct net_device *orig_dev);
-};
-
 struct dsa_slave_priv {
-	/* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */
+	/* Copy of CPU port xmit for faster access in slave transmit hot path */
 	struct sk_buff *	(*xmit)(struct sk_buff *skb,
 					struct net_device *dev);
 
+	struct pcpu_sw_netstats	*stats64;
+
 	/* DSA port data, such as switch, port index, etc. */
 	struct dsa_port		*dp;
 
@@ -84,7 +79,6 @@ struct dsa_slave_priv {
 	 * The phylib phy_device pointer for the PHY connected
 	 * to this port.
 	 */
-	struct phy_device	*phy;
 	phy_interface_t		phy_interface;
 	int			old_link;
 	int			old_pause;
@@ -99,68 +93,105 @@ struct dsa_slave_priv {
 };
 
 /* dsa.c */
-int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
-		      struct dsa_port *dport, int port);
-void dsa_cpu_dsa_destroy(struct dsa_port *dport);
 const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
-int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp);
-void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp);
+bool dsa_schedule_work(struct work_struct *work);
 
 /* legacy.c */
 int dsa_legacy_register(void);
 void dsa_legacy_unregister(void);
+int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+		       struct net_device *dev,
+		       const unsigned char *addr, u16 vid,
+		       u16 flags);
+int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+		       struct net_device *dev,
+		       const unsigned char *addr, u16 vid);
+
+/* master.c */
+int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
+void dsa_master_teardown(struct net_device *dev);
+
+static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
+						       int device, int port)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+	struct dsa_switch_tree *dst = cpu_dp->dst;
+	struct dsa_switch *ds;
+
+	if (device < 0 || device >= DSA_MAX_SWITCHES)
+		return NULL;
+
+	ds = dst->ds[device];
+	if (!ds)
+		return NULL;
+
+	if (port < 0 || port >= ds->num_ports)
+		return NULL;
+
+	return ds->ports[port].slave;
+}
 
 /* port.c */
 int dsa_port_set_state(struct dsa_port *dp, u8 state,
 		       struct switchdev_trans *trans);
-void dsa_port_set_state_now(struct dsa_port *dp, u8 state);
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy);
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 			    struct switchdev_trans *trans);
 int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
 			 struct switchdev_trans *trans);
-int dsa_port_fdb_add(struct dsa_port *dp,
-		     const struct switchdev_obj_port_fdb *fdb,
-		     struct switchdev_trans *trans);
-int dsa_port_fdb_del(struct dsa_port *dp,
-		     const struct switchdev_obj_port_fdb *fdb);
-int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
-		      switchdev_obj_dump_cb_t *cb);
-int dsa_port_mdb_add(struct dsa_port *dp,
+int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+		     u16 vid);
+int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+		     u16 vid);
+int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
+int dsa_port_mdb_add(const struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb,
 		     struct switchdev_trans *trans);
-int dsa_port_mdb_del(struct dsa_port *dp,
+int dsa_port_mdb_del(const struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
-		      switchdev_obj_dump_cb_t *cb);
 int dsa_port_vlan_add(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan,
 		      struct switchdev_trans *trans);
 int dsa_port_vlan_del(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_vlan_dump(struct dsa_port *dp,
-		       struct switchdev_obj_port_vlan *vlan,
-		       switchdev_obj_dump_cb_t *cb);
+int dsa_port_fixed_link_register_of(struct dsa_port *dp);
+void dsa_port_fixed_link_unregister_of(struct dsa_port *dp);
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
-void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops);
-int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
-		     int port, const char *name);
+int dsa_slave_create(struct dsa_port *dp);
 void dsa_slave_destroy(struct net_device *slave_dev);
 int dsa_slave_suspend(struct net_device *slave_dev);
 int dsa_slave_resume(struct net_device *slave_dev);
 int dsa_slave_register_notifier(void);
 void dsa_slave_unregister_notifier(void);
 
+static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+
+	return p->dp;
+}
+
+static inline struct net_device *
+dsa_slave_to_master(const struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+
+	return dp->cpu_dp->master;
+}
+
 /* switch.c */
 int dsa_switch_register_notifier(struct dsa_switch *ds);
 void dsa_switch_unregister_notifier(struct dsa_switch *ds);
 
 /* tag_brcm.c */
 extern const struct dsa_device_ops brcm_netdev_ops;
+extern const struct dsa_device_ops brcm_prepend_netdev_ops;
 
 /* tag_dsa.c */
 extern const struct dsa_device_ops dsa_netdev_ops;
@@ -183,14 +214,4 @@ extern const struct dsa_device_ops qca_netdev_ops;
 /* tag_trailer.c */
 extern const struct dsa_device_ops trailer_netdev_ops;
 
-static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p)
-{
-	return p->dp->cpu_dp->netdev;
-}
-
-static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst)
-{
-	return dst->cpu_dp;
-}
-
 #endif
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 1d7a3282f2a7..84611d7fcfa2 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -78,31 +78,30 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
 }
 
 /* basic switch operations **************************************************/
-static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev)
+static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
 {
-	struct dsa_port *dport;
 	int ret, port;
 
 	for (port = 0; port < ds->num_ports; port++) {
 		if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
 			continue;
 
-		dport = &ds->ports[port];
-		ret = dsa_cpu_dsa_setup(ds, dev, dport, port);
+		ret = dsa_port_fixed_link_register_of(&ds->ports[port]);
 		if (ret)
 			return ret;
 	}
 	return 0;
 }
 
-static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master,
-				struct device *parent)
+static int dsa_switch_setup_one(struct dsa_switch *ds,
+				struct net_device *master)
 {
 	const struct dsa_switch_ops *ops = ds->ops;
 	struct dsa_switch_tree *dst = ds->dst;
 	struct dsa_chip_data *cd = ds->cd;
 	bool valid_name_found = false;
 	int index = ds->index;
+	struct dsa_port *dp;
 	int i, ret;
 
 	/*
@@ -111,9 +110,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
 	for (i = 0; i < ds->num_ports; i++) {
 		char *name;
 
+		dp = &ds->ports[i];
+
 		name = cd->port_names[i];
 		if (name == NULL)
 			continue;
+		dp->name = name;
 
 		if (!strcmp(name, "cpu")) {
 			if (dst->cpu_dp) {
@@ -122,12 +124,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
 				return -EINVAL;
 			}
 			dst->cpu_dp = &ds->ports[i];
-			dst->cpu_dp->netdev = master;
-			ds->cpu_port_mask |= 1 << i;
+			dst->cpu_dp->master = master;
+			dp->type = DSA_PORT_TYPE_CPU;
 		} else if (!strcmp(name, "dsa")) {
-			ds->dsa_port_mask |= 1 << i;
+			dp->type = DSA_PORT_TYPE_DSA;
 		} else {
-			ds->enabled_port_mask |= 1 << i;
+			dp->type = DSA_PORT_TYPE_USER;
 		}
 		valid_name_found = true;
 	}
@@ -138,7 +140,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
 	/* Make the built-in MII bus mask match the number of ports,
 	 * switch drivers can override this later
 	 */
-	ds->phys_mii_mask = ds->enabled_port_mask;
+	ds->phys_mii_mask |= dsa_user_ports(ds);
 
 	/*
 	 * If the CPU connects to this switch, set the switch tree
@@ -146,14 +148,19 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
 	 * switch.
 	 */
 	if (dst->cpu_dp->ds == ds) {
+		const struct dsa_device_ops *tag_ops;
 		enum dsa_tag_protocol tag_protocol;
 
-		tag_protocol = ops->get_tag_protocol(ds);
-		dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
-		if (IS_ERR(dst->tag_ops))
-			return PTR_ERR(dst->tag_ops);
+		tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index);
+		tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+		if (IS_ERR(tag_ops))
+			return PTR_ERR(tag_ops);
+
+		dst->cpu_dp->tag_ops = tag_ops;
 
-		dst->rcv = dst->tag_ops->rcv;
+		/* Few copies for faster access in master receive hot path */
+		dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
+		dst->cpu_dp->dst = dst;
 	}
 
 	memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
@@ -169,14 +176,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
 	if (ret)
 		return ret;
 
-	if (ops->set_addr) {
-		ret = ops->set_addr(ds, master->dev_addr);
-		if (ret < 0)
-			return ret;
-	}
-
 	if (!ds->slave_mii_bus && ops->phy_read) {
-		ds->slave_mii_bus = devm_mdiobus_alloc(parent);
+		ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
 		if (!ds->slave_mii_bus)
 			return -ENOMEM;
 		dsa_slave_mii_bus_init(ds);
@@ -193,25 +194,21 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
 		ds->ports[i].dn = cd->port_dn[i];
 		ds->ports[i].cpu_dp = dst->cpu_dp;
 
-		if (!(ds->enabled_port_mask & (1 << i)))
+		if (!dsa_is_user_port(ds, i))
 			continue;
 
-		ret = dsa_slave_create(ds, parent, i, cd->port_names[i]);
+		ret = dsa_slave_create(&ds->ports[i]);
 		if (ret < 0)
 			netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
 				   index, i, cd->port_names[i], ret);
 	}
 
 	/* Perform configuration of the CPU and DSA ports */
-	ret = dsa_cpu_dsa_setups(ds, parent);
+	ret = dsa_cpu_dsa_setups(ds);
 	if (ret < 0)
 		netdev_err(master, "[%d] : can't configure CPU and DSA ports\n",
 			   index);
 
-	ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp);
-	if (ret)
-		return ret;
-
 	return 0;
 }
 
@@ -252,7 +249,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, struct net_device *master,
 	ds->ops = ops;
 	ds->priv = priv;
 
-	ret = dsa_switch_setup_one(ds, master, parent);
+	ret = dsa_switch_setup_one(ds, master);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -265,24 +262,20 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
 
 	/* Destroy network devices for physical switch ports. */
 	for (port = 0; port < ds->num_ports; port++) {
-		if (!(ds->enabled_port_mask & (1 << port)))
+		if (!dsa_is_user_port(ds, port))
 			continue;
 
-		if (!ds->ports[port].netdev)
+		if (!ds->ports[port].slave)
 			continue;
 
-		dsa_slave_destroy(ds->ports[port].netdev);
+		dsa_slave_destroy(ds->ports[port].slave);
 	}
 
 	/* Disable configuration of the CPU and DSA ports */
 	for (port = 0; port < ds->num_ports; port++) {
 		if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
 			continue;
-		dsa_cpu_dsa_destroy(&ds->ports[port]);
-
-		/* Clearing a bit which is not set does no harm */
-		ds->cpu_port_mask |= ~(1 << port);
-		ds->dsa_port_mask |= ~(1 << port);
+		dsa_port_fixed_link_unregister_of(&ds->ports[port]);
 	}
 
 	if (ds->slave_mii_bus && ds->ops->phy_read)
@@ -600,15 +593,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
 	if (!configured)
 		return -EPROBE_DEFER;
 
-	/*
-	 * If we use a tagging format that doesn't have an ethertype
-	 * field, make sure that all packets from this point on get
-	 * sent to the tag format's receive function.
-	 */
-	wmb();
-	dev->dsa_ptr = dst;
-
-	return 0;
+	return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp);
 }
 
 static int dsa_probe(struct platform_device *pdev)
@@ -673,13 +658,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
 {
 	int i;
 
-	dst->cpu_dp->netdev->dsa_ptr = NULL;
-
-	/* If we used a tagging format that doesn't have an ethertype
-	 * field, make sure that all packets from this point get sent
-	 * without the tag and go through the regular receive path.
-	 */
-	wmb();
+	dsa_master_teardown(dst->cpu_dp->master);
 
 	for (i = 0; i < dst->pd->nr_chips; i++) {
 		struct dsa_switch *ds = dst->ds[i];
@@ -688,9 +667,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
 			dsa_switch_destroy(ds);
 	}
 
-	dsa_cpu_port_ethtool_restore(dst->cpu_dp);
-
-	dev_put(dst->cpu_dp->netdev);
+	dev_put(dst->cpu_dp->master);
 }
 
 static int dsa_remove(struct platform_device *pdev)
@@ -741,6 +718,26 @@ static int dsa_resume(struct device *d)
 }
 #endif
 
+/* legacy way, bypassing the bridge *****************************************/
+int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+		       struct net_device *dev,
+		       const unsigned char *addr, u16 vid,
+		       u16 flags)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+
+	return dsa_port_fdb_add(dp, addr, vid);
+}
+
+int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+		       struct net_device *dev,
+		       const unsigned char *addr, u16 vid)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+
+	return dsa_port_fdb_del(dp, addr, vid);
+}
+
 static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
 
 static const struct of_device_id dsa_of_match_table[] = {
diff --git a/net/dsa/master.c b/net/dsa/master.c
new file mode 100644
index 000000000000..00589147f042
--- /dev/null
+++ b/net/dsa/master.c
@@ -0,0 +1,143 @@
+/*
+ * Handling of a master device, switching frames via its switch fabric CPU port
+ *
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "dsa_priv.h"
+
+static void dsa_master_get_ethtool_stats(struct net_device *dev,
+					 struct ethtool_stats *stats,
+					 uint64_t *data)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+	const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+	struct dsa_switch *ds = cpu_dp->ds;
+	int port = cpu_dp->index;
+	int count = 0;
+
+	if (ops && ops->get_sset_count && ops->get_ethtool_stats) {
+		count = ops->get_sset_count(dev, ETH_SS_STATS);
+		ops->get_ethtool_stats(dev, stats, data);
+	}
+
+	if (ds->ops->get_ethtool_stats)
+		ds->ops->get_ethtool_stats(ds, port, data + count);
+}
+
+static int dsa_master_get_sset_count(struct net_device *dev, int sset)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+	const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+	struct dsa_switch *ds = cpu_dp->ds;
+	int count = 0;
+
+	if (ops && ops->get_sset_count)
+		count += ops->get_sset_count(dev, sset);
+
+	if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
+		count += ds->ops->get_sset_count(ds);
+
+	return count;
+}
+
+static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
+				   uint8_t *data)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+	const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+	struct dsa_switch *ds = cpu_dp->ds;
+	int port = cpu_dp->index;
+	int len = ETH_GSTRING_LEN;
+	int mcount = 0, count;
+	unsigned int i;
+	uint8_t pfx[4];
+	uint8_t *ndata;
+
+	snprintf(pfx, sizeof(pfx), "p%.2d", port);
+	/* We do not want to be NUL-terminated, since this is a prefix */
+	pfx[sizeof(pfx) - 1] = '_';
+
+	if (ops && ops->get_sset_count && ops->get_strings) {
+		mcount = ops->get_sset_count(dev, ETH_SS_STATS);
+		ops->get_strings(dev, stringset, data);
+	}
+
+	if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
+		ndata = data + mcount * len;
+		/* This function copies ETH_GSTRING_LEN bytes, we will mangle
+		 * the output after to prepend our CPU port prefix we
+		 * constructed earlier
+		 */
+		ds->ops->get_strings(ds, port, ndata);
+		count = ds->ops->get_sset_count(ds);
+		for (i = 0; i < count; i++) {
+			memmove(ndata + (i * len + sizeof(pfx)),
+				ndata + i * len, len - sizeof(pfx));
+			memcpy(ndata + i * len, pfx, sizeof(pfx));
+		}
+	}
+}
+
+static int dsa_master_ethtool_setup(struct net_device *dev)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+	struct dsa_switch *ds = cpu_dp->ds;
+	struct ethtool_ops *ops;
+
+	ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return -ENOMEM;
+
+	cpu_dp->orig_ethtool_ops = dev->ethtool_ops;
+	if (cpu_dp->orig_ethtool_ops)
+		memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops));
+
+	ops->get_sset_count = dsa_master_get_sset_count;
+	ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
+	ops->get_strings = dsa_master_get_strings;
+
+	dev->ethtool_ops = ops;
+
+	return 0;
+}
+
+static void dsa_master_ethtool_teardown(struct net_device *dev)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+	dev->ethtool_ops = cpu_dp->orig_ethtool_ops;
+	cpu_dp->orig_ethtool_ops = NULL;
+}
+
+int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
+{
+	/* If we use a tagging format that doesn't have an ethertype
+	 * field, make sure that all packets from this point on get
+	 * sent to the tag format's receive function.
+	 */
+	wmb();
+
+	dev->dsa_ptr = cpu_dp;
+
+	return dsa_master_ethtool_setup(dev);
+}
+
+void dsa_master_teardown(struct net_device *dev)
+{
+	dsa_master_ethtool_teardown(dev);
+
+	dev->dsa_ptr = NULL;
+
+	/* If we used a tagging format that doesn't have an ethertype
+	 * field, make sure that all packets from this point get sent
+	 * without the tag and go through the regular receive path.
+	 */
+	wmb();
+}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index efc3bce3a89d..bb4be2679904 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -12,10 +12,12 @@
 
 #include <linux/if_bridge.h>
 #include <linux/notifier.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
 
 #include "dsa_priv.h"
 
-static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v)
+static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
 {
 	struct raw_notifier_head *nh = &dp->ds->dst->nh;
 	int err;
@@ -56,7 +58,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state,
 	return 0;
 }
 
-void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
 {
 	int err;
 
@@ -65,6 +67,35 @@ void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
 		pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
 }
 
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+{
+	u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+	struct dsa_switch *ds = dp->ds;
+	int port = dp->index;
+	int err;
+
+	if (ds->ops->port_enable) {
+		err = ds->ops->port_enable(ds, port, phy);
+		if (err)
+			return err;
+	}
+
+	dsa_port_set_state_now(dp, stp_state);
+
+	return 0;
+}
+
+void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
+{
+	struct dsa_switch *ds = dp->ds;
+	int port = dp->index;
+
+	dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+
+	if (ds->ops->port_disable)
+		ds->ops->port_disable(ds, port, phy);
+}
+
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
 {
 	struct dsa_notifier_bridge_info info = {
@@ -146,44 +177,45 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
 	return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
 }
 
-int dsa_port_fdb_add(struct dsa_port *dp,
-		     const struct switchdev_obj_port_fdb *fdb,
-		     struct switchdev_trans *trans)
+int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+		     u16 vid)
 {
 	struct dsa_notifier_fdb_info info = {
 		.sw_index = dp->ds->index,
 		.port = dp->index,
-		.trans = trans,
-		.fdb = fdb,
+		.addr = addr,
+		.vid = vid,
 	};
 
 	return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info);
 }
 
-int dsa_port_fdb_del(struct dsa_port *dp,
-		     const struct switchdev_obj_port_fdb *fdb)
+int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+		     u16 vid)
 {
 	struct dsa_notifier_fdb_info info = {
 		.sw_index = dp->ds->index,
 		.port = dp->index,
-		.fdb = fdb,
+		.addr = addr,
+		.vid = vid,
+
 	};
 
 	return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
 }
 
-int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
-		      switchdev_obj_dump_cb_t *cb)
+int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
 {
 	struct dsa_switch *ds = dp->ds;
+	int port = dp->index;
 
-	if (ds->ops->port_fdb_dump)
-		return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb);
+	if (!ds->ops->port_fdb_dump)
+		return -EOPNOTSUPP;
 
-	return -EOPNOTSUPP;
+	return ds->ops->port_fdb_dump(ds, port, cb, data);
 }
 
-int dsa_port_mdb_add(struct dsa_port *dp,
+int dsa_port_mdb_add(const struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb,
 		     struct switchdev_trans *trans)
 {
@@ -197,7 +229,7 @@ int dsa_port_mdb_add(struct dsa_port *dp,
 	return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
 }
 
-int dsa_port_mdb_del(struct dsa_port *dp,
+int dsa_port_mdb_del(const struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb)
 {
 	struct dsa_notifier_mdb_info info = {
@@ -209,17 +241,6 @@ int dsa_port_mdb_del(struct dsa_port *dp,
 	return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
 }
 
-int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
-		      switchdev_obj_dump_cb_t *cb)
-{
-	struct dsa_switch *ds = dp->ds;
-
-	if (ds->ops->port_mdb_dump)
-		return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
-
-	return -EOPNOTSUPP;
-}
-
 int dsa_port_vlan_add(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan,
 		      struct switchdev_trans *trans)
@@ -231,7 +252,10 @@ int dsa_port_vlan_add(struct dsa_port *dp,
 		.vlan = vlan,
 	};
 
-	return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+	if (br_vlan_enabled(dp->bridge_dev))
+		return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+
+	return 0;
 }
 
 int dsa_port_vlan_del(struct dsa_port *dp,
@@ -243,17 +267,53 @@ int dsa_port_vlan_del(struct dsa_port *dp,
 		.vlan = vlan,
 	};
 
-	return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+	if (br_vlan_enabled(dp->bridge_dev))
+		return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+
+	return 0;
 }
 
-int dsa_port_vlan_dump(struct dsa_port *dp,
-		       struct switchdev_obj_port_vlan *vlan,
-		       switchdev_obj_dump_cb_t *cb)
+int dsa_port_fixed_link_register_of(struct dsa_port *dp)
 {
+	struct device_node *dn = dp->dn;
 	struct dsa_switch *ds = dp->ds;
+	struct phy_device *phydev;
+	int port = dp->index;
+	int mode;
+	int err;
 
-	if (ds->ops->port_vlan_dump)
-		return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
+	if (of_phy_is_fixed_link(dn)) {
+		err = of_phy_register_fixed_link(dn);
+		if (err) {
+			dev_err(ds->dev,
+				"failed to register the fixed PHY of port %d\n",
+				port);
+			return err;
+		}
+
+		phydev = of_phy_find_device(dn);
+
+		mode = of_get_phy_mode(dn);
+		if (mode < 0)
+			mode = PHY_INTERFACE_MODE_NA;
+		phydev->interface = mode;
+
+		genphy_config_init(phydev);
+		genphy_read_status(phydev);
+
+		if (ds->ops->adjust_link)
+			ds->ops->adjust_link(ds, port, phydev);
+
+		put_device(&phydev->mdio.dev);
+	}
+
+	return 0;
+}
+
+void dsa_port_fixed_link_unregister_of(struct dsa_port *dp)
+{
+	struct device_node *dn = dp->dn;
 
-	return -EOPNOTSUPP;
+	if (of_phy_is_fixed_link(dn))
+		of_phy_deregister_fixed_link(dn);
 }
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9507bd38cf04..d6e7a642493b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -55,7 +55,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
 	ds->slave_mii_bus->read = dsa_slave_phy_read;
 	ds->slave_mii_bus->write = dsa_slave_phy_write;
 	snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d",
-		 ds->dst->tree, ds->index);
+		 ds->dst->index, ds->index);
 	ds->slave_mii_bus->parent = ds->dev;
 	ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
 }
@@ -64,18 +64,13 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
 /* slave device handling ****************************************************/
 static int dsa_slave_get_iflink(const struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-
-	return dsa_master_netdev(p)->ifindex;
+	return dsa_slave_to_master(dev)->ifindex;
 }
 
 static int dsa_slave_open(struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_port *dp = p->dp;
-	struct dsa_switch *ds = dp->ds;
-	struct net_device *master = dsa_master_netdev(p);
-	u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+	struct net_device *master = dsa_slave_to_master(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	int err;
 
 	if (!(master->flags & IFF_UP))
@@ -98,16 +93,12 @@ static int dsa_slave_open(struct net_device *dev)
 			goto clear_allmulti;
 	}
 
-	if (ds->ops->port_enable) {
-		err = ds->ops->port_enable(ds, p->dp->index, p->phy);
-		if (err)
-			goto clear_promisc;
-	}
-
-	dsa_port_set_state_now(p->dp, stp_state);
+	err = dsa_port_enable(dp, dev->phydev);
+	if (err)
+		goto clear_promisc;
 
-	if (p->phy)
-		phy_start(p->phy);
+	if (dev->phydev)
+		phy_start(dev->phydev);
 
 	return 0;
 
@@ -126,12 +117,13 @@ out:
 
 static int dsa_slave_close(struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct net_device *master = dsa_master_netdev(p);
-	struct dsa_switch *ds = p->dp->ds;
+	struct net_device *master = dsa_slave_to_master(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+
+	if (dev->phydev)
+		phy_stop(dev->phydev);
 
-	if (p->phy)
-		phy_stop(p->phy);
+	dsa_port_disable(dp, dev->phydev);
 
 	dev_mc_unsync(master, dev);
 	dev_uc_unsync(master, dev);
@@ -143,18 +135,12 @@ static int dsa_slave_close(struct net_device *dev)
 	if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
 		dev_uc_del(master, dev->dev_addr);
 
-	if (ds->ops->port_disable)
-		ds->ops->port_disable(ds, p->dp->index, p->phy);
-
-	dsa_port_set_state_now(p->dp, BR_STATE_DISABLED);
-
 	return 0;
 }
 
 static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct net_device *master = dsa_master_netdev(p);
+	struct net_device *master = dsa_slave_to_master(dev);
 
 	if (change & IFF_ALLMULTI)
 		dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -164,8 +150,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
 
 static void dsa_slave_set_rx_mode(struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct net_device *master = dsa_master_netdev(p);
+	struct net_device *master = dsa_slave_to_master(dev);
 
 	dev_mc_sync(master, dev);
 	dev_uc_sync(master, dev);
@@ -173,8 +158,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
 
 static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct net_device *master = dsa_master_netdev(p);
+	struct net_device *master = dsa_slave_to_master(dev);
 	struct sockaddr *addr = a;
 	int err;
 
@@ -199,22 +183,90 @@ out:
 	return 0;
 }
 
-static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+struct dsa_slave_dump_ctx {
+	struct net_device *dev;
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+	int idx;
+};
+
+static int
+dsa_slave_port_fdb_do_dump(const unsigned char *addr, u16 vid,
+			   bool is_static, void *data)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_slave_dump_ctx *dump = data;
+	u32 portid = NETLINK_CB(dump->cb->skb).portid;
+	u32 seq = dump->cb->nlh->nlmsg_seq;
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	if (dump->idx < dump->cb->args[2])
+		goto skip;
+
+	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
+			sizeof(*ndm), NLM_F_MULTI);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family  = AF_BRIDGE;
+	ndm->ndm_pad1    = 0;
+	ndm->ndm_pad2    = 0;
+	ndm->ndm_flags   = NTF_SELF;
+	ndm->ndm_type    = 0;
+	ndm->ndm_ifindex = dump->dev->ifindex;
+	ndm->ndm_state   = is_static ? NUD_NOARP : NUD_REACHABLE;
+
+	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr))
+		goto nla_put_failure;
+
+	if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid))
+		goto nla_put_failure;
+
+	nlmsg_end(dump->skb, nlh);
+
+skip:
+	dump->idx++;
+	return 0;
 
-	if (p->phy != NULL)
-		return phy_mii_ioctl(p->phy, ifr, cmd);
+nla_put_failure:
+	nlmsg_cancel(dump->skb, nlh);
+	return -EMSGSIZE;
+}
 
-	return -EOPNOTSUPP;
+static int
+dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+		   struct net_device *dev, struct net_device *filter_dev,
+		   int *idx)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_slave_dump_ctx dump = {
+		.dev = dev,
+		.skb = skb,
+		.cb = cb,
+		.idx = *idx,
+	};
+	int err;
+
+	err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump);
+	*idx = dump.idx;
+
+	return err;
+}
+
+static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	if (!dev->phydev)
+		return -ENODEV;
+
+	return phy_mii_ioctl(dev->phydev, ifr, cmd);
 }
 
 static int dsa_slave_port_attr_set(struct net_device *dev,
 				   const struct switchdev_attr *attr,
 				   struct switchdev_trans *trans)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_port *dp = p->dp;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	int ret;
 
 	switch (attr->id) {
@@ -240,8 +292,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 				  const struct switchdev_obj *obj,
 				  struct switchdev_trans *trans)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_port *dp = p->dp;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	int err;
 
 	/* For the prepare phase, ensure the full set of changes is feasable in
@@ -250,12 +301,16 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 	 */
 
 	switch (obj->id) {
-	case SWITCHDEV_OBJ_ID_PORT_FDB:
-		err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans);
-		break;
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
 		err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
 		break;
+	case SWITCHDEV_OBJ_ID_HOST_MDB:
+		/* DSA can directly translate this to a normal MDB add,
+		 * but on the CPU port.
+		 */
+		err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj),
+				       trans);
+		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
 					trans);
@@ -271,45 +326,21 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 static int dsa_slave_port_obj_del(struct net_device *dev,
 				  const struct switchdev_obj *obj)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_port *dp = p->dp;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	int err;
 
 	switch (obj->id) {
-	case SWITCHDEV_OBJ_ID_PORT_FDB:
-		err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
-		break;
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
 		err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
-	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
-
-	return err;
-}
-
-static int dsa_slave_port_obj_dump(struct net_device *dev,
-				   struct switchdev_obj *obj,
-				   switchdev_obj_dump_cb_t *cb)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_port *dp = p->dp;
-	int err;
-
-	switch (obj->id) {
-	case SWITCHDEV_OBJ_ID_PORT_FDB:
-		err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb);
-		break;
-	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
+	case SWITCHDEV_OBJ_ID_HOST_MDB:
+		/* DSA can directly translate this to a normal MDB del,
+		 * but on the CPU port.
+		 */
+		err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
 		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb);
+		err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -322,13 +353,17 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
 static int dsa_slave_port_attr_get(struct net_device *dev,
 				   struct switchdev_attr *attr)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
+	struct dsa_switch_tree *dst = ds->dst;
 
 	switch (attr->id) {
 	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = sizeof(ds->index);
-		memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len);
+		attr->u.ppid.id_len = sizeof(dst->index);
+		memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len);
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
+		attr->u.brport_flags_support = 0;
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -337,10 +372,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
 	return 0;
 }
 
-static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p,
-					       struct sk_buff *skb)
+static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
+						     struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_POLL_CONTROLLER
+	struct dsa_slave_priv *p = netdev_priv(dev);
+
 	if (p->netpoll)
 		netpoll_send_skb(p->netpoll, skb);
 #else
@@ -352,10 +389,14 @@ static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p,
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct pcpu_sw_netstats *s;
 	struct sk_buff *nskb;
 
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
+	s = this_cpu_ptr(p->stats64);
+	u64_stats_update_begin(&s->syncp);
+	s->tx_packets++;
+	s->tx_bytes += skb->len;
+	u64_stats_update_end(&s->syncp);
 
 	/* Transmit function may have to reallocate the original SKB,
 	 * in which case it must have freed it. Only free it here on error.
@@ -370,43 +411,18 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * tag to be successfully transmitted
 	 */
 	if (unlikely(netpoll_tx_running(dev)))
-		return dsa_netpoll_send_skb(p, nskb);
+		return dsa_slave_netpoll_send_skb(dev, nskb);
 
 	/* Queue the SKB for transmission on the parent interface, but
 	 * do not modify its EtherType
 	 */
-	nskb->dev = dsa_master_netdev(p);
+	nskb->dev = dsa_slave_to_master(dev);
 	dev_queue_xmit(nskb);
 
 	return NETDEV_TX_OK;
 }
 
 /* ethtool operations *******************************************************/
-static int
-dsa_slave_get_link_ksettings(struct net_device *dev,
-			     struct ethtool_link_ksettings *cmd)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-
-	if (!p->phy)
-		return -EOPNOTSUPP;
-
-	phy_ethtool_ksettings_get(p->phy, cmd);
-
-	return 0;
-}
-
-static int
-dsa_slave_set_link_ksettings(struct net_device *dev,
-			     const struct ethtool_link_ksettings *cmd)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-
-	if (p->phy != NULL)
-		return phy_ethtool_ksettings_set(p->phy, cmd);
-
-	return -EOPNOTSUPP;
-}
 
 static void dsa_slave_get_drvinfo(struct net_device *dev,
 				  struct ethtool_drvinfo *drvinfo)
@@ -418,11 +434,11 @@ static void dsa_slave_get_drvinfo(struct net_device *dev,
 
 static int dsa_slave_get_regs_len(struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 
 	if (ds->ops->get_regs_len)
-		return ds->ops->get_regs_len(ds, p->dp->index);
+		return ds->ops->get_regs_len(ds, dp->index);
 
 	return -EOPNOTSUPP;
 }
@@ -430,39 +446,27 @@ static int dsa_slave_get_regs_len(struct net_device *dev)
 static void
 dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 
 	if (ds->ops->get_regs)
-		ds->ops->get_regs(ds, p->dp->index, regs, _p);
-}
-
-static int dsa_slave_nway_reset(struct net_device *dev)
-{
-	struct dsa_slave_priv *p = netdev_priv(dev);
-
-	if (p->phy != NULL)
-		return genphy_restart_aneg(p->phy);
-
-	return -EOPNOTSUPP;
+		ds->ops->get_regs(ds, dp->index, regs, _p);
 }
 
 static u32 dsa_slave_get_link(struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	if (!dev->phydev)
+		return 0;	/* no PHY: link down (u32 can't hold an errno) */
 
-	if (p->phy != NULL) {
-		genphy_update_link(p->phy);
-		return p->phy->link;
-	}
+	genphy_update_link(dev->phydev);
 
-	return -EOPNOTSUPP;
+	return dev->phydev->link;
 }
 
 static int dsa_slave_get_eeprom_len(struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 
 	if (ds->cd && ds->cd->eeprom_len)
 		return ds->cd->eeprom_len;
@@ -476,8 +480,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
 static int dsa_slave_get_eeprom(struct net_device *dev,
 				struct ethtool_eeprom *eeprom, u8 *data)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 
 	if (ds->ops->get_eeprom)
 		return ds->ops->get_eeprom(ds, eeprom, data);
@@ -488,8 +492,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev,
 static int dsa_slave_set_eeprom(struct net_device *dev,
 				struct ethtool_eeprom *eeprom, u8 *data)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 
 	if (ds->ops->set_eeprom)
 		return ds->ops->set_eeprom(ds, eeprom, data);
@@ -500,8 +504,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev,
 static void dsa_slave_get_strings(struct net_device *dev,
 				  uint32_t stringset, uint8_t *data)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 
 	if (stringset == ETH_SS_STATS) {
 		int len = ETH_GSTRING_LEN;
@@ -511,80 +515,7 @@ static void dsa_slave_get_strings(struct net_device *dev,
 		strncpy(data + 2 * len, "rx_packets", len);
 		strncpy(data + 3 * len, "rx_bytes", len);
 		if (ds->ops->get_strings)
-			ds->ops->get_strings(ds, p->dp->index, data + 4 * len);
-	}
-}
-
-static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
-					   struct ethtool_stats *stats,
-					   uint64_t *data)
-{
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
-	struct dsa_switch *ds = cpu_dp->ds;
-	s8 cpu_port = cpu_dp->index;
-	int count = 0;
-
-	if (cpu_dp->ethtool_ops.get_sset_count) {
-		count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
-		cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data);
-	}
-
-	if (ds->ops->get_ethtool_stats)
-		ds->ops->get_ethtool_stats(ds, cpu_port, data + count);
-}
-
-static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
-{
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
-	struct dsa_switch *ds = cpu_dp->ds;
-	int count = 0;
-
-	if (cpu_dp->ethtool_ops.get_sset_count)
-		count += cpu_dp->ethtool_ops.get_sset_count(dev, sset);
-
-	if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
-		count += ds->ops->get_sset_count(ds);
-
-	return count;
-}
-
-static void dsa_cpu_port_get_strings(struct net_device *dev,
-				     uint32_t stringset, uint8_t *data)
-{
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
-	struct dsa_switch *ds = cpu_dp->ds;
-	s8 cpu_port = cpu_dp->index;
-	int len = ETH_GSTRING_LEN;
-	int mcount = 0, count;
-	unsigned int i;
-	uint8_t pfx[4];
-	uint8_t *ndata;
-
-	snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port);
-	/* We do not want to be NULL-terminated, since this is a prefix */
-	pfx[sizeof(pfx) - 1] = '_';
-
-	if (cpu_dp->ethtool_ops.get_sset_count) {
-		mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
-		cpu_dp->ethtool_ops.get_strings(dev, stringset, data);
-	}
-
-	if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
-		ndata = data + mcount * len;
-		/* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
-		 * the output after to prepend our CPU port prefix we
-		 * constructed earlier
-		 */
-		ds->ops->get_strings(ds, cpu_port, ndata);
-		count = ds->ops->get_sset_count(ds);
-		for (i = 0; i < count; i++) {
-			memmove(ndata + (i * len + sizeof(pfx)),
-				ndata + i * len, len - sizeof(pfx));
-			memcpy(ndata + i * len, pfx, sizeof(pfx));
-		}
+			ds->ops->get_strings(ds, dp->index, data + 4 * len);
 	}
 }
 
@@ -592,21 +523,37 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
 					struct ethtool_stats *stats,
 					uint64_t *data)
 {
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
-
-	data[0] = dev->stats.tx_packets;
-	data[1] = dev->stats.tx_bytes;
-	data[2] = dev->stats.rx_packets;
-	data[3] = dev->stats.rx_bytes;
+	struct dsa_switch *ds = dp->ds;
+	struct pcpu_sw_netstats *s;
+	unsigned int start;
+	int i;
+
+	for_each_possible_cpu(i) {
+		u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+
+		s = per_cpu_ptr(p->stats64, i);
+		do {
+			start = u64_stats_fetch_begin_irq(&s->syncp);
+			tx_packets = s->tx_packets;
+			tx_bytes = s->tx_bytes;
+			rx_packets = s->rx_packets;
+			rx_bytes = s->rx_bytes;
+		} while (u64_stats_fetch_retry_irq(&s->syncp, start));
+		data[0] += tx_packets;
+		data[1] += tx_bytes;
+		data[2] += rx_packets;
+		data[3] += rx_bytes;
+	}
 	if (ds->ops->get_ethtool_stats)
-		ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4);
+		ds->ops->get_ethtool_stats(ds, dp->index, data + 4);
 }
 
 static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 
 	if (sset == ETH_SS_STATS) {
 		int count;
@@ -623,69 +570,77 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
 
 static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 
 	if (ds->ops->get_wol)
-		ds->ops->get_wol(ds, p->dp->index, w);
+		ds->ops->get_wol(ds, dp->index, w);
 }
 
 static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 	int ret = -EOPNOTSUPP;
 
 	if (ds->ops->set_wol)
-		ret = ds->ops->set_wol(ds, p->dp->index, w);
+		ret = ds->ops->set_wol(ds, dp->index, w);
 
 	return ret;
 }
 
 static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 	int ret;
 
-	if (!ds->ops->set_eee)
+	/* Port's PHY and MAC both need to be EEE capable */
+	if (!dev->phydev)
+		return -ENODEV;
+
+	if (!ds->ops->set_mac_eee)
 		return -EOPNOTSUPP;
 
-	ret = ds->ops->set_eee(ds, p->dp->index, p->phy, e);
+	ret = ds->ops->set_mac_eee(ds, dp->index, e);
 	if (ret)
 		return ret;
 
-	if (p->phy)
-		ret = phy_ethtool_set_eee(p->phy, e);
+	if (e->eee_enabled) {
+		ret = phy_init_eee(dev->phydev, 0);
+		if (ret)
+			return ret;
+	}
 
-	return ret;
+	return phy_ethtool_set_eee(dev->phydev, e);
 }
 
 static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 	int ret;
 
-	if (!ds->ops->get_eee)
+	/* Port's PHY and MAC both need to be EEE capable */
+	if (!dev->phydev)
+		return -ENODEV;
+
+	if (!ds->ops->get_mac_eee)
 		return -EOPNOTSUPP;
 
-	ret = ds->ops->get_eee(ds, p->dp->index, e);
+	ret = ds->ops->get_mac_eee(ds, dp->index, e);
 	if (ret)
 		return ret;
 
-	if (p->phy)
-		ret = phy_ethtool_get_eee(p->phy, e);
-
-	return ret;
+	return phy_ethtool_get_eee(dev->phydev, e);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static int dsa_slave_netpoll_setup(struct net_device *dev,
 				   struct netpoll_info *ni)
 {
+	struct net_device *master = dsa_slave_to_master(dev);
 	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct net_device *master = dsa_master_netdev(p);
 	struct netpoll *netpoll;
 	int err = 0;
 
@@ -725,18 +680,18 @@ static void dsa_slave_poll_controller(struct net_device *dev)
 static int dsa_slave_get_phys_port_name(struct net_device *dev,
 					char *name, size_t len)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 
-	if (snprintf(name, len, "p%d", p->dp->index) >= len)
+	if (snprintf(name, len, "p%d", dp->index) >= len)
 		return -EINVAL;
 
 	return 0;
 }
 
 static struct dsa_mall_tc_entry *
-dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p,
-			     unsigned long cookie)
+dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
 {
+	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_mall_tc_entry *mall_tc_entry;
 
 	list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list)
@@ -747,17 +702,18 @@ dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p,
 }
 
 static int dsa_slave_add_cls_matchall(struct net_device *dev,
-				      __be16 protocol,
 				      struct tc_cls_matchall_offload *cls,
 				      bool ingress)
 {
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_mall_tc_entry *mall_tc_entry;
-	struct dsa_switch *ds = p->dp->ds;
+	__be16 protocol = cls->common.protocol;
 	struct net *net = dev_net(dev);
-	struct dsa_slave_priv *to_p;
+	struct dsa_switch *ds = dp->ds;
 	struct net_device *to_dev;
 	const struct tc_action *a;
+	struct dsa_port *to_dp;
 	int err = -EOPNOTSUPP;
 	LIST_HEAD(actions);
 	int ifindex;
@@ -765,7 +721,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
 	if (!ds->ops->port_mirror_add)
 		return err;
 
-	if (!tc_single_action(cls->exts))
+	if (!tcf_exts_has_one_action(cls->exts))
 		return err;
 
 	tcf_exts_to_list(cls->exts, &actions);
@@ -790,13 +746,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
 		mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
 		mirror = &mall_tc_entry->mirror;
 
-		to_p = netdev_priv(to_dev);
+		to_dp = dsa_slave_to_port(to_dev);
 
-		mirror->to_local_port = to_p->dp->index;
+		mirror->to_local_port = to_dp->index;
 		mirror->ingress = ingress;
 
-		err = ds->ops->port_mirror_add(ds, p->dp->index, mirror,
-					       ingress);
+		err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress);
 		if (err) {
 			kfree(mall_tc_entry);
 			return err;
@@ -811,14 +766,14 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
 static void dsa_slave_del_cls_matchall(struct net_device *dev,
 				       struct tc_cls_matchall_offload *cls)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct dsa_mall_tc_entry *mall_tc_entry;
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_switch *ds = dp->ds;
 
 	if (!ds->ops->port_mirror_del)
 		return;
 
-	mall_tc_entry = dsa_slave_mall_tc_entry_find(p, cls->cookie);
+	mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie);
 	if (!mall_tc_entry)
 		return;
 
@@ -826,8 +781,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
 
 	switch (mall_tc_entry->type) {
 	case DSA_PORT_MALL_MIRROR:
-		ds->ops->port_mirror_del(ds, p->dp->index,
-					 &mall_tc_entry->mirror);
+		ds->ops->port_mirror_del(ds, dp->index, &mall_tc_entry->mirror);
 		break;
 	default:
 		WARN_ON(1);
@@ -836,67 +790,143 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
 	kfree(mall_tc_entry);
 }
 
-static int dsa_slave_setup_tc(struct net_device *dev, u32 handle,
-			      u32 chain_index, __be16 protocol,
-			      struct tc_to_netdev *tc)
+static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
+					   struct tc_cls_matchall_offload *cls,
+					   bool ingress)
+{
+	if (cls->common.chain_index)
+		return -EOPNOTSUPP;
+
+	switch (cls->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return dsa_slave_add_cls_matchall(dev, cls, ingress);
+	case TC_CLSMATCHALL_DESTROY:
+		dsa_slave_del_cls_matchall(dev, cls);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				       void *cb_priv, bool ingress)
 {
-	bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
+	struct net_device *dev = cb_priv;
 
-	if (chain_index)
+	if (!tc_can_offload(dev))
 		return -EOPNOTSUPP;
 
-	switch (tc->type) {
-	case TC_SETUP_MATCHALL:
-		switch (tc->cls_mall->command) {
-		case TC_CLSMATCHALL_REPLACE:
-			return dsa_slave_add_cls_matchall(dev, protocol,
-							  tc->cls_mall,
-							  ingress);
-		case TC_CLSMATCHALL_DESTROY:
-			dsa_slave_del_cls_matchall(dev, tc->cls_mall);
-			return 0;
-		}
+	switch (type) {
+	case TC_SETUP_CLSMATCHALL:
+		return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
-void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
+static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type,
+					  void *type_data, void *cb_priv)
+{
+	return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, true);
+}
+
+static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type,
+					  void *type_data, void *cb_priv)
 {
-	ops->get_sset_count = dsa_cpu_port_get_sset_count;
-	ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats;
-	ops->get_strings = dsa_cpu_port_get_strings;
+	return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false);
+}
+
+static int dsa_slave_setup_tc_block(struct net_device *dev,
+				    struct tc_block_offload *f)
+{
+	tc_setup_cb_t *cb;
+
+	if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		cb = dsa_slave_setup_tc_block_cb_ig;
+	else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+		cb = dsa_slave_setup_tc_block_cb_eg;
+	else
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, cb, dev, dev);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, cb, dev);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			      void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return dsa_slave_setup_tc_block(dev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void dsa_slave_get_stats64(struct net_device *dev,
+				  struct rtnl_link_stats64 *stats)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct pcpu_sw_netstats *s;
+	unsigned int start;
+	int i;
+
+	netdev_stats_to_stats64(stats, &dev->stats);
+	for_each_possible_cpu(i) {
+		u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+
+		s = per_cpu_ptr(p->stats64, i);
+		do {
+			start = u64_stats_fetch_begin_irq(&s->syncp);
+			tx_packets = s->tx_packets;
+			tx_bytes = s->tx_bytes;
+			rx_packets = s->rx_packets;
+			rx_bytes = s->rx_bytes;
+		} while (u64_stats_fetch_retry_irq(&s->syncp, start));
+
+		stats->tx_packets += tx_packets;
+		stats->tx_bytes += tx_bytes;
+		stats->rx_packets += rx_packets;
+		stats->rx_bytes += rx_bytes;
+	}
 }
 
 static int dsa_slave_get_rxnfc(struct net_device *dev,
 			       struct ethtool_rxnfc *nfc, u32 *rule_locs)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 
 	if (!ds->ops->get_rxnfc)
 		return -EOPNOTSUPP;
 
-	return ds->ops->get_rxnfc(ds, p->dp->index, nfc, rule_locs);
+	return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs);
 }
 
 static int dsa_slave_set_rxnfc(struct net_device *dev,
 			       struct ethtool_rxnfc *nfc)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
 
 	if (!ds->ops->set_rxnfc)
 		return -EOPNOTSUPP;
 
-	return ds->ops->set_rxnfc(ds, p->dp->index, nfc);
+	return ds->ops->set_rxnfc(ds, dp->index, nfc);
 }
 
 static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.get_drvinfo		= dsa_slave_get_drvinfo,
 	.get_regs_len		= dsa_slave_get_regs_len,
 	.get_regs		= dsa_slave_get_regs,
-	.nway_reset		= dsa_slave_nway_reset,
+	.nway_reset		= phy_ethtool_nway_reset,
 	.get_link		= dsa_slave_get_link,
 	.get_eeprom_len		= dsa_slave_get_eeprom_len,
 	.get_eeprom		= dsa_slave_get_eeprom,
@@ -908,8 +938,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.get_wol		= dsa_slave_get_wol,
 	.set_eee		= dsa_slave_set_eee,
 	.get_eee		= dsa_slave_get_eee,
-	.get_link_ksettings	= dsa_slave_get_link_ksettings,
-	.set_link_ksettings	= dsa_slave_set_link_ksettings,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
 	.get_rxnfc		= dsa_slave_get_rxnfc,
 	.set_rxnfc		= dsa_slave_set_rxnfc,
 };
@@ -921,9 +951,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_change_rx_flags	= dsa_slave_change_rx_flags,
 	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
 	.ndo_set_mac_address	= dsa_slave_set_mac_address,
-	.ndo_fdb_add		= switchdev_port_fdb_add,
-	.ndo_fdb_del		= switchdev_port_fdb_del,
-	.ndo_fdb_dump		= switchdev_port_fdb_dump,
+	.ndo_fdb_add		= dsa_legacy_fdb_add,
+	.ndo_fdb_del		= dsa_legacy_fdb_del,
+	.ndo_fdb_dump		= dsa_slave_fdb_dump,
 	.ndo_do_ioctl		= dsa_slave_ioctl,
 	.ndo_get_iflink		= dsa_slave_get_iflink,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -931,11 +961,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_netpoll_cleanup	= dsa_slave_netpoll_cleanup,
 	.ndo_poll_controller	= dsa_slave_poll_controller,
 #endif
-	.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
-	.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
-	.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
 	.ndo_get_phys_port_name	= dsa_slave_get_phys_port_name,
 	.ndo_setup_tc		= dsa_slave_setup_tc,
+	.ndo_get_stats64	= dsa_slave_get_stats64,
 };
 
 static const struct switchdev_ops dsa_slave_switchdev_ops = {
@@ -943,7 +971,6 @@ static const struct switchdev_ops dsa_slave_switchdev_ops = {
 	.switchdev_port_attr_set	= dsa_slave_port_attr_set,
 	.switchdev_port_obj_add		= dsa_slave_port_obj_add,
 	.switchdev_port_obj_del		= dsa_slave_port_obj_del,
-	.switchdev_port_obj_dump	= dsa_slave_port_obj_dump,
 };
 
 static struct device_type dsa_type = {
@@ -952,78 +979,81 @@ static struct device_type dsa_type = {
 
 static void dsa_slave_adjust_link(struct net_device *dev)
 {
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_switch *ds = dp->ds;
 	unsigned int status_changed = 0;
 
-	if (p->old_link != p->phy->link) {
+	if (p->old_link != dev->phydev->link) {
 		status_changed = 1;
-		p->old_link = p->phy->link;
+		p->old_link = dev->phydev->link;
 	}
 
-	if (p->old_duplex != p->phy->duplex) {
+	if (p->old_duplex != dev->phydev->duplex) {
 		status_changed = 1;
-		p->old_duplex = p->phy->duplex;
+		p->old_duplex = dev->phydev->duplex;
 	}
 
-	if (p->old_pause != p->phy->pause) {
+	if (p->old_pause != dev->phydev->pause) {
 		status_changed = 1;
-		p->old_pause = p->phy->pause;
+		p->old_pause = dev->phydev->pause;
 	}
 
 	if (ds->ops->adjust_link && status_changed)
-		ds->ops->adjust_link(ds, p->dp->index, p->phy);
+		ds->ops->adjust_link(ds, dp->index, dev->phydev);
 
 	if (status_changed)
-		phy_print_status(p->phy);
+		phy_print_status(dev->phydev);
 }
 
 static int dsa_slave_fixed_link_update(struct net_device *dev,
 				       struct fixed_phy_status *status)
 {
-	struct dsa_slave_priv *p;
 	struct dsa_switch *ds;
+	struct dsa_port *dp;
 
 	if (dev) {
-		p = netdev_priv(dev);
-		ds = p->dp->ds;
+		dp = dsa_slave_to_port(dev);
+		ds = dp->ds;
 		if (ds->ops->fixed_link_update)
-			ds->ops->fixed_link_update(ds, p->dp->index, status);
+			ds->ops->fixed_link_update(ds, dp->index, status);
 	}
 
 	return 0;
 }
 
 /* slave device setup *******************************************************/
-static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
-				 struct net_device *slave_dev,
-				 int addr)
+static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
 {
-	struct dsa_switch *ds = p->dp->ds;
+	struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+	struct dsa_slave_priv *p = netdev_priv(slave_dev);
+	struct dsa_switch *ds = dp->ds;
 
-	p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr);
-	if (!p->phy) {
+	slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr);
+	if (!slave_dev->phydev) {
 		netdev_err(slave_dev, "no phy at %d\n", addr);
 		return -ENODEV;
 	}
 
 	/* Use already configured phy mode */
 	if (p->phy_interface == PHY_INTERFACE_MODE_NA)
-		p->phy_interface = p->phy->interface;
-	return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
-				  p->phy_interface);
+		p->phy_interface = slave_dev->phydev->interface;
+
+	return phy_connect_direct(slave_dev, slave_dev->phydev,
+				  dsa_slave_adjust_link, p->phy_interface);
 }
 
-static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
-				struct net_device *slave_dev)
+static int dsa_slave_phy_setup(struct net_device *slave_dev)
 {
-	struct dsa_switch *ds = p->dp->ds;
-	struct device_node *phy_dn, *port_dn;
+	struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+	struct dsa_slave_priv *p = netdev_priv(slave_dev);
+	struct device_node *port_dn = dp->dn;
+	struct dsa_switch *ds = dp->ds;
+	struct device_node *phy_dn;
 	bool phy_is_fixed = false;
 	u32 phy_flags = 0;
 	int mode, ret;
 
-	port_dn = p->dp->dn;
 	mode = of_get_phy_mode(port_dn);
 	if (mode < 0)
 		mode = PHY_INTERFACE_MODE_NA;
@@ -1044,52 +1074,35 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
 	}
 
 	if (ds->ops->get_phy_flags)
-		phy_flags = ds->ops->get_phy_flags(ds, p->dp->index);
+		phy_flags = ds->ops->get_phy_flags(ds, dp->index);
 
 	if (phy_dn) {
-		int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
-
-		/* If this PHY address is part of phys_mii_mask, which means
-		 * that we need to divert reads and writes to/from it, then we
-		 * want to bind this device using the slave MII bus created by
-		 * DSA to make that happen.
-		 */
-		if (!phy_is_fixed && phy_id >= 0 &&
-		    (ds->phys_mii_mask & (1 << phy_id))) {
-			ret = dsa_slave_phy_connect(p, slave_dev, phy_id);
-			if (ret) {
-				netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
-				of_node_put(phy_dn);
-				return ret;
-			}
-		} else {
-			p->phy = of_phy_connect(slave_dev, phy_dn,
-						dsa_slave_adjust_link,
-						phy_flags,
-						p->phy_interface);
-		}
-
+		slave_dev->phydev = of_phy_connect(slave_dev, phy_dn,
+						   dsa_slave_adjust_link,
+						   phy_flags,
+						   p->phy_interface);
 		of_node_put(phy_dn);
 	}
 
-	if (p->phy && phy_is_fixed)
-		fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update);
+	if (slave_dev->phydev && phy_is_fixed)
+		fixed_phy_set_link_update(slave_dev->phydev,
+					  dsa_slave_fixed_link_update);
 
 	/* We could not connect to a designated PHY, so use the switch internal
 	 * MDIO bus instead
 	 */
-	if (!p->phy) {
-		ret = dsa_slave_phy_connect(p, slave_dev, p->dp->index);
+	if (!slave_dev->phydev) {
+		ret = dsa_slave_phy_connect(slave_dev, dp->index);
 		if (ret) {
 			netdev_err(slave_dev, "failed to connect to port %d: %d\n",
-				   p->dp->index, ret);
+				   dp->index, ret);
 			if (phy_is_fixed)
 				of_phy_deregister_fixed_link(port_dn);
 			return ret;
 		}
 	}
 
-	phy_attached_info(p->phy);
+	phy_attached_info(slave_dev->phydev);
 
 	return 0;
 }
@@ -1109,12 +1122,12 @@ int dsa_slave_suspend(struct net_device *slave_dev)
 
 	netif_device_detach(slave_dev);
 
-	if (p->phy) {
-		phy_stop(p->phy);
+	if (slave_dev->phydev) {
+		phy_stop(slave_dev->phydev);
 		p->old_pause = -1;
 		p->old_link = -1;
 		p->old_duplex = -1;
-		phy_suspend(p->phy);
+		phy_suspend(slave_dev->phydev);
 	}
 
 	return 0;
@@ -1122,33 +1135,46 @@ int dsa_slave_suspend(struct net_device *slave_dev)
 
 int dsa_slave_resume(struct net_device *slave_dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(slave_dev);
-
 	netif_device_attach(slave_dev);
 
-	if (p->phy) {
-		phy_resume(p->phy);
-		phy_start(p->phy);
+	if (slave_dev->phydev) {
+		phy_resume(slave_dev->phydev);
+		phy_start(slave_dev->phydev);
 	}
 
 	return 0;
 }
 
-int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
-		     int port, const char *name)
+static void dsa_slave_notify(struct net_device *dev, unsigned long val)
 {
-	struct dsa_switch_tree *dst = ds->dst;
-	struct net_device *master;
+	struct net_device *master = dsa_slave_to_master(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_notifier_register_info rinfo = {
+		.switch_number = dp->ds->index,
+		.port_number = dp->index,
+		.master = master,
+		.info.dev = dev,
+	};
+
+	call_dsa_notifiers(val, dev, &rinfo.info);
+}
+
+int dsa_slave_create(struct dsa_port *port)
+{
+	const struct dsa_port *cpu_dp = port->cpu_dp;
+	struct net_device *master = cpu_dp->master;
+	struct dsa_switch *ds = port->ds;
+	const char *name = port->name;
 	struct net_device *slave_dev;
 	struct dsa_slave_priv *p;
-	struct dsa_port *cpu_dp;
 	int ret;
 
-	cpu_dp = ds->dst->cpu_dp;
-	master = cpu_dp->netdev;
+	if (!ds->num_tx_queues)
+		ds->num_tx_queues = 1;
 
-	slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name,
-				 NET_NAME_UNKNOWN, ether_setup);
+	slave_dev = alloc_netdev_mqs(sizeof(struct dsa_slave_priv), name,
+				     NET_NAME_UNKNOWN, ether_setup,
+				     ds->num_tx_queues, 1);
 	if (slave_dev == NULL)
 		return -ENOMEM;
 
@@ -1166,57 +1192,72 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
 				 NULL);
 
-	SET_NETDEV_DEV(slave_dev, parent);
-	slave_dev->dev.of_node = ds->ports[port].dn;
+	SET_NETDEV_DEV(slave_dev, port->ds->dev);
+	slave_dev->dev.of_node = port->dn;
 	slave_dev->vlan_features = master->vlan_features;
 
 	p = netdev_priv(slave_dev);
-	p->dp = &ds->ports[port];
+	p->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!p->stats64) {
+		free_netdev(slave_dev);
+		return -ENOMEM;
+	}
+	p->dp = port;
 	INIT_LIST_HEAD(&p->mall_tc_list);
-	p->xmit = dst->tag_ops->xmit;
+	p->xmit = cpu_dp->tag_ops->xmit;
 
 	p->old_pause = -1;
 	p->old_link = -1;
 	p->old_duplex = -1;
 
-	ds->ports[port].netdev = slave_dev;
-	ret = register_netdev(slave_dev);
-	if (ret) {
-		netdev_err(master, "error %d registering interface %s\n",
-			   ret, slave_dev->name);
-		ds->ports[port].netdev = NULL;
-		free_netdev(slave_dev);
-		return ret;
-	}
+	port->slave = slave_dev;
 
 	netif_carrier_off(slave_dev);
 
-	ret = dsa_slave_phy_setup(p, slave_dev);
+	ret = dsa_slave_phy_setup(slave_dev);
 	if (ret) {
 		netdev_err(master, "error %d setting up slave phy\n", ret);
-		unregister_netdev(slave_dev);
-		free_netdev(slave_dev);
-		return ret;
+		goto out_free;
+	}
+
+	dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
+
+	ret = register_netdev(slave_dev);
+	if (ret) {
+		netdev_err(master, "error %d registering interface %s\n",
+			   ret, slave_dev->name);
+		goto out_phy;
 	}
 
 	return 0;
+
+out_phy:
+	phy_disconnect(slave_dev->phydev);
+	if (of_phy_is_fixed_link(port->dn))
+		of_phy_deregister_fixed_link(port->dn);
+out_free:
+	free_percpu(p->stats64);
+	free_netdev(slave_dev);
+	port->slave = NULL;
+	return ret;
 }
 
 void dsa_slave_destroy(struct net_device *slave_dev)
 {
+	struct dsa_port *dp = dsa_slave_to_port(slave_dev);
 	struct dsa_slave_priv *p = netdev_priv(slave_dev);
-	struct device_node *port_dn;
-
-	port_dn = p->dp->dn;
+	struct device_node *port_dn = dp->dn;
 
 	netif_carrier_off(slave_dev);
-	if (p->phy) {
-		phy_disconnect(p->phy);
+	if (slave_dev->phydev) {
+		phy_disconnect(slave_dev->phydev);
 
 		if (of_phy_is_fixed_link(port_dn))
 			of_phy_deregister_fixed_link(port_dn);
 	}
+	dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
 	unregister_netdev(slave_dev);
+	free_percpu(p->stats64);
 	free_netdev(slave_dev);
 }
 
@@ -1228,8 +1269,7 @@ static bool dsa_slave_dev_check(struct net_device *dev)
 static int dsa_slave_changeupper(struct net_device *dev,
 				 struct netdev_notifier_changeupper_info *info)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct dsa_port *dp = p->dp;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	int err = NOTIFY_DONE;
 
 	if (netif_is_bridge_master(info->upper_dev)) {
@@ -1250,7 +1290,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
-	if (dev->netdev_ops != &dsa_slave_netdev_ops)
+	if (!dsa_slave_dev_check(dev))
 		return NOTIFY_DONE;
 
 	if (event == NETDEV_CHANGEUPPER)
@@ -1259,19 +1299,142 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
+struct dsa_switchdev_event_work {
+	struct work_struct work;
+	struct switchdev_notifier_fdb_info fdb_info;
+	struct net_device *dev;
+	unsigned long event;
+};
+
+static void dsa_slave_switchdev_event_work(struct work_struct *work)
+{
+	struct dsa_switchdev_event_work *switchdev_work =
+		container_of(work, struct dsa_switchdev_event_work, work);
+	struct net_device *dev = switchdev_work->dev;
+	struct switchdev_notifier_fdb_info *fdb_info;
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	int err;
+
+	rtnl_lock();
+	switch (switchdev_work->event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+		fdb_info = &switchdev_work->fdb_info;
+		err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
+		if (err) {
+			netdev_dbg(dev, "fdb add failed err=%d\n", err);
+			break;
+		}
+		call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
+					 &fdb_info->info);
+		break;
+
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		fdb_info = &switchdev_work->fdb_info;
+		err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
+		if (err) {
+			netdev_dbg(dev, "fdb del failed err=%d\n", err);
+			dev_close(dev);
+		}
+		break;
+	}
+	rtnl_unlock();
+
+	kfree(switchdev_work->fdb_info.addr);
+	kfree(switchdev_work);
+	dev_put(dev);
+}
+
+static int
+dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work *
+				  switchdev_work,
+				  const struct switchdev_notifier_fdb_info *
+				  fdb_info)
+{
+	memcpy(&switchdev_work->fdb_info, fdb_info,
+	       sizeof(switchdev_work->fdb_info));
+	switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+	if (!switchdev_work->fdb_info.addr)
+		return -ENOMEM;
+	ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
+			fdb_info->addr);
+	return 0;
+}
+
+/* Called under rcu_read_lock() */
+static int dsa_slave_switchdev_event(struct notifier_block *unused,
+				     unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	struct dsa_switchdev_event_work *switchdev_work;
+
+	if (!dsa_slave_dev_check(dev))
+		return NOTIFY_DONE;
+
+	switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+	if (!switchdev_work)
+		return NOTIFY_BAD;
+
+	INIT_WORK(&switchdev_work->work,
+		  dsa_slave_switchdev_event_work);
+	switchdev_work->dev = dev;
+	switchdev_work->event = event;
+
+	switch (event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		if (dsa_slave_switchdev_fdb_work_init(switchdev_work,
+						      ptr))
+			goto err_fdb_work_init;
+		dev_hold(dev);
+		break;
+	default:
+		kfree(switchdev_work);
+		return NOTIFY_DONE;
+	}
+
+	dsa_schedule_work(&switchdev_work->work);
+	return NOTIFY_OK;
+
+err_fdb_work_init:
+	kfree(switchdev_work);
+	return NOTIFY_BAD;
+}
+
 static struct notifier_block dsa_slave_nb __read_mostly = {
-	.notifier_call	= dsa_slave_netdevice_event,
+	.notifier_call  = dsa_slave_netdevice_event,
+};
+
+static struct notifier_block dsa_slave_switchdev_notifier = {
+	.notifier_call = dsa_slave_switchdev_event,
 };
 
 int dsa_slave_register_notifier(void)
 {
-	return register_netdevice_notifier(&dsa_slave_nb);
+	int err;
+
+	err = register_netdevice_notifier(&dsa_slave_nb);
+	if (err)
+		return err;
+
+	err = register_switchdev_notifier(&dsa_slave_switchdev_notifier);
+	if (err)
+		goto err_switchdev_nb;
+
+	return 0;
+
+err_switchdev_nb:
+	unregister_netdevice_notifier(&dsa_slave_nb);
+	return err;
 }
 
 void dsa_slave_unregister_notifier(void)
 {
 	int err;
 
+	err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
+	if (err)
+		pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err);
+
 	err = unregister_netdevice_notifier(&dsa_slave_nb);
 	if (err)
 		pr_err("DSA: failed to unregister slave notifier (%d)\n", err);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 97e2e9c8cf3f..29608d087a7c 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -83,30 +83,20 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
 static int dsa_switch_fdb_add(struct dsa_switch *ds,
 			      struct dsa_notifier_fdb_info *info)
 {
-	const struct switchdev_obj_port_fdb *fdb = info->fdb;
-	struct switchdev_trans *trans = info->trans;
-
 	/* Do not care yet about other switch chips of the fabric */
 	if (ds->index != info->sw_index)
 		return 0;
 
-	if (switchdev_trans_ph_prepare(trans)) {
-		if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
-			return -EOPNOTSUPP;
-
-		return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans);
-	}
-
-	ds->ops->port_fdb_add(ds, info->port, fdb, trans);
+	if (!ds->ops->port_fdb_add)
+		return -EOPNOTSUPP;
 
-	return 0;
+	return ds->ops->port_fdb_add(ds, info->port, info->addr,
+				     info->vid);
 }
 
 static int dsa_switch_fdb_del(struct dsa_switch *ds,
 			      struct dsa_notifier_fdb_info *info)
 {
-	const struct switchdev_obj_port_fdb *fdb = info->fdb;
-
 	/* Do not care yet about other switch chips of the fabric */
 	if (ds->index != info->sw_index)
 		return 0;
@@ -114,7 +104,8 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
 	if (!ds->ops->port_fdb_del)
 		return -EOPNOTSUPP;
 
-	return ds->ops->port_fdb_del(ds, info->port, fdb);
+	return ds->ops->port_fdb_del(ds, info->port, info->addr,
+				     info->vid);
 }
 
 static int dsa_switch_mdb_add(struct dsa_switch *ds,
@@ -130,7 +121,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
 	if (ds->index == info->sw_index)
 		set_bit(info->port, group);
 	for (port = 0; port < ds->num_ports; port++)
-		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+		if (dsa_is_dsa_port(ds, port))
 			set_bit(port, group);
 
 	if (switchdev_trans_ph_prepare(trans)) {
@@ -142,6 +133,8 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
 			if (err)
 				return err;
 		}
+
+		return 0;
 	}
 
 	for_each_set_bit(port, group, ds->num_ports)
@@ -189,6 +182,8 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
 			if (err)
 				return err;
 		}
+
+		return 0;
 	}
 
 	for_each_set_bit(port, members, ds->num_ports)
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index c697d9815177..e6e0b7b6025c 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -59,9 +59,12 @@
 #define BRCM_EG_TC_MASK		0x7
 #define BRCM_EG_PID_MASK	0x1f
 
-static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
+					struct net_device *dev,
+					unsigned int offset)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	u16 queue = skb_get_queue_mapping(skb);
 	u8 *brcm_tag;
 
 	if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
@@ -69,40 +72,42 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
 
 	skb_push(skb, BRCM_TAG_LEN);
 
-	memmove(skb->data, skb->data + BRCM_TAG_LEN, 2 * ETH_ALEN);
+	if (offset)
+		memmove(skb->data, skb->data + BRCM_TAG_LEN, offset);
 
-	/* Build the tag after the MAC Source Address */
-	brcm_tag = skb->data + 2 * ETH_ALEN;
+	brcm_tag = skb->data + offset;
 
 	/* Set the ingress opcode, traffic class, tag enforcment is
 	 * deprecated
 	 */
 	brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) |
-			((skb->priority << BRCM_IG_TC_SHIFT) & BRCM_IG_TC_MASK);
+		       ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT);
 	brcm_tag[1] = 0;
 	brcm_tag[2] = 0;
-	if (p->dp->index == 8)
+	if (dp->index == 8)
 		brcm_tag[2] = BRCM_IG_DSTMAP2_MASK;
-	brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK;
+	brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK;
+
+	/* Now tell the master network device about the desired output queue
+	 * as well
+	 */
+	skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue));
 
 	return skb;
 }
 
-static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-				    struct packet_type *pt,
-				    struct net_device *orig_dev)
+static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
+				       struct net_device *dev,
+				       struct packet_type *pt,
+				       unsigned int offset)
 {
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
-	struct dsa_switch *ds = cpu_dp->ds;
 	int source_port;
 	u8 *brcm_tag;
 
 	if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
 		return NULL;
 
-	/* skb->data points to the EtherType, the tag is right before it */
-	brcm_tag = skb->data - 2;
+	brcm_tag = skb->data - offset;
 
 	/* The opcode should never be different than 0b000 */
 	if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK))
@@ -117,24 +122,67 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	/* Locate which port this is coming from */
 	source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
 
-	/* Validate port against switch setup, either the port is totally */
-	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+	skb->dev = dsa_master_find_slave(dev, 0, source_port);
+	if (!skb->dev)
 		return NULL;
 
 	/* Remove Broadcom tag and update checksum */
 	skb_pull_rcsum(skb, BRCM_TAG_LEN);
 
+	return skb;
+}
+
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
+				     struct net_device *dev)
+{
+	/* Build the tag after the MAC Source Address */
+	return brcm_tag_xmit_ll(skb, dev, 2 * ETH_ALEN);
+}
+
+
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+				    struct packet_type *pt)
+{
+	struct sk_buff *nskb;
+
+	/* skb->data points to the EtherType, the tag is right before it */
+	nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
+	if (!nskb)
+		return nskb;
+
 	/* Move the Ethernet DA and SA */
-	memmove(skb->data - ETH_HLEN,
-		skb->data - ETH_HLEN - BRCM_TAG_LEN,
+	memmove(nskb->data - ETH_HLEN,
+		nskb->data - ETH_HLEN - BRCM_TAG_LEN,
 		2 * ETH_ALEN);
 
-	skb->dev = ds->ports[source_port].netdev;
-
-	return skb;
+	return nskb;
 }
 
 const struct dsa_device_ops brcm_netdev_ops = {
 	.xmit	= brcm_tag_xmit,
 	.rcv	= brcm_tag_rcv,
 };
+#endif
+
+#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
+static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
+					     struct net_device *dev)
+{
+	/* tag is prepended to the packet */
+	return brcm_tag_xmit_ll(skb, dev, 0);
+}
+
+static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
+					    struct net_device *dev,
+					    struct packet_type *pt)
+{
+	/* tag is prepended to the packet */
+	return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+}
+
+const struct dsa_device_ops brcm_prepend_netdev_ops = {
+	.xmit	= brcm_tag_xmit_prepend,
+	.rcv	= brcm_tag_rcv_prepend,
+};
+#endif
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 12867a4b458f..cd13cfc542ce 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -18,7 +18,7 @@
 
 static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	u8 *dsa_header;
 
 	/*
@@ -34,8 +34,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 		 * Construct tagged FROM_CPU DSA tag from 802.1q tag.
 		 */
 		dsa_header = skb->data + 2 * ETH_ALEN;
-		dsa_header[0] = 0x60 | p->dp->ds->index;
-		dsa_header[1] = p->dp->index << 3;
+		dsa_header[0] = 0x60 | dp->ds->index;
+		dsa_header[1] = dp->index << 3;
 
 		/*
 		 * Move CFI field from byte 2 to byte 1.
@@ -55,8 +55,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 		 * Construct untagged FROM_CPU DSA tag.
 		 */
 		dsa_header = skb->data + 2 * ETH_ALEN;
-		dsa_header[0] = 0x40 | p->dp->ds->index;
-		dsa_header[1] = p->dp->index << 3;
+		dsa_header[0] = 0x40 | dp->ds->index;
+		dsa_header[1] = dp->index << 3;
 		dsa_header[2] = 0x00;
 		dsa_header[3] = 0x00;
 	}
@@ -65,11 +65,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
-			       struct packet_type *pt,
-			       struct net_device *orig_dev)
+			       struct packet_type *pt)
 {
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_switch *ds;
 	u8 *dsa_header;
 	int source_device;
 	int source_port;
@@ -94,18 +91,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
 	source_device = dsa_header[0] & 0x1f;
 	source_port = (dsa_header[1] >> 3) & 0x1f;
 
-	/*
-	 * Check that the source device exists and that the source
-	 * port is a registered DSA port.
-	 */
-	if (source_device >= DSA_MAX_SWITCHES)
-		return NULL;
-
-	ds = dst->ds[source_device];
-	if (!ds)
-		return NULL;
-
-	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+	skb->dev = dsa_master_find_slave(dev, source_device, source_port);
+	if (!skb->dev)
 		return NULL;
 
 	/*
@@ -154,7 +141,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
 			2 * ETH_ALEN);
 	}
 
-	skb->dev = ds->ports[source_port].netdev;
+	skb->offload_fwd_mark = 1;
 
 	return skb;
 }
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 67a9d26f9075..4083326b806e 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -19,7 +19,7 @@
 
 static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	u8 *edsa_header;
 
 	/*
@@ -43,8 +43,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 		edsa_header[1] = ETH_P_EDSA & 0xff;
 		edsa_header[2] = 0x00;
 		edsa_header[3] = 0x00;
-		edsa_header[4] = 0x60 | p->dp->ds->index;
-		edsa_header[5] = p->dp->index << 3;
+		edsa_header[4] = 0x60 | dp->ds->index;
+		edsa_header[5] = dp->index << 3;
 
 		/*
 		 * Move CFI field from byte 6 to byte 5.
@@ -68,8 +68,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 		edsa_header[1] = ETH_P_EDSA & 0xff;
 		edsa_header[2] = 0x00;
 		edsa_header[3] = 0x00;
-		edsa_header[4] = 0x40 | p->dp->ds->index;
-		edsa_header[5] = p->dp->index << 3;
+		edsa_header[4] = 0x40 | dp->ds->index;
+		edsa_header[5] = dp->index << 3;
 		edsa_header[6] = 0x00;
 		edsa_header[7] = 0x00;
 	}
@@ -78,11 +78,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
-				struct packet_type *pt,
-				struct net_device *orig_dev)
+				struct packet_type *pt)
 {
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_switch *ds;
 	u8 *edsa_header;
 	int source_device;
 	int source_port;
@@ -107,18 +104,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
 	source_device = edsa_header[0] & 0x1f;
 	source_port = (edsa_header[1] >> 3) & 0x1f;
 
-	/*
-	 * Check that the source device exists and that the source
-	 * port is a registered DSA port.
-	 */
-	if (source_device >= DSA_MAX_SWITCHES)
-		return NULL;
-
-	ds = dst->ds[source_device];
-	if (!ds)
-		return NULL;
-
-	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+	skb->dev = dsa_master_find_slave(dev, source_device, source_port);
+	if (!skb->dev)
 		return NULL;
 
 	/*
@@ -173,7 +160,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
 			2 * ETH_ALEN);
 	}
 
-	skb->dev = ds->ports[source_port].netdev;
+	skb->offload_fwd_mark = 1;
 
 	return skb;
 }
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index de66ca8e6201..0f62effad88f 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -34,7 +34,7 @@
 
 static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct sk_buff *nskb;
 	int padlen;
 	u8 *tag;
@@ -42,7 +42,8 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
 	padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
 
 	if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
-		if (skb_put_padto(skb, skb->len + padlen))
+		/* Let dsa_slave_xmit() free skb */
+		if (__skb_put_padto(skb, skb->len + padlen, false))
 			return NULL;
 
 		nskb = skb;
@@ -60,41 +61,38 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
 					 skb_transport_header(skb) - skb->head);
 		skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
 
-		if (skb_put_padto(nskb, nskb->len + padlen)) {
-			kfree_skb(nskb);
+		/* Let skb_put_padto() free nskb, and let dsa_slave_xmit() free
+		 * skb
+		 */
+		if (skb_put_padto(nskb, nskb->len + padlen))
 			return NULL;
-		}
 
-		kfree_skb(skb);
+		consume_skb(skb);
 	}
 
 	tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
 	tag[0] = 0;
-	tag[1] = 1 << p->dp->index; /* destination port */
+	tag[1] = 1 << dp->index; /* destination port */
 
 	return nskb;
 }
 
 static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
-			       struct packet_type *pt,
-			       struct net_device *orig_dev)
+			       struct packet_type *pt)
 {
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
-	struct dsa_switch *ds = cpu_dp->ds;
 	u8 *tag;
 	int source_port;
 
 	tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
 
 	source_port = tag[0] & 7;
-	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+
+	skb->dev = dsa_master_find_slave(dev, 0, source_port);
+	if (!skb->dev)
 		return NULL;
 
 	pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
 
-	skb->dev = ds->ports[source_port].netdev;
-
 	return skb;
 }
 
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 247774d149f9..548c00254c07 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -11,6 +11,7 @@
  * GNU General Public License for more details.
  *
  */
+#include <linux/dsa/lan9303.h>
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -39,11 +40,30 @@
  */
 
 #define LAN9303_TAG_LEN 4
-#define LAN9303_MAX_PORTS 3
+# define LAN9303_TAG_TX_USE_ALR BIT(3)
+# define LAN9303_TAG_TX_STP_OVERRIDE BIT(4)
+# define LAN9303_TAG_RX_IGMP BIT(3)
+# define LAN9303_TAG_RX_STP BIT(4)
+# define LAN9303_TAG_RX_TRAPPED_TO_CPU (LAN9303_TAG_RX_IGMP | \
+					LAN9303_TAG_RX_STP)
+
+/* Decide whether to transmit using ALR lookup, or transmit directly to
+ * port using tag. ALR learning is performed only when using ALR lookup.
+ * Returns non-zero if the two external ports are bridged (chip->is_bridged)
+ * and dest_addr is not multicast: use ALR lookup so ALR can learn on the
+ * CPU port. Returns 0 otherwise: transmit directly with STP state override.
+ * See also: lan9303_separate_ports() and lan9303.pdf 6.4.10.1
+ */
+static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr)
+{
+	struct lan9303 *chip = dp->ds->priv;
+
+	return chip->is_bridged && !is_multicast_ether_addr(dest_addr);
+}
 
 static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	u16 *lan9303_tag;
 
 	/* insert a special VLAN tag between the MAC addresses
@@ -63,26 +83,21 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
 	lan9303_tag[0] = htons(ETH_P_8021Q);
-	lan9303_tag[1] = htons(p->dp->index | BIT(4));
+	lan9303_tag[1] = lan9303_xmit_use_arl(dp, skb->data) ?
+				LAN9303_TAG_TX_USE_ALR :
+				dp->index | LAN9303_TAG_TX_STP_OVERRIDE;
+	lan9303_tag[1] = htons(lan9303_tag[1]);
 
 	return skb;
 }
 
 static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
-			struct packet_type *pt, struct net_device *orig_dev)
+				   struct packet_type *pt)
 {
 	u16 *lan9303_tag;
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_switch *ds;
+	u16 lan9303_tag1;
 	unsigned int source_port;
 
-	ds = dst->ds[0];
-
-	if (unlikely(!ds)) {
-		dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n");
-		return NULL;
-	}
-
 	if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) {
 		dev_warn_ratelimited(&dev->dev,
 				     "Dropping packet, cannot pull\n");
@@ -102,27 +117,22 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
 		return NULL;
 	}
 
-	source_port = ntohs(lan9303_tag[1]) & 0x3;
+	lan9303_tag1 = ntohs(lan9303_tag[1]);
+	source_port = lan9303_tag1 & 0x3;
 
-	if (source_port >= LAN9303_MAX_PORTS) {
+	skb->dev = dsa_master_find_slave(dev, 0, source_port);
+	if (!skb->dev) {
 		dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
 		return NULL;
 	}
 
-	if (!ds->ports[source_port].netdev) {
-		dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n");
-		return NULL;
-	}
-
 	/* remove the special VLAN tag between the MAC addresses
 	 * and the current ethertype field.
 	 */
 	skb_pull_rcsum(skb, 2 + 2);
 	memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
 		2 * ETH_ALEN);
-
-	/* forward the packet to the dedicated interface */
-	skb->dev = ds->ports[source_port].netdev;
+	skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU);
 
 	return skb;
 }
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 2f32b7ea3365..8475434af7d5 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -23,7 +23,7 @@
 static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 				    struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	u8 *mtk_tag;
 
 	if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
@@ -36,7 +36,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 	/* Build the tag after the MAC Source Address */
 	mtk_tag = skb->data + 2 * ETH_ALEN;
 	mtk_tag[0] = 0;
-	mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+	mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
 	mtk_tag[2] = 0;
 	mtk_tag[3] = 0;
 
@@ -44,11 +44,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-				   struct packet_type *pt,
-				   struct net_device *orig_dev)
+				   struct packet_type *pt)
 {
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_switch *ds;
 	int port;
 	__be16 *phdr, hdr;
 
@@ -69,25 +66,27 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 		skb->data - ETH_HLEN - MTK_HDR_LEN,
 		2 * ETH_ALEN);
 
-	/* This protocol doesn't support cascading multiple
-	 * switches so it's safe to assume the switch is first
-	 * in the tree.
-	 */
-	ds = dst->ds[0];
-	if (!ds)
-		return NULL;
-
 	/* Get source port information */
 	port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
-	if (!ds->ports[port].netdev)
-		return NULL;
 
-	skb->dev = ds->ports[port].netdev;
+	skb->dev = dsa_master_find_slave(dev, 0, port);
+	if (!skb->dev)
+		return NULL;
 
 	return skb;
 }
 
+static int mtk_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+				int *offset)
+{
+	*offset = 4;
+	*proto = ((__be16 *)skb->data)[1];
+
+	return 0;
+}
+
 const struct dsa_device_ops mtk_netdev_ops = {
-	.xmit	= mtk_tag_xmit,
-	.rcv	= mtk_tag_rcv,
+	.xmit		= mtk_tag_xmit,
+	.rcv		= mtk_tag_rcv,
+	.flow_dissect	= mtk_tag_flow_dissect,
 };
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 1867a3d11f28..613f4ee97771 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -38,7 +38,7 @@
 
 static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	u16 *phdr, hdr;
 
 	dev->stats.tx_packets++;
@@ -54,8 +54,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* Set the version field, and set destination port information */
 	hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
-		QCA_HDR_XMIT_FROM_CPU |
-		BIT(p->dp->index);
+		QCA_HDR_XMIT_FROM_CPU | BIT(dp->index);
 
 	*phdr = htons(hdr);
 
@@ -63,12 +62,8 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-				   struct packet_type *pt,
-				   struct net_device *orig_dev)
+				   struct packet_type *pt)
 {
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
-	struct dsa_switch *ds;
 	u8 ver;
 	int port;
 	__be16 *phdr, hdr;
@@ -93,20 +88,12 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
 		ETH_HLEN - QCA_HDR_LEN);
 
-	/* This protocol doesn't support cascading multiple switches so it's
-	 * safe to assume the switch is first in the tree
-	 */
-	ds = cpu_dp->ds;
-	if (!ds)
-		return NULL;
-
 	/* Get source port information */
 	port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
-	if (!ds->ports[port].netdev)
-		return NULL;
 
-	/* Update skb & forward the frame accordingly */
-	skb->dev = ds->ports[port].netdev;
+	skb->dev = dsa_master_find_slave(dev, 0, port);
+	if (!skb->dev)
+		return NULL;
 
 	return skb;
 }
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index b09e56214005..7d20e1f3de28 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -16,7 +16,7 @@
 
 static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct sk_buff *nskb;
 	int padlen;
 	u8 *trailer;
@@ -40,7 +40,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_set_network_header(nskb, skb_network_header(skb) - skb->head);
 	skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head);
 	skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
-	kfree_skb(skb);
+	consume_skb(skb);
 
 	if (padlen) {
 		skb_put_zero(nskb, padlen);
@@ -48,7 +48,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	trailer = skb_put(nskb, 4);
 	trailer[0] = 0x80;
-	trailer[1] = 1 << p->dp->index;
+	trailer[1] = 1 << dp->index;
 	trailer[2] = 0x10;
 	trailer[3] = 0x00;
 
@@ -56,12 +56,8 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
-				   struct packet_type *pt,
-				   struct net_device *orig_dev)
+				   struct packet_type *pt)
 {
-	struct dsa_switch_tree *dst = dev->dsa_ptr;
-	struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
-	struct dsa_switch *ds = cpu_dp->ds;
 	u8 *trailer;
 	int source_port;
 
@@ -74,13 +70,13 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
 		return NULL;
 
 	source_port = trailer[1] & 7;
-	if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+
+	skb->dev = dsa_master_find_slave(dev, 0, source_port);
+	if (!skb->dev)
 		return NULL;
 
 	pskb_trim_rcsum(skb, skb->len - 4);
 
-	skb->dev = ds->ports[source_port].netdev;
-
 	return skb;
 }
 
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 4e7bdb213cd0..b8cd43c9ed5b 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -314,7 +314,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
 	hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
 	ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr);
 
-	skb_put_padto(skb, ETH_ZLEN + HSR_HLEN);
+	if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN))
+		return;
 
 	hsr_forward_skb(skb, master);
 	return;
@@ -327,12 +328,12 @@ out:
 
 /* Announce (supervision frame) timer function
  */
-static void hsr_announce(unsigned long data)
+static void hsr_announce(struct timer_list *t)
 {
 	struct hsr_priv *hsr;
 	struct hsr_port *master;
 
-	hsr = (struct hsr_priv *) data;
+	hsr = from_timer(hsr, t, announce_timer);
 
 	rcu_read_lock();
 	master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
@@ -462,9 +463,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
 	hsr->sequence_nr = HSR_SEQNR_START;
 	hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
 
-	setup_timer(&hsr->announce_timer, hsr_announce, (unsigned long)hsr);
-
-	setup_timer(&hsr->prune_timer, hsr_prune_nodes, (unsigned long)hsr);
+	timer_setup(&hsr->announce_timer, hsr_announce, 0);
+	timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
 
 	ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
 	hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 284a9b820df8..286ceb41ac0c 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -365,16 +365,14 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr,
 /* Remove stale sequence_nr records. Called by timer every
  * HSR_LIFE_CHECK_INTERVAL (two seconds or so).
  */
-void hsr_prune_nodes(unsigned long data)
+void hsr_prune_nodes(struct timer_list *t)
 {
-	struct hsr_priv *hsr;
+	struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
 	struct hsr_node *node;
 	struct hsr_port *port;
 	unsigned long timestamp;
 	unsigned long time_a, time_b;
 
-	hsr = (struct hsr_priv *) data;
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
 		/* Shorthand */
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 4e04f0e868e9..370b45998121 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -33,7 +33,7 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
 int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
 			   u16 sequence_nr);
 
-void hsr_prune_nodes(unsigned long data);
+void hsr_prune_nodes(struct timer_list *t);
 
 int hsr_create_self_node(struct list_head *self_node_db,
 			 unsigned char addr_a[ETH_ALEN],
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index ac7c96b73ad5..d8de3bcfb103 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __IEEE802154_6LOWPAN_I_H__
 #define __IEEE802154_6LOWPAN_I_H__
 
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index de2661cd0328..974765b7d92a 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -54,7 +54,7 @@
 
 static int open_count;
 
-static struct header_ops lowpan_header_ops = {
+static const struct header_ops lowpan_header_ops = {
 	.create	= lowpan_header_create,
 };
 
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 30d875dff6b5..85bf86ad6b18 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -80,12 +80,13 @@ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
 	fq->daddr = *arg->dst;
 }
 
-static void lowpan_frag_expire(unsigned long data)
+static void lowpan_frag_expire(struct timer_list *t)
 {
+	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct frag_queue *fq;
 	struct net *net;
 
-	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+	fq = container_of(frag, struct frag_queue, q);
 	net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags);
 
 	spin_lock(&fq->q.lock);
@@ -580,19 +581,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
 {
 	struct netns_ieee802154_lowpan *ieee802154_lowpan =
 		net_ieee802154_lowpan(net);
-	int res;
 
 	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
 
-	res = inet_frags_init_net(&ieee802154_lowpan->frags);
-	if (res)
-		return res;
-	res = lowpan_frags_ns_sysctl_register(net);
-	if (res)
-		inet_frags_uninit_net(&ieee802154_lowpan->frags);
-	return res;
+	inet_frags_init_net(&ieee802154_lowpan->frags);
+
+	return lowpan_frags_ns_sysctl_register(net);
 }
 
 static void __net_exit lowpan_frags_exit_net(struct net *net)
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 9b92ade687a3..f05b7bdae2aa 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_IEEE802154) += ieee802154.o
 obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o
 obj-y += 6lowpan/
diff --git a/net/ieee802154/core.h b/net/ieee802154/core.h
index 81141f58d079..1c19f575d574 100644
--- a/net/ieee802154/core.h
+++ b/net/ieee802154/core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __IEEE802154_CORE_H
 #define __IEEE802154_CORE_H
 
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 6bde9e5a5503..96636e3b7aa9 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -89,7 +89,7 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info)
 	return genlmsg_reply(msg, info);
 }
 
-static const struct genl_ops ieee8021154_ops[] = {
+static const struct genl_ops ieee802154_ops[] = {
 	/* see nl-phy.c */
 	IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy,
 			ieee802154_dump_phy),
@@ -137,8 +137,8 @@ struct genl_family nl802154_family __ro_after_init = {
 	.version	= 1,
 	.maxattr	= IEEE802154_ATTR_MAX,
 	.module		= THIS_MODULE,
-	.ops		= ieee8021154_ops,
-	.n_ops		= ARRAY_SIZE(ieee8021154_ops),
+	.ops		= ieee802154_ops,
+	.n_ops		= ARRAY_SIZE(ieee802154_ops),
 	.mcgrps		= ieee802154_mcgrps,
 	.n_mcgrps	= ARRAY_SIZE(ieee802154_mcgrps),
 };
diff --git a/net/ieee802154/nl802154.h b/net/ieee802154/nl802154.h
index 3846a89d0958..8c4b6d08954c 100644
--- a/net/ieee802154/nl802154.h
+++ b/net/ieee802154/nl802154.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __IEEE802154_NL802154_H
 #define __IEEE802154_NL802154_H
 
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 4441c63b3ea6..598f5af49775 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __CFG802154_RDEV_OPS
 #define __CFG802154_RDEV_OPS
 
diff --git a/net/ieee802154/sysfs.h b/net/ieee802154/sysfs.h
index aa42e39ecbec..337545b639e9 100644
--- a/net/ieee802154/sysfs.h
+++ b/net/ieee802154/sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __IEEE802154_SYSFS_H
 #define __IEEE802154_SYSFS_H
 
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 9a471e41ec73..19c2e5d60e76 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Based on net/wireless/trace.h */
 
 #undef TRACE_SYSTEM
diff --git a/net/ife/ife.c b/net/ife/ife.c
index f360341c72eb..7d1ec76e7f43 100644
--- a/net/ife/ife.c
+++ b/net/ife/ife.c
@@ -137,6 +137,6 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
 EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
 
 MODULE_AUTHOR("Jamal Hadi Salim <jhs@mojatatu.com>");
-MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
 MODULE_DESCRIPTION("Inter-FE LFB action");
 MODULE_LICENSE("GPL");
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 91a2557942fa..f48fe6fc7e8c 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -70,11 +70,9 @@ config IP_MULTIPLE_TABLES
 	  address into account. Furthermore, the TOS (Type-Of-Service) field
 	  of the packet can be used for routing decisions as well.
 
-	  If you are interested in this, please see the preliminary
-	  documentation at <http://www.compendium.com.ar/policy-routing.txt>
-	  and <ftp://post.tepkom.ru/pub/vol2/Linux/docs/advanced-routing.tex>.
-	  You will need supporting software from
-	  <ftp://ftp.tux.org/pub/net/ip-routing/>.
+	  If you need more information, see the Linux Advanced
+	  Routing and Traffic Control documentation at
+	  <http://lartc.org/howto/lartc.rpdb.html>.
 
 	  If unsure, say N.
 
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index afcb435adfbe..c6c8ad1d4b6d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux TCP/IP (INET) layer.
 #
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2e548eca3489..ce4aa827be05 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
 {
 	struct sock *sk = sock->sk;
 	unsigned char old_state;
-	int err;
+	int err, tcp_fastopen;
 
 	lock_sock(sk);
 
@@ -217,11 +217,12 @@ int inet_listen(struct socket *sock, int backlog)
 		 * because the socket was in TCP_LISTEN state previously but
 		 * was shutdown() rather than close().
 		 */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
-		    (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+		tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+		if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+		    (tcp_fastopen & TFO_SERVER_ENABLE) &&
 		    !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
 			fastopen_queue_tune(sk, backlog);
-			tcp_fastopen_init_key_once(true);
+			tcp_fastopen_init_key_once(sock_net(sk));
 		}
 
 		err = inet_csk_listen_start(sk, backlog);
@@ -826,6 +827,7 @@ int inet_shutdown(struct socket *sock, int how)
 		err = -ENOTCONN;
 		/* Hack to wake up other listeners, who can poll for
 		   POLLHUP, even on eg. unconnected UDP sockets -- RR */
+		/* fall through */
 	default:
 		sk->sk_shutdown |= how;
 		if (sk->sk_prot->shutdown)
@@ -839,7 +841,7 @@ int inet_shutdown(struct socket *sock, int how)
 	case TCP_LISTEN:
 		if (!(how & RCV_SHUTDOWN))
 			break;
-		/* Fall through */
+		/* fall through */
 	case TCP_SYN_SENT:
 		err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
 		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
@@ -944,6 +946,8 @@ const struct proto_ops inet_stream_ops = {
 	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 	.read_sock	   = tcp_read_sock,
+	.sendmsg_locked    = tcp_sendmsg_locked,
+	.sendpage_locked   = tcp_sendpage_locked,
 	.peek_len	   = tcp_peek_len,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
@@ -1219,10 +1223,9 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
 struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 				 netdev_features_t features)
 {
-	bool udpfrag = false, fixedid = false, gso_partial, encap;
+	bool fixedid = false, gso_partial, encap;
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	const struct net_offload *ops;
-	unsigned int offset = 0;
 	struct iphdr *iph;
 	int proto, tot_len;
 	int nhoff;
@@ -1257,7 +1260,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 	segs = ERR_PTR(-EPROTONOSUPPORT);
 
 	if (!skb->encapsulation || encap) {
-		udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
 		fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
 
 		/* fixed ID is invalid if DF bit is not set */
@@ -1277,13 +1279,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 	skb = segs;
 	do {
 		iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
-		if (udpfrag) {
-			iph->frag_off = htons(offset >> 3);
-			if (skb->next)
-				iph->frag_off |= htons(IP_MF);
-			offset += skb->len - nhoff - ihl;
-			tot_len = skb->len - nhoff;
-		} else if (skb_is_gso(skb)) {
+		if (skb_is_gso(skb)) {
 			if (!fixedid) {
 				iph->id = htons(id);
 				id += skb_shinfo(skb)->gso_segs;
@@ -1602,6 +1598,9 @@ static const struct net_protocol igmp_protocol = {
 };
 #endif
 
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
 static struct net_protocol tcp_protocol = {
 	.early_demux	=	tcp_v4_early_demux,
 	.early_demux_handler =  tcp_v4_early_demux,
@@ -1612,6 +1611,9 @@ static struct net_protocol tcp_protocol = {
 	.icmp_strict_tag_validation = 1,
 };
 
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
 static struct net_protocol udp_protocol = {
 	.early_demux =	udp_v4_early_demux,
 	.early_demux_handler =	udp_v4_early_demux,
@@ -1778,6 +1780,11 @@ static const struct net_offload ipip_offload = {
 	},
 };
 
+static int __init ipip_offload_init(void)
+{
+	return inet_add_offload(&ipip_offload, IPPROTO_IPIP);
+}
+
 static int __init ipv4_offload_init(void)
 {
 	/*
@@ -1787,9 +1794,10 @@ static int __init ipv4_offload_init(void)
 		pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
 	if (tcpv4_offload_init() < 0)
 		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
+	if (ipip_offload_init() < 0)
+		pr_crit("%s: Cannot add IPIP protocol offload\n", __func__);
 
 	dev_add_offload(&ip_packet_offload);
-	inet_add_offload(&ipip_offload, IPPROTO_IPIP);
 	return 0;
 }
 
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 37db44f60718..4dd95cdd8070 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -240,7 +240,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 		if (err == -EINPROGRESS)
 			goto out;
 
-		if (err == -EBUSY)
+		if (err == -ENOSPC)
 			err = NET_XMIT_DROP;
 		goto out_free;
 	}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8b52179ddc6e..a8d7c5a9fb05 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -171,7 +171,7 @@ struct neigh_table arp_tbl = {
 			[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
 			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
 			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
-			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
 			[NEIGH_VAR_PROXY_QLEN] = 64,
 			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
 			[NEIGH_VAR_PROXY_DELAY]	= (8 * HZ) / 10,
@@ -1180,6 +1180,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCSARP:
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
+		/* fall through */
 	case SIOCGARP:
 		err = copy_from_user(&r, arg, sizeof(struct arpreq));
 		if (err)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2ae8f54cb321..82178cc69c96 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
 	buf = NULL;
 
 	req_inet = inet_rsk(req);
-	opt = xchg(&req_inet->opt, opt);
+	opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
 	if (opt)
 		kfree_rcu(opt, rcu);
 
@@ -1973,11 +1973,13 @@ req_setattr_failure:
  * values on failure.
  *
  */
-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
+static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
 {
+	struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
 	int hdr_delta = 0;
-	struct ip_options_rcu *opt = *opt_ptr;
 
+	if (!opt || opt->opt.cipso == 0)
+		return 0;
 	if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
 		u8 cipso_len;
 		u8 cipso_off;
@@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
  */
 void cipso_v4_sock_delattr(struct sock *sk)
 {
-	int hdr_delta;
-	struct ip_options_rcu *opt;
 	struct inet_sock *sk_inet;
+	int hdr_delta;
 
 	sk_inet = inet_sk(sk);
-	opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
-	if (!opt || opt->opt.cipso == 0)
-		return;
 
 	hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
 	if (sk_inet->is_icsk && hdr_delta > 0) {
@@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
  */
 void cipso_v4_req_delattr(struct request_sock *req)
 {
-	struct ip_options_rcu *opt;
-	struct inet_request_sock *req_inet;
-
-	req_inet = inet_rsk(req);
-	opt = req_inet->opt;
-	if (!opt || opt->opt.cipso == 0)
-		return;
-
-	cipso_v4_delopt(&req_inet->opt);
+	cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
 }
 
 /**
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 38d9af9b917c..a4573bccd6da 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -137,22 +137,12 @@ static void inet_hash_remove(struct in_ifaddr *ifa)
  */
 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 {
-	u32 hash = inet_addr_hash(net, addr);
 	struct net_device *result = NULL;
 	struct in_ifaddr *ifa;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
-		if (ifa->ifa_local == addr) {
-			struct net_device *dev = ifa->ifa_dev->dev;
-
-			if (!net_eq(dev_net(dev), net))
-				continue;
-			result = dev;
-			break;
-		}
-	}
-	if (!result) {
+	ifa = inet_lookup_ifaddr_rcu(net, addr);
+	if (!ifa) {
 		struct flowi4 fl4 = { .daddr = addr };
 		struct fib_result res = { 0 };
 		struct fib_table *local;
@@ -165,6 +155,8 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 		    res.type == RTN_LOCAL)
 			result = FIB_RES_DEV(res);
+	} else {
+		result = ifa->ifa_dev->dev;
 	}
 	if (result && devref)
 		dev_hold(result);
@@ -173,6 +165,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 }
 EXPORT_SYMBOL(__ip_dev_find);
 
+/* called under RCU lock */
+struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
+{
+	u32 hash = inet_addr_hash(net, addr);
+	struct in_ifaddr *ifa;
+
+	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
+		if (ifa->ifa_local == addr &&
+		    net_eq(dev_net(ifa->ifa_dev->dev), net))
+			return ifa;
+
+	return NULL;
+}
+
 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 
 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -438,7 +444,7 @@ static void check_lifetime(struct work_struct *work);
 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
 
 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
-			     u32 portid)
+			     u32 portid, struct netlink_ext_ack *extack)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -483,6 +489,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 	 */
 	ivi.ivi_addr = ifa->ifa_address;
 	ivi.ivi_dev = ifa->ifa_dev;
+	ivi.extack = extack;
 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
 					   NETDEV_UP, &ivi);
 	ret = notifier_to_errno(ret);
@@ -515,7 +522,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 
 static int inet_insert_ifa(struct in_ifaddr *ifa)
 {
-	return __inet_insert_ifa(ifa, NULL, 0);
+	return __inet_insert_ifa(ifa, NULL, 0, NULL);
 }
 
 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
@@ -896,7 +903,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 				return ret;
 			}
 		}
-		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
+					 extack);
 	} else {
 		inet_free_ifa(ifa);
 
@@ -1516,6 +1524,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		if (inetdev_valid_mtu(dev->mtu))
 			break;
 		/* disable IP when MTU is not enough */
+		/* fall through */
 	case NETDEV_UNREGISTER:
 		inetdev_destroy(in_dev);
 		break;
@@ -1751,7 +1760,7 @@ static int inet_validate_link_af(const struct net_device *dev,
 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
 	int err, rem;
 
-	if (dev && !__in_dev_get_rtnl(dev))
+	if (dev && !__in_dev_get_rcu(dev))
 		return -EAFNOSUPPORT;
 
 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
@@ -1775,7 +1784,7 @@ static int inet_validate_link_af(const struct net_device *dev,
 
 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
 {
-	struct in_device *in_dev = __in_dev_get_rtnl(dev);
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
 	int rem;
 
@@ -2491,9 +2500,9 @@ void __init devinet_init(void)
 
 	rtnl_af_register(&inet_af_ops);
 
-	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
-	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
-	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
+	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
+	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
+	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
-		      inet_netconf_dump_devconf, NULL);
+		      inet_netconf_dump_devconf, 0);
 }
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 0cbee0a666ff..d57aa64fa7c7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -258,7 +258,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 		esp_output_udp_encap(x, skb, esp);
 
 	if (!skb_cloned(skb)) {
-		if (tailen <= skb_availroom(skb)) {
+		if (tailen <= skb_tailroom(skb)) {
 			nfrags = 1;
 			trailer = skb;
 			tail = skb_tail_pointer(trailer);
@@ -292,8 +292,6 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 
 			kunmap_atomic(vaddr);
 
-			spin_unlock_bh(&x->lock);
-
 			nfrags = skb_shinfo(skb)->nr_frags;
 
 			__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -301,6 +299,9 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 			skb_shinfo(skb)->nr_frags = ++nfrags;
 
 			pfrag->offset = pfrag->offset + allocsize;
+
+			spin_unlock_bh(&x->lock);
+
 			nfrags++;
 
 			skb->len += tailen;
@@ -381,7 +382,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 		           (unsigned char *)esph - skb->data,
 		           assoclen + ivlen + esp->clen + alen);
 	if (unlikely(err < 0))
-		goto error;
+		goto error_free;
 
 	if (!esp->inplace) {
 		int allocsize;
@@ -392,7 +393,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 		spin_lock_bh(&x->lock);
 		if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
 			spin_unlock_bh(&x->lock);
-			goto error;
+			goto error_free;
 		}
 
 		skb_shinfo(skb)->nr_frags = 1;
@@ -409,7 +410,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 			           (unsigned char *)esph - skb->data,
 			           assoclen + ivlen + esp->clen + alen);
 		if (unlikely(err < 0))
-			goto error;
+			goto error_free;
 	}
 
 	if ((x->props.flags & XFRM_STATE_ESN))
@@ -431,7 +432,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 	case -EINPROGRESS:
 		goto error;
 
-	case -EBUSY:
+	case -ENOSPC:
 		err = NET_XMIT_DROP;
 		break;
 
@@ -442,8 +443,9 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
 
 	if (sg != dsg)
 		esp_ssg_unref(x, tmp);
-	kfree(tmp);
 
+error_free:
+	kfree(tmp);
 error:
 	return err;
 }
@@ -499,18 +501,59 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	return esp_output_tail(x, skb, &esp);
 }
 
+static inline int esp_remove_trailer(struct sk_buff *skb)
+{
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct crypto_aead *aead = x->data;
+	int alen, hlen, elen;
+	int padlen, trimlen;
+	__wsum csumdiff;
+	u8 nexthdr[2];
+	int ret;
+
+	alen = crypto_aead_authsize(aead);
+	hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+	elen = skb->len - hlen;
+
+	if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
+		ret = xo->proto;
+		goto out;
+	}
+
+	if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
+		BUG();
+
+	ret = -EINVAL;
+	padlen = nexthdr[0];
+	if (padlen + 2 + alen >= elen) {
+		net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
+				    padlen + 2, elen - alen);
+		goto out;
+	}
+
+	trimlen = alen + padlen + 2;
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
+		skb->csum = csum_block_sub(skb->csum, csumdiff,
+					   skb->len - trimlen);
+	}
+	pskb_trim(skb, skb->len - trimlen);
+
+	ret = nexthdr[1];
+
+out:
+	return ret;
+}
+
 int esp_input_done2(struct sk_buff *skb, int err)
 {
 	const struct iphdr *iph;
 	struct xfrm_state *x = xfrm_input_state(skb);
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct crypto_aead *aead = x->data;
-	int alen = crypto_aead_authsize(aead);
 	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
-	int elen = skb->len - hlen;
 	int ihl;
-	u8 nexthdr[2];
-	int padlen;
 
 	if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
 		kfree(ESP_SKB_CB(skb)->tmp);
@@ -518,16 +561,10 @@ int esp_input_done2(struct sk_buff *skb, int err)
 	if (unlikely(err))
 		goto out;
 
-	if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
-		BUG();
-
-	err = -EINVAL;
-	padlen = nexthdr[0];
-	if (padlen + 2 + alen >= elen)
+	err = esp_remove_trailer(skb);
+	if (unlikely(err < 0))
 		goto out;
 
-	/* ... check padding bits here. Silly. :-) */
-
 	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 
@@ -568,15 +605,12 @@ int esp_input_done2(struct sk_buff *skb, int err)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	pskb_trim(skb, skb->len - alen - padlen - 2);
-	__skb_pull(skb, hlen);
+	skb_pull_rcsum(skb, hlen);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		skb_reset_transport_header(skb);
 	else
 		skb_set_transport_header(skb, -ihl);
 
-	err = nexthdr[1];
-
 	/* RFC4303: Drop dummy packets without any error */
 	if (err == IPPROTO_NONE)
 		err = -EINVAL;
@@ -695,8 +729,10 @@ skip_cow:
 
 	sg_init_table(sg, nfrags);
 	err = skb_to_sgvec(skb, sg, 0, skb->len);
-	if (unlikely(err < 0))
+	if (unlikely(err < 0)) {
+		kfree(tmp);
 		goto out;
+	}
 
 	skb->ip_summed = CHECKSUM_NONE;
 
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index e0666016a764..f8b918c766b0 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -182,11 +182,13 @@ out:
 static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct crypto_aead *aead = x->data;
+	struct xfrm_offload *xo = xfrm_offload(skb);
 
 	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
 		return -EINVAL;
 
-	skb->ip_summed = CHECKSUM_NONE;
+	if (!(xo->flags & CRYPTO_DONE))
+		skb->ip_summed = CHECKSUM_NONE;
 
 	return esp_input_done2(skb, 0);
 }
@@ -257,7 +259,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features_
 	esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
 
 	err = esp_output_tail(x, skb, &esp);
-	if (err < 0)
+	if (err)
 		return err;
 
 	secpath_reset(skb);
@@ -303,3 +305,4 @@ module_init(esp4_offload_init);
 module_exit(esp4_offload_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
+MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 044d2a159a3c..f52d27a422c3 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -73,6 +73,11 @@ fail:
 	fib_free_table(main_table);
 	return -ENOMEM;
 }
+
+static bool fib4_has_custom_rules(struct net *net)
+{
+	return false;
+}
 #else
 
 struct fib_table *fib_new_table(struct net *net, u32 id)
@@ -128,6 +133,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 	}
 	return NULL;
 }
+
+static bool fib4_has_custom_rules(struct net *net)
+{
+	return net->ipv4.fib_has_custom_rules;
+}
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 static void fib_replace_table(struct net *net, struct fib_table *old,
@@ -345,9 +355,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 	if (res.type != RTN_UNICAST &&
 	    (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
 		goto e_inval;
-	if (!rpf && !fib_num_tclassid_users(net) &&
-	    (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
-		goto last_resort;
 	fib_combine_itag(itag, &res);
 	dev_match = false;
 
@@ -402,13 +409,28 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 			struct in_device *idev, u32 *itag)
 {
 	int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
+	struct net *net = dev_net(dev);
 
-	if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
-	    IN_DEV_ACCEPT_LOCAL(idev) &&
+	if (!r && !fib_num_tclassid_users(net) &&
 	    (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
+		if (IN_DEV_ACCEPT_LOCAL(idev))
+			goto ok;
+		/* with custom local routes in place, checking local addresses
+		 * only will be too optimistic, with custom rules, checking
+		 * local addresses only can be too strict, e.g. due to vrf
+		 */
+		if (net->ipv4.fib_has_custom_local_routes ||
+		    fib4_has_custom_rules(net))
+			goto full_check;
+		if (inet_lookup_ifaddr_rcu(net, src))
+			return -EINVAL;
+
+ok:
 		*itag = 0;
 		return 0;
 	}
+
+full_check:
 	return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
 }
 
@@ -759,6 +781,8 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	err = fib_table_insert(net, tb, &cfg, extack);
+	if (!err && cfg.fc_type == RTN_LOCAL)
+		net->ipv4.fib_has_custom_local_routes = true;
 errout:
 	return err;
 }
@@ -1247,22 +1271,28 @@ static int __net_init ip_fib_net_init(struct net *net)
 	int err;
 	size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
 
-	net->ipv4.fib_seq = 0;
+	err = fib4_notifier_init(net);
+	if (err)
+		return err;
 
 	/* Avoid false sharing : Use at least a full cache line */
 	size = max_t(size_t, size, L1_CACHE_BYTES);
 
 	net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
-	if (!net->ipv4.fib_table_hash)
-		return -ENOMEM;
+	if (!net->ipv4.fib_table_hash) {
+		err = -ENOMEM;
+		goto err_table_hash_alloc;
+	}
 
 	err = fib4_rules_init(net);
 	if (err < 0)
-		goto fail;
+		goto err_rules_init;
 	return 0;
 
-fail:
+err_rules_init:
 	kfree(net->ipv4.fib_table_hash);
+err_table_hash_alloc:
+	fib4_notifier_exit(net);
 	return err;
 }
 
@@ -1292,6 +1322,7 @@ static void ip_fib_net_exit(struct net *net)
 #endif
 	rtnl_unlock();
 	kfree(net->ipv4.fib_table_hash);
+	fib4_notifier_exit(net);
 }
 
 static int __net_init fib_net_init(struct net *net)
@@ -1341,7 +1372,7 @@ void __init ip_fib_init(void)
 	register_netdevice_notifier(&fib_netdev_notifier);
 	register_inetaddr_notifier(&fib_inetaddr_notifier);
 
-	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
-	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
-	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
+	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
+	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0);
 }
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 769ab87ebc4b..e6ff282bb7f4 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _FIB_LOOKUP_H
 #define _FIB_LOOKUP_H
 
@@ -32,6 +33,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 				 struct netlink_ext_ack *extack);
 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
 		 struct netlink_ext_ack *extack);
+bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi);
 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
 		  u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
 		  unsigned int);
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index e0714d975947..b804ccbdb241 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -1,86 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/rtnetlink.h>
 #include <linux/notifier.h>
-#include <linux/rcupdate.h>
+#include <linux/socket.h>
 #include <linux/kernel.h>
+#include <linux/export.h>
 #include <net/net_namespace.h>
+#include <net/fib_notifier.h>
 #include <net/netns/ipv4.h>
 #include <net/ip_fib.h>
 
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
-		      enum fib_event_type event_type,
-		      struct fib_notifier_info *info)
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+		       enum fib_event_type event_type,
+		       struct fib_notifier_info *info)
 {
-	info->net = net;
-	return nb->notifier_call(nb, event_type, info);
+	info->family = AF_INET;
+	return call_fib_notifier(nb, net, event_type, info);
 }
 
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
-		       struct fib_notifier_info *info)
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+			struct fib_notifier_info *info)
 {
+	ASSERT_RTNL();
+
+	info->family = AF_INET;
 	net->ipv4.fib_seq++;
-	info->net = net;
-	return atomic_notifier_call_chain(&fib_chain, event_type, info);
+	return call_fib_notifiers(net, event_type, info);
 }
 
-static unsigned int fib_seq_sum(void)
+static unsigned int fib4_seq_read(struct net *net)
 {
-	unsigned int fib_seq = 0;
-	struct net *net;
-
-	rtnl_lock();
-	for_each_net(net)
-		fib_seq += net->ipv4.fib_seq;
-	rtnl_unlock();
+	ASSERT_RTNL();
 
-	return fib_seq;
+	return net->ipv4.fib_seq + fib4_rules_seq_read(net);
 }
 
-static bool fib_dump_is_consistent(struct notifier_block *nb,
-				   void (*cb)(struct notifier_block *nb),
-				   unsigned int fib_seq)
+static int fib4_dump(struct net *net, struct notifier_block *nb)
 {
-	atomic_notifier_chain_register(&fib_chain, nb);
-	if (fib_seq == fib_seq_sum())
-		return true;
-	atomic_notifier_chain_unregister(&fib_chain, nb);
-	if (cb)
-		cb(nb);
-	return false;
+	int err;
+
+	err = fib4_rules_dump(net, nb);
+	if (err)
+		return err;
+
+	fib_notify(net, nb);
+
+	return 0;
 }
 
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
-			  void (*cb)(struct notifier_block *nb))
-{
-	int retries = 0;
+static const struct fib_notifier_ops fib4_notifier_ops_template = {
+	.family		= AF_INET,
+	.fib_seq_read	= fib4_seq_read,
+	.fib_dump	= fib4_dump,
+	.owner		= THIS_MODULE,
+};
 
-	do {
-		unsigned int fib_seq = fib_seq_sum();
-		struct net *net;
+int __net_init fib4_notifier_init(struct net *net)
+{
+	struct fib_notifier_ops *ops;
 
-		/* Mutex semantics guarantee that every change done to
-		 * FIB tries before we read the change sequence counter
-		 * is now visible to us.
-		 */
-		rcu_read_lock();
-		for_each_net_rcu(net) {
-			fib_rules_notify(net, nb);
-			fib_notify(net, nb);
-		}
-		rcu_read_unlock();
+	net->ipv4.fib_seq = 0;
 
-		if (fib_dump_is_consistent(nb, cb, fib_seq))
-			return 0;
-	} while (++retries < FIB_DUMP_MAX_RETRIES);
+	ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	net->ipv4.notifier_ops = ops;
 
-	return -EBUSY;
+	return 0;
 }
-EXPORT_SYMBOL(register_fib_notifier);
 
-int unregister_fib_notifier(struct notifier_block *nb)
+void __net_exit fib4_notifier_exit(struct net *net)
 {
-	return atomic_notifier_chain_unregister(&fib_chain, nb);
+	fib_notifier_ops_unregister(net->ipv4.notifier_ops);
 }
-EXPORT_SYMBOL(unregister_fib_notifier);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 778ecf977eb2..35d646a62ad4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -68,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule)
 }
 EXPORT_SYMBOL_GPL(fib4_rule_default);
 
+int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return fib_rules_dump(net, nb, AF_INET);
+}
+
+unsigned int fib4_rules_seq_read(struct net *net)
+{
+	return fib_rules_seq_read(net, AF_INET);
+}
+
 int __fib_lookup(struct net *net, struct flowi4 *flp,
 		 struct fib_result *res, unsigned int flags)
 {
@@ -185,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net)
 	return NULL;
 }
 
-static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
-				  enum fib_event_type event_type,
-				  struct fib_rule *rule)
-{
-	struct fib_rule_notifier_info info = {
-		.rule = rule,
-	};
-
-	return call_fib_notifier(nb, net, event_type, &info.info);
-}
-
-static int call_fib_rule_notifiers(struct net *net,
-				   enum fib_event_type event_type,
-				   struct fib_rule *rule)
-{
-	struct fib_rule_notifier_info info = {
-		.rule = rule,
-	};
-
-	return call_fib_notifiers(net, event_type, &info.info);
-}
-
-/* Called with rcu_read_lock() */
-void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
-	struct fib_rules_ops *ops = net->ipv4.rules_ops;
-	struct fib_rule *rule;
-
-	list_for_each_entry_rcu(rule, &ops->rules_list, list)
-		call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
-}
-
 static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
 	FRA_GENERIC_POLICY,
 	[FRA_FLOW]	= { .type = NLA_U32 },
@@ -273,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule4->tos = frh->tos;
 
 	net->ipv4.fib_has_custom_rules = true;
-	call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
 
 	err = 0;
 errout:
@@ -295,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule)
 		net->ipv4.fib_num_tclassid_users--;
 #endif
 	net->ipv4.fib_has_custom_rules = true;
-	call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
 errout:
 	return err;
 }
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ec3a9ce281a6..f04d944f8abe 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -44,6 +44,7 @@
 #include <net/netlink.h>
 #include <net/nexthop.h>
 #include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
 
 #include "fib_lookup.h"
 
@@ -219,7 +220,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
 	} endfor_nexthops(fi);
 
 	m = fi->fib_metrics;
-	if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
 		kfree(m);
 	kfree(fi);
 }
@@ -600,17 +601,9 @@ static void fib_rebalance(struct fib_info *fi)
 		atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
 	} endfor_nexthops(fi);
 }
-
-static inline void fib_add_weight(struct fib_info *fi,
-				  const struct fib_nh *nh)
-{
-	fi->fib_weight += nh->nh_weight;
-}
-
 #else /* CONFIG_IP_ROUTE_MULTIPATH */
 
 #define fib_rebalance(fi) do { } while (0)
-#define fib_add_weight(fi, nh) do { } while (0)
 
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
 
@@ -695,6 +688,40 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
 	return 0;
 }
 
+bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
+{
+	struct nlattr *nla;
+	int remaining;
+
+	if (!cfg->fc_mx)
+		return true;
+
+	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+		int type = nla_type(nla);
+		u32 val;
+
+		if (!type)
+			continue;
+		if (type > RTAX_MAX)
+			return false;
+
+		if (type == RTAX_CC_ALGO) {
+			char tmp[TCP_CA_NAME_MAX];
+			bool ecn_ca = false;
+
+			nla_strlcpy(tmp, nla, sizeof(tmp));
+			val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
+		} else {
+			val = nla_get_u32(nla);
+		}
+
+		if (fi->fib_metrics->metrics[type - 1] != val)
+			return false;
+	}
+
+	return true;
+}
+
 
 /*
  * Picture
@@ -739,8 +766,8 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
  *					|
  *					|-> {local prefix} (terminal node)
  */
-static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
-			struct fib_nh *nh, struct netlink_ext_ack *extack)
+static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
+			struct netlink_ext_ack *extack)
 {
 	int err = 0;
 	struct net *net;
@@ -1003,7 +1030,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
 			char tmp[TCP_CA_NAME_MAX];
 
 			nla_strlcpy(tmp, nla, sizeof(tmp));
-			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+			val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
 			if (val == TCP_CA_UNSPEC)
 				return -EINVAL;
 		} else {
@@ -1089,7 +1116,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 			kfree(fi);
 			return ERR_PTR(err);
 		}
-		atomic_set(&fi->fib_metrics->refcnt, 1);
+		refcount_set(&fi->fib_metrics->refcnt, 1);
 	} else {
 		fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
 	}
@@ -1223,7 +1250,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		int linkdown = 0;
 
 		change_nexthops(fi) {
-			err = fib_check_nh(cfg, fi, nexthop_nh, extack);
+			err = fib_check_nh(cfg, nexthop_nh, extack);
 			if (err != 0)
 				goto failure;
 			if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
@@ -1240,7 +1267,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 
 	change_nexthops(fi) {
 		fib_info_update_nh_saddr(net, nexthop_nh);
-		fib_add_weight(fi, nexthop_nh);
 	} endfor_nexthops(fi)
 
 	fib_rebalance(fi);
@@ -1330,8 +1356,6 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
 		goto nla_put_failure;
 	if (fi->fib_nhs == 1) {
-		struct in_device *in_dev;
-
 		if (fi->fib_nh->nh_gw &&
 		    nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
 			goto nla_put_failure;
@@ -1339,11 +1363,17 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
 			goto nla_put_failure;
 		if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
-			in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
+			struct in_device *in_dev;
+
+			rcu_read_lock();
+			in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
 			if (in_dev &&
 			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
 				rtm->rtm_flags |= RTNH_F_DEAD;
+			rcu_read_unlock();
 		}
+		if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
+			rtm->rtm_flags |= RTNH_F_OFFLOAD;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		if (fi->fib_nh[0].nh_tclassid &&
 		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
@@ -1363,18 +1393,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			goto nla_put_failure;
 
 		for_nexthops(fi) {
-			struct in_device *in_dev;
-
 			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
 			if (!rtnh)
 				goto nla_put_failure;
 
 			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
 			if (nh->nh_flags & RTNH_F_LINKDOWN) {
-				in_dev = __in_dev_get_rtnl(nh->nh_dev);
+				struct in_device *in_dev;
+
+				rcu_read_lock();
+				in_dev = __in_dev_get_rcu(nh->nh_dev);
 				if (in_dev &&
 				    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
 					rtnh->rtnh_flags |= RTNH_F_DEAD;
+				rcu_read_unlock();
 			}
 			rtnh->rtnh_hops = nh->nh_weight - 1;
 			rtnh->rtnh_ifindex = nh->nh_oif;
@@ -1451,14 +1483,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
 		if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
 		    fib_nh->nh_flags & RTNH_F_LINKDOWN)
 			break;
-		return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
-					  &info.info);
+		return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
+					   &info.info);
 	case FIB_EVENT_NH_DEL:
 		if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
 		     fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
 		    (fib_nh->nh_flags & RTNH_F_DEAD))
-			return call_fib_notifiers(dev_net(fib_nh->nh_dev),
-						  event_type, &info.info);
+			return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
+						   event_type, &info.info);
 	default:
 		break;
 	}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 64668c69dda6..5ddc4aefff12 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -81,39 +81,40 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/fib_notifier.h>
 #include <trace/events/fib.h>
 #include "fib_lookup.h"
 
 static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
 				   enum fib_event_type event_type, u32 dst,
-				   int dst_len, struct fib_info *fi,
-				   u8 tos, u8 type, u32 tb_id)
+				   int dst_len, struct fib_alias *fa)
 {
 	struct fib_entry_notifier_info info = {
 		.dst = dst,
 		.dst_len = dst_len,
-		.fi = fi,
-		.tos = tos,
-		.type = type,
-		.tb_id = tb_id,
+		.fi = fa->fa_info,
+		.tos = fa->fa_tos,
+		.type = fa->fa_type,
+		.tb_id = fa->tb_id,
 	};
-	return call_fib_notifier(nb, net, event_type, &info.info);
+	return call_fib4_notifier(nb, net, event_type, &info.info);
 }
 
 static int call_fib_entry_notifiers(struct net *net,
 				    enum fib_event_type event_type, u32 dst,
-				    int dst_len, struct fib_info *fi,
-				    u8 tos, u8 type, u32 tb_id)
+				    int dst_len, struct fib_alias *fa,
+				    struct netlink_ext_ack *extack)
 {
 	struct fib_entry_notifier_info info = {
+		.info.extack = extack,
 		.dst = dst,
 		.dst_len = dst_len,
-		.fi = fi,
-		.tos = tos,
-		.type = type,
-		.tb_id = tb_id,
+		.fi = fa->fa_info,
+		.tos = fa->fa_tos,
+		.type = fa->fa_type,
+		.tb_id = fa->tb_id,
 	};
-	return call_fib_notifiers(net, event_type, &info.info);
+	return call_fib4_notifiers(net, event_type, &info.info);
 }
 
 #define MAX_STAT_DEPTH 32
@@ -1215,9 +1216,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
 			new_fa->fa_default = -1;
 
 			call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
-						 key, plen, fi,
-						 new_fa->fa_tos, cfg->fc_type,
-						 tb->tb_id);
+						 key, plen, new_fa, extack);
 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
 				  tb->tb_id, &cfg->fc_nlinfo, nlflags);
 
@@ -1272,8 +1271,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
 		tb->tb_num_default++;
 
 	rt_cache_flush(cfg->fc_nlinfo.nl_net);
-	call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type,
-				 tb->tb_id);
+	call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
 		  &cfg->fc_nlinfo, nlflags);
 succeeded:
@@ -1562,7 +1560,8 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
 		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
 		    (!cfg->fc_protocol ||
 		     fi->fib_protocol == cfg->fc_protocol) &&
-		    fib_nh_match(cfg, fi, extack) == 0) {
+		    fib_nh_match(cfg, fi, extack) == 0 &&
+		    fib_metrics_match(cfg, fi)) {
 			fa_to_delete = fa;
 			break;
 		}
@@ -1572,8 +1571,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
 		return -ESRCH;
 
 	call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
-				 fa_to_delete->fa_info, tos,
-				 fa_to_delete->fa_type, tb->tb_id);
+				 fa_to_delete, extack);
 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 
@@ -1890,9 +1888,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
 
 			call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
 						 n->key,
-						 KEYLENGTH - fa->fa_slen,
-						 fi, fa->fa_tos, fa->fa_type,
-						 tb->tb_id);
+						 KEYLENGTH - fa->fa_slen, fa,
+						 NULL);
 			hlist_del_rcu(&fa->fa_list);
 			fib_release_info(fa->fa_info);
 			alias_free_mem_rcu(fa);
@@ -1930,8 +1927,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
 			continue;
 
 		call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
-					KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
-					fa->fa_type, fa->tb_id);
+					KEYLENGTH - fa->fa_slen, fa);
 	}
 }
 
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index d5cac99170b1..1859c473b21a 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	__be16 protocol = skb->protocol;
 	u16 mac_len = skb->mac_len;
 	int gre_offset, outer_hlen;
-	bool need_csum, ufo, gso_partial;
+	bool need_csum, gso_partial;
 
 	if (!skb->encapsulation)
 		goto out;
@@ -47,20 +47,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
 	skb->encap_hdr_csum = need_csum;
 
-	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
-
 	features &= skb->dev->hw_enc_features;
 
-	/* The only checksum offload we care about from here on out is the
-	 * outer one so strip the existing checksum feature flags based
-	 * on the fact that we will be computing our checksum in software.
-	 */
-	if (ufo) {
-		features &= ~NETIF_F_CSUM_MASK;
-		if (!need_csum)
-			features |= NETIF_F_HW_CSUM;
-	}
-
 	/* segment inner packet. */
 	segs = skb_mac_gso_segment(skb, features);
 	if (IS_ERR_OR_NULL(segs)) {
@@ -98,7 +86,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 		greh = (struct gre_base_hdr *)skb_transport_header(skb);
 		pcsum = (__sum16 *)(greh + 1);
 
-		if (gso_partial) {
+		if (gso_partial && skb_is_gso(skb)) {
 			unsigned int partial_adj;
 
 			/* Adjust checksum to account for the fact that
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c2be26b98b5f..1617604c9284 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -412,7 +412,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	int type = icmp_param->data.icmph.type;
 	int code = icmp_param->data.icmph.code;
 
-	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
+	if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
 		return;
 
 	/* Needed by both icmp_global_allow and icmp_xmit_lock */
@@ -694,7 +694,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 					  iph->tos;
 	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
-	if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
+	if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
 		goto out_unlock;
 
 
@@ -782,7 +782,7 @@ static bool icmp_tag_validation(int proto)
 }
 
 /*
- *	Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
+ *	Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and
  *	ICMP_PARAMETERPROB.
  */
 
@@ -810,7 +810,8 @@ static bool icmp_unreach(struct sk_buff *skb)
 	if (iph->ihl < 5) /* Mangled header, drop. */
 		goto out_err;
 
-	if (icmph->type == ICMP_DEST_UNREACH) {
+	switch (icmph->type) {
+	case ICMP_DEST_UNREACH:
 		switch (icmph->code & 15) {
 		case ICMP_NET_UNREACH:
 		case ICMP_HOST_UNREACH:
@@ -846,8 +847,16 @@ static bool icmp_unreach(struct sk_buff *skb)
 		}
 		if (icmph->code > NR_ICMP_UNREACH)
 			goto out;
-	} else if (icmph->type == ICMP_PARAMETERPROB)
+		break;
+	case ICMP_PARAMETERPROB:
 		info = ntohl(icmph->un.gateway) >> 24;
+		break;
+	case ICMP_TIME_EXCEEDED:
+		__ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS);
+		if (icmph->code == ICMP_EXC_FRAGTIME)
+			goto out;
+		break;
+	}
 
 	/*
 	 *	Throw it at our lower layers
@@ -959,8 +968,9 @@ static bool icmp_timestamp(struct sk_buff *skb)
 	 */
 	icmp_param.data.times[1] = inet_current_timestamp();
 	icmp_param.data.times[2] = icmp_param.data.times[1];
-	if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
-		BUG();
+
+	BUG_ON(skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4));
+
 	icmp_param.data.icmph	   = *icmp_hdr(skb);
 	icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
 	icmp_param.data.icmph.code = 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 498706b072fb..ab183af0b5b6 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1007,10 +1007,18 @@ int igmp_rcv(struct sk_buff *skb)
 {
 	/* This basically follows the spec line by line -- see RFC1112 */
 	struct igmphdr *ih;
-	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+	struct net_device *dev = skb->dev;
+	struct in_device *in_dev;
 	int len = skb->len;
 	bool dropped = true;
 
+	if (netif_is_l3_master(dev)) {
+		dev = dev_get_by_index_rcu(dev_net(dev), IPCB(skb)->iif);
+		if (!dev)
+			goto drop;
+	}
+
+	in_dev = __in_dev_get_rcu(dev);
 	if (!in_dev)
 		goto drop;
 
@@ -2549,7 +2557,8 @@ done:
 /*
  * check if a multicast source filter allows delivery for a given <src,dst,intf>
  */
-int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
+int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
+		   int dif, int sdif)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_mc_socklist *pmc;
@@ -2564,7 +2573,8 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
 	rcu_read_lock();
 	for_each_pmc_rcu(inet, pmc) {
 		if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
-		    pmc->multi.imr_ifindex == dif)
+		    (pmc->multi.imr_ifindex == dif ||
+		     (sdif && pmc->multi.imr_ifindex == sdif)))
 			break;
 	}
 	ret = inet->mc_all;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4089c013cb03..4ca46dc08e63 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -39,11 +39,11 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
  *                          IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
  *                          and 0.0.0.0 equals to 0.0.0.0 only
  */
-static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
-				const struct in6_addr *sk2_rcv_saddr6,
-				__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
-				bool sk1_ipv6only, bool sk2_ipv6only,
-				bool match_wildcard)
+static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
+				 const struct in6_addr *sk2_rcv_saddr6,
+				 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+				 bool sk1_ipv6only, bool sk2_ipv6only,
+				 bool match_wildcard)
 {
 	int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
 	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -52,29 +52,29 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
 	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
 		if (!sk2_ipv6only) {
 			if (sk1_rcv_saddr == sk2_rcv_saddr)
-				return 1;
+				return true;
 			if (!sk1_rcv_saddr || !sk2_rcv_saddr)
 				return match_wildcard;
 		}
-		return 0;
+		return false;
 	}
 
 	if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
-		return 1;
+		return true;
 
 	if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
 	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
-		return 1;
+		return true;
 
 	if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
 	    !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
-		return 1;
+		return true;
 
 	if (sk2_rcv_saddr6 &&
 	    ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
-		return 1;
+		return true;
 
-	return 0;
+	return false;
 }
 #endif
 
@@ -82,20 +82,20 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
  * match_wildcard == false: addresses must be exactly the same, i.e.
  *                          0.0.0.0 only equals to 0.0.0.0
  */
-static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
-				bool sk2_ipv6only, bool match_wildcard)
+static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+				 bool sk2_ipv6only, bool match_wildcard)
 {
 	if (!sk2_ipv6only) {
 		if (sk1_rcv_saddr == sk2_rcv_saddr)
-			return 1;
+			return true;
 		if (!sk1_rcv_saddr || !sk2_rcv_saddr)
 			return match_wildcard;
 	}
-	return 0;
+	return false;
 }
 
-int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
-			 bool match_wildcard)
+bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+			  bool match_wildcard)
 {
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == AF_INET6)
@@ -266,7 +266,7 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
 #if IS_ENABLED(CONFIG_IPV6)
 	if (tb->fast_sk_family == AF_INET6)
 		return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
-					    &sk->sk_v6_rcv_saddr,
+					    inet6_rcv_saddr(sk),
 					    tb->fast_rcv_saddr,
 					    sk->sk_rcv_saddr,
 					    tb->fast_ipv6_only,
@@ -321,13 +321,14 @@ tb_found:
 			goto fail_unlock;
 	}
 success:
-	if (!hlist_empty(&tb->owners)) {
+	if (hlist_empty(&tb->owners)) {
 		tb->fastreuse = reuse;
 		if (sk->sk_reuseport) {
 			tb->fastreuseport = FASTREUSEPORT_ANY;
 			tb->fastuid = uid;
 			tb->fast_rcv_saddr = sk->sk_rcv_saddr;
 			tb->fast_ipv6_only = ipv6_only_sock(sk);
+			tb->fast_sk_family = sk->sk_family;
 #if IS_ENABLED(CONFIG_IPV6)
 			tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 #endif
@@ -354,6 +355,7 @@ success:
 				tb->fastuid = uid;
 				tb->fast_rcv_saddr = sk->sk_rcv_saddr;
 				tb->fast_ipv6_only = ipv6_only_sock(sk);
+				tb->fast_sk_family = sk->sk_family;
 #if IS_ENABLED(CONFIG_IPV6)
 				tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 #endif
@@ -473,6 +475,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
 		}
 		spin_unlock_bh(&queue->fastopenq.lock);
 	}
+	mem_cgroup_sk_alloc(newsk);
 out:
 	release_sock(sk);
 	if (req)
@@ -492,17 +495,15 @@ EXPORT_SYMBOL(inet_csk_accept);
  * to optimize.
  */
 void inet_csk_init_xmit_timers(struct sock *sk,
-			       void (*retransmit_handler)(unsigned long),
-			       void (*delack_handler)(unsigned long),
-			       void (*keepalive_handler)(unsigned long))
+			       void (*retransmit_handler)(struct timer_list *t),
+			       void (*delack_handler)(struct timer_list *t),
+			       void (*keepalive_handler)(struct timer_list *t))
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
-			(unsigned long)sk);
-	setup_timer(&icsk->icsk_delack_timer, delack_handler,
-			(unsigned long)sk);
-	setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
+	timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
+	timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
+	timer_setup(&sk->sk_timer, keepalive_handler, 0);
 	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
 }
 EXPORT_SYMBOL(inet_csk_init_xmit_timers);
@@ -537,9 +538,11 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct net *net = read_pnet(&ireq->ireq_net);
-	struct ip_options_rcu *opt = ireq->opt;
+	struct ip_options_rcu *opt;
 	struct rtable *rt;
 
+	opt = ireq_opt_deref(ireq);
+
 	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -573,10 +576,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 	struct flowi4 *fl4;
 	struct rtable *rt;
 
+	opt = rcu_dereference(ireq->ireq_opt);
 	fl4 = &newinet->cork.fl.u.ip4;
 
-	rcu_read_lock();
-	opt = rcu_dereference(newinet->inet_opt);
 	flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -589,13 +591,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 		goto no_route;
 	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
 		goto route_err;
-	rcu_read_unlock();
 	return &rt->dst;
 
 route_err:
 	ip_rt_put(rt);
 no_route:
-	rcu_read_unlock();
 	__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
 	return NULL;
 }
@@ -674,9 +674,9 @@ void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req
 }
 EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
 
-static void reqsk_timer_handler(unsigned long data)
+static void reqsk_timer_handler(struct timer_list *t)
 {
-	struct request_sock *req = (struct request_sock *)data;
+	struct request_sock *req = from_timer(req, t, rsk_timer);
 	struct sock *sk_listener = req->rsk_listener;
 	struct net *net = sock_net(sk_listener);
 	struct inet_connection_sock *icsk = inet_csk(sk_listener);
@@ -747,8 +747,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
 	req->num_timeout = 0;
 	req->sk = NULL;
 
-	setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler,
-			    (unsigned long)req);
+	timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
 	mod_timer(&req->rsk_timer, jiffies + timeout);
 
 	inet_ehash_insert(req_to_sk(req), NULL);
@@ -916,7 +915,6 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
 		tcp_sk(child)->fastopen_rsk = NULL;
 	}
 	inet_csk_destroy_sock(child);
-	reqsk_put(req);
 }
 
 struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
@@ -987,6 +985,7 @@ void inet_csk_listen_stop(struct sock *sk)
 		sock_hold(child);
 
 		inet_child_forget(sk, req, child);
+		reqsk_put(req);
 		bh_unlock_sock(child);
 		local_bh_enable();
 		sock_put(child);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 3828b3a805cd..c9c35b61a027 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -93,8 +93,17 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
 
-static size_t inet_sk_attr_size(void)
+static size_t inet_sk_attr_size(struct sock *sk,
+				const struct inet_diag_req_v2 *req,
+				bool net_admin)
 {
+	const struct inet_diag_handler *handler;
+	size_t aux = 0;
+
+	handler = inet_diag_table[req->sdiag_protocol];
+	if (handler && handler->idiag_get_aux_size)
+		aux = handler->idiag_get_aux_size(sk, net_admin);
+
 	return	  nla_total_size(sizeof(struct tcp_info))
 		+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
 		+ nla_total_size(1) /* INET_DIAG_TOS */
@@ -105,6 +114,7 @@ static size_t inet_sk_attr_size(void)
 		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
 		+ nla_total_size(TCP_CA_NAME_MAX)
 		+ nla_total_size(sizeof(struct tcpvegas_info))
+		+ aux
 		+ 64;
 }
 
@@ -260,6 +270,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 
 	handler->idiag_get_info(sk, r, info);
 
+	if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux)
+		if (handler->idiag_get_aux(sk, net_admin, skb) < 0)
+			goto errout;
+
 	if (sk->sk_state < TCP_TIME_WAIT) {
 		union tcp_cc_info info;
 		size_t sz = 0;
@@ -274,6 +288,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			goto errout;
 	}
 
+	if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) {
+		u32 classid = 0;
+
+#ifdef CONFIG_SOCK_CGROUP_DATA
+		classid = sock_cgroup_classid(&sk->sk_cgrp_data);
+#endif
+
+		if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
+			goto errout;
+	}
+
 out:
 	nlmsg_end(skb, nlh);
 	return 0;
@@ -438,6 +463,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 			    const struct nlmsghdr *nlh,
 			    const struct inet_diag_req_v2 *req)
 {
+	bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
 	struct net *net = sock_net(in_skb->sk);
 	struct sk_buff *rep;
 	struct sock *sk;
@@ -447,7 +473,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 	if (IS_ERR(sk))
 		return PTR_ERR(sk);
 
-	rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
+	rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL);
 	if (!rep) {
 		err = -ENOMEM;
 		goto out;
@@ -456,8 +482,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 	err = sk_diag_fill(sk, rep, req,
 			   sk_user_ns(NETLINK_CB(in_skb).sk),
 			   NETLINK_CB(in_skb).portid,
-			   nlh->nlmsg_seq, 0, nlh,
-			   netlink_net_capable(in_skb, CAP_NET_ADMIN));
+			   nlh->nlmsg_seq, 0, nlh, net_admin);
 	if (err < 0) {
 		WARN_ON(err == -EMSGSIZE);
 		nlmsg_free(rep);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 96e95e83cc61..26a3d0315728 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -147,7 +147,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
 	spin_unlock(&hb->chain_lock);
 
 	hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
-		f->frag_expire((unsigned long) fq);
+		f->frag_expire(&fq->timer);
 
 	return evicted;
 }
@@ -164,7 +164,7 @@ static void inet_frag_worker(struct work_struct *work)
 
 	local_bh_disable();
 
-	for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+	for (i = READ_ONCE(f->next_bucket); budget; --budget) {
 		evicted += inet_evict_bucket(f, &f->hash[i]);
 		i = (i + 1) & (INETFRAGS_HASHSZ - 1);
 		if (evicted > INETFRAGS_EVICT_MAX)
@@ -234,10 +234,8 @@ evict_again:
 	cond_resched();
 
 	if (read_seqretry(&f->rnd_seqlock, seq) ||
-	    percpu_counter_sum(&nf->mem))
+	    sum_frag_mem_limit(nf))
 		goto evict_again;
-
-	percpu_counter_destroy(&nf->mem);
 }
 EXPORT_SYMBOL(inet_frags_exit_net);
 
@@ -368,7 +366,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	f->constructor(q, arg);
 	add_frag_mem_limit(nf, f->qsize);
 
-	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+	timer_setup(&q->timer, f->frag_expire, 0);
 	spin_lock_init(&q->lock);
 	refcount_set(&q->refcnt, 1);
 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2e3389d614d1..e7d15fb0d94d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -170,7 +170,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
 static inline int compute_score(struct sock *sk, struct net *net,
 				const unsigned short hnum, const __be32 daddr,
-				const int dif, bool exact_dif)
+				const int dif, const int sdif, bool exact_dif)
 {
 	int score = -1;
 	struct inet_sock *inet = inet_sk(sk);
@@ -185,9 +185,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
 			score += 4;
 		}
 		if (sk->sk_bound_dev_if || exact_dif) {
-			if (sk->sk_bound_dev_if != dif)
+			bool dev_match = (sk->sk_bound_dev_if == dif ||
+					  sk->sk_bound_dev_if == sdif);
+
+			if (exact_dif && !dev_match)
 				return -1;
-			score += 4;
+			if (sk->sk_bound_dev_if && dev_match)
+				score += 4;
 		}
 		if (sk->sk_incoming_cpu == raw_smp_processor_id())
 			score++;
@@ -208,7 +212,7 @@ struct sock *__inet_lookup_listener(struct net *net,
 				    struct sk_buff *skb, int doff,
 				    const __be32 saddr, __be16 sport,
 				    const __be32 daddr, const unsigned short hnum,
-				    const int dif)
+				    const int dif, const int sdif)
 {
 	unsigned int hash = inet_lhashfn(net, hnum);
 	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -218,7 +222,8 @@ struct sock *__inet_lookup_listener(struct net *net,
 	u32 phash = 0;
 
 	sk_for_each_rcu(sk, &ilb->head) {
-		score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+		score = compute_score(sk, net, hnum, daddr,
+				      dif, sdif, exact_dif);
 		if (score > hiscore) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -268,7 +273,7 @@ struct sock *__inet_lookup_established(struct net *net,
 				  struct inet_hashinfo *hashinfo,
 				  const __be32 saddr, const __be16 sport,
 				  const __be32 daddr, const u16 hnum,
-				  const int dif)
+				  const int dif, const int sdif)
 {
 	INET_ADDR_COOKIE(acookie, saddr, daddr);
 	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
@@ -286,11 +291,12 @@ begin:
 		if (sk->sk_hash != hash)
 			continue;
 		if (likely(INET_MATCH(sk, net, acookie,
-				      saddr, daddr, ports, dif))) {
+				      saddr, daddr, ports, dif, sdif))) {
 			if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
 				goto out;
 			if (unlikely(!INET_MATCH(sk, net, acookie,
-						 saddr, daddr, ports, dif))) {
+						 saddr, daddr, ports,
+						 dif, sdif))) {
 				sock_gen_put(sk);
 				goto begin;
 			}
@@ -321,9 +327,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	__be32 daddr = inet->inet_rcv_saddr;
 	__be32 saddr = inet->inet_daddr;
 	int dif = sk->sk_bound_dev_if;
+	struct net *net = sock_net(sk);
+	int sdif = l3mdev_master_ifindex_by_index(net, dif);
 	INET_ADDR_COOKIE(acookie, saddr, daddr);
 	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
-	struct net *net = sock_net(sk);
 	unsigned int hash = inet_ehashfn(net, daddr, lport,
 					 saddr, inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -339,7 +346,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 			continue;
 
 		if (likely(INET_MATCH(sk2, net, acookie,
-					 saddr, daddr, ports, dif))) {
+					 saddr, daddr, ports, dif, sdif))) {
 			if (sk2->sk_state == TCP_TIME_WAIT) {
 				tw = inet_twsk(sk2);
 				if (twsk_unique(sk, sk2, twp))
@@ -449,10 +456,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
 			return reuseport_add_sock(sk, sk2);
 	}
 
-	/* Initial allocation may have already happened via setsockopt */
-	if (!rcu_access_pointer(sk->sk_reuseport_cb))
-		return reuseport_alloc(sk);
-	return 0;
+	return reuseport_alloc(sk);
 }
 
 int __inet_hash(struct sock *sk, struct sock *osk)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 5b039159e67a..c690cd0d9b3f 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -9,7 +9,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/kmemcheck.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <net/inet_hashtables.h>
@@ -142,9 +141,9 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
 
-static void tw_timer_handler(unsigned long data)
+static void tw_timer_handler(struct timer_list *t)
 {
-	struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
+	struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
 
 	if (tw->tw_kill)
 		__NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
@@ -167,8 +166,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 	if (tw) {
 		const struct inet_sock *inet = inet_sk(sk);
 
-		kmemcheck_annotate_bitfield(tw, flags);
-
 		tw->tw_dr	    = dr;
 		/* Give us an identity. */
 		tw->tw_daddr	    = inet->inet_daddr;
@@ -188,8 +185,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 		tw->tw_prot	    = sk->sk_prot_creator;
 		atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
 		twsk_net_set(tw, sock_net(sk));
-		setup_pinned_timer(&tw->tw_timer, tw_timer_handler,
-				   (unsigned long)tw);
+		timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
 		/*
 		 * Because we use RCU lookups, we should not set tw_refcnt
 		 * to a non null value before everything is setup for this
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index c5a117cc6619..914d56928578 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -33,7 +33,7 @@
  *  also be removed if the pool is overloaded i.e. if the total amount of
  *  entries is greater-or-equal than the threshold.
  *
- *  Node pool is organised as an AVL tree.
+ *  Node pool is organised as an RB tree.
  *  Such an implementation has been chosen not just for fun.  It's a way to
  *  prevent easy and efficient DoS attacks by creating hash collisions.  A huge
  *  amount of long living nodes in a single hash slot would significantly delay
@@ -45,7 +45,7 @@
  *      AND reference count being 0.
  *  3.  Global variable peer_total is modified under the pool lock.
  *  4.  struct inet_peer fields modification:
- *		avl_left, avl_right, avl_parent, avl_height: pool lock
+ *		rb_node: pool lock
  *		refcnt: atomically against modifications on other CPU;
  *		   usually under some other lock to prevent node disappearing
  *		daddr: unchangeable
@@ -53,30 +53,15 @@
 
 static struct kmem_cache *peer_cachep __read_mostly;
 
-static LIST_HEAD(gc_list);
-static const int gc_delay = 60 * HZ;
-static struct delayed_work gc_work;
-static DEFINE_SPINLOCK(gc_lock);
-
-#define node_height(x) x->avl_height
-
-#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
-#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
-static const struct inet_peer peer_fake_node = {
-	.avl_left	= peer_avl_empty_rcu,
-	.avl_right	= peer_avl_empty_rcu,
-	.avl_height	= 0
-};
-
 void inet_peer_base_init(struct inet_peer_base *bp)
 {
-	bp->root = peer_avl_empty_rcu;
+	bp->rb_root = RB_ROOT;
 	seqlock_init(&bp->lock);
 	bp->total = 0;
 }
 EXPORT_SYMBOL_GPL(inet_peer_base_init);
 
-#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
+#define PEER_MAX_GC 32
 
 /* Exported for sysctl_net_ipv4.  */
 int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
@@ -84,53 +69,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries m
 int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
 
-static void inetpeer_gc_worker(struct work_struct *work)
-{
-	struct inet_peer *p, *n, *c;
-	struct list_head list;
-
-	spin_lock_bh(&gc_lock);
-	list_replace_init(&gc_list, &list);
-	spin_unlock_bh(&gc_lock);
-
-	if (list_empty(&list))
-		return;
-
-	list_for_each_entry_safe(p, n, &list, gc_list) {
-
-		if (need_resched())
-			cond_resched();
-
-		c = rcu_dereference_protected(p->avl_left, 1);
-		if (c != peer_avl_empty) {
-			list_add_tail(&c->gc_list, &list);
-			p->avl_left = peer_avl_empty_rcu;
-		}
-
-		c = rcu_dereference_protected(p->avl_right, 1);
-		if (c != peer_avl_empty) {
-			list_add_tail(&c->gc_list, &list);
-			p->avl_right = peer_avl_empty_rcu;
-		}
-
-		n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
-
-		if (refcount_read(&p->refcnt) == 1) {
-			list_del(&p->gc_list);
-			kmem_cache_free(peer_cachep, p);
-		}
-	}
-
-	if (list_empty(&list))
-		return;
-
-	spin_lock_bh(&gc_lock);
-	list_splice(&list, &gc_list);
-	spin_unlock_bh(&gc_lock);
-
-	schedule_delayed_work(&gc_work, gc_delay);
-}
-
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
 {
@@ -153,225 +91,65 @@ void __init inet_initpeers(void)
 			sizeof(struct inet_peer),
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
-
-	INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
 }
 
-#define rcu_deref_locked(X, BASE)				\
-	rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
-
-/*
- * Called with local BH disabled and the pool lock held.
- */
-#define lookup(_daddr, _stack, _base)				\
-({								\
-	struct inet_peer *u;					\
-	struct inet_peer __rcu **v;				\
-								\
-	stackptr = _stack;					\
-	*stackptr++ = &_base->root;				\
-	for (u = rcu_deref_locked(_base->root, _base);		\
-	     u != peer_avl_empty;) {				\
-		int cmp = inetpeer_addr_cmp(_daddr, &u->daddr);	\
-		if (cmp == 0)					\
-			break;					\
-		if (cmp == -1)					\
-			v = &u->avl_left;			\
-		else						\
-			v = &u->avl_right;			\
-		*stackptr++ = v;				\
-		u = rcu_deref_locked(*v, _base);		\
-	}							\
-	u;							\
-})
-
-/*
- * Called with rcu_read_lock()
- * Because we hold no lock against a writer, its quite possible we fall
- * in an endless loop.
- * But every pointer we follow is guaranteed to be valid thanks to RCU.
- * We exit from this function if number of links exceeds PEER_MAXDEPTH
- */
-static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
-				    struct inet_peer_base *base)
+/* Called with rcu_read_lock() or base->lock held */
+static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
+				struct inet_peer_base *base,
+				unsigned int seq,
+				struct inet_peer *gc_stack[],
+				unsigned int *gc_cnt,
+				struct rb_node **parent_p,
+				struct rb_node ***pp_p)
 {
-	struct inet_peer *u = rcu_dereference(base->root);
-	int count = 0;
+	struct rb_node **pp, *parent, *next;
+	struct inet_peer *p;
 
-	while (u != peer_avl_empty) {
-		int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
+	pp = &base->rb_root.rb_node;
+	parent = NULL;
+	while (1) {
+		int cmp;
+
+		next = rcu_dereference_raw(*pp);
+		if (!next)
+			break;
+		parent = next;
+		p = rb_entry(parent, struct inet_peer, rb_node);
+		cmp = inetpeer_addr_cmp(daddr, &p->daddr);
 		if (cmp == 0) {
-			/* Before taking a reference, check if this entry was
-			 * deleted (refcnt=0)
-			 */
-			if (!refcount_inc_not_zero(&u->refcnt)) {
-				u = NULL;
-			}
-			return u;
+			if (!refcount_inc_not_zero(&p->refcnt))
+				break;
+			return p;
+		}
+		if (gc_stack) {
+			if (*gc_cnt < PEER_MAX_GC)
+				gc_stack[(*gc_cnt)++] = p;
+		} else if (unlikely(read_seqretry(&base->lock, seq))) {
+			break;
 		}
 		if (cmp == -1)
-			u = rcu_dereference(u->avl_left);
+			pp = &next->rb_left;
 		else
-			u = rcu_dereference(u->avl_right);
-		if (unlikely(++count == PEER_MAXDEPTH))
-			break;
+			pp = &next->rb_right;
 	}
+	*parent_p = parent;
+	*pp_p = pp;
 	return NULL;
 }
 
-/* Called with local BH disabled and the pool lock held. */
-#define lookup_rightempty(start, base)				\
-({								\
-	struct inet_peer *u;					\
-	struct inet_peer __rcu **v;				\
-	*stackptr++ = &start->avl_left;				\
-	v = &start->avl_left;					\
-	for (u = rcu_deref_locked(*v, base);			\
-	     u->avl_right != peer_avl_empty_rcu;) {		\
-		v = &u->avl_right;				\
-		*stackptr++ = v;				\
-		u = rcu_deref_locked(*v, base);			\
-	}							\
-	u;							\
-})
-
-/* Called with local BH disabled and the pool lock held.
- * Variable names are the proof of operation correctness.
- * Look into mm/map_avl.c for more detail description of the ideas.
- */
-static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
-			       struct inet_peer __rcu ***stackend,
-			       struct inet_peer_base *base)
-{
-	struct inet_peer __rcu **nodep;
-	struct inet_peer *node, *l, *r;
-	int lh, rh;
-
-	while (stackend > stack) {
-		nodep = *--stackend;
-		node = rcu_deref_locked(*nodep, base);
-		l = rcu_deref_locked(node->avl_left, base);
-		r = rcu_deref_locked(node->avl_right, base);
-		lh = node_height(l);
-		rh = node_height(r);
-		if (lh > rh + 1) { /* l: RH+2 */
-			struct inet_peer *ll, *lr, *lrl, *lrr;
-			int lrh;
-			ll = rcu_deref_locked(l->avl_left, base);
-			lr = rcu_deref_locked(l->avl_right, base);
-			lrh = node_height(lr);
-			if (lrh <= node_height(ll)) {	/* ll: RH+1 */
-				RCU_INIT_POINTER(node->avl_left, lr);	/* lr: RH or RH+1 */
-				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
-				node->avl_height = lrh + 1; /* RH+1 or RH+2 */
-				RCU_INIT_POINTER(l->avl_left, ll);       /* ll: RH+1 */
-				RCU_INIT_POINTER(l->avl_right, node);	/* node: RH+1 or RH+2 */
-				l->avl_height = node->avl_height + 1;
-				RCU_INIT_POINTER(*nodep, l);
-			} else { /* ll: RH, lr: RH+1 */
-				lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
-				lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
-				RCU_INIT_POINTER(node->avl_left, lrr);	/* lrr: RH or RH-1 */
-				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
-				node->avl_height = rh + 1; /* node: RH+1 */
-				RCU_INIT_POINTER(l->avl_left, ll);	/* ll: RH */
-				RCU_INIT_POINTER(l->avl_right, lrl);	/* lrl: RH or RH-1 */
-				l->avl_height = rh + 1;	/* l: RH+1 */
-				RCU_INIT_POINTER(lr->avl_left, l);	/* l: RH+1 */
-				RCU_INIT_POINTER(lr->avl_right, node);	/* node: RH+1 */
-				lr->avl_height = rh + 2;
-				RCU_INIT_POINTER(*nodep, lr);
-			}
-		} else if (rh > lh + 1) { /* r: LH+2 */
-			struct inet_peer *rr, *rl, *rlr, *rll;
-			int rlh;
-			rr = rcu_deref_locked(r->avl_right, base);
-			rl = rcu_deref_locked(r->avl_left, base);
-			rlh = node_height(rl);
-			if (rlh <= node_height(rr)) {	/* rr: LH+1 */
-				RCU_INIT_POINTER(node->avl_right, rl);	/* rl: LH or LH+1 */
-				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
-				node->avl_height = rlh + 1; /* LH+1 or LH+2 */
-				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH+1 */
-				RCU_INIT_POINTER(r->avl_left, node);	/* node: LH+1 or LH+2 */
-				r->avl_height = node->avl_height + 1;
-				RCU_INIT_POINTER(*nodep, r);
-			} else { /* rr: RH, rl: RH+1 */
-				rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
-				rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
-				RCU_INIT_POINTER(node->avl_right, rll);	/* rll: LH or LH-1 */
-				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
-				node->avl_height = lh + 1; /* node: LH+1 */
-				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH */
-				RCU_INIT_POINTER(r->avl_left, rlr);	/* rlr: LH or LH-1 */
-				r->avl_height = lh + 1;	/* r: LH+1 */
-				RCU_INIT_POINTER(rl->avl_right, r);	/* r: LH+1 */
-				RCU_INIT_POINTER(rl->avl_left, node);	/* node: LH+1 */
-				rl->avl_height = lh + 2;
-				RCU_INIT_POINTER(*nodep, rl);
-			}
-		} else {
-			node->avl_height = (lh > rh ? lh : rh) + 1;
-		}
-	}
-}
-
-/* Called with local BH disabled and the pool lock held. */
-#define link_to_pool(n, base)					\
-do {								\
-	n->avl_height = 1;					\
-	n->avl_left = peer_avl_empty_rcu;			\
-	n->avl_right = peer_avl_empty_rcu;			\
-	/* lockless readers can catch us now */			\
-	rcu_assign_pointer(**--stackptr, n);			\
-	peer_avl_rebalance(stack, stackptr, base);		\
-} while (0)
-
 static void inetpeer_free_rcu(struct rcu_head *head)
 {
 	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
 }
 
-static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
-			     struct inet_peer __rcu **stack[PEER_MAXDEPTH])
-{
-	struct inet_peer __rcu ***stackptr, ***delp;
-
-	if (lookup(&p->daddr, stack, base) != p)
-		BUG();
-	delp = stackptr - 1; /* *delp[0] == p */
-	if (p->avl_left == peer_avl_empty_rcu) {
-		*delp[0] = p->avl_right;
-		--stackptr;
-	} else {
-		/* look for a node to insert instead of p */
-		struct inet_peer *t;
-		t = lookup_rightempty(p, base);
-		BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
-		**--stackptr = t->avl_left;
-		/* t is removed, t->daddr > x->daddr for any
-		 * x in p->avl_left subtree.
-		 * Put t in the old place of p. */
-		RCU_INIT_POINTER(*delp[0], t);
-		t->avl_left = p->avl_left;
-		t->avl_right = p->avl_right;
-		t->avl_height = p->avl_height;
-		BUG_ON(delp[1] != &p->avl_left);
-		delp[1] = &t->avl_left; /* was &p->avl_left */
-	}
-	peer_avl_rebalance(stack, stackptr, base);
-	base->total--;
-	call_rcu(&p->rcu, inetpeer_free_rcu);
-}
-
 /* perform garbage collect on all items stacked during a lookup */
-static int inet_peer_gc(struct inet_peer_base *base,
-			struct inet_peer __rcu **stack[PEER_MAXDEPTH],
-			struct inet_peer __rcu ***stackptr)
+static void inet_peer_gc(struct inet_peer_base *base,
+			 struct inet_peer *gc_stack[],
+			 unsigned int gc_cnt)
 {
-	struct inet_peer *p, *gchead = NULL;
+	struct inet_peer *p;
 	__u32 delta, ttl;
-	int cnt = 0;
+	int i;
 
 	if (base->total >= inet_peer_threshold)
 		ttl = 0; /* be aggressive */
@@ -379,43 +157,38 @@ static int inet_peer_gc(struct inet_peer_base *base,
 		ttl = inet_peer_maxttl
 				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
 					base->total / inet_peer_threshold * HZ;
-	stackptr--; /* last stack slot is peer_avl_empty */
-	while (stackptr > stack) {
-		stackptr--;
-		p = rcu_deref_locked(**stackptr, base);
-		if (refcount_read(&p->refcnt) == 1) {
-			smp_rmb();
-			delta = (__u32)jiffies - p->dtime;
-			if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) {
-				p->gc_next = gchead;
-				gchead = p;
-			}
-		}
+	for (i = 0; i < gc_cnt; i++) {
+		p = gc_stack[i];
+		delta = (__u32)jiffies - p->dtime;
+		if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
+			gc_stack[i] = NULL;
 	}
-	while ((p = gchead) != NULL) {
-		gchead = p->gc_next;
-		cnt++;
-		unlink_from_pool(p, base, stack);
+	for (i = 0; i < gc_cnt; i++) {
+		p = gc_stack[i];
+		if (p) {
+			rb_erase(&p->rb_node, &base->rb_root);
+			base->total--;
+			call_rcu(&p->rcu, inetpeer_free_rcu);
+		}
 	}
-	return cnt;
 }
 
 struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 			       const struct inetpeer_addr *daddr,
 			       int create)
 {
-	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
-	struct inet_peer *p;
-	unsigned int sequence;
-	int invalidated, gccnt = 0;
+	struct inet_peer *p, *gc_stack[PEER_MAX_GC];
+	struct rb_node **pp, *parent;
+	unsigned int gc_cnt, seq;
+	int invalidated;
 
 	/* Attempt a lockless lookup first.
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
 	rcu_read_lock();
-	sequence = read_seqbegin(&base->lock);
-	p = lookup_rcu(daddr, base);
-	invalidated = read_seqretry(&base->lock, sequence);
+	seq = read_seqbegin(&base->lock);
+	p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
+	invalidated = read_seqretry(&base->lock, seq);
 	rcu_read_unlock();
 
 	if (p)
@@ -428,36 +201,31 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 	/* retry an exact lookup, taking the lock before.
 	 * At least, nodes should be hot in our cache.
 	 */
+	parent = NULL;
 	write_seqlock_bh(&base->lock);
-relookup:
-	p = lookup(daddr, stack, base);
-	if (p != peer_avl_empty) {
-		refcount_inc(&p->refcnt);
-		write_sequnlock_bh(&base->lock);
-		return p;
-	}
-	if (!gccnt) {
-		gccnt = inet_peer_gc(base, stack, stackptr);
-		if (gccnt && create)
-			goto relookup;
-	}
-	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
-	if (p) {
-		p->daddr = *daddr;
-		refcount_set(&p->refcnt, 2);
-		atomic_set(&p->rid, 0);
-		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
-		p->rate_tokens = 0;
-		/* 60*HZ is arbitrary, but chosen enough high so that the first
-		 * calculation of tokens is at its maximum.
-		 */
-		p->rate_last = jiffies - 60*HZ;
-		INIT_LIST_HEAD(&p->gc_list);
-
-		/* Link the node. */
-		link_to_pool(p, base);
-		base->total++;
+
+	gc_cnt = 0;
+	p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
+	if (!p && create) {
+		p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
+		if (p) {
+			p->daddr = *daddr;
+			refcount_set(&p->refcnt, 2);
+			atomic_set(&p->rid, 0);
+			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+			p->rate_tokens = 0;
+			/* 60*HZ is arbitrary, but chosen high enough so that the first
+			 * calculation of tokens is at its maximum.
+			 */
+			p->rate_last = jiffies - 60*HZ;
+
+			rb_link_node(&p->rb_node, parent, pp);
+			rb_insert_color(&p->rb_node, &base->rb_root);
+			base->total++;
+		}
 	}
+	if (gc_cnt)
+		inet_peer_gc(base, gc_stack, gc_cnt);
 	write_sequnlock_bh(&base->lock);
 
 	return p;
@@ -467,8 +235,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
 void inet_putpeer(struct inet_peer *p)
 {
 	p->dtime = (__u32)jiffies;
-	smp_mb__before_atomic();
-	refcount_dec(&p->refcnt);
+
+	if (refcount_dec_and_test(&p->refcnt))
+		call_rcu(&p->rcu, inetpeer_free_rcu);
 }
 EXPORT_SYMBOL_GPL(inet_putpeer);
 
@@ -513,30 +282,19 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
 
-static void inetpeer_inval_rcu(struct rcu_head *head)
-{
-	struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);
-
-	spin_lock_bh(&gc_lock);
-	list_add_tail(&p->gc_list, &gc_list);
-	spin_unlock_bh(&gc_lock);
-
-	schedule_delayed_work(&gc_work, gc_delay);
-}
-
 void inetpeer_invalidate_tree(struct inet_peer_base *base)
 {
-	struct inet_peer *root;
+	struct rb_node *p = rb_first(&base->rb_root);
 
-	write_seqlock_bh(&base->lock);
+	while (p) {
+		struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node);
 
-	root = rcu_deref_locked(base->root, base);
-	if (root != peer_avl_empty) {
-		base->root = peer_avl_empty_rcu;
-		base->total = 0;
-		call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
+		p = rb_next(p);
+		rb_erase(&peer->rb_node, &base->rb_root);
+		inet_putpeer(peer);
+		cond_resched();
 	}
 
-	write_sequnlock_bh(&base->lock);
+	base->total = 0;
 }
 EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 9f0a7b96646f..2dd21c3281a1 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9a8cfac503dc..bbf1b94942c0 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -190,12 +191,13 @@ static bool frag_expire_skip_icmp(u32 user)
 /*
  * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
  */
-static void ip_expire(unsigned long arg)
+static void ip_expire(struct timer_list *t)
 {
+	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct ipq *qp;
 	struct net *net;
 
-	qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+	qp = container_of(frag, struct ipq, q);
 	net = container_of(qp->q.net, struct net, ipv4.frags);
 
 	rcu_read_lock();
@@ -844,8 +846,6 @@ static void __init ip4_frags_ctl_register(void)
 
 static int __net_init ipv4_frags_init_net(struct net *net)
 {
-	int res;
-
 	/* Fragment cache limits.
 	 *
 	 * The fragment memory accounting code, (tries to) account for
@@ -871,13 +871,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
 
 	net->ipv4.frags.max_dist = 64;
 
-	res = inet_frags_init_net(&net->ipv4.frags);
-	if (res)
-		return res;
-	res = ip4_frags_ns_ctl_register(net);
-	if (res)
-		inet_frags_uninit_net(&net->ipv4.frags);
-	return res;
+	inet_frags_init_net(&net->ipv4.frags);
+
+	return ip4_frags_ns_ctl_register(net);
 }
 
 static void __net_exit ipv4_frags_exit_net(struct net *net)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7a7829e839c2..bb6239169b1a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -48,6 +48,7 @@
 #include <net/rtnetlink.h>
 #include <net/gre.h>
 #include <net/dst_metadata.h>
+#include <net/erspan.h>
 
 /*
    Problems & solutions
@@ -112,9 +113,12 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 static int ipgre_tunnel_init(struct net_device *dev);
+static void erspan_build_header(struct sk_buff *skb,
+				__be32 id, u32 index, bool truncate);
 
 static unsigned int ipgre_net_id __read_mostly;
 static unsigned int gre_tap_net_id __read_mostly;
+static unsigned int erspan_net_id __read_mostly;
 
 static void ipgre_err(struct sk_buff *skb, u32 info,
 		      const struct tnl_ptk_info *tpi)
@@ -246,6 +250,79 @@ static void gre_err(struct sk_buff *skb, u32 info)
 	ipgre_err(skb, info, &tpi);
 }
 
+static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+		      int gre_hdr_len)
+{
+	struct net *net = dev_net(skb->dev);
+	struct metadata_dst *tun_dst = NULL;
+	struct ip_tunnel_net *itn;
+	struct ip_tunnel *tunnel;
+	struct erspanhdr *ershdr;
+	const struct iphdr *iph;
+	__be32 index;
+	int len;
+
+	itn = net_generic(net, erspan_net_id);
+	len = gre_hdr_len + sizeof(*ershdr);
+
+	if (unlikely(!pskb_may_pull(skb, len)))
+		return -ENOMEM;
+
+	iph = ip_hdr(skb);
+	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
+
+	/* The original GRE header does not have a key field;
+	 * use the ERSPAN 10-bit session ID as the key.
+	 */
+	tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
+	index = ershdr->md.index;
+	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+				  tpi->flags | TUNNEL_KEY,
+				  iph->saddr, iph->daddr, tpi->key);
+
+	if (tunnel) {
+		if (__iptunnel_pull_header(skb,
+					   gre_hdr_len + sizeof(*ershdr),
+					   htons(ETH_P_TEB),
+					   false, false) < 0)
+			goto drop;
+
+		if (tunnel->collect_md) {
+			struct ip_tunnel_info *info;
+			struct erspan_metadata *md;
+			__be64 tun_id;
+			__be16 flags;
+
+			tpi->flags |= TUNNEL_KEY;
+			flags = tpi->flags;
+			tun_id = key32_to_tunnel_id(tpi->key);
+
+			tun_dst = ip_tun_rx_dst(skb, flags,
+						tun_id, sizeof(*md));
+			if (!tun_dst)
+				return PACKET_REJECT;
+
+			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+			if (!md)
+				return PACKET_REJECT;
+
+			md->index = index;
+			info = &tun_dst->u.tun_info;
+			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+			info->options_len = sizeof(*md);
+		} else {
+			tunnel->index = ntohl(index);
+		}
+
+		skb_reset_mac_header(skb);
+		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+		return PACKET_RCVD;
+	}
+drop:
+	kfree_skb(skb);
+	return PACKET_RCVD;
+}
+
 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
 {
@@ -328,6 +405,11 @@ static int gre_rcv(struct sk_buff *skb)
 	if (hdr_len < 0)
 		goto drop;
 
+	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
+		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
+			return 0;
+	}
+
 	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 		return 0;
 
@@ -376,39 +458,33 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
 	return ip_route_output_key(net, fl);
 }
 
-static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
-			__be16 proto)
+static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
+				      struct net_device *dev,
+				      struct flowi4 *fl,
+				      int tunnel_hlen)
 {
 	struct ip_tunnel_info *tun_info;
 	const struct ip_tunnel_key *key;
 	struct rtable *rt = NULL;
-	struct flowi4 fl;
 	int min_headroom;
-	int tunnel_hlen;
-	__be16 df, flags;
 	bool use_cache;
 	int err;
 
 	tun_info = skb_tunnel_info(skb);
-	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
-		     ip_tunnel_info_af(tun_info) != AF_INET))
-		goto err_free_skb;
-
 	key = &tun_info->key;
 	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+
 	if (use_cache)
-		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
+		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
 	if (!rt) {
-		rt = gre_get_rt(skb, dev, &fl, key);
+		rt = gre_get_rt(skb, dev, fl, key);
 		if (IS_ERR(rt))
-				goto err_free_skb;
+			goto err_free_skb;
 		if (use_cache)
 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
-					  fl.saddr);
+					  fl->saddr);
 	}
 
-	tunnel_hlen = gre_calc_hlen(key->tun_flags);
-
 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 			+ tunnel_hlen + sizeof(struct iphdr);
 	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
@@ -420,6 +496,37 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (unlikely(err))
 			goto err_free_rt;
 	}
+	return rt;
+
+err_free_rt:
+	ip_rt_put(rt);
+err_free_skb:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+	return NULL;
+}
+
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+			__be16 proto)
+{
+	struct ip_tunnel_info *tun_info;
+	const struct ip_tunnel_key *key;
+	struct rtable *rt = NULL;
+	struct flowi4 fl;
+	int tunnel_hlen;
+	__be16 df, flags;
+
+	tun_info = skb_tunnel_info(skb);
+	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+		     ip_tunnel_info_af(tun_info) != AF_INET))
+		goto err_free_skb;
+
+	key = &tun_info->key;
+	tunnel_hlen = gre_calc_hlen(key->tun_flags);
+
+	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+	if (!rt)
+		return;
 
 	/* Push Tunnel header. */
 	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
@@ -442,6 +549,64 @@ err_free_skb:
 	dev->stats.tx_dropped++;
 }
 
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+			   __be16 proto)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct ip_tunnel_info *tun_info;
+	const struct ip_tunnel_key *key;
+	struct erspan_metadata *md;
+	struct rtable *rt = NULL;
+	bool truncate = false;
+	struct flowi4 fl;
+	int tunnel_hlen;
+	__be16 df;
+
+	tun_info = skb_tunnel_info(skb);
+	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+		     ip_tunnel_info_af(tun_info) != AF_INET))
+		goto err_free_skb;
+
+	key = &tun_info->key;
+
+	/* ERSPAN has fixed 8 byte GRE header */
+	tunnel_hlen = 8 + sizeof(struct erspanhdr);
+
+	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+	if (!rt)
+		return;
+
+	if (gre_handle_offloads(skb, false))
+		goto err_free_rt;
+
+	if (skb->len > dev->mtu + dev->hard_header_len) {
+		pskb_trim(skb, dev->mtu + dev->hard_header_len);
+		truncate = true;
+	}
+
+	md = ip_tunnel_info_opts(tun_info);
+	if (!md)
+		goto err_free_rt;
+
+	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+			    ntohl(md->index), truncate);
+
+	gre_build_header(skb, 8, TUNNEL_SEQ,
+			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
+
+	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
+
+	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+		      key->tos, key->ttl, df, false);
+	return;
+
+err_free_rt:
+	ip_rt_put(rt);
+err_free_skb:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+}
+
 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -503,6 +668,86 @@ free_skb:
 	return NETDEV_TX_OK;
 }
 
+static inline u8 tos_to_cos(u8 tos)
+{
+	u8 dscp, cos;
+
+	dscp = tos >> 2;
+	cos = dscp >> 3;
+	return cos;
+}
+
+static void erspan_build_header(struct sk_buff *skb,
+				__be32 id, u32 index, bool truncate)
+{
+	struct iphdr *iphdr = ip_hdr(skb);
+	struct ethhdr *eth = eth_hdr(skb);
+	enum erspan_encap_type enc_type;
+	struct erspanhdr *ershdr;
+	struct qtag_prefix {
+		__be16 eth_type;
+		__be16 tci;
+	} *qp;
+	u16 vlan_tci = 0;
+
+	enc_type = ERSPAN_ENCAP_NOVLAN;
+
+	/* If mirrored packet has vlan tag, extract tci and
+	 *  preserve vlan header in the mirrored frame.
+	 */
+	if (eth->h_proto == htons(ETH_P_8021Q)) {
+		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+		vlan_tci = ntohs(qp->tci);
+		enc_type = ERSPAN_ENCAP_INFRAME;
+	}
+
+	skb_push(skb, sizeof(*ershdr));
+	ershdr = (struct erspanhdr *)skb->data;
+	memset(ershdr, 0, sizeof(*ershdr));
+
+	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
+				 (ERSPAN_VERSION << VER_OFFSET));
+	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
+			   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
+			   (enc_type << EN_OFFSET & EN_MASK) |
+			   ((truncate << T_OFFSET) & T_MASK));
+	ershdr->md.index = htonl(index & INDEX_MASK);
+}
+
+static netdev_tx_t erspan_xmit(struct sk_buff *skb,
+			       struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	bool truncate = false;
+
+	if (tunnel->collect_md) {
+		erspan_fb_xmit(skb, dev, skb->protocol);
+		return NETDEV_TX_OK;
+	}
+
+	if (gre_handle_offloads(skb, false))
+		goto free_skb;
+
+	if (skb_cow_head(skb, dev->needed_headroom))
+		goto free_skb;
+
+	if (skb->len > dev->mtu + dev->hard_header_len) {
+		pskb_trim(skb, dev->mtu + dev->hard_header_len);
+		truncate = true;
+	}
+
+	/* Push ERSPAN header */
+	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
+	tunnel->parms.o_flags &= ~TUNNEL_KEY;
+	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
+	return NETDEV_TX_OK;
+
+free_skb:
+	kfree_skb(skb);
+	dev->stats.tx_dropped++;
+	return NETDEV_TX_OK;
+}
+
 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
 				struct net_device *dev)
 {
@@ -528,20 +773,46 @@ free_skb:
 	return NETDEV_TX_OK;
 }
 
+static void ipgre_link_update(struct net_device *dev, bool set_mtu)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	int len;
+
+	len = tunnel->tun_hlen;
+	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+	len = tunnel->tun_hlen - len;
+	tunnel->hlen = tunnel->hlen + len;
+
+	dev->needed_headroom = dev->needed_headroom + len;
+	if (set_mtu)
+		dev->mtu = max_t(int, dev->mtu - len, 68);
+
+	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
+		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
+		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
+			dev->features |= NETIF_F_GSO_SOFTWARE;
+			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+		}
+		dev->features |= NETIF_F_LLTX;
+	}
+}
+
 static int ipgre_tunnel_ioctl(struct net_device *dev,
 			      struct ifreq *ifr, int cmd)
 {
-	int err;
 	struct ip_tunnel_parm p;
+	int err;
 
 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 		return -EFAULT;
+
 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
-		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
-		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
+		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
+		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
 			return -EINVAL;
 	}
+
 	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
 	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
 
@@ -549,11 +820,22 @@ static int ipgre_tunnel_ioctl(struct net_device *dev,
 	if (err)
 		return err;
 
+	if (cmd == SIOCCHGTUNNEL) {
+		struct ip_tunnel *t = netdev_priv(dev);
+
+		t->parms.i_flags = p.i_flags;
+		t->parms.o_flags = p.o_flags;
+
+		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
+			ipgre_link_update(dev, true);
+	}
+
 	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
 	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
 
 	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 		return -EFAULT;
+
 	return 0;
 }
 
@@ -766,15 +1048,14 @@ static int __net_init ipgre_init_net(struct net *net)
 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
 }
 
-static void __net_exit ipgre_exit_net(struct net *net)
+static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
 {
-	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
-	ip_tunnel_delete_net(itn, &ipgre_link_ops);
+	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
 }
 
 static struct pernet_operations ipgre_net_ops = {
 	.init = ipgre_init_net,
-	.exit = ipgre_exit_net,
+	.exit_batch = ipgre_exit_batch_net,
 	.id   = &ipgre_net_id,
 	.size = sizeof(struct ip_tunnel_net),
 };
@@ -828,6 +1109,42 @@ out:
 	return ipgre_tunnel_validate(tb, data, extack);
 }
 
+static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
+			   struct netlink_ext_ack *extack)
+{
+	__be16 flags = 0;
+	int ret;
+
+	if (!data)
+		return 0;
+
+	ret = ipgre_tap_validate(tb, data, extack);
+	if (ret)
+		return ret;
+
+	/* ERSPAN should only have GRE sequence and key flag */
+	if (data[IFLA_GRE_OFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+	if (data[IFLA_GRE_IFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+	if (!data[IFLA_GRE_COLLECT_METADATA] &&
+	    flags != (GRE_SEQ | GRE_KEY))
+		return -EINVAL;
+
+	/* ERSPAN Session ID has only 10 bits. Since we reuse the
+	 * 32-bit key field as the ID, check its range.
+	 */
+	if (data[IFLA_GRE_IKEY] &&
+	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+		return -EINVAL;
+
+	if (data[IFLA_GRE_OKEY] &&
+	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int ipgre_netlink_parms(struct net_device *dev,
 				struct nlattr *data[],
 				struct nlattr *tb[],
@@ -892,6 +1209,13 @@ static int ipgre_netlink_parms(struct net_device *dev,
 	if (data[IFLA_GRE_FWMARK])
 		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
 
+	if (data[IFLA_GRE_ERSPAN_INDEX]) {
+		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+
+		if (t->index & ~INDEX_MASK)
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -933,6 +1257,7 @@ static int gre_tap_init(struct net_device *dev)
 {
 	__gre_tunnel_init(dev);
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	netif_keep_dst(dev);
 
 	return ip_tunnel_init(dev);
 }
@@ -949,6 +1274,39 @@ static const struct net_device_ops gre_tap_netdev_ops = {
 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
 };
 
+static int erspan_tunnel_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	int t_hlen;
+
+	tunnel->tun_hlen = 8;
+	tunnel->parms.iph.protocol = IPPROTO_GRE;
+	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+		       sizeof(struct erspanhdr);
+	t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+	dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
+	dev->mtu = ETH_DATA_LEN - t_hlen - 4;
+	dev->features		|= GRE_FEATURES;
+	dev->hw_features	|= GRE_FEATURES;
+	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
+	netif_keep_dst(dev);
+
+	return ip_tunnel_init(dev);
+}
+
+static const struct net_device_ops erspan_netdev_ops = {
+	.ndo_init		= erspan_tunnel_init,
+	.ndo_uninit		= ip_tunnel_uninit,
+	.ndo_start_xmit		= erspan_xmit,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_mtu		= ip_tunnel_change_mtu,
+	.ndo_get_stats64	= ip_tunnel_get_stats64,
+	.ndo_get_iflink		= ip_tunnel_get_iflink,
+	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
+};
+
 static void ipgre_tap_setup(struct net_device *dev)
 {
 	ether_setup(dev);
@@ -986,9 +1344,9 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
 			    struct netlink_ext_ack *extack)
 {
 	struct ip_tunnel *t = netdev_priv(dev);
-	struct ip_tunnel_parm p;
 	struct ip_tunnel_encap ipencap;
 	__u32 fwmark = t->fwmark;
+	struct ip_tunnel_parm p;
 	int err;
 
 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
@@ -1001,7 +1359,18 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
 	if (err < 0)
 		return err;
-	return ip_tunnel_changelink(dev, tb, &p, fwmark);
+
+	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
+	if (err < 0)
+		return err;
+
+	t->parms.i_flags = p.i_flags;
+	t->parms.o_flags = p.o_flags;
+
+	if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
+		ipgre_link_update(dev, !tb[IFLA_MTU]);
+
+	return 0;
 }
 
 static size_t ipgre_get_size(const struct net_device *dev)
@@ -1041,6 +1410,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
 		nla_total_size(1) +
 		/* IFLA_GRE_FWMARK */
 		nla_total_size(4) +
+		/* IFLA_GRE_ERSPAN_INDEX */
+		nla_total_size(4) +
 		0;
 }
 
@@ -1083,12 +1454,25 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			goto nla_put_failure;
 	}
 
+	if (t->index)
+		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+			goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static void erspan_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+	dev->netdev_ops = &erspan_netdev_ops;
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	ip_tunnel_setup(dev, erspan_net_id);
+}
+
 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
@@ -1107,6 +1491,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
 	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
 	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
+	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
 };
 
 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
@@ -1139,6 +1524,21 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
 	.get_link_net	= ip_tunnel_get_link_net,
 };
 
+static struct rtnl_link_ops erspan_link_ops __read_mostly = {
+	.kind		= "erspan",
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ipgre_policy,
+	.priv_size	= sizeof(struct ip_tunnel),
+	.setup		= erspan_setup,
+	.validate	= erspan_validate,
+	.newlink	= ipgre_newlink,
+	.changelink	= ipgre_changelink,
+	.dellink	= ip_tunnel_dellink,
+	.get_size	= ipgre_get_size,
+	.fill_info	= ipgre_fill_info,
+	.get_link_net	= ip_tunnel_get_link_net,
+};
+
 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 					u8 name_assign_type)
 {
@@ -1189,19 +1589,36 @@ static int __net_init ipgre_tap_init_net(struct net *net)
 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
 }
 
-static void __net_exit ipgre_tap_exit_net(struct net *net)
+static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
 {
-	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
-	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
+	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
 }
 
 static struct pernet_operations ipgre_tap_net_ops = {
 	.init = ipgre_tap_init_net,
-	.exit = ipgre_tap_exit_net,
+	.exit_batch = ipgre_tap_exit_batch_net,
 	.id   = &gre_tap_net_id,
 	.size = sizeof(struct ip_tunnel_net),
 };
 
+static int __net_init erspan_init_net(struct net *net)
+{
+	return ip_tunnel_init_net(net, erspan_net_id,
+				  &erspan_link_ops, "erspan0");
+}
+
+static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
+{
+	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
+}
+
+static struct pernet_operations erspan_net_ops = {
+	.init = erspan_init_net,
+	.exit_batch = erspan_exit_batch_net,
+	.id   = &erspan_net_id,
+	.size = sizeof(struct ip_tunnel_net),
+};
+
 static int __init ipgre_init(void)
 {
 	int err;
@@ -1214,7 +1631,11 @@ static int __init ipgre_init(void)
 
 	err = register_pernet_device(&ipgre_tap_net_ops);
 	if (err < 0)
-		goto pnet_tap_faied;
+		goto pnet_tap_failed;
+
+	err = register_pernet_device(&erspan_net_ops);
+	if (err < 0)
+		goto pnet_erspan_failed;
 
 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
 	if (err < 0) {
@@ -1230,15 +1651,23 @@ static int __init ipgre_init(void)
 	if (err < 0)
 		goto tap_ops_failed;
 
+	err = rtnl_link_register(&erspan_link_ops);
+	if (err < 0)
+		goto erspan_link_failed;
+
 	return 0;
 
+erspan_link_failed:
+	rtnl_link_unregister(&ipgre_tap_ops);
 tap_ops_failed:
 	rtnl_link_unregister(&ipgre_link_ops);
 rtnl_link_failed:
 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
 add_proto_failed:
+	unregister_pernet_device(&erspan_net_ops);
+pnet_erspan_failed:
 	unregister_pernet_device(&ipgre_tap_net_ops);
-pnet_tap_faied:
+pnet_tap_failed:
 	unregister_pernet_device(&ipgre_net_ops);
 	return err;
 }
@@ -1247,9 +1676,11 @@ static void __exit ipgre_fini(void)
 {
 	rtnl_link_unregister(&ipgre_tap_ops);
 	rtnl_link_unregister(&ipgre_link_ops);
+	rtnl_link_unregister(&erspan_link_ops);
 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
 	unregister_pernet_device(&ipgre_tap_net_ops);
 	unregister_pernet_device(&ipgre_net_ops);
+	unregister_pernet_device(&erspan_net_ops);
 }
 
 module_init(ipgre_init);
@@ -1257,5 +1688,7 @@ module_exit(ipgre_fini);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_RTNL_LINK("gre");
 MODULE_ALIAS_RTNL_LINK("gretap");
+MODULE_ALIAS_RTNL_LINK("erspan");
 MODULE_ALIAS_NETDEV("gre0");
 MODULE_ALIAS_NETDEV("gretap0");
+MODULE_ALIAS_NETDEV("erspan0");
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fa2dc8f692c6..57fc13c6ab2b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -311,9 +311,10 @@ drop:
 static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
-	struct rtable *rt;
+	int (*edemux)(struct sk_buff *skb);
 	struct net_device *dev = skb->dev;
-	void (*edemux)(struct sk_buff *skb);
+	struct rtable *rt;
+	int err;
 
 	/* if ingress device is enslaved to an L3 master device pass the
 	 * skb to its handler for processing
@@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 		ipprot = rcu_dereference(inet_protos[protocol]);
 		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
-			edemux(skb);
+			err = edemux(skb);
+			if (unlikely(err))
+				goto drop_error;
 			/* must reload iph, skb->head might have changed */
 			iph = ip_hdr(skb);
 		}
@@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 	 *	how the packet travels inside Linux networking.
 	 */
 	if (!skb_valid_dst(skb)) {
-		int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
-					       iph->tos, dev);
-		if (unlikely(err)) {
-			if (err == -EXDEV)
-				__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
-			goto drop;
-		}
+		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					   iph->tos, dev);
+		if (unlikely(err))
+			goto drop_error;
 	}
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
@@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 drop:
 	kfree_skb(skb);
 	return NET_RX_DROP;
+
+drop_error:
+	if (err == -EXDEV)
+		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
+	goto drop;
 }
 
 /*
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 93157f2f4758..ed194d46c00e 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -86,8 +87,8 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
  * NOTE: dopt cannot point to skb.
  */
 
-int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
-		      const struct ip_options *sopt)
+int __ip_options_echo(struct net *net, struct ip_options *dopt,
+		      struct sk_buff *skb, const struct ip_options *sopt)
 {
 	unsigned char *sptr, *dptr;
 	int soffset, doffset;
@@ -140,7 +141,7 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
 						__be32 addr;
 
 						memcpy(&addr, dptr+soffset-1, 4);
-						if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) {
+						if (inet_addr_type(net, addr) != RTN_UNICAST) {
 							dopt->ts_needtime = 1;
 							soffset += 8;
 						}
@@ -174,9 +175,6 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
 				doffset -= 4;
 		}
 		if (doffset > 3) {
-			__be32 daddr = fib_compute_spec_dst(skb);
-
-			memcpy(&start[doffset-1], &daddr, 4);
 			dopt->faddr = faddr;
 			dptr[0] = start[0];
 			dptr[1] = doffset+3;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e153c40c2436..e8e675be60ec 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -853,61 +853,6 @@ csum_page(struct page *page, int offset, int copy)
 	return csum;
 }
 
-static inline int ip_ufo_append_data(struct sock *sk,
-			struct sk_buff_head *queue,
-			int getfrag(void *from, char *to, int offset, int len,
-			       int odd, struct sk_buff *skb),
-			void *from, int length, int hh_len, int fragheaderlen,
-			int transhdrlen, int maxfraglen, unsigned int flags)
-{
-	struct sk_buff *skb;
-	int err;
-
-	/* There is support for UDP fragmentation offload by network
-	 * device, so create one single skb packet containing complete
-	 * udp datagram
-	 */
-	skb = skb_peek_tail(queue);
-	if (!skb) {
-		skb = sock_alloc_send_skb(sk,
-			hh_len + fragheaderlen + transhdrlen + 20,
-			(flags & MSG_DONTWAIT), &err);
-
-		if (!skb)
-			return err;
-
-		/* reserve space for Hardware header */
-		skb_reserve(skb, hh_len);
-
-		/* create space for UDP/IP header */
-		skb_put(skb, fragheaderlen + transhdrlen);
-
-		/* initialize network header pointer */
-		skb_reset_network_header(skb);
-
-		/* initialize protocol header pointer */
-		skb->transport_header = skb->network_header + fragheaderlen;
-
-		skb->csum = 0;
-
-		if (flags & MSG_CONFIRM)
-			skb_set_dst_pending_confirm(skb, 1);
-
-		__skb_queue_tail(queue, skb);
-	} else if (skb_is_gso(skb)) {
-		goto append;
-	}
-
-	skb->ip_summed = CHECKSUM_PARTIAL;
-	/* specify the length of each IP datagram fragment */
-	skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
-	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-
-append:
-	return skb_append_datato_frags(sk, skb, getfrag, from,
-				       (length - transhdrlen));
-}
-
 static int __ip_append_data(struct sock *sk,
 			    struct flowi4 *fl4,
 			    struct sk_buff_head *queue,
@@ -965,19 +910,6 @@ static int __ip_append_data(struct sock *sk,
 		csummode = CHECKSUM_PARTIAL;
 
 	cork->length += length;
-	if ((skb && skb_is_gso(skb)) ||
-	    (((length + (skb ? skb->len : fragheaderlen)) > mtu) &&
-	    (skb_queue_len(queue) <= 1) &&
-	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
-	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
-		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
-					 hh_len, fragheaderlen, transhdrlen,
-					 maxfraglen, flags);
-		if (err)
-			goto error;
-		return 0;
-	}
 
 	/* So, what's going on in the loop below?
 	 *
@@ -1288,28 +1220,14 @@ ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 	if (!skb)
 		return -EINVAL;
 
-	if ((size + skb->len > mtu) &&
-	    (skb_queue_len(&sk->sk_write_queue) == 1) &&
-	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO)) {
-		if (skb->ip_summed != CHECKSUM_PARTIAL)
-			return -EOPNOTSUPP;
-
-		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
-		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-	}
 	cork->length += size;
 
 	while (size > 0) {
-		if (skb_is_gso(skb)) {
-			len = size;
-		} else {
+		/* Check if the remaining data fits into current packet. */
+		len = mtu - skb->len;
+		if (len < size)
+			len = maxfraglen - skb->len;
 
-			/* Check if the remaining data fits into current packet. */
-			len = mtu - skb->len;
-			if (len < size)
-				len = maxfraglen - skb->len;
-		}
 		if (len <= 0) {
 			struct sk_buff *skb_prev;
 			int alloclen;
@@ -1603,7 +1521,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 	int err;
 	int oif;
 
-	if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
+	if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
 		return;
 
 	ipc.addr = daddr;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ecc4b4a2413e..60fb1eb7d7d8 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -80,7 +81,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
 }
 
 
-static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
+static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
+				 struct sk_buff *skb)
 {
 	unsigned char optbuf[sizeof(struct ip_options) + 40];
 	struct ip_options *opt = (struct ip_options *)optbuf;
@@ -88,7 +90,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
 	if (IPCB(skb)->opt.optlen == 0)
 		return;
 
-	if (ip_options_echo(opt, skb)) {
+	if (ip_options_echo(net, opt, skb)) {
 		msg->msg_flags |= MSG_CTRUNC;
 		return;
 	}
@@ -204,7 +206,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
 	}
 
 	if (flags & IP_CMSG_RETOPTS) {
-		ip_cmsg_recv_retopts(msg, skb);
+		ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
 
 		flags &= ~IP_CMSG_RETOPTS;
 		if (!flags)
@@ -1219,22 +1221,20 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
 		 * (e.g., process binds socket to eth0 for Tx which is
 		 * redirected to loopback in the rtable/dst).
 		 */
+		struct rtable *rt = skb_rtable(skb);
+		bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
+
 		if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
 			pktinfo->ipi_ifindex = inet_iif(skb);
+		else if (l3slave && rt && rt->rt_iif)
+			pktinfo->ipi_ifindex = rt->rt_iif;
 
 		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
 	} else {
 		pktinfo->ipi_ifindex = 0;
 		pktinfo->ipi_spec_dst.s_addr = 0;
 	}
-	/* We need to keep the dst for __ip_options_echo()
-	 * We could restrict the test to opt.ts_needtime || opt.srr,
-	 * but the following is good enough as IP options are not often used.
-	 */
-	if (unlikely(IPCB(skb)->opt.optlen))
-		skb_dst_force(skb);
-	else
-		skb_dst_drop(skb);
+	skb_dst_drop(skb);
 }
 
 int ip_setsockopt(struct sock *sk, int level,
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 129d1a3616f8..fe6fee728ce4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -176,7 +176,7 @@ skip_key_lookup:
 		return cand;
 
 	t = rcu_dereference(itn->collect_md_tun);
-	if (t)
+	if (t && t->dev->flags & IFF_UP)
 		return t;
 
 	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
@@ -618,8 +618,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
 		ip_rt_put(rt);
 		goto tx_dropped;
 	}
-	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
-		      key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
+		      df, !net_eq(tunnel->net, dev_net(dev)));
 	return;
 tx_error:
 	dev->stats.tx_errors++;
@@ -1061,16 +1061,22 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
 	}
 }
 
-void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
+void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
+			   struct rtnl_link_ops *ops)
 {
+	struct ip_tunnel_net *itn;
+	struct net *net;
 	LIST_HEAD(list);
 
 	rtnl_lock();
-	ip_tunnel_destroy(itn, &list, ops);
+	list_for_each_entry(net, net_list, exit_list) {
+		itn = net_generic(net, id);
+		ip_tunnel_destroy(itn, &list, ops);
+	}
 	unregister_netdevice_many(&list);
 	rtnl_unlock();
 }
-EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
+EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
 
 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 		      struct ip_tunnel_parm *p, __u32 fwmark)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 0192c255e508..949f432a5f04 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 	struct ip_tunnel_parm *parms = &tunnel->parms;
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *tdev;	/* Device to other host */
+	int pkt_len = skb->len;
 	int err;
 	int mtu;
 
@@ -197,15 +198,6 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 		goto tx_error;
 	}
 
-	if (tunnel->err_count > 0) {
-		if (time_before(jiffies,
-				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
-			tunnel->err_count--;
-			dst_link_failure(skb);
-		} else
-			tunnel->err_count = 0;
-	}
-
 	mtu = dst_mtu(dst);
 	if (skb->len > mtu) {
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
@@ -229,7 +221,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 
 	err = dst_output(tunnel->net, skb->sk, skb);
 	if (net_xmit_eval(err) == 0)
-		err = skb->len;
+		err = pkt_len;
 	iptunnel_xmit_stats(dev, err);
 	return NETDEV_TX_OK;
 
@@ -452,15 +444,14 @@ static int __net_init vti_init_net(struct net *net)
 	return 0;
 }
 
-static void __net_exit vti_exit_net(struct net *net)
+static void __net_exit vti_exit_batch_net(struct list_head *list_net)
 {
-	struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
-	ip_tunnel_delete_net(itn, &vti_link_ops);
+	ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
 }
 
 static struct pernet_operations vti_net_ops = {
 	.init = vti_init_net,
-	.exit = vti_exit_net,
+	.exit_batch = vti_exit_batch_net,
 	.id   = &vti_net_id,
 	.size = sizeof(struct ip_tunnel_net),
 };
@@ -584,33 +575,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
 	.get_link_net	= ip_tunnel_get_link_net,
 };
 
-static bool is_vti_tunnel(const struct net_device *dev)
-{
-	return dev->netdev_ops == &vti_netdev_ops;
-}
-
-static int vti_device_event(struct notifier_block *unused,
-			    unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct ip_tunnel *tunnel = netdev_priv(dev);
-
-	if (!is_vti_tunnel(dev))
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case NETDEV_DOWN:
-		if (!net_eq(tunnel->net, dev_net(dev)))
-			xfrm_garbage_collect(tunnel->net);
-		break;
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block vti_notifier_block __read_mostly = {
-	.notifier_call = vti_device_event,
-};
-
 static int __init vti_init(void)
 {
 	const char *msg;
@@ -618,8 +582,6 @@ static int __init vti_init(void)
 
 	pr_info("IPv4 over IPsec tunneling driver\n");
 
-	register_netdevice_notifier(&vti_notifier_block);
-
 	msg = "tunnel device";
 	err = register_pernet_device(&vti_net_ops);
 	if (err < 0)
@@ -652,7 +614,6 @@ xfrm_proto_ah_failed:
 xfrm_proto_esp_failed:
 	unregister_pernet_device(&vti_net_ops);
 pernet_dev_failed:
-	unregister_netdevice_notifier(&vti_notifier_block);
 	pr_err("vti init: failed to register %s\n", msg);
 	return err;
 }
@@ -664,7 +625,6 @@ static void __exit vti_fini(void)
 	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
 	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
 	unregister_pernet_device(&vti_net_ops);
-	unregister_netdevice_notifier(&vti_notifier_block);
 }
 
 module_init(vti_init);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4c5dfe6bd34d..abdebca848c9 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or
  *  user-supplied information to configure own IP address and routes.
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index fb1ad22b5e29..c891235b4966 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly;
 
 static int ipip_err(struct sk_buff *skb, u32 info)
 {
-
-/* All the routers (except for Linux) return only
-   8 bytes of packet payload. It means, that precise relaying of
-   ICMP in the real Internet is absolutely infeasible.
- */
+	/* All the routers (except for Linux) return only
+	 * 8 bytes of packet payload. It means, that precise relaying of
+	 * ICMP in the real Internet is absolutely infeasible.
+	 */
 	struct net *net = dev_net(skb->dev);
 	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 	const struct iphdr *iph = (const struct iphdr *)skb->data;
-	struct ip_tunnel *t;
-	int err;
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
+	struct ip_tunnel *t;
+	int err = 0;
+
+	switch (type) {
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+			/* Impossible event. */
+			goto out;
+		default:
+			/* All others are translated to HOST_UNREACH.
+			 * rfc2003 contains "deep thoughts" about NET_UNREACH,
+			 * I believe they are just ether pollution. --ANK
+			 */
+			break;
+		}
+		break;
+
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			goto out;
+		break;
+
+	case ICMP_REDIRECT:
+		break;
+
+	default:
+		goto out;
+	}
 
-	err = -ENOENT;
 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 			     iph->daddr, iph->saddr, 0);
-	if (!t)
+	if (!t) {
+		err = -ENOENT;
 		goto out;
+	}
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->parms.link, 0, iph->protocol, 0);
-		err = 0;
+		ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
+				 iph->protocol, 0);
 		goto out;
 	}
 
 	if (type == ICMP_REDIRECT) {
-		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
-			      iph->protocol, 0);
-		err = 0;
+		ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
 		goto out;
 	}
 
-	if (t->parms.iph.daddr == 0)
+	if (t->parms.iph.daddr == 0) {
+		err = -ENOENT;
 		goto out;
+	}
 
-	err = 0;
 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 		goto out;
 
@@ -634,15 +659,14 @@ static int __net_init ipip_init_net(struct net *net)
 	return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
 }
 
-static void __net_exit ipip_exit_net(struct net *net)
+static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
 {
-	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
-	ip_tunnel_delete_net(itn, &ipip_link_ops);
+	ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
 }
 
 static struct pernet_operations ipip_net_ops = {
 	.init = ipip_init_net,
-	.exit = ipip_exit_net,
+	.exit_batch = ipip_exit_batch_net,
 	.id   = &ipip_net_id,
 	.size = sizeof(struct ip_tunnel_net),
 };
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 06863ea3fc5b..40a43ad294cb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,6 +67,7 @@
 #include <net/fib_rules.h>
 #include <linux/netconf.h>
 #include <net/nexthop.h>
+#include <net/switchdev.h>
 
 struct ipmr_rule {
 	struct fib_rule		common;
@@ -264,6 +265,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
 	fib_rules_unregister(net->ipv4.mr_rules_ops);
 	rtnl_unlock();
 }
+
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
+}
+
+static unsigned int ipmr_rules_seq_read(struct net *net)
+{
+	return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
+}
+
+bool ipmr_rule_default(const struct fib_rule *rule)
+{
+	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
+}
+EXPORT_SYMBOL(ipmr_rule_default);
 #else
 #define ipmr_for_each_table(mrt, net) \
 	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
@@ -298,6 +315,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
 	net->ipv4.mrt = NULL;
 	rtnl_unlock();
 }
+
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return 0;
+}
+
+static unsigned int ipmr_rules_seq_read(struct net *net)
+{
+	return 0;
+}
+
+bool ipmr_rule_default(const struct fib_rule *rule)
+{
+	return true;
+}
+EXPORT_SYMBOL(ipmr_rule_default);
 #endif
 
 static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -587,6 +620,82 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
 }
 #endif
 
+static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
+					struct net *net,
+					enum fib_event_type event_type,
+					struct vif_device *vif,
+					vifi_t vif_index, u32 tb_id)
+{
+	struct vif_entry_notifier_info info = {
+		.info = {
+			.family = RTNL_FAMILY_IPMR,
+			.net = net,
+		},
+		.dev = vif->dev,
+		.vif_index = vif_index,
+		.vif_flags = vif->flags,
+		.tb_id = tb_id,
+	};
+
+	return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_ipmr_vif_entry_notifiers(struct net *net,
+					 enum fib_event_type event_type,
+					 struct vif_device *vif,
+					 vifi_t vif_index, u32 tb_id)
+{
+	struct vif_entry_notifier_info info = {
+		.info = {
+			.family = RTNL_FAMILY_IPMR,
+			.net = net,
+		},
+		.dev = vif->dev,
+		.vif_index = vif_index,
+		.vif_flags = vif->flags,
+		.tb_id = tb_id,
+	};
+
+	ASSERT_RTNL();
+	net->ipv4.ipmr_seq++;
+	return call_fib_notifiers(net, event_type, &info.info);
+}
+
+static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
+					struct net *net,
+					enum fib_event_type event_type,
+					struct mfc_cache *mfc, u32 tb_id)
+{
+	struct mfc_entry_notifier_info info = {
+		.info = {
+			.family = RTNL_FAMILY_IPMR,
+			.net = net,
+		},
+		.mfc = mfc,
+		.tb_id = tb_id
+	};
+
+	return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_ipmr_mfc_entry_notifiers(struct net *net,
+					 enum fib_event_type event_type,
+					 struct mfc_cache *mfc, u32 tb_id)
+{
+	struct mfc_entry_notifier_info info = {
+		.info = {
+			.family = RTNL_FAMILY_IPMR,
+			.net = net,
+		},
+		.mfc = mfc,
+		.tb_id = tb_id
+	};
+
+	ASSERT_RTNL();
+	net->ipv4.ipmr_seq++;
+	return call_fib_notifiers(net, event_type, &info.info);
+}
+
 /**
  *	vif_delete - Delete a VIF entry
  *	@notify: Set to 1, if the caller is a notifier_call
@@ -594,6 +703,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
 static int vif_delete(struct mr_table *mrt, int vifi, int notify,
 		      struct list_head *head)
 {
+	struct net *net = read_pnet(&mrt->net);
 	struct vif_device *v;
 	struct net_device *dev;
 	struct in_device *in_dev;
@@ -603,6 +713,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
 
 	v = &mrt->vif_table[vifi];
 
+	if (VIF_EXISTS(mrt, vifi))
+		call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
+					      mrt->id);
+
 	write_lock_bh(&mrt_lock);
 	dev = v->dev;
 	v->dev = NULL;
@@ -652,10 +766,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head)
 	kmem_cache_free(mrt_cachep, c);
 }
 
-static inline void ipmr_cache_free(struct mfc_cache *c)
+void ipmr_cache_free(struct mfc_cache *c)
 {
 	call_rcu(&c->rcu, ipmr_cache_free_rcu);
 }
+EXPORT_SYMBOL(ipmr_cache_free);
 
 /* Destroy an unresolved cache entry, killing queued skbs
  * and reporting error to netlink readers.
@@ -754,6 +869,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 		   struct vifctl *vifc, int mrtsock)
 {
 	int vifi = vifc->vifc_vifi;
+	struct switchdev_attr attr = {
+		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
+	};
 	struct vif_device *v = &mrt->vif_table[vifi];
 	struct net_device *dev;
 	struct in_device *in_dev;
@@ -828,6 +946,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 
 	/* Fill in the VIF structures */
 
+	attr.orig_dev = dev;
+	if (!switchdev_port_attr_get(dev, &attr)) {
+		memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
+		v->dev_parent_id.id_len = attr.u.ppid.id_len;
+	} else {
+		v->dev_parent_id.id_len = 0;
+	}
 	v->rate_limit = vifc->vifc_rate_limit;
 	v->local = vifc->vifc_lcl_addr.s_addr;
 	v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -851,6 +976,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 	if (vifi+1 > mrt->maxvif)
 		mrt->maxvif = vifi+1;
 	write_unlock_bh(&mrt_lock);
+	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
 	return 0;
 }
 
@@ -949,6 +1075,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
 	if (c) {
 		c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 		c->mfc_un.res.minvif = MAXVIFS;
+		refcount_set(&c->mfc_un.res.refcount, 1);
 	}
 	return c;
 }
@@ -1150,6 +1277,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 
 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
 {
+	struct net *net = read_pnet(&mrt->net);
 	struct mfc_cache *c;
 
 	/* The entries are added/deleted only under RTNL */
@@ -1161,8 +1289,9 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
 		return -ENOENT;
 	rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
 	list_del_rcu(&c->list);
+	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
 	mroute_netlink_event(mrt, c, RTM_DELROUTE);
-	ipmr_cache_free(c);
+	ipmr_cache_put(c);
 
 	return 0;
 }
@@ -1189,6 +1318,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 		if (!mrtsock)
 			c->mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
+		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
+					      mrt->id);
 		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
 		return 0;
 	}
@@ -1238,6 +1369,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 		ipmr_cache_resolve(net, mrt, uc, c);
 		ipmr_cache_free(uc);
 	}
+	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
 	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
 	return 0;
 }
@@ -1245,6 +1377,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 /* Close the multicast socket, and clear the vif tables etc */
 static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
+	struct net *net = read_pnet(&mrt->net);
 	struct mfc_cache *c, *tmp;
 	LIST_HEAD(list);
 	int i;
@@ -1263,8 +1396,10 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
 			continue;
 		rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
 		list_del_rcu(&c->list);
+		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+					      mrt->id);
 		mroute_netlink_event(mrt, c, RTM_DELROUTE);
-		ipmr_cache_free(c);
+		ipmr_cache_put(c);
 	}
 
 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
@@ -1393,6 +1528,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 	case MRT_ADD_MFC:
 	case MRT_DEL_MFC:
 		parent = -1;
+		/* fall through */
 	case MRT_ADD_MFC_PROXY:
 	case MRT_DEL_MFC_PROXY:
 		if (optlen != sizeof(mfc)) {
@@ -1724,10 +1860,33 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
 	return dst_output(net, sk, skb);
 }
 
+#ifdef CONFIG_NET_SWITCHDEV
+static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
+				   int in_vifi, int out_vifi)
+{
+	struct vif_device *out_vif = &mrt->vif_table[out_vifi];
+	struct vif_device *in_vif = &mrt->vif_table[in_vifi];
+
+	if (!skb->offload_mr_fwd_mark)
+		return false;
+	if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
+		return false;
+	return netdev_phys_item_id_same(&out_vif->dev_parent_id,
+					&in_vif->dev_parent_id);
+}
+#else
+static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
+				   int in_vifi, int out_vifi)
+{
+	return false;
+}
+#endif
+
 /* Processing handlers for ipmr_forward */
 
 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
-			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
+			    int in_vifi, struct sk_buff *skb,
+			    struct mfc_cache *c, int vifi)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct vif_device *vif = &mrt->vif_table[vifi];
@@ -1748,6 +1907,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 		goto out_free;
 	}
 
+	if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
+		goto out_free;
+
 	if (vif->flags & VIFF_TUNNEL) {
 		rt = ip_route_output_ports(net, &fl4, NULL,
 					   vif->remote, vif->local,
@@ -1925,8 +2087,8 @@ forward:
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
 				if (skb2)
-					ipmr_queue_xmit(net, mrt, skb2, cache,
-							psend);
+					ipmr_queue_xmit(net, mrt, true_vifi,
+							skb2, cache, psend);
 			}
 			psend = ct;
 		}
@@ -1937,9 +2099,10 @@ last_forward:
 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
 			if (skb2)
-				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
+				ipmr_queue_xmit(net, mrt, true_vifi, skb2,
+						cache, psend);
 		} else {
-			ipmr_queue_xmit(net, mrt, skb, cache, psend);
+			ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
 			return;
 		}
 	}
@@ -2156,6 +2319,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 	    nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
 		return -EMSGSIZE;
 
+	if (c->mfc_flags & MFC_OFFLOAD)
+		rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
 	if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
 		return -EMSGSIZE;
 
@@ -3048,14 +3214,87 @@ static const struct net_protocol pim_protocol = {
 };
 #endif
 
+static unsigned int ipmr_seq_read(struct net *net)
+{
+	ASSERT_RTNL();
+
+	return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
+}
+
+static int ipmr_dump(struct net *net, struct notifier_block *nb)
+{
+	struct mr_table *mrt;
+	int err;
+
+	err = ipmr_rules_dump(net, nb);
+	if (err)
+		return err;
+
+	ipmr_for_each_table(mrt, net) {
+		struct vif_device *v = &mrt->vif_table[0];
+		struct mfc_cache *mfc;
+		int vifi;
+
+		/* Notify on table VIF entries */
+		read_lock(&mrt_lock);
+		for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
+			if (!v->dev)
+				continue;
+
+			call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
+						     v, vifi, mrt->id);
+		}
+		read_unlock(&mrt_lock);
+
+		/* Notify on table MFC entries */
+		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+			call_ipmr_mfc_entry_notifier(nb, net,
+						     FIB_EVENT_ENTRY_ADD, mfc,
+						     mrt->id);
+	}
+
+	return 0;
+}
+
+static const struct fib_notifier_ops ipmr_notifier_ops_template = {
+	.family		= RTNL_FAMILY_IPMR,
+	.fib_seq_read	= ipmr_seq_read,
+	.fib_dump	= ipmr_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __net_init ipmr_notifier_init(struct net *net)
+{
+	struct fib_notifier_ops *ops;
+
+	net->ipv4.ipmr_seq = 0;
+
+	ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	net->ipv4.ipmr_notifier_ops = ops;
+
+	return 0;
+}
+
+static void __net_exit ipmr_notifier_exit(struct net *net)
+{
+	fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
+	net->ipv4.ipmr_notifier_ops = NULL;
+}
+
 /* Setup for IP multicast routing */
 static int __net_init ipmr_net_init(struct net *net)
 {
 	int err;
 
+	err = ipmr_notifier_init(net);
+	if (err)
+		goto ipmr_notifier_fail;
+
 	err = ipmr_rules_init(net);
 	if (err < 0)
-		goto fail;
+		goto ipmr_rules_fail;
 
 #ifdef CONFIG_PROC_FS
 	err = -ENOMEM;
@@ -3072,7 +3311,9 @@ proc_cache_fail:
 proc_vif_fail:
 	ipmr_rules_exit(net);
 #endif
-fail:
+ipmr_rules_fail:
+	ipmr_notifier_exit(net);
+ipmr_notifier_fail:
 	return err;
 }
 
@@ -3082,6 +3323,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
 	remove_proc_entry("ip_mr_cache", net->proc_net);
 	remove_proc_entry("ip_mr_vif", net->proc_net);
 #endif
+	ipmr_notifier_exit(net);
 	ipmr_rules_exit(net);
 }
 
@@ -3114,14 +3356,14 @@ int __init ip_mr_init(void)
 	}
 #endif
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
-		      ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL);
+		      ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
-		      ipmr_rtm_route, NULL, NULL);
+		      ipmr_rtm_route, NULL, 0);
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
-		      ipmr_rtm_route, NULL, NULL);
+		      ipmr_rtm_route, NULL, 0);
 
 	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
-		      NULL, ipmr_rtm_dumplink, NULL);
+		      NULL, ipmr_rtm_dumplink, 0);
 	return 0;
 
 #ifdef CONFIG_IP_PIMSM_V2
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index f462fee66ac8..adcdae358365 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the netfilter modules on top of IPv4.
 #
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 0bc3c3d73e61..f88221aebc9d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -268,14 +268,14 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 		acpar.targinfo = t->data;
 		verdict = t->u.kernel.target->target(skb, &acpar);
 
-		/* Target might have changed stuff. */
-		arp = arp_hdr(skb);
-
-		if (verdict == XT_CONTINUE)
+		if (verdict == XT_CONTINUE) {
+			/* Target might have changed stuff. */
+			arp = arp_hdr(skb);
 			e = arpt_next_entry(e);
-		else
+		} else {
 			/* Verdict */
 			break;
+		}
 	} while (!acpar.hotdrop);
 	xt_write_recseq_end(addend);
 	local_bh_enable();
@@ -629,7 +629,27 @@ static void get_counters(const struct xt_table_info *t,
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
+			cond_resched();
+		}
+	}
+}
+
+static void get_old_counters(const struct xt_table_info *t,
+			     struct xt_counters counters[])
+{
+	struct arpt_entry *iter;
+	unsigned int cpu, i;
+
+	for_each_possible_cpu(cpu) {
+		i = 0;
+		xt_entry_foreach(iter, t->entries, t->size) {
+			struct xt_counters *tmp;
+
+			tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+			ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+			++i;
 		}
+		cond_resched();
 	}
 }
 
@@ -909,8 +929,7 @@ static int __do_replace(struct net *net, const char *name,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters, and synchronize with replace */
-	get_counters(oldinfo, counters);
+	get_old_counters(oldinfo, counters);
 
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries;
@@ -1117,7 +1136,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
 			    struct xt_table_info *newinfo, unsigned char *base)
 {
 	struct xt_entry_target *t;
-	struct xt_target *target;
 	struct arpt_entry *de;
 	unsigned int origsize;
 	int h;
@@ -1132,7 +1150,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
 
 	de->target_offset = e->target_offset - (origsize - *size);
 	t = compat_arpt_get_target(e);
-	target = t->u.kernel.target;
 	xt_compat_target_from_user(t, dstptr, size);
 
 	de->next_offset = e->next_offset - (origsize - *size);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2a55a40211cb..4cbe5e80f3bf 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -35,12 +35,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv4 packet filter");
 
-#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x)		WARN_ON(!(x))
-#else
-#define IP_NF_ASSERT(x)
-#endif
-
 void *ipt_alloc_initial_table(const struct xt_table *info)
 {
 	return xt_alloc_initial_table(ipt, IPT);
@@ -151,7 +145,7 @@ static const char *const comments[] = {
 	[NF_IP_TRACE_COMMENT_POLICY]	= "policy",
 };
 
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
 	.type = NF_LOG_TYPE_LOG,
 	.u = {
 		.log = {
@@ -263,7 +257,7 @@ ipt_do_table(struct sk_buff *skb,
 	acpar.hotdrop = false;
 	acpar.state   = state;
 
-	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+	WARN_ON(!(table->valid_hooks & (1 << hook)));
 	local_bh_disable();
 	addend = xt_write_recseq_begin();
 	private = table->private;
@@ -293,7 +287,7 @@ ipt_do_table(struct sk_buff *skb,
 		const struct xt_entry_match *ematch;
 		struct xt_counters *counter;
 
-		IP_NF_ASSERT(e);
+		WARN_ON(!e);
 		if (!ip_packet_match(ip, indev, outdev,
 		    &e->ip, acpar.fragoff)) {
  no_match:
@@ -312,7 +306,7 @@ ipt_do_table(struct sk_buff *skb,
 		ADD_COUNTER(*counter, skb->len, 1);
 
 		t = ipt_get_target(e);
-		IP_NF_ASSERT(t->u.kernel.target);
+		WARN_ON(!t->u.kernel.target);
 
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 		/* The packet is traced: log it */
@@ -352,13 +346,14 @@ ipt_do_table(struct sk_buff *skb,
 		acpar.targinfo = t->data;
 
 		verdict = t->u.kernel.target->target(skb, &acpar);
-		/* Target might have changed stuff. */
-		ip = ip_hdr(skb);
-		if (verdict == XT_CONTINUE)
+		if (verdict == XT_CONTINUE) {
+			/* Target might have changed stuff. */
+			ip = ip_hdr(skb);
 			e = ipt_next_entry(e);
-		else
+		} else {
 			/* Verdict */
 			break;
+		}
 	} while (!acpar.hotdrop);
 
 	xt_write_recseq_end(addend);
@@ -781,10 +776,31 @@ get_counters(const struct xt_table_info *t,
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i; /* macro does multi eval of i */
+			cond_resched();
 		}
 	}
 }
 
+static void get_old_counters(const struct xt_table_info *t,
+			     struct xt_counters counters[])
+{
+	struct ipt_entry *iter;
+	unsigned int cpu, i;
+
+	for_each_possible_cpu(cpu) {
+		i = 0;
+		xt_entry_foreach(iter, t->entries, t->size) {
+			const struct xt_counters *tmp;
+
+			tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+			ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+			++i; /* macro does multi eval of i */
+		}
+
+		cond_resched();
+	}
+}
+
 static struct xt_counters *alloc_counters(const struct xt_table *table)
 {
 	unsigned int countersize;
@@ -1074,8 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters, and synchronize with replace */
-	get_counters(oldinfo, counters);
+	get_old_counters(oldinfo, counters);
 
 	/* Decrease module usage counts and free resource */
 	xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
@@ -1355,7 +1370,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
 			    struct xt_table_info *newinfo, unsigned char *base)
 {
 	struct xt_entry_target *t;
-	struct xt_target *target;
 	struct ipt_entry *de;
 	unsigned int origsize;
 	int h;
@@ -1374,7 +1388,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
 
 	de->target_offset = e->target_offset - (origsize - *size);
 	t = compat_ipt_get_target(e);
-	target = t->u.kernel.target;
 	xt_compat_target_from_user(t, dstptr, size);
 
 	de->next_offset = e->next_offset - (origsize - *size);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7d72decb80f9..17b4ca562944 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -117,7 +117,8 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
 		 * functions are also incrementing the refcount on their own,
 		 * so it's safe to remove the entry even if it's in use. */
 #ifdef CONFIG_PROC_FS
-		proc_remove(c->pde);
+		if (cn->procdir)
+			proc_remove(c->pde);
 #endif
 		return;
 	}
@@ -624,7 +625,7 @@ arp_mangle(void *priv,
 	return NF_ACCEPT;
 }
 
-static struct nf_hook_ops cip_arp_ops __read_mostly = {
+static const struct nf_hook_ops cip_arp_ops = {
 	.hook = arp_mangle,
 	.pf = NFPROTO_ARP,
 	.hooknum = NF_ARP_OUT,
@@ -815,6 +816,7 @@ static void clusterip_net_exit(struct net *net)
 #ifdef CONFIG_PROC_FS
 	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
 	proc_remove(cn->procdir);
+	cn->procdir = NULL;
 #endif
 	nf_unregister_net_hook(net, &cip_arp_ops);
 }
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index f1528f7175a8..f75fc6b53115 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -330,7 +330,8 @@ static unsigned int ipv4_synproxy_hook(void *priv,
 	if (synproxy == NULL)
 		return NF_ACCEPT;
 
-	if (nf_is_loopback_packet(skb))
+	if (nf_is_loopback_packet(skb) ||
+	    ip_hdr(skb)->protocol != IPPROTO_TCP)
 		return NF_ACCEPT;
 
 	thoff = ip_hdrlen(skb);
@@ -416,7 +417,7 @@ static unsigned int ipv4_synproxy_hook(void *priv,
 	return NF_ACCEPT;
 }
 
-static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv4_synproxy_ops[] = {
 	{
 		.hook		= ipv4_synproxy_hook,
 		.pf		= NFPROTO_IPV4,
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 138a24bc76ad..a1a07b338ccf 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -67,7 +67,7 @@ static unsigned int iptable_nat_ipv4_local_fn(void *priv,
 	return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain);
 }
 
-static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
+static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
 	/* Before packet filtering, change destination */
 	{
 		.hook		= iptable_nat_ipv4_in,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2e14ed11a35c..89af9d88ca21 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -63,13 +63,6 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-static void ipv4_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "src=%pI4 dst=%pI4 ",
-		   &tuple->src.u3.ip, &tuple->dst.u3.ip);
-}
-
 static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			    unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -174,7 +167,7 @@ static unsigned int ipv4_conntrack_local(void *priv,
 
 /* Connection tracking may drop packets, but never alters them, so
    make it the first hook. */
-static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv4_conntrack_ops[] = {
 	{
 		.hook		= ipv4_conntrack_in,
 		.pf		= NFPROTO_IPV4,
@@ -303,11 +296,6 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
 
 	return 0;
 }
-
-static int ipv4_nlattr_tuple_size(void)
-{
-	return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
-}
 #endif
 
 static struct nf_sockopt_ops so_getorigdst = {
@@ -356,18 +344,17 @@ static void ipv4_hooks_unregister(struct net *net)
 	mutex_unlock(&register_ipv4_hooks);
 }
 
-struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
+const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
 	.l3proto	 = PF_INET,
-	.name		 = "ipv4",
 	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
 	.invert_tuple	 = ipv4_invert_tuple,
-	.print_tuple	 = ipv4_print_tuple,
 	.get_l4proto	 = ipv4_get_l4proto,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr = ipv4_tuple_to_nlattr,
-	.nlattr_tuple_size = ipv4_nlattr_tuple_size,
 	.nlattr_to_tuple = ipv4_nlattr_to_tuple,
 	.nla_policy	 = ipv4_nla_policy,
+	.nla_size	 = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */
+			   NLA_ALIGN(NLA_HDRLEN + sizeof(u32)),  /* CTA_IP_V4_DST */
 #endif
 	.net_ns_get	 = ipv4_hooks_register,
 	.net_ns_put	 = ipv4_hooks_unregister,
@@ -398,24 +385,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
 
 static int ipv4_net_init(struct net *net)
 {
-	int ret = 0;
-
-	ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
-					    ARRAY_SIZE(builtin_l4proto4));
-	if (ret < 0)
-		return ret;
-	ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv4: pernet registration failed\n");
-		nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
-						ARRAY_SIZE(builtin_l4proto4));
-	}
-	return ret;
+	return nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
+					     ARRAY_SIZE(builtin_l4proto4));
 }
 
 static void ipv4_net_exit(struct net *net)
 {
-	nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
 	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
 					ARRAY_SIZE(builtin_l4proto4));
 }
@@ -433,6 +408,11 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 
 	need_conntrack();
 
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) !=
+	    nf_conntrack_l3proto_ipv4.nla_size))
+		return -EINVAL;
+#endif
 	ret = nf_register_sockopt(&so_getorigdst);
 	if (ret < 0) {
 		pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 73c591d8a9a8..1849fedd9b81 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -71,16 +71,6 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void icmp_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "type=%u code=%u id=%u ",
-		   tuple->dst.u.icmp.type,
-		   tuple->dst.u.icmp.code,
-		   ntohs(tuple->src.u.icmp.id));
-}
-
 static unsigned int *icmp_get_timeouts(struct net *net)
 {
 	return &icmp_pernet(net)->timeout;
@@ -91,8 +81,6 @@ static int icmp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
-		       u_int8_t pf,
-		       unsigned int hooknum,
 		       unsigned int *timeout)
 {
 	/* Do not immediately delete the connection after the first
@@ -137,7 +125,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conntrack_zone tmp;
 
-	NF_CT_ASSERT(!skb_nfct(skb));
+	WARN_ON(skb_nfct(skb));
 	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
 
 	/* Are they talking about one of our connections? */
@@ -176,6 +164,12 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
+static void icmp_error_log(const struct sk_buff *skb, struct net *net,
+			   u8 pf, const char *msg)
+{	/* Log @msg as IPPROTO_ICMP; it is passed as the "%s" argument. */
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+}
+
 /* Small and modified version of icmp_rcv */
 static int
 icmp_error(struct net *net, struct nf_conn *tmpl,
@@ -188,18 +182,14 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
 	/* Not enough header? */
 	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
 	if (icmph == NULL) {
-		if (LOG_INVALID(net, IPPROTO_ICMP))
-			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
-				      NULL, "nf_ct_icmp: short packet ");
+		icmp_error_log(skb, net, pf, "short packet");
 		return -NF_ACCEPT;
 	}
 
 	/* See ip_conntrack_proto_tcp.c */
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
-		if (LOG_INVALID(net, IPPROTO_ICMP))
-			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_icmp: bad HW ICMP checksum ");
+		icmp_error_log(skb, net, pf, "bad hw icmp checksum");
 		return -NF_ACCEPT;
 	}
 
@@ -210,9 +200,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
 	 *		  discarded.
 	 */
 	if (icmph->type > NR_ICMP_TYPES) {
-		if (LOG_INVALID(net, IPPROTO_ICMP))
-			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_icmp: invalid ICMP type ");
+		icmp_error_log(skb, net, pf, "invalid icmp type");
 		return -NF_ACCEPT;
 	}
 
@@ -270,9 +258,14 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
 	return 0;
 }
 
-static int icmp_nlattr_tuple_size(void)
+static unsigned int icmp_nlattr_tuple_size(void)
 {
-	return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+	static unsigned int size __read_mostly;	/* cached nla_policy_len() result */
+
+	if (!size)	/* 0: nothing cached yet */
+		size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+
+	return size;
 }
 #endif
 
@@ -362,10 +355,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
 {
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_ICMP,
-	.name			= "icmp",
 	.pkt_to_tuple		= icmp_pkt_to_tuple,
 	.invert_tuple		= icmp_invert_tuple,
-	.print_tuple		= icmp_print_tuple,
 	.packet			= icmp_packet,
 	.get_timeouts		= icmp_get_timeouts,
 	.new			= icmp_new,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 346bf7ccac08..37fe1616ca0b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -90,7 +90,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
 	return NF_ACCEPT;
 }
 
-static struct nf_hook_ops ipv4_defrag_ops[] = {
+static const struct nf_hook_ops ipv4_defrag_ops[] = {
 	{
 		.hook		= ipv4_conntrack_defrag,
 		.pf		= NFPROTO_IPV4,
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 2f3895ddc275..df5c2a2061a4 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -25,7 +25,7 @@
 #include <linux/netfilter/xt_LOG.h>
 #include <net/netfilter/nf_log.h>
 
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
 	.type	= NF_LOG_TYPE_LOG,
 	.u = {
 		.log = {
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index c83a9963269b..4388de0e5380 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -24,7 +24,7 @@
 #include <linux/netfilter/xt_LOG.h>
 #include <net/netfilter/nf_log.h>
 
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
 	.type	= NF_LOG_TYPE_LOG,
 	.u = {
 		.log = {
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 574f7ebba0b6..ac8342dcb55e 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -252,16 +252,16 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 	if (set_h245_addr(skb, protoff, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons((port & htons(1)) ? nated_port + 1 :
-						    nated_port)) == 0) {
-		/* Save ports */
-		info->rtp_port[i][dir] = rtp_port;
-		info->rtp_port[i][!dir] = htons(nated_port);
-	} else {
+						    nated_port))) {
 		nf_ct_unexpect_related(rtp_exp);
 		nf_ct_unexpect_related(rtcp_exp);
 		return -1;
 	}
 
+	/* Save ports */
+	info->rtp_port[i][dir] = rtp_port;
+	info->rtp_port[i][!dir] = htons(nated_port);
+
 	/* Success */
 	pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n",
 		 &rtp_exp->tuple.src.u3.ip,
@@ -370,15 +370,15 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 	/* Modify signal */
 	if (set_h225_addr(skb, protoff, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
-			  htons(nated_port)) == 0) {
-		/* Save ports */
-		info->sig_port[dir] = port;
-		info->sig_port[!dir] = htons(nated_port);
-	} else {
+			  htons(nated_port))) {
 		nf_ct_unexpect_related(exp);
 		return -1;
 	}
 
+	/* Save ports */
+	info->sig_port[dir] = port;
+	info->sig_port[!dir] = htons(nated_port);
+
 	pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n",
 		 &exp->tuple.src.u3.ip,
 		 ntohs(exp->tuple.src.u.tcp.port),
@@ -462,24 +462,27 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 	/* Modify signal */
 	if (set_h225_addr(skb, protoff, data, 0, &taddr[idx],
 			  &ct->tuplehash[!dir].tuple.dst.u3,
-			  htons(nated_port)) == 0) {
-		/* Save ports */
-		info->sig_port[dir] = port;
-		info->sig_port[!dir] = htons(nated_port);
-
-		/* Fix for Gnomemeeting */
-		if (idx > 0 &&
-		    get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
-		    (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
-			set_h225_addr(skb, protoff, data, 0, &taddr[0],
-				      &ct->tuplehash[!dir].tuple.dst.u3,
-				      info->sig_port[!dir]);
-		}
-	} else {
+			  htons(nated_port))) {
 		nf_ct_unexpect_related(exp);
 		return -1;
 	}
 
+	/* Save ports */
+	info->sig_port[dir] = port;
+	info->sig_port[!dir] = htons(nated_port);
+
+	/* Fix for Gnomemeeting */
+	if (idx > 0 &&
+	    get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
+	    (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
+		if (set_h225_addr(skb, protoff, data, 0, &taddr[0],
+				  &ct->tuplehash[!dir].tuple.dst.u3,
+				  info->sig_port[!dir])) {
+			nf_ct_unexpect_related(exp);
+			return -1;
+		}
+	}
+
 	/* Success */
 	pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n",
 		 &exp->tuple.src.u3.ip,
@@ -550,9 +553,9 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
-			   &ct->tuplehash[!dir].tuple.dst.u3,
-			   htons(nated_port)) == 0) {
+	if (set_h225_addr(skb, protoff, data, dataoff, taddr,
+			  &ct->tuplehash[!dir].tuple.dst.u3,
+			  htons(nated_port))) {
 		nf_ct_unexpect_related(exp);
 		return -1;
 	}
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index feedd759ca80..0443ca4120b0 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -190,7 +190,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
 	struct nf_conntrack_tuple target;
 	unsigned long statusbit;
 
-	NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
 
 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
 		return 0;
@@ -276,7 +276,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
 			else
 				return NF_ACCEPT;
 		}
-		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+		/* Only ICMPs can be IP_CT_IS_REPLY: */
+		/* fall through */
 	case IP_CT_NEW:
 		/* Seen it before?  This can happen for loopback, retrans,
 		 * or local packets.
@@ -306,8 +307,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
 
 	default:
 		/* ESTABLISHED */
-		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
-			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+		WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+			ctinfo != IP_CT_ESTABLISHED_REPLY);
 		if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
 			goto oif_changed;
 	}
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index f39037fca923..0c366aad89cb 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -34,12 +34,12 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 	const struct rtable *rt;
 	__be32 newsrc, nh;
 
-	NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
+	WARN_ON(hooknum != NF_INET_POST_ROUTING);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
-	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			    ctinfo == IP_CT_RELATED_REPLY));
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			 ctinfo == IP_CT_RELATED_REPLY)));
 
 	/* Source address is 0.0.0.0 - locally generated packet that is
 	 * probably not supposed to be masqueraded.
@@ -96,7 +96,7 @@ static int masq_device_event(struct notifier_block *this,
 		 * conntracks which were associated with that device,
 		 * and forget them.
 		 */
-		NF_CT_ASSERT(dev->ifindex != 0);
+		WARN_ON(dev->ifindex == 0);
 
 		nf_ct_iterate_cleanup_net(net, device_cmp,
 					  (void *)(long)dev->ifindex, 0, 0);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index eeacbdaf7cdf..5cd06ba3535d 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -132,6 +132,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
 	if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
 		goto free_nskb;
 
+	niph = ip_hdr(nskb);
+
 	/* "Never happens" */
 	if (nskb->len > dst_mtu(skb_dst(nskb)))
 		goto free_nskb;
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index de3681df2ce7..e50976e3c213 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -32,9 +32,10 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
 			const struct nft_pktinfo *pkt)
 {
 	const struct nft_fib *priv = nft_expr_priv(expr);
+	int noff = skb_network_offset(pkt->skb);
 	u32 *dst = &regs->data[priv->dreg];
 	const struct net_device *dev = NULL;
-	const struct iphdr *iph;
+	struct iphdr *iph, _iph;
 	__be32 addr;
 
 	if (priv->flags & NFTA_FIB_F_IIF)
@@ -42,7 +43,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
 	else if (priv->flags & NFTA_FIB_F_OIF)
 		dev = nft_out(pkt);
 
-	iph = ip_hdr(pkt->skb);
+	iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+	if (!iph) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
 	if (priv->flags & NFTA_FIB_F_DADDR)
 		addr = iph->daddr;
 	else
@@ -61,8 +67,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
 		   const struct nft_pktinfo *pkt)
 {
 	const struct nft_fib *priv = nft_expr_priv(expr);
+	int noff = skb_network_offset(pkt->skb);
 	u32 *dest = &regs->data[priv->dreg];
-	const struct iphdr *iph;
+	struct iphdr *iph, _iph;
 	struct fib_result res;
 	struct flowi4 fl4 = {
 		.flowi4_scope = RT_SCOPE_UNIVERSE,
@@ -95,7 +102,12 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
 		return;
 	}
 
-	iph = ip_hdr(pkt->skb);
+	iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+	if (!iph) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
 	if (ipv4_is_zeronet(iph->saddr)) {
 		if (ipv4_is_lbcast(iph->daddr) ||
 		    ipv4_is_local_multicast(iph->daddr)) {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 43eb6567b3a0..9f37c4727861 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -206,18 +206,12 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST),
 	SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS),
 	SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS),
-	SNMP_MIB_ITEM("TCPPrequeued", LINUX_MIB_TCPPREQUEUED),
-	SNMP_MIB_ITEM("TCPDirectCopyFromBacklog", LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG),
-	SNMP_MIB_ITEM("TCPDirectCopyFromPrequeue", LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE),
-	SNMP_MIB_ITEM("TCPPrequeueDropped", LINUX_MIB_TCPPREQUEUEDROPPED),
 	SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS),
-	SNMP_MIB_ITEM("TCPHPHitsToUser", LINUX_MIB_TCPHPHITSTOUSER),
 	SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS),
 	SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS),
 	SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
 	SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY),
 	SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING),
-	SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER),
 	SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER),
 	SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER),
 	SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER),
@@ -230,14 +224,12 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES),
 	SNMP_MIB_ITEM("TCPLossFailures", LINUX_MIB_TCPLOSSFAILURES),
 	SNMP_MIB_ITEM("TCPFastRetrans", LINUX_MIB_TCPFASTRETRANS),
-	SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS),
 	SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
 	SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
 	SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES),
 	SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY),
 	SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
 	SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
-	SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
 	SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED),
 	SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT),
 	SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT),
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index b0bb5d0a30bd..33b70bfd1122 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -122,7 +122,8 @@ void raw_unhash_sk(struct sock *sk)
 EXPORT_SYMBOL_GPL(raw_unhash_sk);
 
 struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
-		unsigned short num, __be32 raddr, __be32 laddr, int dif)
+			     unsigned short num, __be32 raddr, __be32 laddr,
+			     int dif, int sdif)
 {
 	sk_for_each_from(sk) {
 		struct inet_sock *inet = inet_sk(sk);
@@ -130,7 +131,8 @@ struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
 		if (net_eq(sock_net(sk), net) && inet->inet_num == num	&&
 		    !(inet->inet_daddr && inet->inet_daddr != raddr) 	&&
 		    !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
-		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+		      sk->sk_bound_dev_if != sdif))
 			goto found; /* gotcha */
 	}
 	sk = NULL;
@@ -171,6 +173,7 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
  */
 static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
 {
+	int sdif = inet_sdif(skb);
 	struct sock *sk;
 	struct hlist_head *head;
 	int delivered = 0;
@@ -184,13 +187,13 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
 	net = dev_net(skb->dev);
 	sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
 			     iph->saddr, iph->daddr,
-			     skb->dev->ifindex);
+			     skb->dev->ifindex, sdif);
 
 	while (sk) {
 		delivered = 1;
 		if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
 		    ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
-				   skb->dev->ifindex)) {
+				   skb->dev->ifindex, sdif)) {
 			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
 
 			/* Not releasing hash table! */
@@ -199,7 +202,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
 		}
 		sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
 				     iph->saddr, iph->daddr,
-				     skb->dev->ifindex);
+				     skb->dev->ifindex, sdif);
 	}
 out:
 	read_unlock(&raw_v4_hashinfo.lock);
@@ -297,12 +300,15 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
 	read_lock(&raw_v4_hashinfo.lock);
 	raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
 	if (raw_sk) {
+		int dif = skb->dev->ifindex;
+		int sdif = inet_sdif(skb);
+
 		iph = (const struct iphdr *)skb->data;
 		net = dev_net(skb->dev);
 
 		while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
 						iph->daddr, iph->saddr,
-						skb->dev->ifindex)) != NULL) {
+						dif, sdif)) != NULL) {
 			raw_err(raw_sk, skb, info);
 			raw_sk = sk_next(raw_sk);
 			iph = (const struct iphdr *)skb->data;
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index e1a51ca68d23..c200065ef9a5 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -46,13 +46,13 @@ static struct sock *raw_lookup(struct net *net, struct sock *from,
 		sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
 				     r->id.idiag_dst[0],
 				     r->id.idiag_src[0],
-				     r->id.idiag_if);
+				     r->id.idiag_if, 0);
 #if IS_ENABLED(CONFIG_IPV6)
 	else
 		sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
 				     (const struct in6_addr *)r->id.idiag_src,
 				     (const struct in6_addr *)r->id.idiag_dst,
-				     r->id.idiag_if);
+				     r->id.idiag_if, 0);
 #endif
 	return sk;
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7effa62beed3..43b69af242e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -495,7 +495,7 @@ u32 ip_idents_reserve(u32 hash, int segs)
 {
 	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
 	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
-	u32 old = ACCESS_ONCE(*p_tstamp);
+	u32 old = READ_ONCE(*p_tstamp);
 	u32 now = (u32)jiffies;
 	u32 new, delta = 0;
 
@@ -651,9 +651,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
 	struct rtable *rt;
+	u32 genid, hval;
 	unsigned int i;
 	int depth;
-	u32 hval = fnhe_hashfun(daddr);
+
+	genid = fnhe_genid(dev_net(nh->nh_dev));
+	hval = fnhe_hashfun(daddr);
 
 	spin_lock_bh(&fnhe_lock);
 
@@ -676,12 +679,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 	}
 
 	if (fnhe) {
+		if (fnhe->fnhe_genid != genid)
+			fnhe->fnhe_genid = genid;
 		if (gw)
 			fnhe->fnhe_gw = gw;
-		if (pmtu) {
+		if (pmtu)
 			fnhe->fnhe_pmtu = pmtu;
-			fnhe->fnhe_expires = max(1UL, expires);
-		}
+		fnhe->fnhe_expires = max(1UL, expires);
 		/* Update all cached dsts too */
 		rt = rcu_dereference(fnhe->fnhe_rth_input);
 		if (rt)
@@ -700,7 +704,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 			fnhe->fnhe_next = hash->chain;
 			rcu_assign_pointer(hash->chain, fnhe);
 		}
-		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
+		fnhe->fnhe_genid = genid;
 		fnhe->fnhe_daddr = daddr;
 		fnhe->fnhe_gw = gw;
 		fnhe->fnhe_pmtu = pmtu;
@@ -1250,7 +1254,7 @@ static void set_class_tag(struct rtable *rt, u32 tag)
 static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 {
 	unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
-	unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+	unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
 				    ip_rt_min_advmss);
 
 	return min(advmss, IPV4_MAX_PMTU - header_size);
@@ -1267,7 +1271,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 	if (mtu)
 		return mtu;
 
-	mtu = dst->dev->mtu;
+	mtu = READ_ONCE(dst->dev->mtu);
 
 	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
 		if (rt->rt_uses_gateway && mtu > 576)
@@ -1398,7 +1402,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
 	struct rtable *rt = (struct rtable *) dst;
 
-	if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
 		kfree(p);
 
 	if (!list_empty(&rt->rt_uncached)) {
@@ -1456,7 +1460,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 		dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
 		if (fi->fib_metrics != &dst_default_metrics) {
 			rt->dst._metrics |= DST_METRICS_REFCOUNTED;
-			atomic_inc(&fi->fib_metrics->refcnt);
+			refcount_inc(&fi->fib_metrics->refcnt);
 		}
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		rt->dst.tclassid = nh->nh_tclassid;
@@ -1520,43 +1524,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 EXPORT_SYMBOL(rt_dst_alloc);
 
 /* called in rcu_read_lock() section */
-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-				u8 tos, struct net_device *dev, int our)
+int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+			  u8 tos, struct net_device *dev,
+			  struct in_device *in_dev, u32 *itag)
 {
-	struct rtable *rth;
-	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	unsigned int flags = RTCF_MULTICAST;
-	u32 itag = 0;
 	int err;
 
 	/* Primary sanity checks. */
-
 	if (!in_dev)
 		return -EINVAL;
 
 	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
 	    skb->protocol != htons(ETH_P_IP))
-		goto e_inval;
+		return -EINVAL;
 
 	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
-		goto e_inval;
+		return -EINVAL;
 
 	if (ipv4_is_zeronet(saddr)) {
 		if (!ipv4_is_local_multicast(daddr))
-			goto e_inval;
+			return -EINVAL;
 	} else {
 		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
-					  in_dev, &itag);
+					  in_dev, itag);
 		if (err < 0)
-			goto e_err;
+			return err;
 	}
+	return 0;
+}
+
+/* called in rcu_read_lock() section */
+static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+			     u8 tos, struct net_device *dev, int our)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	unsigned int flags = RTCF_MULTICAST;
+	struct rtable *rth;
+	u32 itag = 0;
+	int err;
+
+	err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
+	if (err)
+		return err;
+
 	if (our)
 		flags |= RTCF_LOCAL;
 
 	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
 	if (!rth)
-		goto e_nobufs;
+		return -ENOBUFS;
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
@@ -1572,13 +1589,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	skb_dst_set(skb, &rth->dst);
 	return 0;
-
-e_nobufs:
-	return -ENOBUFS;
-e_inval:
-	return -EINVAL;
-e_err:
-	return err;
 }
 
 
@@ -2236,7 +2246,7 @@ add:
 	if (!rth)
 		return ERR_PTR(-ENOBUFS);
 
-	rth->rt_iif	= orig_oif ? : 0;
+	rth->rt_iif = orig_oif;
 	if (res->table)
 		rth->rt_table_id = res->table->tb_id;
 
@@ -2439,6 +2449,12 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 		/* L3 master device is the loopback for that domain */
 		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
 			net->loopback_dev;
+
+		/* make sure orig_oif points to fib result device even
+		 * though packet rx/tx happens over loopback or l3mdev
+		 */
+		orig_oif = FIB_RES_OIF(*res);
+
 		fl4->flowi4_oif = dev_out->ifindex;
 		flags |= RTCF_LOCAL;
 		goto make_route;
@@ -2501,7 +2517,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 	struct rtable *ort = (struct rtable *) dst_orig;
 	struct rtable *rt;
 
-	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
+	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
 	if (rt) {
 		struct dst_entry *new = &rt->dst;
 
@@ -2763,14 +2779,21 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
 		table_id = rt->rt_table_id;
 
-	if (rtm->rtm_flags & RTM_F_FIB_MATCH)
+	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
+		if (!res.fi) {
+			err = fib_props[res.type].error;
+			if (!err)
+				err = -EHOSTUNREACH;
+			goto errout_free;
+		}
 		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
 				    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
 				    rt->rt_type, res.prefix, res.prefixlen,
 				    fl4.flowi4_tos, res.fi, 0);
-	else
+	} else {
 		err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
 				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
+	}
 	if (err < 0)
 		goto errout_free;
 
@@ -3019,7 +3042,6 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
 
 int __init ip_rt_init(void)
 {
-	int rc = 0;
 	int cpu;
 
 	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
@@ -3068,14 +3090,15 @@ int __init ip_rt_init(void)
 	xfrm_init();
 	xfrm4_init();
 #endif
-	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
+		      RTNL_FLAG_DOIT_UNLOCKED);
 
 #ifdef CONFIG_SYSCTL
 	register_pernet_subsys(&sysctl_route_ops);
 #endif
 	register_pernet_subsys(&rt_genid_ops);
 	register_pernet_subsys(&ipv4_inetpeer_ops);
-	return rc;
+	return 0;
 }
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 03ad8778c395..fda37f2862c9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
 	 */
-	ireq->opt = tcp_v4_save_options(skb);
+	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb));
 
 	if (security_inet_conn_request(sk, skb, req)) {
 		reqsk_free(req);
@@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	/* Try to redo what tcp_v4_send_synack did. */
 	req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
 
-	tcp_select_initial_window(tcp_full_space(sk), req->mss,
+	tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
 				  &req->rsk_rcv_wnd, &req->rsk_window_clamp,
 				  ireq->wscale_ok, &rcv_wscale,
 				  dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9bf809726066..93e172118a94 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
  *
@@ -25,6 +26,7 @@
 #include <net/inet_frag.h>
 #include <net/ping.h>
 #include <net/protocol.h>
+#include <net/netevent.h>
 
 static int zero;
 static int one = 1;
@@ -45,6 +47,9 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
 static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 
+/* obsolete */
+static int sysctl_tcp_low_latency __read_mostly;
+
 /* Update system visible IP port range */
 static void set_local_port_range(struct net *net, int range[2])
 {
@@ -196,6 +201,8 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
 				       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	struct net *net = container_of(ctl->data, struct net,
+				       ipv4.tcp_congestion_control);
 	char val[TCP_CA_NAME_MAX];
 	struct ctl_table tbl = {
 		.data = val,
@@ -203,11 +210,11 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
 	};
 	int ret;
 
-	tcp_get_default_congestion_control(val);
+	tcp_get_default_congestion_control(net, val);
 
 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 	if (write && ret == 0)
-		ret = tcp_set_default_congestion_control(val);
+		ret = tcp_set_default_congestion_control(net, val);
 	return ret;
 }
 
@@ -248,10 +255,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
 	return ret;
 }
 
-static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
+static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos)
 {
+	struct net *net = container_of(table->data, struct net,
+	    ipv4.sysctl_tcp_fastopen);
 	struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
 	struct tcp_fastopen_context *ctxt;
 	int ret;
@@ -262,7 +271,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
 		return -ENOMEM;
 
 	rcu_read_lock();
-	ctxt = rcu_dereference(tcp_fastopen_ctx);
+	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
 	if (ctxt)
 		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
 	else
@@ -279,12 +288,8 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
 			ret = -EINVAL;
 			goto bad_key;
 		}
-		/* Generate a dummy secret but don't publish it. This
-		 * is needed so we don't regenerate a new key on the
-		 * first invocation of tcp_fastopen_cookie_gen
-		 */
-		tcp_fastopen_init_key_once(false);
-		tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+		tcp_fastopen_reset_cipher(net, NULL, user_key,
+					  TCP_FASTOPEN_KEY_LENGTH);
 	}
 
 bad_key:
@@ -355,11 +360,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
 					     void __user *buffer,
 					     size_t *lenp, loff_t *ppos)
 {
+	struct net *net = container_of(table->data, struct net,
+	    ipv4.sysctl_tcp_fastopen_blackhole_timeout);
 	int ret;
 
 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (write && ret == 0)
-		tcp_fastopen_active_timeout_reset();
+		atomic_set(&net->ipv4.tfo_active_disable_times, 0);
 
 	return ret;
 }
@@ -382,15 +389,25 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
 	return ret;
 }
 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	struct net *net = container_of(table->data, struct net,
+	    ipv4.sysctl_fib_multipath_hash_policy);	/* struct net embedding table->data */
+	int ret;
+	/* sysctl handler: do the int read/write, then notify netevent listeners on a good write */
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && ret == 0)	/* reads and failed writes raise no event */
+		call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+
+	return ret;	/* result of proc_dointvec_minmax(), unchanged */
+}
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+
 static struct ctl_table ipv4_table[] = {
 	{
-		.procname	= "tcp_retrans_collapse",
-		.data		= &sysctl_tcp_retrans_collapse,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
 		.procname	= "tcp_max_orphans",
 		.data		= &sysctl_tcp_max_orphans,
 		.maxlen		= sizeof(int),
@@ -398,48 +415,6 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.procname	= "tcp_fastopen",
-		.data		= &sysctl_tcp_fastopen,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "tcp_fastopen_key",
-		.mode		= 0600,
-		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
-		.proc_handler	= proc_tcp_fastopen_key,
-	},
-	{
-		.procname	= "tcp_fastopen_blackhole_timeout_sec",
-		.data		= &sysctl_tcp_fastopen_blackhole_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_tfo_blackhole_detect_timeout,
-		.extra1		= &zero,
-	},
-	{
-		.procname	= "tcp_abort_on_overflow",
-		.data		= &sysctl_tcp_abort_on_overflow,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.procname	= "tcp_stdurg",
-		.data		= &sysctl_tcp_stdurg,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.procname	= "tcp_rfc1337",
-		.data		= &sysctl_tcp_rfc1337,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
 		.procname	= "inet_peer_threshold",
 		.data		= &inet_peer_threshold,
 		.maxlen		= sizeof(int),
@@ -461,34 +436,6 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 	{
-		.procname	= "tcp_fack",
-		.data		= &sysctl_tcp_fack,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.procname	= "tcp_recovery",
-		.data		= &sysctl_tcp_recovery,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "tcp_max_reordering",
-		.data		= &sysctl_tcp_max_reordering,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.procname	= "tcp_dsack",
-		.data		= &sysctl_tcp_dsack,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
 		.procname	= "tcp_mem",
 		.maxlen		= sizeof(sysctl_tcp_mem),
 		.data		= &sysctl_tcp_mem,
@@ -496,113 +443,12 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_doulongvec_minmax,
 	},
 	{
-		.procname	= "tcp_wmem",
-		.data		= &sysctl_tcp_wmem,
-		.maxlen		= sizeof(sysctl_tcp_wmem),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
-	},
-	{
-		.procname	= "tcp_rmem",
-		.data		= &sysctl_tcp_rmem,
-		.maxlen		= sizeof(sysctl_tcp_rmem),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
-	},
-	{
-		.procname	= "tcp_app_win",
-		.data		= &sysctl_tcp_app_win,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.procname	= "tcp_adv_win_scale",
-		.data		= &sysctl_tcp_adv_win_scale,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &tcp_adv_win_scale_min,
-		.extra2		= &tcp_adv_win_scale_max,
-	},
-	{
-		.procname	= "tcp_frto",
-		.data		= &sysctl_tcp_frto,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.procname	= "tcp_min_rtt_wlen",
-		.data		= &sysctl_tcp_min_rtt_wlen,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
 		.procname	= "tcp_low_latency",
 		.data		= &sysctl_tcp_low_latency,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-	{
-		.procname	= "tcp_no_metrics_save",
-		.data		= &sysctl_tcp_nometrics_save,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "tcp_moderate_rcvbuf",
-		.data		= &sysctl_tcp_moderate_rcvbuf,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "tcp_tso_win_divisor",
-		.data		= &sysctl_tcp_tso_win_divisor,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "tcp_congestion_control",
-		.mode		= 0644,
-		.maxlen		= TCP_CA_NAME_MAX,
-		.proc_handler	= proc_tcp_congestion_control,
-	},
-	{
-		.procname	= "tcp_workaround_signed_windows",
-		.data		= &sysctl_tcp_workaround_signed_windows,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.procname	= "tcp_limit_output_bytes",
-		.data		= &sysctl_tcp_limit_output_bytes,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.procname	= "tcp_challenge_ack_limit",
-		.data		= &sysctl_tcp_challenge_ack_limit,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.procname	= "tcp_slow_start_after_idle",
-		.data		= &sysctl_tcp_slow_start_after_idle,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
 #ifdef CONFIG_NETLABEL
 	{
 		.procname	= "cipso_cache_enable",
@@ -646,65 +492,6 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler   = proc_allowed_congestion_control,
 	},
 	{
-		.procname       = "tcp_thin_linear_timeouts",
-		.data           = &sysctl_tcp_thin_linear_timeouts,
-		.maxlen         = sizeof(int),
-		.mode           = 0644,
-		.proc_handler   = proc_dointvec
-	},
-	{
-		.procname	= "tcp_early_retrans",
-		.data		= &sysctl_tcp_early_retrans,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &four,
-	},
-	{
-		.procname	= "tcp_min_tso_segs",
-		.data		= &sysctl_tcp_min_tso_segs,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
-		.extra2		= &gso_max_segs,
-	},
-	{
-		.procname	= "tcp_pacing_ss_ratio",
-		.data		= &sysctl_tcp_pacing_ss_ratio,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &thousand,
-	},
-	{
-		.procname	= "tcp_pacing_ca_ratio",
-		.data		= &sysctl_tcp_pacing_ca_ratio,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &thousand,
-	},
-	{
-		.procname	= "tcp_autocorking",
-		.data		= &sysctl_tcp_autocorking,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
-	},
-	{
-		.procname	= "tcp_invalid_ratelimit",
-		.data		= &sysctl_tcp_invalid_ratelimit,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_ms_jiffies,
-	},
-	{
 		.procname	= "tcp_available_ulp",
 		.maxlen		= TCP_ULP_BUF_MAX,
 		.mode		= 0444,
@@ -973,6 +760,13 @@ static struct ctl_table ipv4_net_table[] = {
 	},
 #endif
 	{
+		.procname	= "tcp_congestion_control",
+		.data		= &init_net.ipv4.tcp_congestion_control,
+		.mode		= 0644,
+		.maxlen		= TCP_CA_NAME_MAX,
+		.proc_handler	= proc_tcp_congestion_control,
+	},
+	{
 		.procname	= "tcp_keepalive_time",
 		.data		= &init_net.ipv4.sysctl_tcp_keepalive_time,
 		.maxlen		= sizeof(int),
@@ -1082,6 +876,28 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "tcp_fastopen",
+		.data		= &init_net.ipv4.sysctl_tcp_fastopen,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "tcp_fastopen_key",
+		.mode		= 0600,
+		.data		= &init_net.ipv4.sysctl_tcp_fastopen,
+		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+		.proc_handler	= proc_tcp_fastopen_key,
+	},
+	{
+		.procname	= "tcp_fastopen_blackhole_timeout_sec",
+		.data		= &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_tfo_blackhole_detect_timeout,
+		.extra1		= &zero,
+	},
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	{
 		.procname	= "fib_multipath_use_neigh",
@@ -1097,7 +913,7 @@ static struct ctl_table ipv4_net_table[] = {
 		.data		= &init_net.ipv4.sysctl_fib_multipath_hash_policy,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_fib_multipath_hash_policy,
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
@@ -1141,6 +957,216 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "tcp_early_retrans",
+		.data		= &init_net.ipv4.sysctl_tcp_early_retrans,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &four,
+	},
+	{
+		.procname	= "tcp_recovery",
+		.data		= &init_net.ipv4.sysctl_tcp_recovery,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname       = "tcp_thin_linear_timeouts",
+		.data           = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec
+	},
+	{
+		.procname	= "tcp_slow_start_after_idle",
+		.data		= &init_net.ipv4.sysctl_tcp_slow_start_after_idle,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_retrans_collapse",
+		.data		= &init_net.ipv4.sysctl_tcp_retrans_collapse,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_stdurg",
+		.data		= &init_net.ipv4.sysctl_tcp_stdurg,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_rfc1337",
+		.data		= &init_net.ipv4.sysctl_tcp_rfc1337,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_abort_on_overflow",
+		.data		= &init_net.ipv4.sysctl_tcp_abort_on_overflow,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_fack",
+		.data		= &init_net.ipv4.sysctl_tcp_fack,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_max_reordering",
+		.data		= &init_net.ipv4.sysctl_tcp_max_reordering,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_dsack",
+		.data		= &init_net.ipv4.sysctl_tcp_dsack,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_app_win",
+		.data		= &init_net.ipv4.sysctl_tcp_app_win,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_adv_win_scale",
+		.data		= &init_net.ipv4.sysctl_tcp_adv_win_scale,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &tcp_adv_win_scale_min,
+		.extra2		= &tcp_adv_win_scale_max,
+	},
+	{
+		.procname	= "tcp_frto",
+		.data		= &init_net.ipv4.sysctl_tcp_frto,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_no_metrics_save",
+		.data		= &init_net.ipv4.sysctl_tcp_nometrics_save,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "tcp_moderate_rcvbuf",
+		.data		= &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "tcp_tso_win_divisor",
+		.data		= &init_net.ipv4.sysctl_tcp_tso_win_divisor,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "tcp_workaround_signed_windows",
+		.data		= &init_net.ipv4.sysctl_tcp_workaround_signed_windows,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_limit_output_bytes",
+		.data		= &init_net.ipv4.sysctl_tcp_limit_output_bytes,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_challenge_ack_limit",
+		.data		= &init_net.ipv4.sysctl_tcp_challenge_ack_limit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_min_tso_segs",
+		.data		= &init_net.ipv4.sysctl_tcp_min_tso_segs,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &gso_max_segs,
+	},
+	{
+		.procname	= "tcp_min_rtt_wlen",
+		.data		= &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_autocorking",
+		.data		= &init_net.ipv4.sysctl_tcp_autocorking,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+	{
+		.procname	= "tcp_invalid_ratelimit",
+		.data		= &init_net.ipv4.sysctl_tcp_invalid_ratelimit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+	},
+	{
+		.procname	= "tcp_pacing_ss_ratio",
+		.data		= &init_net.ipv4.sysctl_tcp_pacing_ss_ratio,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &thousand,
+	},
+	{
+		.procname	= "tcp_pacing_ca_ratio",
+		.data		= &init_net.ipv4.sysctl_tcp_pacing_ca_ratio,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &thousand,
+	},
+	{
+		.procname	= "tcp_wmem",
+		.data		= &init_net.ipv4.sysctl_tcp_wmem,
+		.maxlen		= sizeof(init_net.ipv4.sysctl_tcp_wmem),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+	},
+	{
+		.procname	= "tcp_rmem",
+		.data		= &init_net.ipv4.sysctl_tcp_rmem,
+		.maxlen		= sizeof(init_net.ipv4.sysctl_tcp_rmem),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 71ce33decd97..bf97317e6c97 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -269,6 +269,8 @@
 #include <linux/err.h>
 #include <linux/time.h>
 #include <linux/slab.h>
+#include <linux/errqueue.h>
+#include <linux/static_key.h>
 
 #include <net/icmp.h>
 #include <net/inet_common.h>
@@ -281,24 +283,22 @@
 #include <asm/ioctls.h>
 #include <net/busy_poll.h>
 
-int sysctl_tcp_min_tso_segs __read_mostly = 2;
-
-int sysctl_tcp_autocorking __read_mostly = 1;
+#include <trace/events/tcp.h>
 
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
 long sysctl_tcp_mem[3] __read_mostly;
-int sysctl_tcp_wmem[3] __read_mostly;
-int sysctl_tcp_rmem[3] __read_mostly;
-
 EXPORT_SYMBOL(sysctl_tcp_mem);
-EXPORT_SYMBOL(sysctl_tcp_rmem);
-EXPORT_SYMBOL(sysctl_tcp_wmem);
 
 atomic_long_t tcp_memory_allocated;	/* Current allocated memory. */
 EXPORT_SYMBOL(tcp_memory_allocated);
 
+#if IS_ENABLED(CONFIG_SMC)
+DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
+EXPORT_SYMBOL(tcp_have_smc);
+#endif
+
 /*
  * Current number of TCP sockets.
  */
@@ -388,6 +388,19 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
 	return period;
 }
 
+static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
+{
+	u32 rate = READ_ONCE(tp->rate_delivered);	/* each field is read exactly once, */
+	u32 intv = READ_ONCE(tp->rate_interval_us);	/* so test and use see the same value */
+	u64 rate64 = 0;	/* returned as-is when either field is zero */
+	/* rate_delivered * mss_cache * USEC_PER_SEC / rate_interval_us; presumably bytes/sec - confirm */
+	if (rate && intv) {	/* also keeps the division below away from intv == 0 */
+		rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;	/* widen before multiplying */
+		do_div(rate64, intv);	/* quotient is left in rate64 */
+	}
+	return rate64;
+}
+
 /* Address-family independent initialization for a tcp_sock.
  *
  * NOTE: A lot of things set to zero explicitly by call to
@@ -399,9 +412,10 @@ void tcp_init_sock(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->out_of_order_queue = RB_ROOT;
+	sk->tcp_rtx_queue = RB_ROOT;
 	tcp_init_xmit_timers(sk);
-	tcp_prequeue_init(tp);
 	INIT_LIST_HEAD(&tp->tsq_node);
+	INIT_LIST_HEAD(&tp->tsorted_sent_queue);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -428,6 +442,7 @@ void tcp_init_sock(struct sock *sk)
 	tcp_assign_congestion_control(sk);
 
 	tp->tsoffset = 0;
+	tp->rack.reo_wnd_steps = 1;
 
 	sk->sk_state = TCP_CLOSE;
 
@@ -436,15 +451,29 @@ void tcp_init_sock(struct sock *sk)
 
 	icsk->icsk_sync_mss = tcp_sync_mss;
 
-	sk->sk_sndbuf = sysctl_tcp_wmem[1];
-	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+	sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
 
 	sk_sockets_allocated_inc(sk);
 }
 EXPORT_SYMBOL(tcp_init_sock);
 
-static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
+void tcp_init_transfer(struct sock *sk, int bpf_op)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	/* Run the socket init helpers below in this order; @bpf_op is only handed to tcp_call_bpf(). */
+	tcp_mtup_init(sk);
+	icsk->icsk_af_ops->rebuild_header(sk);	/* indirect call through icsk->icsk_af_ops */
+	tcp_init_metrics(sk);
+	tcp_call_bpf(sk, bpf_op);
+	tcp_init_congestion_control(sk);
+	tcp_init_buffer_space(sk);
+}
+
+static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
+{
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
+
 	if (tsflags && skb) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -662,7 +691,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
 				int size_goal)
 {
 	return skb->len < size_goal &&
-	       sysctl_tcp_autocorking &&
+	       sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
 	       skb != tcp_write_queue_head(sk) &&
 	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
 }
@@ -673,10 +702,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	if (!tcp_send_head(sk))
-		return;
-
 	skb = tcp_write_queue_tail(sk);
+	if (!skb)
+		return;
 	if (!(flags & MSG_MORE) || forced_push(tp))
 		tcp_mark_push(tp, skb);
 
@@ -856,6 +884,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 			 * available to the caller, no more, no less.
 			 */
 			skb->reserved_tailroom = skb->end - skb->tail - size;
+			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
 			return skb;
 		}
 		__kfree_skb(skb);
@@ -935,14 +964,14 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 		int copy, i;
 		bool can_coalesce;
 
-		if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+		if (!skb || (copy = size_goal - skb->len) <= 0 ||
 		    !tcp_skb_can_collapse_to(skb)) {
 new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
 			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
-						  skb_queue_empty(&sk->sk_write_queue));
+					tcp_rtx_and_write_queues_empty(sk));
 			if (!skb)
 				goto wait_for_memory;
 
@@ -1014,7 +1043,7 @@ wait_for_memory:
 
 out:
 	if (copied) {
-		tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk));
+		tcp_tx_timestamp(sk, sk->sk_tsflags);
 		if (!(flags & MSG_SENDPAGE_NOTLAST))
 			tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
@@ -1034,23 +1063,29 @@ out_err:
 }
 EXPORT_SYMBOL_GPL(do_tcp_sendpages);
 
-int tcp_sendpage(struct sock *sk, struct page *page, int offset,
-		 size_t size, int flags)
+int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
+			size_t size, int flags)
 {
-	ssize_t res;
-
 	if (!(sk->sk_route_caps & NETIF_F_SG) ||
 	    !sk_check_csum_caps(sk))
-		return sock_no_sendpage(sk->sk_socket, page, offset, size,
-					flags);
-
-	lock_sock(sk);
+		return sock_no_sendpage_locked(sk, page, offset, size, flags);
 
 	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
 
-	res = do_tcp_sendpages(sk, page, offset, size, flags);
+	return do_tcp_sendpages(sk, page, offset, size, flags);
+}
+EXPORT_SYMBOL_GPL(tcp_sendpage_locked);
+
+int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+		 size_t size, int flags)
+{
+	int ret;
+
+	lock_sock(sk);
+	ret = tcp_sendpage_locked(sk, page, offset, size, flags);
 	release_sock(sk);
-	return res;
+
+	return ret;
 }
 EXPORT_SYMBOL(tcp_sendpage);
 
@@ -1107,7 +1142,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 	struct sockaddr *uaddr = msg->msg_name;
 	int err, flags;
 
-	if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+	if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
 	    (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
 	     uaddr->sa_family == AF_UNSPEC))
 		return -EOPNOTSUPP;
@@ -1144,9 +1179,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 	return err;
 }
 
-int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct ubuf_info *uarg = NULL;
 	struct sk_buff *skb;
 	struct sockcm_cookie sockc;
 	int flags, err, copied = 0;
@@ -1155,9 +1191,25 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	bool sg;
 	long timeo;
 
-	lock_sock(sk);
-
 	flags = msg->msg_flags;
+
+	if (flags & MSG_ZEROCOPY && size) {
+		if (sk->sk_state != TCP_ESTABLISHED) {
+			err = -EINVAL;
+			goto out_err;
+		}
+
+		skb = tcp_write_queue_tail(sk);
+		uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
+		if (!uarg) {
+			err = -ENOBUFS;
+			goto out_err;
+		}
+
+		if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG))
+			uarg->zerocopy = 0;
+	}
+
 	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
 		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
 		if (err == -EINPROGRESS && copied_syn > 0)
@@ -1223,7 +1275,7 @@ restart:
 		int max = size_goal;
 
 		skb = tcp_write_queue_tail(sk);
-		if (tcp_send_head(sk)) {
+		if (skb) {
 			if (skb->ip_summed == CHECKSUM_NONE)
 				max = mss_now;
 			copy = max - skb->len;
@@ -1243,7 +1295,7 @@ new_segment:
 				process_backlog = false;
 				goto restart;
 			}
-			first_skb = skb_queue_empty(&sk->sk_write_queue);
+			first_skb = tcp_rtx_and_write_queues_empty(sk);
 			skb = sk_stream_alloc_skb(sk,
 						  select_size(sk, sg, first_skb),
 						  sk->sk_allocation,
@@ -1281,7 +1333,7 @@ new_segment:
 			err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
 			if (err)
 				goto do_fault;
-		} else {
+		} else if (!uarg || !uarg->zerocopy) {
 			bool merge = true;
 			int i = skb_shinfo(skb)->nr_frags;
 			struct page_frag *pfrag = sk_page_frag(sk);
@@ -1319,6 +1371,13 @@ new_segment:
 				page_ref_inc(pfrag->page);
 			}
 			pfrag->offset += copy;
+		} else {
+			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
+			if (err == -EMSGSIZE || err == -EEXIST)
+				goto new_segment;
+			if (err < 0)
+				goto do_error;
+			copy = err;
 		}
 
 		if (!copied)
@@ -1361,11 +1420,11 @@ wait_for_memory:
 
 out:
 	if (copied) {
-		tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk));
+		tcp_tx_timestamp(sk, sockc.tsflags);
 		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
 out_nopush:
-	release_sock(sk);
+	sock_zerocopy_put(uarg);
 	return copied + copied_syn;
 
 do_fault:
@@ -1382,6 +1441,7 @@ do_error:
 	if (copied + copied_syn)
 		goto out;
 out_err:
+	sock_zerocopy_put_abort(uarg);
 	err = sk_stream_error(sk, flags, err);
 	/* make sure we wake any epoll edge trigger waiter */
 	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
@@ -1389,9 +1449,20 @@ out_err:
 		sk->sk_write_space(sk);
 		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
 	}
-	release_sock(sk);
 	return err;
 }
+EXPORT_SYMBOL_GPL(tcp_sendmsg_locked);
+
+int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	int ret;
+	/* Locking wrapper: the whole send is done by tcp_sendmsg_locked() between lock/release. */
+	lock_sock(sk);
+	ret = tcp_sendmsg_locked(sk, msg, size);
+	release_sock(sk);
+
+	return ret;	/* tcp_sendmsg_locked() result, passed through unchanged */
+}
 EXPORT_SYMBOL(tcp_sendmsg);
 
 /*
@@ -1450,6 +1521,13 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
 
 	/* XXX -- need to support SO_PEEK_OFF */
 
+	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
+		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
+		if (err)
+			return err;
+		copied += skb->len;
+	}
+
 	skb_queue_walk(&sk->sk_write_queue, skb) {
 		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
 		if (err)
@@ -1525,20 +1603,6 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		tcp_send_ack(sk);
 }
 
-static void tcp_prequeue_process(struct sock *sk)
-{
-	struct sk_buff *skb;
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
-
-	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-		sk_backlog_rcv(sk, skb);
-
-	/* Clear memory counter. */
-	tp->ucopy.memory = 0;
-}
-
 static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 {
 	struct sk_buff *skb;
@@ -1652,6 +1716,61 @@ int tcp_peek_len(struct socket *sock)
 }
 EXPORT_SYMBOL(tcp_peek_len);
 
+static void tcp_update_recv_tstamps(struct sk_buff *skb,
+				    struct scm_timestamping *tss)
+{
+	if (skb->tstamp)	/* ts[0] <- skb->tstamp, or all-zero when it is unset */
+		tss->ts[0] = ktime_to_timespec(skb->tstamp);
+	else
+		tss->ts[0] = (struct timespec) {0};
+	/* ts[2] <- skb_hwtstamps(skb)->hwtstamp, or all-zero when unset; ts[1] is not written here */
+	if (skb_hwtstamps(skb)->hwtstamp)
+		tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp);
+	else
+		tss->ts[2] = (struct timespec) {0};
+}
+
+/* Similar to __sock_recv_timestamp, but does not require an skb; may zero slots of @tss */
+void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+			struct scm_timestamping *tss)
+{
+	struct timeval tv;
+	bool has_timestamping = false;	/* set once a slot qualifies for SCM_TIMESTAMPING */
+	/* ts[0]: with SOCK_RCVTSTAMP it is also emitted as SCM_TIMESTAMPNS or SCM_TIMESTAMP */
+	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
+		if (sock_flag(sk, SOCK_RCVTSTAMP)) {
+			if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
+				put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
+					 sizeof(tss->ts[0]), &tss->ts[0]);
+			} else {
+				tv.tv_sec = tss->ts[0].tv_sec;
+				tv.tv_usec = tss->ts[0].tv_nsec / 1000;	/* nsec -> usec */
+
+				put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
+					 sizeof(tv), &tv);
+			}
+		}
+
+		if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+			has_timestamping = true;
+		else	/* leave ts[0] out of the SCM_TIMESTAMPING payload */
+			tss->ts[0] = (struct timespec) {0};
+	}
+	/* ts[2]: kept for SCM_TIMESTAMPING only with SOF_TIMESTAMPING_RAW_HARDWARE */
+	if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
+		if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+			has_timestamping = true;
+		else
+			tss->ts[2] = (struct timespec) {0};
+	}
+
+	if (has_timestamping) {
+		tss->ts[1] = (struct timespec) {0};	/* ts[1] is always sent as zero */
+		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING,
+			 sizeof(*tss), tss);
+	}
+}
+
 /*
  *	This routine copies from a sock struct into the user buffer.
  *
@@ -1671,9 +1790,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 	int err;
 	int target;		/* Read at least this many bytes */
 	long timeo;
-	struct task_struct *user_recv = NULL;
 	struct sk_buff *skb, *last;
 	u32 urg_hole = 0;
+	struct scm_timestamping tss;
+	bool has_tss = false;
 
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
@@ -1806,51 +1926,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 
 		tcp_cleanup_rbuf(sk, copied);
 
-		if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
-			/* Install new reader */
-			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
-				user_recv = current;
-				tp->ucopy.task = user_recv;
-				tp->ucopy.msg = msg;
-			}
-
-			tp->ucopy.len = len;
-
-			WARN_ON(tp->copied_seq != tp->rcv_nxt &&
-				!(flags & (MSG_PEEK | MSG_TRUNC)));
-
-			/* Ugly... If prequeue is not empty, we have to
-			 * process it before releasing socket, otherwise
-			 * order will be broken at second iteration.
-			 * More elegant solution is required!!!
-			 *
-			 * Look: we have the following (pseudo)queues:
-			 *
-			 * 1. packets in flight
-			 * 2. backlog
-			 * 3. prequeue
-			 * 4. receive_queue
-			 *
-			 * Each queue can be processed only if the next ones
-			 * are empty. At this point we have empty receive_queue.
-			 * But prequeue _can_ be not empty after 2nd iteration,
-			 * when we jumped to start of loop because backlog
-			 * processing added something to receive_queue.
-			 * We cannot release_sock(), because backlog contains
-			 * packets arrived _after_ prequeued ones.
-			 *
-			 * Shortly, algorithm is clear --- to process all
-			 * the queues in order. We could make it more directly,
-			 * requeueing packets from backlog to prequeue, if
-			 * is not empty. It is more elegant, but eats cycles,
-			 * unfortunately.
-			 */
-			if (!skb_queue_empty(&tp->ucopy.prequeue))
-				goto do_prequeue;
-
-			/* __ Set realtime policy in scheduler __ */
-		}
-
 		if (copied >= target) {
 			/* Do not sleep, just process backlog. */
 			release_sock(sk);
@@ -1859,31 +1934,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 			sk_wait_data(sk, &timeo, last);
 		}
 
-		if (user_recv) {
-			int chunk;
-
-			/* __ Restore normal policy in scheduler __ */
-
-			chunk = len - tp->ucopy.len;
-			if (chunk != 0) {
-				NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
-				len -= chunk;
-				copied += chunk;
-			}
-
-			if (tp->rcv_nxt == tp->copied_seq &&
-			    !skb_queue_empty(&tp->ucopy.prequeue)) {
-do_prequeue:
-				tcp_prequeue_process(sk);
-
-				chunk = len - tp->ucopy.len;
-				if (chunk != 0) {
-					NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
-					len -= chunk;
-					copied += chunk;
-				}
-			}
-		}
 		if ((flags & MSG_PEEK) &&
 		    (peek_seq - copied - urg_hole != tp->copied_seq)) {
 			net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
@@ -1941,6 +1991,10 @@ skip_copy:
 		if (used + offset < skb->len)
 			continue;
 
+		if (TCP_SKB_CB(skb)->has_rxtstamp) {
+			tcp_update_recv_tstamps(skb, &tss);
+			has_tss = true;
+		}
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 			goto found_fin_ok;
 		if (!(flags & MSG_PEEK))
@@ -1955,29 +2009,13 @@ skip_copy:
 		break;
 	} while (len > 0);
 
-	if (user_recv) {
-		if (!skb_queue_empty(&tp->ucopy.prequeue)) {
-			int chunk;
-
-			tp->ucopy.len = copied > 0 ? len : 0;
-
-			tcp_prequeue_process(sk);
-
-			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
-				NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
-				len -= chunk;
-				copied += chunk;
-			}
-		}
-
-		tp->ucopy.task = NULL;
-		tp->ucopy.len = 0;
-	}
-
 	/* According to UNIX98, msg_name/msg_namelen are ignored
 	 * on connected socket. I was just happy when found this 8) --ANK
 	 */
 
+	if (has_tss)
+		tcp_recv_timestamp(msg, sk, &tss);
+
 	/* Clean up data we have read: This will do ACK frames. */
 	tcp_cleanup_rbuf(sk, copied);
 
@@ -2002,6 +2040,8 @@ void tcp_set_state(struct sock *sk, int state)
 {
 	int oldstate = sk->sk_state;
 
+	trace_tcp_set_state(sk, oldstate, state);
+
 	switch (state) {
 	case TCP_ESTABLISHED:
 		if (oldstate != TCP_ESTABLISHED)
@@ -2289,6 +2329,37 @@ static inline bool tcp_need_reset(int state)
 		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
 }
 
+static void tcp_rtx_queue_purge(struct sock *sk)
+{
+	struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
+
+	while (p) {
+		struct sk_buff *skb = rb_to_skb(p);
+
+		p = rb_next(p);
+		/* Since we are deleting the whole queue, there is no need to
+		 * list_del(&skb->tcp_tsorted_anchor)
+		 */
+		tcp_rtx_queue_unlink(skb, sk);
+		sk_wmem_free_skb(sk, skb);
+	}
+}
+
+void tcp_write_queue_purge(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+		tcp_skb_tsorted_anchor_cleanup(skb);
+		sk_wmem_free_skb(sk, skb);
+	}
+	tcp_rtx_queue_purge(sk);
+	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
+	sk_mem_reclaim(sk);
+	tcp_clear_all_retrans_hints(tcp_sk(sk));
+}
+
 int tcp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
@@ -2347,7 +2418,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 	 * issue in __tcp_select_window()
 	 */
 	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
-	tcp_init_send_head(sk);
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
 	dst_release(sk->sk_rx_dst);
@@ -2439,8 +2509,6 @@ static int tcp_repair_options_est(struct sock *sk,
 				return -EINVAL;
 
 			tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
-			if (sysctl_tcp_fack)
-				tcp_enable_fack(tp);
 			break;
 		case TCPOPT_TIMESTAMP:
 			if (opt.opt_val != 0)
@@ -2481,7 +2549,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		name[val] = 0;
 
 		lock_sock(sk);
-		err = tcp_set_congestion_control(sk, name, true);
+		err = tcp_set_congestion_control(sk, name, true, true);
 		release_sock(sk);
 		return err;
 	}
@@ -2503,6 +2571,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		release_sock(sk);
 		return err;
 	}
+	case TCP_FASTOPEN_KEY: {
+		__u8 key[TCP_FASTOPEN_KEY_LENGTH];
+
+		if (optlen != sizeof(key))
+			return -EINVAL;
+
+		if (copy_from_user(key, optval, optlen))
+			return -EFAULT;
+
+		return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
+	}
 	default:
 		/* fallthru */
 		break;
@@ -2734,7 +2813,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	case TCP_FASTOPEN:
 		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
 		    TCPF_LISTEN))) {
-			tcp_fastopen_init_key_once(true);
+			tcp_fastopen_init_key_once(net);
 
 			fastopen_queue_tune(sk, val);
 		} else {
@@ -2744,7 +2823,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	case TCP_FASTOPEN_CONNECT:
 		if (val > 1 || val < 0) {
 			err = -EINVAL;
-		} else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+		} else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
 			if (sk->sk_state == TCP_CLOSE)
 				tp->fastopen_connect = val;
 			else
@@ -2753,6 +2832,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			err = -EOPNOTSUPP;
 		}
 		break;
+	case TCP_FASTOPEN_NO_COOKIE:
+		if (val > 1 || val < 0)
+			err = -EINVAL;
+		else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+			err = -EINVAL;
+		else
+			tp->fastopen_no_cookie = val;
+		break;
 	case TCP_TIMESTAMP:
 		if (!tp->repair)
 			err = -EPERM;
@@ -2823,7 +2910,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
 	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 now, intv;
+	u32 now;
 	u64 rate64;
 	bool slow;
 	u32 rate;
@@ -2890,7 +2977,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
 	info->tcpi_lost = tp->lost_out;
 	info->tcpi_retrans = tp->retrans_out;
-	info->tcpi_fackets = tp->fackets_out;
 
 	now = tcp_jiffies32;
 	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
@@ -2922,13 +3008,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_data_segs_out = tp->data_segs_out;
 
 	info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
-	rate = READ_ONCE(tp->rate_delivered);
-	intv = READ_ONCE(tp->rate_interval_us);
-	if (rate && intv) {
-		rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
-		do_div(rate64, intv);
+	rate64 = tcp_compute_delivery_rate(tp);
+	if (rate64)
 		info->tcpi_delivery_rate = rate64;
-	}
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -2938,8 +3020,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *stats;
 	struct tcp_info info;
+	u64 rate64;
+	u32 rate;
 
-	stats = alloc_skb(5 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
+	stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
+			  3 * nla_total_size(sizeof(u32)) +
+			  2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
 	if (!stats)
 		return NULL;
 
@@ -2954,6 +3040,20 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 			  tp->data_segs_out, TCP_NLA_PAD);
 	nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS,
 			  tp->total_retrans, TCP_NLA_PAD);
+
+	rate = READ_ONCE(sk->sk_pacing_rate);
+	rate64 = rate != ~0U ? rate : ~0ULL;
+	nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
+
+	rate64 = tcp_compute_delivery_rate(tp);
+	nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
+
+	nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
+	nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
+	nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
+
+	nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
+	nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
 	return stats;
 }
 
@@ -3075,6 +3175,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			return -EFAULT;
 		return 0;
 
+	case TCP_FASTOPEN_KEY: {
+		__u8 key[TCP_FASTOPEN_KEY_LENGTH];
+		struct tcp_fastopen_context *ctx;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		rcu_read_lock();
+		ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
+		if (ctx)
+			memcpy(key, ctx->key, sizeof(key));
+		else
+			len = 0;
+		rcu_read_unlock();
+
+		len = min_t(unsigned int, len, sizeof(key));
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, key, len))
+			return -EFAULT;
+		return 0;
+	}
 	case TCP_THIN_LINEAR_TIMEOUTS:
 		val = tp->thin_lto;
 		break;
@@ -3137,6 +3259,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		val = tp->fastopen_connect;
 		break;
 
+	case TCP_FASTOPEN_NO_COOKIE:
+		val = tp->fastopen_no_cookie;
+		break;
+
 	case TCP_TIMESTAMP:
 		val = tcp_time_stamp_raw() + tp->tsoffset;
 		break;
@@ -3502,13 +3628,13 @@ void __init tcp_init(void)
 	max_wshare = min(4UL*1024*1024, limit);
 	max_rshare = min(6UL*1024*1024, limit);
 
-	sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
-	sysctl_tcp_wmem[1] = 16*1024;
-	sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
+	init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
+	init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
+	init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
 
-	sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
-	sysctl_tcp_rmem[1] = 87380;
-	sysctl_tcp_rmem[2] = max(87380, max_rshare);
+	init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
+	init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
+	init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);
 
 	pr_info("Hash tables configured (established %u bind %u)\n",
 		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 609965f0e298..fc3614377413 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -49,7 +49,6 @@ MODULE_PARM_DESC(smooth_part, "log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wma
 struct bictcp {
 	u32	cnt;		/* increase cwnd by 1 after ACKs */
 	u32	last_max_cwnd;	/* last maximum snd_cwnd */
-	u32	loss_cwnd;	/* congestion window at last loss */
 	u32	last_cwnd;	/* the last snd_cwnd */
 	u32	last_time;	/* time when updated last_cwnd */
 	u32	epoch_start;	/* beginning of an epoch */
@@ -72,7 +71,6 @@ static void bictcp_init(struct sock *sk)
 	struct bictcp *ca = inet_csk_ca(sk);
 
 	bictcp_reset(ca);
-	ca->loss_cwnd = 0;
 
 	if (initial_ssthresh)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
@@ -172,22 +170,12 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
 	else
 		ca->last_max_cwnd = tp->snd_cwnd;
 
-	ca->loss_cwnd = tp->snd_cwnd;
-
 	if (tp->snd_cwnd <= low_window)
 		return max(tp->snd_cwnd >> 1U, 2U);
 	else
 		return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
 }
 
-static u32 bictcp_undo_cwnd(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	const struct bictcp *ca = inet_csk_ca(sk);
-
-	return max(tp->snd_cwnd, ca->loss_cwnd);
-}
-
 static void bictcp_state(struct sock *sk, u8 new_state)
 {
 	if (new_state == TCP_CA_Loss)
@@ -214,7 +202,7 @@ static struct tcp_congestion_ops bictcp __read_mostly = {
 	.ssthresh	= bictcp_recalc_ssthresh,
 	.cong_avoid	= bictcp_cong_avoid,
 	.set_state	= bictcp_state,
-	.undo_cwnd	= bictcp_undo_cwnd,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.pkts_acked     = bictcp_acked,
 	.owner		= THIS_MODULE,
 	.name		= "bic",
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 50a0f3e51d5b..06fbe102a425 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -85,7 +85,6 @@ struct cdg {
 	u8  state;
 	u8  delack;
 	u32 rtt_seq;
-	u32 undo_cwnd;
 	u32 shadow_wnd;
 	u16 backoff_cnt;
 	u16 sample_cnt;
@@ -330,8 +329,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
 	struct cdg *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	ca->undo_cwnd = tp->snd_cwnd;
-
 	if (ca->state == CDG_BACKOFF)
 		return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
 
@@ -344,13 +341,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
 	return max(2U, tp->snd_cwnd >> 1);
 }
 
-static u32 tcp_cdg_undo_cwnd(struct sock *sk)
-{
-	struct cdg *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->undo_cwnd);
-}
-
 static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
 {
 	struct cdg *ca = inet_csk_ca(sk);
@@ -399,11 +389,11 @@ static void tcp_cdg_release(struct sock *sk)
 	kfree(ca->gradients);
 }
 
-struct tcp_congestion_ops tcp_cdg __read_mostly = {
+static struct tcp_congestion_ops tcp_cdg __read_mostly = {
 	.cong_avoid = tcp_cdg_cong_avoid,
 	.cwnd_event = tcp_cdg_cwnd_event,
 	.pkts_acked = tcp_cdg_acked,
-	.undo_cwnd = tcp_cdg_undo_cwnd,
+	.undo_cwnd = tcp_reno_undo_cwnd,
 	.ssthresh = tcp_cdg_ssthresh,
 	.release = tcp_cdg_release,
 	.init = tcp_cdg_init,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index fde983f6376b..bc6c02f16243 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
 }
 
 /* Must be called with rcu lock held */
-static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
+static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
+						       const char *name)
 {
-	const struct tcp_congestion_ops *ca = tcp_ca_find(name);
+	struct tcp_congestion_ops *ca = tcp_ca_find(name);
+
 #ifdef CONFIG_MODULES
 	if (!ca && capable(CAP_NET_ADMIN)) {
 		rcu_read_unlock();
@@ -115,7 +117,7 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
 }
 EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
 
-u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
+u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
 {
 	const struct tcp_congestion_ops *ca;
 	u32 key = TCP_CA_UNSPEC;
@@ -123,7 +125,7 @@ u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
 	might_sleep();
 
 	rcu_read_lock();
-	ca = __tcp_ca_find_autoload(name);
+	ca = tcp_ca_find_autoload(net, name);
 	if (ca) {
 		key = ca->key;
 		*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
@@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
 /* Assign choice of congestion control. */
 void tcp_assign_congestion_control(struct sock *sk)
 {
+	struct net *net = sock_net(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_congestion_ops *ca;
+	const struct tcp_congestion_ops *ca;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
-		if (likely(try_module_get(ca->owner))) {
-			icsk->icsk_ca_ops = ca;
-			goto out;
-		}
-		/* Fallback to next available. The last really
-		 * guaranteed fallback is Reno from this list.
-		 */
-	}
-out:
+	ca = rcu_dereference(net->ipv4.tcp_congestion_control);
+	if (unlikely(!try_module_get(ca->owner)))
+		ca = &tcp_reno;
+	icsk->icsk_ca_ops = ca;
 	rcu_read_unlock();
-	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
 
+	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
 	if (ca->flags & TCP_CONG_NEEDS_ECN)
 		INET_ECN_xmit(sk);
 	else
@@ -189,8 +186,8 @@ void tcp_init_congestion_control(struct sock *sk)
 		INET_ECN_dontxmit(sk);
 }
 
-void tcp_reinit_congestion_control(struct sock *sk,
-				   const struct tcp_congestion_ops *ca)
+static void tcp_reinit_congestion_control(struct sock *sk,
+					  const struct tcp_congestion_ops *ca)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -214,29 +211,27 @@ void tcp_cleanup_congestion_control(struct sock *sk)
 }
 
 /* Used by sysctl to change default congestion control */
-int tcp_set_default_congestion_control(const char *name)
+int tcp_set_default_congestion_control(struct net *net, const char *name)
 {
 	struct tcp_congestion_ops *ca;
-	int ret = -ENOENT;
-
-	spin_lock(&tcp_cong_list_lock);
-	ca = tcp_ca_find(name);
-#ifdef CONFIG_MODULES
-	if (!ca && capable(CAP_NET_ADMIN)) {
-		spin_unlock(&tcp_cong_list_lock);
+	const struct tcp_congestion_ops *prev;
+	int ret;
 
-		request_module("tcp_%s", name);
-		spin_lock(&tcp_cong_list_lock);
-		ca = tcp_ca_find(name);
-	}
-#endif
+	rcu_read_lock();
+	ca = tcp_ca_find_autoload(net, name);
+	if (!ca) {
+		ret = -ENOENT;
+	} else if (!try_module_get(ca->owner)) {
+		ret = -EBUSY;
+	} else {
+		prev = xchg(&net->ipv4.tcp_congestion_control, ca);
+		if (prev)
+			module_put(prev->owner);
 
-	if (ca) {
-		ca->flags |= TCP_CONG_NON_RESTRICTED;	/* default is always allowed */
-		list_move(&ca->list, &tcp_cong_list);
+		ca->flags |= TCP_CONG_NON_RESTRICTED;
 		ret = 0;
 	}
-	spin_unlock(&tcp_cong_list_lock);
+	rcu_read_unlock();
 
 	return ret;
 }
@@ -244,7 +239,8 @@ int tcp_set_default_congestion_control(const char *name)
 /* Set default value from kernel configuration at bootup */
 static int __init tcp_congestion_default(void)
 {
-	return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+	return tcp_set_default_congestion_control(&init_net,
+						  CONFIG_DEFAULT_TCP_CONG);
 }
 late_initcall(tcp_congestion_default);
 
@@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen)
 }
 
 /* Get current default congestion control */
-void tcp_get_default_congestion_control(char *name)
+void tcp_get_default_congestion_control(struct net *net, char *name)
 {
-	struct tcp_congestion_ops *ca;
-	/* We will always have reno... */
-	BUG_ON(list_empty(&tcp_cong_list));
+	const struct tcp_congestion_ops *ca;
 
 	rcu_read_lock();
-	ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
+	ca = rcu_dereference(net->ipv4.tcp_congestion_control);
 	strncpy(name, ca->name, TCP_CA_NAME_MAX);
 	rcu_read_unlock();
 }
@@ -338,7 +332,7 @@ out:
  * tcp_reinit_congestion_control (if the current congestion control was
  * already initialized.
  */
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_congestion_ops *ca;
@@ -351,18 +345,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
 	if (!load)
 		ca = tcp_ca_find(name);
 	else
-		ca = __tcp_ca_find_autoload(name);
+		ca = tcp_ca_find_autoload(sock_net(sk), name);
+
 	/* No change asking for existing value */
 	if (ca == icsk->icsk_ca_ops) {
 		icsk->icsk_ca_setsockopt = 1;
 		goto out;
 	}
+
 	if (!ca) {
 		err = -ENOENT;
 	} else if (!load) {
-		icsk->icsk_ca_ops = ca;
-		if (!try_module_get(ca->owner))
+		const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
+
+		if (try_module_get(ca->owner)) {
+			if (reinit) {
+				tcp_reinit_congestion_control(sk, ca);
+			} else {
+				icsk->icsk_ca_ops = ca;
+				module_put(old_ca->owner);
+			}
+		} else {
 			err = -EBUSY;
+		}
 	} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
 		     ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
 		err = -EPERM;
@@ -456,7 +461,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+	return max(tp->snd_cwnd, tp->prior_cwnd);
 }
 EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
 
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 57ae5b5ae643..78bfadfcf342 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -83,7 +83,6 @@ MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (mse
 struct bictcp {
 	u32	cnt;		/* increase cwnd by 1 after ACKs */
 	u32	last_max_cwnd;	/* last maximum snd_cwnd */
-	u32	loss_cwnd;	/* congestion window at last loss */
 	u32	last_cwnd;	/* the last snd_cwnd */
 	u32	last_time;	/* time when updated last_cwnd */
 	u32	bic_origin_point;/* origin point of bic function */
@@ -142,7 +141,6 @@ static void bictcp_init(struct sock *sk)
 	struct bictcp *ca = inet_csk_ca(sk);
 
 	bictcp_reset(ca);
-	ca->loss_cwnd = 0;
 
 	if (hystart)
 		bictcp_hystart_reset(sk);
@@ -366,18 +364,9 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
 	else
 		ca->last_max_cwnd = tp->snd_cwnd;
 
-	ca->loss_cwnd = tp->snd_cwnd;
-
 	return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
 }
 
-static u32 bictcp_undo_cwnd(struct sock *sk)
-{
-	struct bictcp *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
 static void bictcp_state(struct sock *sk, u8 new_state)
 {
 	if (new_state == TCP_CA_Loss) {
@@ -470,7 +459,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
 	.ssthresh	= bictcp_recalc_ssthresh,
 	.cong_avoid	= bictcp_cong_avoid,
 	.set_state	= bictcp_state,
-	.undo_cwnd	= bictcp_undo_cwnd,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cwnd_event	= bictcp_cwnd_event,
 	.pkts_acked     = bictcp_acked,
 	.owner		= THIS_MODULE,
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index a748c74aa8b7..abbf0edcf6c2 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -16,6 +16,7 @@
 
 #include <linux/tcp.h>
 
+#include <net/netlink.h>
 #include <net/tcp.h>
 
 static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -36,6 +37,100 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 		tcp_get_info(sk, info);
 }
 
+#ifdef CONFIG_TCP_MD5SIG
+static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
+				 const struct tcp_md5sig_key *key)
+{
+	info->tcpm_family = key->family;
+	info->tcpm_prefixlen = key->prefixlen;
+	info->tcpm_keylen = key->keylen;
+	memcpy(info->tcpm_key, key->key, key->keylen);
+
+	if (key->family == AF_INET)
+		info->tcpm_addr[0] = key->addr.a4.s_addr;
+	#if IS_ENABLED(CONFIG_IPV6)
+	else if (key->family == AF_INET6)
+		memcpy(&info->tcpm_addr, &key->addr.a6,
+		       sizeof(info->tcpm_addr));
+	#endif
+}
+
+static int tcp_diag_put_md5sig(struct sk_buff *skb,
+			       const struct tcp_md5sig_info *md5sig)
+{
+	const struct tcp_md5sig_key *key;
+	struct tcp_diag_md5sig *info;
+	struct nlattr *attr;
+	int md5sig_count = 0;
+
+	hlist_for_each_entry_rcu(key, &md5sig->head, node)
+		md5sig_count++;
+	if (md5sig_count == 0)
+		return 0;
+
+	attr = nla_reserve(skb, INET_DIAG_MD5SIG,
+			   md5sig_count * sizeof(struct tcp_diag_md5sig));
+	if (!attr)
+		return -EMSGSIZE;
+
+	info = nla_data(attr);
+	memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
+	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
+		tcp_diag_md5sig_fill(info++, key);
+		if (--md5sig_count == 0)
+			break;
+	}
+
+	return 0;
+}
+#endif
+
+static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
+			    struct sk_buff *skb)
+{
+#ifdef CONFIG_TCP_MD5SIG
+	if (net_admin) {
+		struct tcp_md5sig_info *md5sig;
+		int err = 0;
+
+		rcu_read_lock();
+		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
+		if (md5sig)
+			err = tcp_diag_put_md5sig(skb, md5sig);
+		rcu_read_unlock();
+		if (err < 0)
+			return err;
+	}
+#endif
+
+	return 0;
+}
+
+static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
+{
+	size_t size = 0;
+
+#ifdef CONFIG_TCP_MD5SIG
+	if (net_admin && sk_fullsock(sk)) {
+		const struct tcp_md5sig_info *md5sig;
+		const struct tcp_md5sig_key *key;
+		size_t md5sig_count = 0;
+
+		rcu_read_lock();
+		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
+		if (md5sig) {
+			hlist_for_each_entry_rcu(key, &md5sig->head, node)
+				md5sig_count++;
+		}
+		rcu_read_unlock();
+		size += nla_total_size(md5sig_count *
+				       sizeof(struct tcp_diag_md5sig));
+	}
+#endif
+
+	return size;
+}
+
 static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 			  const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
@@ -68,13 +163,15 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
 #endif
 
 static const struct inet_diag_handler tcp_diag_handler = {
-	.dump		 = tcp_diag_dump,
-	.dump_one	 = tcp_diag_dump_one,
-	.idiag_get_info	 = tcp_diag_get_info,
-	.idiag_type	 = IPPROTO_TCP,
-	.idiag_info_size = sizeof(struct tcp_info),
+	.dump			= tcp_diag_dump,
+	.dump_one		= tcp_diag_dump_one,
+	.idiag_get_info		= tcp_diag_get_info,
+	.idiag_get_aux		= tcp_diag_get_aux,
+	.idiag_get_aux_size	= tcp_diag_get_aux_size,
+	.idiag_type		= IPPROTO_TCP,
+	.idiag_info_size	= sizeof(struct tcp_info),
 #ifdef CONFIG_INET_DIAG_DESTROY
-	.destroy	 = tcp_diag_destroy,
+	.destroy		= tcp_diag_destroy,
 #endif
 };
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index ce9c7fef200f..78c192ee03a4 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/crypto.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -9,15 +10,18 @@
 #include <net/inetpeer.h>
 #include <net/tcp.h>
 
-int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE;
-
-struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-
-static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
-
-void tcp_fastopen_init_key_once(bool publish)
+void tcp_fastopen_init_key_once(struct net *net)
 {
-	static u8 key[TCP_FASTOPEN_KEY_LENGTH];
+	u8 key[TCP_FASTOPEN_KEY_LENGTH];
+	struct tcp_fastopen_context *ctxt;
+
+	rcu_read_lock();
+	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+	if (ctxt) {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
 
 	/* tcp_fastopen_reset_cipher publishes the new context
 	 * atomically, so we allow this race happening here.
@@ -25,8 +29,8 @@ void tcp_fastopen_init_key_once(bool publish)
 	 * All call sites of tcp_fastopen_cookie_gen also check
 	 * for a valid cookie, so this is an acceptable risk.
 	 */
-	if (net_get_random_once(key, sizeof(key)) && publish)
-		tcp_fastopen_reset_cipher(key, sizeof(key));
+	get_random_bytes(key, sizeof(key));
+	tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
 }
 
 static void tcp_fastopen_ctx_free(struct rcu_head *head)
@@ -37,10 +41,37 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
 	kfree(ctx);
 }
 
-int tcp_fastopen_reset_cipher(void *key, unsigned int len)
+void tcp_fastopen_destroy_cipher(struct sock *sk)
+{
+	struct tcp_fastopen_context *ctx;
+
+	ctx = rcu_dereference_protected(
+			inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1);
+	if (ctx)
+		call_rcu(&ctx->rcu, tcp_fastopen_ctx_free);
+}
+
+void tcp_fastopen_ctx_destroy(struct net *net)
+{
+	struct tcp_fastopen_context *ctxt;
+
+	spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+	ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+				lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+	rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
+	spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+	if (ctxt)
+		call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
+}
+
+int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
+			      void *key, unsigned int len)
 {
-	int err;
 	struct tcp_fastopen_context *ctx, *octx;
+	struct fastopen_queue *q;
+	int err;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -61,26 +92,37 @@ error:		kfree(ctx);
 	}
 	memcpy(ctx->key, key, len);
 
-	spin_lock(&tcp_fastopen_ctx_lock);
 
-	octx = rcu_dereference_protected(tcp_fastopen_ctx,
-				lockdep_is_held(&tcp_fastopen_ctx_lock));
-	rcu_assign_pointer(tcp_fastopen_ctx, ctx);
-	spin_unlock(&tcp_fastopen_ctx_lock);
+	spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+	if (sk) {
+		q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+		octx = rcu_dereference_protected(q->ctx,
+			lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+		rcu_assign_pointer(q->ctx, ctx);
+	} else {
+		octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+			lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+		rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+	}
+	spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
 
 	if (octx)
 		call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
 	return err;
 }
 
-static bool __tcp_fastopen_cookie_gen(const void *path,
+static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
 				      struct tcp_fastopen_cookie *foc)
 {
 	struct tcp_fastopen_context *ctx;
 	bool ok = false;
 
 	rcu_read_lock();
-	ctx = rcu_dereference(tcp_fastopen_ctx);
+
+	ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+	if (!ctx)
+		ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+
 	if (ctx) {
 		crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
 		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
@@ -96,7 +138,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
  *
  * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
  */
-static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+static bool tcp_fastopen_cookie_gen(struct sock *sk,
+				    struct request_sock *req,
 				    struct sk_buff *syn,
 				    struct tcp_fastopen_cookie *foc)
 {
@@ -104,7 +147,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
 		const struct iphdr *iph = ip_hdr(syn);
 
 		__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
-		return __tcp_fastopen_cookie_gen(path, foc);
+		return __tcp_fastopen_cookie_gen(sk, path, foc);
 	}
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -112,13 +155,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
 		const struct ipv6hdr *ip6h = ipv6_hdr(syn);
 		struct tcp_fastopen_cookie tmp;
 
-		if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+		if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
 			struct in6_addr *buf = &tmp.addr;
 			int i;
 
 			for (i = 0; i < 4; i++)
 				buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
-			return __tcp_fastopen_cookie_gen(buf, foc);
+			return __tcp_fastopen_cookie_gen(sk, buf, foc);
 		}
 	}
 #endif
@@ -171,7 +214,6 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
 
 static struct sock *tcp_fastopen_create_child(struct sock *sk,
 					      struct sk_buff *skb,
-					      struct dst_entry *dst,
 					      struct request_sock *req)
 {
 	struct tcp_sock *tp;
@@ -217,12 +259,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 	refcount_set(&req->rsk_refcnt, 2);
 
 	/* Now finish processing the fastopen child socket. */
-	inet_csk(child)->icsk_af_ops->rebuild_header(child);
-	tcp_init_congestion_control(child);
-	tcp_mtup_init(child);
-	tcp_init_metrics(child);
-	tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
-	tcp_init_buffer_space(child);
+	tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
 
 	tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
 
@@ -272,6 +309,15 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 	return true;
 }
 
+static bool tcp_fastopen_no_cookie(const struct sock *sk,
+				   const struct dst_entry *dst, /* may be NULL */
+				   int flag) /* TFO_* bit tested against the sysctl mask */
+{ /* True if the netns sysctl, the socket or the route waives the cookie. */
+	return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || /* per-netns sysctl bit */
+	       tcp_sk(sk)->fastopen_no_cookie || /* per-socket field */
+	       (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); /* per-route metric */
+}
+
 /* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
  * may be updated and return the client in the SYN-ACK later. E.g., Fast Open
  * cookie request (foc->len == 0).
@@ -279,27 +325,29 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 			      struct request_sock *req,
 			      struct tcp_fastopen_cookie *foc,
-			      struct dst_entry *dst)
+			      const struct dst_entry *dst)
 {
-	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+	int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	struct sock *child;
 
 	if (foc->len == 0) /* Client requests a cookie */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
 
-	if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+	if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
 	      (syn_data || foc->len >= 0) &&
 	      tcp_fastopen_queue_check(sk))) {
 		foc->len = -1;
 		return NULL;
 	}
 
-	if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+	if (syn_data &&
+	    tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
 		goto fastopen;
 
 	if (foc->len >= 0 &&  /* Client presents or requests a cookie */
-	    tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+	    tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
 	    foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
 	    foc->len == valid_foc.len &&
 	    !memcmp(foc->val, valid_foc.val, foc->len)) {
@@ -312,7 +360,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 		 * data in SYN_RECV state.
 		 */
 fastopen:
-		child = tcp_fastopen_create_child(sk, skb, dst, req);
+		child = tcp_fastopen_create_child(sk, skb, req);
 		if (child) {
 			foc->len = -1;
 			NET_INC_STATS(sock_net(sk),
@@ -332,6 +380,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 			       struct tcp_fastopen_cookie *cookie)
 {
 	unsigned long last_syn_loss = 0;
+	const struct dst_entry *dst;
 	int syn_loss = 0;
 
 	tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss);
@@ -349,7 +398,9 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 		return false;
 	}
 
-	if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
+	dst = __sk_dst_get(sk);
+
+	if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) {
 		cookie->len = -1;
 		return true;
 	}
@@ -403,25 +454,16 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect);
  * TFO connection with data exchanges.
  */
 
-/* Default to 1hr */
-unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
-static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
-static unsigned long tfo_active_disable_stamp __read_mostly;
-
 /* Disable active TFO and record current jiffies and
  * tfo_active_disable_times
  */
 void tcp_fastopen_active_disable(struct sock *sk)
 {
-	atomic_inc(&tfo_active_disable_times);
-	tfo_active_disable_stamp = jiffies;
-	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
-}
+	struct net *net = sock_net(sk); /* disable state now lives in the socket's netns */
 
-/* Reset tfo_active_disable_times to 0 */
-void tcp_fastopen_active_timeout_reset(void)
-{
-	atomic_set(&tfo_active_disable_times, 0);
+	atomic_inc(&net->ipv4.tfo_active_disable_times); /* one more disable event */
+	net->ipv4.tfo_active_disable_stamp = jiffies; /* time of the latest disable */
+	NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
 }
 
 /* Calculate timeout for tfo active disable
@@ -430,17 +472,18 @@ void tcp_fastopen_active_timeout_reset(void)
  */
 bool tcp_fastopen_active_should_disable(struct sock *sk)
 {
-	int tfo_da_times = atomic_read(&tfo_active_disable_times);
-	int multiplier;
+	unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+	int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
 	unsigned long timeout;
+	int multiplier;
 
 	if (!tfo_da_times)
 		return false;
 
 	/* Limit timout to max: 2^6 * initial timeout */
 	multiplier = 1 << min(tfo_da_times - 1, 6);
-	timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
-	if (time_before(jiffies, tfo_active_disable_stamp + timeout))
+	timeout = multiplier * tfo_bh_timeout * HZ;
+	if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
 		return true;
 
 	/* Mark check bit so we can check for successful active TFO
@@ -459,27 +502,25 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
 void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct rb_node *p;
-	struct sk_buff *skb;
 	struct dst_entry *dst;
+	struct sk_buff *skb;
 
 	if (!tp->syn_fastopen)
 		return;
 
 	if (!tp->data_segs_in) {
-		p = rb_first(&tp->out_of_order_queue);
-		if (p && !rb_next(p)) {
-			skb = rb_entry(p, struct sk_buff, rbnode);
+		skb = skb_rb_first(&tp->out_of_order_queue); /* first skb of the out-of-order queue, if any */
+		if (skb && !skb_rb_next(skb)) { /* the queue holds exactly one skb */
 			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
 				tcp_fastopen_active_disable(sk);
 				return;
 			}
 		}
 	} else if (tp->syn_fastopen_ch &&
-		   atomic_read(&tfo_active_disable_times)) {
+		   atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { /* this netns has disable events recorded */
 		dst = sk_dst_get(sk);
 		if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
-			tcp_fastopen_active_timeout_reset();
+			atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); /* clear this netns' disable counter */
 		dst_release(dst);
 	}
 }
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 6d9879e93648..d1c33c91eadc 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -94,7 +94,6 @@ static const struct hstcp_aimd_val {
 
 struct hstcp {
 	u32	ai;
-	u32	loss_cwnd;
 };
 
 static void hstcp_init(struct sock *sk)
@@ -153,22 +152,14 @@ static u32 hstcp_ssthresh(struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct hstcp *ca = inet_csk_ca(sk);
 
-	ca->loss_cwnd = tp->snd_cwnd;
 	/* Do multiplicative decrease */
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
 }
 
-static u32 hstcp_cwnd_undo(struct sock *sk)
-{
-	const struct hstcp *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
 static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
 	.init		= hstcp_init,
 	.ssthresh	= hstcp_ssthresh,
-	.undo_cwnd	= hstcp_cwnd_undo,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= hstcp_cong_avoid,
 
 	.owner		= THIS_MODULE,
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 3eb78cde6ff0..082d479462fa 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -66,7 +66,6 @@ static inline void htcp_reset(struct htcp *ca)
 
 static u32 htcp_cwnd_undo(struct sock *sk)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
 	if (ca->undo_last_cong) {
@@ -76,7 +75,7 @@ static u32 htcp_cwnd_undo(struct sock *sk)
 		ca->undo_last_cong = 0;
 	}
 
-	return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
+	return tcp_reno_undo_cwnd(sk);
 }
 
 static inline void measure_rtt(struct sock *sk, u32 srtt)
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 60352ff4f5a8..7c843578f233 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -48,7 +48,6 @@ struct illinois {
 	u32	end_seq;	/* right edge of current RTT */
 	u32	alpha;		/* Additive increase */
 	u32	beta;		/* Muliplicative decrease */
-	u32	loss_cwnd;	/* cwnd on loss */
 	u16	acked;		/* # packets acked by current ACK */
 	u8	rtt_above;	/* average rtt has gone above threshold */
 	u8	rtt_low;	/* # of rtts measurements below threshold */
@@ -297,18 +296,10 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct illinois *ca = inet_csk_ca(sk);
 
-	ca->loss_cwnd = tp->snd_cwnd;
 	/* Multiplicative decrease */
 	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
 }
 
-static u32 tcp_illinois_cwnd_undo(struct sock *sk)
-{
-	const struct illinois *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
 /* Extract info for Tcp socket info provided via netlink. */
 static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
 				union tcp_cc_info *info)
@@ -336,7 +327,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
 static struct tcp_congestion_ops tcp_illinois __read_mostly = {
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
-	.undo_cwnd	= tcp_illinois_cwnd_undo,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_illinois_cong_avoid,
 	.set_state	= tcp_illinois_state,
 	.get_info	= tcp_illinois_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53de1424c13c..f844c06c0676 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -75,25 +76,10 @@
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
 #include <linux/errqueue.h>
+#include <trace/events/tcp.h>
+#include <linux/static_key.h>
 
-int sysctl_tcp_fack __read_mostly;
-int sysctl_tcp_max_reordering __read_mostly = 300;
-int sysctl_tcp_dsack __read_mostly = 1;
-int sysctl_tcp_app_win __read_mostly = 31;
-int sysctl_tcp_adv_win_scale __read_mostly = 1;
-EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-
-/* rfc5961 challenge ack rate limiting */
-int sysctl_tcp_challenge_ack_limit = 1000;
-
-int sysctl_tcp_stdurg __read_mostly;
-int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
-int sysctl_tcp_frto __read_mostly = 2;
-int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
-int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_early_retrans __read_mostly = 3;
-int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
@@ -114,7 +100,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
-#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)
+#define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
 #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
 
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
@@ -334,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
 	sndmem *= nr_segs * per_mss;
 
 	if (sk->sk_sndbuf < sndmem)
-		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+		sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -367,8 +353,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	/* Optimize this! */
-	int truesize = tcp_win_from_space(skb->truesize) >> 1;
-	int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
+	int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+	int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
 
 	while (tp->rcv_ssthresh <= window) {
 		if (truesize <= skb->len)
@@ -393,7 +379,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 		/* Check #2. Increase window, if skb with such overhead
 		 * will fit to rcvbuf in future.
 		 */
-		if (tcp_win_from_space(skb->truesize) <= skb->len)
+		if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
 			incr = 2 * tp->advmss;
 		else
 			incr = __tcp_grow_window(sk, skb);
@@ -419,11 +405,11 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 	/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
 	 * Allow enough cushion so that sender is not limited by our window
 	 */
-	if (sysctl_tcp_moderate_rcvbuf)
+	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
 		rcvmem <<= 2;
 
 	if (sk->sk_rcvbuf < rcvmem)
-		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
+		sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
 }
 
 /* 4. Try to fixup all. It is made immediately after connection enters
@@ -431,6 +417,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
  */
 void tcp_init_buffer_space(struct sock *sk)
 {
+	int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
 	struct tcp_sock *tp = tcp_sk(sk);
 	int maxwin;
 
@@ -449,14 +436,14 @@ void tcp_init_buffer_space(struct sock *sk)
 	if (tp->window_clamp >= maxwin) {
 		tp->window_clamp = maxwin;
 
-		if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+		if (tcp_app_win && maxwin > 4 * tp->advmss)
 			tp->window_clamp = max(maxwin -
-					       (maxwin >> sysctl_tcp_app_win),
+					       (maxwin >> tcp_app_win),
 					       4 * tp->advmss);
 	}
 
 	/* Force reservation of one segment. */
-	if (sysctl_tcp_app_win &&
+	if (tcp_app_win &&
 	    tp->window_clamp > 2 * tp->advmss &&
 	    tp->window_clamp + tp->advmss > maxwin)
 		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
@@ -470,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct net *net = sock_net(sk);
 
 	icsk->icsk_ack.quick = 0;
 
-	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+	if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
 	    !tcp_under_memory_pressure(sk) &&
 	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-				    sysctl_tcp_rmem[2]);
+				    net->ipv4.sysctl_tcp_rmem[2]);
 	}
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -609,7 +597,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	 * <prev RTT . ><current RTT .. ><next RTT .... >
 	 */
 
-	if (sysctl_tcp_moderate_rcvbuf &&
+	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
 		int rcvwin, rcvmem, rcvbuf;
 
@@ -633,10 +621,11 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		}
 
 		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
-		while (tcp_win_from_space(rcvmem) < tp->advmss)
+		while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
 			rcvmem += 128;
 
-		rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
+		rcvbuf = min(rcvwin / tp->advmss * rcvmem,
+			     sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
 		if (rcvbuf > sk->sk_rcvbuf) {
 			sk->sk_rcvbuf = rcvbuf;
 
@@ -780,15 +769,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
 	tp->srtt_us = max(1U, srtt);
 }
 
-/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
- * Note: TCP stack does not yet implement pacing.
- * FQ packet scheduler can be used to implement cheap but effective
- * TCP pacing, to smooth the burst on large writes when packets
- * in flight is significantly lower than cwnd (or rwin)
- */
-int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
-int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
-
 static void tcp_update_pacing_rate(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
@@ -806,21 +786,21 @@ static void tcp_update_pacing_rate(struct sock *sk)
 	 *	 end of slow start and should slow down.
 	 */
 	if (tp->snd_cwnd < tp->snd_ssthresh / 2)
-		rate *= sysctl_tcp_pacing_ss_ratio;
+		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
 	else
-		rate *= sysctl_tcp_pacing_ca_ratio;
+		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
 	if (likely(tp->srtt_us))
 		do_div(rate, tp->srtt_us);
 
-	/* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
+	/* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate
 	 * without any lock. We want to make sure compiler wont store
 	 * intermediate values in this location.
 	 */
-	ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate,
-						sk->sk_max_pacing_rate);
+	WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
+					     sk->sk_max_pacing_rate));
 }
 
 /* Calculate rto without backoff.  This is the second half of Van Jacobson's
@@ -862,60 +842,46 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
-/*
- * Packet counting of FACK is based on in-order assumptions, therefore TCP
- * disables it when reordering is detected
- */
-void tcp_disable_fack(struct tcp_sock *tp)
-{
-	/* RFC3517 uses different metric in lost marker => reset on change */
-	if (tcp_is_fack(tp))
-		tp->lost_skb_hint = NULL;
-	tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
-}
-
 /* Take a notice that peer is sending D-SACKs */
 static void tcp_dsack_seen(struct tcp_sock *tp)
 {
 	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
+	tp->rack.dsack_seen = 1; /* tp->rack keeps its own D-SACK flag */
 }
 
-static void tcp_update_reordering(struct sock *sk, const int metric,
-				  const int ts)
+/* It's reordering when higher sequence was delivered (i.e. sacked) before
+ * some lower never-retransmitted sequence ("low_seq"). The maximum reordering
+ * distance is approximated in full-mss packet distance ("reordering").
+ */
+static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
+				      const int ts)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int mib_idx;
+	const u32 mss = tp->mss_cache;
+	u32 fack, metric;
 
-	if (WARN_ON_ONCE(metric < 0))
+	fack = tcp_highest_sack_seq(tp); /* upper end of the reordering distance */
+	if (!before(low_seq, fack)) /* low_seq is not below fack: nothing to record */
 		return;
 
-	if (metric > tp->reordering) {
-		tp->reordering = min(sysctl_tcp_max_reordering, metric);
-
+	metric = fack - low_seq; /* distance in sequence space, not in packets */
+	if ((metric > tp->reordering * mss) && mss) { /* the mss test also guards the division below */
 #if FASTRETRANS_DEBUG > 1
 		pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
 			 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
 			 tp->reordering,
-			 tp->fackets_out,
+			 0,
 			 tp->sacked_out,
 			 tp->undo_marker ? tp->undo_retrans : 0);
 #endif
-		tcp_disable_fack(tp);
+		tp->reordering = min_t(u32, (metric + mss - 1) / mss, /* round up to whole-mss packets */
+				       sock_net(sk)->ipv4.sysctl_tcp_max_reordering); /* per-netns upper bound */
 	}
 
 	tp->rack.reord = 1;
-
 	/* This exciting event is worth to be remembered. 8) */
-	if (ts)
-		mib_idx = LINUX_MIB_TCPTSREORDER;
-	else if (tcp_is_reno(tp))
-		mib_idx = LINUX_MIB_TCPRENOREORDER;
-	else if (tcp_is_fack(tp))
-		mib_idx = LINUX_MIB_TCPFACKREORDER;
-	else
-		mib_idx = LINUX_MIB_TCPSACKREORDER;
-
-	NET_INC_STATS(sock_net(sk), mib_idx);
+	NET_INC_STATS(sock_net(sk),
+		      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER); /* non-zero ts selects the TS counter */
 }
 
 /* This must be called before lost_out is incremented */
@@ -989,7 +955,6 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb)
  * 3. Loss detection event of two flavors:
  *	A. Scoreboard estimator decided the packet is lost.
  *	   A'. Reno "three dupacks" marks head of queue lost.
- *	   A''. Its FACK modification, head until snd.fack is lost.
  *	B. SACK arrives sacking SND.NXT at the moment, when the
  *	   segment was retransmitted.
  * 4. D-SACK added new rule: D-SACK changes any tag to S.
@@ -1132,8 +1097,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 }
 
 struct tcp_sacktag_state {
-	int	reord;
-	int	fack_count;
+	u32	reord;
 	/* Timestamps for earliest and latest never-retransmitted segment
 	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
 	 * but congestion control should still get an accurate delay signal.
@@ -1142,6 +1106,7 @@ struct tcp_sacktag_state {
 	u64	last_sackt;
 	struct rate_sample *rate;
 	int	flag;
+	unsigned int mss_now;
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1191,7 +1156,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 		if (pkt_len >= skb->len && !in_sack)
 			return 0;
 
-		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+		err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+				   pkt_len, mss, GFP_ATOMIC);
 		if (err < 0)
 			return err;
 	}
@@ -1207,15 +1173,15 @@ static u8 tcp_sacktag_one(struct sock *sk,
 			  u64 xmit_time)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int fack_count = state->fack_count;
 
 	/* Account D-SACK for retransmitted packet. */
 	if (dup_sack && (sacked & TCPCB_RETRANS)) {
 		if (tp->undo_marker && tp->undo_retrans > 0 &&
 		    after(end_seq, tp->undo_marker))
 			tp->undo_retrans--;
-		if (sacked & TCPCB_SACKED_ACKED)
-			state->reord = min(fack_count, state->reord);
+		if ((sacked & TCPCB_SACKED_ACKED) &&
+		    before(start_seq, state->reord))
+				state->reord = start_seq;
 	}
 
 	/* Nothing to do; acked frame is about to be dropped (was ACKed). */
@@ -1241,9 +1207,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
 				 * which was in hole. It is reordering.
 				 */
 				if (before(start_seq,
-					   tcp_highest_sack_seq(tp)))
-					state->reord = min(fack_count,
-							   state->reord);
+					   tcp_highest_sack_seq(tp)) &&
+				    before(start_seq, state->reord))
+					state->reord = start_seq;
+
 				if (!after(end_seq, tp->high_seq))
 					state->flag |= FLAG_ORIG_SACK_ACKED;
 				if (state->first_sackt == 0)
@@ -1262,15 +1229,10 @@ static u8 tcp_sacktag_one(struct sock *sk,
 		tp->sacked_out += pcount;
 		tp->delivered += pcount;  /* Out-of-order packets delivered */
 
-		fack_count += pcount;
-
 		/* Lost marker hint past SACKed? Tweak RFC3517 cnt */
-		if (!tcp_is_fack(tp) && tp->lost_skb_hint &&
+		if (tp->lost_skb_hint &&
 		    before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
 			tp->lost_cnt_hint += pcount;
-
-		if (fack_count > tp->fackets_out)
-			tp->fackets_out = fack_count;
 	}
 
 	/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1288,13 +1250,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
 /* Shift newly-SACKed bytes from this skb to the immediately previous
  * already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
  */
-static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+			    struct sk_buff *skb,
 			    struct tcp_sacktag_state *state,
 			    unsigned int pcount, int shifted, int mss,
 			    bool dup_sack)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
 	u32 start_seq = TCP_SKB_CB(skb)->seq;	/* start of newly-SACKed */
 	u32 end_seq = start_seq + shifted;	/* end of newly-SACKed */
 
@@ -1363,8 +1325,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
 		TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
 
-	tcp_unlink_write_queue(skb, sk);
-	sk_wmem_free_skb(sk, skb);
+	tcp_rtx_queue_unlink_and_free(skb, sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
 
@@ -1414,9 +1375,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		goto fallback;
 
 	/* Can only happen with delayed DSACK + discard craziness */
-	if (unlikely(skb == tcp_write_queue_head(sk)))
+	prev = skb_rb_prev(skb);
+	if (!prev)
 		goto fallback;
-	prev = tcp_write_queue_prev(sk, skb);
 
 	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
 		goto fallback;
@@ -1495,18 +1456,17 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 
 	if (!skb_shift(prev, skb, len))
 		goto fallback;
-	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+	if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
 		goto out;
 
 	/* Hole filled allows collapsing with the next as well, this is very
 	 * useful when hole on every nth skb pattern happens
 	 */
-	if (prev == tcp_write_queue_tail(sk))
+	skb = skb_rb_next(prev);
+	if (!skb)
 		goto out;
-	skb = tcp_write_queue_next(sk, prev);
 
 	if (!skb_can_shift(skb) ||
-	    (skb == tcp_send_head(sk)) ||
 	    ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
 	    (mss != tcp_skb_seglen(skb)))
 		goto out;
@@ -1514,11 +1474,11 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	len = skb->len;
 	if (skb_shift(prev, skb, len)) {
 		pcount += tcp_skb_pcount(skb);
-		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+		tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+				len, mss, 0);
 	}
 
 out:
-	state->fack_count += pcount;
 	return prev;
 
 noop:
@@ -1538,13 +1498,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *tmp;
 
-	tcp_for_write_queue_from(skb, sk) {
+	skb_rbtree_walk_from(skb) {
 		int in_sack = 0;
 		bool dup_sack = dup_sack_in;
 
-		if (skb == tcp_send_head(sk))
-			break;
-
 		/* queue is in-order => we can short-circuit the walk early */
 		if (!before(TCP_SKB_CB(skb)->seq, end_seq))
 			break;
@@ -1593,34 +1550,48 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						tcp_skb_pcount(skb),
 						skb->skb_mstamp);
 			tcp_rate_skb_delivered(sk, skb, state->rate);
+			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+				list_del_init(&skb->tcp_tsorted_anchor);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
 				tcp_advance_highest_sack(sk, skb);
 		}
-
-		state->fack_count += tcp_skb_pcount(skb);
 	}
 	return skb;
 }
 
-/* Avoid all extra work that is being done by sacktag while walking in
- * a normal way
- */
+static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+					   struct tcp_sacktag_state *state, /* not referenced in this function */
+					   u32 seq)
+{ /* Find the skb in sk->tcp_rtx_queue whose [seq, end_seq) covers @seq. */
+	struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+	struct sk_buff *skb;
+
+	while (*p) { /* descend from the root until a match or an empty link */
+		parent = *p;
+		skb = rb_to_skb(parent);
+		if (before(seq, TCP_SKB_CB(skb)->seq)) { /* @seq lies below this skb */
+			p = &parent->rb_left;
+			continue;
+		}
+		if (!before(seq, TCP_SKB_CB(skb)->end_seq)) { /* @seq is at or past this skb's end */
+			p = &parent->rb_right;
+			continue;
+		}
+		return skb; /* skb->seq <= @seq < skb->end_seq */
+	}
+	return NULL; /* no queued skb covers @seq */
+}
+
 static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 					struct tcp_sacktag_state *state,
 					u32 skip_to_seq)
 {
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-
-		if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
-			break;
+	if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq)) /* skb may be NULL */
+		return skb; /* the given skb already starts past skip_to_seq */
 
-		state->fack_count += tcp_skb_pcount(skb);
-	}
-	return skb;
+	return tcp_sacktag_bsearch(sk, state, skip_to_seq); /* otherwise look it up in the rtx queue tree */
 }
 
 static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@ -1665,13 +1636,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 	int first_sack_index;
 
 	state->flag = 0;
-	state->reord = tp->packets_out;
+	state->reord = tp->snd_nxt;
 
-	if (!tp->sacked_out) {
-		if (WARN_ON(tp->fackets_out))
-			tp->fackets_out = 0;
+	if (!tp->sacked_out)
 		tcp_highest_sack_reset(sk);
-	}
 
 	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
 					 num_sacks, prior_snd_una);
@@ -1742,8 +1710,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		}
 	}
 
-	skb = tcp_write_queue_head(sk);
-	state->fack_count = 0;
+	state->mss_now = tcp_current_mss(sk);
+	skb = NULL;
 	i = 0;
 
 	if (!tp->sacked_out) {
@@ -1800,7 +1768,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 				skb = tcp_highest_sack(sk);
 				if (!skb)
 					break;
-				state->fack_count = tp->fackets_out;
 				cache++;
 				goto walk;
 			}
@@ -1815,7 +1782,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			skb = tcp_highest_sack(sk);
 			if (!skb)
 				break;
-			state->fack_count = tp->fackets_out;
 		}
 		skb = tcp_sacktag_skip(skb, sk, state, start_seq);
 
@@ -1835,9 +1801,8 @@ advance_sp:
 	for (j = 0; j < used_sacks; j++)
 		tp->recv_sack_cache[i++] = sp[j];
 
-	if ((state->reord < tp->fackets_out) &&
-	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
-		tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
+	if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker)
+		tcp_check_sack_reordering(sk, state->reord, 0);
 
 	tcp_verify_left_out(tp);
 out:
@@ -1875,8 +1840,13 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
 static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	if (tcp_limit_reno_sacked(tp))
-		tcp_update_reordering(sk, tp->packets_out + addend, 0);
+
+	if (!tcp_limit_reno_sacked(tp))
+		return; /* helper returned false: leave tp->reordering untouched */
+
+	tp->reordering = min_t(u32, tp->packets_out + addend, /* capped by the sysctl on the next line */
+			       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); /* count one Reno reordering event */
 }
 
 /* Emulate SACKs for SACKless connection: account for a new dupack. */
@@ -1922,7 +1892,6 @@ void tcp_clear_retrans(struct tcp_sock *tp)
 	tp->lost_out = 0;
 	tp->undo_marker = 0;
 	tp->undo_retrans = -1;
-	tp->fackets_out = 0;
 	tp->sacked_out = 0;
 }
 
@@ -1952,6 +1921,7 @@ void tcp_enter_loss(struct sock *sk)
 	    !after(tp->high_seq, tp->snd_una) ||
 	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
+		tp->prior_cwnd = tp->snd_cwnd;
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_LOSS);
 		tcp_init_undo(tp);
@@ -1966,19 +1936,15 @@ void tcp_enter_loss(struct sock *sk)
 	if (tcp_is_reno(tp))
 		tcp_reset_reno_sack(tp);
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
 	if (is_reneg) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
 		tp->sacked_out = 0;
-		tp->fackets_out = 0;
 	}
 	tcp_clear_all_retrans_hints(tp);
 
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
-
+	skb_rbtree_walk_from(skb) {
 		mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
 			     is_reneg);
 		if (mark_lost)
@@ -2012,7 +1978,7 @@ void tcp_enter_loss(struct sock *sk)
 	 * falsely raise the receive window, which results in repeated
 	 * timeouts and stop-and-go behavior.
 	 */
-	tp->frto = sysctl_tcp_frto &&
+	tp->frto = net->ipv4.sysctl_tcp_frto &&
 		   (new_recovery || icsk->icsk_retransmits) &&
 		   !inet_csk(sk)->icsk_mtup.probe_size;
 }
@@ -2041,19 +2007,10 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag)
 	return false;
 }
 
-static inline int tcp_fackets_out(const struct tcp_sock *tp)
-{
-	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
-}
-
 /* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
  * counter when SACK is enabled (without SACK, sacked_out is used for
  * that purpose).
  *
- * Instead, with FACK TCP uses fackets_out that includes both SACKed
- * segments up to the highest received SACK block so far and holes in
- * between them.
- *
  * With reordering, holes may still be in flight, so RFC3517 recovery
  * uses pure sacked_out (total number of SACKed segments) even though
  * it violates the RFC that uses duplicate ACKs, often these are equal
@@ -2063,10 +2020,10 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp)
  */
 static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
 {
-	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
+	return tp->sacked_out + 1; /* sacked_out stands in for the dupack count */
 }
 
-/* Linux NewReno/SACK/FACK/ECN state machine.
+/* Linux NewReno/SACK/ECN state machine.
  * --------------------------------------
  *
  * "Open"	Normal state, no dubious events, fast path.
@@ -2131,16 +2088,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
  *		dynamically measured and adjusted. This is implemented in
  *		tcp_rack_mark_lost.
  *
- *		FACK (Disabled by default. Subsumbed by RACK):
- *		It is the simplest heuristics. As soon as we decided
- *		that something is lost, we decide that _all_ not SACKed
- *		packets until the most forward SACK are lost. I.e.
- *		lost_out = fackets_out - sacked_out and left_out = fackets_out.
- *		It is absolutely correct estimate, if network does not reorder
- *		packets. And it loses any connection to reality when reordering
- *		takes place. We use FACK by default until reordering
- *		is suspected on the path to this destination.
- *
  *		If the receiver does not support SACK:
  *
  *		NewReno (RFC6582): in Recovery we assume that one segment
@@ -2189,7 +2136,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
 }
 
 /* Detect loss in event "A" above by marking head of queue up as lost.
- * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * For non-SACK(Reno) senders, the first "packets" number of segments
  * are considered lost. For RFC3517 SACK, a segment is considered lost if it
  * has at least tp->reordering SACKed seqments above it; "packets" refers to
  * the maximum SACKed segments to pass before reaching this limit.
@@ -2204,20 +2151,18 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 	const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
 
 	WARN_ON(packets > tp->packets_out);
-	if (tp->lost_skb_hint) {
-		skb = tp->lost_skb_hint;
-		cnt = tp->lost_cnt_hint;
+	skb = tp->lost_skb_hint;
+	if (skb) {
 		/* Head already handled? */
-		if (mark_head && skb != tcp_write_queue_head(sk))
+		if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
 			return;
+		cnt = tp->lost_cnt_hint;
 	} else {
-		skb = tcp_write_queue_head(sk);
+		skb = tcp_rtx_queue_head(sk);
 		cnt = 0;
 	}
 
-	tcp_for_write_queue_from(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
+	skb_rbtree_walk_from(skb) {
 		/* TODO: do this better */
 		/* this is not the most efficient way to do this... */
 		tp->lost_skb_hint = skb;
@@ -2227,12 +2172,12 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 			break;
 
 		oldcnt = cnt;
-		if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
+		if (tcp_is_reno(tp) ||
 		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 			cnt += tcp_skb_pcount(skb);
 
 		if (cnt > packets) {
-			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+			if (tcp_is_sack(tp) ||
 			    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
 			    (oldcnt >= packets))
 				break;
@@ -2241,7 +2186,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 			/* If needed, chop off the prefix to mark as lost. */
 			lost = (packets - oldcnt) * mss;
 			if (lost < skb->len &&
-			    tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+			    tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+					 lost, mss, GFP_ATOMIC) < 0)
 				break;
 			cnt = packets;
 		}
@@ -2262,11 +2208,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 
 	if (tcp_is_reno(tp)) {
 		tcp_mark_head_lost(sk, 1, 1);
-	} else if (tcp_is_fack(tp)) {
-		int lost = tp->fackets_out - tp->reordering;
-		if (lost <= 0)
-			lost = 1;
-		tcp_mark_head_lost(sk, lost, 0);
 	} else {
 		int sacked_upto = tp->sacked_out - tp->reordering;
 		if (sacked_upto >= 0)
@@ -2325,16 +2266,16 @@ static bool tcp_any_retrans_done(const struct sock *sk)
 	if (tp->retrans_out)
 		return true;
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
 		return true;
 
 	return false;
 }
 
-#if FASTRETRANS_DEBUG > 1
 static void DBGUNDO(struct sock *sk, const char *msg)
 {
+#if FASTRETRANS_DEBUG > 1
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
 
@@ -2356,10 +2297,8 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 			 tp->packets_out);
 	}
 #endif
-}
-#else
-#define DBGUNDO(x...) do { } while (0)
 #endif
+}
 
 static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 {
@@ -2368,9 +2307,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
 	if (unmark_loss) {
 		struct sk_buff *skb;
 
-		tcp_for_write_queue(skb, sk) {
-			if (skb == tcp_send_head(sk))
-				break;
+		skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
 		tp->lost_out = 0;
@@ -2415,6 +2352,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 			mib_idx = LINUX_MIB_TCPFULLUNDO;
 
 		NET_INC_STATS(sock_net(sk), mib_idx);
+	} else if (tp->rack.reo_wnd_persist) {
+		tp->rack.reo_wnd_persist--;
 	}
 	if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
 		/* Hold old state until something *above* high_seq
@@ -2434,6 +2373,8 @@ static bool tcp_try_undo_dsack(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tp->undo_marker && !tp->undo_retrans) {
+		tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
+					       tp->rack.reo_wnd_persist + 1);
 		DBGUNDO(sk, "D-SACK");
 		tcp_undo_cwnd_reduction(sk, false);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
@@ -2613,11 +2554,8 @@ void tcp_simple_retransmit(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	unsigned int mss = tcp_current_mss(sk);
-	u32 prior_lost = tp->lost_out;
 
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
-			break;
+	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
 		if (tcp_skb_seglen(skb) > mss &&
 		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
@@ -2630,7 +2568,7 @@ void tcp_simple_retransmit(struct sock *sk)
 
 	tcp_clear_retrans_hints_partial(tp);
 
-	if (prior_lost == tp->lost_out)
+	if (!tp->lost_out)
 		return;
 
 	if (tcp_is_reno(tp))
@@ -2711,7 +2649,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 			 * is updated in tcp_ack()). Otherwise fall back to
 			 * the conventional recovery.
 			 */
-			if (tcp_send_head(sk) &&
+			if (!tcp_write_queue_empty(sk) &&
 			    after(tcp_wnd_end(tp), tp->snd_nxt)) {
 				*rexmit = REXMIT_NEW;
 				return;
@@ -2738,15 +2676,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
 }
 
 /* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, const int acked)
+static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (tp->undo_marker && tcp_packet_delayed(tp)) {
 		/* Plain luck! Hole if filled with delayed
-		 * packet, rather than with a retransmit.
+		 * packet, rather than with a retransmit. Check reordering.
 		 */
-		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+		tcp_check_sack_reordering(sk, prior_snd_una, 1);
 
 		/* We are getting evidence that the reordering degree is higher
 		 * than we realized. If there are no retransmits out then we
@@ -2773,7 +2711,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Use RACK to detect loss */
-	if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+	if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
 		u32 prior_retrans = tp->retrans_out;
 
 		tcp_rack_mark_lost(sk);
@@ -2782,6 +2720,14 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
 	}
 }
 
+static bool tcp_force_fast_retransmit(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 reo_edge = tp->snd_una + tp->reordering * tp->mss_cache;
+
+	return after(tcp_highest_sack_seq(tp), reo_edge); /* SACKed past edge */
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
@@ -2794,19 +2740,17 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
  * It does _not_ decide what to send, it is made in function
  * tcp_xmit_retransmit_queue().
  */
-static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 				  bool is_dupack, int *ack_flag, int *rexmit)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fast_rexmit = 0, flag = *ack_flag;
 	bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
-				    (tcp_fackets_out(tp) > tp->reordering));
+				     tcp_force_fast_retransmit(sk));
 
-	if (WARN_ON(!tp->packets_out && tp->sacked_out))
+	if (!tp->packets_out && tp->sacked_out)
 		tp->sacked_out = 0;
-	if (WARN_ON(!tp->sacked_out && tp->fackets_out))
-		tp->fackets_out = 0;
 
 	/* Now state machine starts.
 	 * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@ -2853,11 +2797,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 			if (tcp_is_reno(tp) && is_dupack)
 				tcp_add_reno_sack(sk);
 		} else {
-			if (tcp_try_undo_partial(sk, acked))
+			if (tcp_try_undo_partial(sk, prior_snd_una))
 				return;
 			/* Partial ACK arrived. Force fast retransmit. */
 			do_lost = tcp_is_reno(tp) ||
-				  tcp_fackets_out(tp) > tp->reordering;
+				  tcp_force_fast_retransmit(sk);
 		}
 		if (tcp_try_undo_dsack(sk)) {
 			tcp_try_keep_open(sk);
@@ -2872,6 +2816,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 		      (*ack_flag & FLAG_LOST_RETRANS)))
 			return;
 		/* Change state if cwnd is undone or retransmits are lost */
+		/* fall through */
 	default:
 		if (tcp_is_reno(tp)) {
 			if (flag & FLAG_SND_UNA_ADVANCED)
@@ -2912,8 +2857,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 
 static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
 {
+	u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
 
 	minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
 			   rtt_us ? : jiffies_to_usecs(1));
@@ -3009,8 +2954,7 @@ void tcp_rearm_rto(struct sock *sk)
 			/* delta_us may not be positive if the socket is locked
 			 * when the retrans timer fires and is rescheduled.
 			 */
-			if (delta_us > 0)
-				rto = usecs_to_jiffies(delta_us);
+			rto = usecs_to_jiffies(max_t(int, delta_us, 1));
 		}
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
 					  TCP_RTO_MAX);
@@ -3056,28 +3000,31 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
 
 	shinfo = skb_shinfo(skb);
 	if (!before(shinfo->tskey, prior_snd_una) &&
-	    before(shinfo->tskey, tcp_sk(sk)->snd_una))
-		__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+	    before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
+		tcp_skb_tsorted_save(skb) {
+			__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+		} tcp_skb_tsorted_restore(skb);
+	}
 }
 
 /* Remove acknowledged frames from the retransmission queue. If our packet
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-			       u32 prior_snd_una, int *acked,
+static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
+			       u32 prior_snd_una,
 			       struct tcp_sacktag_state *sack)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u64 first_ackt, last_ackt;
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 prior_sacked = tp->sacked_out;
-	u32 reord = tp->packets_out;
+	u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */
+	struct sk_buff *skb, *next;
 	bool fully_acked = true;
 	long sack_rtt_us = -1L;
 	long seq_rtt_us = -1L;
 	long ca_rtt_us = -1L;
-	struct sk_buff *skb;
 	u32 pkts_acked = 0;
 	u32 last_in_flight = 0;
 	bool rtt_update;
@@ -3085,8 +3032,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 	first_ackt = 0;
 
-	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+	for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+		const u32 start_seq = scb->seq;
 		u8 sacked = scb->sacked;
 		u32 acked_pcount;
 
@@ -3103,8 +3051,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 				break;
 			fully_acked = false;
 		} else {
-			/* Speedup tcp_unlink_write_queue() and next loop */
-			prefetchw(skb->next);
 			acked_pcount = tcp_skb_pcount(skb);
 		}
 
@@ -3119,7 +3065,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 				first_ackt = last_ackt;
 
 			last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
-			reord = min(pkts_acked, reord);
+			if (before(start_seq, reord))
+				reord = start_seq;
 			if (!after(scb->end_seq, tp->high_seq))
 				flag |= FLAG_ORIG_SACK_ACKED;
 		}
@@ -3156,12 +3103,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		if (!fully_acked)
 			break;
 
-		tcp_unlink_write_queue(skb, sk);
-		sk_wmem_free_skb(sk, skb);
+		next = skb_rb_next(skb);
 		if (unlikely(skb == tp->retransmit_skb_hint))
 			tp->retransmit_skb_hint = NULL;
 		if (unlikely(skb == tp->lost_skb_hint))
 			tp->lost_skb_hint = NULL;
+		tcp_rtx_queue_unlink_and_free(skb, sk);
 	}
 
 	if (!skb)
@@ -3197,16 +3144,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			int delta;
 
 			/* Non-retransmitted hole got filled? That's reordering */
-			if (reord < prior_fackets && reord <= tp->fackets_out)
-				tcp_update_reordering(sk, tp->fackets_out - reord, 0);
+			if (before(reord, prior_fack))
+				tcp_check_sack_reordering(sk, reord, 0);
 
-			delta = tcp_is_fack(tp) ? pkts_acked :
-						  prior_sacked - tp->sacked_out;
+			delta = prior_sacked - tp->sacked_out;
 			tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
 		}
-
-		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-
 	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
 		   sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3247,18 +3190,19 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		}
 	}
 #endif
-	*acked = pkts_acked;
 	return flag;
 }
 
 static void tcp_ack_probe(struct sock *sk)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *head = tcp_send_head(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Was it a usable window open? */
-
-	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+	if (!head)
+		return;
+	if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
 		icsk->icsk_backoff = 0;
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
 		/* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -3378,7 +3322,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 			tp->pred_flags = 0;
 			tcp_fast_path_check(sk);
 
-			if (tcp_send_head(sk))
+			if (!tcp_write_queue_empty(sk))
 				tcp_slow_start_after_idle_check(sk);
 
 			if (nwin > tp->max_window) {
@@ -3399,7 +3343,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
 	if (*last_oow_ack_time) {
 		s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
 
-		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+		if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
 			NET_INC_STATS(net, mib_idx);
 			return true;	/* rate-limited: don't send yet! */
 		}
@@ -3435,10 +3379,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 	static u32 challenge_timestamp;
 	static unsigned int challenge_count;
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct net *net = sock_net(sk);
 	u32 count, now;
 
 	/* First check our per-socket dupack rate limit. */
-	if (__tcp_oow_rate_limited(sock_net(sk),
+	if (__tcp_oow_rate_limited(net,
 				   LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
 				   &tp->last_oow_ack_time))
 		return;
@@ -3446,16 +3391,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 	/* Then check host-wide RFC 5961 rate limit. */
 	now = jiffies / HZ;
 	if (now != challenge_timestamp) {
-		u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+		u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+		u32 half = (ack_limit + 1) >> 1;
 
 		challenge_timestamp = now;
-		WRITE_ONCE(challenge_count, half +
-			   prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+		WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
 	}
 	count = READ_ONCE(challenge_count);
 	if (count > 0) {
 		WRITE_ONCE(challenge_count, count - 1);
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+		NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
 		tcp_send_ack(sk);
 	}
 }
@@ -3553,18 +3498,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
 	u32 delivered = tp->delivered;
 	u32 lost = tp->lost;
-	int acked = 0; /* Number of packets newly acked */
 	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+	u32 prior_fack;
 
 	sack_state.first_sackt = 0;
 	sack_state.rate = &rs;
 
-	/* We very likely will need to access write queue head. */
-	prefetchw(sk->sk_write_queue.next);
+	/* We very likely will need to access rtx queue. */
+	prefetch(sk->tcp_rtx_queue.rb_node);
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3590,7 +3534,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		icsk->icsk_retransmits = 0;
 	}
 
-	prior_fackets = tp->fackets_out;
+	prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
 	rs.prior_in_flight = tcp_packets_in_flight(tp);
 
 	/* ts_recent update must be made after we are sure that the packet
@@ -3646,8 +3590,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		goto no_queue;
 
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
-				    &sack_state);
+	flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state);
+
+	tcp_rack_update_reo_wnd(sk, &rs);
 
 	if (tp->tlp_high_seq)
 		tcp_process_tlp_ack(sk, ack, flag);
@@ -3657,7 +3602,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-		tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+				      &rexmit);
 	}
 
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
@@ -3673,13 +3619,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 no_queue:
 	/* If data was DSACKed, see if we can undo a cwnd reduction. */
 	if (flag & FLAG_DSACKING_ACK)
-		tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+				      &rexmit);
 	/* If this ack opens up a zero window, clear backoff.  It was
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
 	 */
-	if (tcp_send_head(sk))
-		tcp_ack_probe(sk);
+	tcp_ack_probe(sk);
 
 	if (tp->tlp_high_seq)
 		tcp_process_tlp_ack(sk, ack, flag);
@@ -3696,7 +3642,8 @@ old_ack:
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
 						&sack_state);
-		tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+		tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+				      &rexmit);
 		tcp_xmit_recovery(sk, rexmit);
 	}
 
@@ -3721,6 +3668,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
 	foc->exp = exp_opt;
 }
 
+/* On a SYN, set opt_rx->smc_ok if the option has an even opsize of at least
+ * TCPOLEN_EXP_SMC_BASE and the big-endian u32 at ptr is TCPOPT_SMC_MAGIC.
+ */
+static void smc_parse_options(const struct tcphdr *th,
+			      struct tcp_options_received *opt_rx,
+			      const unsigned char *ptr, int opsize)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (static_branch_unlikely(&tcp_have_smc) && th->syn &&
+	    !(opsize & 1) && opsize >= TCPOLEN_EXP_SMC_BASE &&
+	    get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+		opt_rx->smc_ok = 1;
+#endif
+}
+
 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
@@ -3828,6 +3790,9 @@ void tcp_parse_options(const struct net *net,
 					tcp_parse_fastopen_option(opsize -
 						TCPOLEN_EXP_FASTOPEN_BASE,
 						ptr + 2, th->syn, foc, true);
+				else
+					smc_parse_options(th, opt_rx, ptr,
+							  opsize);
 				break;
 
 			}
@@ -3995,6 +3960,8 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 /* When we get a reset we do this. */
 void tcp_reset(struct sock *sk)
 {
+	trace_tcp_receive_reset(sk);
+
 	/* We want the right error as BSD sees it (and indeed as we do). */
 	switch (sk->sk_state) {
 	case TCP_SYN_SENT:
@@ -4117,7 +4084,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+	if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
 		int mib_idx;
 
 		if (before(seq, tp->rcv_nxt))
@@ -4152,7 +4119,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(sk);
 
-		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+		if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -4271,6 +4238,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 /**
  * tcp_try_coalesce - try to merge skb to prior one
  * @sk: socket
+ * @dest: destination queue
  * @to: prior buffer
  * @from: buffer to add in queue
  * @fragstolen: pointer to boolean
@@ -4303,6 +4271,12 @@ static bool tcp_try_coalesce(struct sock *sk,
 	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
 	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
 	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
+
+	if (TCP_SKB_CB(from)->has_rxtstamp) {
+		TCP_SKB_CB(to)->has_rxtstamp = true;
+		to->tstamp = from->tstamp;
+	}
+
 	return true;
 }
 
@@ -4325,7 +4299,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
 	p = rb_first(&tp->out_of_order_queue);
 	while (p) {
-		skb = rb_entry(p, struct sk_buff, rbnode);
+		skb = rb_to_skb(p);
 		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
 			break;
 
@@ -4389,7 +4363,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct rb_node **p, *q, *parent;
+	struct rb_node **p, *parent;
 	struct sk_buff *skb1;
 	u32 seq, end_seq;
 	bool fragstolen;
@@ -4429,7 +4403,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+	if (tcp_try_coalesce(sk, tp->ooo_last_skb,
+			     skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
@@ -4447,7 +4422,7 @@ coalesce_done:
 	parent = NULL;
 	while (*p) {
 		parent = *p;
-		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+		skb1 = rb_to_skb(parent);
 		if (before(seq, TCP_SKB_CB(skb1)->seq)) {
 			p = &parent->rb_left;
 			continue;
@@ -4479,7 +4454,8 @@ coalesce_done:
 				__kfree_skb(skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+		} else if (tcp_try_coalesce(sk, skb1,
+					    skb, &fragstolen)) {
 			goto coalesce_done;
 		}
 		p = &parent->rb_right;
@@ -4491,9 +4467,7 @@ insert:
 
 merge_right:
 	/* Remove other segments covered by skb. */
-	while ((q = rb_next(&skb->rbnode)) != NULL) {
-		skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+	while ((skb1 = skb_rb_next(skb)) != NULL) {
 		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
 			break;
 		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@ -4508,7 +4482,7 @@ merge_right:
 		tcp_drop(sk, skb1);
 	}
 	/* If there is no skb after us, we are the last_skb ! */
-	if (!q)
+	if (!skb1)
 		tp->ooo_last_skb = skb;
 
 add_sack:
@@ -4530,7 +4504,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
-		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
+		 tcp_try_coalesce(sk, tail,
+				  skb, fragstolen)) ? 1 : 0;
 	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -4592,8 +4567,8 @@ err:
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	bool fragstolen = false;
-	int eaten = -1;
+	bool fragstolen;
+	int eaten;
 
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
 		__kfree_skb(skb);
@@ -4615,32 +4590,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			goto out_of_window;
 
 		/* Ok. In sequence. In window. */
-		if (tp->ucopy.task == current &&
-		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
-		    sock_owned_by_user(sk) && !tp->urg_data) {
-			int chunk = min_t(unsigned int, skb->len,
-					  tp->ucopy.len);
-
-			__set_current_state(TASK_RUNNING);
-
-			if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
-				tp->ucopy.len -= chunk;
-				tp->copied_seq += chunk;
-				eaten = (chunk == skb->len);
-				tcp_rcv_space_adjust(sk);
-			}
-		}
-
-		if (eaten <= 0) {
 queue_and_out:
-			if (eaten < 0) {
-				if (skb_queue_len(&sk->sk_receive_queue) == 0)
-					sk_forced_mem_schedule(sk, skb->truesize);
-				else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
-					goto drop;
-			}
-			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
-		}
+		if (skb_queue_len(&sk->sk_receive_queue) == 0)
+			sk_forced_mem_schedule(sk, skb->truesize);
+		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+			goto drop;
+
+		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
@@ -4712,7 +4668,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li
 	if (list)
 		return !skb_queue_is_last(list, skb) ? skb->next : NULL;
 
-	return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+	return skb_rb_next(skb);
 }
 
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4733,7 +4689,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
 }
 
 /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
-static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -4741,7 +4697,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
 
 	while (*p) {
 		parent = *p;
-		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+		skb1 = rb_to_skb(parent);
 		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
 			p = &parent->rb_left;
 		else
@@ -4788,7 +4744,7 @@ restart:
 		 *   overlaps to the next one.
 		 */
 		if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
-		    (tcp_win_from_space(skb->truesize) > skb->len ||
+		    (tcp_win_from_space(sk, skb->truesize) > skb->len ||
 		     before(TCP_SKB_CB(skb)->seq, start))) {
 			end_of_skbs = false;
 			break;
@@ -4860,26 +4816,19 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb, *head;
-	struct rb_node *p;
 	u32 start, end;
 
-	p = rb_first(&tp->out_of_order_queue);
-	skb = rb_entry_safe(p, struct sk_buff, rbnode);
+	skb = skb_rb_first(&tp->out_of_order_queue);
 new_range:
 	if (!skb) {
-		p = rb_last(&tp->out_of_order_queue);
-		/* Note: This is possible p is NULL here. We do not
-		 * use rb_entry_safe(), as ooo_last_skb is valid only
-		 * if rbtree is not empty.
-		 */
-		tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+		tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
 		return;
 	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
 
 	for (head = skb;;) {
-		skb = tcp_skb_next(skb, NULL);
+		skb = skb_rb_next(skb);
 
 		/* Range is terminated when we see a gap or when
 		 * we are at the queue end.
@@ -4922,14 +4871,14 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
-		tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+		tcp_drop(sk, rb_to_skb(node));
 		sk_mem_reclaim(sk);
 		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
 		    !tcp_under_memory_pressure(sk))
 			break;
 		node = prev;
 	} while (node);
-	tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+	tp->ooo_last_skb = rb_to_skb(prev);
 
 	/* Reset SACK state.  A conforming SACK implementation will
 	 * do the same at a timeout based retransmit.  When a connection
@@ -5104,7 +5053,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 ptr = ntohs(th->urg_ptr);
 
-	if (ptr && !sysctl_tcp_stdurg)
+	if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
 		ptr--;
 	ptr += ntohl(th->seq);
 
@@ -5190,26 +5139,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
 	}
 }
 
-static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int chunk = skb->len - hlen;
-	int err;
-
-	if (skb_csum_unnecessary(skb))
-		err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
-	else
-		err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
-
-	if (!err) {
-		tp->ucopy.len -= chunk;
-		tp->copied_seq += chunk;
-		tcp_rcv_space_adjust(sk);
-	}
-
-	return err;
-}
-
 /* Accept RST for rcv_nxt - 1 after a FIN.
  * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
  * FIN is sent followed by a RST packet. The RST is sent with the same
@@ -5362,8 +5291,9 @@ discard:
  *	tcp_data_queue when everything is OK.
  */
 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			 const struct tcphdr *th, unsigned int len)
+			 const struct tcphdr *th)
 {
+	unsigned int len = skb->len;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tcp_mstamp_refresh(tp);
@@ -5449,56 +5379,28 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			int eaten = 0;
 			bool fragstolen = false;
 
-			if (tp->ucopy.task == current &&
-			    tp->copied_seq == tp->rcv_nxt &&
-			    len - tcp_header_len <= tp->ucopy.len &&
-			    sock_owned_by_user(sk)) {
-				__set_current_state(TASK_RUNNING);
-
-				if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
-					/* Predicted packet is in window by definition.
-					 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-					 * Hence, check seq<=rcv_wup reduces to:
-					 */
-					if (tcp_header_len ==
-					    (sizeof(struct tcphdr) +
-					     TCPOLEN_TSTAMP_ALIGNED) &&
-					    tp->rcv_nxt == tp->rcv_wup)
-						tcp_store_ts_recent(tp);
-
-					tcp_rcv_rtt_measure_ts(sk, skb);
-
-					__skb_pull(skb, tcp_header_len);
-					tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
-					NET_INC_STATS(sock_net(sk),
-							LINUX_MIB_TCPHPHITSTOUSER);
-					eaten = 1;
-				}
-			}
-			if (!eaten) {
-				if (tcp_checksum_complete(skb))
-					goto csum_error;
+			if (tcp_checksum_complete(skb))
+				goto csum_error;
 
-				if ((int)skb->truesize > sk->sk_forward_alloc)
-					goto step5;
+			if ((int)skb->truesize > sk->sk_forward_alloc)
+				goto step5;
 
-				/* Predicted packet is in window by definition.
-				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
-				 * Hence, check seq<=rcv_wup reduces to:
-				 */
-				if (tcp_header_len ==
-				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
-				    tp->rcv_nxt == tp->rcv_wup)
-					tcp_store_ts_recent(tp);
+			/* Predicted packet is in window by definition.
+			 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
+			 * Hence, check seq<=rcv_wup reduces to:
+			 */
+			if (tcp_header_len ==
+			    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
+			    tp->rcv_nxt == tp->rcv_wup)
+				tcp_store_ts_recent(tp);
 
-				tcp_rcv_rtt_measure_ts(sk, skb);
+			tcp_rcv_rtt_measure_ts(sk, skb);
 
-				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
 
-				/* Bulk data transfer: receiver */
-				eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
-						      &fragstolen);
-			}
+			/* Bulk data transfer: receiver */
+			eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
+					      &fragstolen);
 
 			tcp_event_data_recv(sk, skb);
 
@@ -5571,20 +5473,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 		security_inet_conn_established(sk, skb);
 	}
 
-	/* Make sure socket is routed, for correct metrics.  */
-	icsk->icsk_af_ops->rebuild_header(sk);
-
-	tcp_init_metrics(sk);
-	tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
-	tcp_init_congestion_control(sk);
+	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
 
 	/* Prevent spurious tcp_cwnd_restart() on first data
 	 * packet.
 	 */
 	tp->lsndtime = tcp_jiffies32;
 
-	tcp_init_buffer_space(sk);
-
 	if (sock_flag(sk, SOCK_KEEPOPEN))
 		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
 
@@ -5592,14 +5487,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 		__tcp_fast_path_on(tp, tp->snd_wnd);
 	else
 		tp->pred_flags = 0;
-
 }
 
 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 				    struct tcp_fastopen_cookie *cookie)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+	struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
 	u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
 	bool syn_drop = false;
 
@@ -5634,9 +5528,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
 
 	if (data) { /* Retransmit unacked data in SYN */
-		tcp_for_write_queue_from(data, sk) {
-			if (data == tcp_send_head(sk) ||
-			    __tcp_retransmit_skb(sk, data, 1))
+		skb_rbtree_walk_from(data) {
+			if (__tcp_retransmit_skb(sk, data, 1))
 				break;
 		}
 		tcp_rearm_rto(sk);
@@ -5654,6 +5547,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 	return false;
 }
 
+/* Clear tp->syn_smc when it is set but tp->rx_opt.smc_ok is not. */
+static void smc_check_reset_syn(struct tcp_sock *tp)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (static_branch_unlikely(&tcp_have_smc) &&
+	    tp->syn_smc && !tp->rx_opt.smc_ok)
+		tp->syn_smc = 0;
+#endif
+}
+
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 const struct tcphdr *th)
 {
@@ -5749,10 +5652,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			tp->tcp_header_len = sizeof(struct tcphdr);
 		}
 
-		if (tcp_is_sack(tp) && sysctl_tcp_fack)
-			tcp_enable_fack(tp);
-
-		tcp_mtup_init(sk);
 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
@@ -5761,6 +5660,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 * is initialized. */
 		tp->copied_seq = tp->rcv_nxt;
 
+		smc_check_reset_syn(tp);
+
 		smp_mb();
 
 		tcp_finish_connect(sk, skb);
@@ -5978,15 +5879,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (req) {
 			inet_csk(sk)->icsk_retransmits = 0;
 			reqsk_fastopen_remove(sk, req, false);
+			/* Re-arm the timer because data may have been sent out.
+			 * This is similar to the regular data transmission case
+			 * when new data has just been ack'ed.
+			 *
+			 * (TFO) - we could try to be more aggressive and
+			 * retransmit any data sooner based on when it was
+			 * sent out.
+			 */
+			tcp_rearm_rto(sk);
 		} else {
-			/* Make sure socket is routed, for correct metrics. */
-			icsk->icsk_af_ops->rebuild_header(sk);
-			tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
-			tcp_init_congestion_control(sk);
-
-			tcp_mtup_init(sk);
+			tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
 			tp->copied_seq = tp->rcv_nxt;
-			tcp_init_buffer_space(sk);
 		}
 		smp_mb();
 		tcp_set_state(sk, TCP_ESTABLISHED);
@@ -6006,19 +5910,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (tp->rx_opt.tstamp_ok)
 			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
 
-		if (req) {
-			/* Re-arm the timer because data may have been sent out.
-			 * This is similar to the regular data transmission case
-			 * when new data has just been ack'ed.
-			 *
-			 * (TFO) - we could try to be more aggressive and
-			 * retransmitting any data sooner based on when they
-			 * are sent out.
-			 */
-			tcp_rearm_rto(sk);
-		} else
-			tcp_init_metrics(sk);
-
 		if (!inet_csk(sk)->icsk_ca_ops->cong_control)
 			tcp_update_pacing_rate(sk);
 
@@ -6115,6 +6006,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	case TCP_LAST_ACK:
 		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
 			break;
+		/* fall through */
 	case TCP_FIN_WAIT1:
 	case TCP_FIN_WAIT2:
 		/* RFC 793 says to queue data in these states,
@@ -6223,6 +6115,9 @@ static void tcp_openreq_init(struct request_sock *req,
 	ireq->ir_rmt_port = tcp_hdr(skb)->source;
 	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
 	ireq->ir_mark = inet_request_mark(sk, skb);
+#if IS_ENABLED(CONFIG_SMC)
+	ireq->smc_ok = rx_opt->smc_ok;
+#endif
 }
 
 struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
@@ -6235,8 +6130,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
 	if (req) {
 		struct inet_request_sock *ireq = inet_rsk(req);
 
-		kmemcheck_annotate_bitfield(ireq, flags);
-		ireq->opt = NULL;
+		ireq->ireq_opt = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
 		ireq->pktopts = NULL;
 #endif
@@ -6307,9 +6201,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
 	struct sock *fastopen_sk = NULL;
-	struct dst_entry *dst = NULL;
 	struct request_sock *req;
 	bool want_cookie = false;
+	struct dst_entry *dst;
 	struct flowi fl;
 
 	/* TW buckets are converted to open requests without
@@ -6359,6 +6253,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (tmp_opt.tstamp_ok)
 		tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
 
+	dst = af_ops->route_req(sk, &fl, req);
+	if (!dst)
+		goto drop_and_free;
+
 	if (!want_cookie && !isn) {
 		/* Kill the following clause, if you dislike this way. */
 		if (!net->ipv4.sysctl_tcp_syncookies &&
@@ -6379,11 +6277,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 		isn = af_ops->init_seq(skb);
 	}
-	if (!dst) {
-		dst = af_ops->route_req(sk, &fl, req);
-		if (!dst)
-			goto drop_and_free;
-	}
 
 	tcp_ecn_create_request(req, skb, sk, dst);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e9252c7df809..c6bc0c4d19c6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,7 +85,7 @@
 #include <crypto/hash.h>
 #include <linux/scatterlist.h>
 
-int sysctl_tcp_low_latency __read_mostly;
+#include <trace/events/tcp.h>
 
 #ifdef CONFIG_TCP_MD5SIG
 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
@@ -385,7 +385,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
 	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
 				       th->dest, iph->saddr, ntohs(th->source),
-				       inet_iif(icmp_skb));
+				       inet_iif(icmp_skb), 0);
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 		return;
@@ -482,7 +482,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 					       TCP_TIMEOUT_INIT;
 		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
 
-		skb = tcp_write_queue_head(sk);
+		skb = tcp_rtx_queue_head(sk);
 		BUG_ON(!skb);
 
 		tcp_mstamp_refresh(tp);
@@ -661,7 +661,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 		sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
 					     ip_hdr(skb)->saddr,
 					     th->source, ip_hdr(skb)->daddr,
-					     ntohs(th->source), inet_iif(skb));
+					     ntohs(th->source), inet_iif(skb),
+					     tcp_v4_sdif(skb));
 		/* don't send rst if it can't find key */
 		if (!sk1)
 			goto out;
@@ -702,8 +703,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	 * routing might fail in this case. No choice here, if we choose to force
 	 * input interface, we will misroute in case of asymmetric route.
 	 */
-	if (sk)
+	if (sk) {
 		arg.bound_dev_if = sk->sk_bound_dev_if;
+		trace_tcp_send_reset(sk, skb);
+	}
 
 	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
 		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -878,7 +881,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 
 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 					    ireq->ir_rmt_addr,
-					    ireq->opt);
+					    ireq_opt_deref(ireq));
 		err = net_xmit_eval(err);
 	}
 
@@ -890,7 +893,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
  */
 static void tcp_v4_reqsk_destructor(struct request_sock *req)
 {
-	kfree(inet_rsk(req)->opt);
+	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
 }
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -1266,10 +1269,11 @@ static void tcp_v4_init_req(struct request_sock *req,
 			    struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
+	struct net *net = sock_net(sk_listener);
 
 	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
 	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
-	ireq->opt = tcp_v4_save_options(skb);
+	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
 }
 
 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1356,10 +1360,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 	sk_daddr_set(newsk, ireq->ir_rmt_addr);
 	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
 	newsk->sk_bound_dev_if = ireq->ir_iif;
-	newinet->inet_saddr	      = ireq->ir_loc_addr;
-	inet_opt	      = ireq->opt;
-	rcu_assign_pointer(newinet->inet_opt, inet_opt);
-	ireq->opt	      = NULL;
+	newinet->inet_saddr   = ireq->ir_loc_addr;
+	inet_opt	      = rcu_dereference(ireq->ireq_opt);
+	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
 	newinet->mc_index     = inet_iif(skb);
 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
 	newinet->rcv_tos      = ip_hdr(skb)->tos;
@@ -1404,9 +1407,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 	if (__inet_inherit_port(sk, newsk) < 0)
 		goto put_and_exit;
 	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-	if (*own_req)
+	if (likely(*own_req)) {
 		tcp_move_syn(newtp, req);
-
+		ireq->ireq_opt = NULL;
+	} else {
+		newinet->inet_opt = NULL;
+	}
 	return newsk;
 
 exit_overflow:
@@ -1417,6 +1423,7 @@ exit:
 	tcp_listendrop(sk);
 	return NULL;
 put_and_exit:
+	newinet->inet_opt = NULL;
 	inet_csk_prepare_forced_close(newsk);
 	tcp_done(newsk);
 	goto exit;
@@ -1458,7 +1465,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 				sk->sk_rx_dst = NULL;
 			}
 		}
-		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
+		tcp_rcv_established(sk, skb, tcp_hdr(skb));
 		return 0;
 	}
 
@@ -1504,28 +1511,28 @@ csum_err:
 }
 EXPORT_SYMBOL(tcp_v4_do_rcv);
 
-void tcp_v4_early_demux(struct sk_buff *skb)
+int tcp_v4_early_demux(struct sk_buff *skb)
 {
 	const struct iphdr *iph;
 	const struct tcphdr *th;
 	struct sock *sk;
 
 	if (skb->pkt_type != PACKET_HOST)
-		return;
+		return 0;
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
-		return;
+		return 0;
 
 	iph = ip_hdr(skb);
 	th = tcp_hdr(skb);
 
 	if (th->doff < sizeof(struct tcphdr) / 4)
-		return;
+		return 0;
 
 	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
 				       iph->saddr, th->source,
 				       iph->daddr, ntohs(th->dest),
-				       skb->skb_iif);
+				       skb->skb_iif, inet_sdif(skb));
 	if (sk) {
 		skb->sk = sk;
 		skb->destructor = sock_edemux;
@@ -1539,63 +1546,9 @@ void tcp_v4_early_demux(struct sk_buff *skb)
 				skb_dst_set_noref(skb, dst);
 		}
 	}
+	return 0;
 }
 
-/* Packet is added to VJ-style prequeue for processing in process
- * context, if a reader task is waiting. Apparently, this exciting
- * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
- * failed somewhere. Latency? Burstiness? Well, at least now we will
- * see, why it failed. 8)8)				  --ANK
- *
- */
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (sysctl_tcp_low_latency || !tp->ucopy.task)
-		return false;
-
-	if (skb->len <= tcp_hdrlen(skb) &&
-	    skb_queue_len(&tp->ucopy.prequeue) == 0)
-		return false;
-
-	/* Before escaping RCU protected region, we need to take care of skb
-	 * dst. Prequeue is only enabled for established sockets.
-	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
-	 * Instead of doing full sk_rx_dst validity here, let's perform
-	 * an optimistic check.
-	 */
-	if (likely(sk->sk_rx_dst))
-		skb_dst_drop(skb);
-	else
-		skb_dst_force_safe(skb);
-
-	__skb_queue_tail(&tp->ucopy.prequeue, skb);
-	tp->ucopy.memory += skb->truesize;
-	if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
-	    tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
-		struct sk_buff *skb1;
-
-		BUG_ON(sock_owned_by_user(sk));
-		__NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
-				skb_queue_len(&tp->ucopy.prequeue));
-
-		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-			sk_backlog_rcv(sk, skb1);
-
-		tp->ucopy.memory = 0;
-	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
-		wake_up_interruptible_sync_poll(sk_sleep(sk),
-					   POLLIN | POLLRDNORM | POLLRDBAND);
-		if (!inet_csk_ack_scheduled(sk))
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  (3 * tcp_rto_min(sk)) / 4,
-						  TCP_RTO_MAX);
-	}
-	return true;
-}
-EXPORT_SYMBOL(tcp_prequeue);
-
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
@@ -1645,6 +1598,7 @@ EXPORT_SYMBOL(tcp_filter);
 int tcp_v4_rcv(struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
+	int sdif = inet_sdif(skb);
 	const struct iphdr *iph;
 	const struct tcphdr *th;
 	bool refcounted;
@@ -1692,10 +1646,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
 	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
 	TCP_SKB_CB(skb)->sacked	 = 0;
+	TCP_SKB_CB(skb)->has_rxtstamp =
+			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
 
 lookup:
 	sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
-			       th->dest, &refcounted);
+			       th->dest, sdif, &refcounted);
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1722,9 +1678,9 @@ process:
 		 */
 		sock_hold(sk);
 		refcounted = true;
-		if (tcp_filter(sk, skb))
-			goto discard_and_relse;
-		nsk = tcp_check_req(sk, skb, req, false);
+		nsk = NULL;
+		if (!tcp_filter(sk, skb))
+			nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
 			goto discard_and_relse;
@@ -1770,8 +1726,7 @@ process:
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-		if (!tcp_prequeue(sk, skb))
-			ret = tcp_v4_do_rcv(sk, skb);
+		ret = tcp_v4_do_rcv(sk, skb);
 	} else if (tcp_add_backlog(sk, skb)) {
 		goto discard_and_relse;
 	}
@@ -1824,15 +1779,17 @@ do_time_wait:
 							__tcp_hdrlen(th),
 							iph->saddr, th->source,
 							iph->daddr, th->dest,
-							inet_iif(skb));
+							inet_iif(skb),
+							sdif);
 		if (sk2) {
 			inet_twsk_deschedule_put(inet_twsk(sk));
 			sk = sk2;
 			refcounted = false;
 			goto process;
 		}
-		/* Fall through to ACK */
 	}
+		/* to ACK */
+		/* fall through */
 	case TCP_TW_ACK:
 		tcp_v4_timewait_ack(sk, skb);
 		break;
@@ -1912,6 +1869,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
+	trace_tcp_destroy_sock(sk);
+
 	tcp_clear_xmit_timers(sk);
 
 	tcp_cleanup_congestion_control(sk);
@@ -1936,9 +1895,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	}
 #endif
 
-	/* Clean prequeue, it must be empty really */
-	__skb_queue_purge(&tp->ucopy.prequeue);
-
 	/* Clean up a referenced TCP bind bucket. */
 	if (inet_csk(sk)->icsk_bind_hash)
 		inet_put_port(sk);
@@ -1947,6 +1903,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 
 	/* If socket is aborted during connect operation */
 	tcp_free_fastopen_req(tp);
+	tcp_fastopen_destroy_cipher(sk);
 	tcp_saved_syn_free(tp);
 
 	sk_sockets_allocated_dec(sk);
@@ -2452,8 +2409,8 @@ struct proto tcp_prot = {
 	.memory_allocated	= &tcp_memory_allocated,
 	.memory_pressure	= &tcp_memory_pressure,
 	.sysctl_mem		= sysctl_tcp_mem,
-	.sysctl_wmem		= sysctl_tcp_wmem,
-	.sysctl_rmem		= sysctl_tcp_rmem,
+	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
+	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp_sock),
 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
@@ -2473,6 +2430,8 @@ static void __net_exit tcp_sk_exit(struct net *net)
 {
 	int cpu;
 
+	module_put(net->ipv4.tcp_congestion_control->owner);
+
 	for_each_possible_cpu(cpu)
 		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
 	free_percpu(net->ipv4.tcp_sk);
@@ -2527,6 +2486,50 @@ static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_sack = 1;
 	net->ipv4.sysctl_tcp_window_scaling = 1;
 	net->ipv4.sysctl_tcp_timestamps = 1;
+	net->ipv4.sysctl_tcp_early_retrans = 3;
+	net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
+	net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior.  */
+	net->ipv4.sysctl_tcp_retrans_collapse = 1;
+	net->ipv4.sysctl_tcp_max_reordering = 300;
+	net->ipv4.sysctl_tcp_dsack = 1;
+	net->ipv4.sysctl_tcp_app_win = 31;
+	net->ipv4.sysctl_tcp_adv_win_scale = 1;
+	net->ipv4.sysctl_tcp_frto = 2;
+	net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
+	/* This limits the percentage of the congestion window which we
+	 * will allow a single TSO frame to consume.  Building TSO frames
+	 * which are too large can cause TCP streams to be bursty.
+	 */
+	net->ipv4.sysctl_tcp_tso_win_divisor = 3;
+	/* Default TSQ limit of four TSO segments */
+	net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
+	/* rfc5961 challenge ack rate limiting */
+	net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
+	net->ipv4.sysctl_tcp_min_tso_segs = 2;
+	net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
+	net->ipv4.sysctl_tcp_autocorking = 1;
+	net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
+	net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
+	net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
+	if (net != &init_net) {
+		memcpy(net->ipv4.sysctl_tcp_rmem,
+		       init_net.ipv4.sysctl_tcp_rmem,
+		       sizeof(init_net.ipv4.sysctl_tcp_rmem));
+		memcpy(net->ipv4.sysctl_tcp_wmem,
+		       init_net.ipv4.sysctl_tcp_wmem,
+		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
+	}
+	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
+	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
+	net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
+	atomic_set(&net->ipv4.tfo_active_disable_times, 0);
+
+	/* Reno is always built in */
+	if (!net_eq(net, &init_net) &&
+	    try_module_get(init_net.ipv4.tcp_congestion_control->owner))
+		net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control;
+	else
+		net->ipv4.tcp_congestion_control = &tcp_reno;
 
 	return 0;
 fail:
@@ -2537,7 +2540,12 @@ fail:
 
 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
+	struct net *net;
+
 	inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
+	list_for_each_entry(net, net_exit_list, exit_list)
+		tcp_fastopen_ctx_destroy(net);
 }
 
 static struct pernet_operations __net_initdata tcp_sk_ops = {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 102b2c90bb80..7097f92d16e5 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/rcupdate.h>
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
@@ -20,8 +21,6 @@
 #include <net/tcp.h>
 #include <net/genetlink.h>
 
-int sysctl_tcp_nometrics_save __read_mostly;
-
 static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
 						   const struct inetpeer_addr *daddr,
 						   struct net *net, unsigned int hash);
@@ -330,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
 	int m;
 
 	sk_dst_confirm(sk);
-	if (sysctl_tcp_nometrics_save || !dst)
+	if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
 		return;
 
 	rcu_read_lock();
@@ -471,10 +470,8 @@ void tcp_init_metrics(struct sock *sk)
 		tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	}
 	val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
-	if (val && tp->reordering != val) {
-		tcp_disable_fack(tp);
+	if (val && tp->reordering != val)
 		tp->reordering = val;
-	}
 
 	crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
 	rcu_read_unlock();
@@ -892,10 +889,14 @@ static void tcp_metrics_flush_all(struct net *net)
 
 	for (row = 0; row < max_rows; row++, hb++) {
 		struct tcp_metrics_block __rcu **pp;
+		bool match;
+
 		spin_lock_bh(&tcp_metrics_lock);
 		pp = &hb->chain;
 		for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
-			if (net_eq(tm_net(tm), net)) {
+			match = net ? net_eq(tm_net(tm), net) :
+				!atomic_read(&tm_net(tm)->count);
+			if (match) {
 				*pp = tm->tcpm_next;
 				kfree_rcu(tm, rcu_head);
 			} else {
@@ -1018,14 +1019,14 @@ static int __net_init tcp_net_metrics_init(struct net *net)
 	return 0;
 }
 
-static void __net_exit tcp_net_metrics_exit(struct net *net)
+static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
 {
-	tcp_metrics_flush_all(net);
+	tcp_metrics_flush_all(NULL);
 }
 
 static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
-	.init	=	tcp_net_metrics_init,
-	.exit	=	tcp_net_metrics_exit,
+	.init		=	tcp_net_metrics_init,
+	.exit_batch	=	tcp_net_metrics_exit_batch,
 };
 
 void __init tcp_metrics_init(void)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0ff83c1637d8..e36eff0403f4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -23,13 +23,12 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/workqueue.h>
+#include <linux/static_key.h>
 #include <net/tcp.h>
 #include <net/inet_common.h>
 #include <net/xfrm.h>
 #include <net/busy_poll.h>
 
-int sysctl_tcp_abort_on_overflow __read_mostly;
-
 static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
 {
 	if (seq == s_win)
@@ -180,7 +179,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 			 * Oh well... nobody has a sufficient solution to this
 			 * protocol bug yet.
 			 */
-			if (sysctl_tcp_rfc1337 == 0) {
+			if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
 kill:
 				inet_twsk_deschedule_put(tw);
 				return TCP_TW_SUCCESS;
@@ -298,8 +297,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 			key = tp->af_specific->md5_lookup(sk, sk);
 			if (key) {
 				tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
-				if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool())
-					BUG();
+				BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
 			}
 		} while (0);
 #endif
@@ -371,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
 		full_space = rcv_wnd * mss;
 
 	/* tcp_full_space because it is guaranteed to be the first packet */
-	tcp_select_initial_window(full_space,
+	tcp_select_initial_window(sk_listener, full_space,
 		mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
 		&req->rsk_rcv_wnd,
 		&req->rsk_window_clamp,
@@ -417,6 +415,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
 }
 EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
 
+/* Clear newtp->syn_smc when oldtp->syn_smc is set but req's smc_ok is not. */
+static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
+				    struct request_sock *req,
+				    struct tcp_sock *newtp)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (!static_branch_unlikely(&tcp_have_smc))
+		return;
+
+	/* smc_ok is read from the inet_request_sock view of req */
+	if (oldtp->syn_smc && !inet_rsk(req)->smc_ok)
+		newtp->syn_smc = 0;
+#endif
+}
+
 /* This is not only more efficient than what we used to do, it eliminates
  * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
  *
@@ -434,6 +447,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		struct tcp_request_sock *treq = tcp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 		struct tcp_sock *newtp = tcp_sk(newsk);
+		struct tcp_sock *oldtp = tcp_sk(sk);
+
+		smc_check_reset_syn_req(oldtp, req, newtp);
 
 		/* Now setup tcp_sock */
 		newtp->pred_flags = 0;
@@ -445,8 +461,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->snd_sml = newtp->snd_una =
 		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
 
-		tcp_prequeue_init(newtp);
 		INIT_LIST_HEAD(&newtp->tsq_node);
+		INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
 
 		tcp_init_wl(newtp, treq->rcv_isn);
 
@@ -459,7 +475,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->packets_out = 0;
 		newtp->retrans_out = 0;
 		newtp->sacked_out = 0;
-		newtp->fackets_out = 0;
 		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 		newtp->tlp_high_seq = 0;
 		newtp->lsndtime = tcp_jiffies32;
@@ -493,10 +508,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 						       keepalive_time_when(newtp));
 
 		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
-		if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
-			if (sysctl_tcp_fack)
-				tcp_enable_fack(newtp);
-		}
+		newtp->rx_opt.sack_ok = ireq->sack_ok;
 		newtp->window_clamp = req->rsk_window_clamp;
 		newtp->rcv_ssthresh = req->rsk_rcv_wnd;
 		newtp->rcv_wnd = req->rsk_rcv_wnd;
@@ -535,6 +547,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->syn_data_acked = 0;
 		newtp->rack.mstamp = 0;
 		newtp->rack.advanced = 0;
+		newtp->rack.reo_wnd_steps = 1;
+		newtp->rack.last_delivered = 0;
+		newtp->rack.reo_wnd_persist = 0;
+		newtp->rack.dsack_seen = 0;
 
 		__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
 	}
@@ -765,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	return inet_csk_complete_hashdance(sk, child, req, own_req);
 
 listen_overflow:
-	if (!sysctl_tcp_abort_on_overflow) {
+	if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
 		inet_rsk(req)->acked = 1;
 		return NULL;
 	}
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 6d650ed3cb59..0b5a05bd82e3 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -39,7 +39,7 @@
  * nv_cong_dec_mult	Decrease cwnd by X% (30%) of congestion when detected
  * nv_ssthresh_factor	On congestion set ssthresh to this * <desired cwnd> / 8
  * nv_rtt_factor	RTT averaging factor
- * nv_loss_dec_factor	Decrease cwnd by this (50%) when losses occur
+ * nv_loss_dec_factor	Decrease cwnd to this (80%) when losses occur
  * nv_dec_eval_min_calls	Wait this many RTT measurements before dec cwnd
  * nv_inc_eval_min_calls	Wait this many RTT measurements before inc cwnd
  * nv_ssthresh_eval_min_calls	Wait this many RTT measurements before stopping
@@ -61,7 +61,7 @@ static int nv_min_cwnd __read_mostly = 2;
 static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */
 static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */
 static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */
-static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */
+static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */
 static int nv_cwnd_growth_rate_neg __read_mostly = 8;
 static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */
 static int nv_dec_eval_min_calls __read_mostly = 60;
@@ -86,7 +86,6 @@ struct tcpnv {
 				 * < 0 => less than 1 packet/RTT */
 	u8  available8;
 	u16 available16;
-	u32 loss_cwnd;	/* cwnd at last loss */
 	u8  nv_allow_cwnd_growth:1, /* whether cwnd can grow */
 		nv_reset:1,	    /* whether to reset values */
 		nv_catchup:1;	    /* whether we are growing because
@@ -102,6 +101,11 @@ struct tcpnv {
 	u32 nv_last_rtt;	/* last rtt */
 	u32 nv_min_rtt;		/* active min rtt. Used to determine slope */
 	u32 nv_min_rtt_new;	/* min rtt for future use */
+	u32 nv_base_rtt;        /* If non-zero it represents the threshold for
+				 * congestion */
+	u32 nv_lower_bound_rtt; /* Used in conjunction with nv_base_rtt. It is
+				 * set to 80% of nv_base_rtt. It helps reduce
+				 * unfairness between flows */
 	u32 nv_rtt_max_rate;	/* max rate seen during current RTT */
 	u32 nv_rtt_start_seq;	/* current RTT ends when packet arrives
 				 * acking beyond nv_rtt_start_seq */
@@ -121,7 +125,6 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	ca->nv_reset = 0;
-	ca->loss_cwnd = 0;
 	ca->nv_no_cong_cnt = 0;
 	ca->nv_rtt_cnt = 0;
 	ca->nv_last_rtt = 0;
@@ -134,9 +137,24 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
 static void tcpnv_init(struct sock *sk)
 {
 	struct tcpnv *ca = inet_csk_ca(sk);
+	int base_rtt;
 
 	tcpnv_reset(ca, sk);
 
+	/* See if base_rtt is available from socket_ops bpf program.
+	 * It is meant to be used in environments, such as communication
+	 * within a datacenter, where we have reasonable estimates of
+	 * RTTs
+	 */
+	base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
+	if (base_rtt > 0) {
+		ca->nv_base_rtt = base_rtt;
+		ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
+	} else {
+		ca->nv_base_rtt = 0;
+		ca->nv_lower_bound_rtt = 0;
+	}
+
 	ca->nv_allow_cwnd_growth = 1;
 	ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ;
 	ca->nv_min_rtt = NV_INIT_RTT;
@@ -146,6 +164,19 @@ static void tcpnv_init(struct sock *sk)
 	ca->cwnd_growth_factor = 0;
 }
 
+/* If provided (non-zero), apply the lower (nv_lower_bound_rtt) and upper
+ * (nv_base_rtt) bounds to an RTT value; the lower bound is checked first.
+ * Returns val unchanged when neither bound applies.
+ */
+static inline u32 nv_get_bounded_rtt(const struct tcpnv *ca, u32 val)
+{
+	if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt)
+		return ca->nv_lower_bound_rtt;
+	if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt)
+		return ca->nv_base_rtt;
+	return val;
+}
+
 static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -177,19 +208,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static u32 tcpnv_recalc_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	struct tcpnv *ca = inet_csk_ca(sk);
 
-	ca->loss_cwnd = tp->snd_cwnd;
 	return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
 }
 
-static u32 tcpnv_undo_cwnd(struct sock *sk)
-{
-	struct tcpnv *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
 static void tcpnv_state(struct sock *sk, u8 new_state)
 {
 	struct tcpnv *ca = inet_csk_ca(sk);
@@ -220,7 +242,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcpnv *ca = inet_csk_ca(sk);
 	unsigned long now = jiffies;
-	s64 rate64 = 0;
+	u64 rate64;
 	u32 rate, max_win, cwnd_by_slope;
 	u32 avg_rtt;
 	u32 bytes_acked = 0;
@@ -262,8 +284,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
 	}
 
 	/* rate in 100's bits per second */
-	rate64 = ((u64)sample->in_flight) * 8000000;
-	rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100));
+	rate64 = ((u64)sample->in_flight) * 80000;
+	do_div(rate64, avg_rtt ?: 1);
+	rate = (u32)rate64;
 
 	/* Remember the maximum rate seen during this RTT
 	 * Note: It may be more than one RTT. This function should be
@@ -276,6 +299,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
 	if (ca->nv_eval_call_cnt < 255)
 		ca->nv_eval_call_cnt++;
 
+	/* Apply bounds to rtt. Only used to update min_rtt */
+	avg_rtt = nv_get_bounded_rtt(ca, avg_rtt);
+
 	/* update min rtt if necessary */
 	if (avg_rtt < ca->nv_min_rtt)
 		ca->nv_min_rtt = avg_rtt;
@@ -446,7 +472,7 @@ static struct tcp_congestion_ops tcpnv __read_mostly = {
 	.ssthresh	= tcpnv_recalc_ssthresh,
 	.cong_avoid	= tcpnv_cong_avoid,
 	.set_state	= tcpnv_state,
-	.undo_cwnd	= tcpnv_undo_cwnd,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.pkts_acked     = tcpnv_acked,
 	.get_info	= tcpnv_get_info,
 
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 11f69bbf9307..b6a2aa1dcf56 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -149,11 +149,19 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 	 * is freed by GSO engine
 	 */
 	if (copy_destructor) {
+		int delta;
+
 		swap(gso_skb->sk, skb->sk);
 		swap(gso_skb->destructor, skb->destructor);
 		sum_truesize += skb->truesize;
-		refcount_add(sum_truesize - gso_skb->truesize,
-			   &skb->sk->sk_wmem_alloc);
+		delta = sum_truesize - gso_skb->truesize;
+		/* In some pathological cases, delta can be negative.
+		 * We need to either use refcount_add() or refcount_sub_and_test()
+		 */
+		if (likely(delta >= 0))
+			refcount_add(delta, &skb->sk->sk_wmem_alloc);
+		else
+			WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc));
 	}
 
 	delta = htonl(oldlen + (skb_tail_pointer(skb) -
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b7661a68d498..540b7d92cc70 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -41,40 +41,25 @@
 #include <linux/compiler.h>
 #include <linux/gfp.h>
 #include <linux/module.h>
+#include <linux/static_key.h>
 
-/* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse __read_mostly = 1;
-
-/* People can turn this on to work with those rare, broken TCPs that
- * interpret the window field as a signed quantity.
- */
-int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
-
-/* Default TSQ limit of four TSO segments */
-int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
-
-/* This limits the percentage of the congestion window which we
- * will allow a single TSO frame to consume.  Building TSO frames
- * which are too large can cause TCP streams to be bursty.
- */
-int sysctl_tcp_tso_win_divisor __read_mostly = 3;
-
-/* By default, RFC2861 behavior.  */
-int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+#include <trace/events/tcp.h>
 
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			   int push_one, gfp_t gfp);
 
 /* Account for new data that has been sent to the network. */
-static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int prior_packets = tp->packets_out;
 
-	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
+	__skb_unlink(skb, &sk->sk_write_queue);
+	tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
 	tp->packets_out += tcp_skb_pcount(skb);
 	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
@@ -203,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss)
  * be a multiple of mss if possible. We assume here that mss >= 1.
  * This MUST be enforced by all callers.
  */
-void tcp_select_initial_window(int __space, __u32 mss,
+void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
 			       __u32 *rcv_wnd, __u32 *window_clamp,
 			       int wscale_ok, __u8 *rcv_wscale,
 			       __u32 init_rcv_wnd)
@@ -227,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
 	 * which we interpret as a sign the remote TCP is not
 	 * misinterpreting the window field as a signed quantity.
 	 */
-	if (sysctl_tcp_workaround_signed_windows)
+	if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
 		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
 	else
 		(*rcv_wnd) = space;
@@ -235,7 +220,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
 	(*rcv_wscale) = 0;
 	if (wscale_ok) {
 		/* Set window scaling on max possible window */
-		space = max_t(u32, space, sysctl_tcp_rmem[2]);
+		space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
 		space = max_t(u32, space, sysctl_rmem_max);
 		space = min_t(u32, space, *window_clamp);
 		while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
@@ -287,7 +272,8 @@ static u16 tcp_select_window(struct sock *sk)
 	/* Make sure we do not exceed the maximum possible
 	 * scaled window.
 	 */
-	if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
+	if (!tp->rx_opt.rcv_wscale &&
+	    sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
 		new_win = min(new_win, MAX_TCP_WINDOW);
 	else
 		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -395,7 +381,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
 static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 {
 	skb->ip_summed = CHECKSUM_PARTIAL;
-	skb->csum = 0;
 
 	TCP_SKB_CB(skb)->tcp_flags = flags;
 	TCP_SKB_CB(skb)->sacked = 0;
@@ -418,6 +403,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
 #define OPTION_MD5		(1 << 2)
 #define OPTION_WSCALE		(1 << 3)
 #define OPTION_FAST_OPEN_COOKIE	(1 << 8)
+#define OPTION_SMC		(1 << 9)
+
+/* Write the two SMC option words at ptr when OPTION_SMC is set in *options. */
+static void smc_options_write(__be32 *ptr, u16 *options)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (!static_branch_unlikely(&tcp_have_smc))
+		return;
+	if (likely(!(*options & OPTION_SMC)))
+		return;
+
+	ptr[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+		       (TCPOPT_EXP << 8) | TCPOLEN_EXP_SMC_BASE);
+	ptr[1] = htonl(TCPOPT_SMC_MAGIC);
+#endif
+}
 
 struct tcp_out_options {
 	u16 options;		/* bit field of OPTION_* */
@@ -536,6 +537,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 		}
 		ptr += (len + 3) >> 2;
 	}
+
+	smc_options_write(ptr, &options);
+}
+
+/* Flag OPTION_SMC for a SYN and charge its size against *remaining. */
+static void smc_set_option(const struct tcp_sock *tp,
+			   struct tcp_out_options *opts,
+			   unsigned int *remaining)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (!static_branch_unlikely(&tcp_have_smc) || !tp->syn_smc)
+		return;
+	if (*remaining < TCPOLEN_EXP_SMC_BASE_ALIGNED)
+		return;	/* not enough option space left: leave opts untouched */
+
+	opts->options |= OPTION_SMC;
+	*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+#endif
+}
+
+/* SYN-ACK counterpart of smc_set_option(): additionally needs ireq->smc_ok. */
+static void smc_set_option_cond(const struct tcp_sock *tp,
+				const struct inet_request_sock *ireq,
+				struct tcp_out_options *opts,
+				unsigned int *remaining)
+{
+#if IS_ENABLED(CONFIG_SMC)
+	if (!static_branch_unlikely(&tcp_have_smc))
+		return;
+	if (tp->syn_smc && ireq->smc_ok &&
+	    *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+		opts->options |= OPTION_SMC;
+		*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+	}
+#endif
 }
 
 /* Compute TCP options for SYN packets. This is not the final
@@ -603,11 +639,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
+	smc_set_option(tp, opts, &remaining);
+
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
 /* Set up TCP options for SYN-ACKs. */
-static unsigned int tcp_synack_options(struct request_sock *req,
+static unsigned int tcp_synack_options(const struct sock *sk,
+				       struct request_sock *req,
 				       unsigned int mss, struct sk_buff *skb,
 				       struct tcp_out_options *opts,
 				       const struct tcp_md5sig_key *md5,
@@ -663,6 +702,8 @@ static unsigned int tcp_synack_options(struct request_sock *req,
 		}
 	}
 
+	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
+
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -739,8 +780,10 @@ static void tcp_tsq_handler(struct sock *sk)
 		struct tcp_sock *tp = tcp_sk(sk);
 
 		if (tp->lost_out > tp->retrans_out &&
-		    tp->snd_cwnd > tcp_packets_in_flight(tp))
+		    tp->snd_cwnd > tcp_packets_in_flight(tp)) {
+			tcp_mstamp_refresh(tp);
 			tcp_xmit_retransmit_queue(sk);
+		}
 
 		tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
 			       0, GFP_ATOMIC);
@@ -971,6 +1014,12 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
 		      HRTIMER_MODE_ABS_PINNED);
 }
 
+static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
+{	/* Copy tp->tcp_mstamp into skb, then move skb to the tsorted tail. */
+	skb->skb_mstamp = tp->tcp_mstamp;
+	list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+}
+
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg().  This is used by both the initial
  * transmission and possible later retransmissions.
@@ -991,6 +1040,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	struct tcp_skb_cb *tcb;
 	struct tcp_out_options opts;
 	unsigned int tcp_options_size, tcp_header_size;
+	struct sk_buff *oskb = NULL;
 	struct tcp_md5sig_key *md5;
 	struct tcphdr *th;
 	int err;
@@ -998,19 +1048,22 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 	tp = tcp_sk(sk);
 
-	skb->skb_mstamp = tp->tcp_mstamp;
 	if (clone_it) {
 		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
 			- tp->snd_una;
-		tcp_rate_skb_sent(sk, skb);
+		oskb = skb;
+
+		tcp_skb_tsorted_save(oskb) {
+			if (unlikely(skb_cloned(oskb)))
+				skb = pskb_copy(oskb, gfp_mask);
+			else
+				skb = skb_clone(oskb, gfp_mask);
+		} tcp_skb_tsorted_restore(oskb);
 
-		if (unlikely(skb_cloned(skb)))
-			skb = pskb_copy(skb, gfp_mask);
-		else
-			skb = skb_clone(skb, gfp_mask);
 		if (unlikely(!skb))
 			return -ENOBUFS;
 	}
+	skb->skb_mstamp = tp->tcp_mstamp;
 
 	inet = inet_sk(sk);
 	tcb = TCP_SKB_CB(skb);
@@ -1122,12 +1175,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
 
-	if (likely(err <= 0))
-		return err;
-
-	tcp_enter_cwr(sk);
-
-	return net_xmit_eval(err);
+	if (unlikely(err > 0)) {
+		tcp_enter_cwr(sk);
+		err = net_xmit_eval(err);
+	}
+	if (!err && oskb) {
+		tcp_update_skb_after_send(tp, oskb);
+		tcp_rate_skb_sent(sk, oskb);
+	}
+	return err;
 }
 
 /* This routine just queues the buffer for sending.
@@ -1162,21 +1218,6 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
 	}
 }
 
-/* When a modification to fackets out becomes necessary, we need to check
- * skb is counted to fackets_out or not.
- */
-static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
-				   int decr)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (!tp->sacked_out || tcp_is_reno(tp))
-		return;
-
-	if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
-		tp->fackets_out -= decr;
-}
-
 /* Pcount in the middle of the write queue got changed, we need to do various
  * tweaks to fix counters
  */
@@ -1197,11 +1238,9 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
 	if (tcp_is_reno(tp) && decr > 0)
 		tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
 
-	tcp_adjust_fackets_out(sk, skb, decr);
-
 	if (tp->lost_skb_hint &&
 	    before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
-	    (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
+	    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		tp->lost_cnt_hint -= decr;
 
 	tcp_verify_left_out(tp);
@@ -1236,12 +1275,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
 	TCP_SKB_CB(skb)->eor = 0;
 }
 
+/* Link buff after skb on sk's write queue, or insert it into the rtx rbtree. */
+static void tcp_insert_write_queue_after(struct sk_buff *skb,
+					 struct sk_buff *buff,
+					 struct sock *sk,
+					 enum tcp_queue tcp_queue)
+{
+	if (tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)
+		tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
+	else
+		__skb_queue_after(&sk->sk_write_queue, skb, buff);
+}
+
 /* Function to create two new TCP segments.  Shrinks the given segment
  * to the specified size and appends a new segment with the rest of the
  * packet to the list.  This won't be called frequently, I hope.
  * Remember, these are still headerless SKBs at this point.
  */
-int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+		 struct sk_buff *skb, u32 len,
 		 unsigned int mss_now, gfp_t gfp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1324,7 +1376,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 
 	/* Link BUFF into the send queue. */
 	__skb_header_release(buff);
-	tcp_insert_write_queue_after(skb, buff, sk);
+	tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
+	if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
+		list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
 
 	return 0;
 }
@@ -1602,7 +1656,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 		if (tp->packets_out > tp->snd_cwnd_used)
 			tp->snd_cwnd_used = tp->packets_out;
 
-		if (sysctl_tcp_slow_start_after_idle &&
+		if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
 		    (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
 		    !ca_ops->cong_control)
 			tcp_cwnd_application_limited(sk);
@@ -1611,10 +1665,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 		 * is caused by insufficient sender buffer:
 		 * 1) just sent some data (see tcp_write_xmit)
 		 * 2) not cwnd limited (this else condition)
-		 * 3) no more data to send (null tcp_send_head )
+		 * 3) no more data to send (tcp_write_queue_empty())
 		 * 4) application is hitting buffer limit (SOCK_NOSPACE)
 		 */
-		if (!tcp_send_head(sk) && sk->sk_socket &&
+		if (tcp_write_queue_empty(sk) && sk->sk_socket &&
 		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
 		    (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
 			tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@ -1666,7 +1720,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 {
 	u32 bytes, segs;
 
-	bytes = min(sk->sk_pacing_rate >> 10,
+	bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
 		    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
 
 	/* Goal is to send at least one packet per ms,
@@ -1689,7 +1743,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 	u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
 
 	return tso_segs ? :
-		tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
+		tcp_tso_autosize(sk, mss_now,
+				 sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
 }
 
 /* Returns the portion of skb which can be sent right away */
@@ -1803,40 +1858,6 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
 	return !after(end_seq, tcp_wnd_end(tp));
 }
 
-/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
- * should be put on the wire right now.  If so, it returns the number of
- * packets allowed by the congestion window.
- */
-static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
-				 unsigned int cur_mss, int nonagle)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int cwnd_quota;
-
-	tcp_init_tso_segs(skb, cur_mss);
-
-	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
-		return 0;
-
-	cwnd_quota = tcp_cwnd_test(tp, skb);
-	if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
-		cwnd_quota = 0;
-
-	return cwnd_quota;
-}
-
-/* Test if sending is allowed right now. */
-bool tcp_may_send_now(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_send_head(sk);
-
-	return skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk),
-			     (tcp_skb_is_last(sk, skb) ?
-			      tp->nonagle : TCP_NAGLE_PUSH));
-}
-
 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
  * which is put after SKB on the list.  It is very much like
  * tcp_fragment() except that it may make several kinds of assumptions
@@ -1844,7 +1865,8 @@ bool tcp_may_send_now(struct sock *sk)
  * know that all the data is in scatter-gather pages, and that the
  * packet has never been sent out before (and thus is not cloned).
  */
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+			struct sk_buff *skb, unsigned int len,
 			unsigned int mss_now, gfp_t gfp)
 {
 	struct sk_buff *buff;
@@ -1853,7 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* All of a TSO frame must be composed of paged data.  */
 	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, skb, len, mss_now, gfp);
+		return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
 
 	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
@@ -1889,7 +1911,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* Link BUFF into the send queue. */
 	__skb_header_release(buff);
-	tcp_insert_write_queue_after(skb, buff, sk);
+	tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
 
 	return 0;
 }
@@ -1939,7 +1961,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
 		goto send_now;
 
-	win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
+	win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
 	if (win_divisor) {
 		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
 
@@ -1959,8 +1981,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 			goto send_now;
 	}
 
-	head = tcp_write_queue_head(sk);
-
+	/* TODO : use tsorted_sent_queue ? */
+	head = tcp_rtx_queue_head(sk);
+	if (!head)
+		goto send_now;
 	age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
 	/* If next ACK is likely to come too late (half srtt), do not defer */
 	if (age < (tp->srtt_us >> 4))
@@ -2091,6 +2115,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	nskb->ip_summed = skb->ip_summed;
 
 	tcp_insert_write_queue_before(nskb, skb, sk);
+	tcp_highest_sack_replace(sk, skb, nskb);
 
 	len = 0;
 	tcp_for_write_queue_from_safe(skb, next, sk) {
@@ -2173,18 +2198,18 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 {
 	unsigned int limit;
 
-	limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
-	limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+	limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
+	limit = min_t(u32, limit,
+		      sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
 	limit <<= factor;
 
 	if (refcount_read(&sk->sk_wmem_alloc) > limit) {
-		/* Always send the 1st or 2nd skb in write queue.
+		/* Always send skb if rtx queue is empty.
 		 * No need to wait for TX completion to call us back,
 		 * after softirq/tasklet schedule.
 		 * This helps when TX completions are delayed too much.
 		 */
-		if (skb == sk->sk_write_queue.next ||
-		    skb->prev == sk->sk_write_queue.next)
+		if (tcp_rtx_queue_empty(sk))
 			return false;
 
 		set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2235,7 +2260,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
 	 * it's the "most interesting" or current chrono we are
 	 * tracking and starts busy chrono if we have pending data.
 	 */
-	if (tcp_write_queue_empty(sk))
+	if (tcp_rtx_and_write_queues_empty(sk))
 		tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
 	else if (type == tp->chrono_type)
 		tcp_chrono_set(tp, TCP_CHRONO_BUSY);
@@ -2268,6 +2293,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 	sent_pkts = 0;
 
+	tcp_mstamp_refresh(tp);
 	if (!push_one) {
 		/* Do MTU probing. */
 		result = tcp_mtu_probe(sk);
@@ -2279,7 +2305,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	}
 
 	max_segs = tcp_tso_segs(sk, mss_now);
-	tcp_mstamp_refresh(tp);
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
@@ -2291,7 +2316,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
 			/* "skb_mstamp" is used as a start point for the retransmit timer */
-			skb->skb_mstamp = tp->tcp_mstamp;
+			tcp_update_skb_after_send(tp, skb);
 			goto repair; /* Skip network transmission */
 		}
 
@@ -2330,7 +2355,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 						    nonagle);
 
 		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+		    unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+					  skb, limit, mss_now, gfp)))
 			break;
 
 		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2370,15 +2396,15 @@ repair:
 		tcp_cwnd_validate(sk, is_cwnd_limited);
 		return false;
 	}
-	return !tp->packets_out && tcp_send_head(sk);
+	return !tp->packets_out && !tcp_write_queue_empty(sk);
 }
 
 bool tcp_schedule_loss_probe(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 	u32 timeout, rto_delta_us;
+	int early_retrans;
 
 	/* Don't do any loss probe on a Fast Open connection before 3WHS
 	 * finishes.
@@ -2386,27 +2412,32 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	if (tp->fastopen_rsk)
 		return false;
 
+	early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+	if ((early_retrans != 3 && early_retrans != 4) ||
 	    !tp->packets_out || !tcp_is_sack(tp) ||
 	    icsk->icsk_ca_state != TCP_CA_Open)
 		return false;
 
 	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
-	     tcp_send_head(sk))
+	     !tcp_write_queue_empty(sk))
 		return false;
 
-	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+	/* Probe timeout is 2*rtt. Add minimum RTO to account
 	 * for delayed ack when there's one outstanding packet. If no RTT
 	 * sample is available then probe after TCP_TIMEOUT_INIT.
 	 */
-	timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
-	if (tp->packets_out == 1)
-		timeout = max_t(u32, timeout,
-				(rtt + (rtt >> 1) + TCP_DELACK_MAX));
-	timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+	if (tp->srtt_us) {
+		timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+		if (tp->packets_out == 1)
+			timeout += TCP_RTO_MIN;
+		else
+			timeout += TCP_TIMEOUT_MIN;
+	} else {
+		timeout = TCP_TIMEOUT_INIT;
+	}
 
 	/* If the RTO formula yields an earlier time, then use that time. */
 	rto_delta_us = tcp_rto_delta_us(sk);  /* How far in future is RTO? */
@@ -2444,18 +2475,14 @@ void tcp_send_loss_probe(struct sock *sk)
 	int mss = tcp_current_mss(sk);
 
 	skb = tcp_send_head(sk);
-	if (skb) {
-		if (tcp_snd_wnd_test(tp, skb, mss)) {
-			pcount = tp->packets_out;
-			tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
-			if (tp->packets_out > pcount)
-				goto probe_sent;
-			goto rearm_timer;
-		}
-		skb = tcp_write_queue_prev(sk, skb);
-	} else {
-		skb = tcp_write_queue_tail(sk);
+	if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+		pcount = tp->packets_out;
+		tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+		if (tp->packets_out > pcount)
+			goto probe_sent;
+		goto rearm_timer;
 	}
+	skb = skb_rb_last(&sk->tcp_rtx_queue);
 
 	/* At most one outstanding TLP retransmission. */
 	if (tp->tlp_high_seq)
@@ -2473,10 +2500,11 @@ void tcp_send_loss_probe(struct sock *sk)
 		goto rearm_timer;
 
 	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
-		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+		if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+					  (pcount - 1) * mss, mss,
 					  GFP_ATOMIC)))
 			goto rearm_timer;
-		skb = tcp_write_queue_next(sk, skb);
+		skb = skb_rb_next(skb);
 	}
 
 	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@ -2676,7 +2704,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+	struct sk_buff *next_skb = skb_rb_next(skb);
 	int skb_size, next_skb_size;
 
 	skb_size = skb->len;
@@ -2691,9 +2719,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 		else if (!skb_shift(skb, next_skb, next_skb_size))
 			return false;
 	}
-	tcp_highest_sack_combine(sk, next_skb, skb);
-
-	tcp_unlink_write_queue(next_skb, sk);
+	tcp_highest_sack_replace(sk, next_skb, skb);
 
 	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
 		skb->ip_summed = CHECKSUM_PARTIAL;
@@ -2722,7 +2748,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 
 	tcp_skb_collapse_tstamp(skb, next_skb);
 
-	sk_wmem_free_skb(sk, next_skb);
+	tcp_rtx_queue_unlink_and_free(next_skb, sk);
 	return true;
 }
 
@@ -2733,8 +2759,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
 		return false;
 	if (skb_cloned(skb))
 		return false;
-	if (skb == tcp_send_head(sk))
-		return false;
 	/* Some heuristics for collapsing over SACK'd could be invented */
 	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
 		return false;
@@ -2752,12 +2776,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 	struct sk_buff *skb = to, *tmp;
 	bool first = true;
 
-	if (!sysctl_tcp_retrans_collapse)
+	if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
 		return;
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 		return;
 
-	tcp_for_write_queue_from_safe(skb, tmp, sk) {
+	skb_rbtree_walk_from_safe(skb, tmp) {
 		if (!tcp_can_collapse(sk, skb))
 			break;
 
@@ -2832,7 +2856,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 
 	len = cur_mss * segs;
 	if (skb->len > len) {
-		if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+		if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+				 cur_mss, GFP_ATOMIC))
 			return -ENOMEM; /* We'll try again later. */
 	} else {
 		if (skb_unclone(skb, GFP_ATOMIC))
@@ -2866,16 +2891,23 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		     skb_headroom(skb) >= 0xFFFF)) {
 		struct sk_buff *nskb;
 
-		skb->skb_mstamp = tp->tcp_mstamp;
-		nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
-		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-			     -ENOBUFS;
+		tcp_skb_tsorted_save(skb) {
+			nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
+			err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+				     -ENOBUFS;
+		} tcp_skb_tsorted_restore(skb);
+
+		if (!err) {
+			tcp_update_skb_after_send(tp, skb);
+			tcp_rate_skb_sent(sk, skb);
+		}
 	} else {
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
 
 	if (likely(!err)) {
 		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+		trace_tcp_retransmit_skb(sk, skb);
 	} else if (err != -EBUSY) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
 	}
@@ -2912,36 +2944,25 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
  * retransmitted data is acknowledged.  It tries to continue
  * resending the rest of the retransmit queue, until either
  * we've sent it all or the congestion window limit is reached.
- * If doing SACK, the first ACK which comes back for a timeout
- * based retransmit packet might feed us FACK information again.
- * If so, we use it to avoid unnecessarily retransmissions.
  */
 void tcp_xmit_retransmit_queue(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *skb, *rtx_head, *hole = NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-	struct sk_buff *hole = NULL;
 	u32 max_segs;
 	int mib_idx;
 
 	if (!tp->packets_out)
 		return;
 
-	if (tp->retransmit_skb_hint) {
-		skb = tp->retransmit_skb_hint;
-	} else {
-		skb = tcp_write_queue_head(sk);
-	}
-
+	rtx_head = tcp_rtx_queue_head(sk);
+	skb = tp->retransmit_skb_hint ?: rtx_head;
 	max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
-	tcp_for_write_queue_from(skb, sk) {
+	skb_rbtree_walk_from(skb) {
 		__u8 sacked;
 		int segs;
 
-		if (skb == tcp_send_head(sk))
-			break;
-
 		if (tcp_pacing_check(sk))
 			break;
 
@@ -2986,7 +3007,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += tcp_skb_pcount(skb);
 
-		if (skb == tcp_write_queue_head(sk) &&
+		if (skb == rtx_head &&
 		    icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
@@ -3028,12 +3049,15 @@ void tcp_send_fin(struct sock *sk)
 	 * Note: in the latter case, FIN packet will be sent after a timeout,
 	 * as TCP stack thinks it has already been transmitted.
 	 */
-	if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
+	if (!tskb && tcp_under_memory_pressure(sk))
+		tskb = skb_rb_last(&sk->tcp_rtx_queue);
+
+	if (tskb) {
 coalesce:
 		TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
 		TCP_SKB_CB(tskb)->end_seq++;
 		tp->write_seq++;
-		if (!tcp_send_head(sk)) {
+		if (tcp_write_queue_empty(sk)) {
 			/* This means tskb was already sent.
 			 * Pretend we included the FIN on previous transmit.
 			 * We need to set tp->snd_nxt to the value it would have
@@ -3050,6 +3074,7 @@ coalesce:
 				goto coalesce;
 			return;
 		}
+		INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
 		skb_reserve(skb, MAX_TCP_HEADER);
 		sk_forced_mem_schedule(sk, skb->truesize);
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
@@ -3086,6 +3111,11 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* Send it off. */
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
+
+	/* trace_tcp_send_reset() expects the skb that caused the RST;
+	 * the skb sent here is not that troublesome skb, so pass NULL.
+	 */
+	trace_tcp_send_reset(sk, NULL);
 }
 
 /* Send a crossed SYN-ACK during socket establishment.
@@ -3098,20 +3128,24 @@ int tcp_send_synack(struct sock *sk)
 {
 	struct sk_buff *skb;
 
-	skb = tcp_write_queue_head(sk);
+	skb = tcp_rtx_queue_head(sk);
 	if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
-		pr_debug("%s: wrong queue state\n", __func__);
+		pr_err("%s: wrong queue state\n", __func__);
 		return -EFAULT;
 	}
 	if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
 		if (skb_cloned(skb)) {
-			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+			struct sk_buff *nskb;
+
+			tcp_skb_tsorted_save(skb) {
+				nskb = skb_copy(skb, GFP_ATOMIC);
+			} tcp_skb_tsorted_restore(skb);
 			if (!nskb)
 				return -ENOMEM;
-			tcp_unlink_write_queue(skb, sk);
+			INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
+			tcp_rtx_queue_unlink_and_free(skb, sk);
 			__skb_header_release(nskb);
-			__tcp_add_write_queue_head(sk, nskb);
-			sk_wmem_free_skb(sk, skb);
+			tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
 			sk->sk_wmem_queued += nskb->truesize;
 			sk_mem_charge(sk, nskb->truesize);
 			skb = nskb;
@@ -3188,8 +3222,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
 #endif
 	skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
-	tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
-			  sizeof(*th);
+	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
+					     foc) + sizeof(*th);
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
@@ -3202,13 +3236,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	th->source = htons(ireq->ir_num);
 	th->dest = ireq->ir_rmt_port;
 	skb->mark = ireq->ir_mark;
-	/* Setting of flags are superfluous here for callers (and ECE is
-	 * not even correctly set)
-	 */
-	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
-			     TCPHDR_SYN | TCPHDR_ACK);
-
-	th->seq = htonl(TCP_SKB_CB(skb)->seq);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	th->seq = htonl(tcp_rsk(req)->snt_isn);
 	/* XXX data is queued and acked as is. No buffer/window check */
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
 
@@ -3295,7 +3324,7 @@ static void tcp_connect_init(struct sock *sk)
 	if (rcv_wnd == 0)
 		rcv_wnd = dst_metric(dst, RTAX_INITRWND);
 
-	tcp_select_initial_window(tcp_full_space(sk),
+	tcp_select_initial_window(sk, tcp_full_space(sk),
 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
@@ -3334,7 +3363,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
 
 	tcb->end_seq += skb->len;
 	__skb_header_release(skb);
-	__tcp_add_write_queue_tail(sk, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
 	tp->write_seq = tcb->end_seq;
@@ -3382,6 +3410,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 		int copied = copy_from_iter(skb_put(syn_data, space), space,
 					    &fo->data->msg_iter);
 		if (unlikely(!copied)) {
+			tcp_skb_tsorted_anchor_cleanup(syn_data);
 			kfree_skb(syn_data);
 			goto fallback;
 		}
@@ -3412,10 +3441,15 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
 	if (!err) {
 		tp->syn_data = (fo->copied > 0);
+		tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
 		goto done;
 	}
 
+	/* data was not sent, put it in write_queue */
+	__skb_queue_tail(&sk->sk_write_queue, syn_data);
+	tp->packets_out -= tcp_skb_pcount(syn_data);
+
 fallback:
 	/* Send a regular SYN with Fast Open cookie request option */
 	if (fo->cookie.len > 0)
@@ -3456,6 +3490,7 @@ int tcp_connect(struct sock *sk)
 	tp->retrans_stamp = tcp_time_stamp(tp);
 	tcp_connect_queue_skb(sk, buff);
 	tcp_ecn_send_syn(sk, buff);
+	tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
 
 	/* Send off SYN; include data in Fast Open. */
 	err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@ -3468,6 +3503,11 @@ int tcp_connect(struct sock *sk)
 	 */
 	tp->snd_nxt = tp->write_seq;
 	tp->pushed_seq = tp->write_seq;
+	buff = tcp_send_head(sk);
+	if (unlikely(buff)) {
+		tp->snd_nxt	= TCP_SKB_CB(buff)->seq;
+		tp->pushed_seq	= TCP_SKB_CB(buff)->seq;
+	}
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
@@ -3645,7 +3685,8 @@ int tcp_write_wakeup(struct sock *sk, int mib)
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+			if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+					 skb, seg_size, mss, GFP_ATOMIC))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(skb, mss);
@@ -3675,7 +3716,7 @@ void tcp_send_probe0(struct sock *sk)
 
 	err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
 
-	if (tp->packets_out || !tcp_send_head(sk)) {
+	if (tp->packets_out || tcp_write_queue_empty(sk)) {
 		/* Cancel probe timer, if it is not required. */
 		icsk->icsk_probes_out = 0;
 		icsk->icsk_backoff = 0;
@@ -3716,6 +3757,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
 		if (unlikely(tcp_passive_fastopen(sk)))
 			tcp_sk(sk)->total_retrans++;
+		trace_tcp_retransmit_synack(sk, req);
 	}
 	return res;
 }
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index f6c50af24a64..697f4c67b2e3 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -105,8 +105,9 @@ static inline int tcp_probe_avail(void)
  * Note: arguments must match tcp_rcv_established()!
  */
 static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-				 const struct tcphdr *th, unsigned int len)
+				 const struct tcphdr *th)
 {
+	unsigned int len = skb->len;
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_sock *inet = inet_sk(sk);
 
@@ -145,7 +146,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				BUG();
 			}
 
-			p->length = skb->len;
+			p->length = len;
 			p->snd_nxt = tp->snd_nxt;
 			p->snd_una = tp->snd_una;
 			p->snd_cwnd = tp->snd_cwnd;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fe9a493d0208..d3ea89020c69 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/tcp.h>
 #include <net/tcp.h>
 
-int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION;
-
 static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -45,7 +44,8 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
 static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
+	u32 min_rtt = tcp_min_rtt(tp);
+	struct sk_buff *skb, *n;
 	u32 reo_wnd;
 
 	*reo_timeout = 0;
@@ -55,48 +55,36 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
 	 * to queuing or delayed ACKs.
 	 */
 	reo_wnd = 1000;
-	if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U)
-		reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
+	if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) {
+		reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
+		reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
+	}
 
-	tcp_for_write_queue(skb, sk) {
+	list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
+				 tcp_tsorted_anchor) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+		s32 remaining;
 
-		if (skb == tcp_send_head(sk))
-			break;
-
-		/* Skip ones already (s)acked */
-		if (!after(scb->end_seq, tp->snd_una) ||
-		    scb->sacked & TCPCB_SACKED_ACKED)
+		/* Skip ones marked lost but not yet retransmitted */
+		if ((scb->sacked & TCPCB_LOST) &&
+		    !(scb->sacked & TCPCB_SACKED_RETRANS))
 			continue;
 
-		if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
-					tp->rack.end_seq, scb->end_seq)) {
-			/* Step 3 in draft-cheng-tcpm-rack-00.txt:
-			 * A packet is lost if its elapsed time is beyond
-			 * the recent RTT plus the reordering window.
-			 */
-			u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp,
-							 skb->skb_mstamp);
-			s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
-
-			if (remaining < 0) {
-				tcp_rack_mark_skb_lost(sk, skb);
-				continue;
-			}
-
-			/* Skip ones marked lost but not yet retransmitted */
-			if ((scb->sacked & TCPCB_LOST) &&
-			    !(scb->sacked & TCPCB_SACKED_RETRANS))
-				continue;
+		if (!tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
+					 tp->rack.end_seq, scb->end_seq))
+			break;
 
+		/* A packet is lost if it has not been s/acked beyond
+		 * the recent RTT plus the reordering window.
+		 */
+		remaining = tp->rack.rtt_us + reo_wnd -
+			    tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
+		if (remaining < 0) {
+			tcp_rack_mark_skb_lost(sk, skb);
+			list_del_init(&skb->tcp_tsorted_anchor);
+		} else {
 			/* Record maximum wait time (+1 to avoid 0) */
 			*reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
-
-		} else if (!(scb->sacked & TCPCB_RETRANS)) {
-			/* Original data are sent sequentially so stop early
-			 * b/c the rest are all sent after rack_sent
-			 */
-			break;
 		}
 	}
 }
@@ -113,7 +101,7 @@ void tcp_rack_mark_lost(struct sock *sk)
 	tp->rack.advanced = 0;
 	tcp_rack_detect_loss(sk, &timeout);
 	if (timeout) {
-		timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
+		timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
 					  timeout, inet_csk(sk)->icsk_rto);
 	}
@@ -175,3 +163,44 @@ void tcp_rack_reo_timeout(struct sock *sk)
 	if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS)
 		tcp_rearm_rto(sk);
 }
+
+/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
+ *
+ * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
+ * by srtt), since there is possibility that spurious retransmission was
+ * due to reordering delay longer than reo_wnd.
+ *
+ * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
+ * no. of successful recoveries (accounts for full DSACK-based loss
+ * recovery undo). After that, reset it to default (min_rtt/4).
+ *
+ * At most, reo_wnd is incremented once per rtt, so that the new DSACK
+ * we are reacting to is (approximately) due to a spurious retx sent
+ * after the reo_wnd was last updated.
+ *
+ * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
+ * absolute value to account for change in rtt.
+ */
+void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+	    !rs->prior_delivered)
+		return;
+
+	/* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
+	if (before(rs->prior_delivered, tp->rack.last_delivered))
+		tp->rack.dsack_seen = 0;
+
+	/* Adjust the reo_wnd if update is pending */
+	if (tp->rack.dsack_seen) {
+		tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
+					       tp->rack.reo_wnd_steps + 1);
+		tp->rack.dsack_seen = 0;
+		tp->rack.last_delivered = tp->delivered;
+		tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
+	} else if (!tp->rack.reo_wnd_persist) {
+		tp->rack.reo_wnd_steps = 1;
+	}
+}
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index f2123075ce6e..addc122f8818 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,10 +15,6 @@
 #define TCP_SCALABLE_AI_CNT	50U
 #define TCP_SCALABLE_MD_SCALE	3
 
-struct scalable {
-	u32 loss_cwnd;
-};
-
 static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -36,23 +32,13 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static u32 tcp_scalable_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	struct scalable *ca = inet_csk_ca(sk);
-
-	ca->loss_cwnd = tp->snd_cwnd;
 
 	return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
 }
 
-static u32 tcp_scalable_cwnd_undo(struct sock *sk)
-{
-	const struct scalable *ca = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
 static struct tcp_congestion_ops tcp_scalable __read_mostly = {
 	.ssthresh	= tcp_scalable_ssthresh,
-	.undo_cwnd	= tcp_scalable_cwnd_undo,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_scalable_cong_avoid,
 
 	.owner		= THIS_MODULE,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index e906014890b6..16df6dd44b98 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,8 +22,6 @@
 #include <linux/gfp.h>
 #include <net/tcp.h>
 
-int sysctl_tcp_thin_linear_timeouts __read_mostly;
-
 /**
  *  tcp_write_err() - close socket and save error info
  *  @sk:  The socket the error has appeared on.
@@ -109,26 +107,23 @@ static int tcp_orphan_retries(struct sock *sk, bool alive)
 
 static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 {
-	struct net *net = sock_net(sk);
+	const struct net *net = sock_net(sk);
+	int mss;
 
 	/* Black hole detection */
-	if (net->ipv4.sysctl_tcp_mtu_probing) {
-		if (!icsk->icsk_mtup.enabled) {
-			icsk->icsk_mtup.enabled = 1;
-			icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
-			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
-		} else {
-			struct net *net = sock_net(sk);
-			struct tcp_sock *tp = tcp_sk(sk);
-			int mss;
-
-			mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
-			mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
-			mss = max(mss, 68 - tp->tcp_header_len);
-			icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
-			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
-		}
+	if (!net->ipv4.sysctl_tcp_mtu_probing)
+		return;
+
+	if (!icsk->icsk_mtup.enabled) {
+		icsk->icsk_mtup.enabled = 1;
+		icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
+	} else {
+		mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
+		mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
+		mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
+		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
 	}
+	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 }
 
 
@@ -156,8 +151,13 @@ static bool retransmits_timed_out(struct sock *sk,
 		return false;
 
 	start_ts = tcp_sk(sk)->retrans_stamp;
-	if (unlikely(!start_ts))
-		start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk));
+	if (unlikely(!start_ts)) {
+		struct sk_buff *head = tcp_rtx_queue_head(sk);
+
+		if (!head)
+			return false;
+		start_ts = tcp_skb_timestamp(head);
+	}
 
 	if (likely(timeout == 0)) {
 		linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
@@ -239,7 +239,6 @@ static int tcp_write_timeout(struct sock *sk)
 /* Called with BH disabled */
 void tcp_delack_timer_handler(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	sk_mem_reclaim_partial(sk);
@@ -254,17 +253,6 @@ void tcp_delack_timer_handler(struct sock *sk)
 	}
 	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
 
-	if (!skb_queue_empty(&tp->ucopy.prequeue)) {
-		struct sk_buff *skb;
-
-		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
-
-		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
-			sk_backlog_rcv(sk, skb);
-
-		tp->ucopy.memory = 0;
-	}
-
 	if (inet_csk_ack_scheduled(sk)) {
 		if (!icsk->icsk_ack.pingpong) {
 			/* Delayed ACK missed: inflate ATO. */
@@ -295,15 +283,17 @@ out:
  *
  *  Returns: Nothing (void)
  */
-static void tcp_delack_timer(unsigned long data)
+static void tcp_delack_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
+	struct inet_connection_sock *icsk =
+			from_timer(icsk, t, icsk_delack_timer);
+	struct sock *sk = &icsk->icsk_inet.sk;
 
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk)) {
 		tcp_delack_timer_handler(sk);
 	} else {
-		inet_csk(sk)->icsk_ack.blocked = 1;
+		icsk->icsk_ack.blocked = 1;
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 		/* deleguate our work to tcp_release_cb() */
 		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
@@ -316,11 +306,12 @@ static void tcp_delack_timer(unsigned long data)
 static void tcp_probe_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct sk_buff *skb = tcp_send_head(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int max_probes;
 	u32 start_ts;
 
-	if (tp->packets_out || !tcp_send_head(sk)) {
+	if (tp->packets_out || !skb) {
 		icsk->icsk_probes_out = 0;
 		return;
 	}
@@ -333,9 +324,9 @@ static void tcp_probe_timer(struct sock *sk)
 	 * corresponding system limit. We also implement similar policy when
 	 * we use RTO to probe window in tcp_retransmit_timer().
 	 */
-	start_ts = tcp_skb_timestamp(tcp_send_head(sk));
+	start_ts = tcp_skb_timestamp(skb);
 	if (!start_ts)
-		tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
+		skb->skb_mstamp = tp->tcp_mstamp;
 	else if (icsk->icsk_user_timeout &&
 		 (s32)(tcp_time_stamp(tp) - start_ts) >
 		 jiffies_to_msecs(icsk->icsk_user_timeout))
@@ -420,7 +411,7 @@ void tcp_retransmit_timer(struct sock *sk)
 	if (!tp->packets_out)
 		goto out;
 
-	WARN_ON(tcp_write_queue_empty(sk));
+	WARN_ON(tcp_rtx_queue_empty(sk));
 
 	tp->tlp_high_seq = 0;
 
@@ -453,7 +444,7 @@ void tcp_retransmit_timer(struct sock *sk)
 			goto out;
 		}
 		tcp_enter_loss(sk);
-		tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1);
+		tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1);
 		__sk_dst_reset(sk);
 		goto out_reset_timer;
 	}
@@ -485,7 +476,7 @@ void tcp_retransmit_timer(struct sock *sk)
 
 	tcp_enter_loss(sk);
 
-	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) {
+	if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
 		/* Retransmission failed because of local congestion,
 		 * do not backoff.
 		 */
@@ -526,7 +517,7 @@ out_reset_timer:
 	 * linear-timeout retransmissions into a black hole
 	 */
 	if (sk->sk_state == TCP_ESTABLISHED &&
-	    (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
+	    (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
 	    tcp_stream_is_thin(tp) &&
 	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
 		icsk->icsk_backoff = 0;
@@ -582,9 +573,11 @@ out:
 	sk_mem_reclaim(sk);
 }
 
-static void tcp_write_timer(unsigned long data)
+static void tcp_write_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
+	struct inet_connection_sock *icsk =
+			from_timer(icsk, t, icsk_retransmit_timer);
+	struct sock *sk = &icsk->icsk_inet.sk;
 
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk)) {
@@ -619,9 +612,9 @@ void tcp_set_keepalive(struct sock *sk, int val)
 EXPORT_SYMBOL_GPL(tcp_set_keepalive);
 
 
-static void tcp_keepalive_timer (unsigned long data)
+static void tcp_keepalive_timer (struct timer_list *t)
 {
-	struct sock *sk = (struct sock *) data;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 elapsed;
@@ -659,7 +652,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	elapsed = keepalive_time_when(tp);
 
 	/* It is alive without keepalive 8) */
-	if (tp->packets_out || tcp_send_head(sk))
+	if (tp->packets_out || !tcp_write_queue_empty(sk))
 		goto resched;
 
 	elapsed = keepalive_time_elapsed(tp);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 218cfcc77650..ee113ff15fd0 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
 
 static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
 {
-	return  min(tp->snd_ssthresh, tp->snd_cwnd-1);
+	return  min(tp->snd_ssthresh, tp->snd_cwnd);
 }
 
 static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index 248cfc0ff9ae..4f24d0e37d9c 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * TCP Vegas congestion control interface
  */
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 76005d4b8dfc..6fcf482d611b 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -30,7 +30,6 @@ struct veno {
 	u32 basertt;		/* the min of all Veno rtt measurements seen (in usec) */
 	u32 inc;		/* decide whether to increase cwnd */
 	u32 diff;		/* calculate the diff rate */
-	u32 loss_cwnd;		/* cwnd when loss occured */
 };
 
 /* There are several situations when we must "re-start" Veno:
@@ -194,7 +193,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct veno *veno = inet_csk_ca(sk);
 
-	veno->loss_cwnd = tp->snd_cwnd;
 	if (veno->diff < beta)
 		/* in "non-congestive state", cut cwnd by 1/5 */
 		return max(tp->snd_cwnd * 4 / 5, 2U);
@@ -203,17 +201,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 		return max(tp->snd_cwnd >> 1U, 2U);
 }
 
-static u32 tcp_veno_cwnd_undo(struct sock *sk)
-{
-	const struct veno *veno = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd);
-}
-
 static struct tcp_congestion_ops tcp_veno __read_mostly = {
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
-	.undo_cwnd	= tcp_veno_cwnd_undo,
+	.undo_cwnd	= tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_veno_cong_avoid,
 	.pkts_acked	= tcp_veno_pkts_acked,
 	.set_state	= tcp_veno_state,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index e6ff99c4bd3b..96e829b2e2fc 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -37,7 +37,6 @@ struct yeah {
 	u32 fast_count;
 
 	u32 pkts_acked;
-	u32 loss_cwnd;
 };
 
 static void tcp_yeah_init(struct sock *sk)
@@ -220,22 +219,14 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
 
 	yeah->fast_count = 0;
 	yeah->reno_count = max(yeah->reno_count>>1, 2U);
-	yeah->loss_cwnd = tp->snd_cwnd;
 
 	return max_t(int, tp->snd_cwnd - reduction, 2);
 }
 
-static u32 tcp_yeah_cwnd_undo(struct sock *sk)
-{
-	const struct yeah *yeah = inet_csk_ca(sk);
-
-	return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd);
-}
-
 static struct tcp_congestion_ops tcp_yeah __read_mostly = {
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
-	.undo_cwnd      = tcp_yeah_cwnd_undo,
+	.undo_cwnd      = tcp_reno_undo_cwnd,
 	.cong_avoid	= tcp_yeah_cong_avoid,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a7c804f73990..e4ff25c947c5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -231,10 +231,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
 		}
 	}
 
-	/* Initial allocation may have already happened via setsockopt */
-	if (!rcu_access_pointer(sk->sk_reuseport_cb))
-		return reuseport_alloc(sk);
-	return 0;
+	return reuseport_alloc(sk);
 }
 
 /**
@@ -380,8 +377,8 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
 
 static int compute_score(struct sock *sk, struct net *net,
 			 __be32 saddr, __be16 sport,
-			 __be32 daddr, unsigned short hnum, int dif,
-			 bool exact_dif)
+			 __be32 daddr, unsigned short hnum,
+			 int dif, int sdif, bool exact_dif)
 {
 	int score;
 	struct inet_sock *inet;
@@ -413,10 +410,15 @@ static int compute_score(struct sock *sk, struct net *net,
 	}
 
 	if (sk->sk_bound_dev_if || exact_dif) {
-		if (sk->sk_bound_dev_if != dif)
+		bool dev_match = (sk->sk_bound_dev_if == dif ||
+				  sk->sk_bound_dev_if == sdif);
+
+		if (exact_dif && !dev_match)
 			return -1;
-		score += 4;
+		if (sk->sk_bound_dev_if && dev_match)
+			score += 4;
 	}
+
 	if (sk->sk_incoming_cpu == raw_smp_processor_id())
 		score++;
 	return score;
@@ -436,10 +438,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
 
 /* called with rcu_read_lock() */
 static struct sock *udp4_lib_lookup2(struct net *net,
-		__be32 saddr, __be16 sport,
-		__be32 daddr, unsigned int hnum, int dif, bool exact_dif,
-		struct udp_hslot *hslot2,
-		struct sk_buff *skb)
+				     __be32 saddr, __be16 sport,
+				     __be32 daddr, unsigned int hnum,
+				     int dif, int sdif, bool exact_dif,
+				     struct udp_hslot *hslot2,
+				     struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	int score, badness, matches = 0, reuseport = 0;
@@ -449,7 +452,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 	badness = 0;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
 		score = compute_score(sk, net, saddr, sport,
-				      daddr, hnum, dif, exact_dif);
+				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -477,8 +480,8 @@ static struct sock *udp4_lib_lookup2(struct net *net,
  * harder than this. -DaveM
  */
 struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
-		__be16 sport, __be32 daddr, __be16 dport,
-		int dif, struct udp_table *udptable, struct sk_buff *skb)
+		__be16 sport, __be32 daddr, __be16 dport, int dif,
+		int sdif, struct udp_table *udptable, struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	unsigned short hnum = ntohs(dport);
@@ -496,7 +499,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 			goto begin;
 
 		result = udp4_lib_lookup2(net, saddr, sport,
-					  daddr, hnum, dif,
+					  daddr, hnum, dif, sdif,
 					  exact_dif, hslot2, skb);
 		if (!result) {
 			unsigned int old_slot2 = slot2;
@@ -511,7 +514,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 				goto begin;
 
 			result = udp4_lib_lookup2(net, saddr, sport,
-						  daddr, hnum, dif,
+						  daddr, hnum, dif, sdif,
 						  exact_dif, hslot2, skb);
 		}
 		return result;
@@ -521,7 +524,7 @@ begin:
 	badness = 0;
 	sk_for_each_rcu(sk, &hslot->head) {
 		score = compute_score(sk, net, saddr, sport,
-				      daddr, hnum, dif, exact_dif);
+				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -554,7 +557,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
 
 	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
 				 iph->daddr, dport, inet_iif(skb),
-				 udptable, skb);
+				 inet_sdif(skb), udptable, skb);
 }
 
 struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
@@ -576,7 +579,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 	struct sock *sk;
 
 	sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
-			       dif, &udp_table, NULL);
+			       dif, 0, &udp_table, NULL);
 	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	return sk;
@@ -587,7 +590,7 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup);
 static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
 				       __be16 loc_port, __be32 loc_addr,
 				       __be16 rmt_port, __be32 rmt_addr,
-				       int dif, unsigned short hnum)
+				       int dif, int sdif, unsigned short hnum)
 {
 	struct inet_sock *inet = inet_sk(sk);
 
@@ -597,9 +600,10 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
 	    (inet->inet_dport != rmt_port && inet->inet_dport) ||
 	    (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
 	    ipv6_only_sock(sk) ||
-	    (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+	    (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+	     sk->sk_bound_dev_if != sdif))
 		return false;
-	if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif))
+	if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
 		return false;
 	return true;
 }
@@ -628,8 +632,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 	struct net *net = dev_net(skb->dev);
 
 	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
-			iph->saddr, uh->source, skb->dev->ifindex, udptable,
-			NULL);
+			       iph->saddr, uh->source, skb->dev->ifindex, 0,
+			       udptable, NULL);
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 		return;	/* No socket for error */
@@ -802,7 +806,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
 	if (is_udplite)  				 /*     UDP-Lite      */
 		csum = udplite_csum(skb);
 
-	else if (sk->sk_no_check_tx && !skb_is_gso(skb)) {   /* UDP csum off */
+	else if (sk->sk_no_check_tx) {			 /* UDP csum off */
 
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
@@ -1054,7 +1058,7 @@ back_from_confirm:
 		/* ... which is an evident application bug. --ANK */
 		release_sock(sk);
 
-		net_dbg_ratelimited("cork app bug 2\n");
+		net_dbg_ratelimited("socket already corked\n");
 		err = -EINVAL;
 		goto out;
 	}
@@ -1137,7 +1141,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
 	if (unlikely(!up->pending)) {
 		release_sock(sk);
 
-		net_dbg_ratelimited("udp cork app bug 3\n");
+		net_dbg_ratelimited("cork failed\n");
 		return -EINVAL;
 	}
 
@@ -1176,7 +1180,11 @@ static void udp_set_dev_scratch(struct sk_buff *skb)
 	scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
 	scratch->is_linear = !skb_is_nonlinear(skb);
 #endif
-	if (likely(!skb->_skb_refdst))
+	/* all head states except sp (dst, sk, nf) are always cleared by
+	 * udp_rcv() and we need to preserve secpath, if present, to eventually
+	 * process IP_CMSG_PASSSEC at recvmsg() time
+	 */
+	if (likely(!skb_sec_path(skb)))
 		scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
 }
 
@@ -1201,8 +1209,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
 	if (likely(partial)) {
 		up->forward_deficit += size;
 		size = up->forward_deficit;
-		if (size < (sk->sk_rcvbuf >> 2) &&
-		    !skb_queue_empty(&up->reader_queue))
+		if (size < (sk->sk_rcvbuf >> 2))
 			return;
 	} else {
 		size += up->forward_deficit;
@@ -1386,12 +1393,15 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 		unlock_sock_fast(sk, slow);
 	}
 
+	if (!skb_unref(skb))
+		return;
+
 	/* In the more common cases we cleared the head states previously,
 	 * see __udp_queue_rcv_skb().
 	 */
 	if (unlikely(udp_skb_has_head_state(skb)))
 		skb_release_head_state(skb);
-	consume_stateless_skb(skb);
+	__consume_stateless_skb(skb);
 }
 EXPORT_SYMBOL_GPL(skb_consume_udp);
 
@@ -1574,7 +1584,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 		return ip_recv_error(sk, msg, len, addr_len);
 
 try_again:
-	peeking = off = sk_peek_offset(sk, flags);
+	peeking = flags & MSG_PEEK;
+	off = sk_peek_offset(sk, flags);
 	skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
 	if (!skb)
 		return err;
@@ -1782,13 +1793,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		sk_mark_napi_id_once(sk, skb);
 	}
 
-	/* At recvmsg() time we may access skb->dst or skb->sp depending on
-	 * the IP options and the cmsg flags, elsewhere can we clear all
-	 * pending head states while they are hot in the cache
-	 */
-	if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
-		skb_release_head_state(skb);
-
 	rc = __udp_enqueue_schedule_skb(sk, skb);
 	if (rc < 0) {
 		int is_udplite = IS_UDPLITE(sk);
@@ -1809,8 +1813,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 static struct static_key udp_encap_needed __read_mostly;
 void udp_encap_enable(void)
 {
-	if (!static_key_enabled(&udp_encap_needed))
-		static_key_slow_inc(&udp_encap_needed);
+	static_key_enable(&udp_encap_needed);
 }
 EXPORT_SYMBOL(udp_encap_enable);
 
@@ -1849,7 +1852,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		 */
 
 		/* if we're overly short, let UDP handle it */
-		encap_rcv = ACCESS_ONCE(up->encap_rcv);
+		encap_rcv = READ_ONCE(up->encap_rcv);
 		if (encap_rcv) {
 			int ret;
 
@@ -1928,14 +1931,16 @@ drop:
 /* For TCP sockets, sk_rx_dst is protected by socket lock
  * For UDP, we use xchg() to guard against concurrent changes.
  */
-void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
 {
 	struct dst_entry *old;
 
 	if (dst_hold_safe(dst)) {
 		old = xchg(&sk->sk_rx_dst, dst);
 		dst_release(old);
+		return old != dst;
 	}
+	return false;
 }
 EXPORT_SYMBOL(udp_sk_rx_dst_set);
 
@@ -1956,6 +1961,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
 	unsigned int offset = offsetof(typeof(*sk), sk_node);
 	int dif = skb->dev->ifindex;
+	int sdif = inet_sdif(skb);
 	struct hlist_node *node;
 	struct sk_buff *nskb;
 
@@ -1970,7 +1976,7 @@ start_lookup:
 
 	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
 		if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
-					 uh->source, saddr, dif, hnum))
+					 uh->source, saddr, dif, sdif, hnum))
 			continue;
 
 		if (!first) {
@@ -2160,7 +2166,7 @@ drop:
 static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
 						  __be16 loc_port, __be32 loc_addr,
 						  __be16 rmt_port, __be32 rmt_addr,
-						  int dif)
+						  int dif, int sdif)
 {
 	struct sock *sk, *result;
 	unsigned short hnum = ntohs(loc_port);
@@ -2174,7 +2180,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
 	result = NULL;
 	sk_for_each_rcu(sk, &hslot->head) {
 		if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
-					rmt_port, rmt_addr, dif, hnum)) {
+					rmt_port, rmt_addr, dif, sdif, hnum)) {
 			if (result)
 				return NULL;
 			result = sk;
@@ -2191,7 +2197,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
 static struct sock *__udp4_lib_demux_lookup(struct net *net,
 					    __be16 loc_port, __be32 loc_addr,
 					    __be16 rmt_port, __be32 rmt_addr,
-					    int dif)
+					    int dif, int sdif)
 {
 	unsigned short hnum = ntohs(loc_port);
 	unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
@@ -2203,7 +2209,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
 
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
 		if (INET_MATCH(sk, net, acookie, rmt_addr,
-			       loc_addr, ports, dif))
+			       loc_addr, ports, dif, sdif))
 			return sk;
 		/* Only check first socket in chain */
 		break;
@@ -2211,47 +2217,46 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
 	return NULL;
 }
 
-void udp_v4_early_demux(struct sk_buff *skb)
+int udp_v4_early_demux(struct sk_buff *skb)
 {
 	struct net *net = dev_net(skb->dev);
+	struct in_device *in_dev = NULL;
 	const struct iphdr *iph;
 	const struct udphdr *uh;
 	struct sock *sk = NULL;
 	struct dst_entry *dst;
 	int dif = skb->dev->ifindex;
+	int sdif = inet_sdif(skb);
 	int ours;
 
 	/* validate the packet */
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
-		return;
+		return 0;
 
 	iph = ip_hdr(skb);
 	uh = udp_hdr(skb);
 
-	if (skb->pkt_type == PACKET_BROADCAST ||
-	    skb->pkt_type == PACKET_MULTICAST) {
-		struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+	if (skb->pkt_type == PACKET_MULTICAST) {
+		in_dev = __in_dev_get_rcu(skb->dev);
 
 		if (!in_dev)
-			return;
+			return 0;
 
-		/* we are supposed to accept bcast packets */
-		if (skb->pkt_type == PACKET_MULTICAST) {
-			ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
-					       iph->protocol);
-			if (!ours)
-				return;
-		}
+		ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+				       iph->protocol);
+		if (!ours)
+			return 0;
 
 		sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
-						   uh->source, iph->saddr, dif);
+						   uh->source, iph->saddr,
+						   dif, sdif);
 	} else if (skb->pkt_type == PACKET_HOST) {
 		sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
-					     uh->source, iph->saddr, dif);
+					     uh->source, iph->saddr, dif, sdif);
 	}
 
 	if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
-		return;
+		return 0;
 
 	skb->sk = sk;
 	skb->destructor = sock_efree;
@@ -2260,12 +2265,23 @@ void udp_v4_early_demux(struct sk_buff *skb)
 	if (dst)
 		dst = dst_check(dst, 0);
 	if (dst) {
+		u32 itag = 0;
+
 		/* set noref for now.
 		 * any place which wants to hold dst has to call
 		 * dst_hold_safe()
 		 */
 		skb_dst_set_noref(skb, dst);
+
+		/* for unconnected multicast sockets we need to validate
+		 * the source on each packet
+		 */
+		if (!inet_sk(sk)->inet_daddr && in_dev)
+			return ip_mc_validate_source(skb, iph->daddr,
+						     iph->saddr, iph->tos,
+						     skb->dev, in_dev, &itag);
 	}
+	return 0;
 }
 
 int udp_rcv(struct sk_buff *skb)
@@ -2281,7 +2297,7 @@ void udp_destroy_sock(struct sock *sk)
 	unlock_sock_fast(sk, slow);
 	if (static_key_false(&udp_encap_needed) && up->encap_type) {
 		void (*encap_destroy)(struct sock *sk);
-		encap_destroy = ACCESS_ONCE(up->encap_destroy);
+		encap_destroy = READ_ONCE(up->encap_destroy);
 		if (encap_destroy)
 			encap_destroy(sk);
 	}
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 4515836d2a3a..d0390d844ac8 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -45,7 +45,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 		sk = __udp4_lib_lookup(net,
 				req->id.idiag_src[0], req->id.idiag_sport,
 				req->id.idiag_dst[0], req->id.idiag_dport,
-				req->id.idiag_if, tbl, NULL);
+				req->id.idiag_if, 0, tbl, NULL);
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (req->sdiag_family == AF_INET6)
 		sk = __udp6_lib_lookup(net,
@@ -53,7 +53,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 				req->id.idiag_sport,
 				(struct in6_addr *)req->id.idiag_dst,
 				req->id.idiag_dport,
-				req->id.idiag_if, tbl, NULL);
+				req->id.idiag_if, 0, tbl, NULL);
 #endif
 	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
@@ -182,7 +182,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
 		sk = __udp4_lib_lookup(net,
 				req->id.idiag_dst[0], req->id.idiag_dport,
 				req->id.idiag_src[0], req->id.idiag_sport,
-				req->id.idiag_if, tbl, NULL);
+				req->id.idiag_if, 0, tbl, NULL);
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (req->sdiag_family == AF_INET6) {
 		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
@@ -190,7 +190,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
 			sk = __udp4_lib_lookup(net,
 					req->id.idiag_dst[3], req->id.idiag_dport,
 					req->id.idiag_src[3], req->id.idiag_sport,
-					req->id.idiag_if, tbl, NULL);
+					req->id.idiag_if, 0, tbl, NULL);
 
 		else
 			sk = __udp6_lib_lookup(net,
@@ -198,7 +198,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
 					req->id.idiag_dport,
 					(struct in6_addr *)req->id.idiag_src,
 					req->id.idiag_sport,
-					req->id.idiag_if, tbl, NULL);
+					req->id.idiag_if, 0, tbl, NULL);
 	}
 #endif
 	else {
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index a8cf8c6fb60c..e7d18b140287 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _UDP4_IMPL_H
 #define _UDP4_IMPL_H
 #include <net/udp.h>
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0932c85b42af..e360d55be555 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	__be16 new_protocol, bool is_ipv6)
 {
 	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
-	bool remcsum, need_csum, offload_csum, ufo, gso_partial;
+	bool remcsum, need_csum, offload_csum, gso_partial;
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct udphdr *uh = udp_hdr(skb);
 	u16 mac_offset = skb->mac_header;
@@ -61,8 +61,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
 	skb->remcsum_offload = remcsum;
 
-	ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
-
 	need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
 	/* Try to offload checksum if possible */
 	offload_csum = !!(need_csum &&
@@ -77,7 +75,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 	 * outer one so strip the existing checksum feature flags and
 	 * instead set the flag based on our outer checksum offload value.
 	 */
-	if (remcsum || ufo) {
+	if (remcsum) {
 		features &= ~NETIF_F_CSUM_MASK;
 		if (!need_csum || offload_csum)
 			features |= NETIF_F_HW_CSUM;
@@ -122,7 +120,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
 		 * will be using a length value equal to only one MSS sized
 		 * segment instead of the entire frame.
 		 */
-		if (gso_partial) {
+		if (gso_partial && skb_is_gso(skb)) {
 			uh->len = htons(skb_shinfo(skb)->gso_size +
 					SKB_GSO_CB(skb)->data_offset +
 					skb->head - (unsigned char *)uh);
@@ -189,66 +187,16 @@ out_unlock:
 }
 EXPORT_SYMBOL(skb_udp_tunnel_segment);
 
-static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
-					 netdev_features_t features)
+static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb,
+					   netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	unsigned int mss;
-	__wsum csum;
-	struct udphdr *uh;
-	struct iphdr *iph;
 
 	if (skb->encapsulation &&
 	    (skb_shinfo(skb)->gso_type &
-	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
+	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)))
 		segs = skb_udp_tunnel_segment(skb, features, false);
-		goto out;
-	}
 
-	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-		goto out;
-
-	mss = skb_shinfo(skb)->gso_size;
-	if (unlikely(skb->len <= mss))
-		goto out;
-
-	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
-		/* Packet is from an untrusted source, reset gso_segs. */
-
-		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
-		segs = NULL;
-		goto out;
-	}
-
-	/* Do software UFO. Complete and fill in the UDP checksum as
-	 * HW cannot do checksum of UDP packets sent as multiple
-	 * IP fragments.
-	 */
-
-	uh = udp_hdr(skb);
-	iph = ip_hdr(skb);
-
-	uh->check = 0;
-	csum = skb_checksum(skb, 0, skb->len, 0);
-	uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
-	if (uh->check == 0)
-		uh->check = CSUM_MANGLED_0;
-
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-	/* If there is no outer header we can fake a checksum offload
-	 * due to the fact that we have already done the checksum in
-	 * software prior to segmenting the frame.
-	 */
-	if (!skb->encap_hdr_csum)
-		features |= NETIF_F_HW_CSUM;
-
-	/* Fragment the skb. IP headers of the fragments are updated in
-	 * inet_gso_segment()
-	 */
-	segs = skb_segment(skb, features);
-out:
 	return segs;
 }
 
@@ -382,7 +330,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 
 static const struct net_offload udpv4_offload = {
 	.callbacks = {
-		.gso_segment = udp4_ufo_fragment,
+		.gso_segment = udp4_tunnel_segment,
 		.gro_receive  =	udp4_gro_receive,
 		.gro_complete =	udp4_gro_complete,
 	},
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 58bd39fb14b4..6539ff15e9a3 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -82,7 +82,8 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct udp_tunnel_info ti;
 
-	if (!dev->netdev_ops->ndo_udp_tunnel_add)
+	if (!dev->netdev_ops->ndo_udp_tunnel_add ||
+	    !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
 		return;
 
 	ti.type = type;
@@ -93,6 +94,24 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
 }
 EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);
 
+void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
+			     unsigned short type)
+{
+	struct sock *sk = sock->sk;
+	struct udp_tunnel_info ti;
+
+	if (!dev->netdev_ops->ndo_udp_tunnel_del ||
+	    !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+		return;
+
+	ti.type = type;
+	ti.sa_family = sk->sk_family;
+	ti.port = inet_sk(sk)->inet_sport;
+
+	dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port);
+
 /* Notify netdevs that UDP port started listening */
 void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
 {
@@ -109,6 +128,8 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
 	for_each_netdev_rcu(net, dev) {
 		if (!dev->netdev_ops->ndo_udp_tunnel_add)
 			continue;
+		if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+			continue;
 		dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
 	}
 	rcu_read_unlock();
@@ -131,6 +152,8 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type)
 	for_each_netdev_rcu(net, dev) {
 		if (!dev->netdev_ops->ndo_udp_tunnel_del)
 			continue;
+		if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+			continue;
 		dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
 	}
 	rcu_read_unlock();
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 1fc684111ce6..e50b7fea57ee 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * xfrm4_input.c
  *
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 71b4ecc195c7..05017e2c849c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * xfrm4_policy.c
  *
@@ -20,7 +21,8 @@
 static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
 					    int tos, int oif,
 					    const xfrm_address_t *saddr,
-					    const xfrm_address_t *daddr)
+					    const xfrm_address_t *daddr,
+					    u32 mark)
 {
 	struct rtable *rt;
 
@@ -28,6 +30,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
 	fl4->daddr = daddr->a4;
 	fl4->flowi4_tos = tos;
 	fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
+	fl4->flowi4_mark = mark;
 	if (saddr)
 		fl4->saddr = saddr->a4;
 
@@ -42,20 +45,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
 
 static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
 					  const xfrm_address_t *saddr,
-					  const xfrm_address_t *daddr)
+					  const xfrm_address_t *daddr,
+					  u32 mark)
 {
 	struct flowi4 fl4;
 
-	return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
+	return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark);
 }
 
 static int xfrm4_get_saddr(struct net *net, int oif,
-			   xfrm_address_t *saddr, xfrm_address_t *daddr)
+			   xfrm_address_t *saddr, xfrm_address_t *daddr,
+			   u32 mark)
 {
 	struct dst_entry *dst;
 	struct flowi4 fl4;
 
-	dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr);
+	dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
 
@@ -213,14 +218,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 	fl4->flowi4_tos = iph->tos;
 }
 
-static inline int xfrm4_garbage_collect(struct dst_ops *ops)
-{
-	struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
-
-	xfrm_garbage_collect_deferred(net);
-	return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
-}
-
 static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			      struct sk_buff *skb, u32 mtu)
 {
@@ -259,14 +256,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm4_dst_ops_template = {
 	.family =		AF_INET,
-	.gc =			xfrm4_garbage_collect,
 	.update_pmtu =		xfrm4_update_pmtu,
 	.redirect =		xfrm4_redirect,
 	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		xfrm4_dst_destroy,
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
-	.gc_thresh =		INT_MAX,
+	.gc_thresh =		32768,
 };
 
 static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index d6660a8c0ea5..80c40b4981bb 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * xfrm4_state.c
  *
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 48c452959d2c..ea71e4b0ab7a 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -308,22 +308,12 @@ config IPV6_SEG6_LWTUNNEL
 	depends on IPV6
 	select LWTUNNEL
 	select DST_CACHE
+	select IPV6_MULTIPLE_TABLES
 	---help---
 	  Support for encapsulation of packets within an outer IPv6
 	  header and a Segment Routing Header using the lightweight
-	  tunnels mechanism.
-
-	  If unsure, say N.
-
-config IPV6_SEG6_INLINE
-	bool "IPv6: direct Segment Routing Header insertion "
-	depends on IPV6_SEG6_LWTUNNEL
-	---help---
-	  Support for direct insertion of the Segment Routing Header,
-	  also known as inline mode. Be aware that direct insertion of
-	  extension headers (as opposed to encapsulation) may break
-	  multiple mechanisms such as PMTUD or IPSec AH. Use this feature
-	  only if you know exactly what you are doing.
+	  tunnels mechanism. Also enable support for advanced local
+	  processing of SRv6 packets based on their active segment.
 
 	  If unsure, say N.
 
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 217e9ff0e24b..e0026fa1261b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux TCP/IP (INET6) layer.
 #
@@ -9,7 +10,7 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
 		exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
-		udp_offload.o seg6.o
+		udp_offload.o seg6.o fib6_notifier.o
 
 ipv6-offload :=	ip6_offload.o tcpv6_offload.o exthdrs_offload.o
 
@@ -23,7 +24,7 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
 ipv6-$(CONFIG_PROC_FS) += proc.o
 ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
 ipv6-$(CONFIG_NETLABEL) += calipso.o
-ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o
+ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
 ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
 
 ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3c46e9513a31..a0ae1c9d37df 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -152,11 +152,13 @@ static void ipv6_regen_rndid(struct inet6_dev *idev);
 static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
 
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
-static int ipv6_count_addresses(struct inet6_dev *idev);
+static int ipv6_count_addresses(const struct inet6_dev *idev);
 static int ipv6_generate_stable_address(struct in6_addr *addr,
 					u8 dad_count,
 					const struct inet6_dev *idev);
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		(1 << IN6_ADDR_HSIZE_SHIFT)
 /*
  *	Configured unicast address hash table
  */
@@ -192,8 +194,6 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
 
 static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 				struct prefix_info *pinfo);
-static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
-			       struct net_device *dev);
 
 static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.forwarding		= 0,
@@ -231,7 +231,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.proxy_ndp		= 0,
 	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 	.disable_ipv6		= 0,
-	.accept_dad		= 1,
+	.accept_dad		= 0,
 	.suppress_frag_ndisc	= 1,
 	.accept_ra_mtu		= 1,
 	.stable_secret		= {
@@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.disable_policy		= 0,
 };
 
-/* Check if a valid qdisc is available */
-static inline bool addrconf_qdisc_ok(const struct net_device *dev)
+/* Check if link is ready: is it up and is a valid qdisc available */
+static inline bool addrconf_link_ready(const struct net_device *dev)
 {
-	return !qdisc_tx_is_noop(dev);
+	return netif_oper_up(dev) && !qdisc_tx_is_noop(dev);
 }
 
 static void addrconf_del_rs_timer(struct inet6_dev *idev)
@@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 
 	ndev->token = in6addr_any;
 
-	if (netif_running(dev) && addrconf_qdisc_ok(dev))
+	if (netif_running(dev) && addrconf_link_ready(dev))
 		ndev->if_flags |= IF_READY;
 
 	ipv6_mc_init_dev(ndev);
@@ -616,23 +616,23 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
 {
 	struct net *net = sock_net(in_skb->sk);
 	struct nlattr *tb[NETCONFA_MAX+1];
+	struct inet6_dev *in6_dev = NULL;
+	struct net_device *dev = NULL;
 	struct netconfmsg *ncm;
 	struct sk_buff *skb;
 	struct ipv6_devconf *devconf;
-	struct inet6_dev *in6_dev;
-	struct net_device *dev;
 	int ifindex;
 	int err;
 
 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
 			  devconf_ipv6_policy, extack);
 	if (err < 0)
-		goto errout;
+		return err;
 
-	err = -EINVAL;
 	if (!tb[NETCONFA_IFINDEX])
-		goto errout;
+		return -EINVAL;
 
+	err = -EINVAL;
 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
 	switch (ifindex) {
 	case NETCONFA_IFINDEX_ALL:
@@ -642,10 +642,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
 		devconf = net->ipv6.devconf_dflt;
 		break;
 	default:
-		dev = __dev_get_by_index(net, ifindex);
+		dev = dev_get_by_index(net, ifindex);
 		if (!dev)
-			goto errout;
-		in6_dev = __in6_dev_get(dev);
+			return -EINVAL;
+		in6_dev = in6_dev_get(dev);
 		if (!in6_dev)
 			goto errout;
 		devconf = &in6_dev->cnf;
@@ -653,7 +653,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
 	}
 
 	err = -ENOBUFS;
-	skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
+	skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
 	if (!skb)
 		goto errout;
 
@@ -669,6 +669,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
 	}
 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 errout:
+	if (in6_dev)
+		in6_dev_put(in6_dev);
+	if (dev)
+		dev_put(dev);
 	return err;
 }
 
@@ -945,12 +949,50 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
 			break;
 	}
 
-	list_add_tail(&ifp->if_list, p);
+	list_add_tail_rcu(&ifp->if_list, p);
+}
+
+static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+			       struct net_device *dev, unsigned int hash)
+{
+	struct inet6_ifaddr *ifp;
+
+	hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
+		if (!net_eq(dev_net(ifp->idev->dev), net))
+			continue;
+		if (ipv6_addr_equal(&ifp->addr, addr)) {
+			if (!dev || ifp->idev->dev == dev)
+				return true;
+		}
+	}
+	return false;
 }
 
-static u32 inet6_addr_hash(const struct in6_addr *addr)
+static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
 {
-	return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT);
+	unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr);
+	int err = 0;
+
+	spin_lock(&addrconf_hash_lock);
+
+	/* Ignore adding duplicate addresses on an interface */
+	if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
+		ADBG("ipv6_add_addr: already assigned\n");
+		err = -EEXIST;
+	} else {
+		hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
+	}
+
+	spin_unlock(&addrconf_hash_lock);
+
+	return err;
 }
 
 /* On success it returns ifp with increased reference count */
@@ -958,13 +1000,13 @@ static u32 inet6_addr_hash(const struct in6_addr *addr)
 static struct inet6_ifaddr *
 ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	      const struct in6_addr *peer_addr, int pfxlen,
-	      int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
+	      int scope, u32 flags, u32 valid_lft, u32 prefered_lft,
+	      bool can_block, struct netlink_ext_ack *extack)
 {
+	gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
 	struct net *net = dev_net(idev->dev);
 	struct inet6_ifaddr *ifa = NULL;
-	struct rt6_info *rt;
-	struct in6_validator_info i6vi;
-	unsigned int hash;
+	struct rt6_info *rt = NULL;
 	int err = 0;
 	int addr_type = ipv6_addr_type(addr);
 
@@ -974,42 +1016,33 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	     addr_type & IPV6_ADDR_LOOPBACK))
 		return ERR_PTR(-EADDRNOTAVAIL);
 
-	rcu_read_lock_bh();
-
-	in6_dev_hold(idev);
-
 	if (idev->dead) {
 		err = -ENODEV;			/*XXX*/
-		goto out2;
+		goto out;
 	}
 
 	if (idev->cnf.disable_ipv6) {
 		err = -EACCES;
-		goto out2;
-	}
-
-	i6vi.i6vi_addr = *addr;
-	i6vi.i6vi_dev = idev;
-	rcu_read_unlock_bh();
-
-	err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
-
-	rcu_read_lock_bh();
-	err = notifier_to_errno(err);
-	if (err)
-		goto out2;
-
-	spin_lock(&addrconf_hash_lock);
-
-	/* Ignore adding duplicate addresses on an interface */
-	if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
-		ADBG("ipv6_add_addr: already assigned\n");
-		err = -EEXIST;
 		goto out;
 	}
 
-	ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+	/* validator notifier needs to be blocking;
+	 * do not call in atomic context
+	 */
+	if (can_block) {
+		struct in6_validator_info i6vi = {
+			.i6vi_addr = *addr,
+			.i6vi_dev = idev,
+			.extack = extack,
+		};
+
+		err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
+		err = notifier_to_errno(err);
+		if (err < 0)
+			goto out;
+	}
 
+	ifa = kzalloc(sizeof(*ifa), gfp_flags);
 	if (!ifa) {
 		ADBG("ipv6_add_addr: malloc failed\n");
 		err = -ENOBUFS;
@@ -1019,6 +1052,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	rt = addrconf_dst_alloc(idev, addr, false);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
+		rt = NULL;
 		goto out;
 	}
 
@@ -1049,16 +1083,21 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	ifa->rt = rt;
 
 	ifa->idev = idev;
+	in6_dev_hold(idev);
+
 	/* For caller */
 	refcount_set(&ifa->refcnt, 1);
 
-	/* Add to big hash table */
-	hash = inet6_addr_hash(addr);
+	rcu_read_lock_bh();
 
-	hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
-	spin_unlock(&addrconf_hash_lock);
+	err = ipv6_add_addr_hash(idev->dev, ifa);
+	if (err < 0) {
+		rcu_read_unlock_bh();
+		goto out;
+	}
 
 	write_lock(&idev->lock);
+
 	/* Add to inet6_dev unicast addr list. */
 	ipv6_link_dev_addr(idev, ifa);
 
@@ -1069,21 +1108,23 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 
 	in6_ifa_hold(ifa);
 	write_unlock(&idev->lock);
-out2:
+
 	rcu_read_unlock_bh();
 
-	if (likely(err == 0))
-		inet6addr_notifier_call_chain(NETDEV_UP, ifa);
-	else {
-		kfree(ifa);
-		in6_dev_put(idev);
+	inet6addr_notifier_call_chain(NETDEV_UP, ifa);
+out:
+	if (unlikely(err < 0)) {
+		if (rt)
+			ip6_rt_put(rt);
+		if (ifa) {
+			if (ifa->idev)
+				in6_dev_put(ifa->idev);
+			kfree(ifa);
+		}
 		ifa = ERR_PTR(err);
 	}
 
 	return ifa;
-out:
-	spin_unlock(&addrconf_hash_lock);
-	goto out2;
 }
 
 enum cleanup_prefix_rt_t {
@@ -1204,7 +1245,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
 		action = check_cleanup_prefix_route(ifp, &expires);
 
-	list_del_init(&ifp->if_list);
+	list_del_rcu(&ifp->if_list);
 	__in6_ifa_put(ifp);
 
 	write_unlock_bh(&ifp->idev->lock);
@@ -1226,7 +1267,9 @@ out:
 	in6_ifa_put(ifp);
 }
 
-static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
+static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
+				struct inet6_ifaddr *ift,
+				bool block)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr addr, *tmpaddr;
@@ -1330,7 +1373,7 @@ retry:
 
 	ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
 			    ipv6_addr_scope(&addr), addr_flags,
-			    tmp_valid_lft, tmp_prefered_lft);
+			    tmp_valid_lft, tmp_prefered_lft, block, NULL);
 	if (IS_ERR(ift)) {
 		in6_ifa_put(ifp);
 		in6_dev_put(idev);
@@ -1399,10 +1442,18 @@ static inline int ipv6_saddr_preferred(int type)
 	return 0;
 }
 
-static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev)
+static bool ipv6_use_optimistic_addr(struct net *net,
+				     struct inet6_dev *idev)
 {
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-	return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic;
+	if (!idev)
+		return false;
+	if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+		return false;
+	if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic)
+		return false;
+
+	return true;
 #else
 	return false;
 #endif
@@ -1472,7 +1523,7 @@ static int ipv6_get_saddr_eval(struct net *net,
 		/* Rule 3: Avoid deprecated and optimistic addresses */
 		u8 avoid = IFA_F_DEPRECATED;
 
-		if (!ipv6_use_optimistic_addr(score->ifa->idev))
+		if (!ipv6_use_optimistic_addr(net, score->ifa->idev))
 			avoid |= IFA_F_OPTIMISTIC;
 		ret = ipv6_saddr_preferred(score->addr_type) ||
 		      !(score->ifa->flags & avoid);
@@ -1550,8 +1601,7 @@ static int __ipv6_dev_get_saddr(struct net *net,
 {
 	struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
 
-	read_lock_bh(&idev->lock);
-	list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+	list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) {
 		int i;
 
 		/*
@@ -1601,11 +1651,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
 				}
 				break;
 			} else if (minihiscore < miniscore) {
-				if (hiscore->ifa)
-					in6_ifa_put(hiscore->ifa);
-
-				in6_ifa_hold(score->ifa);
-
 				swap(hiscore, score);
 				hiscore_idx = 1 - hiscore_idx;
 
@@ -1617,7 +1662,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
 		}
 	}
 out:
-	read_unlock_bh(&idev->lock);
 	return hiscore_idx;
 }
 
@@ -1654,6 +1698,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
 	int dst_type;
 	bool use_oif_addr = false;
 	int hiscore_idx = 0;
+	int ret = 0;
 
 	dst_type = __ipv6_addr_type(daddr);
 	dst.addr = daddr;
@@ -1729,15 +1774,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
 	}
 
 out:
-	rcu_read_unlock();
-
 	hiscore = &scores[hiscore_idx];
 	if (!hiscore->ifa)
-		return -EADDRNOTAVAIL;
+		ret = -EADDRNOTAVAIL;
+	else
+		*saddr = hiscore->ifa->addr;
 
-	*saddr = hiscore->ifa->addr;
-	in6_ifa_put(hiscore->ifa);
-	return 0;
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL(ipv6_dev_get_saddr);
 
@@ -1777,15 +1821,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 	return err;
 }
 
-static int ipv6_count_addresses(struct inet6_dev *idev)
+static int ipv6_count_addresses(const struct inet6_dev *idev)
 {
+	const struct inet6_ifaddr *ifp;
 	int cnt = 0;
-	struct inet6_ifaddr *ifp;
 
-	read_lock_bh(&idev->lock);
-	list_for_each_entry(ifp, &idev->addr_list, if_list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(ifp, &idev->addr_list, if_list)
 		cnt++;
-	read_unlock_bh(&idev->lock);
+	rcu_read_unlock();
 	return cnt;
 }
 
@@ -1800,11 +1844,11 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
 			    const struct net_device *dev, int strict,
 			    u32 banned_flags)
 {
+	unsigned int hash = inet6_addr_hash(net, addr);
 	struct inet6_ifaddr *ifp;
-	unsigned int hash = inet6_addr_hash(addr);
 	u32 ifp_flags;
 
-	rcu_read_lock_bh();
+	rcu_read_lock();
 	hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
@@ -1818,32 +1862,16 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
 		    !(ifp_flags&banned_flags) &&
 		    (!dev || ifp->idev->dev == dev ||
 		     !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
-			rcu_read_unlock_bh();
+			rcu_read_unlock();
 			return 1;
 		}
 	}
 
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 	return 0;
 }
 EXPORT_SYMBOL(ipv6_chk_addr_and_flags);
 
-static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
-			       struct net_device *dev)
-{
-	unsigned int hash = inet6_addr_hash(addr);
-	struct inet6_ifaddr *ifp;
-
-	hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
-		if (!net_eq(dev_net(ifp->idev->dev), net))
-			continue;
-		if (ipv6_addr_equal(&ifp->addr, addr)) {
-			if (!dev || ifp->idev->dev == dev)
-				return true;
-		}
-	}
-	return false;
-}
 
 /* Compares an address/prefix_len with addresses on device @dev.
  * If one is found it returns true.
@@ -1851,20 +1879,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
 bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
 	const unsigned int prefix_len, struct net_device *dev)
 {
-	struct inet6_dev *idev;
-	struct inet6_ifaddr *ifa;
+	const struct inet6_ifaddr *ifa;
+	const struct inet6_dev *idev;
 	bool ret = false;
 
 	rcu_read_lock();
 	idev = __in6_dev_get(dev);
 	if (idev) {
-		read_lock_bh(&idev->lock);
-		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
 			ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
 			if (ret)
 				break;
 		}
-		read_unlock_bh(&idev->lock);
 	}
 	rcu_read_unlock();
 
@@ -1874,22 +1900,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix);
 
 int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
 {
-	struct inet6_dev *idev;
-	struct inet6_ifaddr *ifa;
+	const struct inet6_ifaddr *ifa;
+	const struct inet6_dev *idev;
 	int	onlink;
 
 	onlink = 0;
 	rcu_read_lock();
 	idev = __in6_dev_get(dev);
 	if (idev) {
-		read_lock_bh(&idev->lock);
-		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
 			onlink = ipv6_prefix_equal(addr, &ifa->addr,
 						   ifa->prefix_len);
 			if (onlink)
 				break;
 		}
-		read_unlock_bh(&idev->lock);
 	}
 	rcu_read_unlock();
 	return onlink;
@@ -1899,11 +1923,11 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
 struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
 				     struct net_device *dev, int strict)
 {
+	unsigned int hash = inet6_addr_hash(net, addr);
 	struct inet6_ifaddr *ifp, *result = NULL;
-	unsigned int hash = inet6_addr_hash(addr);
 
-	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1915,7 +1939,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 			}
 		}
 	}
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 
 	return result;
 }
@@ -1934,7 +1958,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 		if (ifpub) {
 			in6_ifa_hold(ifpub);
 			spin_unlock_bh(&ifp->lock);
-			ipv6_create_tempaddr(ifpub, ifp);
+			ipv6_create_tempaddr(ifpub, ifp, true);
 			in6_ifa_put(ifpub);
 		} else {
 			spin_unlock_bh(&ifp->lock);
@@ -1967,7 +1991,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
 	return err;
 }
 
-void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net *net = dev_net(ifp->idev->dev);
@@ -1977,8 +2001,8 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 		return;
 	}
 
-	net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n",
-			     ifp->idev->dev->name, &ifp->addr);
+	net_info_ratelimited("%s: IPv6 duplicate address %pI6c used by %pM detected!\n",
+			     ifp->idev->dev->name, &ifp->addr, eth_hdr(skb)->h_source);
 
 	spin_lock_bh(&ifp->lock);
 
@@ -2017,7 +2041,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 
 		ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen,
 				     scope, flags, valid_lft,
-				     preferred_lft);
+				     preferred_lft, false, NULL);
 		if (IS_ERR(ifp2))
 			goto lock_errdad;
 
@@ -2313,24 +2337,24 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 	if (!table)
 		return NULL;
 
-	read_lock_bh(&table->tb6_lock);
-	fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
+	rcu_read_lock();
+	fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true);
 	if (!fn)
 		goto out;
 
-	noflags |= RTF_CACHE;
-	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+	for_each_fib6_node_rt_rcu(fn) {
 		if (rt->dst.dev->ifindex != dev->ifindex)
 			continue;
 		if ((rt->rt6i_flags & flags) != flags)
 			continue;
 		if ((rt->rt6i_flags & noflags) != 0)
 			continue;
-		dst_hold(&rt->dst);
+		if (!dst_hold_safe(&rt->dst))
+			rt = NULL;
 		break;
 	}
 out:
-	read_unlock_bh(&table->tb6_lock);
+	rcu_read_unlock();
 	return rt;
 }
 
@@ -2434,7 +2458,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
 		 * no temporary address currently exists.
 		 */
 		read_unlock_bh(&idev->lock);
-		ipv6_create_tempaddr(ifp, NULL);
+		ipv6_create_tempaddr(ifp, NULL, false);
 	} else {
 		read_unlock_bh(&idev->lock);
 	}
@@ -2460,7 +2484,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 		int max_addresses = in6_dev->cnf.max_addresses;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-		if (in6_dev->cnf.optimistic_dad &&
+		if ((net->ipv6.devconf_all->optimistic_dad ||
+		     in6_dev->cnf.optimistic_dad) &&
 		    !net->ipv6.devconf_all->forwarding && sllao)
 			addr_flags |= IFA_F_OPTIMISTIC;
 #endif
@@ -2474,7 +2499,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 					    pinfo->prefix_len,
 					    addr_type&IPV6_ADDR_SCOPE_MASK,
 					    addr_flags, valid_lft,
-					    prefered_lft);
+					    prefered_lft, false, NULL);
 
 		if (IS_ERR_OR_NULL(ifp))
 			return -1;
@@ -2784,7 +2809,8 @@ static int inet6_addr_add(struct net *net, int ifindex,
 			  const struct in6_addr *pfx,
 			  const struct in6_addr *peer_pfx,
 			  unsigned int plen, __u32 ifa_flags,
-			  __u32 prefered_lft, __u32 valid_lft)
+			  __u32 prefered_lft, __u32 valid_lft,
+			  struct netlink_ext_ack *extack)
 {
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
@@ -2843,7 +2869,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
 	}
 
 	ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
-			    valid_lft, prefered_lft);
+			    valid_lft, prefered_lft, true, extack);
 
 	if (!IS_ERR(ifp)) {
 		if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
@@ -2928,7 +2954,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
 	rtnl_lock();
 	err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
 			     ireq.ifr6_prefixlen, IFA_F_PERMANENT,
-			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, NULL);
 	rtnl_unlock();
 	return err;
 }
@@ -2958,7 +2984,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 
 	ifp = ipv6_add_addr(idev, addr, NULL, plen,
 			    scope, IFA_F_PERMANENT,
-			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME,
+			    true, NULL);
 	if (!IS_ERR(ifp)) {
 		spin_lock_bh(&ifp->lock);
 		ifp->flags &= ~IFA_F_TENTATIVE;
@@ -3030,9 +3057,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
 static void init_loopback(struct net_device *dev)
 {
 	struct inet6_dev  *idev;
-	struct net_device *sp_dev;
-	struct inet6_ifaddr *sp_ifa;
-	struct rt6_info *sp_rt;
 
 	/* ::1 */
 
@@ -3045,45 +3069,6 @@ static void init_loopback(struct net_device *dev)
 	}
 
 	add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
-
-	/* Add routes to other interface's IPv6 addresses */
-	for_each_netdev(dev_net(dev), sp_dev) {
-		if (!strcmp(sp_dev->name, dev->name))
-			continue;
-
-		idev = __in6_dev_get(sp_dev);
-		if (!idev)
-			continue;
-
-		read_lock_bh(&idev->lock);
-		list_for_each_entry(sp_ifa, &idev->addr_list, if_list) {
-
-			if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
-				continue;
-
-			if (sp_ifa->rt) {
-				/* This dst has been added to garbage list when
-				 * lo device down, release this obsolete dst and
-				 * reallocate a new router for ifa.
-				 */
-				if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
-					ip6_rt_put(sp_ifa->rt);
-					sp_ifa->rt = NULL;
-				} else {
-					continue;
-				}
-			}
-
-			sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);
-
-			/* Failure cases are ignored */
-			if (!IS_ERR(sp_rt)) {
-				sp_ifa->rt = sp_rt;
-				ip6_ins_rt(sp_rt);
-			}
-		}
-		read_unlock_bh(&idev->lock);
-	}
 }
 
 void addrconf_add_linklocal(struct inet6_dev *idev,
@@ -3093,13 +3078,14 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
 	u32 addr_flags = flags | IFA_F_PERMANENT;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-	if (idev->cnf.optimistic_dad &&
+	if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
+	     idev->cnf.optimistic_dad) &&
 	    !dev_net(idev->dev)->ipv6.devconf_all->forwarding)
 		addr_flags |= IFA_F_OPTIMISTIC;
 #endif
 
 	ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
-			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
 	if (!IS_ERR(ifp)) {
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
 		addrconf_dad_start(ifp);
@@ -3321,15 +3307,15 @@ static void addrconf_gre_config(struct net_device *dev)
 static int fixup_permanent_addr(struct inet6_dev *idev,
 				struct inet6_ifaddr *ifp)
 {
-	/* rt6i_ref == 0 means the host route was removed from the
+	/* !rt6i_node means the host route was removed from the
 	 * FIB, for example, if 'lo' device is taken down. In that
 	 * case regenerate the host route.
 	 */
-	if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
+	if (!ifp->rt || !ifp->rt->rt6i_node) {
 		struct rt6_info *rt, *prev;
 
 		rt = addrconf_dst_alloc(idev, &ifp->addr, false);
-		if (unlikely(IS_ERR(rt)))
+		if (IS_ERR(rt))
 			return PTR_ERR(rt);
 
 		/* ifp->rt can be accessed outside of rtnl */
@@ -3367,6 +3353,7 @@ static void addrconf_permanent_addr(struct net_device *dev)
 		if ((ifp->flags & IFA_F_PERMANENT) &&
 		    fixup_permanent_addr(idev, ifp) < 0) {
 			write_unlock_bh(&idev->lock);
+			in6_ifa_hold(ifp);
 			ipv6_del_addr(ifp);
 			write_lock_bh(&idev->lock);
 
@@ -3435,7 +3422,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			/* restore routes for permanent addresses */
 			addrconf_permanent_addr(dev);
 
-			if (!addrconf_qdisc_ok(dev)) {
+			if (!addrconf_link_ready(dev)) {
 				/* device is not ready yet. */
 				pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
 					dev->name);
@@ -3450,7 +3437,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 				run_pending = 1;
 			}
 		} else if (event == NETDEV_CHANGE) {
-			if (!addrconf_qdisc_ok(dev)) {
+			if (!addrconf_link_ready(dev)) {
 				/* device is still not ready. */
 				break;
 			}
@@ -3594,7 +3581,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	struct net *net = dev_net(dev);
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa, *tmp;
-	struct list_head del_list;
 	int _keep_addr;
 	bool keep_addr;
 	int state, i;
@@ -3686,7 +3672,6 @@ restart:
 	 */
 	keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
 
-	INIT_LIST_HEAD(&del_list);
 	list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
 		struct rt6_info *rt = NULL;
 		bool keep;
@@ -3695,8 +3680,6 @@ restart:
 
 		keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
 			!addr_is_local(&ifa->addr);
-		if (!keep)
-			list_move(&ifa->if_list, &del_list);
 
 		write_unlock_bh(&idev->lock);
 		spin_lock_bh(&ifa->lock);
@@ -3730,19 +3713,14 @@ restart:
 		}
 
 		write_lock_bh(&idev->lock);
+		if (!keep) {
+			list_del_rcu(&ifa->if_list);
+			in6_ifa_put(ifa);
+		}
 	}
 
 	write_unlock_bh(&idev->lock);
 
-	/* now clean up addresses to be removed */
-	while (!list_empty(&del_list)) {
-		ifa = list_first_entry(&del_list,
-				       struct inet6_ifaddr, if_list);
-		list_del(&ifa->if_list);
-
-		in6_ifa_put(ifa);
-	}
-
 	/* Step 5: Discard anycast and multicast list */
 	if (how) {
 		ipv6_ac_destroy_dev(idev);
@@ -3852,7 +3830,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 		goto out;
 
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
-	    idev->cnf.accept_dad < 1 ||
+	    (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 &&
+	     idev->cnf.accept_dad < 1) ||
 	    !(ifp->flags&IFA_F_TENTATIVE) ||
 	    ifp->flags & IFA_F_NODAD) {
 		bump_id = ifp->flags & IFA_F_TENTATIVE;
@@ -3883,7 +3862,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 	 */
 	if (ifp->flags & IFA_F_OPTIMISTIC) {
 		ip6_ins_rt(ifp->rt);
-		if (ipv6_use_optimistic_addr(idev)) {
+		if (ipv6_use_optimistic_addr(dev_net(dev), idev)) {
 			/* Because optimistic nodes can use this address,
 			 * notify listeners. If DAD fails, RTM_DELADDR is sent.
 			 */
@@ -3939,7 +3918,9 @@ static void addrconf_dad_work(struct work_struct *w)
 		action = DAD_ABORT;
 		ifp->state = INET6_IFADDR_STATE_POSTDAD;
 
-		if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 &&
+		if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 ||
+		     idev->cnf.accept_dad > 1) &&
+		    !idev->cnf.disable_ipv6 &&
 		    !(ifp->flags & IFA_F_STABLE_PRIVACY)) {
 			struct in6_addr addr;
 
@@ -4121,9 +4102,9 @@ struct if6_iter_state {
 
 static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
 {
-	struct inet6_ifaddr *ifa = NULL;
 	struct if6_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct inet6_ifaddr *ifa = NULL;
 	int p = 0;
 
 	/* initial bucket if pos is 0 */
@@ -4133,7 +4114,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
 	}
 
 	for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
-		hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket],
+		hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket],
 					 addr_lst) {
 			if (!net_eq(dev_net(ifa->idev->dev), net))
 				continue;
@@ -4159,7 +4140,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
 	struct if6_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
 
-	hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) {
+	hlist_for_each_entry_continue_rcu(ifa, addr_lst) {
 		if (!net_eq(dev_net(ifa->idev->dev), net))
 			continue;
 		state->offset++;
@@ -4168,7 +4149,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
 
 	while (++state->bucket < IN6_ADDR_HSIZE) {
 		state->offset = 0;
-		hlist_for_each_entry_rcu_bh(ifa,
+		hlist_for_each_entry_rcu(ifa,
 				     &inet6_addr_lst[state->bucket], addr_lst) {
 			if (!net_eq(dev_net(ifa->idev->dev), net))
 				continue;
@@ -4181,9 +4162,9 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
 }
 
 static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(rcu_bh)
+	__acquires(rcu)
 {
-	rcu_read_lock_bh();
+	rcu_read_lock();
 	return if6_get_first(seq, *pos);
 }
 
@@ -4197,9 +4178,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void if6_seq_stop(struct seq_file *seq, void *v)
-	__releases(rcu_bh)
+	__releases(rcu)
 {
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 }
 
 static int if6_seq_show(struct seq_file *seq, void *v)
@@ -4268,12 +4249,12 @@ void if6_proc_exit(void)
 /* Check if address is a home address configured on any interface. */
 int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
 {
-	int ret = 0;
+	unsigned int hash = inet6_addr_hash(net, addr);
 	struct inet6_ifaddr *ifp = NULL;
-	unsigned int hash = inet6_addr_hash(addr);
+	int ret = 0;
 
-	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -4282,7 +4263,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
 			break;
 		}
 	}
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 	return ret;
 }
 #endif
@@ -4372,7 +4353,7 @@ restart:
 						spin_lock(&ifpub->lock);
 						ifpub->regen_count = 0;
 						spin_unlock(&ifpub->lock);
-						ipv6_create_tempaddr(ifpub, ifp);
+						ipv6_create_tempaddr(ifpub, ifp, true);
 						in6_ifa_put(ifpub);
 						in6_ifa_put(ifp);
 						goto restart;
@@ -4608,7 +4589,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 		 */
 		return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
 				      ifm->ifa_prefixlen, ifa_flags,
-				      preferred_lft, valid_lft);
+				      preferred_lft, valid_lft, extack);
 	}
 
 	if (nlh->nlmsg_flags & NLM_F_EXCL ||
@@ -4935,17 +4916,15 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
 			  extack);
 	if (err < 0)
-		goto errout;
+		return err;
 
 	addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
-	if (!addr) {
-		err = -EINVAL;
-		goto errout;
-	}
+	if (!addr)
+		return -EINVAL;
 
 	ifm = nlmsg_data(nlh);
 	if (ifm->ifa_index)
-		dev = __dev_get_by_index(net, ifm->ifa_index);
+		dev = dev_get_by_index(net, ifm->ifa_index);
 
 	ifa = ipv6_get_ifaddr(net, addr, dev, 1);
 	if (!ifa) {
@@ -4971,6 +4950,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 errout_ifa:
 	in6_ifa_put(ifa);
 errout:
+	if (dev)
+		dev_put(dev);
 	return err;
 }
 
@@ -4982,9 +4963,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 
 	/* Don't send DELADDR notification for TENTATIVE address,
 	 * since NEWADDR notification is sent only after removing
-	 * TENTATIVE flag.
+	 * TENTATIVE flag, if DAD has not failed.
 	 */
-	if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR)
+	if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) &&
+	    event == RTM_DELADDR)
 		return;
 
 	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
@@ -5077,6 +5059,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
 	array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
 	array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
+	array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -5556,7 +5539,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		 * our DAD process, so we don't need
 		 * to do it again
 		 */
-		if (!(ifp->rt->rt6i_node))
+		if (!rcu_access_pointer(ifp->rt->rt6i_node))
 			ip6_ins_rt(ifp->rt);
 		if (ifp->idev->cnf.forwarding)
 			addrconf_join_anycast(ifp);
@@ -5926,10 +5909,9 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
 		spin_lock(&ifa->lock);
 		if (ifa->rt) {
 			struct rt6_info *rt = ifa->rt;
-			struct fib6_table *table = rt->rt6i_table;
 			int cpu;
 
-			read_lock(&table->tb6_lock);
+			rcu_read_lock();
 			addrconf_set_nopolicy(ifa->rt, val);
 			if (rt->rt6i_pcpu) {
 				for_each_possible_cpu(cpu) {
@@ -5939,7 +5921,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
 					addrconf_set_nopolicy(*rtp, val);
 				}
 			}
-			read_unlock(&table->tb6_lock);
+			rcu_read_unlock();
 		}
 		spin_unlock(&ifa->lock);
 	}
@@ -6005,6 +5987,7 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
 }
 
 static int minus_one = -1;
+static const int zero = 0;
 static const int one = 1;
 static const int two_five_five = 255;
 
@@ -6376,6 +6359,15 @@ static const struct ctl_table addrconf_sysctl[] = {
 		.proc_handler   = addrconf_sysctl_disable_policy,
 	},
 	{
+		.procname	= "ndisc_tclass",
+		.data		= &ipv6_devconf.ndisc_tclass,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&zero,
+		.extra2		= (void *)&two_five_five,
+	},
+	{
 		/* sentinel */
 	}
 };
@@ -6605,21 +6597,21 @@ int __init addrconf_init(void)
 	rtnl_af_register(&inet6_ops);
 
 	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
-			      NULL);
+			      0);
 	if (err < 0)
 		goto errout;
 
 	/* Only the first call to __rtnl_register can fail */
-	__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL);
-	__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL);
+	__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
+	__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
 	__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
-			inet6_dump_ifaddr, NULL);
+			inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED);
 	__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
-			inet6_dump_ifmcaddr, NULL);
+			inet6_dump_ifmcaddr, 0);
 	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
-			inet6_dump_ifacaddr, NULL);
+			inet6_dump_ifacaddr, 0);
 	__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
-			inet6_netconf_dump_devconf, NULL);
+			inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED);
 
 	ipv6_addr_label_rtnl_register();
 
@@ -6646,9 +6638,9 @@ void addrconf_cleanup(void)
 	unregister_pernet_subsys(&addrconf_ops);
 	ipv6_addr_label_cleanup();
 
-	rtnl_lock();
+	rtnl_af_unregister(&inet6_ops);
 
-	__rtnl_af_unregister(&inet6_ops);
+	rtnl_lock();
 
 	/* clean dev list */
 	for_each_netdev(&init_net, dev) {
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 9e3488d50b15..32b564dfd02a 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -88,7 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr)
 EXPORT_SYMBOL(__ipv6_addr_type);
 
 static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
-static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain);
+static BLOCKING_NOTIFIER_HEAD(inet6addr_validator_chain);
 
 int register_inet6addr_notifier(struct notifier_block *nb)
 {
@@ -110,19 +110,20 @@ EXPORT_SYMBOL(inet6addr_notifier_call_chain);
 
 int register_inet6addr_validator_notifier(struct notifier_block *nb)
 {
-	return atomic_notifier_chain_register(&inet6addr_validator_chain, nb);
+	return blocking_notifier_chain_register(&inet6addr_validator_chain, nb);
 }
 EXPORT_SYMBOL(register_inet6addr_validator_notifier);
 
 int unregister_inet6addr_validator_notifier(struct notifier_block *nb)
 {
-	return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb);
+	return blocking_notifier_chain_unregister(&inet6addr_validator_chain,
+						  nb);
 }
 EXPORT_SYMBOL(unregister_inet6addr_validator_notifier);
 
 int inet6addr_validator_notifier_call_chain(unsigned long val, void *v)
 {
-	return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v);
+	return blocking_notifier_call_chain(&inet6addr_validator_chain, val, v);
 }
 EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain);
 
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 7a428f65c7ec..00e1f8ee08f8 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * IPv6 Address Label subsystem
  * for the IPv6 "Default" Source Address Selection
@@ -18,7 +19,6 @@
 #include <linux/if_addrlabel.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
-#include <linux/refcount.h>
 
 #if 0
 #define ADDRLABEL(x...) printk(x)
@@ -30,30 +30,15 @@
  * Policy Table
  */
 struct ip6addrlbl_entry {
-	possible_net_t lbl_net;
 	struct in6_addr prefix;
 	int prefixlen;
 	int ifindex;
 	int addrtype;
 	u32 label;
 	struct hlist_node list;
-	refcount_t refcnt;
 	struct rcu_head rcu;
 };
 
-static struct ip6addrlbl_table
-{
-	struct hlist_head head;
-	spinlock_t lock;
-	u32 seq;
-} ip6addrlbl_table;
-
-static inline
-struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
-{
-	return read_pnet(&lbl->lbl_net);
-}
-
 /*
  * Default policy table (RFC6724 + extensions)
  *
@@ -125,36 +110,11 @@ static const __net_initconst struct ip6addrlbl_init_table
 	}
 };
 
-/* Object management */
-static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
-{
-	kfree(p);
-}
-
-static void ip6addrlbl_free_rcu(struct rcu_head *h)
-{
-	ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
-}
-
-static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
-{
-	return refcount_inc_not_zero(&p->refcnt);
-}
-
-static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
-{
-	if (refcount_dec_and_test(&p->refcnt))
-		call_rcu(&p->rcu, ip6addrlbl_free_rcu);
-}
-
 /* Find label */
-static bool __ip6addrlbl_match(struct net *net,
-			       const struct ip6addrlbl_entry *p,
+static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p,
 			       const struct in6_addr *addr,
 			       int addrtype, int ifindex)
 {
-	if (!net_eq(ip6addrlbl_net(p), net))
-		return false;
 	if (p->ifindex && p->ifindex != ifindex)
 		return false;
 	if (p->addrtype && p->addrtype != addrtype)
@@ -169,8 +129,9 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
 						  int type, int ifindex)
 {
 	struct ip6addrlbl_entry *p;
-	hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
-		if (__ip6addrlbl_match(net, p, addr, type, ifindex))
+
+	hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
+		if (__ip6addrlbl_match(p, addr, type, ifindex))
 			return p;
 	}
 	return NULL;
@@ -196,8 +157,7 @@ u32 ipv6_addr_label(struct net *net,
 }
 
 /* allocate one entry */
-static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
-						 const struct in6_addr *prefix,
+static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
 						 int prefixlen, int ifindex,
 						 u32 label)
 {
@@ -236,24 +196,22 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
 	newp->addrtype = addrtype;
 	newp->label = label;
 	INIT_HLIST_NODE(&newp->list);
-	write_pnet(&newp->lbl_net, net);
-	refcount_set(&newp->refcnt, 1);
 	return newp;
 }
 
 /* add a label */
-static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
+static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
+			    int replace)
 {
-	struct hlist_node *n;
 	struct ip6addrlbl_entry *last = NULL, *p = NULL;
+	struct hlist_node *n;
 	int ret = 0;
 
 	ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
 		  replace);
 
-	hlist_for_each_entry_safe(p, n,	&ip6addrlbl_table.head, list) {
+	hlist_for_each_entry_safe(p, n,	&net->ipv6.ip6addrlbl_table.head, list) {
 		if (p->prefixlen == newp->prefixlen &&
-		    net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
 		    p->ifindex == newp->ifindex &&
 		    ipv6_addr_equal(&p->prefix, &newp->prefix)) {
 			if (!replace) {
@@ -261,7 +219,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
 				goto out;
 			}
 			hlist_replace_rcu(&p->list, &newp->list);
-			ip6addrlbl_put(p);
+			kfree_rcu(p, rcu);
 			goto out;
 		} else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
 			   (p->prefixlen < newp->prefixlen)) {
@@ -273,10 +231,10 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
 	if (last)
 		hlist_add_behind_rcu(&newp->list, &last->list);
 	else
-		hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
+		hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
 out:
 	if (!ret)
-		ip6addrlbl_table.seq++;
+		net->ipv6.ip6addrlbl_table.seq++;
 	return ret;
 }
 
@@ -292,14 +250,14 @@ static int ip6addrlbl_add(struct net *net,
 		  __func__, prefix, prefixlen, ifindex, (unsigned int)label,
 		  replace);
 
-	newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
+	newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
 	if (IS_ERR(newp))
 		return PTR_ERR(newp);
-	spin_lock(&ip6addrlbl_table.lock);
-	ret = __ip6addrlbl_add(newp, replace);
-	spin_unlock(&ip6addrlbl_table.lock);
+	spin_lock(&net->ipv6.ip6addrlbl_table.lock);
+	ret = __ip6addrlbl_add(net, newp, replace);
+	spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
 	if (ret)
-		ip6addrlbl_free(newp);
+		kfree(newp);
 	return ret;
 }
 
@@ -315,13 +273,12 @@ static int __ip6addrlbl_del(struct net *net,
 	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
 		  __func__, prefix, prefixlen, ifindex);
 
-	hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+	hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
 		if (p->prefixlen == prefixlen &&
-		    net_eq(ip6addrlbl_net(p), net) &&
 		    p->ifindex == ifindex &&
 		    ipv6_addr_equal(&p->prefix, prefix)) {
 			hlist_del_rcu(&p->list);
-			ip6addrlbl_put(p);
+			kfree_rcu(p, rcu);
 			ret = 0;
 			break;
 		}
@@ -340,9 +297,9 @@ static int ip6addrlbl_del(struct net *net,
 		  __func__, prefix, prefixlen, ifindex);
 
 	ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
-	spin_lock(&ip6addrlbl_table.lock);
+	spin_lock(&net->ipv6.ip6addrlbl_table.lock);
 	ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
-	spin_unlock(&ip6addrlbl_table.lock);
+	spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
 	return ret;
 }
 
@@ -354,6 +311,9 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
 
 	ADDRLABEL(KERN_DEBUG "%s\n", __func__);
 
+	spin_lock_init(&net->ipv6.ip6addrlbl_table.lock);
+	INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
+
 	for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
 		int ret = ip6addrlbl_add(net,
 					 ip6addrlbl_init_table[i].prefix,
@@ -373,14 +333,12 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
 	struct hlist_node *n;
 
 	/* Remove all labels belonging to the exiting net */
-	spin_lock(&ip6addrlbl_table.lock);
-	hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
-		if (net_eq(ip6addrlbl_net(p), net)) {
-			hlist_del_rcu(&p->list);
-			ip6addrlbl_put(p);
-		}
+	spin_lock(&net->ipv6.ip6addrlbl_table.lock);
+	hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
+		hlist_del_rcu(&p->list);
+		kfree_rcu(p, rcu);
 	}
-	spin_unlock(&ip6addrlbl_table.lock);
+	spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
 }
 
 static struct pernet_operations ipv6_addr_label_ops = {
@@ -390,8 +348,6 @@ static struct pernet_operations ipv6_addr_label_ops = {
 
 int __init ipv6_addr_label_init(void)
 {
-	spin_lock_init(&ip6addrlbl_table.lock);
-
 	return register_pernet_subsys(&ipv6_addr_label_ops);
 }
 
@@ -405,6 +361,18 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
 	[IFAL_LABEL]		= { .len = sizeof(u32), },
 };
 
+static bool addrlbl_ifindex_exists(struct net *net, int ifindex)
+{
+
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
+	rcu_read_unlock();
+
+	return dev != NULL;
+}
+
 static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
 			     struct netlink_ext_ack *extack)
 {
@@ -439,7 +407,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
 	switch (nlh->nlmsg_type) {
 	case RTM_NEWADDRLABEL:
 		if (ifal->ifal_index &&
-		    !__dev_get_by_index(net, ifal->ifal_index))
+		    !addrlbl_ifindex_exists(net, ifal->ifal_index))
 			return -EINVAL;
 
 		err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
@@ -498,11 +466,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	int err;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
-		if (idx >= s_idx &&
-		    net_eq(ip6addrlbl_net(p), net)) {
+	hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
+		if (idx >= s_idx) {
 			err = ip6addrlbl_fill(skb, p,
-					      ip6addrlbl_table.seq,
+					      net->ipv6.ip6addrlbl_table.seq,
 					      NETLINK_CB(cb->skb).portid,
 					      cb->nlh->nlmsg_seq,
 					      RTM_NEWADDRLABEL,
@@ -548,55 +515,45 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	if (ifal->ifal_index &&
-	    !__dev_get_by_index(net, ifal->ifal_index))
+	    !addrlbl_ifindex_exists(net, ifal->ifal_index))
 		return -EINVAL;
 
 	if (!tb[IFAL_ADDRESS])
 		return -EINVAL;
 	addr = nla_data(tb[IFAL_ADDRESS]);
 
-	rcu_read_lock();
-	p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
-	if (p && !ip6addrlbl_hold(p))
-		p = NULL;
-	lseq = ip6addrlbl_table.seq;
-	rcu_read_unlock();
-
-	if (!p) {
-		err = -ESRCH;
-		goto out;
-	}
-
 	skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
-	if (!skb) {
-		ip6addrlbl_put(p);
+	if (!skb)
 		return -ENOBUFS;
-	}
 
-	err = ip6addrlbl_fill(skb, p, lseq,
-			      NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
-			      RTM_NEWADDRLABEL, 0);
+	err = -ESRCH;
 
-	ip6addrlbl_put(p);
+	rcu_read_lock();
+	p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
+	lseq = net->ipv6.ip6addrlbl_table.seq;
+	if (p)
+		err = ip6addrlbl_fill(skb, p, lseq,
+				      NETLINK_CB(in_skb).portid,
+				      nlh->nlmsg_seq,
+				      RTM_NEWADDRLABEL, 0);
+	rcu_read_unlock();
 
 	if (err < 0) {
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(skb);
-		goto out;
+	} else {
+		err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 	}
-
-	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
-out:
 	return err;
 }
 
 void __init ipv6_addr_label_rtnl_register(void)
 {
 	__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
-			NULL, NULL);
+			NULL, RTNL_FLAG_DOIT_UNLOCKED);
 	__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
-			NULL, NULL);
+			NULL, RTNL_FLAG_DOIT_UNLOCKED);
 	__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
-			ip6addrlbl_dump, NULL);
+			ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
 }
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a88b5b5b7955..c26f71234b9c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,8 @@ lookup_protocol:
 	np->mcast_hops	= IPV6_DEFAULT_MCASTHOPS;
 	np->mc_loop	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
-	np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
+	np->autoflowlabel = ip6_default_np_autolabel(net);
+	np->repflow	= net->ipv6.sysctl.flowlabel_reflect;
 	sk->sk_ipv6only	= net->ipv6.sysctl.bindv6only;
 
 	/* Init the ipv4 part of the socket since we can have sockets
@@ -554,6 +555,8 @@ const struct proto_ops inet6_stream_ops = {
 	.recvmsg	   = inet_recvmsg,		/* ok		*/
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
+	.sendmsg_locked    = tcp_sendmsg_locked,
+	.sendpage_locked   = tcp_sendpage_locked,
 	.splice_read	   = tcp_splice_read,
 	.read_sock	   = tcp_read_sock,
 	.peek_len	   = tcp_peek_len,
@@ -807,6 +810,10 @@ static int __net_init inet6_net_init(struct net *net)
 	net->ipv6.sysctl.idgen_retries = 3;
 	net->ipv6.sysctl.idgen_delay = 1 * HZ;
 	net->ipv6.sysctl.flowlabel_state_ranges = 0;
+	net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT;
+	net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
+	net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
+	net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
 	atomic_set(&net->ipv6.fib6_sernum, 1);
 
 	err = ipv6_init_mibs(net);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 7802b72196f3..78c974391567 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -271,6 +271,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
 		case NEXTHDR_DEST:
 			if (dir == XFRM_POLICY_OUT)
 				ipv6_rearrange_destopt(iph, exthdr.opth);
+			/* fall through */
 		case NEXTHDR_HOP:
 			if (!zero_out_mutable_opts(exthdr.opth)) {
 				net_dbg_ratelimited("overrun %sopts\n",
@@ -443,7 +444,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 		if (err == -EINPROGRESS)
 			goto out;
 
-		if (err == -EBUSY)
+		if (err == -ENOSPC)
 			err = NET_XMIT_DROP;
 		goto out_free;
 	}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9ed35473dcb5..a902ff8f59be 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -226,7 +226,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	int tailen = esp->tailen;
 
 	if (!skb_cloned(skb)) {
-		if (tailen <= skb_availroom(skb)) {
+		if (tailen <= skb_tailroom(skb)) {
 			nfrags = 1;
 			trailer = skb;
 			tail = skb_tail_pointer(trailer);
@@ -260,8 +260,6 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 
 			kunmap_atomic(vaddr);
 
-			spin_unlock_bh(&x->lock);
-
 			nfrags = skb_shinfo(skb)->nr_frags;
 
 			__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -269,6 +267,9 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 			skb_shinfo(skb)->nr_frags = ++nfrags;
 
 			pfrag->offset = pfrag->offset + allocsize;
+
+			spin_unlock_bh(&x->lock);
+
 			nfrags++;
 
 			skb->len += tailen;
@@ -345,7 +346,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 		           (unsigned char *)esph - skb->data,
 		           assoclen + ivlen + esp->clen + alen);
 	if (unlikely(err < 0))
-		goto error;
+		goto error_free;
 
 	if (!esp->inplace) {
 		int allocsize;
@@ -356,7 +357,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 		spin_lock_bh(&x->lock);
 		if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
 			spin_unlock_bh(&x->lock);
-			goto error;
+			goto error_free;
 		}
 
 		skb_shinfo(skb)->nr_frags = 1;
@@ -373,7 +374,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 			           (unsigned char *)esph - skb->data,
 			           assoclen + ivlen + esp->clen + alen);
 		if (unlikely(err < 0))
-			goto error;
+			goto error_free;
 	}
 
 	if ((x->props.flags & XFRM_STATE_ESN))
@@ -395,7 +396,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	case -EINPROGRESS:
 		goto error;
 
-	case -EBUSY:
+	case -ENOSPC:
 		err = NET_XMIT_DROP;
 		break;
 
@@ -406,8 +407,9 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 
 	if (sg != dsg)
 		esp_ssg_unref(x, tmp);
-	kfree(tmp);
 
+error_free:
+	kfree(tmp);
 error:
 	return err;
 }
@@ -461,28 +463,30 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	return esp6_output_tail(x, skb, &esp);
 }
 
-int esp6_input_done2(struct sk_buff *skb, int err)
+static inline int esp_remove_trailer(struct sk_buff *skb)
 {
 	struct xfrm_state *x = xfrm_input_state(skb);
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct crypto_aead *aead = x->data;
-	int alen = crypto_aead_authsize(aead);
-	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
-	int elen = skb->len - hlen;
-	int hdr_len = skb_network_header_len(skb);
-	int padlen;
+	int alen, hlen, elen;
+	int padlen, trimlen;
+	__wsum csumdiff;
 	u8 nexthdr[2];
+	int ret;
 
-	if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
-		kfree(ESP_SKB_CB(skb)->tmp);
+	alen = crypto_aead_authsize(aead);
+	hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+	elen = skb->len - hlen;
 
-	if (unlikely(err))
+	if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
+		ret = xo->proto;
 		goto out;
+	}
 
-	if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
-		BUG();
+	ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2);
+	BUG_ON(ret);
 
-	err = -EINVAL;
+	ret = -EINVAL;
 	padlen = nexthdr[0];
 	if (padlen + 2 + alen >= elen) {
 		net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
@@ -490,17 +494,46 @@ int esp6_input_done2(struct sk_buff *skb, int err)
 		goto out;
 	}
 
-	/* ... check padding bits here. Silly. :-) */
+	trimlen = alen + padlen + 2;
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
+		skb->csum = csum_block_sub(skb->csum, csumdiff,
+					   skb->len - trimlen);
+	}
+	pskb_trim(skb, skb->len - trimlen);
+
+	ret = nexthdr[1];
+
+out:
+	return ret;
+}
+
+int esp6_input_done2(struct sk_buff *skb, int err)
+{
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct crypto_aead *aead = x->data;
+	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+	int hdr_len = skb_network_header_len(skb);
 
-	pskb_trim(skb, skb->len - alen - padlen - 2);
-	__skb_pull(skb, hlen);
+	if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+		kfree(ESP_SKB_CB(skb)->tmp);
+
+	if (unlikely(err))
+		goto out;
+
+	err = esp_remove_trailer(skb);
+	if (unlikely(err < 0))
+		goto out;
+
+	skb_postpull_rcsum(skb, skb_network_header(skb),
+			   skb_network_header_len(skb));
+	skb_pull_rcsum(skb, hlen);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		skb_reset_transport_header(skb);
 	else
 		skb_set_transport_header(skb, -hdr_len);
 
-	err = nexthdr[1];
-
 	/* RFC4303: Drop dummy packets without any error */
 	if (err == IPPROTO_NONE)
 		err = -EINVAL;
@@ -526,14 +559,14 @@ static void esp_input_restore_header(struct sk_buff *skb)
 static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
 {
 	struct xfrm_state *x = xfrm_input_state(skb);
-	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
 
 	/* For ESN we move the header forward by 4 bytes to
 	 * accomodate the high bits.  We will move it back after
 	 * decryption.
 	 */
 	if ((x->props.flags & XFRM_STATE_ESN)) {
-		esph = skb_push(skb, 4);
+		struct ip_esp_hdr *esph = skb_push(skb, 4);
+
 		*seqhi = esph->spi;
 		esph->spi = esph->seq_no;
 		esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index f02f131f6435..333a478aa161 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -209,11 +209,13 @@ out:
 static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct crypto_aead *aead = x->data;
+	struct xfrm_offload *xo = xfrm_offload(skb);
 
 	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
 		return -EINVAL;
 
-	skb->ip_summed = CHECKSUM_NONE;
+	if (!(xo->flags & CRYPTO_DONE))
+		skb->ip_summed = CHECKSUM_NONE;
 
 	return esp6_input_done2(skb, 0);
 }
@@ -286,7 +288,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features
 	esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
 
 	err = esp6_output_tail(x, skb, &esp);
-	if (err < 0)
+	if (err)
 		return err;
 
 	secpath_reset(skb);
@@ -332,3 +334,4 @@ module_init(esp6_offload_init);
 module_exit(esp6_offload_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
+MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 3cec529c6113..83bd75713535 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -74,8 +74,20 @@ struct tlvtype_proc {
 
 /* An unknown option is detected, decide what to do */
 
-static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff,
+			       bool disallow_unknowns)
 {
+	if (disallow_unknowns) {
+		/* If unknown TLVs are disallowed by configuration
+		 * then always silently drop packet. Note this also
+		 * means no ICMP parameter problem is sent which
+		 * could be a good property to mitigate a reflection DOS
+		 * attack.
+		 */
+
+		goto drop;
+	}
+
 	switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
 	case 0: /* ignore */
 		return true;
@@ -89,25 +101,36 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
 		 */
 		if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
 			break;
+		/* fall through */
 	case 2: /* send ICMP PARM PROB regardless and drop packet */
 		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
 		return false;
 	}
 
+drop:
 	kfree_skb(skb);
 	return false;
 }
 
 /* Parse tlv encoded option header (hop-by-hop or destination) */
 
-static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
+static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
+			  struct sk_buff *skb,
+			  int max_count)
 {
-	const struct tlvtype_proc *curr;
+	int len = (skb_transport_header(skb)[1] + 1) << 3;
 	const unsigned char *nh = skb_network_header(skb);
 	int off = skb_network_header_len(skb);
-	int len = (skb_transport_header(skb)[1] + 1) << 3;
+	const struct tlvtype_proc *curr;
+	bool disallow_unknowns = false;
+	int tlv_count = 0;
 	int padlen = 0;
 
+	if (unlikely(max_count < 0)) {
+		disallow_unknowns = true;
+		max_count = -max_count;
+	}
+
 	if (skb_transport_offset(skb) + len > skb_headlen(skb))
 		goto bad;
 
@@ -148,6 +171,11 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
 		default: /* Other TLV code so scan list */
 			if (optlen > len)
 				goto bad;
+
+			tlv_count++;
+			if (tlv_count > max_count)
+				goto bad;
+
 			for (curr = procs; curr->type >= 0; curr++) {
 				if (curr->type == nh[off]) {
 					/* type specific length/alignment
@@ -158,10 +186,10 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
 					break;
 				}
 			}
-			if (curr->type < 0) {
-				if (ip6_tlvopt_unknown(skb, off) == 0)
-					return false;
-			}
+			if (curr->type < 0 &&
+			    !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
+				return false;
+
 			padlen = 0;
 			break;
 		}
@@ -186,7 +214,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
 	struct ipv6_destopt_hao *hao;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-	struct in6_addr tmp_addr;
 	int ret;
 
 	if (opt->dsthao) {
@@ -228,9 +255,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 
-	tmp_addr = ipv6h->saddr;
-	ipv6h->saddr = hao->addr;
-	hao->addr = tmp_addr;
+	swap(ipv6h->saddr, hao->addr);
 
 	if (skb->tstamp == 0)
 		__net_timestamp(skb);
@@ -260,23 +285,31 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
 	__u16 dstbuf;
 #endif
 	struct dst_entry *dst = skb_dst(skb);
+	struct net *net = dev_net(skb->dev);
+	int extlen;
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		__IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
 				IPSTATS_MIB_INHDRERRORS);
+fail_and_free:
 		kfree_skb(skb);
 		return -1;
 	}
 
+	extlen = (skb_transport_header(skb)[1] + 1) << 3;
+	if (extlen > net->ipv6.sysctl.max_dst_opts_len)
+		goto fail_and_free;
+
 	opt->lastopt = opt->dst1 = skb_network_header_len(skb);
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 	dstbuf = opt->dst1;
 #endif
 
-	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
-		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+	if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
+			  net->ipv6.sysctl.max_dst_opts_cnt)) {
+		skb->transport_header += extlen;
 		opt = IP6CB(skb);
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 		opt->nhoff = dstbuf;
@@ -805,6 +838,8 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = {
 int ipv6_parse_hopopts(struct sk_buff *skb)
 {
 	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct net *net = dev_net(skb->dev);
+	int extlen;
 
 	/*
 	 * skb_network_header(skb) is equal to skb->data, and
@@ -815,13 +850,19 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
 	    !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
 				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
+fail_and_free:
 		kfree_skb(skb);
 		return -1;
 	}
 
+	extlen = (skb_transport_header(skb)[1] + 1) << 3;
+	if (extlen > net->ipv6.sysctl.max_hbh_opts_len)
+		goto fail_and_free;
+
 	opt->flags |= IP6SKB_HOPBYHOP;
-	if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
-		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+	if (ip6_parse_tlv(tlvprochopopt_lst, skb,
+			  net->ipv6.sysctl.max_hbh_opts_cnt)) {
+		skb->transport_header += extlen;
 		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
 		return 1;
@@ -882,7 +923,7 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
 	       (hops - 1) * sizeof(struct in6_addr));
 
 	sr_phdr->segments[0] = **addr_p;
-	*addr_p = &sr_ihdr->segments[hops - 1];
+	*addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
 
 #ifdef CONFIG_IPV6_SEG6_HMAC
 	if (sr_has_hmac(sr_phdr)) {
@@ -1174,7 +1215,7 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
 	{
 		struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt;
 
-		fl6->daddr = srh->segments[srh->first_segment];
+		fl6->daddr = srh->segments[srh->segments_left];
 		break;
 	}
 	default:
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 305e2ed730bf..11025f8d124b 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -99,7 +99,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
 				break;
 			hdrlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH)
-			hdrlen = (hp->hdrlen+2)<<2;
+			hdrlen = ipv6_authlen(hp);
 		else
 			hdrlen = ipv6_optlen(hp);
 
@@ -187,7 +187,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 {
 	unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
 	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
-	unsigned int len;
 	bool found;
 
 	if (fragoff)
@@ -204,7 +203,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 		start = *offset + sizeof(struct ipv6hdr);
 		nexthdr = ip6->nexthdr;
 	}
-	len = skb->len - start;
 
 	do {
 		struct ipv6_opt_hdr _hdr, *hp;
@@ -273,7 +271,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 
 		if (!found) {
 			nexthdr = hp->nexthdr;
-			len -= hdrlen;
 			start += hdrlen;
 		}
 	} while (!found);
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
new file mode 100644
index 000000000000..05f82baaa99e
--- /dev/null
+++ b/net/ipv6/fib6_notifier.c
@@ -0,0 +1,63 @@
+#include <linux/notifier.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+#include <net/netns/ipv6.h>
+#include <net/ip6_fib.h>
+
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+		       enum fib_event_type event_type,
+		       struct fib_notifier_info *info)
+{
+	info->family = AF_INET6;
+	return call_fib_notifier(nb, net, event_type, info);
+}
+
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+			struct fib_notifier_info *info)
+{
+	info->family = AF_INET6;
+	return call_fib_notifiers(net, event_type, info);
+}
+
+static unsigned int fib6_seq_read(struct net *net)
+{
+	return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
+}
+
+static int fib6_dump(struct net *net, struct notifier_block *nb)
+{
+	int err;
+
+	err = fib6_rules_dump(net, nb);
+	if (err)
+		return err;
+
+	return fib6_tables_dump(net, nb);
+}
+
+static const struct fib_notifier_ops fib6_notifier_ops_template = {
+	.family		= AF_INET6,
+	.fib_seq_read	= fib6_seq_read,
+	.fib_dump	= fib6_dump,
+	.owner		= THIS_MODULE,
+};
+
+int __net_init fib6_notifier_init(struct net *net)
+{
+	struct fib_notifier_ops *ops;
+
+	ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	net->ipv6.notifier_ops = ops;
+
+	return 0;
+}
+
+void __net_exit fib6_notifier_exit(struct net *net)
+{
+	fib_notifier_ops_unregister(net->ipv6.notifier_ops);
+}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec849d88a662..b240f24a6e52 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/netdevice.h>
+#include <linux/notifier.h>
 #include <linux/export.h>
 
 #include <net/fib_rules.h>
@@ -29,22 +30,65 @@ struct fib6_rule {
 	u8			tclass;
 };
 
-struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
-				   int flags, pol_lookup_t lookup)
+static bool fib6_rule_matchall(const struct fib_rule *rule)
+{
+	struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
+
+	if (r->dst.plen || r->src.plen || r->tclass)
+		return false;
+	return fib_rule_matchall(rule);
+}
+
+bool fib6_rule_default(const struct fib_rule *rule)
 {
-	struct fib_lookup_arg arg = {
-		.lookup_ptr = lookup,
-		.flags = FIB_LOOKUP_NOREF,
-	};
+	if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+	    rule->l3mdev)
+		return false;
+	if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN)
+		return false;
+	return true;
+}
+EXPORT_SYMBOL_GPL(fib6_rule_default);
 
-	/* update flow if oif or iif point to device enslaved to l3mdev */
-	l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return fib_rules_dump(net, nb, AF_INET6);
+}
 
-	fib_rules_lookup(net->ipv6.fib6_rules_ops,
-			 flowi6_to_flowi(fl6), flags, &arg);
+unsigned int fib6_rules_seq_read(struct net *net)
+{
+	return fib_rules_seq_read(net, AF_INET6);
+}
 
-	if (arg.result)
-		return arg.result;
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+				   int flags, pol_lookup_t lookup)
+{
+	if (net->ipv6.fib6_has_custom_rules) {
+		struct fib_lookup_arg arg = {
+			.lookup_ptr = lookup,
+			.flags = FIB_LOOKUP_NOREF,
+		};
+
+		/* update flow if oif or iif point to device enslaved to l3mdev */
+		l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
+		fib_rules_lookup(net->ipv6.fib6_rules_ops,
+				 flowi6_to_flowi(fl6), flags, &arg);
+
+		if (arg.result)
+			return arg.result;
+	} else {
+		struct rt6_info *rt;
+
+		rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+		if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
+			return &rt->dst;
+		ip6_rt_put(rt);
+		rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+		if (rt->dst.error != -EAGAIN)
+			return &rt->dst;
+		ip6_rt_put(rt);
+	}
 
 	dst_hold(&net->ipv6.ip6_null_entry->dst);
 	return &net->ipv6.ip6_null_entry->dst;
@@ -214,6 +258,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule6->dst.plen = frh->dst_len;
 	rule6->tclass = frh->tos;
 
+	net->ipv6.fib6_has_custom_rules = true;
 	err = 0;
 errout:
 	return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8d7b113958b1..6ae5dd3f4d0d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -250,16 +250,15 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
 	return (*op & 0xC0) == 0x80;
 }
 
-int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
-			       struct icmp6hdr *thdr, int len)
+void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+				struct icmp6hdr *thdr, int len)
 {
 	struct sk_buff *skb;
 	struct icmp6hdr *icmp6h;
-	int err = 0;
 
 	skb = skb_peek(&sk->sk_write_queue);
 	if (!skb)
-		goto out;
+		return;
 
 	icmp6h = icmp6_hdr(skb);
 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
@@ -287,8 +286,6 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
 						      tmp_csum);
 	}
 	ip6_push_pending_frames(sk);
-out:
-	return err;
 }
 
 struct icmpv6_msg {
@@ -399,6 +396,24 @@ relookup_failed:
 	return ERR_PTR(err);
 }
 
+static int icmp6_iif(const struct sk_buff *skb)
+{
+	int iif = skb->dev->ifindex;
+
+	/* for local traffic to local address, skb dev is the loopback
+	 * device. Check if there is a dst attached to the skb and if so
+	 * get the real device index.
+	 */
+	if (unlikely(iif == LOOPBACK_IFINDEX)) {
+		const struct rt6_info *rt6 = skb_rt6_info(skb);
+
+		if (rt6)
+			iif = rt6->rt6i_idev->dev->ifindex;
+	}
+
+	return iif;
+}
+
 /*
  *	Send an ICMP message in response to a packet in error
  */
@@ -420,7 +435,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	int iif = 0;
 	int addr_type = 0;
 	int len;
-	int err = 0;
 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 
 	if ((u8 *)hdr < skb->head ||
@@ -459,9 +473,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	 *	Source addr check
 	 */
 
-	if (__ipv6_addr_needs_scope_id(addr_type))
-		iif = skb->dev->ifindex;
-	else {
+	if (__ipv6_addr_needs_scope_id(addr_type)) {
+		iif = icmp6_iif(skb);
+	} else {
 		dst = skb_dst(skb);
 		iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
 	}
@@ -508,6 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
 	fl6.flowi6_uid = sock_net_uid(net, NULL);
+	fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
@@ -556,17 +571,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	rcu_read_lock();
 	idev = __in6_dev_get(skb->dev);
 
-	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
-			      len + sizeof(struct icmp6hdr),
-			      sizeof(struct icmp6hdr),
-			      &ipc6, &fl6, (struct rt6_info *)dst,
-			      MSG_DONTWAIT, &sockc_unused);
-	if (err) {
+	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
+			    len + sizeof(struct icmp6hdr),
+			    sizeof(struct icmp6hdr),
+			    &ipc6, &fl6, (struct rt6_info *)dst,
+			    MSG_DONTWAIT, &sockc_unused)) {
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 	} else {
-		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
-						 len + sizeof(struct icmp6hdr));
+		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+					   len + sizeof(struct icmp6hdr));
 	}
 	rcu_read_unlock();
 out_dst_release:
@@ -663,7 +677,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	struct icmpv6_msg msg;
 	struct dst_entry *dst;
 	struct ipcm6_cookie ipc6;
-	int err = 0;
 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 	struct sockcm_cookie sockc_unused = {0};
 
@@ -682,7 +695,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	fl6.daddr = ipv6_hdr(skb)->saddr;
 	if (saddr)
 		fl6.saddr = *saddr;
-	fl6.flowi6_oif = skb->dev->ifindex;
+	fl6.flowi6_oif = icmp6_iif(skb);
 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 	fl6.flowi6_mark = mark;
 	fl6.flowi6_uid = sock_net_uid(net, NULL);
@@ -700,8 +713,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	else if (!fl6.flowi6_oif)
 		fl6.flowi6_oif = np->ucast_oif;
 
-	err = ip6_dst_lookup(net, sk, &dst, &fl6);
-	if (err)
+	if (ip6_dst_lookup(net, sk, &dst, &fl6))
 		goto out;
 	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
 	if (IS_ERR(dst))
@@ -718,17 +730,16 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	ipc6.dontfrag = np->dontfrag;
 	ipc6.opt = NULL;
 
-	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
-				sizeof(struct icmp6hdr), &ipc6, &fl6,
-				(struct rt6_info *)dst, MSG_DONTWAIT,
-				&sockc_unused);
-
-	if (err) {
+	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
+			    skb->len + sizeof(struct icmp6hdr),
+			    sizeof(struct icmp6hdr), &ipc6, &fl6,
+			    (struct rt6_info *)dst, MSG_DONTWAIT,
+			    &sockc_unused)) {
 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 	} else {
-		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
-						 skb->len + sizeof(struct icmp6hdr));
+		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+					   skb->len + sizeof(struct icmp6hdr));
 	}
 	dst_release(dst);
 out:
@@ -853,10 +864,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
 			goto discard_it;
 		hdr = icmp6_hdr(skb);
 
-		/*
-		 *	Drop through to notify
-		 */
-
+		/* to notify */
+		/* fall through */
 	case ICMPV6_DEST_UNREACH:
 	case ICMPV6_TIME_EXCEED:
 	case ICMPV6_PARAMPROB:
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index e0170f62bc39..3c7a11b62334 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -55,17 +55,6 @@ struct ila_identifier {
 	};
 };
 
-enum {
-	ILA_ATYPE_IID = 0,
-	ILA_ATYPE_LUID,
-	ILA_ATYPE_VIRT_V4,
-	ILA_ATYPE_VIRT_UNI_V6,
-	ILA_ATYPE_VIRT_MULTI_V6,
-	ILA_ATYPE_RSVD_1,
-	ILA_ATYPE_RSVD_2,
-	ILA_ATYPE_RSVD_3,
-};
-
 #define CSUM_NEUTRAL_FLAG	htonl(0x10000000)
 
 struct ila_addr {
@@ -93,6 +82,7 @@ struct ila_params {
 	struct ila_locator locator_match;
 	__wsum csum_diff;
 	u8 csum_mode;
+	u8 ident_type;
 };
 
 static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index aba0998ddbfb..8c88ecf29b93 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -13,30 +13,37 @@
 #include <uapi/linux/ila.h>
 #include "ila.h"
 
-static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+void ila_init_saved_csum(struct ila_params *p)
 {
-	struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+	if (!p->locator_match.v64)
+		return;
 
+	p->csum_diff = compute_csum_diff8(
+				(__be32 *)&p->locator,
+				(__be32 *)&p->locator_match);
+}
+
+static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p)
+{
 	if (p->locator_match.v64)
 		return p->csum_diff;
 	else
-		return compute_csum_diff8((__be32 *)&iaddr->loc,
-					  (__be32 *)&p->locator);
+		return compute_csum_diff8((__be32 *)&p->locator,
+					  (__be32 *)&iaddr->loc);
 }
 
-static void ila_csum_do_neutral(struct ila_addr *iaddr,
-				struct ila_params *p)
+static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+{
+	return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p);
+}
+
+static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr,
+				    struct ila_params *p)
 {
 	__sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
 	__wsum diff, fval;
 
-	/* Check if checksum adjust value has been cached */
-	if (p->locator_match.v64) {
-		diff = p->csum_diff;
-	} else {
-		diff = compute_csum_diff8((__be32 *)&p->locator,
-					  (__be32 *)iaddr);
-	}
+	diff = get_csum_diff_iaddr(iaddr, p);
 
 	fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
 			CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG);
@@ -53,13 +60,23 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr,
 	iaddr->ident.csum_neutral ^= 1;
 }
 
-static void ila_csum_adjust_transport(struct sk_buff *skb,
+static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr,
 				      struct ila_params *p)
 {
+	__sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
 	__wsum diff;
-	struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+	diff = get_csum_diff_iaddr(iaddr, p);
+
+	*adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
+}
+
+static void ila_csum_adjust_transport(struct sk_buff *skb,
+				      struct ila_params *p)
+{
 	size_t nhoff = sizeof(struct ipv6hdr);
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	__wsum diff;
 
 	switch (ip6h->nexthdr) {
 	case NEXTHDR_TCP:
@@ -98,52 +115,45 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
 		}
 		break;
 	}
-
-	/* Now change destination address */
-	iaddr->loc = p->locator;
 }
 
 void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
-			     bool set_csum_neutral)
+			     bool sir2ila)
 {
 	struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
 
-	/* First deal with the transport checksum */
-	if (ila_csum_neutral_set(iaddr->ident)) {
-		/* C-bit is set in the locator indicating that this
-		 * is a locator being translated to a SIR address.
-		 * Perform (receiver) checksum-neutral translation.
-		 */
-		if (!set_csum_neutral)
-			ila_csum_do_neutral(iaddr, p);
-	} else {
-		switch (p->csum_mode) {
-		case ILA_CSUM_ADJUST_TRANSPORT:
-			ila_csum_adjust_transport(skb, p);
-			break;
-		case ILA_CSUM_NEUTRAL_MAP:
-			ila_csum_do_neutral(iaddr, p);
-			break;
-		case ILA_CSUM_NO_ACTION:
+	switch (p->csum_mode) {
+	case ILA_CSUM_ADJUST_TRANSPORT:
+		ila_csum_adjust_transport(skb, p);
+		break;
+	case ILA_CSUM_NEUTRAL_MAP:
+		if (sir2ila) {
+			if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) {
+				/* Checksum flag should never be
+				 * set in a formatted SIR address.
+				 */
+				break;
+			}
+		} else if (!ila_csum_neutral_set(iaddr->ident)) {
+			/* ILA to SIR translation and C-bit isn't
+			 * set so we're good.
+			 */
 			break;
 		}
+		ila_csum_do_neutral_fmt(iaddr, p);
+		break;
+	case ILA_CSUM_NEUTRAL_MAP_AUTO:
+		ila_csum_do_neutral_nofmt(iaddr, p);
+		break;
+	case ILA_CSUM_NO_ACTION:
+		break;
 	}
 
 	/* Now change destination address */
 	iaddr->loc = p->locator;
 }
 
-void ila_init_saved_csum(struct ila_params *p)
-{
-	if (!p->locator_match.v64)
-		return;
-
-	p->csum_diff = compute_csum_diff8(
-				(__be32 *)&p->locator,
-				(__be32 *)&p->locator_match);
-}
-
 static int __init ila_init(void)
 {
 	int ret;
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 0c02a09bc351..3d56a2fb6f86 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/errno.h>
 #include <linux/ip.h>
 #include <linux/kernel.h>
@@ -19,6 +20,7 @@ struct ila_lwt {
 	struct ila_params p;
 	struct dst_cache dst_cache;
 	u32 connected : 1;
+	u32 lwt_output : 1;
 };
 
 static inline struct ila_lwt *ila_lwt_lwtunnel(
@@ -44,8 +46,10 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	if (skb->protocol != htons(ETH_P_IPV6))
 		goto drop;
 
-	ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate),
-				true);
+	if (ilwt->lwt_output)
+		ila_update_ipv6_locator(skb,
+					ila_params_lwtunnel(orig_dst->lwtstate),
+					true);
 
 	if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) {
 		/* Already have a next hop address in route, no need for
@@ -97,11 +101,15 @@ drop:
 static int ila_input(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
+	struct ila_lwt *ilwt = ila_lwt_lwtunnel(dst->lwtstate);
 
 	if (skb->protocol != htons(ETH_P_IPV6))
 		goto drop;
 
-	ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false);
+	if (!ilwt->lwt_output)
+		ila_update_ipv6_locator(skb,
+					ila_params_lwtunnel(dst->lwtstate),
+					false);
 
 	return dst->lwtstate->orig_input(skb);
 
@@ -113,6 +121,8 @@ drop:
 static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
 	[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
 	[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+	[ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
+	[ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, },
 };
 
 static int ila_build_state(struct nlattr *nla,
@@ -126,33 +136,84 @@ static int ila_build_state(struct nlattr *nla,
 	struct lwtunnel_state *newts;
 	const struct fib6_config *cfg6 = cfg;
 	struct ila_addr *iaddr;
+	u8 ident_type = ILA_ATYPE_USE_FORMAT;
+	u8 hook_type = ILA_HOOK_ROUTE_OUTPUT;
+	u8 csum_mode = ILA_CSUM_NO_ACTION;
+	bool lwt_output = true;
+	u8 eff_ident_type;
 	int ret;
 
 	if (family != AF_INET6)
 		return -EINVAL;
 
-	if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
-		/* Need to have full locator and at least type field
-		 * included in destination
-		 */
+	ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[ILA_ATTR_LOCATOR])
 		return -EINVAL;
-	}
 
 	iaddr = (struct ila_addr *)&cfg6->fc_dst;
 
-	if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) {
-		/* Don't allow translation for a non-ILA address or checksum
-		 * neutral flag to be set.
+	if (tb[ILA_ATTR_IDENT_TYPE])
+		ident_type = nla_get_u8(tb[ILA_ATTR_IDENT_TYPE]);
+
+	if (ident_type == ILA_ATYPE_USE_FORMAT) {
+		/* Infer identifier type from type field in formatted
+		 * identifier.
 		 */
+
+		if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
+			/* Need to have full locator and at least type field
+			 * included in destination
+			 */
+			return -EINVAL;
+		}
+
+		eff_ident_type = iaddr->ident.type;
+	} else {
+		eff_ident_type = ident_type;
+	}
+
+	switch (eff_ident_type) {
+	case ILA_ATYPE_IID:
+		/* Don't allow ILA for IID type */
+		return -EINVAL;
+	case ILA_ATYPE_LUID:
+		break;
+	case ILA_ATYPE_VIRT_V4:
+	case ILA_ATYPE_VIRT_UNI_V6:
+	case ILA_ATYPE_VIRT_MULTI_V6:
+	case ILA_ATYPE_NONLOCAL_ADDR:
+		/* These ILA formats are not supported yet. */
+	default:
 		return -EINVAL;
 	}
 
-	ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
-	if (ret < 0)
-		return ret;
+	if (tb[ILA_ATTR_HOOK_TYPE])
+		hook_type = nla_get_u8(tb[ILA_ATTR_HOOK_TYPE]);
+
+	switch (hook_type) {
+	case ILA_HOOK_ROUTE_OUTPUT:
+		lwt_output = true;
+		break;
+	case ILA_HOOK_ROUTE_INPUT:
+		lwt_output = false;
+		break;
+	default:
+		return -EINVAL;
+	}
 
-	if (!tb[ILA_ATTR_LOCATOR])
+	if (tb[ILA_ATTR_CSUM_MODE])
+		csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
+
+	if (csum_mode == ILA_CSUM_NEUTRAL_MAP &&
+	    ila_csum_neutral_set(iaddr->ident)) {
+		/* Don't allow translation if checksum neutral bit is
+		 * configured and it's set in the SIR address.
+		 */
 		return -EINVAL;
+	}
 
 	newts = lwtunnel_state_alloc(sizeof(*ilwt));
 	if (!newts)
@@ -165,19 +226,18 @@ static int ila_build_state(struct nlattr *nla,
 		return ret;
 	}
 
+	ilwt->lwt_output = !!lwt_output;
+
 	p = ila_params_lwtunnel(newts);
 
+	p->csum_mode = csum_mode;
+	p->ident_type = ident_type;
 	p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
 
 	/* Precompute checksum difference for translation since we
 	 * know both the old locator and the new one.
 	 */
 	p->locator_match = iaddr->loc;
-	p->csum_diff = compute_csum_diff8(
-		(__be32 *)&p->locator_match, (__be32 *)&p->locator);
-
-	if (tb[ILA_ATTR_CSUM_MODE])
-		p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
 
 	ila_init_saved_csum(p);
 
@@ -202,13 +262,23 @@ static int ila_fill_encap_info(struct sk_buff *skb,
 			       struct lwtunnel_state *lwtstate)
 {
 	struct ila_params *p = ila_params_lwtunnel(lwtstate);
+	struct ila_lwt *ilwt = ila_lwt_lwtunnel(lwtstate);
 
 	if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64,
 			      ILA_ATTR_PAD))
 		goto nla_put_failure;
+
 	if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
 		goto nla_put_failure;
 
+	if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type))
+		goto nla_put_failure;
+
+	if (nla_put_u8(skb, ILA_ATTR_HOOK_TYPE,
+		       ilwt->lwt_output ? ILA_HOOK_ROUTE_OUTPUT :
+					  ILA_HOOK_ROUTE_INPUT))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
@@ -219,6 +289,8 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
 {
 	return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */
 	       nla_total_size(sizeof(u8)) +        /* ILA_ATTR_CSUM_MODE */
+	       nla_total_size(sizeof(u8)) +        /* ILA_ATTR_IDENT_TYPE */
+	       nla_total_size(sizeof(u8)) +        /* ILA_ATTR_HOOK_TYPE */
 	       0;
 }
 
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 77f7f8c7d93d..6eb5e68f112a 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/jhash.h>
 #include <linux/netfilter.h>
 #include <linux/rcupdate.h>
@@ -120,6 +121,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
 	[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
 	[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
 	[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+	[ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
 };
 
 static int parse_nl_config(struct genl_info *info,
@@ -137,6 +139,14 @@ static int parse_nl_config(struct genl_info *info,
 
 	if (info->attrs[ILA_ATTR_CSUM_MODE])
 		xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
+	else
+		xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
+
+	if (info->attrs[ILA_ATTR_IDENT_TYPE])
+		xp->ip.ident_type = nla_get_u8(
+				info->attrs[ILA_ATTR_IDENT_TYPE]);
+	else
+		xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
 
 	if (info->attrs[ILA_ATTR_IFINDEX])
 		xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -197,7 +207,7 @@ static void ila_free_cb(void *ptr, void *arg)
 	}
 }
 
-static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral);
+static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila);
 
 static unsigned int
 ila_nf_input(void *priv,
@@ -208,7 +218,7 @@ ila_nf_input(void *priv,
 	return NF_ACCEPT;
 }
 
-static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
+static const struct nf_hook_ops ila_nf_hook_ops[] = {
 	{
 		.hook = ila_nf_input,
 		.pf = NFPROTO_IPV6,
@@ -395,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
 			      (__force u64)ila->xp.ip.locator_match.v64,
 			      ILA_ATTR_PAD) ||
 	    nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
-	    nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode))
+	    nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) ||
+	    nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type))
 		return -1;
 
 	return 0;
@@ -606,7 +617,7 @@ static struct pernet_operations ila_net_ops = {
 	.size = sizeof(struct ila_net),
 };
 
-static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
+static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
 {
 	struct ila_map *ila;
 	struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -616,16 +627,16 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
 
 	/* Assumes skb contains a valid IPv6 header that is pulled */
 
-	if (!ila_addr_is_ila(iaddr)) {
-		/* Type indicates this is not an ILA address */
-		return 0;
-	}
+	/* No check here that ILA type in the mapping matches what is in the
+	 * address. We assume that whatever sender gave us can be translated.
+	 * The checksum mode however is relevant.
+	 */
 
 	rcu_read_lock();
 
 	ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
 	if (ila)
-		ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral);
+		ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila);
 
 	rcu_read_unlock();
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b13b8f93079d..b01858f5deb1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -56,7 +56,7 @@ struct sock *__inet6_lookup_established(struct net *net,
 					   const __be16 sport,
 					   const struct in6_addr *daddr,
 					   const u16 hnum,
-					   const int dif)
+					   const int dif, const int sdif)
 {
 	struct sock *sk;
 	const struct hlist_nulls_node *node;
@@ -73,12 +73,12 @@ begin:
 	sk_nulls_for_each_rcu(sk, node, &head->chain) {
 		if (sk->sk_hash != hash)
 			continue;
-		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))
 			continue;
 		if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
 			goto out;
 
-		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) {
 			sock_gen_put(sk);
 			goto begin;
 		}
@@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established);
 static inline int compute_score(struct sock *sk, struct net *net,
 				const unsigned short hnum,
 				const struct in6_addr *daddr,
-				const int dif, bool exact_dif)
+				const int dif, const int sdif, bool exact_dif)
 {
 	int score = -1;
 
@@ -110,9 +110,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
 			score++;
 		}
 		if (sk->sk_bound_dev_if || exact_dif) {
-			if (sk->sk_bound_dev_if != dif)
+			bool dev_match = (sk->sk_bound_dev_if == dif ||
+					  sk->sk_bound_dev_if == sdif);
+
+			if (exact_dif && !dev_match)
 				return -1;
-			score++;
+			if (sk->sk_bound_dev_if && dev_match)
+				score++;
 		}
 		if (sk->sk_incoming_cpu == raw_smp_processor_id())
 			score++;
@@ -126,7 +130,7 @@ struct sock *inet6_lookup_listener(struct net *net,
 		struct sk_buff *skb, int doff,
 		const struct in6_addr *saddr,
 		const __be16 sport, const struct in6_addr *daddr,
-		const unsigned short hnum, const int dif)
+		const unsigned short hnum, const int dif, const int sdif)
 {
 	unsigned int hash = inet_lhashfn(net, hnum);
 	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -136,7 +140,7 @@ struct sock *inet6_lookup_listener(struct net *net,
 	u32 phash = 0;
 
 	sk_for_each(sk, &ilb->head) {
-		score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+		score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
 		if (score > hiscore) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -171,7 +175,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
 	bool refcounted;
 
 	sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
-			    ntohs(dport), dif, &refcounted);
+			    ntohs(dport), dif, 0, &refcounted);
 	if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	return sk;
@@ -187,8 +191,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
 	const struct in6_addr *saddr = &sk->sk_v6_daddr;
 	const int dif = sk->sk_bound_dev_if;
-	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	struct net *net = sock_net(sk);
+	const int sdif = l3mdev_master_ifindex_by_index(net, dif);
+	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
 						inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -203,7 +208,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 		if (sk2->sk_hash != hash)
 			continue;
 
-		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports,
+				       dif, sdif))) {
 			if (sk2->sk_state == TCP_TIME_WAIT) {
 				tw = inet_twsk(sk2);
 				if (twsk_unique(sk, sk2, twp))
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index c0cbcb259f5a..ec43d18b5ff9 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <net/ip.h>
 #include <net/udp.h>
 #include <net/udplite.h>
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ebb299cf72b7..2e2804f5823e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -33,18 +33,11 @@
 #include <net/ndisc.h>
 #include <net/addrconf.h>
 #include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
 
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
 
-#define RT6_DEBUG 2
-
-#if RT6_DEBUG >= 3
-#define RT6_TRACE(x...) pr_debug(x)
-#else
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
 static struct kmem_cache *fib6_node_kmem __read_mostly;
 
 struct fib6_cleaner {
@@ -61,9 +54,12 @@ struct fib6_cleaner {
 #define FWS_INIT FWS_L
 #endif
 
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
-static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
-static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
+static struct rt6_info *fib6_find_prefix(struct net *net,
+					 struct fib6_table *table,
+					 struct fib6_node *fn);
+static struct fib6_node *fib6_repair_tree(struct net *net,
+					  struct fib6_table *table,
+					  struct fib6_node *fn);
 static int fib6_walk(struct net *net, struct fib6_walker *w);
 static int fib6_walk_continue(struct fib6_walker *w);
 
@@ -109,6 +105,20 @@ enum {
 	FIB6_NO_SERNUM_CHANGE = 0,
 };
 
+void fib6_update_sernum(struct rt6_info *rt)
+{
+	struct fib6_table *table = rt->rt6i_table;
+	struct net *net = dev_net(rt->dst.dev);
+	struct fib6_node *fn;
+
+	spin_lock_bh(&table->tb6_lock);
+	fn = rcu_dereference_protected(rt->rt6i_node,
+			lockdep_is_held(&table->tb6_lock));
+	if (fn)
+		fn->fn_sernum = fib6_new_sernum(net);
+	spin_unlock_bh(&table->tb6_lock);
+}
+
 /*
  *	Auxiliary address test functions for the radix tree.
  *
@@ -139,21 +149,37 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
 	       addr[fn_bit >> 5];
 }
 
-static struct fib6_node *node_alloc(void)
+static struct fib6_node *node_alloc(struct net *net)
 {
 	struct fib6_node *fn;
 
 	fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
+	if (fn)
+		net->ipv6.rt6_stats->fib_nodes++;
 
 	return fn;
 }
 
-static void node_free(struct fib6_node *fn)
+static void node_free_immediate(struct net *net, struct fib6_node *fn)
+{
+	kmem_cache_free(fib6_node_kmem, fn);
+	net->ipv6.rt6_stats->fib_nodes--;
+}
+
+static void node_free_rcu(struct rcu_head *head)
 {
+	struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
+
 	kmem_cache_free(fib6_node_kmem, fn);
 }
 
-static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+static void node_free(struct net *net, struct fib6_node *fn)
+{
+	call_rcu(&fn->rcu, node_free_rcu);
+	net->ipv6.rt6_stats->fib_nodes--;
+}
+
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
 {
 	int cpu;
 
@@ -172,18 +198,13 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
 			*ppcpu_rt = NULL;
 		}
 	}
-
-	free_percpu(non_pcpu_rt->rt6i_pcpu);
-	non_pcpu_rt->rt6i_pcpu = NULL;
 }
+EXPORT_SYMBOL_GPL(rt6_free_pcpu);
 
-static void rt6_release(struct rt6_info *rt)
+static void fib6_free_table(struct fib6_table *table)
 {
-	if (atomic_dec_and_test(&rt->rt6i_ref)) {
-		rt6_free_pcpu(rt);
-		dst_dev_put(&rt->dst);
-		dst_release(&rt->dst);
-	}
+	inetpeer_invalidate_tree(&table->tb6_peers);
+	kfree(table);
 }
 
 static void fib6_link_table(struct net *net, struct fib6_table *tb)
@@ -194,8 +215,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb)
 	 * Initialize table lock at a single place to give lockdep a key,
 	 * tables aren't visible prior to being linked to the list.
 	 */
-	rwlock_init(&tb->tb6_lock);
-
+	spin_lock_init(&tb->tb6_lock);
 	h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
 
 	/*
@@ -214,7 +234,8 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
 	table = kzalloc(sizeof(*table), GFP_ATOMIC);
 	if (table) {
 		table->tb6_id = id;
-		table->tb6_root.leaf = net->ipv6.ip6_null_entry;
+		rcu_assign_pointer(table->tb6_root.leaf,
+				   net->ipv6.ip6_null_entry);
 		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
 		inet_peer_base_init(&table->tb6_peers);
 	}
@@ -302,12 +323,114 @@ static void __net_init fib6_tables_init(struct net *net)
 
 #endif
 
+unsigned int fib6_tables_seq_read(struct net *net)
+{
+	unsigned int h, fib_seq = 0;
+
+	rcu_read_lock();
+	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+		struct fib6_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb6_hlist)
+			fib_seq += tb->fib_seq;
+	}
+	rcu_read_unlock();
+
+	return fib_seq;
+}
+
+static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+				    enum fib_event_type event_type,
+				    struct rt6_info *rt)
+{
+	struct fib6_entry_notifier_info info = {
+		.rt = rt,
+	};
+
+	return call_fib6_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib6_entry_notifiers(struct net *net,
+				     enum fib_event_type event_type,
+				     struct rt6_info *rt,
+				     struct netlink_ext_ack *extack)
+{
+	struct fib6_entry_notifier_info info = {
+		.info.extack = extack,
+		.rt = rt,
+	};
+
+	rt->rt6i_table->fib_seq++;
+	return call_fib6_notifiers(net, event_type, &info.info);
+}
+
+struct fib6_dump_arg {
+	struct net *net;
+	struct notifier_block *nb;
+};
+
+static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+{
+	if (rt == arg->net->ipv6.ip6_null_entry)
+		return;
+	call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+}
+
+static int fib6_node_dump(struct fib6_walker *w)
+{
+	struct rt6_info *rt;
+
+	for_each_fib6_walker_rt(w)
+		fib6_rt_dump(rt, w->args);
+	w->leaf = NULL;
+	return 0;
+}
+
+static void fib6_table_dump(struct net *net, struct fib6_table *tb,
+			    struct fib6_walker *w)
+{
+	w->root = &tb->tb6_root;
+	spin_lock_bh(&tb->tb6_lock);
+	fib6_walk(net, w);
+	spin_unlock_bh(&tb->tb6_lock);
+}
+
+/* Called with rcu_read_lock() */
+int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+{
+	struct fib6_dump_arg arg;
+	struct fib6_walker *w;
+	unsigned int h;
+
+	w = kzalloc(sizeof(*w), GFP_ATOMIC);
+	if (!w)
+		return -ENOMEM;
+
+	w->func = fib6_node_dump;
+	arg.net = net;
+	arg.nb = nb;
+	w->args = &arg;
+
+	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+		struct fib6_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb6_hlist)
+			fib6_table_dump(net, tb, w);
+	}
+
+	kfree(w);
+
+	return 0;
+}
+
 static int fib6_dump_node(struct fib6_walker *w)
 {
 	int res;
 	struct rt6_info *rt;
 
-	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+	for_each_fib6_walker_rt(w) {
 		res = rt6_dump_route(rt, w->args);
 		if (res < 0) {
 			/* Frame is full, suspend walking */
@@ -366,9 +489,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
 		w->count = 0;
 		w->skip = 0;
 
-		read_lock_bh(&table->tb6_lock);
+		spin_lock_bh(&table->tb6_lock);
 		res = fib6_walk(net, w);
-		read_unlock_bh(&table->tb6_lock);
+		spin_unlock_bh(&table->tb6_lock);
 		if (res > 0) {
 			cb->args[4] = 1;
 			cb->args[5] = w->root->fn_sernum;
@@ -383,9 +506,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
 		} else
 			w->skip = 0;
 
-		read_lock_bh(&table->tb6_lock);
+		spin_lock_bh(&table->tb6_lock);
 		res = fib6_walk_continue(w);
-		read_unlock_bh(&table->tb6_lock);
+		spin_unlock_bh(&table->tb6_lock);
 		if (res <= 0) {
 			fib6_walker_unlink(net, w);
 			cb->args[4] = 0;
@@ -466,11 +589,13 @@ out:
  *	node.
  */
 
-static struct fib6_node *fib6_add_1(struct fib6_node *root,
-				     struct in6_addr *addr, int plen,
-				     int offset, int allow_create,
-				     int replace_required, int sernum,
-				     struct netlink_ext_ack *extack)
+static struct fib6_node *fib6_add_1(struct net *net,
+				    struct fib6_table *table,
+				    struct fib6_node *root,
+				    struct in6_addr *addr, int plen,
+				    int offset, int allow_create,
+				    int replace_required,
+				    struct netlink_ext_ack *extack)
 {
 	struct fib6_node *fn, *in, *ln;
 	struct fib6_node *pn = NULL;
@@ -485,7 +610,9 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
 	fn = root;
 
 	do {
-		key = (struct rt6key *)((u8 *)fn->leaf + offset);
+		struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+					    lockdep_is_held(&table->tb6_lock));
+		key = (struct rt6key *)((u8 *)leaf + offset);
 
 		/*
 		 *	Prefix match
@@ -511,12 +638,10 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
 		if (plen == fn->fn_bit) {
 			/* clean up an intermediate node */
 			if (!(fn->fn_flags & RTN_RTINFO)) {
-				rt6_release(fn->leaf);
-				fn->leaf = NULL;
+				RCU_INIT_POINTER(fn->leaf, NULL);
+				rt6_release(leaf);
 			}
 
-			fn->fn_sernum = sernum;
-
 			return fn;
 		}
 
@@ -525,10 +650,13 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
 		 */
 
 		/* Try to walk down on tree. */
-		fn->fn_sernum = sernum;
 		dir = addr_bit_set(addr, fn->fn_bit);
 		pn = fn;
-		fn = dir ? fn->right : fn->left;
+		fn = dir ?
+		     rcu_dereference_protected(fn->right,
+					lockdep_is_held(&table->tb6_lock)) :
+		     rcu_dereference_protected(fn->left,
+					lockdep_is_held(&table->tb6_lock));
 	} while (fn);
 
 	if (!allow_create) {
@@ -554,19 +682,17 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
 	 *	Create new leaf node without children.
 	 */
 
-	ln = node_alloc();
+	ln = node_alloc(net);
 
 	if (!ln)
 		return ERR_PTR(-ENOMEM);
 	ln->fn_bit = plen;
-
-	ln->parent = pn;
-	ln->fn_sernum = sernum;
+	RCU_INIT_POINTER(ln->parent, pn);
 
 	if (dir)
-		pn->right = ln;
+		rcu_assign_pointer(pn->right, ln);
 	else
-		pn->left  = ln;
+		rcu_assign_pointer(pn->left, ln);
 
 	return ln;
 
@@ -580,7 +706,8 @@ insert_above:
 	 * and the current
 	 */
 
-	pn = fn->parent;
+	pn = rcu_dereference_protected(fn->parent,
+				       lockdep_is_held(&table->tb6_lock));
 
 	/* find 1st bit in difference between the 2 addrs.
 
@@ -596,14 +723,14 @@ insert_above:
 	 *	(new leaf node)[ln] (old node)[fn]
 	 */
 	if (plen > bit) {
-		in = node_alloc();
-		ln = node_alloc();
+		in = node_alloc(net);
+		ln = node_alloc(net);
 
 		if (!in || !ln) {
 			if (in)
-				node_free(in);
+				node_free_immediate(net, in);
 			if (ln)
-				node_free(ln);
+				node_free_immediate(net, ln);
 			return ERR_PTR(-ENOMEM);
 		}
 
@@ -617,31 +744,28 @@ insert_above:
 
 		in->fn_bit = bit;
 
-		in->parent = pn;
+		RCU_INIT_POINTER(in->parent, pn);
 		in->leaf = fn->leaf;
-		atomic_inc(&in->leaf->rt6i_ref);
-
-		in->fn_sernum = sernum;
+		atomic_inc(&rcu_dereference_protected(in->leaf,
+				lockdep_is_held(&table->tb6_lock))->rt6i_ref);
 
 		/* update parent pointer */
 		if (dir)
-			pn->right = in;
+			rcu_assign_pointer(pn->right, in);
 		else
-			pn->left  = in;
+			rcu_assign_pointer(pn->left, in);
 
 		ln->fn_bit = plen;
 
-		ln->parent = in;
-		fn->parent = in;
-
-		ln->fn_sernum = sernum;
+		RCU_INIT_POINTER(ln->parent, in);
+		rcu_assign_pointer(fn->parent, in);
 
 		if (addr_bit_set(addr, bit)) {
-			in->right = ln;
-			in->left  = fn;
+			rcu_assign_pointer(in->right, ln);
+			rcu_assign_pointer(in->left, fn);
 		} else {
-			in->left  = ln;
-			in->right = fn;
+			rcu_assign_pointer(in->left, ln);
+			rcu_assign_pointer(in->right, fn);
 		}
 	} else { /* plen <= bit */
 
@@ -651,28 +775,26 @@ insert_above:
 		 *	     (old node)[fn] NULL
 		 */
 
-		ln = node_alloc();
+		ln = node_alloc(net);
 
 		if (!ln)
 			return ERR_PTR(-ENOMEM);
 
 		ln->fn_bit = plen;
 
-		ln->parent = pn;
-
-		ln->fn_sernum = sernum;
-
-		if (dir)
-			pn->right = ln;
-		else
-			pn->left  = ln;
+		RCU_INIT_POINTER(ln->parent, pn);
 
 		if (addr_bit_set(&key->addr, plen))
-			ln->right = fn;
+			RCU_INIT_POINTER(ln->right, fn);
 		else
-			ln->left  = fn;
+			RCU_INIT_POINTER(ln->left, fn);
+
+		rcu_assign_pointer(fn->parent, ln);
 
-		fn->parent = ln;
+		if (dir)
+			rcu_assign_pointer(pn->right, ln);
+		else
+			rcu_assign_pointer(pn->left, ln);
 	}
 	return ln;
 }
@@ -718,6 +840,8 @@ static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
 static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
 			  struct net *net)
 {
+	struct fib6_table *table = rt->rt6i_table;
+
 	if (atomic_read(&rt->rt6i_ref) != 1) {
 		/* This route is used as dummy address holder in some split
 		 * nodes. It is not leaked, but it still holds other resources,
@@ -726,15 +850,18 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
 		 * to still alive ones.
 		 */
 		while (fn) {
-			if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
-				fn->leaf = fib6_find_prefix(net, fn);
-				atomic_inc(&fn->leaf->rt6i_ref);
+			struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+					    lockdep_is_held(&table->tb6_lock));
+			struct rt6_info *new_leaf;
+			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
+				new_leaf = fib6_find_prefix(net, table, fn);
+				atomic_inc(&new_leaf->rt6i_ref);
+				rcu_assign_pointer(fn->leaf, new_leaf);
 				rt6_release(rt);
 			}
-			fn = fn->parent;
+			fn = rcu_dereference_protected(fn->parent,
+				    lockdep_is_held(&table->tb6_lock));
 		}
-		/* No more references are possible at this point. */
-		BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
 	}
 }
 
@@ -743,11 +870,14 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
  */
 
 static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
-			    struct nl_info *info, struct mx6_config *mxc)
+			    struct nl_info *info, struct mx6_config *mxc,
+			    struct netlink_ext_ack *extack)
 {
+	struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
 	struct rt6_info *iter = NULL;
-	struct rt6_info **ins;
-	struct rt6_info **fallback_ins = NULL;
+	struct rt6_info __rcu **ins;
+	struct rt6_info __rcu **fallback_ins = NULL;
 	int replace = (info->nlh &&
 		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
 	int add = (!info->nlh ||
@@ -762,7 +892,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 
 	ins = &fn->leaf;
 
-	for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) {
+	for (iter = leaf; iter;
+	     iter = rcu_dereference_protected(iter->dst.rt6_next,
+				lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
 		/*
 		 *	Search for duplicates
 		 */
@@ -824,7 +956,8 @@ next_iter:
 	if (fallback_ins && !found) {
 		/* No ECMP-able route found, replace first non-ECMP one */
 		ins = fallback_ins;
-		iter = *ins;
+		iter = rcu_dereference_protected(*ins,
+				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
 		found++;
 	}
 
@@ -838,7 +971,7 @@ next_iter:
 		struct rt6_info *sibling, *temp_sibling;
 
 		/* Find the first route that have the same metric */
-		sibling = fn->leaf;
+		sibling = leaf;
 		while (sibling) {
 			if (sibling->rt6i_metric == rt->rt6i_metric &&
 			    rt6_qualify_for_ecmp(sibling)) {
@@ -846,7 +979,8 @@ next_iter:
 					      &sibling->rt6i_siblings);
 				break;
 			}
-			sibling = sibling->dst.rt6_next;
+			sibling = rcu_dereference_protected(sibling->dst.rt6_next,
+				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
 		}
 		/* For each sibling in the list, increment the counter of
 		 * siblings. BUG() if counters does not match, list of siblings
@@ -875,10 +1009,12 @@ add:
 		if (err)
 			return err;
 
-		rt->dst.rt6_next = iter;
-		*ins = rt;
-		rt->rt6i_node = fn;
+		rcu_assign_pointer(rt->dst.rt6_next, iter);
 		atomic_inc(&rt->rt6i_ref);
+		rcu_assign_pointer(rt->rt6i_node, fn);
+		rcu_assign_pointer(*ins, rt);
+		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
+					  rt, extack);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
 		info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -902,10 +1038,12 @@ add:
 		if (err)
 			return err;
 
-		*ins = rt;
-		rt->rt6i_node = fn;
-		rt->dst.rt6_next = iter->dst.rt6_next;
 		atomic_inc(&rt->rt6i_ref);
+		rcu_assign_pointer(rt->rt6i_node, fn);
+		rt->dst.rt6_next = iter->dst.rt6_next;
+		rcu_assign_pointer(*ins, rt);
+		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
+					  rt, extack);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
 		if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -913,25 +1051,34 @@ add:
 			fn->fn_flags |= RTN_RTINFO;
 		}
 		nsiblings = iter->rt6i_nsiblings;
+		iter->rt6i_node = NULL;
 		fib6_purge_rt(iter, fn, info->nl_net);
+		if (rcu_access_pointer(fn->rr_ptr) == iter)
+			fn->rr_ptr = NULL;
 		rt6_release(iter);
 
 		if (nsiblings) {
 			/* Replacing an ECMP route, remove all siblings */
 			ins = &rt->dst.rt6_next;
-			iter = *ins;
+			iter = rcu_dereference_protected(*ins,
+				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
 			while (iter) {
 				if (iter->rt6i_metric > rt->rt6i_metric)
 					break;
 				if (rt6_qualify_for_ecmp(iter)) {
 					*ins = iter->dst.rt6_next;
+					iter->rt6i_node = NULL;
 					fib6_purge_rt(iter, fn, info->nl_net);
+					if (rcu_access_pointer(fn->rr_ptr) == iter)
+						fn->rr_ptr = NULL;
 					rt6_release(iter);
 					nsiblings--;
+					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
 				} else {
 					ins = &iter->dst.rt6_next;
 				}
-				iter = *ins;
+				iter = rcu_dereference_protected(*ins,
+					lockdep_is_held(&rt->rt6i_table->tb6_lock));
 			}
 			WARN_ON(nsiblings != 0);
 		}
@@ -955,16 +1102,33 @@ void fib6_force_start_gc(struct net *net)
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
 
+static void fib6_update_sernum_upto_root(struct rt6_info *rt,
+					 int sernum)
+{
+	struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+				lockdep_is_held(&rt->rt6i_table->tb6_lock));
+
+	/* paired with smp_rmb() in rt6_get_cookie_safe() */
+	smp_wmb();
+	while (fn) {
+		fn->fn_sernum = sernum;
+		fn = rcu_dereference_protected(fn->parent,
+				lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	}
+}
+
 /*
  *	Add routing information to the routing tree.
  *	<destination addr>/<source addr>
  *	with source addr info in sub-trees
+ *	Need to own table->tb6_lock
  */
 
 int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	     struct nl_info *info, struct mx6_config *mxc,
 	     struct netlink_ext_ack *extack)
 {
+	struct fib6_table *table = rt->rt6i_table;
 	struct fib6_node *fn, *pn = NULL;
 	int err = -ENOMEM;
 	int allow_create = 1;
@@ -973,6 +1137,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 	if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
 		return -EINVAL;
+	if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
+		return -EINVAL;
 
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -983,9 +1149,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	if (!allow_create && !replace_required)
 		pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
 
-	fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+	fn = fib6_add_1(info->nl_net, table, root,
+			&rt->rt6i_dst.addr, rt->rt6i_dst.plen,
 			offsetof(struct rt6_info, rt6i_dst), allow_create,
-			replace_required, sernum, extack);
+			replace_required, extack);
 	if (IS_ERR(fn)) {
 		err = PTR_ERR(fn);
 		fn = NULL;
@@ -998,7 +1165,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	if (rt->rt6i_src.plen) {
 		struct fib6_node *sn;
 
-		if (!fn->subtree) {
+		if (!rcu_access_pointer(fn->subtree)) {
 			struct fib6_node *sfn;
 
 			/*
@@ -1012,62 +1179,59 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			 */
 
 			/* Create subtree root node */
-			sfn = node_alloc();
+			sfn = node_alloc(info->nl_net);
 			if (!sfn)
-				goto st_failure;
+				goto failure;
 
-			sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
 			atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
+			rcu_assign_pointer(sfn->leaf,
+					   info->nl_net->ipv6.ip6_null_entry);
 			sfn->fn_flags = RTN_ROOT;
-			sfn->fn_sernum = sernum;
 
 			/* Now add the first leaf node to new subtree */
 
-			sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
-					rt->rt6i_src.plen,
+			sn = fib6_add_1(info->nl_net, table, sfn,
+					&rt->rt6i_src.addr, rt->rt6i_src.plen,
 					offsetof(struct rt6_info, rt6i_src),
-					allow_create, replace_required, sernum,
-					extack);
+					allow_create, replace_required, extack);
 
 			if (IS_ERR(sn)) {
 				/* If it is failed, discard just allocated
-				   root, and then (in st_failure) stale node
+				   root, and then (in failure) stale node
 				   in main tree.
 				 */
-				node_free(sfn);
+				node_free_immediate(info->nl_net, sfn);
 				err = PTR_ERR(sn);
-				goto st_failure;
+				goto failure;
 			}
 
 			/* Now link new subtree to main tree */
-			sfn->parent = fn;
-			fn->subtree = sfn;
+			rcu_assign_pointer(sfn->parent, fn);
+			rcu_assign_pointer(fn->subtree, sfn);
 		} else {
-			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
-					rt->rt6i_src.plen,
+			sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
+					&rt->rt6i_src.addr, rt->rt6i_src.plen,
 					offsetof(struct rt6_info, rt6i_src),
-					allow_create, replace_required, sernum,
-					extack);
+					allow_create, replace_required, extack);
 
 			if (IS_ERR(sn)) {
 				err = PTR_ERR(sn);
-				goto st_failure;
+				goto failure;
 			}
 		}
 
-		if (!fn->leaf) {
-			fn->leaf = rt;
+		if (!rcu_access_pointer(fn->leaf)) {
 			atomic_inc(&rt->rt6i_ref);
+			rcu_assign_pointer(fn->leaf, rt);
 		}
 		fn = sn;
 	}
 #endif
 
-	err = fib6_add_rt2node(fn, rt, info, mxc);
+	err = fib6_add_rt2node(fn, rt, info, mxc, extack);
 	if (!err) {
+		fib6_update_sernum_upto_root(rt, sernum);
 		fib6_start_gc(info->nl_net, rt);
-		if (!(rt->rt6i_flags & RTF_CACHE))
-			fib6_prune_clones(info->nl_net, pn);
 	}
 
 out:
@@ -1077,41 +1241,43 @@ out:
 		 * If fib6_add_1 has cleared the old leaf pointer in the
 		 * super-tree leaf node we have to find a new one for it.
 		 */
-		if (pn != fn && pn->leaf == rt) {
-			pn->leaf = NULL;
+		struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+					    lockdep_is_held(&table->tb6_lock));
+		if (pn != fn && pn_leaf == rt) {
+			pn_leaf = NULL;
+			RCU_INIT_POINTER(pn->leaf, NULL);
 			atomic_dec(&rt->rt6i_ref);
 		}
-		if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
-			pn->leaf = fib6_find_prefix(info->nl_net, pn);
+		if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+			pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
 #if RT6_DEBUG >= 2
-			if (!pn->leaf) {
-				WARN_ON(pn->leaf == NULL);
-				pn->leaf = info->nl_net->ipv6.ip6_null_entry;
+			if (!pn_leaf) {
+				WARN_ON(!pn_leaf);
+				pn_leaf = info->nl_net->ipv6.ip6_null_entry;
 			}
 #endif
-			atomic_inc(&pn->leaf->rt6i_ref);
+			atomic_inc(&pn_leaf->rt6i_ref);
+			rcu_assign_pointer(pn->leaf, pn_leaf);
 		}
 #endif
-		/* Always release dst as dst->__refcnt is guaranteed
-		 * to be taken before entering this function
-		 */
-		dst_release_immediate(&rt->dst);
+		goto failure;
 	}
 	return err;
 
-#ifdef CONFIG_IPV6_SUBTREES
-	/* Subtree creation failed, probably main tree node
-	   is orphan. If it is, shoot it.
+failure:
+	/* fn->leaf could be NULL if fn is an intermediate node and we
+	 * failed to add the new route to it in both subtree creation
+	 * failure and fib6_add_rt2node() failure case.
+	 * In both cases, fib6_repair_tree() should be called to fix
+	 * fn->leaf.
 	 */
-st_failure:
 	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
-		fib6_repair_tree(info->nl_net, fn);
+		fib6_repair_tree(info->nl_net, table, fn);
 	/* Always release dst as dst->__refcnt is guaranteed
 	 * to be taken before entering this function
 	 */
 	dst_release_immediate(&rt->dst);
 	return err;
-#endif
 }
 
 /*
@@ -1144,7 +1310,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
 
 		dir = addr_bit_set(args->addr, fn->fn_bit);
 
-		next = dir ? fn->right : fn->left;
+		next = dir ? rcu_dereference(fn->right) :
+			     rcu_dereference(fn->left);
 
 		if (next) {
 			fn = next;
@@ -1154,18 +1321,22 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
 	}
 
 	while (fn) {
-		if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
+		struct fib6_node *subtree = FIB6_SUBTREE(fn);
+
+		if (subtree || fn->fn_flags & RTN_RTINFO) {
+			struct rt6_info *leaf = rcu_dereference(fn->leaf);
 			struct rt6key *key;
 
-			key = (struct rt6key *) ((u8 *) fn->leaf +
-						 args->offset);
+			if (!leaf)
+				goto backtrack;
+
+			key = (struct rt6key *) ((u8 *)leaf + args->offset);
 
 			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
 #ifdef CONFIG_IPV6_SUBTREES
-				if (fn->subtree) {
+				if (subtree) {
 					struct fib6_node *sfn;
-					sfn = fib6_lookup_1(fn->subtree,
-							    args + 1);
+					sfn = fib6_lookup_1(subtree, args + 1);
 					if (!sfn)
 						goto backtrack;
 					fn = sfn;
@@ -1175,18 +1346,18 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
 					return fn;
 			}
 		}
-#ifdef CONFIG_IPV6_SUBTREES
 backtrack:
-#endif
 		if (fn->fn_flags & RTN_ROOT)
 			break;
 
-		fn = fn->parent;
+		fn = rcu_dereference(fn->parent);
 	}
 
 	return NULL;
 }
 
+/* called with rcu_read_lock() held
+ */
 struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
 			      const struct in6_addr *saddr)
 {
@@ -1217,54 +1388,87 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
 /*
  *	Get node with specified destination prefix (and source prefix,
  *	if subtrees are used)
+ *	exact_match == true means we try to find fn with exact match of
+ *	the passed in prefix addr
+ *	exact_match == false means we try to find fn with longest prefix
+ *	match of the passed in prefix addr. This is useful for finding fn
+ *	for cached route as it will be stored in the exception table under
+ *	the node with longest prefix length.
  */
 
 
 static struct fib6_node *fib6_locate_1(struct fib6_node *root,
 				       const struct in6_addr *addr,
-				       int plen, int offset)
+				       int plen, int offset,
+				       bool exact_match)
 {
-	struct fib6_node *fn;
+	struct fib6_node *fn, *prev = NULL;
 
 	for (fn = root; fn ; ) {
-		struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
+		struct rt6_info *leaf = rcu_dereference(fn->leaf);
+		struct rt6key *key;
+
+		/* This node is being deleted */
+		if (!leaf) {
+			if (plen <= fn->fn_bit)
+				goto out;
+			else
+				goto next;
+		}
+
+		key = (struct rt6key *)((u8 *)leaf + offset);
 
 		/*
 		 *	Prefix match
 		 */
 		if (plen < fn->fn_bit ||
 		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
-			return NULL;
+			goto out;
 
 		if (plen == fn->fn_bit)
 			return fn;
 
+		prev = fn;
+
+next:
 		/*
 		 *	We have more bits to go
 		 */
 		if (addr_bit_set(addr, fn->fn_bit))
-			fn = fn->right;
+			fn = rcu_dereference(fn->right);
 		else
-			fn = fn->left;
+			fn = rcu_dereference(fn->left);
 	}
-	return NULL;
+out:
+	if (exact_match)
+		return NULL;
+	else
+		return prev;
 }
 
 struct fib6_node *fib6_locate(struct fib6_node *root,
 			      const struct in6_addr *daddr, int dst_len,
-			      const struct in6_addr *saddr, int src_len)
+			      const struct in6_addr *saddr, int src_len,
+			      bool exact_match)
 {
 	struct fib6_node *fn;
 
 	fn = fib6_locate_1(root, daddr, dst_len,
-			   offsetof(struct rt6_info, rt6i_dst));
+			   offsetof(struct rt6_info, rt6i_dst),
+			   exact_match);
 
 #ifdef CONFIG_IPV6_SUBTREES
 	if (src_len) {
 		WARN_ON(saddr == NULL);
-		if (fn && fn->subtree)
-			fn = fib6_locate_1(fn->subtree, saddr, src_len,
-					   offsetof(struct rt6_info, rt6i_src));
+		if (fn) {
+			struct fib6_node *subtree = FIB6_SUBTREE(fn);
+
+			if (subtree) {
+				fn = fib6_locate_1(subtree, saddr, src_len,
+					   offsetof(struct rt6_info, rt6i_src),
+					   exact_match);
+			}
+		}
 	}
 #endif
 
@@ -1280,16 +1484,26 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
  *
  */
 
-static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
+static struct rt6_info *fib6_find_prefix(struct net *net,
+					 struct fib6_table *table,
+					 struct fib6_node *fn)
 {
+	struct fib6_node *child_left, *child_right;
+
 	if (fn->fn_flags & RTN_ROOT)
 		return net->ipv6.ip6_null_entry;
 
 	while (fn) {
-		if (fn->left)
-			return fn->left->leaf;
-		if (fn->right)
-			return fn->right->leaf;
+		child_left = rcu_dereference_protected(fn->left,
+				    lockdep_is_held(&table->tb6_lock));
+		child_right = rcu_dereference_protected(fn->right,
+				    lockdep_is_held(&table->tb6_lock));
+		if (child_left)
+			return rcu_dereference_protected(child_left->leaf,
+					lockdep_is_held(&table->tb6_lock));
+		if (child_right)
+			return rcu_dereference_protected(child_right->leaf,
+					lockdep_is_held(&table->tb6_lock));
 
 		fn = FIB6_SUBTREE(fn);
 	}
@@ -1299,31 +1513,49 @@ static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
 /*
  *	Called to trim the tree of intermediate nodes when possible. "fn"
  *	is the node we want to try and remove.
+ *	Need to own table->tb6_lock
  */
 
 static struct fib6_node *fib6_repair_tree(struct net *net,
-					   struct fib6_node *fn)
+					  struct fib6_table *table,
+					  struct fib6_node *fn)
 {
 	int children;
 	int nstate;
-	struct fib6_node *child, *pn;
+	struct fib6_node *child;
 	struct fib6_walker *w;
 	int iter = 0;
 
 	for (;;) {
+		struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
+					    lockdep_is_held(&table->tb6_lock));
+		struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
+					    lockdep_is_held(&table->tb6_lock));
+		struct fib6_node *pn = rcu_dereference_protected(fn->parent,
+					    lockdep_is_held(&table->tb6_lock));
+		struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
+					    lockdep_is_held(&table->tb6_lock));
+		struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
+					    lockdep_is_held(&table->tb6_lock));
+		struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
+					    lockdep_is_held(&table->tb6_lock));
+		struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+					    lockdep_is_held(&table->tb6_lock));
+		struct rt6_info *new_fn_leaf;
+
 		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
 		iter++;
 
 		WARN_ON(fn->fn_flags & RTN_RTINFO);
 		WARN_ON(fn->fn_flags & RTN_TL_ROOT);
-		WARN_ON(fn->leaf);
+		WARN_ON(fn_leaf);
 
 		children = 0;
 		child = NULL;
-		if (fn->right)
-			child = fn->right, children |= 1;
-		if (fn->left)
-			child = fn->left, children |= 2;
+		if (fn_r)
+			child = fn_r, children |= 1;
+		if (fn_l)
+			child = fn_l, children |= 2;
 
 		if (children == 3 || FIB6_SUBTREE(fn)
 #ifdef CONFIG_IPV6_SUBTREES
@@ -1331,36 +1563,36 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 		    || (children && fn->fn_flags & RTN_ROOT)
 #endif
 		    ) {
-			fn->leaf = fib6_find_prefix(net, fn);
+			new_fn_leaf = fib6_find_prefix(net, table, fn);
 #if RT6_DEBUG >= 2
-			if (!fn->leaf) {
-				WARN_ON(!fn->leaf);
-				fn->leaf = net->ipv6.ip6_null_entry;
+			if (!new_fn_leaf) {
+				WARN_ON(!new_fn_leaf);
+				new_fn_leaf = net->ipv6.ip6_null_entry;
 			}
 #endif
-			atomic_inc(&fn->leaf->rt6i_ref);
-			return fn->parent;
+			atomic_inc(&new_fn_leaf->rt6i_ref);
+			rcu_assign_pointer(fn->leaf, new_fn_leaf);
+			return pn;
 		}
 
-		pn = fn->parent;
 #ifdef CONFIG_IPV6_SUBTREES
 		if (FIB6_SUBTREE(pn) == fn) {
 			WARN_ON(!(fn->fn_flags & RTN_ROOT));
-			FIB6_SUBTREE(pn) = NULL;
+			RCU_INIT_POINTER(pn->subtree, NULL);
 			nstate = FWS_L;
 		} else {
 			WARN_ON(fn->fn_flags & RTN_ROOT);
 #endif
-			if (pn->right == fn)
-				pn->right = child;
-			else if (pn->left == fn)
-				pn->left = child;
+			if (pn_r == fn)
+				rcu_assign_pointer(pn->right, child);
+			else if (pn_l == fn)
+				rcu_assign_pointer(pn->left, child);
 #if RT6_DEBUG >= 2
 			else
 				WARN_ON(1);
 #endif
 			if (child)
-				child->parent = pn;
+				rcu_assign_pointer(child->parent, pn);
 			nstate = FWS_R;
 #ifdef CONFIG_IPV6_SUBTREES
 		}
@@ -1369,19 +1601,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 		read_lock(&net->ipv6.fib6_walker_lock);
 		FOR_WALKERS(net, w) {
 			if (!child) {
-				if (w->root == fn) {
-					w->root = w->node = NULL;
-					RT6_TRACE("W %p adjusted by delroot 1\n", w);
-				} else if (w->node == fn) {
+				if (w->node == fn) {
 					RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
 					w->node = pn;
 					w->state = nstate;
 				}
 			} else {
-				if (w->root == fn) {
-					w->root = child;
-					RT6_TRACE("W %p adjusted by delroot 2\n", w);
-				}
 				if (w->node == fn) {
 					w->node = child;
 					if (children&2) {
@@ -1396,33 +1621,39 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
 		}
 		read_unlock(&net->ipv6.fib6_walker_lock);
 
-		node_free(fn);
+		node_free(net, fn);
 		if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
 			return pn;
 
-		rt6_release(pn->leaf);
-		pn->leaf = NULL;
+		RCU_INIT_POINTER(pn->leaf, NULL);
+		rt6_release(pn_leaf);
 		fn = pn;
 	}
 }
 
-static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
-			   struct nl_info *info)
+static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
+			   struct rt6_info __rcu **rtp, struct nl_info *info)
 {
 	struct fib6_walker *w;
-	struct rt6_info *rt = *rtp;
+	struct rt6_info *rt = rcu_dereference_protected(*rtp,
+				    lockdep_is_held(&table->tb6_lock));
 	struct net *net = info->nl_net;
 
 	RT6_TRACE("fib6_del_route\n");
 
+	WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
+
 	/* Unlink it */
 	*rtp = rt->dst.rt6_next;
 	rt->rt6i_node = NULL;
 	net->ipv6.rt6_stats->fib_rt_entries--;
 	net->ipv6.rt6_stats->fib_discarded_routes++;
 
+	/* Flush all cached dst in exception table */
+	rt6_flush_exceptions(rt);
+
 	/* Reset round-robin state, if necessary */
-	if (fn->rr_ptr == rt)
+	if (rcu_access_pointer(fn->rr_ptr) == rt)
 		fn->rr_ptr = NULL;
 
 	/* Remove this entry from other siblings */
@@ -1441,34 +1672,38 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	FOR_WALKERS(net, w) {
 		if (w->state == FWS_C && w->leaf == rt) {
 			RT6_TRACE("walker %p adjusted by delroute\n", w);
-			w->leaf = rt->dst.rt6_next;
+			w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
+					    lockdep_is_held(&table->tb6_lock));
 			if (!w->leaf)
 				w->state = FWS_U;
 		}
 	}
 	read_unlock(&net->ipv6.fib6_walker_lock);
 
-	rt->dst.rt6_next = NULL;
-
 	/* If it was last route, expunge its radix tree node */
-	if (!fn->leaf) {
+	if (!rcu_access_pointer(fn->leaf)) {
 		fn->fn_flags &= ~RTN_RTINFO;
 		net->ipv6.rt6_stats->fib_route_nodes--;
-		fn = fib6_repair_tree(net, fn);
+		fn = fib6_repair_tree(net, table, fn);
 	}
 
 	fib6_purge_rt(rt, fn, net);
 
+	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
 	if (!info->skip_notify)
 		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
 	rt6_release(rt);
 }
 
+/* Need to own table->tb6_lock */
 int fib6_del(struct rt6_info *rt, struct nl_info *info)
 {
+	struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	struct fib6_table *table = rt->rt6i_table;
 	struct net *net = info->nl_net;
-	struct fib6_node *fn = rt->rt6i_node;
-	struct rt6_info **rtp;
+	struct rt6_info __rcu **rtp;
+	struct rt6_info __rcu **rtp_next;
 
 #if RT6_DEBUG >= 2
 	if (rt->dst.obsolete > 0) {
@@ -1481,28 +1716,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 
 	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
 
-	if (!(rt->rt6i_flags & RTF_CACHE)) {
-		struct fib6_node *pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
-		/* clones of this route might be in another subtree */
-		if (rt->rt6i_src.plen) {
-			while (!(pn->fn_flags & RTN_ROOT))
-				pn = pn->parent;
-			pn = pn->parent;
-		}
-#endif
-		fib6_prune_clones(info->nl_net, pn);
-	}
+	/* remove cached dst from exception table */
+	if (rt->rt6i_flags & RTF_CACHE)
+		return rt6_remove_exception_rt(rt);
 
 	/*
 	 *	Walk the leaf entries looking for ourself
 	 */
 
-	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
-		if (*rtp == rt) {
-			fib6_del_route(fn, rtp, info);
+	for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
+		struct rt6_info *cur = rcu_dereference_protected(*rtp,
+					lockdep_is_held(&table->tb6_lock));
+		if (rt == cur) {
+			fib6_del_route(table, fn, rtp, info);
 			return 0;
 		}
+		rtp_next = &cur->dst.rt6_next;
 	}
 	return -ENOENT;
 }
@@ -1529,22 +1758,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
  *	0   -> walk is complete.
  *	>0  -> walk is incomplete (i.e. suspended)
  *	<0  -> walk is terminated by an error.
+ *
+ *	This function is called with tb6_lock held.
  */
 
 static int fib6_walk_continue(struct fib6_walker *w)
 {
-	struct fib6_node *fn, *pn;
+	struct fib6_node *fn, *pn, *left, *right;
+
+	/* w->root should always be table->tb6_root */
+	WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
 
 	for (;;) {
 		fn = w->node;
 		if (!fn)
 			return 0;
 
-		if (w->prune && fn != w->root &&
-		    fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
-			w->state = FWS_C;
-			w->leaf = fn->leaf;
-		}
 		switch (w->state) {
 #ifdef CONFIG_IPV6_SUBTREES
 		case FWS_S:
@@ -1554,21 +1783,26 @@ static int fib6_walk_continue(struct fib6_walker *w)
 			}
 			w->state = FWS_L;
 #endif
+			/* fall through */
 		case FWS_L:
-			if (fn->left) {
-				w->node = fn->left;
+			left = rcu_dereference_protected(fn->left, 1);
+			if (left) {
+				w->node = left;
 				w->state = FWS_INIT;
 				continue;
 			}
 			w->state = FWS_R;
+			/* fall through */
 		case FWS_R:
-			if (fn->right) {
-				w->node = fn->right;
+			right = rcu_dereference_protected(fn->right, 1);
+			if (right) {
+				w->node = right;
 				w->state = FWS_INIT;
 				continue;
 			}
 			w->state = FWS_C;
-			w->leaf = fn->leaf;
+			w->leaf = rcu_dereference_protected(fn->leaf, 1);
+			/* fall through */
 		case FWS_C:
 			if (w->leaf && fn->fn_flags & RTN_RTINFO) {
 				int err;
@@ -1587,10 +1821,13 @@ static int fib6_walk_continue(struct fib6_walker *w)
 			}
 skip:
 			w->state = FWS_U;
+			/* fall through */
 		case FWS_U:
 			if (fn == w->root)
 				return 0;
-			pn = fn->parent;
+			pn = rcu_dereference_protected(fn->parent, 1);
+			left = rcu_dereference_protected(pn->left, 1);
+			right = rcu_dereference_protected(pn->right, 1);
 			w->node = pn;
 #ifdef CONFIG_IPV6_SUBTREES
 			if (FIB6_SUBTREE(pn) == fn) {
@@ -1599,13 +1836,13 @@ skip:
 				continue;
 			}
 #endif
-			if (pn->left == fn) {
+			if (left == fn) {
 				w->state = FWS_R;
 				continue;
 			}
-			if (pn->right == fn) {
+			if (right == fn) {
 				w->state = FWS_C;
-				w->leaf = w->node->leaf;
+				w->leaf = rcu_dereference_protected(w->node->leaf, 1);
 				continue;
 			}
 #if RT6_DEBUG >= 2
@@ -1648,7 +1885,7 @@ static int fib6_clean_node(struct fib6_walker *w)
 		return 0;
 	}
 
-	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+	for_each_fib6_walker_rt(w) {
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
@@ -1656,7 +1893,9 @@ static int fib6_clean_node(struct fib6_walker *w)
 			if (res) {
 #if RT6_DEBUG >= 2
 				pr_debug("%s: del failed: rt=%p@%p err=%d\n",
-					 __func__, rt, rt->rt6i_node, res);
+					 __func__, rt,
+					 rcu_access_pointer(rt->rt6i_node),
+					 res);
 #endif
 				continue;
 			}
@@ -1674,20 +1913,16 @@ static int fib6_clean_node(struct fib6_walker *w)
  *	func is called on each route.
  *		It may return -1 -> delete this route.
  *		              0  -> continue walking
- *
- *	prune==1 -> only immediate children of node (certainly,
- *	ignoring pure split nodes) will be scanned.
  */
 
 static void fib6_clean_tree(struct net *net, struct fib6_node *root,
 			    int (*func)(struct rt6_info *, void *arg),
-			    bool prune, int sernum, void *arg)
+			    int sernum, void *arg)
 {
 	struct fib6_cleaner c;
 
 	c.w.root = root;
 	c.w.func = fib6_clean_node;
-	c.w.prune = prune;
 	c.w.count = 0;
 	c.w.skip = 0;
 	c.func = func;
@@ -1710,10 +1945,10 @@ static void __fib6_clean_all(struct net *net,
 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
 		head = &net->ipv6.fib_table_hash[h];
 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
-			write_lock_bh(&table->tb6_lock);
+			spin_lock_bh(&table->tb6_lock);
 			fib6_clean_tree(net, &table->tb6_root,
-					func, false, sernum, arg);
-			write_unlock_bh(&table->tb6_lock);
+					func, sernum, arg);
+			spin_unlock_bh(&table->tb6_lock);
 		}
 	}
 	rcu_read_unlock();
@@ -1725,22 +1960,6 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
 	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
 }
 
-static int fib6_prune_clone(struct rt6_info *rt, void *arg)
-{
-	if (rt->rt6i_flags & RTF_CACHE) {
-		RT6_TRACE("pruning clone %p\n", rt);
-		return -1;
-	}
-
-	return 0;
-}
-
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
-{
-	fib6_clean_tree(net, fn, fib6_prune_clone, true,
-			FIB6_NO_SERNUM_CHANGE, NULL);
-}
-
 static void fib6_flush_trees(struct net *net)
 {
 	int new_sernum = fib6_new_sernum(net);
@@ -1752,12 +1971,6 @@ static void fib6_flush_trees(struct net *net)
  *	Garbage collection
  */
 
-struct fib6_gc_args
-{
-	int			timeout;
-	int			more;
-};
-
 static int fib6_age(struct rt6_info *rt, void *arg)
 {
 	struct fib6_gc_args *gc_args = arg;
@@ -1766,9 +1979,6 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 	/*
 	 *	check addrconf expiration here.
 	 *	Routes are expired even if they are in use.
-	 *
-	 *	Also age clones. Note, that clones are aged out
-	 *	only if they are not in use now.
 	 */
 
 	if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
@@ -1777,29 +1987,14 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 			return -1;
 		}
 		gc_args->more++;
-	} else if (rt->rt6i_flags & RTF_CACHE) {
-		if (atomic_read(&rt->dst.__refcnt) == 1 &&
-		    time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
-			RT6_TRACE("aging clone %p\n", rt);
-			return -1;
-		} else if (rt->rt6i_flags & RTF_GATEWAY) {
-			struct neighbour *neigh;
-			__u8 neigh_flags = 0;
-
-			neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
-			if (neigh) {
-				neigh_flags = neigh->flags;
-				neigh_release(neigh);
-			}
-			if (!(neigh_flags & NTF_ROUTER)) {
-				RT6_TRACE("purging route %p via non-router but gateway\n",
-					  rt);
-				return -1;
-			}
-		}
-		gc_args->more++;
 	}
 
+	/*	Also age clones in the exception table.
+	 *	Note, that clones are aged out
+	 *	only if they are not in use now.
+	 */
+	rt6_age_exceptions(rt, gc_args, now);
+
 	return 0;
 }
 
@@ -1839,6 +2034,11 @@ static void fib6_gc_timer_cb(unsigned long arg)
 static int __net_init fib6_net_init(struct net *net)
 {
 	size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+	int err;
+
+	err = fib6_notifier_init(net);
+	if (err)
+		return err;
 
 	spin_lock_init(&net->ipv6.fib6_gc_lock);
 	rwlock_init(&net->ipv6.fib6_walker_lock);
@@ -1862,7 +2062,8 @@ static int __net_init fib6_net_init(struct net *net)
 		goto out_fib_table_hash;
 
 	net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
-	net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+	rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
+			   net->ipv6.ip6_null_entry);
 	net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
 		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
 	inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
@@ -1873,7 +2074,8 @@ static int __net_init fib6_net_init(struct net *net)
 	if (!net->ipv6.fib6_local_tbl)
 		goto out_fib6_main_tbl;
 	net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
-	net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+	rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
+			   net->ipv6.ip6_null_entry);
 	net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
 		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
 	inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
@@ -1891,22 +2093,31 @@ out_fib_table_hash:
 out_rt6_stats:
 	kfree(net->ipv6.rt6_stats);
 out_timer:
+	fib6_notifier_exit(net);
 	return -ENOMEM;
 }
 
 static void fib6_net_exit(struct net *net)
 {
+	unsigned int i;
+
 	rt6_ifdown(net, NULL);
 	del_timer_sync(&net->ipv6.ip6_fib_timer);
 
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
-	kfree(net->ipv6.fib6_local_tbl);
-#endif
-	inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
-	kfree(net->ipv6.fib6_main_tbl);
+	for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
+		struct hlist_head *head = &net->ipv6.fib_table_hash[i];
+		struct hlist_node *tmp;
+		struct fib6_table *tb;
+
+		hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
+			hlist_del(&tb->tb6_hlist);
+			fib6_free_table(tb);
+		}
+	}
+
 	kfree(net->ipv6.fib_table_hash);
 	kfree(net->ipv6.rt6_stats);
+	fib6_notifier_exit(net);
 }
 
 static struct pernet_operations fib6_net_ops = {
@@ -1930,7 +2141,7 @@ int __init fib6_init(void)
 		goto out_kmem_cache_create;
 
 	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
-			      NULL);
+			      0);
 	if (ret)
 		goto out_unregister_subsys;
 
@@ -1994,7 +2205,9 @@ static int ipv6_route_yield(struct fib6_walker *w)
 		return 1;
 
 	do {
-		iter->w.leaf = iter->w.leaf->dst.rt6_next;
+		iter->w.leaf = rcu_dereference_protected(
+				iter->w.leaf->dst.rt6_next,
+				lockdep_is_held(&iter->tbl->tb6_lock));
 		iter->skip--;
 		if (!iter->skip && iter->w.leaf)
 			return 1;
@@ -2059,7 +2272,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (!v)
 		goto iter_table;
 
-	n = ((struct rt6_info *)v)->dst.rt6_next;
+	n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
 	if (n) {
 		++*pos;
 		return n;
@@ -2067,9 +2280,9 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 iter_table:
 	ipv6_route_check_sernum(iter);
-	read_lock(&iter->tbl->tb6_lock);
+	spin_lock_bh(&iter->tbl->tb6_lock);
 	r = fib6_walk_continue(&iter->w);
-	read_unlock(&iter->tbl->tb6_lock);
+	spin_unlock_bh(&iter->tbl->tb6_lock);
 	if (r > 0) {
 		if (v)
 			++*pos;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 8081bafe441b..9f2e73c71768 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -47,7 +47,7 @@ static atomic_t fl_size = ATOMIC_INIT(0);
 static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
 
 static void ip6_fl_gc(unsigned long dummy);
-static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
+static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);
 
 /* FL hash table lock: it protects only of GC */
 
@@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
 	}
 	opt_space->dst1opt = fopt->dst1opt;
 	opt_space->opt_flen = fopt->opt_flen;
+	opt_space->tot_len = fopt->tot_len;
 	return opt_space;
 }
 EXPORT_SYMBOL_GPL(fl6_merge_options);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 67ff2aaf5dcb..b90bad7a4e56 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -369,6 +369,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
 static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		       u8 type, u8 code, int offset, __be32 info)
 {
+	struct net *net = dev_net(skb->dev);
 	const struct gre_base_hdr *greh;
 	const struct ipv6hdr *ipv6h;
 	int grehlen = sizeof(*greh);
@@ -402,19 +403,21 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		return;
 
 	switch (type) {
-		__u32 teli;
 		struct ipv6_tlv_tnl_enc_lim *tel;
-		__u32 mtu;
+		__u32 teli;
 	case ICMPV6_DEST_UNREACH:
 		net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
 				    t->parms.name);
-		break;
+		if (code != ICMPV6_PORT_UNREACH)
+			break;
+		return;
 	case ICMPV6_TIME_EXCEED:
 		if (code == ICMPV6_EXC_HOPLIMIT) {
 			net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
 					    t->parms.name);
+			break;
 		}
-		break;
+		return;
 	case ICMPV6_PARAMPROB:
 		teli = 0;
 		if (code == ICMPV6_HDR_FIELD)
@@ -430,13 +433,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
 					    t->parms.name);
 		}
-		break;
+		return;
 	case ICMPV6_PKT_TOOBIG:
-		mtu = be32_to_cpu(info) - offset;
-		if (mtu < IPV6_MIN_MTU)
-			mtu = IPV6_MIN_MTU;
-		t->dev->mtu = mtu;
-		break;
+		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
+		return;
+	case NDISC_REDIRECT:
+		ip6_redirect(skb, net, skb->dev->ifindex, 0,
+			     sock_net_uid(net, NULL));
+		return;
 	}
 
 	if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
@@ -498,8 +502,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 			       __u32 *pmtu, __be16 proto)
 {
 	struct ip6_tnl *tunnel = netdev_priv(dev);
-	__be16 protocol = (dev->type == ARPHRD_ETHER) ?
-			  htons(ETH_P_TEB) : proto;
+	__be16 protocol;
 
 	if (dev->type == ARPHRD_ETHER)
 		IPCB(skb)->flags = 0;
@@ -513,6 +516,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		tunnel->o_seqno++;
 
 	/* Push GRE header. */
+	protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
 	gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
 			 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
 
@@ -938,24 +942,25 @@ done:
 }
 
 static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
-			unsigned short type,
-			const void *daddr, const void *saddr, unsigned int len)
+			 unsigned short type, const void *daddr,
+			 const void *saddr, unsigned int len)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	struct ipv6hdr *ipv6h = skb_push(skb, t->hlen);
-	__be16 *p = (__be16 *)(ipv6h+1);
+	struct ipv6hdr *ipv6h;
+	__be16 *p;
 
-	ip6_flow_hdr(ipv6h, 0,
-		     ip6_make_flowlabel(dev_net(dev), skb,
-					t->fl.u.ip6.flowlabel, true,
-					&t->fl.u.ip6));
+	ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h));
+	ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
+						  t->fl.u.ip6.flowlabel,
+						  true, &t->fl.u.ip6));
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = NEXTHDR_GRE;
 	ipv6h->saddr = t->parms.laddr;
 	ipv6h->daddr = t->parms.raddr;
 
-	p[0]		= t->parms.o_flags;
-	p[1]		= htons(type);
+	p = (__be16 *)(ipv6h + 1);
+	p[0] = t->parms.o_flags;
+	p[1] = htons(type);
 
 	/*
 	 *	Set the source hardware address.
@@ -1153,19 +1158,21 @@ err_alloc_dev:
 	return err;
 }
 
-static void __net_exit ip6gre_exit_net(struct net *net)
+static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
 {
+	struct net *net;
 	LIST_HEAD(list);
 
 	rtnl_lock();
-	ip6gre_destroy_tunnels(net, &list);
+	list_for_each_entry(net, net_list, exit_list)
+		ip6gre_destroy_tunnels(net, &list);
 	unregister_netdevice_many(&list);
 	rtnl_unlock();
 }
 
 static struct pernet_operations ip6gre_net_ops = {
 	.init = ip6gre_init_net,
-	.exit = ip6gre_exit_net,
+	.exit_batch = ip6gre_exit_batch_net,
 	.id   = &ip6gre_net_id,
 	.size = sizeof(struct ip6gre_net),
 };
@@ -1308,6 +1315,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
 	dev->features |= NETIF_F_NETNS_LOCAL;
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	netif_keep_dst(dev);
 }
 
 static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 713676f14a0e..02045494c24c 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/export.h>
 #include <linux/icmpv6.h>
 #include <linux/mutex.h>
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index cdb3728faca7..4a87f9428ca5 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 
 	for (skb = segs; skb; skb = skb->next) {
 		ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
-		if (gso_partial)
+		if (gso_partial && skb_is_gso(skb))
 			payload_len = skb_shinfo(skb)->gso_size +
 				      SKB_GSO_CB(skb)->data_offset +
 				      skb->head - (unsigned char *)(ipv6h + 1);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2dfe50d8d609..5110a418cc4d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1110,69 +1110,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 }
 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
 
-static inline int ip6_ufo_append_data(struct sock *sk,
-			struct sk_buff_head *queue,
-			int getfrag(void *from, char *to, int offset, int len,
-			int odd, struct sk_buff *skb),
-			void *from, int length, int hh_len, int fragheaderlen,
-			int exthdrlen, int transhdrlen, int mtu,
-			unsigned int flags, const struct flowi6 *fl6)
-
-{
-	struct sk_buff *skb;
-	int err;
-
-	/* There is support for UDP large send offload by network
-	 * device, so create one single skb packet containing complete
-	 * udp datagram
-	 */
-	skb = skb_peek_tail(queue);
-	if (!skb) {
-		skb = sock_alloc_send_skb(sk,
-			hh_len + fragheaderlen + transhdrlen + 20,
-			(flags & MSG_DONTWAIT), &err);
-		if (!skb)
-			return err;
-
-		/* reserve space for Hardware header */
-		skb_reserve(skb, hh_len);
-
-		/* create space for UDP/IP header */
-		skb_put(skb, fragheaderlen + transhdrlen);
-
-		/* initialize network header pointer */
-		skb_set_network_header(skb, exthdrlen);
-
-		/* initialize protocol header pointer */
-		skb->transport_header = skb->network_header + fragheaderlen;
-
-		skb->protocol = htons(ETH_P_IPV6);
-		skb->csum = 0;
-
-		if (flags & MSG_CONFIRM)
-			skb_set_dst_pending_confirm(skb, 1);
-
-		__skb_queue_tail(queue, skb);
-	} else if (skb_is_gso(skb)) {
-		goto append;
-	}
-
-	skb->ip_summed = CHECKSUM_PARTIAL;
-	/* Specify the length of each IPv6 datagram fragment.
-	 * It has to be a multiple of 8.
-	 */
-	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
-				     sizeof(struct frag_hdr)) & ~7;
-	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
-							 &fl6->daddr,
-							 &fl6->saddr);
-
-append:
-	return skb_append_datato_frags(sk, skb, getfrag, from,
-				       (length - transhdrlen));
-}
-
 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
 					       gfp_t gfp)
 {
@@ -1224,11 +1161,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 		if (WARN_ON(v6_cork->opt))
 			return -EINVAL;
 
-		v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
+		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
 		if (unlikely(!v6_cork->opt))
 			return -ENOBUFS;
 
-		v6_cork->opt->tot_len = opt->tot_len;
+		v6_cork->opt->tot_len = sizeof(*opt);
 		v6_cork->opt->opt_flen = opt->opt_flen;
 		v6_cork->opt->opt_nflen = opt->opt_nflen;
 
@@ -1381,20 +1318,6 @@ emsgsize:
 	 */
 
 	cork->length += length;
-	if ((skb && skb_is_gso(skb)) ||
-	    (((length + (skb ? skb->len : headersize)) > mtu) &&
-	    (skb_queue_len(queue) <= 1) &&
-	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
-	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) {
-		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
-					  hh_len, fragheaderlen, exthdrlen,
-					  transhdrlen, mtu, flags, fl6);
-		if (err)
-			goto error;
-		return 0;
-	}
-
 	if (!skb)
 		goto alloc_new_skb;
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3a0ba2ae4b0f..3d3092adf1d2 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -171,7 +171,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
 	}
 
 	t = rcu_dereference(ip6n->collect_md_tun);
-	if (t)
+	if (t && t->dev->flags & IFF_UP)
 		return t;
 
 	t = rcu_dereference(ip6n->tnls_wc[0]);
@@ -471,15 +471,16 @@ static int
 ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
 {
-	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
-	struct ip6_tnl *t;
-	int rel_msg = 0;
+	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+	struct net *net = dev_net(skb->dev);
 	u8 rel_type = ICMPV6_DEST_UNREACH;
 	u8 rel_code = ICMPV6_ADDR_UNREACH;
-	u8 tproto;
 	__u32 rel_info = 0;
-	__u16 len;
+	struct ip6_tnl *t;
 	int err = -ENOENT;
+	int rel_msg = 0;
+	u8 tproto;
+	__u16 len;
 
 	/* If the packet doesn't contain the original IPv6 header we are
 	   in trouble since we might need the source address for further
@@ -490,16 +491,15 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 	if (!t)
 		goto out;
 
-	tproto = ACCESS_ONCE(t->parms.proto);
+	tproto = READ_ONCE(t->parms.proto);
 	if (tproto != ipproto && tproto != 0)
 		goto out;
 
 	err = 0;
 
 	switch (*type) {
-		__u32 teli;
 		struct ipv6_tlv_tnl_enc_lim *tel;
-		__u32 mtu;
+		__u32 mtu, teli;
 	case ICMPV6_DEST_UNREACH:
 		net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
 				    t->parms.name);
@@ -530,11 +530,11 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 		}
 		break;
 	case ICMPV6_PKT_TOOBIG:
+		ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
+				sock_net_uid(net, NULL));
 		mtu = *info - offset;
 		if (mtu < IPV6_MIN_MTU)
 			mtu = IPV6_MIN_MTU;
-		t->dev->mtu = mtu;
-
 		len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
 		if (len > mtu) {
 			rel_type = ICMPV6_PKT_TOOBIG;
@@ -543,6 +543,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 			rel_msg = 1;
 		}
 		break;
+	case NDISC_REDIRECT:
+		ip6_redirect(skb, net, skb->dev->ifindex, 0,
+			     sock_net_uid(net, NULL));
+		break;
 	}
 
 	*type = rel_type;
@@ -559,13 +563,12 @@ static int
 ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	   u8 type, u8 code, int offset, __be32 info)
 {
-	int rel_msg = 0;
-	u8 rel_type = type;
-	u8 rel_code = code;
 	__u32 rel_info = ntohl(info);
-	int err;
-	struct sk_buff *skb2;
 	const struct iphdr *eiph;
+	struct sk_buff *skb2;
+	int err, rel_msg = 0;
+	u8 rel_type = type;
+	u8 rel_code = code;
 	struct rtable *rt;
 	struct flowi4 fl4;
 
@@ -590,9 +593,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		rel_type = ICMP_DEST_UNREACH;
 		rel_code = ICMP_FRAG_NEEDED;
 		break;
-	case NDISC_REDIRECT:
-		rel_type = ICMP_REDIRECT;
-		rel_code = ICMP_REDIR_HOST;
 	default:
 		return 0;
 	}
@@ -611,33 +611,26 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	eiph = ip_hdr(skb2);
 
 	/* Try to guess incoming interface */
-	rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
-				   eiph->saddr, 0,
-				   0, 0,
-				   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+	rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
+				   0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
 	if (IS_ERR(rt))
 		goto out;
 
 	skb2->dev = rt->dst.dev;
+	ip_rt_put(rt);
 
 	/* route "incoming" packet */
 	if (rt->rt_flags & RTCF_LOCAL) {
-		ip_rt_put(rt);
-		rt = NULL;
 		rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
-					   eiph->daddr, eiph->saddr,
-					   0, 0,
-					   IPPROTO_IPIP,
-					   RT_TOS(eiph->tos), 0);
-		if (IS_ERR(rt) ||
-		    rt->dst.dev->type != ARPHRD_TUNNEL) {
+					   eiph->daddr, eiph->saddr, 0, 0,
+					   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+		if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
 			if (!IS_ERR(rt))
 				ip_rt_put(rt);
 			goto out;
 		}
 		skb_dst_set(skb2, &rt->dst);
 	} else {
-		ip_rt_put(rt);
 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
 				   skb2->dev) ||
 		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
@@ -649,10 +642,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (rel_info > dst_mtu(skb_dst(skb2)))
 			goto out;
 
-		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
+		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2,
+						rel_info);
 	}
-	if (rel_type == ICMP_REDIRECT)
-		skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
 
 	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
 
@@ -665,11 +657,10 @@ static int
 ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	   u8 type, u8 code, int offset, __be32 info)
 {
-	int rel_msg = 0;
+	__u32 rel_info = ntohl(info);
+	int err, rel_msg = 0;
 	u8 rel_type = type;
 	u8 rel_code = code;
-	__u32 rel_info = ntohl(info);
-	int err;
 
 	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
 			  &rel_msg, &rel_info, offset);
@@ -769,7 +760,8 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 
 		if ((ipv6_addr_is_multicast(laddr) ||
 		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
-		    likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
+		    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
+		     likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
 			ret = 1;
 	}
 	return ret;
@@ -899,7 +891,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
 	t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
 
 	if (t) {
-		u8 tproto = ACCESS_ONCE(t->parms.proto);
+		u8 tproto = READ_ONCE(t->parms.proto);
 
 		if (tproto != ipproto && tproto != 0)
 			goto drop;
@@ -999,7 +991,8 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
 		if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
 			pr_warn("%s xmit: Local address not yet configured!\n",
 				p->name);
-		else if (!ipv6_addr_is_multicast(raddr) &&
+		else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
+			 !ipv6_addr_is_multicast(raddr) &&
 			 unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
 			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
 				p->name);
@@ -1043,6 +1036,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 	struct dst_entry *dst = NULL, *ndst = NULL;
 	struct net_device *tdev;
 	int mtu;
+	unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
 	unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
 	unsigned int max_headroom = psh_hlen;
 	bool use_cache = false;
@@ -1124,7 +1118,7 @@ route_lookup:
 				     t->parms.name);
 		goto tx_err_dst_release;
 	}
-	mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen;
+	mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
 	if (encap_limit >= 0) {
 		max_headroom += 8;
 		mtu -= 8;
@@ -1133,7 +1127,7 @@ route_lookup:
 		mtu = IPV6_MIN_MTU;
 	if (skb_dst(skb) && !t->parms.collect_md)
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-	if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) {
+	if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
 		*pmtu = mtu;
 		err = -EMSGSIZE;
 		goto tx_err_dst_release;
@@ -1184,6 +1178,7 @@ route_lookup:
 		init_tel_txopt(&opt, encap_limit);
 		ipv6_push_frag_opts(skb, &opt.ops, &proto);
 	}
+	hop_limit = hop_limit ? : ip6_dst_hoplimit(dst);
 
 	/* Calculate max headroom for all the headers and adjust
 	 * needed_headroom if necessary.
@@ -1231,7 +1226,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 
-	tproto = ACCESS_ONCE(t->parms.proto);
+	tproto = READ_ONCE(t->parms.proto);
 	if (tproto != IPPROTO_IPIP && tproto != 0)
 		return -1;
 
@@ -1301,7 +1296,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	u8 tproto;
 	int err;
 
-	tproto = ACCESS_ONCE(t->parms.proto);
+	tproto = READ_ONCE(t->parms.proto);
 	if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
 	    ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
@@ -2166,17 +2161,16 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
 	.priority	=	1,
 };
 
-static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
+static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
 {
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 	struct net_device *dev, *aux;
 	int h;
 	struct ip6_tnl *t;
-	LIST_HEAD(list);
 
 	for_each_netdev_safe(net, dev, aux)
 		if (dev->rtnl_link_ops == &ip6_link_ops)
-			unregister_netdevice_queue(dev, &list);
+			unregister_netdevice_queue(dev, list);
 
 	for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
 		t = rtnl_dereference(ip6n->tnls_r_l[h]);
@@ -2185,12 +2179,10 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
 			 * been added to the list by the previous loop.
 			 */
 			if (!net_eq(dev_net(t->dev), net))
-				unregister_netdevice_queue(t->dev, &list);
+				unregister_netdevice_queue(t->dev, list);
 			t = rtnl_dereference(t->next);
 		}
 	}
-
-	unregister_netdevice_many(&list);
 }
 
 static int __net_init ip6_tnl_init_net(struct net *net)
@@ -2234,16 +2226,21 @@ err_alloc_dev:
 	return err;
 }
 
-static void __net_exit ip6_tnl_exit_net(struct net *net)
+static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
 {
+	struct net *net;
+	LIST_HEAD(list);
+
 	rtnl_lock();
-	ip6_tnl_destroy_tunnels(net);
+	list_for_each_entry(net, net_list, exit_list)
+		ip6_tnl_destroy_tunnels(net, &list);
+	unregister_netdevice_many(&list);
 	rtnl_unlock();
 }
 
 static struct pernet_operations ip6_tnl_net_ops = {
 	.init = ip6_tnl_init_net,
-	.exit = ip6_tnl_exit_net,
+	.exit_batch = ip6_tnl_exit_batch_net,
 	.id   = &ip6_tnl_net_id,
 	.size = sizeof(struct ip6_tnl_net),
 };
@@ -2258,6 +2255,9 @@ static int __init ip6_tunnel_init(void)
 {
 	int  err;
 
+	if (!ipv6_mod_enabled())
+		return -EOPNOTSUPP;
+
 	err = register_pernet_device(&ip6_tnl_net_ops);
 	if (err < 0)
 		goto out_pernet;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 486c2305f53c..dbb74f3c57a7 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *tdev;
 	struct xfrm_state *x;
+	int pkt_len = skb->len;
 	int err = -1;
 	int mtu;
 
@@ -502,7 +503,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
 
 		u64_stats_update_begin(&tstats->syncp);
-		tstats->tx_bytes += skb->len;
+		tstats->tx_bytes += pkt_len;
 		tstats->tx_packets++;
 		u64_stats_update_end(&tstats->syncp);
 	} else {
@@ -1052,23 +1053,22 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
 	.get_link_net	= ip6_tnl_get_link_net,
 };
 
-static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
+static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n,
+					    struct list_head *list)
 {
 	int h;
 	struct ip6_tnl *t;
-	LIST_HEAD(list);
 
 	for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
 		t = rtnl_dereference(ip6n->tnls_r_l[h]);
 		while (t) {
-			unregister_netdevice_queue(t->dev, &list);
+			unregister_netdevice_queue(t->dev, list);
 			t = rtnl_dereference(t->next);
 		}
 	}
 
 	t = rtnl_dereference(ip6n->tnls_wc[0]);
-	unregister_netdevice_queue(t->dev, &list);
-	unregister_netdevice_many(&list);
+	unregister_netdevice_queue(t->dev, list);
 }
 
 static int __net_init vti6_init_net(struct net *net)
@@ -1108,18 +1108,24 @@ err_alloc_dev:
 	return err;
 }
 
-static void __net_exit vti6_exit_net(struct net *net)
+static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
 {
-	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+	struct vti6_net *ip6n;
+	struct net *net;
+	LIST_HEAD(list);
 
 	rtnl_lock();
-	vti6_destroy_tunnels(ip6n);
+	list_for_each_entry(net, net_list, exit_list) {
+		ip6n = net_generic(net, vti6_net_id);
+		vti6_destroy_tunnels(ip6n, &list);
+	}
+	unregister_netdevice_many(&list);
 	rtnl_unlock();
 }
 
 static struct pernet_operations vti6_net_ops = {
 	.init = vti6_init_net,
-	.exit = vti6_exit_net,
+	.exit_batch = vti6_exit_batch_net,
 	.id   = &vti6_net_id,
 	.size = sizeof(struct vti6_net),
 };
@@ -1145,33 +1151,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
 	.priority	=	100,
 };
 
-static bool is_vti6_tunnel(const struct net_device *dev)
-{
-	return dev->netdev_ops == &vti6_netdev_ops;
-}
-
-static int vti6_device_event(struct notifier_block *unused,
-			     unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct ip6_tnl *t = netdev_priv(dev);
-
-	if (!is_vti6_tunnel(dev))
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case NETDEV_DOWN:
-		if (!net_eq(t->net, dev_net(dev)))
-			xfrm_garbage_collect(t->net);
-		break;
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block vti6_notifier_block __read_mostly = {
-	.notifier_call = vti6_device_event,
-};
-
 /**
  * vti6_tunnel_init - register protocol and reserve needed resources
  *
@@ -1182,8 +1161,6 @@ static int __init vti6_tunnel_init(void)
 	const char *msg;
 	int err;
 
-	register_netdevice_notifier(&vti6_notifier_block);
-
 	msg = "tunnel device";
 	err = register_pernet_device(&vti6_net_ops);
 	if (err < 0)
@@ -1216,7 +1193,6 @@ xfrm_proto_ah_failed:
 xfrm_proto_esp_failed:
 	unregister_pernet_device(&vti6_net_ops);
 pernet_dev_failed:
-	unregister_netdevice_notifier(&vti6_notifier_block);
 	pr_err("vti6 init: failed to register %s\n", msg);
 	return err;
 }
@@ -1231,7 +1207,6 @@ static void __exit vti6_tunnel_cleanup(void)
 	xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
 	xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
 	unregister_pernet_device(&vti6_net_ops);
-	unregister_netdevice_notifier(&vti6_notifier_block);
 }
 
 module_init(vti6_tunnel_init);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7454850f2098..9c24b85949c1 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1427,7 +1427,7 @@ int __init ip6_mr_init(void)
 	}
 #endif
 	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
-		      ip6mr_rtm_dumproute, NULL);
+		      ip6mr_rtm_dumproute, 0);
 	return 0;
 #ifdef CONFIG_IPV6_PIMSM_V2
 add_proto_fail:
@@ -1617,6 +1617,10 @@ int ip6mr_sk_done(struct sock *sk)
 	struct net *net = sock_net(sk);
 	struct mr6_table *mrt;
 
+	if (sk->sk_type != SOCK_RAW ||
+	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+		return err;
+
 	rtnl_lock();
 	ip6mr_for_each_table(mrt, net) {
 		if (sk == mrt->mroute6_sk) {
@@ -1722,6 +1726,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 	case MRT6_ADD_MFC:
 	case MRT6_DEL_MFC:
 		parent = -1;
+		/* fall through */
 	case MRT6_ADD_MFC_PROXY:
 	case MRT6_DEL_MFC_PROXY:
 		if (optlen < sizeof(mfc))
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 02d795fe3d7f..b9404feabd78 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -242,7 +242,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 			pktopt = xchg(&np->pktoptions, NULL);
 			kfree_skb(pktopt);
 
-			sk->sk_destruct = inet_sock_destruct;
 			/*
 			 * ... and add it to the refcnt debug socks count
 			 * in the new family. -acme
@@ -378,6 +377,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		retv = 0;
 		break;
 
+	case IPV6_FREEBIND:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		/* we also don't have a separate freebind bit for IPV6 */
+		inet_sk(sk)->freebind = valbool;
+		retv = 0;
+		break;
+
 	case IPV6_RECVORIGDSTADDR:
 		if (optlen < sizeof(int))
 			goto e_inval;
@@ -1215,6 +1222,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = inet_sk(sk)->transparent;
 		break;
 
+	case IPV6_FREEBIND:
+		val = inet_sk(sk)->freebind;
+		break;
+
 	case IPV6_RECVORIGDSTADDR:
 		val = np->rxopt.bits.rxorigdstaddr;
 		break;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0327c1f2e6fc..b3cea200c85e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -46,6 +46,7 @@
 #endif
 
 #include <linux/if_addr.h>
+#include <linux/if_ether.h>
 #include <linux/if_arp.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
@@ -127,7 +128,7 @@ struct neigh_table nd_tbl = {
 			[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
 			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
 			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
-			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
 			[NEIGH_VAR_PROXY_QLEN] = 64,
 			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
 			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
@@ -426,12 +427,19 @@ static void ip6_nd_hdr(struct sk_buff *skb,
 		       int hop_limit, int len)
 {
 	struct ipv6hdr *hdr;
+	struct inet6_dev *idev;
+	unsigned tclass;
+
+	rcu_read_lock();
+	idev = __in6_dev_get(skb->dev);
+	tclass = idev ? idev->cnf.ndisc_tclass : 0;
+	rcu_read_unlock();
 
 	skb_push(skb, sizeof(*hdr));
 	skb_reset_network_header(skb);
 	hdr = ipv6_hdr(skb);
 
-	ip6_flow_hdr(hdr, 0, 0);
+	ip6_flow_hdr(hdr, tclass, 0);
 
 	hdr->payload_len = htons(len);
 	hdr->nexthdr = IPPROTO_ICMPV6;
@@ -822,7 +830,7 @@ have_ifp:
 				 * who is doing DAD
 				 * so fail our DAD process
 				 */
-				addrconf_dad_failure(ifp);
+				addrconf_dad_failure(skb, ifp);
 				return;
 			} else {
 				/*
@@ -975,7 +983,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 	if (ifp) {
 		if (skb->pkt_type != PACKET_LOOPBACK
 		    && (ifp->flags & IFA_F_TENTATIVE)) {
-				addrconf_dad_failure(ifp);
+				addrconf_dad_failure(skb, ifp);
 				return;
 		}
 		/* What should we make now? The advertisement
@@ -989,8 +997,8 @@ static void ndisc_recv_na(struct sk_buff *skb)
 		 */
 		if (skb->pkt_type != PACKET_LOOPBACK)
 			ND_PRINTK(1, warn,
-				  "NA: someone advertises our address %pI6 on %s!\n",
-				  &ifp->addr, ifp->idev->dev->name);
+				  "NA: %pM advertised our address %pI6c on %s!\n",
+				  eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name);
 		in6_ifa_put(ifp);
 		return;
 	}
@@ -1779,6 +1787,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 
 static struct notifier_block ndisc_netdev_notifier = {
 	.notifier_call = ndisc_netdev_event,
+	.priority = ADDRCONF_NOTIFY_PRIORITY - 5,
 };
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index fe180c96040e..c6ee0cdd0ba9 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the netfilter modules on top of IPv6.
 #
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1f90644056ac..f06e25065a34 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -39,12 +39,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv6 packet filter");
 
-#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x)	WARN_ON(!(x))
-#else
-#define IP_NF_ASSERT(x)
-#endif
-
 void *ip6t_alloc_initial_table(const struct xt_table *info)
 {
 	return xt_alloc_initial_table(ip6t, IP6T);
@@ -176,7 +170,7 @@ static const char *const comments[] = {
 	[NF_IP6_TRACE_COMMENT_POLICY]	= "policy",
 };
 
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
 	.type = NF_LOG_TYPE_LOG,
 	.u = {
 		.log = {
@@ -284,7 +278,7 @@ ip6t_do_table(struct sk_buff *skb,
 	acpar.hotdrop = false;
 	acpar.state   = state;
 
-	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+	WARN_ON(!(table->valid_hooks & (1 << hook)));
 
 	local_bh_disable();
 	addend = xt_write_recseq_begin();
@@ -315,7 +309,7 @@ ip6t_do_table(struct sk_buff *skb,
 		const struct xt_entry_match *ematch;
 		struct xt_counters *counter;
 
-		IP_NF_ASSERT(e);
+		WARN_ON(!e);
 		acpar.thoff = 0;
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
 		    &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
@@ -335,7 +329,7 @@ ip6t_do_table(struct sk_buff *skb,
 		ADD_COUNTER(*counter, skb->len, 1);
 
 		t = ip6t_get_target_c(e);
-		IP_NF_ASSERT(t->u.kernel.target);
+		WARN_ON(!t->u.kernel.target);
 
 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
 		/* The packet is traced: log it */
@@ -801,7 +795,27 @@ get_counters(const struct xt_table_info *t,
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
+			cond_resched();
+		}
+	}
+}
+
+static void get_old_counters(const struct xt_table_info *t,
+			     struct xt_counters counters[])
+{
+	struct ip6t_entry *iter;
+	unsigned int cpu, i;
+
+	for_each_possible_cpu(cpu) {
+		i = 0;
+		xt_entry_foreach(iter, t->entries, t->size) {
+			const struct xt_counters *tmp;
+
+			tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
+			ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
+			++i;
 		}
+		cond_resched();
 	}
 }
 
@@ -1095,8 +1109,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters, and synchronize with replace */
-	get_counters(oldinfo, counters);
+	get_old_counters(oldinfo, counters);
 
 	/* Decrease module usage counts and free resource */
 	xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index ce203dd729e0..437af8c95277 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -353,7 +353,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
 	nexthdr = ipv6_hdr(skb)->nexthdr;
 	thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
 				 &frag_off);
-	if (thoff < 0)
+	if (thoff < 0 || nexthdr != IPPROTO_TCP)
 		return NF_ACCEPT;
 
 	th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
@@ -438,7 +438,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
 	return NF_ACCEPT;
 }
 
-static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv6_synproxy_ops[] = {
 	{
 		.hook		= ipv6_synproxy_hook,
 		.pf		= NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 7d2bd940291f..991512576c8c 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -69,7 +69,7 @@ static unsigned int ip6table_nat_local_fn(void *priv,
 	return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain);
 }
 
-static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
+static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
 	/* Before packet filtering, change destination */
 	{
 		.hook		= ip6table_nat_in,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4e3402486833..3b80a38f62b8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -67,13 +67,6 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-static void ipv6_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "src=%pI6 dst=%pI6 ",
-		   tuple->src.u3.ip6, tuple->dst.u3.ip6);
-}
-
 static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			    unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -191,7 +184,7 @@ static unsigned int ipv6_conntrack_local(void *priv,
 	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
 }
 
-static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv6_conntrack_ops[] = {
 	{
 		.hook		= ipv6_conntrack_in,
 		.pf		= NFPROTO_IPV6,
@@ -308,11 +301,6 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
 
 	return 0;
 }
-
-static int ipv6_nlattr_tuple_size(void)
-{
-	return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1);
-}
 #endif
 
 static int ipv6_hooks_register(struct net *net)
@@ -351,18 +339,17 @@ static void ipv6_hooks_unregister(struct net *net)
 	mutex_unlock(&register_ipv6_hooks);
 }
 
-struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
+const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
 	.l3proto		= PF_INET6,
-	.name			= "ipv6",
 	.pkt_to_tuple		= ipv6_pkt_to_tuple,
 	.invert_tuple		= ipv6_invert_tuple,
-	.print_tuple		= ipv6_print_tuple,
 	.get_l4proto		= ipv6_get_l4proto,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= ipv6_tuple_to_nlattr,
-	.nlattr_tuple_size	= ipv6_nlattr_tuple_size,
 	.nlattr_to_tuple	= ipv6_nlattr_to_tuple,
 	.nla_policy		= ipv6_nla_policy,
+	.nla_size		= NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) +
+				  NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])),
 #endif
 	.net_ns_get		= ipv6_hooks_register,
 	.net_ns_put		= ipv6_hooks_unregister,
@@ -398,25 +385,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
 
 static int ipv6_net_init(struct net *net)
 {
-	int ret = 0;
-
-	ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
-					    ARRAY_SIZE(builtin_l4proto6));
-	if (ret < 0)
-		return ret;
-
-	ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
-		nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
-						ARRAY_SIZE(builtin_l4proto6));
-	}
-	return ret;
+	return nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
+					     ARRAY_SIZE(builtin_l4proto6));
 }
 
 static void ipv6_net_exit(struct net *net)
 {
-	nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
 	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
 					ARRAY_SIZE(builtin_l4proto6));
 }
@@ -434,6 +408,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
 
 	need_conntrack();
 
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) !=
+	    nf_conntrack_l3proto_ipv6.nla_size))
+		return -EINVAL;
+#endif
+
 	ret = nf_register_sockopt(&so_getorigdst6);
 	if (ret < 0) {
 		pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index d5f028e33f65..3ac0d826afc4 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -84,16 +84,6 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void icmpv6_print_tuple(struct seq_file *s,
-			      const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "type=%u code=%u id=%u ",
-		   tuple->dst.u.icmp.type,
-		   tuple->dst.u.icmp.code,
-		   ntohs(tuple->src.u.icmp.id));
-}
-
 static unsigned int *icmpv6_get_timeouts(struct net *net)
 {
 	return &icmpv6_pernet(net)->timeout;
@@ -104,8 +94,6 @@ static int icmpv6_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
-		       u_int8_t pf,
-		       unsigned int hooknum,
 		       unsigned int *timeout)
 {
 	/* Do not immediately delete the connection after the first
@@ -131,11 +119,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
 		pr_debug("icmpv6: can't create new conn with type %u\n",
 			 type + 128);
 		nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
-		if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
-			nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
-				      NULL, NULL,
-				      "nf_ct_icmpv6: invalid new with type %d ",
-				      type + 128);
 		return false;
 	}
 	return true;
@@ -144,8 +127,7 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
 static int
 icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
 		     struct sk_buff *skb,
-		     unsigned int icmp6off,
-		     unsigned int hooknum)
+		     unsigned int icmp6off)
 {
 	struct nf_conntrack_tuple intuple, origtuple;
 	const struct nf_conntrack_tuple_hash *h;
@@ -153,7 +135,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conntrack_zone tmp;
 
-	NF_CT_ASSERT(!skb_nfct(skb));
+	WARN_ON(skb_nfct(skb));
 
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuplepr(skb,
@@ -193,6 +175,12 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
 	return NF_ACCEPT;
 }
 
+static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
+			     u8 pf, const char *msg)
+{
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
+}
+
 static int
 icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	     struct sk_buff *skb, unsigned int dataoff,
@@ -204,17 +192,13 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 
 	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
 	if (icmp6h == NULL) {
-		if (LOG_INVALID(net, IPPROTO_ICMPV6))
-			nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
-			      "nf_ct_icmpv6: short packet ");
+		icmpv6_error_log(skb, net, pf, "short packet");
 		return -NF_ACCEPT;
 	}
 
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
-		if (LOG_INVALID(net, IPPROTO_ICMPV6))
-			nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_icmpv6: ICMPv6 checksum failed ");
+		icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
 		return -NF_ACCEPT;
 	}
 
@@ -229,7 +213,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	if (icmp6h->icmp6_type >= 128)
 		return NF_ACCEPT;
 
-	return icmpv6_error_message(net, tmpl, skb, dataoff, hooknum);
+	return icmpv6_error_message(net, tmpl, skb, dataoff);
 }
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -275,9 +259,14 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
 	return 0;
 }
 
-static int icmpv6_nlattr_tuple_size(void)
+static unsigned int icmpv6_nlattr_tuple_size(void)
 {
-	return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+	static unsigned int size __read_mostly;
+
+	if (!size)
+		size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+
+	return size;
 }
 #endif
 
@@ -367,10 +356,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
 {
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_ICMPV6,
-	.name			= "icmpv6",
 	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
 	.invert_tuple		= icmpv6_invert_tuple,
-	.print_tuple		= icmpv6_print_tuple,
 	.packet			= icmpv6_packet,
 	.get_timeouts		= icmpv6_get_timeouts,
 	.new			= icmpv6_new,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 986d4ca38832..977d8900cfd1 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -169,12 +169,13 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q)
 	return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
 }
 
-static void nf_ct_frag6_expire(unsigned long data)
+static void nf_ct_frag6_expire(struct timer_list *t)
 {
+	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct frag_queue *fq;
 	struct net *net;
 
-	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+	fq = container_of(frag, struct frag_queue, q);
 	net = container_of(fq->q.net, struct net, nf_frag.frags);
 
 	ip6_expire_frag_queue(net, fq, &nf_frags);
@@ -622,18 +623,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
 
 static int nf_ct_net_init(struct net *net)
 {
-	int res;
-
 	net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
-	res = inet_frags_init_net(&net->nf_frag.frags);
-	if (res)
-		return res;
-	res = nf_ct_frag6_sysctl_register(net);
-	if (res)
-		inet_frags_uninit_net(&net->nf_frag.frags);
-	return res;
+	inet_frags_init_net(&net->nf_frag.frags);
+
+	return nf_ct_frag6_sysctl_register(net);
 }
 
 static void nf_ct_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index ada60d1a991b..b326da59257f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -74,7 +74,7 @@ static unsigned int ipv6_defrag(void *priv,
 	return err == 0 ? NF_ACCEPT : NF_DROP;
 }
 
-static struct nf_hook_ops ipv6_defrag_ops[] = {
+static const struct nf_hook_ops ipv6_defrag_ops[] = {
 	{
 		.hook		= ipv6_defrag,
 		.pf		= NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 97c724224da7..b397a8fe88b9 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -25,7 +25,7 @@
 #include <linux/netfilter/xt_LOG.h>
 #include <net/netfilter/nf_log.h>
 
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
 	.type	= NF_LOG_TYPE_LOG,
 	.u = {
 		.log = {
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index b2b4f031b3a1..1d2fb9267d6f 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -196,7 +196,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
 	struct nf_conntrack_tuple target;
 	unsigned long statusbit;
 
-	NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+	WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
 
 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
 		return 0;
@@ -290,7 +290,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
 			else
 				return NF_ACCEPT;
 		}
-		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+		/* Only ICMPs can be IP_CT_IS_REPLY: */
+		/* fall through */
 	case IP_CT_NEW:
 		/* Seen it before?  This can happen for loopback, retrans,
 		 * or local packets.
@@ -319,8 +320,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
 
 	default:
 		/* ESTABLISHED */
-		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
-			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+		WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+			ctinfo != IP_CT_ESTABLISHED_REPLY);
 		if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
 			goto oif_changed;
 	}
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index d7b679037bae..98f61fcb9108 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -36,8 +36,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
 	struct nf_nat_range newrange;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			    ctinfo == IP_CT_RELATED_REPLY));
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			 ctinfo == IP_CT_RELATED_REPLY)));
 
 	if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
 			       &ipv6_hdr(skb)->daddr, 0, &src) < 0)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 43f91d9b086c..54b5899543ef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -25,9 +25,9 @@ static int get_ifindex(const struct net_device *dev)
 
 static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
 			       const struct nft_pktinfo *pkt,
-			       const struct net_device *dev)
+			       const struct net_device *dev,
+			       struct ipv6hdr *iph)
 {
-	const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
 	int lookup_flags = 0;
 
 	if (priv->flags & NFTA_FIB_F_DADDR) {
@@ -55,7 +55,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
 }
 
 static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
-				const struct nft_pktinfo *pkt)
+				const struct nft_pktinfo *pkt,
+				struct ipv6hdr *iph)
 {
 	const struct net_device *dev = NULL;
 	const struct nf_ipv6_ops *v6ops;
@@ -77,7 +78,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
 	else if (priv->flags & NFTA_FIB_F_OIF)
 		dev = nft_out(pkt);
 
-	nft_fib6_flowi_init(&fl6, priv, pkt, dev);
+	nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
 
 	v6ops = nf_get_ipv6_ops();
 	if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
@@ -131,9 +132,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
 			const struct nft_pktinfo *pkt)
 {
 	const struct nft_fib *priv = nft_expr_priv(expr);
+	int noff = skb_network_offset(pkt->skb);
 	u32 *dest = &regs->data[priv->dreg];
+	struct ipv6hdr *iph, _iph;
 
-	*dest = __nft_fib6_eval_type(priv, pkt);
+	iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+	if (!iph) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	*dest = __nft_fib6_eval_type(priv, pkt, iph);
 }
 EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
 
@@ -141,8 +150,10 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 		   const struct nft_pktinfo *pkt)
 {
 	const struct nft_fib *priv = nft_expr_priv(expr);
+	int noff = skb_network_offset(pkt->skb);
 	const struct net_device *oif = NULL;
 	u32 *dest = &regs->data[priv->dreg];
+	struct ipv6hdr *iph, _iph;
 	struct flowi6 fl6 = {
 		.flowi6_iif = LOOPBACK_IFINDEX,
 		.flowi6_proto = pkt->tprot,
@@ -155,7 +166,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 	else if (priv->flags & NFTA_FIB_F_OIF)
 		oif = nft_out(pkt);
 
-	lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
+	iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+	if (!iph) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
 
 	if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
 	    nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index abb2c307fbe8..4a7e5ffa5108 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -31,37 +31,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
 	return id;
 }
 
-/* This function exists only for tap drivers that must support broken
- * clients requesting UFO without specifying an IPv6 fragment ID.
- *
- * This is similar to ipv6_select_ident() but we use an independent hash
- * seed to limit information leakage.
- *
- * The network header must be set before calling this.
- */
-void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
-{
-	static u32 ip6_proxy_idents_hashrnd __read_mostly;
-	struct in6_addr buf[2];
-	struct in6_addr *addrs;
-	u32 id;
-
-	addrs = skb_header_pointer(skb,
-				   skb_network_offset(skb) +
-				   offsetof(struct ipv6hdr, saddr),
-				   sizeof(buf), buf);
-	if (!addrs)
-		return;
-
-	net_get_random_once(&ip6_proxy_idents_hashrnd,
-			    sizeof(ip6_proxy_idents_hashrnd));
-
-	id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
-				 &addrs[1], &addrs[0]);
-	skb_shinfo(skb)->ip6_frag_id = htonl(id);
-}
-EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
-
 __be32 ipv6_select_ident(struct net *net,
 			 const struct in6_addr *daddr,
 			 const struct in6_addr *saddr)
@@ -86,7 +55,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
 	while (offset <= packet_len) {
 		struct ipv6_opt_hdr *exthdr;
-		unsigned int len;
 
 		switch (**nexthdr) {
 
@@ -112,10 +80,9 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
 		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 						 offset);
-		len = ipv6_optlen(exthdr);
-		if (len + offset >= IPV6_MAXPLEN)
+		offset += ipv6_optlen(exthdr);
+		if (offset > IPV6_MAXPLEN)
 			return -EINVAL;
-		offset += len;
 		*nexthdr = &exthdr->nexthdr;
 	}
 
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ac826dd338ff..d12c55dad7d1 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -154,9 +154,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 				ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 	} else {
-		err = icmpv6_push_pending_frames(sk, &fl6,
-						 (struct icmp6hdr *) &pfh.icmph,
-						 len);
+		icmpv6_push_pending_frames(sk, &fl6,
+					   (struct icmp6hdr *)&pfh.icmph, len);
 	}
 	release_sock(sk);
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 60be012fe708..761a473a07c5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
 
 struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 		unsigned short num, const struct in6_addr *loc_addr,
-		const struct in6_addr *rmt_addr, int dif)
+		const struct in6_addr *rmt_addr, int dif, int sdif)
 {
 	bool is_multicast = ipv6_addr_is_multicast(loc_addr);
 
@@ -86,7 +86,9 @@ struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
 				continue;
 
-			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+			if (sk->sk_bound_dev_if &&
+			    sk->sk_bound_dev_if != dif &&
+			    sk->sk_bound_dev_if != sdif)
 				continue;
 
 			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
@@ -178,7 +180,8 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 		goto out;
 
 	net = dev_net(skb->dev);
-	sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb));
+	sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr,
+			     inet6_iif(skb), inet6_sdif(skb));
 
 	while (sk) {
 		int filtered;
@@ -222,7 +225,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 			}
 		}
 		sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
-				     inet6_iif(skb));
+				     inet6_iif(skb), inet6_sdif(skb));
 	}
 out:
 	read_unlock(&raw_v6_hashinfo.lock);
@@ -378,7 +381,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
 		net = dev_net(skb->dev);
 
 		while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
-						inet6_iif(skb)))) {
+					     inet6_iif(skb), inet6_iif(skb)))) {
 			rawv6_err(sk, skb, NULL, type, code,
 					inner_offset, info);
 			sk = sk_next(sk);
@@ -1052,6 +1055,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
 		if (optname == IPV6_CHECKSUM ||
 		    optname == IPV6_HDRINCL)
 			break;
+		/* fall through */
 	default:
 		return ipv6_setsockopt(sk, level, optname, optval, optlen);
 	}
@@ -1074,6 +1078,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
 		if (optname == IPV6_CHECKSUM ||
 		    optname == IPV6_HDRINCL)
 			break;
+		/* fall through */
 	default:
 		return compat_ipv6_setsockopt(sk, level, optname,
 					      optval, optlen);
@@ -1135,6 +1140,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
 		if (optname == IPV6_CHECKSUM ||
 		    optname == IPV6_HDRINCL)
 			break;
+		/* fall through */
 	default:
 		return ipv6_getsockopt(sk, level, optname, optval, optlen);
 	}
@@ -1157,6 +1163,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
 		if (optname == IPV6_CHECKSUM ||
 		    optname == IPV6_HDRINCL)
 			break;
+		/* fall through */
 	default:
 		return compat_ipv6_getsockopt(sk, level, optname,
 					      optval, optlen);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e1da5b888cc4..afbc000ad4f2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -170,12 +170,13 @@ out:
 }
 EXPORT_SYMBOL(ip6_expire_frag_queue);
 
-static void ip6_frag_expire(unsigned long data)
+static void ip6_frag_expire(struct timer_list *t)
 {
+	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct frag_queue *fq;
 	struct net *net;
 
-	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+	fq = container_of(frag, struct frag_queue, q);
 	net = container_of(fq->q.net, struct net, ipv6.frags);
 
 	ip6_expire_frag_queue(net, fq, &ip6_frags);
@@ -714,19 +715,13 @@ static void ip6_frags_sysctl_unregister(void)
 
 static int __net_init ipv6_frags_init_net(struct net *net)
 {
-	int res;
-
 	net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
 
-	res = inet_frags_init_net(&net->ipv6.frags);
-	if (res)
-		return res;
-	res = ip6_frags_ns_sysctl_register(net);
-	if (res)
-		inet_frags_uninit_net(&net->ipv6.frags);
-	return res;
+	inet_frags_init_net(&net->ipv6.frags);
+
+	return ip6_frags_ns_sysctl_register(net);
 }
 
 static void __net_exit ipv6_frags_exit_net(struct net *net)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 94d6a13d47f0..05eb7bc36156 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -44,6 +44,7 @@
 #include <linux/seq_file.h>
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
+#include <linux/jhash.h>
 #include <net/net_namespace.h>
 #include <net/snmp.h>
 #include <net/ipv6.h>
@@ -104,6 +105,9 @@ static int rt6_fill_node(struct net *net,
 			 struct in6_addr *dst, struct in6_addr *src,
 			 int iif, int type, u32 portid, u32 seq,
 			 unsigned int flags);
+static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+					   struct in6_addr *daddr,
+					   struct in6_addr *saddr);
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -139,9 +143,11 @@ static void rt6_uncached_list_del(struct rt6_info *rt)
 {
 	if (!list_empty(&rt->rt6i_uncached)) {
 		struct uncached_list *ul = rt->rt6i_uncached_list;
+		struct net *net = dev_net(rt->dst.dev);
 
 		spin_lock_bh(&ul->lock);
 		list_del(&rt->rt6i_uncached);
+		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
 		spin_unlock_bh(&ul->lock);
 	}
 }
@@ -355,8 +361,10 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 					1, DST_OBSOLETE_FORCE_CHK, flags);
 
-	if (rt)
+	if (rt) {
 		rt6_info_init(rt);
+		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
+	}
 
 	return rt;
 }
@@ -369,17 +377,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net,
 
 	if (rt) {
 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
-		if (rt->rt6i_pcpu) {
-			int cpu;
-
-			for_each_possible_cpu(cpu) {
-				struct rt6_info **p;
-
-				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
-				/* no one shares rt */
-				*p =  NULL;
-			}
-		} else {
+		if (!rt->rt6i_pcpu) {
 			dst_release_immediate(&rt->dst);
 			return NULL;
 		}
@@ -392,6 +390,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
+	struct rt6_exception_bucket *bucket;
 	struct dst_entry *from = dst->from;
 	struct inet6_dev *idev;
 
@@ -404,6 +403,11 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 		rt->rt6i_idev = NULL;
 		in6_dev_put(idev);
 	}
+	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
+	if (bucket) {
+		rt->rt6i_exception_bucket = NULL;
+		kfree(bucket);
+	}
 
 	dst->from = NULL;
 	dst_release(from);
@@ -440,21 +444,12 @@ static bool rt6_check_expired(const struct rt6_info *rt)
 		if (time_after(jiffies, rt->dst.expires))
 			return true;
 	} else if (rt->dst.from) {
-		return rt6_check_expired((struct rt6_info *) rt->dst.from);
+		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
+		       rt6_check_expired((struct rt6_info *)rt->dst.from);
 	}
 	return false;
 }
 
-/* Multipath route selection:
- *   Hash based function using packet header and flowlabel.
- * Adapted from fib_info_hashfn()
- */
-static int rt6_info_hash_nhsfn(unsigned int candidate_count,
-			       const struct flowi6 *fl6)
-{
-	return get_hash_from_flowi6(fl6) % candidate_count;
-}
-
 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 					     struct flowi6 *fl6, int oif,
 					     int strict)
@@ -462,7 +457,13 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 	struct rt6_info *sibling, *next_sibling;
 	int route_choosen;
 
-	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
+	/* We might have already computed the hash for ICMPv6 errors. In such a
+	 * case it will always be non-zero. Otherwise now is the time to do it.
+	 */
+	if (!fl6->mp_hash)
+		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+
+	route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
 	/* Don't change the route, if route_choosen == 0
 	 * (siblings does not include ourself)
 	 */
@@ -481,7 +482,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 }
 
 /*
- *	Route lookup. Any table->tb6_lock is implied.
+ *	Route lookup. rcu_read_lock() should be held.
  */
 
 static inline struct rt6_info *rt6_device_match(struct net *net,
@@ -496,7 +497,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 	if (!oif && ipv6_addr_any(saddr))
 		goto out;
 
-	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
+	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
 		struct net_device *dev = sprt->dst.dev;
 
 		if (oif) {
@@ -705,6 +706,7 @@ out:
 }
 
 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
+				     struct rt6_info *leaf,
 				     struct rt6_info *rr_head,
 				     u32 metric, int oif, int strict,
 				     bool *do_rr)
@@ -714,7 +716,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 
 	match = NULL;
 	cont = NULL;
-	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
+	for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
 		if (rt->rt6i_metric != metric) {
 			cont = rt;
 			break;
@@ -723,7 +725,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 	}
 
-	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
+	for (rt = leaf; rt && rt != rr_head;
+	     rt = rcu_dereference(rt->dst.rt6_next)) {
 		if (rt->rt6i_metric != metric) {
 			cont = rt;
 			break;
@@ -735,37 +738,59 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 	if (match || !cont)
 		return match;
 
-	for (rt = cont; rt; rt = rt->dst.rt6_next)
+	for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 
 	return match;
 }
 
-static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
+				   int oif, int strict)
 {
+	struct rt6_info *leaf = rcu_dereference(fn->leaf);
 	struct rt6_info *match, *rt0;
-	struct net *net;
 	bool do_rr = false;
+	int key_plen;
 
-	rt0 = fn->rr_ptr;
+	if (!leaf || leaf == net->ipv6.ip6_null_entry)
+		return net->ipv6.ip6_null_entry;
+
+	rt0 = rcu_dereference(fn->rr_ptr);
 	if (!rt0)
-		fn->rr_ptr = rt0 = fn->leaf;
+		rt0 = leaf;
+
+	/* Double check to make sure fn is not an intermediate node
+	 * and fn->leaf does not point to its child's leaf
+	 * (This might happen if all routes under fn are deleted from
+	 * the tree and fib6_repair_tree() is called on the node.)
+	 */
+	key_plen = rt0->rt6i_dst.plen;
+#ifdef CONFIG_IPV6_SUBTREES
+	if (rt0->rt6i_src.plen)
+		key_plen = rt0->rt6i_src.plen;
+#endif
+	if (fn->fn_bit != key_plen)
+		return net->ipv6.ip6_null_entry;
 
-	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
+	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
 			     &do_rr);
 
 	if (do_rr) {
-		struct rt6_info *next = rt0->dst.rt6_next;
+		struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
 
 		/* no entries matched; do round-robin */
 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
-			next = fn->leaf;
-
-		if (next != rt0)
-			fn->rr_ptr = next;
+			next = leaf;
+
+		if (next != rt0) {
+			spin_lock_bh(&leaf->rt6i_table->tb6_lock);
+			/* make sure next is not being deleted from the tree */
+			if (next->rt6i_node)
+				rcu_assign_pointer(fn->rr_ptr, next);
+			spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
+		}
 	}
 
-	net = dev_net(rt0->dst.dev);
 	return match ? match : net->ipv6.ip6_null_entry;
 }
 
@@ -853,13 +878,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 					struct in6_addr *saddr)
 {
-	struct fib6_node *pn;
+	struct fib6_node *pn, *sn;
 	while (1) {
 		if (fn->fn_flags & RTN_TL_ROOT)
 			return NULL;
-		pn = fn->parent;
-		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
-			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
+		pn = rcu_dereference(fn->parent);
+		sn = FIB6_SUBTREE(pn);
+		if (sn && sn != fn)
+			fn = fib6_lookup(sn, NULL, saddr);
 		else
 			fn = pn;
 		if (fn->fn_flags & RTN_RTINFO)
@@ -867,29 +893,59 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 	}
 }
 
+static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
+			  bool null_fallback)
+{
+	struct rt6_info *rt = *prt;
+
+	if (dst_hold_safe(&rt->dst))
+		return true;
+	if (null_fallback) {
+		rt = net->ipv6.ip6_null_entry;
+		dst_hold(&rt->dst);
+	} else {
+		rt = NULL;
+	}
+	*prt = rt;
+	return false;
+}
+
 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     struct fib6_table *table,
 					     struct flowi6 *fl6, int flags)
 {
+	struct rt6_info *rt, *rt_cache;
 	struct fib6_node *fn;
-	struct rt6_info *rt;
 
-	read_lock_bh(&table->tb6_lock);
+	rcu_read_lock();
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
-	rt = fn->leaf;
-	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
-	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
+	rt = rcu_dereference(fn->leaf);
+	if (!rt) {
+		rt = net->ipv6.ip6_null_entry;
+	} else {
+		rt = rt6_device_match(net, rt, &fl6->saddr,
+				      fl6->flowi6_oif, flags);
+		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+			rt = rt6_multipath_select(rt, fl6,
+						  fl6->flowi6_oif, flags);
+	}
 	if (rt == net->ipv6.ip6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
 	}
-	dst_use(&rt->dst, jiffies);
-	read_unlock_bh(&table->tb6_lock);
+	/* Search through exception table */
+	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+	if (rt_cache)
+		rt = rt_cache;
+
+	if (ip6_hold_safe(net, &rt, true))
+		dst_use_noref(&rt->dst, jiffies);
+
+	rcu_read_unlock();
 
-	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+	trace_fib6_table_lookup(net, rt, table, fl6);
 
 	return rt;
 
@@ -941,9 +997,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 	struct fib6_table *table;
 
 	table = rt->rt6i_table;
-	write_lock_bh(&table->tb6_lock);
+	spin_lock_bh(&table->tb6_lock);
 	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
-	write_unlock_bh(&table->tb6_lock);
+	spin_unlock_bh(&table->tb6_lock);
 
 	return err;
 }
@@ -958,10 +1014,34 @@ int ip6_ins_rt(struct rt6_info *rt)
 	return __ip6_ins_rt(rt, &info, &mxc, NULL);
 }
 
+/* called with rcu_lock held */
+static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
+{
+	struct net_device *dev = rt->dst.dev;
+
+	if (rt->rt6i_flags & RTF_LOCAL) {
+		/* for copies of local routes, dst->dev needs to be the
+		 * device if it is a master device, the master device if
+		 * device is enslaved, and the loopback as the default
+		 */
+		if (netif_is_l3_slave(dev) &&
+		    !rt6_need_strict(&rt->rt6i_dst.addr))
+			dev = l3mdev_master_dev_rcu(dev);
+		else if (!netif_is_l3_master(dev))
+			dev = dev_net(dev)->loopback_dev;
+		/* the remaining case is netif_is_l3_master(dev) being true,
+		 * in which case dev itself is returned unchanged
+		 */
+	}
+
+	return dev;
+}
+
 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 					   const struct in6_addr *daddr,
 					   const struct in6_addr *saddr)
 {
+	struct net_device *dev;
 	struct rt6_info *rt;
 
 	/*
@@ -971,8 +1051,10 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
 		ort = (struct rt6_info *)ort->dst.from;
 
-	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
-
+	rcu_read_lock();
+	dev = ip6_rt_get_dev_rcu(ort);
+	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
+	rcu_read_unlock();
 	if (!rt)
 		return NULL;
 
@@ -1000,11 +1082,13 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 
 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 {
+	struct net_device *dev;
 	struct rt6_info *pcpu_rt;
 
-	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
-				  rt->dst.dev, rt->dst.flags);
-
+	rcu_read_lock();
+	dev = ip6_rt_get_dev_rcu(rt);
+	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
+	rcu_read_unlock();
 	if (!pcpu_rt)
 		return NULL;
 	ip6_rt_copy_init(pcpu_rt, rt);
@@ -1013,7 +1097,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 	return pcpu_rt;
 }
 
-/* It should be called with read_lock_bh(&tb6_lock) acquired */
+/* It should be called with rcu_read_lock() acquired */
 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 {
 	struct rt6_info *pcpu_rt, **p;
@@ -1021,16 +1105,14 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 	p = this_cpu_ptr(rt->rt6i_pcpu);
 	pcpu_rt = *p;
 
-	if (pcpu_rt) {
-		dst_hold(&pcpu_rt->dst);
+	if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
 		rt6_dst_from_metrics_check(pcpu_rt);
-	}
+
 	return pcpu_rt;
 }
 
 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
 {
-	struct fib6_table *table = rt->rt6i_table;
 	struct rt6_info *pcpu_rt, *prev, **p;
 
 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
@@ -1041,36 +1123,526 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
 		return net->ipv6.ip6_null_entry;
 	}
 
-	read_lock_bh(&table->tb6_lock);
-	if (rt->rt6i_pcpu) {
-		p = this_cpu_ptr(rt->rt6i_pcpu);
-		prev = cmpxchg(p, NULL, pcpu_rt);
-		if (prev) {
-			/* If someone did it before us, return prev instead */
-			dst_release_immediate(&pcpu_rt->dst);
-			pcpu_rt = prev;
-		}
-	} else {
-		/* rt has been removed from the fib6 tree
-		 * before we have a chance to acquire the read_lock.
-		 * In this case, don't brother to create a pcpu rt
-		 * since rt is going away anyway.  The next
-		 * dst_check() will trigger a re-lookup.
-		 */
-		dst_release_immediate(&pcpu_rt->dst);
-		pcpu_rt = rt;
-	}
 	dst_hold(&pcpu_rt->dst);
+	p = this_cpu_ptr(rt->rt6i_pcpu);
+	prev = cmpxchg(p, NULL, pcpu_rt);
+	BUG_ON(prev);
+
 	rt6_dst_from_metrics_check(pcpu_rt);
-	read_unlock_bh(&table->tb6_lock);
 	return pcpu_rt;
 }
 
+/* exception hash table implementation
+ */
+static DEFINE_SPINLOCK(rt6_exception_lock);
+
+/* Remove rt6_ex from hash table and free the memory
+ * Caller must hold rt6_exception_lock
+ */
+static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
+				 struct rt6_exception *rt6_ex)
+{
+	struct net *net;
+
+	if (!bucket || !rt6_ex)
+		return;
+
+	net = dev_net(rt6_ex->rt6i->dst.dev);
+	rt6_ex->rt6i->rt6i_node = NULL;
+	hlist_del_rcu(&rt6_ex->hlist);
+	rt6_release(rt6_ex->rt6i);
+	kfree_rcu(rt6_ex, rcu);
+	WARN_ON_ONCE(!bucket->depth);
+	bucket->depth--;
+	net->ipv6.rt6_stats->fib_rt_cache--;
+}
+
+/* Remove oldest rt6_ex in bucket and free the memory
+ * Caller must hold rt6_exception_lock
+ */
+static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
+{
+	struct rt6_exception *rt6_ex, *oldest = NULL;
+
+	if (!bucket)
+		return;
+
+	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
+			oldest = rt6_ex;
+	}
+	rt6_remove_exception(bucket, oldest);
+}
+
+static u32 rt6_exception_hash(const struct in6_addr *dst,
+			      const struct in6_addr *src)
+{
+	static u32 seed __read_mostly;
+	u32 val;
+
+	net_get_random_once(&seed, sizeof(seed));
+	val = jhash(dst, sizeof(*dst), seed);
+
+#ifdef CONFIG_IPV6_SUBTREES
+	if (src)
+		val = jhash(src, sizeof(*src), val);
+#endif
+	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
+}
+
+/* Helper function to find the cached rt in the hash table
+ * and update bucket pointer to point to the bucket for this
+ * (daddr, saddr) pair
+ * Caller must hold rt6_exception_lock
+ */
+static struct rt6_exception *
+__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
+			      const struct in6_addr *daddr,
+			      const struct in6_addr *saddr)
+{
+	struct rt6_exception *rt6_ex;
+	u32 hval;
+
+	if (!(*bucket) || !daddr)
+		return NULL;
+
+	hval = rt6_exception_hash(daddr, saddr);
+	*bucket += hval;
+
+	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
+		struct rt6_info *rt6 = rt6_ex->rt6i;
+		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
+
+#ifdef CONFIG_IPV6_SUBTREES
+		if (matched && saddr)
+			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
+#endif
+		if (matched)
+			return rt6_ex;
+	}
+	return NULL;
+}
+
+/* Helper function to find the cached rt in the hash table
+ * and update bucket pointer to point to the bucket for this
+ * (daddr, saddr) pair
+ * Caller must hold rcu_read_lock()
+ */
+static struct rt6_exception *
+__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
+			 const struct in6_addr *daddr,
+			 const struct in6_addr *saddr)
+{
+	struct rt6_exception *rt6_ex;
+	u32 hval;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (!(*bucket) || !daddr)
+		return NULL;
+
+	hval = rt6_exception_hash(daddr, saddr);
+	*bucket += hval;
+
+	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
+		struct rt6_info *rt6 = rt6_ex->rt6i;
+		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
+
+#ifdef CONFIG_IPV6_SUBTREES
+		if (matched && saddr)
+			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
+#endif
+		if (matched)
+			return rt6_ex;
+	}
+	return NULL;
+}
+
+static int rt6_insert_exception(struct rt6_info *nrt,
+				struct rt6_info *ort)
+{
+	struct net *net = dev_net(ort->dst.dev);
+	struct rt6_exception_bucket *bucket;
+	struct in6_addr *src_key = NULL;
+	struct rt6_exception *rt6_ex;
+	int err = 0;
+
+	/* ort can't be a cache or pcpu route */
+	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
+		ort = (struct rt6_info *)ort->dst.from;
+	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
+
+	spin_lock_bh(&rt6_exception_lock);
+
+	if (ort->exception_bucket_flushed) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
+					lockdep_is_held(&rt6_exception_lock));
+	if (!bucket) {
+		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
+				 GFP_ATOMIC);
+		if (!bucket) {
+			err = -ENOMEM;
+			goto out;
+		}
+		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+	}
+
+#ifdef CONFIG_IPV6_SUBTREES
+	/* rt6i_src.plen != 0 indicates ort is in subtree
+	 * and exception table is indexed by a hash of
+	 * both rt6i_dst and rt6i_src.
+	 * Otherwise, the exception table is indexed by
+	 * a hash of only rt6i_dst.
+	 */
+	if (ort->rt6i_src.plen)
+		src_key = &nrt->rt6i_src.addr;
+#endif
+
+	/* Update rt6i_prefsrc as it could be changed
+	 * in rt6_remove_prefsrc()
+	 */
+	nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
+	/* rt6_mtu_change() might lower mtu on ort.
+	 * Only insert this exception route if its mtu
+	 * is less than ort's mtu value.
+	 */
+	if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
+					       src_key);
+	if (rt6_ex)
+		rt6_remove_exception(bucket, rt6_ex);
+
+	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
+	if (!rt6_ex) {
+		err = -ENOMEM;
+		goto out;
+	}
+	rt6_ex->rt6i = nrt;
+	rt6_ex->stamp = jiffies;
+	atomic_inc(&nrt->rt6i_ref);
+	nrt->rt6i_node = ort->rt6i_node;
+	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
+	bucket->depth++;
+	net->ipv6.rt6_stats->fib_rt_cache++;
+
+	if (bucket->depth > FIB6_MAX_DEPTH)
+		rt6_exception_remove_oldest(bucket);
+
+out:
+	spin_unlock_bh(&rt6_exception_lock);
+
+	/* Update fn->fn_sernum to invalidate all cached dst */
+	if (!err) {
+		fib6_update_sernum(ort);
+		fib6_force_start_gc(net);
+	}
+
+	return err;
+}
+
+void rt6_flush_exceptions(struct rt6_info *rt)
+{
+	struct rt6_exception_bucket *bucket;
+	struct rt6_exception *rt6_ex;
+	struct hlist_node *tmp;
+	int i;
+
+	spin_lock_bh(&rt6_exception_lock);
+	/* Prevent rt6_insert_exception() from recreating the bucket list */
+	rt->exception_bucket_flushed = 1;
+
+	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+				    lockdep_is_held(&rt6_exception_lock));
+	if (!bucket)
+		goto out;
+
+	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
+			rt6_remove_exception(bucket, rt6_ex);
+		WARN_ON_ONCE(bucket->depth);
+		bucket++;
+	}
+
+out:
+	spin_unlock_bh(&rt6_exception_lock);
+}
+
+/* Find cached rt in the hash table inside passed in rt
+ * Caller has to hold rcu_read_lock()
+ */
+static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+					   struct in6_addr *daddr,
+					   struct in6_addr *saddr)
+{
+	struct rt6_exception_bucket *bucket;
+	struct in6_addr *src_key = NULL;
+	struct rt6_exception *rt6_ex;
+	struct rt6_info *res = NULL;
+
+	bucket = rcu_dereference(rt->rt6i_exception_bucket);
+
+#ifdef CONFIG_IPV6_SUBTREES
+	/* rt6i_src.plen != 0 indicates rt is in subtree
+	 * and exception table is indexed by a hash of
+	 * both rt6i_dst and rt6i_src.
+	 * Otherwise, the exception table is indexed by
+	 * a hash of only rt6i_dst.
+	 */
+	if (rt->rt6i_src.plen)
+		src_key = saddr;
+#endif
+	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
+
+	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
+		res = rt6_ex->rt6i;
+
+	return res;
+}
+
+/* Remove the passed in cached rt from the hash table that contains it */
+int rt6_remove_exception_rt(struct rt6_info *rt)
+{
+	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
+	struct rt6_exception_bucket *bucket;
+	struct in6_addr *src_key = NULL;
+	struct rt6_exception *rt6_ex;
+	int err;
+
+	if (!from ||
+	    !(rt->rt6i_flags & RTF_CACHE))
+		return -EINVAL;
+
+	if (!rcu_access_pointer(from->rt6i_exception_bucket))
+		return -ENOENT;
+
+	spin_lock_bh(&rt6_exception_lock);
+	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
+				    lockdep_is_held(&rt6_exception_lock));
+#ifdef CONFIG_IPV6_SUBTREES
+	/* rt6i_src.plen != 0 indicates 'from' is in subtree
+	 * and exception table is indexed by a hash of
+	 * both rt6i_dst and rt6i_src.
+	 * Otherwise, the exception table is indexed by
+	 * a hash of only rt6i_dst.
+	 */
+	if (from->rt6i_src.plen)
+		src_key = &rt->rt6i_src.addr;
+#endif
+	rt6_ex = __rt6_find_exception_spinlock(&bucket,
+					       &rt->rt6i_dst.addr,
+					       src_key);
+	if (rt6_ex) {
+		rt6_remove_exception(bucket, rt6_ex);
+		err = 0;
+	} else {
+		err = -ENOENT;
+	}
+
+	spin_unlock_bh(&rt6_exception_lock);
+	return err;
+}
+
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
+	struct rt6_exception_bucket *bucket;
+	struct in6_addr *src_key = NULL;
+	struct rt6_exception *rt6_ex;
+
+	if (!from ||
+	    !(rt->rt6i_flags & RTF_CACHE))
+		return;
+
+	rcu_read_lock();
+	bucket = rcu_dereference(from->rt6i_exception_bucket);
+
+#ifdef CONFIG_IPV6_SUBTREES
+	/* rt6i_src.plen != 0 indicates 'from' is in subtree
+	 * and exception table is indexed by a hash of
+	 * both rt6i_dst and rt6i_src.
+	 * Otherwise, the exception table is indexed by
+	 * a hash of only rt6i_dst.
+	 */
+	if (from->rt6i_src.plen)
+		src_key = &rt->rt6i_src.addr;
+#endif
+	rt6_ex = __rt6_find_exception_rcu(&bucket,
+					  &rt->rt6i_dst.addr,
+					  src_key);
+	if (rt6_ex)
+		rt6_ex->stamp = jiffies;
+
+	rcu_read_unlock();
+}
+
+static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
+{
+	struct rt6_exception_bucket *bucket;
+	struct rt6_exception *rt6_ex;
+	int i;
+
+	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+					lockdep_is_held(&rt6_exception_lock));
+
+	if (bucket) {
+		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
+			}
+			bucket++;
+		}
+	}
+}
+
+static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
+{
+	struct rt6_exception_bucket *bucket;
+	struct rt6_exception *rt6_ex;
+	int i;
+
+	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+					lockdep_is_held(&rt6_exception_lock));
+
+	if (bucket) {
+		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+				struct rt6_info *entry = rt6_ex->rt6i;
+				/* For RTF_CACHE with rt6i_pmtu == 0
+				 * (i.e. a redirected route),
+				 * the metrics of its rt->dst.from has already
+				 * been updated.
+				 */
+				if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
+					entry->rt6i_pmtu = mtu;
+			}
+			bucket++;
+		}
+	}
+}
+
+#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
+
+static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
+					struct in6_addr *gateway)
+{
+	struct rt6_exception_bucket *bucket;
+	struct rt6_exception *rt6_ex;
+	struct hlist_node *tmp;
+	int i;
+
+	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+		return;
+
+	spin_lock_bh(&rt6_exception_lock);
+	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+				     lockdep_is_held(&rt6_exception_lock));
+
+	if (bucket) {
+		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+			hlist_for_each_entry_safe(rt6_ex, tmp,
+						  &bucket->chain, hlist) {
+				struct rt6_info *entry = rt6_ex->rt6i;
+
+				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
+				    RTF_CACHE_GATEWAY &&
+				    ipv6_addr_equal(gateway,
+						    &entry->rt6i_gateway)) {
+					rt6_remove_exception(bucket, rt6_ex);
+				}
+			}
+			bucket++;
+		}
+	}
+
+	spin_unlock_bh(&rt6_exception_lock);
+}
+
+static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
+				      struct rt6_exception *rt6_ex,
+				      struct fib6_gc_args *gc_args,
+				      unsigned long now)
+{
+	struct rt6_info *rt = rt6_ex->rt6i;
+
+	/* we are pruning and obsoleting aged-out and non gateway exceptions
+	 * even if others still have references to them, so that on next
+	 * dst_check() such references can be dropped.
+	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
+	 * expired, independently from their aging, as per RFC 8201 section 4
+	 */
+	if (!(rt->rt6i_flags & RTF_EXPIRES) &&
+	    time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+		RT6_TRACE("aging clone %p\n", rt);
+		rt6_remove_exception(bucket, rt6_ex);
+		return;
+	} else if (rt->rt6i_flags & RTF_GATEWAY) {
+		struct neighbour *neigh;
+		__u8 neigh_flags = 0;
+
+		neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
+		if (neigh) {
+			neigh_flags = neigh->flags;
+			neigh_release(neigh);
+		}
+		if (!(neigh_flags & NTF_ROUTER)) {
+			RT6_TRACE("purging route %p via non-router but gateway\n",
+				  rt);
+			rt6_remove_exception(bucket, rt6_ex);
+			return;
+		}
+	} else if (__rt6_check_expired(rt)) {
+		RT6_TRACE("purging expired route %p\n", rt);
+		rt6_remove_exception(bucket, rt6_ex);
+		return;
+	}
+	gc_args->more++;
+}
+
+void rt6_age_exceptions(struct rt6_info *rt,
+			struct fib6_gc_args *gc_args,
+			unsigned long now)
+{
+	struct rt6_exception_bucket *bucket;
+	struct rt6_exception *rt6_ex;
+	struct hlist_node *tmp;
+	int i;
+
+	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+		return;
+
+	spin_lock_bh(&rt6_exception_lock);
+	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+				    lockdep_is_held(&rt6_exception_lock));
+
+	if (bucket) {
+		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+			hlist_for_each_entry_safe(rt6_ex, tmp,
+						  &bucket->chain, hlist) {
+				rt6_age_examine_exception(bucket, rt6_ex,
+							  gc_args, now);
+			}
+			bucket++;
+		}
+	}
+	spin_unlock_bh(&rt6_exception_lock);
+}
+
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			       int oif, struct flowi6 *fl6, int flags)
 {
 	struct fib6_node *fn, *saved_fn;
-	struct rt6_info *rt;
+	struct rt6_info *rt, *rt_cache;
 	int strict = 0;
 
 	strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1078,7 +1650,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 	if (net->ipv6.devconf_all->forwarding == 0)
 		strict |= RT6_LOOKUP_F_REACHABLE;
 
-	read_lock_bh(&table->tb6_lock);
+	rcu_read_lock();
 
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 	saved_fn = fn;
@@ -1087,7 +1659,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		oif = 0;
 
 redo_rt6_select:
-	rt = rt6_select(fn, oif, strict);
+	rt = rt6_select(net, fn, oif, strict);
 	if (rt->rt6i_nsiblings)
 		rt = rt6_multipath_select(rt, fl6, oif, strict);
 	if (rt == net->ipv6.ip6_null_entry) {
@@ -1102,14 +1674,23 @@ redo_rt6_select:
 		}
 	}
 
+	/* Search through the exception table */
+	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+	if (rt_cache)
+		rt = rt_cache;
 
-	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
-		dst_use(&rt->dst, jiffies);
-		read_unlock_bh(&table->tb6_lock);
-
-		rt6_dst_from_metrics_check(rt);
-
-		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+	if (rt == net->ipv6.ip6_null_entry) {
+		rcu_read_unlock();
+		dst_hold(&rt->dst);
+		trace_fib6_table_lookup(net, rt, table, fl6);
+		return rt;
+	} else if (rt->rt6i_flags & RTF_CACHE) {
+		if (ip6_hold_safe(net, &rt, true)) {
+			dst_use_noref(&rt->dst, jiffies);
+			rt6_dst_from_metrics_check(rt);
+		}
+		rcu_read_unlock();
+		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
@@ -1121,8 +1702,14 @@ redo_rt6_select:
 
 		struct rt6_info *uncached_rt;
 
-		dst_use(&rt->dst, jiffies);
-		read_unlock_bh(&table->tb6_lock);
+		if (ip6_hold_safe(net, &rt, true)) {
+			dst_use_noref(&rt->dst, jiffies);
+		} else {
+			rcu_read_unlock();
+			uncached_rt = rt;
+			goto uncached_rt_out;
+		}
+		rcu_read_unlock();
 
 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
 		dst_release(&rt->dst);
@@ -1132,12 +1719,14 @@ redo_rt6_select:
 			 * No need for another dst_hold()
 			 */
 			rt6_uncached_list_add(uncached_rt);
+			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
 		} else {
 			uncached_rt = net->ipv6.ip6_null_entry;
 			dst_hold(&uncached_rt->dst);
 		}
 
-		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
+uncached_rt_out:
+		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
 		return uncached_rt;
 
 	} else {
@@ -1145,26 +1734,28 @@ redo_rt6_select:
 
 		struct rt6_info *pcpu_rt;
 
-		rt->dst.lastuse = jiffies;
-		rt->dst.__use++;
+		dst_use_noref(&rt->dst, jiffies);
+		local_bh_disable();
 		pcpu_rt = rt6_get_pcpu_route(rt);
 
-		if (pcpu_rt) {
-			read_unlock_bh(&table->tb6_lock);
-		} else {
-			/* We have to do the read_unlock first
-			 * because rt6_make_pcpu_route() may trigger
-			 * ip6_dst_gc() which will take the write_lock.
-			 */
-			dst_hold(&rt->dst);
-			read_unlock_bh(&table->tb6_lock);
-			pcpu_rt = rt6_make_pcpu_route(rt);
-			dst_release(&rt->dst);
+		if (!pcpu_rt) {
+			/* atomic_inc_not_zero() is needed when using rcu */
+			if (atomic_inc_not_zero(&rt->rt6i_ref)) {
+				/* No dst_hold() on rt is needed because grabbing
+				 * rt->rt6i_ref makes sure rt can't be released.
+				 */
+				pcpu_rt = rt6_make_pcpu_route(rt);
+				rt6_release(rt);
+			} else {
+				/* rt is already removed from tree */
+				pcpu_rt = net->ipv6.ip6_null_entry;
+				dst_hold(&pcpu_rt->dst);
+			}
 		}
-
-		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
+		local_bh_enable();
+		rcu_read_unlock();
+		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
 		return pcpu_rt;
-
 	}
 }
 EXPORT_SYMBOL_GPL(ip6_pol_route);
@@ -1186,6 +1777,54 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
 }
 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
 
+static void ip6_multipath_l3_keys(const struct sk_buff *skb,
+				  struct flow_keys *keys)
+{
+	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+	const struct ipv6hdr *key_iph = outer_iph;
+	const struct ipv6hdr *inner_iph;
+	const struct icmp6hdr *icmph;
+	struct ipv6hdr _inner_iph;
+
+	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
+		goto out;
+
+	icmph = icmp6_hdr(skb);
+	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
+	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
+	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
+	    icmph->icmp6_type != ICMPV6_PARAMPROB)
+		goto out;
+
+	inner_iph = skb_header_pointer(skb,
+				       skb_transport_offset(skb) + sizeof(*icmph),
+				       sizeof(_inner_iph), &_inner_iph);
+	if (!inner_iph)
+		goto out;
+
+	key_iph = inner_iph;
+out:
+	memset(keys, 0, sizeof(*keys));
+	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+	keys->addrs.v6addrs.src = key_iph->saddr;
+	keys->addrs.v6addrs.dst = key_iph->daddr;
+	keys->tags.flow_label = ip6_flowinfo(key_iph);
+	keys->basic.ip_proto = key_iph->nexthdr;
+}
+
+/* if skb is set it will be used and fl6 can be NULL */
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+{
+	struct flow_keys hash_keys;
+
+	if (skb) {
+		ip6_multipath_l3_keys(skb, &hash_keys);
+		return flow_hash_from_keys(&hash_keys);
+	}
+
+	return get_hash_from_flowi6(fl6);
+}
+
 void ip6_route_input(struct sk_buff *skb)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -1204,6 +1843,8 @@ void ip6_route_input(struct sk_buff *skb)
 	tun_info = skb_tunnel_info(skb);
 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
+		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
 	skb_dst_drop(skb);
 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 }
@@ -1250,9 +1891,10 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 	struct dst_entry *new = NULL;
 
 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
-		       DST_OBSOLETE_NONE, 0);
+		       DST_OBSOLETE_DEAD, 0);
 	if (rt) {
 		rt6_info_init(rt);
+		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
 
 		new = &rt->dst;
 		new->__use = 1;
@@ -1289,7 +1931,9 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt)
 
 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
 {
-	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+	u32 rt_cookie = 0;
+
+	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
 		return NULL;
 
 	if (rt6_check_expired(rt))
@@ -1357,8 +2001,14 @@ static void ip6_link_failure(struct sk_buff *skb)
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (dst_hold_safe(&rt->dst))
 				ip6_del_rt(rt);
-		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
-			rt->rt6i_node->fn_sernum = -1;
+		} else {
+			struct fib6_node *fn;
+
+			rcu_read_lock();
+			fn = rcu_dereference(rt->rt6i_node);
+			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+				fn->fn_sernum = -1;
+			rcu_read_unlock();
 		}
 	}
 }
@@ -1375,7 +2025,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
 {
 	return !(rt->rt6i_flags & RTF_CACHE) &&
-		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+		(rt->rt6i_flags & RTF_PCPU ||
+		 rcu_access_pointer(rt->rt6i_node));
 }
 
 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -1407,23 +2058,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 
 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
 		rt6_do_update_pmtu(rt6, mtu);
+		/* update rt6_ex->stamp for cache */
+		if (rt6->rt6i_flags & RTF_CACHE)
+			rt6_update_exception_stamp_rt(rt6);
 	} else if (daddr) {
 		struct rt6_info *nrt6;
 
 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
 		if (nrt6) {
 			rt6_do_update_pmtu(nrt6, mtu);
-
-			/* ip6_ins_rt(nrt6) will bump the
-			 * rt6->rt6i_node->fn_sernum
-			 * which will fail the next rt6_check() and
-			 * invalidate the sk->sk_dst_cache.
-			 */
-			ip6_ins_rt(nrt6);
-			/* Release the reference taken in
-			 * ip6_rt_cache_alloc()
-			 */
-			dst_release(&nrt6->dst);
+			if (rt6_insert_exception(nrt6, rt6))
+				dst_release_immediate(&nrt6->dst);
 		}
 	}
 }
@@ -1487,7 +2132,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 					     int flags)
 {
 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
-	struct rt6_info *rt;
+	struct rt6_info *rt, *rt_cache;
 	struct fib6_node *fn;
 
 	/* Get the "current" route for this destination and
@@ -1500,10 +2145,10 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	 * routes.
 	 */
 
-	read_lock_bh(&table->tb6_lock);
+	rcu_read_lock();
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
-	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+	for_each_fib6_node_rt_rcu(fn) {
 		if (rt6_check_expired(rt))
 			continue;
 		if (rt->dst.error)
@@ -1512,8 +2157,23 @@ restart:
 			continue;
 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
 			continue;
-		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+		/* rt_cache's gateway might be different from its 'parent'
+		 * in the case of an ip redirect.
+		 * So we keep searching in the exception table if the gateway
+		 * is different.
+		 */
+		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
+			rt_cache = rt6_find_cached_rt(rt,
+						      &fl6->daddr,
+						      &fl6->saddr);
+			if (rt_cache &&
+			    ipv6_addr_equal(&rdfl->gateway,
+					    &rt_cache->rt6i_gateway)) {
+				rt = rt_cache;
+				break;
+			}
 			continue;
+		}
 		break;
 	}
 
@@ -1531,11 +2191,11 @@ restart:
 	}
 
 out:
-	dst_hold(&rt->dst);
+	ip6_hold_safe(net, &rt, true);
 
-	read_unlock_bh(&table->tb6_lock);
+	rcu_read_unlock();
 
-	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+	trace_fib6_table_lookup(net, rt, table, fl6);
 	return rt;
 };
 
@@ -1682,6 +2342,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	 * do proper release of the net_device
 	 */
 	rt6_uncached_list_add(rt);
+	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
 
 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
 
@@ -1717,6 +2378,7 @@ out:
 static int ip6_convert_metrics(struct mx6_config *mxc,
 			       const struct fib6_config *cfg)
 {
+	struct net *net = cfg->fc_nlinfo.nl_net;
 	bool ecn_ca = false;
 	struct nlattr *nla;
 	int remaining;
@@ -1742,7 +2404,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
 			char tmp[TCP_CA_NAME_MAX];
 
 			nla_strlcpy(tmp, nla, sizeof(tmp));
-			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+			val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
 			if (val == TCP_CA_UNSPEC)
 				goto err;
 		} else {
@@ -1817,6 +2479,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		goto out;
 	}
 
+	/* RTF_CACHE is an internal flag; can not be set by userspace */
+	if (cfg->fc_flags & RTF_CACHE) {
+		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
+		goto out;
+	}
+
 	if (cfg->fc_dst_len > 128) {
 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
 		goto out;
@@ -2132,9 +2800,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	}
 
 	table = rt->rt6i_table;
-	write_lock_bh(&table->tb6_lock);
+	spin_lock_bh(&table->tb6_lock);
 	err = fib6_del(rt, info);
-	write_unlock_bh(&table->tb6_lock);
+	spin_unlock_bh(&table->tb6_lock);
 
 out:
 	ip6_rt_put(rt);
@@ -2160,7 +2828,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 	if (rt == net->ipv6.ip6_null_entry)
 		goto out_put;
 	table = rt->rt6i_table;
-	write_lock_bh(&table->tb6_lock);
+	spin_lock_bh(&table->tb6_lock);
 
 	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
 		struct rt6_info *sibling, *next_sibling;
@@ -2190,7 +2858,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 
 	err = fib6_del(rt, info);
 out_unlock:
-	write_unlock_bh(&table->tb6_lock);
+	spin_unlock_bh(&table->tb6_lock);
 out_put:
 	ip6_rt_put(rt);
 
@@ -2204,9 +2872,9 @@ out_put:
 static int ip6_route_del(struct fib6_config *cfg,
 			 struct netlink_ext_ack *extack)
 {
+	struct rt6_info *rt, *rt_cache;
 	struct fib6_table *table;
 	struct fib6_node *fn;
-	struct rt6_info *rt;
 	int err = -ESRCH;
 
 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
@@ -2215,17 +2883,22 @@ static int ip6_route_del(struct fib6_config *cfg,
 		return err;
 	}
 
-	read_lock_bh(&table->tb6_lock);
+	rcu_read_lock();
 
 	fn = fib6_locate(&table->tb6_root,
 			 &cfg->fc_dst, cfg->fc_dst_len,
-			 &cfg->fc_src, cfg->fc_src_len);
+			 &cfg->fc_src, cfg->fc_src_len,
+			 !(cfg->fc_flags & RTF_CACHE));
 
 	if (fn) {
-		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
-			if ((rt->rt6i_flags & RTF_CACHE) &&
-			    !(cfg->fc_flags & RTF_CACHE))
-				continue;
+		for_each_fib6_node_rt_rcu(fn) {
+			if (cfg->fc_flags & RTF_CACHE) {
+				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
+							      &cfg->fc_src);
+				if (!rt_cache)
+					continue;
+				rt = rt_cache;
+			}
 			if (cfg->fc_ifindex &&
 			    (!rt->dst.dev ||
 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -2237,8 +2910,9 @@ static int ip6_route_del(struct fib6_config *cfg,
 				continue;
 			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
 				continue;
-			dst_hold(&rt->dst);
-			read_unlock_bh(&table->tb6_lock);
+			if (!dst_hold_safe(&rt->dst))
+				break;
+			rcu_read_unlock();
 
 			/* if gateway was specified only delete the one hop */
 			if (cfg->fc_flags & RTF_GATEWAY)
@@ -2247,7 +2921,7 @@ static int ip6_route_del(struct fib6_config *cfg,
 			return __ip6_del_rt_siblings(rt, cfg);
 		}
 	}
-	read_unlock_bh(&table->tb6_lock);
+	rcu_read_unlock();
 
 	return err;
 }
@@ -2351,8 +3025,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	nrt->rt6i_protocol = RTPROT_REDIRECT;
 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
-	if (ip6_ins_rt(nrt))
-		goto out_release;
+	/* No need to remove rt from the exception table if rt is
+	 * a cached route because rt6_insert_exception() will
+	 * take care of it
+	 */
+	if (rt6_insert_exception(nrt, rt)) {
+		dst_release_immediate(&nrt->dst);
+		goto out;
+	}
 
 	netevent.old = &rt->dst;
 	netevent.new = &nrt->dst;
@@ -2360,17 +3040,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 	netevent.neigh = neigh;
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
-	if (rt->rt6i_flags & RTF_CACHE) {
-		rt = (struct rt6_info *) dst_clone(&rt->dst);
-		ip6_del_rt(rt);
-	}
-
-out_release:
-	/* Release the reference taken in
-	 * ip6_rt_cache_alloc()
-	 */
-	dst_release(&nrt->dst);
-
 out:
 	neigh_release(neigh);
 }
@@ -2427,23 +3096,23 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	if (!table)
 		return NULL;
 
-	read_lock_bh(&table->tb6_lock);
-	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
+	rcu_read_lock();
+	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
 	if (!fn)
 		goto out;
 
-	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+	for_each_fib6_node_rt_rcu(fn) {
 		if (rt->dst.dev->ifindex != ifindex)
 			continue;
 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
 			continue;
 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
 			continue;
-		dst_hold(&rt->dst);
+		ip6_hold_safe(NULL, &rt, false);
 		break;
 	}
 out:
-	read_unlock_bh(&table->tb6_lock);
+	rcu_read_unlock();
 	return rt;
 }
 
@@ -2489,16 +3158,16 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
 	if (!table)
 		return NULL;
 
-	read_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+	rcu_read_lock();
+	for_each_fib6_node_rt_rcu(&table->tb6_root) {
 		if (dev == rt->dst.dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
 			break;
 	}
 	if (rt)
-		dst_hold(&rt->dst);
-	read_unlock_bh(&table->tb6_lock);
+		ip6_hold_safe(NULL, &rt, false);
+	rcu_read_unlock();
 	return rt;
 }
 
@@ -2536,17 +3205,20 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table)
 	struct rt6_info *rt;
 
 restart:
-	read_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+	rcu_read_lock();
+	for_each_fib6_node_rt_rcu(&table->tb6_root) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
-			dst_hold(&rt->dst);
-			read_unlock_bh(&table->tb6_lock);
-			ip6_del_rt(rt);
+			if (dst_hold_safe(&rt->dst)) {
+				rcu_read_unlock();
+				ip6_del_rt(rt);
+			} else {
+				rcu_read_unlock();
+			}
 			goto restart;
 		}
 	}
-	read_unlock_bh(&table->tb6_lock);
+	rcu_read_unlock();
 
 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
 }
@@ -2688,15 +3360,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 {
 	u32 tb_id;
 	struct net *net = dev_net(idev->dev);
-	struct net_device *dev = net->loopback_dev;
+	struct net_device *dev = idev->dev;
 	struct rt6_info *rt;
 
-	/* use L3 Master device as loopback for host routes if device
-	 * is enslaved and address is not link local or multicast
-	 */
-	if (!rt6_need_strict(addr))
-		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
-
 	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
 	if (!rt)
 		return ERR_PTR(-ENOMEM);
@@ -2740,8 +3406,12 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
 	if (((void *)rt->dst.dev == dev || !dev) &&
 	    rt != net->ipv6.ip6_null_entry &&
 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
+		spin_lock_bh(&rt6_exception_lock);
 		/* remove prefsrc entry */
 		rt->rt6i_prefsrc.plen = 0;
+		/* need to update cache as well */
+		rt6_exceptions_remove_prefsrc(rt);
+		spin_unlock_bh(&rt6_exception_lock);
 	}
 	return 0;
 }
@@ -2758,18 +3428,23 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
 }
 
 #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
-#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
 
 /* Remove routers and update dst entries when gateway turn into host. */
 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
 {
 	struct in6_addr *gateway = (struct in6_addr *)arg;
 
-	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
-	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
-	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+	if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
+	    ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
 		return -1;
 	}
+
+	/* Further clean up cached routes in exception table.
+	 * This is needed because cached route may have a different
+	 * gateway than its 'parent' in the case of an ip redirect.
+	 */
+	rt6_exceptions_clean_tohost(rt, gateway);
+
 	return 0;
 }
 
@@ -2848,19 +3523,14 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	if (rt->dst.dev == arg->dev &&
 	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
-		if (rt->rt6i_flags & RTF_CACHE) {
-			/* For RTF_CACHE with rt6i_pmtu == 0
-			 * (i.e. a redirected route),
-			 * the metrics of its rt->dst.from has already
-			 * been updated.
-			 */
-			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
-				rt->rt6i_pmtu = arg->mtu;
-		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
-			   (dst_mtu(&rt->dst) < arg->mtu &&
-			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+		spin_lock_bh(&rt6_exception_lock);
+		if (dst_mtu(&rt->dst) >= arg->mtu ||
+		    (dst_mtu(&rt->dst) < arg->mtu &&
+		     dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
 		}
+		rt6_exceptions_update_pmtu(rt, arg->mtu);
+		spin_unlock_bh(&rt6_exception_lock);
 	}
 	return 0;
 }
@@ -3327,6 +3997,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
 			goto nla_put_failure;
 	}
 
+	if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
+		*flags |= RTNH_F_OFFLOAD;
+
 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
 	if (!skip_oif && rt->dst.dev &&
 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
@@ -3605,8 +4278,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		struct net_device *dev;
 		int flags = 0;
 
-		dev = __dev_get_by_index(net, iif);
+		rcu_read_lock();
+
+		dev = dev_get_by_index_rcu(net, iif);
 		if (!dev) {
+			rcu_read_unlock();
 			err = -ENODEV;
 			goto errout;
 		}
@@ -3618,15 +4294,19 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 		if (!fibmatch)
 			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+		else
+			dst = ip6_route_lookup(net, &fl6, 0);
+
+		rcu_read_unlock();
 	} else {
 		fl6.flowi6_oif = oif;
 
 		if (!fibmatch)
 			dst = ip6_route_output(net, NULL, &fl6);
+		else
+			dst = ip6_route_lookup(net, &fl6, 0);
 	}
 
-	if (fibmatch)
-		dst = ip6_route_lookup(net, &fl6, 0);
 
 	rt = container_of(dst, struct rt6_info, dst);
 	if (rt->dst.error) {
@@ -3751,7 +4431,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
 		   net->ipv6.rt6_stats->fib_nodes,
 		   net->ipv6.rt6_stats->fib_route_nodes,
-		   net->ipv6.rt6_stats->fib_rt_alloc,
+		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
 		   net->ipv6.rt6_stats->fib_rt_entries,
 		   net->ipv6.rt6_stats->fib_rt_cache,
 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
@@ -3918,6 +4598,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 			 ip6_template_metrics, true);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+	net->ipv6.fib6_has_custom_rules = false;
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
 					       sizeof(*net->ipv6.ip6_prohibit_entry),
 					       GFP_KERNEL);
@@ -4093,9 +4774,10 @@ int __init ip6_route_init(void)
 		goto fib6_rules_init;
 
 	ret = -ENOBUFS;
-	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
-	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
-	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
+	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
+	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
+	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
+			    RTNL_FLAG_DOIT_UNLOCKED))
 		goto out_register_late_subsys;
 
 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 15fba55e3da8..c81407770956 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -40,7 +40,7 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
 	if (((srh->hdrlen + 1) << 3) != len)
 		return false;
 
-	if (srh->segments_left != srh->first_segment)
+	if (srh->segments_left > srh->first_segment)
 		return false;
 
 	tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
@@ -456,6 +456,10 @@ int __init seg6_init(void)
 	err = seg6_iptunnel_init();
 	if (err)
 		goto out_unregister_pernet;
+
+	err = seg6_local_init();
+	if (err)
+		goto out_unregister_pernet;
 #endif
 
 #ifdef CONFIG_IPV6_SEG6_HMAC
@@ -471,6 +475,7 @@ out:
 #ifdef CONFIG_IPV6_SEG6_HMAC
 out_unregister_iptun:
 #ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+	seg6_local_exit();
 	seg6_iptunnel_exit();
 #endif
 #endif
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index f950cb53d5e3..33fb35cbfac1 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/errno.h>
+#include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
@@ -110,7 +111,7 @@ static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id)
 	struct seg6_hmac_algo *algo;
 	int i, alg_count;
 
-	alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+	alg_count = ARRAY_SIZE(hmac_algos);
 	for (i = 0; i < alg_count; i++) {
 		algo = &hmac_algos[i];
 		if (algo->alg_id == alg_id)
@@ -360,7 +361,7 @@ static int seg6_hmac_init_algo(void)
 	struct shash_desc *shash;
 	int i, alg_count, cpu;
 
-	alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+	alg_count = ARRAY_SIZE(hmac_algos);
 
 	for (i = 0; i < alg_count; i++) {
 		struct crypto_shash **p_tfm;
@@ -421,7 +422,7 @@ void seg6_hmac_exit(void)
 	struct seg6_hmac_algo *algo = NULL;
 	int i, alg_count, cpu;
 
-	alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+	alg_count = ARRAY_SIZE(hmac_algos);
 	for (i = 0; i < alg_count; i++) {
 		algo = &hmac_algos[i];
 		for_each_possible_cpu(cpu) {
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 264d772d3c7d..bd6cc688bd19 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -91,7 +91,7 @@ static void set_tun_src(struct net *net, struct net_device *dev,
 }
 
 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 {
 	struct net *net = dev_net(skb_dst(skb)->dev);
 	struct ipv6hdr *hdr, *inner_hdr;
@@ -116,15 +116,22 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 	 * hlim will be decremented in ip6_forward() afterwards and
 	 * decapsulation will overwrite inner hlim with outer hlim
 	 */
-	ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
-		     ip6_flowlabel(inner_hdr));
-	hdr->hop_limit = inner_hdr->hop_limit;
+
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
+			     ip6_flowlabel(inner_hdr));
+		hdr->hop_limit = inner_hdr->hop_limit;
+	} else {
+		ip6_flow_hdr(hdr, 0, 0);
+		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+	}
+
 	hdr->nexthdr = NEXTHDR_ROUTING;
 
 	isrh = (void *)hdr + sizeof(*hdr);
 	memcpy(isrh, osrh, hdrlen);
 
-	isrh->nexthdr = NEXTHDR_IPV6;
+	isrh->nexthdr = proto;
 
 	hdr->daddr = isrh->segments[isrh->first_segment];
 	set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
@@ -141,10 +148,10 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
 
 /* insert an SRH within an IPv6 packet, just after the IPv6 header */
-#ifdef CONFIG_IPV6_SEG6_INLINE
-static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 {
 	struct ipv6hdr *hdr, *oldhdr;
 	struct ipv6_sr_hdr *isrh;
@@ -193,13 +200,13 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 
 	return 0;
 }
-#endif
+EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
 
 static int seg6_do_srh(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct seg6_iptunnel_encap *tinfo;
-	int err = 0;
+	int proto, err = 0;
 
 	tinfo = seg6_encap_lwtunnel(dst->lwtstate);
 
@@ -209,19 +216,47 @@ static int seg6_do_srh(struct sk_buff *skb)
 	}
 
 	switch (tinfo->mode) {
-#ifdef CONFIG_IPV6_SEG6_INLINE
 	case SEG6_IPTUN_MODE_INLINE:
+		if (skb->protocol != htons(ETH_P_IPV6))
+			return -EINVAL;
+
 		err = seg6_do_srh_inline(skb, tinfo->srh);
+		if (err)
+			return err;
+
 		skb_reset_inner_headers(skb);
 		break;
-#endif
 	case SEG6_IPTUN_MODE_ENCAP:
-		err = seg6_do_srh_encap(skb, tinfo->srh);
+		if (skb->protocol == htons(ETH_P_IPV6))
+			proto = IPPROTO_IPV6;
+		else if (skb->protocol == htons(ETH_P_IP))
+			proto = IPPROTO_IPIP;
+		else
+			return -EINVAL;
+
+		err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+		if (err)
+			return err;
+
+		skb->protocol = htons(ETH_P_IPV6);
 		break;
-	}
+	case SEG6_IPTUN_MODE_L2ENCAP:
+		if (!skb_mac_header_was_set(skb))
+			return -EINVAL;
 
-	if (err)
-		return err;
+		if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
+			return -ENOMEM;
+
+		skb_mac_header_rebuild(skb);
+		skb_push(skb, skb->mac_len);
+
+		err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE);
+		if (err)
+			return err;
+
+		skb->protocol = htons(ETH_P_IPV6);
+		break;
+	}
 
 	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
@@ -336,6 +371,9 @@ static int seg6_build_state(struct nlattr *nla,
 	struct seg6_lwt *slwt;
 	int err;
 
+	if (family != AF_INET && family != AF_INET6)
+		return -EINVAL;
+
 	err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
 			       seg6_iptunnel_policy, extack);
 
@@ -357,12 +395,15 @@ static int seg6_build_state(struct nlattr *nla,
 		return -EINVAL;
 
 	switch (tuninfo->mode) {
-#ifdef CONFIG_IPV6_SEG6_INLINE
 	case SEG6_IPTUN_MODE_INLINE:
+		if (family != AF_INET6)
+			return -EINVAL;
+
 		break;
-#endif
 	case SEG6_IPTUN_MODE_ENCAP:
 		break;
+	case SEG6_IPTUN_MODE_L2ENCAP:
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -386,8 +427,11 @@ static int seg6_build_state(struct nlattr *nla,
 	memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
 
 	newts->type = LWTUNNEL_ENCAP_SEG6;
-	newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
-			LWTUNNEL_STATE_INPUT_REDIRECT;
+	newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
+
+	if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
+		newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+
 	newts->headroom = seg6_lwt_headroom(tuninfo);
 
 	*ts = newts;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
new file mode 100644
index 000000000000..825b8e01f947
--- /dev/null
+++ b/net/ipv6/seg6_local.c
@@ -0,0 +1,934 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *        modify it under the terms of the GNU General Public License
+ *        as published by the Free Software Foundation; either version
+ *        2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/lwtunnel.h>
+#include <net/netevent.h>
+#include <net/netns/generic.h>
+#include <net/ip6_fib.h>
+#include <net/route.h>
+#include <net/seg6.h>
+#include <linux/seg6.h>
+#include <linux/seg6_local.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/dst_cache.h>
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
+#include <linux/etherdevice.h>
+
+struct seg6_local_lwt;
+
+struct seg6_action_desc {
+	int action;		/* SEG6_LOCAL_ACTION_* value this entry handles */
+	unsigned long attrs;	/* bitmask (1 << SEG6_LOCAL_*) of mandatory attrs */
+	int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
+	int static_headroom;	/* added to slwt->headroom by parse_nla_action() */
+};
+
+struct seg6_local_lwt {
+	int action;			/* SEG6_LOCAL_ACTION attribute value */
+	struct ipv6_sr_hdr *srh;	/* heap copy of SEG6_LOCAL_SRH */
+	int table;			/* SEG6_LOCAL_TABLE attribute value */
+	struct in_addr nh4;		/* SEG6_LOCAL_NH4 attribute value */
+	struct in6_addr nh6;		/* SEG6_LOCAL_NH6 attribute value */
+	int iif;			/* SEG6_LOCAL_IIF attribute value */
+	int oif;			/* SEG6_LOCAL_OIF attribute value */
+
+	int headroom;			/* copied into lwtunnel_state headroom */
+	struct seg6_action_desc *desc;	/* seg6_action_table entry for action */
+};
+
+static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
+{
+	return (struct seg6_local_lwt *)lwt->data;	/* state kept in lwt->data */
+}
+
+/* Locate the routing header of @skb, make its full length linear and
+ * validate it as an SRH. Returns NULL when absent, truncated or invalid.
+ */
+static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
+{
+	struct ipv6_sr_hdr *srh;
+	int len, srhoff = 0;
+
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+		return NULL;
+	if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
+		return NULL;
+
+	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+	len = (srh->hdrlen + 1) << 3;
+	if (!pskb_may_pull(skb, srhoff + len))
+		return NULL;
+
+	/* pskb_may_pull() may reallocate the header: reload the pointer */
+	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
+	return seg6_validate_srh(srh, len) ? srh : NULL;
+}
+
+/* Fetch the SRH of @skb for an endpoint action: it must exist, still have
+ * segments left and, when HMAC support is built in, pass HMAC validation.
+ * Returns the SRH, or NULL if any of these checks fails.
+ */
+static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
+{
+	struct ipv6_sr_hdr *hdr = get_srh(skb);
+
+	if (!hdr || !hdr->segments_left)
+		return NULL;
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+	if (!seg6_hmac_validate_skb(skb))
+		return NULL;
+#endif
+
+	return hdr;
+}
+
+/* Strip the outer headers of @skb up to the inner header of type @proto.
+ * An SRH, if present, must have no segments left (and a valid HMAC when
+ * CONFIG_IPV6_SEG6_HMAC is set). Returns false if the packet is rejected.
+ */
+static bool decap_and_validate(struct sk_buff *skb, int proto)
+{
+	struct ipv6_sr_hdr *hdr = get_srh(skb);
+	unsigned int inner_off = 0;
+
+	if (hdr) {
+		if (hdr->segments_left > 0)
+			return false;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+		if (!seg6_hmac_validate_skb(skb))
+			return false;
+#endif
+	}
+
+	if (ipv6_find_hdr(skb, &inner_off, proto, NULL, NULL) < 0 ||
+	    !pskb_pull(skb, inner_off))
+		return false;
+
+	skb_postpull_rcsum(skb, skb_network_header(skb), inner_off);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb->encapsulation = 0;
+	return true;
+}
+
+/* Consume one segment: decrement segments_left in @srh and copy the segment
+ * it now designates into @daddr.
+ */
+static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
+{
+	srh->segments_left--;
+	*daddr = srh->segments[srh->segments_left];
+}
+
+/* Route @skb towards @nhaddr (or its own destination when NULL), in table
+ * @tbl_id if non-zero, and attach the resulting dst. A missing table or a
+ * non-error route via a loopback device yields the blackhole entry.
+ */
+static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+			   u32 tbl_id)
+{
+	struct net *net = dev_net(skb->dev);
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
+	int flags = RT6_LOOKUP_F_HAS_SADDR;
+	struct dst_entry *dst = NULL;
+	struct rt6_info *rt;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));	/* no stack garbage in unset fields */
+	fl6.flowi6_iif = skb->dev->ifindex;
+	fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
+	fl6.saddr = hdr->saddr;
+	fl6.flowlabel = ip6_flowinfo(hdr);
+	fl6.flowi6_mark = skb->mark;
+	fl6.flowi6_proto = hdr->nexthdr;
+	if (nhaddr)
+		fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
+
+	if (!tbl_id) {
+		dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+	} else {
+		struct fib6_table *table = fib6_get_table(net, tbl_id);
+
+		if (!table)
+			goto out;
+		rt = ip6_pol_route(net, table, 0, &fl6, flags);
+		dst = &rt->dst;
+	}
+
+	if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) {
+		dst_release(dst);
+		dst = NULL;
+	}
+out:
+	if (!dst) {
+		rt = net->ipv6.ip6_blk_hole_entry;
+		dst = &rt->dst;
+		dst_hold(dst);
+	}
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+}
+
+/* Regular endpoint: advance to the next segment and route the packet on
+ * its new destination; packets without a usable SRH are freed (-EINVAL).
+ */
+static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	struct ipv6_sr_hdr *hdr = get_and_validate_srh(skb);
+
+	if (!hdr) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	advance_nextseg(hdr, &ipv6_hdr(skb)->daddr);
+
+	/* no forced nexthop and no specific table */
+	lookup_nexthop(skb, NULL, 0);
+
+	return dst_input(skb);
+}
+
+/* Regular endpoint that forwards to a specified nexthop: advance to the
+ * next segment, then route through slwt->nh6. Frees bad packets (-EINVAL).
+ */
+static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	struct ipv6_sr_hdr *hdr = get_and_validate_srh(skb);
+
+	if (!hdr) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	advance_nextseg(hdr, &ipv6_hdr(skb)->daddr);
+
+	/* route on the configured nexthop rather than on the new daddr */
+	lookup_nexthop(skb, &slwt->nh6, 0);
+
+	return dst_input(skb);
+}
+
+/* Endpoint with table lookup: advance to the next segment, then route the
+ * packet using slwt->table. Packets without a usable SRH are freed.
+ */
+static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	struct ipv6_sr_hdr *hdr = get_and_validate_srh(skb);
+
+	if (!hdr) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	advance_nextseg(hdr, &ipv6_hdr(skb)->daddr);
+
+	lookup_nexthop(skb, NULL, slwt->table);
+
+	return dst_input(skb);
+}
+
+/* decapsulate and forward inner L2 frame on specified interface (slwt->oif) */
+static int input_action_end_dx2(struct sk_buff *skb,
+				struct seg6_local_lwt *slwt)
+{
+	struct net *net = dev_net(skb->dev);
+	struct net_device *odev;
+	struct ethhdr *eth;
+
+	if (!decap_and_validate(skb, NEXTHDR_NONE))
+		goto drop;
+
+	if (!pskb_may_pull(skb, ETH_HLEN))	/* need a full Ethernet header */
+		goto drop;
+
+	skb_reset_mac_header(skb);
+	eth = (struct ethhdr *)skb->data;
+
+	/* To determine the frame's protocol, we assume it is 802.3. This avoids
+	 * a call to eth_type_trans(), which is not really relevant for our
+	 * use case.
+	 */
+	if (!eth_proto_is_802_3(eth->h_proto))
+		goto drop;
+
+	odev = dev_get_by_index_rcu(net, slwt->oif);
+	if (!odev)
+		goto drop;
+
+	/* As we accept Ethernet frames, make sure the egress device is of
+	 * the correct type.
+	 */
+	if (odev->type != ARPHRD_ETHER)
+		goto drop;
+
+	if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
+		goto drop;	/* egress device is down or has no carrier */
+
+	skb_orphan(skb);
+
+	if (skb_warn_if_lro(skb))
+		goto drop;
+
+	skb_forward_csum(skb);
+
+	if (skb->len - ETH_HLEN > odev->mtu)	/* payload exceeds egress MTU */
+		goto drop;
+
+	skb->dev = odev;
+	skb->protocol = eth->h_proto;
+
+	return dev_queue_xmit(skb);
+
+drop:
+	kfree_skb(skb);	/* every failure path frees the packet */
+	return -EINVAL;
+}
+
+/* Decapsulate an IPv6-in-IPv6 packet and forward the inner packet.
+ *
+ * Accepted input is an IPv6 encapsulated packet carrying either no SRH or
+ * an SRH with SL=0. When slwt->nh6 is ::, the inner packet is routed on its
+ * own destination address; otherwise the specified nexthop is used.
+ *
+ * Returns the dst_input() result, or -EINVAL after freeing a packet that
+ * fails decapsulation.
+ */
+static int input_action_end_dx6(struct sk_buff *skb,
+				struct seg6_local_lwt *slwt)
+{
+	struct in6_addr *forced_nh;
+
+	if (!decap_and_validate(skb, IPPROTO_IPV6))
+		goto drop;
+
+	/* make sure a whole inner IPv6 header is available */
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		goto drop;
+
+	/* The inner packet is not associated to any local interface,
+	 * so we do not call netif_rx().
+	 */
+	forced_nh = ipv6_addr_any(&slwt->nh6) ? NULL : &slwt->nh6;
+	lookup_nexthop(skb, forced_nh, 0);
+
+	return dst_input(skb);
+
+drop:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+/* Decapsulate an IPv4-in-IPv6 packet and route the inner IPv4 packet on
+ * slwt->nh4 when non-zero, otherwise on its own destination address.
+ * Packets that cannot be decapsulated or routed are freed (-EINVAL).
+ */
+static int input_action_end_dx4(struct sk_buff *skb,
+				struct seg6_local_lwt *slwt)
+{
+	struct iphdr *inner;
+	__be32 target;
+
+	if (!decap_and_validate(skb, IPPROTO_IPIP))
+		goto drop;
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto drop;
+
+	skb->protocol = htons(ETH_P_IP);
+	inner = ip_hdr(skb);
+	target = slwt->nh4.s_addr;
+	if (!target)
+		target = inner->daddr;
+
+	skb_dst_drop(skb);
+	if (ip_route_input(skb, target, inner->saddr, 0, skb->dev))
+		goto drop;
+
+	return dst_input(skb);
+
+drop:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+/* Decapsulate an IPv6-in-IPv6 packet and route the inner packet using
+ * slwt->table. Packets that fail decapsulation are freed (-EINVAL).
+ */
+static int input_action_end_dt6(struct sk_buff *skb,
+				struct seg6_local_lwt *slwt)
+{
+	bool ok = decap_and_validate(skb, IPPROTO_IPV6) &&
+		  pskb_may_pull(skb, sizeof(struct ipv6hdr));
+
+	if (!ok) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	lookup_nexthop(skb, NULL, slwt->table);
+	return dst_input(skb);
+}
+
+/* Push slwt->srh on top of the packet's current SRH, fix up the payload
+ * length and route the packet again. Frees the packet on failure.
+ */
+static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	int ret;
+
+	if (!get_and_validate_srh(skb)) {
+		ret = -EINVAL;
+		goto drop;
+	}
+
+	ret = seg6_do_srh_inline(skb, slwt->srh);
+	if (ret)
+		goto drop;
+
+	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+	lookup_nexthop(skb, NULL, 0);
+	return dst_input(skb);
+
+drop:
+	kfree_skb(skb);
+	return ret;
+}
+
+/* Advance to the next segment, then encapsulate the packet within an outer
+ * IPv6 header carrying slwt->srh and route it. Frees the packet on failure.
+ */
+static int input_action_end_b6_encap(struct sk_buff *skb,
+				     struct seg6_local_lwt *slwt)
+{
+	struct ipv6_sr_hdr *hdr = get_and_validate_srh(skb);
+	int ret = -EINVAL;
+
+	if (!hdr)
+		goto drop;
+
+	advance_nextseg(hdr, &ipv6_hdr(skb)->daddr);
+
+	/* record the current headers as inner headers before encapsulating */
+	skb_reset_inner_headers(skb);
+	skb->encapsulation = 1;
+
+	ret = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
+	if (ret)
+		goto drop;
+
+	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+	lookup_nexthop(skb, NULL, 0);
+	return dst_input(skb);
+
+drop:
+	kfree_skb(skb);
+	return ret;
+}
+
+static struct seg6_action_desc seg6_action_table[] = {
+	{
+		.action		= SEG6_LOCAL_ACTION_END,
+		.attrs		= 0,	/* no attribute required */
+		.input		= input_action_end,
+	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_X,
+		.attrs		= (1 << SEG6_LOCAL_NH6),
+		.input		= input_action_end_x,
+	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_T,
+		.attrs		= (1 << SEG6_LOCAL_TABLE),
+		.input		= input_action_end_t,
+	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_DX2,
+		.attrs		= (1 << SEG6_LOCAL_OIF),
+		.input		= input_action_end_dx2,
+	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_DX6,
+		.attrs		= (1 << SEG6_LOCAL_NH6),
+		.input		= input_action_end_dx6,
+	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_DX4,
+		.attrs		= (1 << SEG6_LOCAL_NH4),
+		.input		= input_action_end_dx4,
+	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_DT6,
+		.attrs		= (1 << SEG6_LOCAL_TABLE),
+		.input		= input_action_end_dt6,
+	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_B6,
+		.attrs		= (1 << SEG6_LOCAL_SRH),
+		.input		= input_action_end_b6,
+	},
+	{
+		.action		= SEG6_LOCAL_ACTION_END_B6_ENCAP,
+		.attrs		= (1 << SEG6_LOCAL_SRH),
+		.input		= input_action_end_b6_encap,
+		.static_headroom	= sizeof(struct ipv6hdr), /* outer header */
+	}
+};
+
+/* Return the seg6_action_table entry whose action equals @action, or NULL
+ * when the action is unknown.
+ */
+static struct seg6_action_desc *__get_action_desc(int action)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(seg6_action_table); i++) {
+		if (seg6_action_table[i].action == action)
+			return &seg6_action_table[i];
+	}
+
+	return NULL;
+}
+
+/* lwtunnel input hook: hand IPv6 packets to the input handler of the action
+ * attached to the route; anything that is not IPv6 is freed (-EINVAL).
+ */
+static int seg6_local_input(struct sk_buff *skb)
+{
+	struct seg6_local_lwt *slwt;
+
+	if (skb->protocol != htons(ETH_P_IPV6)) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	slwt = seg6_local_lwtunnel(skb_dst(skb)->lwtstate);
+
+	return slwt->desc->input(skb, slwt);
+}
+
+static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
+	[SEG6_LOCAL_ACTION]	= { .type = NLA_U32 },	/* mandatory */
+	[SEG6_LOCAL_SRH]	= { .type = NLA_BINARY }, /* see parse_nla_srh() */
+	[SEG6_LOCAL_TABLE]	= { .type = NLA_U32 },
+	[SEG6_LOCAL_NH4]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct in_addr) },
+	[SEG6_LOCAL_NH6]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct in6_addr) },
+	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
+	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
+};
+
+/* Validate the SEG6_LOCAL_SRH attribute and keep a private copy of it in
+ * slwt->srh; the SRH length is accounted in slwt->headroom.
+ *
+ * Returns 0, -EINVAL for a malformed SRH or -ENOMEM.
+ */
+static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+	struct nlattr *attr = attrs[SEG6_LOCAL_SRH];
+	struct ipv6_sr_hdr *usrh = nla_data(attr);
+	int len = nla_len(attr);
+
+	/* SRH must contain at least one segment */
+	if (len < sizeof(*usrh) + sizeof(struct in6_addr) ||
+	    !seg6_validate_srh(usrh, len))
+		return -EINVAL;
+
+	slwt->srh = kmemdup(usrh, len, GFP_KERNEL);
+	if (!slwt->srh)
+		return -ENOMEM;
+
+	/* account for the bytes of the stored SRH */
+	slwt->headroom += len;
+
+	return 0;
+}
+
+/* Dump slwt->srh as a SEG6_LOCAL_SRH attribute. The attribute length is
+ * derived from the SRH's own hdrlen field.
+ * Returns 0, or -EMSGSIZE when the attribute cannot be reserved in @skb.
+ */
+static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	int srh_len = (slwt->srh->hdrlen + 1) << 3;
+	struct nlattr *attr;
+
+	attr = nla_reserve(skb, SEG6_LOCAL_SRH, srh_len);
+	if (!attr)
+		return -EMSGSIZE;
+
+	memcpy(nla_data(attr), slwt->srh, srh_len);
+
+	return 0;
+}
+
+/* Non-zero when the SRHs of @a and @b differ in length or content */
+static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+	int len_a = (a->srh->hdrlen + 1) << 3;
+	int len_b = (b->srh->hdrlen + 1) << 3;
+
+	/* only equally sized headers are compared byte by byte */
+	return len_a != len_b ? 1 : memcmp(a->srh, b->srh, len_a);
+}
+
+static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+	slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
+
+	return 0;	/* always succeeds */
+}
+
+/* Dump slwt->table as a SEG6_LOCAL_TABLE attribute.
+ * Returns 0, or -EMSGSIZE if nla_put_u32() fails.
+ */
+static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	return nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table) ? -EMSGSIZE : 0;
+}
+
+/* Compare the table attribute of @a and @b.
+ * Returns 1 when they differ, 0 when they are equal.
+ */
+static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+	return a->table != b->table;
+}
+
+static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+	if (nla_len(attrs[SEG6_LOCAL_NH4]) != sizeof(slwt->nh4))
+		return -EINVAL;	/* NLA_BINARY policy only caps the length */
+	memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]), sizeof(slwt->nh4));
+	return 0;
+}
+
+/* Dump slwt->nh4 as SEG6_LOCAL_NH4; 0 on success, -EMSGSIZE otherwise */
+static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	const size_t len = sizeof(slwt->nh4);
+	struct nlattr *attr = nla_reserve(skb, SEG6_LOCAL_NH4, len);
+
+	if (!attr)
+		return -EMSGSIZE;
+	memcpy(nla_data(attr), &slwt->nh4, len);
+
+	return 0;
+}
+
+static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+	return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr)); /* 0 if equal */
+}
+
+static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+	if (nla_len(attrs[SEG6_LOCAL_NH6]) != sizeof(slwt->nh6))
+		return -EINVAL;	/* NLA_BINARY policy only caps the length */
+	memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]), sizeof(slwt->nh6));
+	return 0;
+}
+
+/* Dump slwt->nh6 as SEG6_LOCAL_NH6; 0 on success, -EMSGSIZE otherwise */
+static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	const size_t len = sizeof(slwt->nh6);
+	struct nlattr *attr = nla_reserve(skb, SEG6_LOCAL_NH6, len);
+
+	if (!attr)
+		return -EMSGSIZE;
+	memcpy(nla_data(attr), &slwt->nh6, len);
+
+	return 0;
+}
+
+static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+	return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr)); /* 0 if equal */
+}
+
+static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+	slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
+
+	return 0;	/* always succeeds */
+}
+
+/* Dump slwt->iif as a SEG6_LOCAL_IIF attribute.
+ * Returns 0, or -EMSGSIZE if nla_put_u32() fails.
+ */
+static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	return nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif) ? -EMSGSIZE : 0;
+}
+
+/* Compare the iif attribute of @a and @b.
+ * Returns 1 when they differ, 0 when they are equal.
+ */
+static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+	return a->iif != b->iif;
+}
+
+static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+	slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
+
+	return 0;	/* always succeeds */
+}
+
+/* Dump slwt->oif as a SEG6_LOCAL_OIF attribute.
+ * Returns 0, or -EMSGSIZE if nla_put_u32() fails.
+ */
+static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+	return nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif) ? -EMSGSIZE : 0;
+}
+
+/* Compare the oif attribute of @a and @b.
+ * Returns 1 when they differ, 0 when they are equal.
+ */
+static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+	return a->oif != b->oif;
+}
+
+struct seg6_action_param {	/* per-attribute parse/dump/compare hooks */
+	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
+	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
+	int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
+};
+
+static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
+	[SEG6_LOCAL_SRH]	= { .parse = parse_nla_srh,
+				    .put = put_nla_srh,
+				    .cmp = cmp_nla_srh },
+
+	[SEG6_LOCAL_TABLE]	= { .parse = parse_nla_table,
+				    .put = put_nla_table,
+				    .cmp = cmp_nla_table },
+
+	[SEG6_LOCAL_NH4]	= { .parse = parse_nla_nh4,
+				    .put = put_nla_nh4,
+				    .cmp = cmp_nla_nh4 },
+
+	[SEG6_LOCAL_NH6]	= { .parse = parse_nla_nh6,
+				    .put = put_nla_nh6,
+				    .cmp = cmp_nla_nh6 },
+
+	[SEG6_LOCAL_IIF]	= { .parse = parse_nla_iif,
+				    .put = put_nla_iif,
+				    .cmp = cmp_nla_iif },
+
+	[SEG6_LOCAL_OIF]	= { .parse = parse_nla_oif,
+				    .put = put_nla_oif,
+				    .cmp = cmp_nla_oif },
+};	/* indexed by SEG6_LOCAL_* attribute type; other slots have no hooks */
+
+/* Bind @slwt to the descriptor of slwt->action and parse every attribute
+ * that the action declares as required.
+ *
+ * Returns 0, -EINVAL for an unknown action or a missing attribute,
+ * -EOPNOTSUPP for an action without input handler, or a parser's error.
+ */
+static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+	struct seg6_action_desc *desc = __get_action_desc(slwt->action);
+	int attr, ret;
+
+	if (!desc)
+		return -EINVAL;
+	if (!desc->input)
+		return -EOPNOTSUPP;
+
+	slwt->desc = desc;
+	slwt->headroom += desc->static_headroom;
+
+	for (attr = 0; attr <= SEG6_LOCAL_MAX; attr++) {
+		if (!(desc->attrs & (1 << attr)))
+			continue;
+		if (!attrs[attr])
+			return -EINVAL;
+		ret = seg6_action_params[attr].parse(attrs, slwt);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* lwtunnel build_state hook: parse the nested SEG6_LOCAL_* attributes in
+ * @nla into a new lwtunnel state returned through @ts. Only AF_INET6 is
+ * accepted and SEG6_LOCAL_ACTION is mandatory. Returns 0 or an error code.
+ */
+static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
+				  const void *cfg, struct lwtunnel_state **ts,
+				  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[SEG6_LOCAL_MAX + 1];
+	struct lwtunnel_state *state;
+	struct seg6_local_lwt *slwt;
+	int ret;
+
+	if (family != AF_INET6)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
+			       extack);
+	if (ret < 0)
+		return ret;
+	if (!tb[SEG6_LOCAL_ACTION])
+		return -EINVAL;
+
+	state = lwtunnel_state_alloc(sizeof(*slwt));
+	if (!state)
+		return -ENOMEM;
+
+	slwt = seg6_local_lwtunnel(state);
+	slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
+
+	ret = parse_nla_action(tb, slwt);
+	if (ret < 0) {
+		/* release a SRH copy a parser may have stored before failing */
+		kfree(slwt->srh);
+		kfree(state);
+		return ret;
+	}
+
+	state->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
+	state->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
+	state->headroom = slwt->headroom;
+	*ts = state;
+
+	return 0;
+}
+
+/* lwtunnel destroy_state hook */
+static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
+{
+	/* the SRH copy is the only member this file allocates separately */
+	kfree(seg6_local_lwtunnel(lwt)->srh);
+}
+
+/* lwtunnel fill_encap hook: dump the action and its required attributes */
+static int seg6_local_fill_encap(struct sk_buff *skb,
+				 struct lwtunnel_state *lwt)
+{
+	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+	int attr, ret;
+
+	if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
+		return -EMSGSIZE;
+
+	for (attr = 0; attr <= SEG6_LOCAL_MAX; attr++) {
+		if (!(slwt->desc->attrs & (1 << attr)))
+			continue;
+
+		ret = seg6_action_params[attr].put(skb, slwt);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* lwtunnel get_encap_size hook: netlink space needed to dump the state,
+ * i.e. the action plus each attribute flagged in the action descriptor.
+ * Returns the summed nla_total_size() of those attributes.
+ */
+static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
+{
+	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+	unsigned long required = slwt->desc->attrs;
+	int size = nla_total_size(4); /* action */
+
+	/* the SRH is variable-sized: use its own length field */
+	if (required & (1 << SEG6_LOCAL_SRH))
+		size += nla_total_size((slwt->srh->hdrlen + 1) << 3);
+
+	/* 32-bit attributes */
+	if (required & (1 << SEG6_LOCAL_TABLE))
+		size += nla_total_size(4);
+	if (required & (1 << SEG6_LOCAL_IIF))
+		size += nla_total_size(4);
+	if (required & (1 << SEG6_LOCAL_OIF))
+		size += nla_total_size(4);
+
+	/* IPv4 and IPv6 nexthop addresses */
+	if (required & (1 << SEG6_LOCAL_NH4))
+		size += nla_total_size(4);
+	if (required & (1 << SEG6_LOCAL_NH6))
+		size += nla_total_size(16);
+
+	return size;
+}
+
+/* lwtunnel cmp_encap hook: two states are equal when they share the same
+ * action, the same required attribute set and equal attribute values.
+ * Returns 0 when equal, 1 otherwise.
+ */
+static int seg6_local_cmp_encap(struct lwtunnel_state *a,
+				struct lwtunnel_state *b)
+{
+	struct seg6_local_lwt *lhs = seg6_local_lwtunnel(a);
+	struct seg6_local_lwt *rhs = seg6_local_lwtunnel(b);
+	int attr;
+
+	if (lhs->action != rhs->action ||
+	    lhs->desc->attrs != rhs->desc->attrs)
+		return 1;
+
+	/* compare only the attributes the action actually uses */
+	for (attr = 0; attr <= SEG6_LOCAL_MAX; attr++) {
+		if (!(lhs->desc->attrs & (1 << attr)))
+			continue;
+
+		if (seg6_action_params[attr].cmp(lhs, rhs))
+			return 1;
+	}
+
+	return 0;
+}
+
+static const struct lwtunnel_encap_ops seg6_local_ops = {
+	.build_state	= seg6_local_build_state,
+	.destroy_state	= seg6_local_destroy_state,
+	.input		= seg6_local_input,	/* no .output hook is provided */
+	.fill_encap	= seg6_local_fill_encap,
+	.get_encap_size	= seg6_local_get_encap_size,
+	.cmp_encap	= seg6_local_cmp_encap,
+	.owner		= THIS_MODULE,
+};
+
+int __init seg6_local_init(void)	/* registers seg6_local_ops */
+{
+	return lwtunnel_encap_add_ops(&seg6_local_ops,
+				      LWTUNNEL_ENCAP_SEG6_LOCAL);
+}
+
+void seg6_local_exit(void)	/* unregisters seg6_local_ops */
+{
+	lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
+}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ac912bb21747..d60ddcb0bfe2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -91,29 +91,35 @@ struct sit_net {
  * Must be invoked with rcu_read_lock
  */
 static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
-		struct net_device *dev, __be32 remote, __be32 local)
+					     struct net_device *dev,
+					     __be32 remote, __be32 local,
+					     int sifindex)
 {
 	unsigned int h0 = HASH(remote);
 	unsigned int h1 = HASH(local);
 	struct ip_tunnel *t;
 	struct sit_net *sitn = net_generic(net, sit_net_id);
+	int ifindex = dev ? dev->ifindex : 0;
 
 	for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
 		if (local == t->parms.iph.saddr &&
 		    remote == t->parms.iph.daddr &&
-		    (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
+		    (!dev || !t->parms.link || ifindex == t->parms.link ||
+		     sifindex == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
 	for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
 		if (remote == t->parms.iph.daddr &&
-		    (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
+		    (!dev || !t->parms.link || ifindex == t->parms.link ||
+		     sifindex == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
 	for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
 		if (local == t->parms.iph.saddr &&
-		    (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
+		    (!dev || !t->parms.link || ifindex == t->parms.link ||
+		     sifindex == t->parms.link) &&
 		    (t->dev->flags & IFF_UP))
 			return t;
 	}
@@ -486,6 +492,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
 	const int code = icmp_hdr(skb)->code;
 	unsigned int data_len = 0;
 	struct ip_tunnel *t;
+	int sifindex;
 	int err;
 
 	switch (type) {
@@ -517,10 +524,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
 
 	err = -ENOENT;
 
-	t = ipip6_tunnel_lookup(dev_net(skb->dev),
-				skb->dev,
-				iph->daddr,
-				iph->saddr);
+	sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
+	t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
+				iph->daddr, iph->saddr, sifindex);
 	if (!t)
 		goto out;
 
@@ -633,10 +639,12 @@ static int ipip6_rcv(struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct ip_tunnel *tunnel;
+	int sifindex;
 	int err;
 
+	sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
 	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
-				     iph->saddr, iph->daddr);
+				     iph->saddr, iph->daddr, sifindex);
 	if (tunnel) {
 		struct pcpu_sw_netstats *tstats;
 
@@ -704,10 +712,13 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 {
 	const struct iphdr *iph;
 	struct ip_tunnel *tunnel;
+	int sifindex;
+
+	sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
 
 	iph = ip_hdr(skb);
 	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
-				     iph->saddr, iph->daddr);
+				     iph->saddr, iph->daddr, sifindex);
 	if (tunnel) {
 		const struct tnl_ptk_info *tpi;
 
@@ -1848,19 +1859,22 @@ err_alloc_dev:
 	return err;
 }
 
-static void __net_exit sit_exit_net(struct net *net)
+static void __net_exit sit_exit_batch_net(struct list_head *net_list)
 {
 	LIST_HEAD(list);
+	struct net *net;
 
 	rtnl_lock();
-	sit_destroy_tunnels(net, &list);
+	list_for_each_entry(net, net_list, exit_list)
+		sit_destroy_tunnels(net, &list);
+
 	unregister_netdevice_many(&list);
 	rtnl_unlock();
 }
 
 static struct pernet_operations sit_net_ops = {
 	.init = sit_init_net,
-	.exit = sit_exit_net,
+	.exit_batch = sit_exit_batch_net,
 	.id   = &sit_net_id,
 	.size = sizeof(struct sit_net),
 };
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 4e7817abc0b9..e7a3a6b6cf56 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	}
 
 	req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
-	tcp_select_initial_window(tcp_full_space(sk), req->mss,
+	tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
 				  &req->rsk_rcv_wnd, &req->rsk_window_clamp,
 				  ireq->wscale_ok, &rcv_wscale,
 				  dst_metric(dst, RTAX_INITRWND));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 69c50e737c54..a789a8ac6a64 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
  *
@@ -90,6 +91,41 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "flowlabel_reflect",
+		.data		= &init_net.ipv6.sysctl.flowlabel_reflect,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "max_dst_opts_number",
+		.data		= &init_net.ipv6.sysctl.max_dst_opts_cnt,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "max_hbh_opts_number",
+		.data		= &init_net.ipv6.sysctl.max_hbh_opts_cnt,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "max_dst_opts_length",
+		.data		= &init_net.ipv6.sysctl.max_dst_opts_len,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "max_hbh_length",
+		.data		= &init_net.ipv6.sysctl.max_hbh_opts_len,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{ }
 };
 
@@ -149,6 +185,11 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
 	ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
 	ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
+	ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect;
+	ipv6_table[10].data = &net->ipv6.sysctl.max_dst_opts_cnt;
+	ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
+	ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
+	ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
 
 	ipv6_route_table = ipv6_route_sysctl_init(net);
 	if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 206210125fd7..6bb98c93edfe 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -69,6 +69,8 @@
 #include <crypto/hash.h>
 #include <linux/scatterlist.h>
 
+#include <trace/events/tcp.h>
+
 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 				      struct request_sock *req);
@@ -350,7 +352,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	sk = __inet6_lookup_established(net, &tcp_hashinfo,
 					&hdr->daddr, th->dest,
 					&hdr->saddr, ntohs(th->source),
-					skb->dev->ifindex);
+					skb->dev->ifindex, inet6_sdif(skb));
 
 	if (!sk) {
 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
@@ -890,7 +892,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 	int genhash;
 	struct sock *sk1 = NULL;
 #endif
-	int oif;
+	int oif = 0;
 
 	if (th->rst)
 		return;
@@ -918,7 +920,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 					   &tcp_hashinfo, NULL, 0,
 					   &ipv6h->saddr,
 					   th->source, &ipv6h->daddr,
-					   ntohs(th->source), tcp_v6_iif(skb));
+					   ntohs(th->source), tcp_v6_iif(skb),
+					   tcp_v6_sdif(skb));
 		if (!sk1)
 			goto out;
 
@@ -938,7 +941,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
 			  (th->doff << 2);
 
-	oif = sk ? sk->sk_bound_dev_if : 0;
+	if (sk) {
+		oif = sk->sk_bound_dev_if;
+		trace_tcp_send_reset(sk, skb);
+	}
+
 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -1296,7 +1303,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 			}
 		}
 
-		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
+		tcp_rcv_established(sk, skb, tcp_hdr(skb));
 		if (opt_skb)
 			goto ipv6_pktoptions;
 		return 0;
@@ -1393,10 +1400,13 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
 	TCP_SKB_CB(skb)->sacked = 0;
+	TCP_SKB_CB(skb)->has_rxtstamp =
+			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
 }
 
 static int tcp_v6_rcv(struct sk_buff *skb)
 {
+	int sdif = inet6_sdif(skb);
 	const struct tcphdr *th;
 	const struct ipv6hdr *hdr;
 	bool refcounted;
@@ -1430,7 +1440,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 
 lookup:
 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
-				th->source, th->dest, inet6_iif(skb),
+				th->source, th->dest, inet6_iif(skb), sdif,
 				&refcounted);
 	if (!sk)
 		goto no_tcp_socket;
@@ -1456,9 +1466,9 @@ process:
 		}
 		sock_hold(sk);
 		refcounted = true;
-		if (tcp_filter(sk, skb))
-			goto discard_and_relse;
-		nsk = tcp_check_req(sk, skb, req, false);
+		nsk = NULL;
+		if (!tcp_filter(sk, skb))
+			nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
 			goto discard_and_relse;
@@ -1505,8 +1515,7 @@ process:
 	tcp_segs_in(tcp_sk(sk), skb);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-		if (!tcp_prequeue(sk, skb))
-			ret = tcp_v6_do_rcv(sk, skb);
+		ret = tcp_v6_do_rcv(sk, skb);
 	} else if (tcp_add_backlog(sk, skb)) {
 		goto discard_and_relse;
 	}
@@ -1564,7 +1573,8 @@ do_time_wait:
 					    skb, __tcp_hdrlen(th),
 					    &ipv6_hdr(skb)->saddr, th->source,
 					    &ipv6_hdr(skb)->daddr,
-					    ntohs(th->dest), tcp_v6_iif(skb));
+					    ntohs(th->dest), tcp_v6_iif(skb),
+					    sdif);
 		if (sk2) {
 			struct inet_timewait_sock *tw = inet_twsk(sk);
 			inet_twsk_deschedule_put(tw);
@@ -1573,8 +1583,9 @@ do_time_wait:
 			refcounted = false;
 			goto process;
 		}
-		/* Fall through to ACK */
 	}
+		/* to ACK */
+		/* fall through */
 	case TCP_TW_ACK:
 		tcp_v6_timewait_ack(sk, skb);
 		break;
@@ -1611,7 +1622,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
 	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
 					&hdr->saddr, th->source,
 					&hdr->daddr, ntohs(th->dest),
-					inet6_iif(skb));
+					inet6_iif(skb), inet6_sdif(skb));
 	if (sk) {
 		skb->sk = sk;
 		skb->destructor = sock_edemux;
@@ -1929,8 +1940,8 @@ struct proto tcpv6_prot = {
 	.memory_pressure	= &tcp_memory_pressure,
 	.orphan_count		= &tcp_orphan_count,
 	.sysctl_mem		= sysctl_tcp_mem,
-	.sysctl_wmem		= sysctl_tcp_wmem,
-	.sysctl_rmem		= sysctl_tcp_rmem,
+	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
+	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
@@ -1945,6 +1956,9 @@ struct proto tcpv6_prot = {
 	.diag_destroy		= tcp_abort,
 };
 
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
 static struct inet6_protocol tcpv6_protocol = {
 	.early_demux	=	tcp_v6_early_demux,
 	.early_demux_handler =  tcp_v6_early_demux,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 578142b7ca3e..3f30fa313bf2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -129,7 +129,7 @@ static void udp_v6_rehash(struct sock *sk)
 static int compute_score(struct sock *sk, struct net *net,
 			 const struct in6_addr *saddr, __be16 sport,
 			 const struct in6_addr *daddr, unsigned short hnum,
-			 int dif, bool exact_dif)
+			 int dif, int sdif, bool exact_dif)
 {
 	int score;
 	struct inet_sock *inet;
@@ -161,9 +161,13 @@ static int compute_score(struct sock *sk, struct net *net,
 	}
 
 	if (sk->sk_bound_dev_if || exact_dif) {
-		if (sk->sk_bound_dev_if != dif)
+		bool dev_match = (sk->sk_bound_dev_if == dif ||
+				  sk->sk_bound_dev_if == sdif);
+
+		if (exact_dif && !dev_match)
 			return -1;
-		score++;
+		if (sk->sk_bound_dev_if && dev_match)
+			score++;
 	}
 
 	if (sk->sk_incoming_cpu == raw_smp_processor_id())
@@ -175,9 +179,9 @@ static int compute_score(struct sock *sk, struct net *net,
 /* called with rcu_read_lock() */
 static struct sock *udp6_lib_lookup2(struct net *net,
 		const struct in6_addr *saddr, __be16 sport,
-		const struct in6_addr *daddr, unsigned int hnum, int dif,
-		bool exact_dif, struct udp_hslot *hslot2,
-		struct sk_buff *skb)
+		const struct in6_addr *daddr, unsigned int hnum,
+		int dif, int sdif, bool exact_dif,
+		struct udp_hslot *hslot2, struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	int score, badness, matches = 0, reuseport = 0;
@@ -187,7 +191,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 	badness = -1;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
 		score = compute_score(sk, net, saddr, sport,
-				      daddr, hnum, dif, exact_dif);
+				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -214,10 +218,10 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 
 /* rcu_read_lock() must be held */
 struct sock *__udp6_lib_lookup(struct net *net,
-				      const struct in6_addr *saddr, __be16 sport,
-				      const struct in6_addr *daddr, __be16 dport,
-				      int dif, struct udp_table *udptable,
-				      struct sk_buff *skb)
+			       const struct in6_addr *saddr, __be16 sport,
+			       const struct in6_addr *daddr, __be16 dport,
+			       int dif, int sdif, struct udp_table *udptable,
+			       struct sk_buff *skb)
 {
 	struct sock *sk, *result;
 	unsigned short hnum = ntohs(dport);
@@ -235,7 +239,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
 			goto begin;
 
 		result = udp6_lib_lookup2(net, saddr, sport,
-					  daddr, hnum, dif, exact_dif,
+					  daddr, hnum, dif, sdif, exact_dif,
 					  hslot2, skb);
 		if (!result) {
 			unsigned int old_slot2 = slot2;
@@ -250,7 +254,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
 				goto begin;
 
 			result = udp6_lib_lookup2(net, saddr, sport,
-						  daddr, hnum, dif,
+						  daddr, hnum, dif, sdif,
 						  exact_dif, hslot2,
 						  skb);
 		}
@@ -261,7 +265,7 @@ begin:
 	badness = -1;
 	sk_for_each_rcu(sk, &hslot->head) {
 		score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
-				      exact_dif);
+				      sdif, exact_dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
@@ -294,7 +298,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
 
 	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
 				 &iph->daddr, dport, inet6_iif(skb),
-				 udptable, skb);
+				 inet6_sdif(skb), udptable, skb);
 }
 
 struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
@@ -304,7 +308,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 
 	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
 				 &iph->daddr, dport, inet6_iif(skb),
-				 &udp_table, skb);
+				 inet6_sdif(skb), &udp_table, skb);
 }
 EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
 
@@ -320,7 +324,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
 	struct sock *sk;
 
 	sk =  __udp6_lib_lookup(net, saddr, sport, daddr, dport,
-				dif, &udp_table, NULL);
+				dif, 0, &udp_table, NULL);
 	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 		sk = NULL;
 	return sk;
@@ -362,7 +366,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
 
 try_again:
-	peeking = off = sk_peek_offset(sk, flags);
+	peeking = flags & MSG_PEEK;
+	off = sk_peek_offset(sk, flags);
 	skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
 	if (!skb)
 		return err;
@@ -501,7 +506,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct net *net = dev_net(skb->dev);
 
 	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
-			       inet6_iif(skb), udptable, skb);
+			       inet6_iif(skb), 0, udptable, skb);
 	if (!sk) {
 		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
 				  ICMP6_MIB_INERRORS);
@@ -574,8 +579,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
 static struct static_key udpv6_encap_needed __read_mostly;
 void udpv6_encap_enable(void)
 {
-	if (!static_key_enabled(&udpv6_encap_needed))
-		static_key_slow_inc(&udpv6_encap_needed);
+	static_key_enable(&udpv6_encap_needed);
 }
 EXPORT_SYMBOL(udpv6_encap_enable);
 
@@ -602,7 +606,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		 */
 
 		/* if we're overly short, let UDP handle it */
-		encap_rcv = ACCESS_ONCE(up->encap_rcv);
+		encap_rcv = READ_ONCE(up->encap_rcv);
 		if (encap_rcv) {
 			int ret;
 
@@ -767,6 +771,15 @@ start_lookup:
 	return 0;
 }
 
+static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+{
+	if (udp_sk_rx_dst_set(sk, dst)) {
+		const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+	}
+}
+
 int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		   int proto)
 {
@@ -816,7 +829,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		int ret;
 
 		if (unlikely(sk->sk_rx_dst != dst))
-			udp_sk_rx_dst_set(sk, dst);
+			udp6_sk_rx_dst_set(sk, dst);
 
 		ret = udpv6_queue_rcv_skb(sk, skb);
 		sock_put(sk);
@@ -893,7 +906,7 @@ discard:
 static struct sock *__udp6_lib_demux_lookup(struct net *net,
 			__be16 loc_port, const struct in6_addr *loc_addr,
 			__be16 rmt_port, const struct in6_addr *rmt_addr,
-			int dif)
+			int dif, int sdif)
 {
 	unsigned short hnum = ntohs(loc_port);
 	unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
@@ -904,7 +917,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
 
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
 		if (sk->sk_state == TCP_ESTABLISHED &&
-		    INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+		    INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif))
 			return sk;
 		/* Only check first socket in chain */
 		break;
@@ -919,6 +932,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
 	struct sock *sk;
 	struct dst_entry *dst;
 	int dif = skb->dev->ifindex;
+	int sdif = inet6_sdif(skb);
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) +
 	    sizeof(struct udphdr)))
@@ -930,7 +944,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
 		sk = __udp6_lib_demux_lookup(net, uh->dest,
 					     &ipv6_hdr(skb)->daddr,
 					     uh->source, &ipv6_hdr(skb)->saddr,
-					     dif);
+					     dif, sdif);
 	else
 		return;
 
@@ -1001,6 +1015,7 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 		 */
 		offset = skb_transport_offset(skb);
 		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		csum = skb->csum;
 
 		skb->ip_summed = CHECKSUM_NONE;
 
@@ -1417,7 +1432,7 @@ void udpv6_destroy_sock(struct sock *sk)
 
 	if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
 		void (*encap_destroy)(struct sock *sk);
-		encap_destroy = ACCESS_ONCE(up->encap_destroy);
+		encap_destroy = READ_ONCE(up->encap_destroy);
 		if (encap_destroy)
 			encap_destroy(sk);
 	}
@@ -1466,6 +1481,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
 }
 #endif
 
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
 static struct inet6_protocol udpv6_protocol = {
 	.early_demux	=	udp_v6_early_demux,
 	.early_demux_handler =  udp_v6_early_demux,
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index f180b3d85e31..7903e21c178b 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _UDP6_IMPL_H
 #define _UDP6_IMPL_H
 #include <net/udp.h>
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index e7d378c032cb..455fd4e39333 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -17,109 +17,15 @@
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
-					 netdev_features_t features)
+static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb,
+					   netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
-	unsigned int mss;
-	unsigned int unfrag_ip6hlen, unfrag_len;
-	struct frag_hdr *fptr;
-	u8 *packet_start, *prevhdr;
-	u8 nexthdr;
-	u8 frag_hdr_sz = sizeof(struct frag_hdr);
-	__wsum csum;
-	int tnl_hlen;
-	int err;
-
-	mss = skb_shinfo(skb)->gso_size;
-	if (unlikely(skb->len <= mss))
-		goto out;
-
-	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
-		/* Packet is from an untrusted source, reset gso_segs. */
-
-		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
-		/* Set the IPv6 fragment id if not set yet */
-		if (!skb_shinfo(skb)->ip6_frag_id)
-			ipv6_proxy_select_ident(dev_net(skb->dev), skb);
-
-		segs = NULL;
-		goto out;
-	}
 
 	if (skb->encapsulation && skb_shinfo(skb)->gso_type &
 	    (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
 		segs = skb_udp_tunnel_segment(skb, features, true);
-	else {
-		const struct ipv6hdr *ipv6h;
-		struct udphdr *uh;
-
-		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-			goto out;
-
-		/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
-		 * do checksum of UDP packets sent as multiple IP fragments.
-		 */
-
-		uh = udp_hdr(skb);
-		ipv6h = ipv6_hdr(skb);
-
-		uh->check = 0;
-		csum = skb_checksum(skb, 0, skb->len, 0);
-		uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
-					  &ipv6h->daddr, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-		/* If there is no outer header we can fake a checksum offload
-		 * due to the fact that we have already done the checksum in
-		 * software prior to segmenting the frame.
-		 */
-		if (!skb->encap_hdr_csum)
-			features |= NETIF_F_HW_CSUM;
-
-		/* Check if there is enough headroom to insert fragment header. */
-		tnl_hlen = skb_tnl_header_len(skb);
-		if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
-			if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
-				goto out;
-		}
-
-		/* Find the unfragmentable header and shift it left by frag_hdr_sz
-		 * bytes to insert fragment header.
-		 */
-		err = ip6_find_1stfragopt(skb, &prevhdr);
-		if (err < 0)
-			return ERR_PTR(err);
-		unfrag_ip6hlen = err;
-		nexthdr = *prevhdr;
-		*prevhdr = NEXTHDR_FRAGMENT;
-		unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
-			     unfrag_ip6hlen + tnl_hlen;
-		packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
-		memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
-
-		SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
-		skb->mac_header -= frag_hdr_sz;
-		skb->network_header -= frag_hdr_sz;
-
-		fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
-		fptr->nexthdr = nexthdr;
-		fptr->reserved = 0;
-		if (!skb_shinfo(skb)->ip6_frag_id)
-			ipv6_proxy_select_ident(dev_net(skb->dev), skb);
-		fptr->identification = skb_shinfo(skb)->ip6_frag_id;
-
-		/* Fragment the skb. ipv6 header and the remaining fields of the
-		 * fragment header are updated in ipv6_gso_segment()
-		 */
-		segs = skb_segment(skb, features);
-	}
 
-out:
 	return segs;
 }
 
@@ -169,7 +75,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 
 static const struct net_offload udpv6_offload = {
 	.callbacks = {
-		.gso_segment	=	udp6_ufo_fragment,
+		.gso_segment	=	udp6_tunnel_segment,
 		.gro_receive	=	udp6_gro_receive,
 		.gro_complete	=	udp6_gro_complete,
 	},
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 3ef5d913e7a3..fe04e23af986 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * xfrm6_input.c: based on net/ipv4/xfrm4_input.c
  *
@@ -34,6 +35,7 @@ EXPORT_SYMBOL(xfrm6_rcv_spi);
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
 	struct xfrm_offload *xo = xfrm_offload(skb);
+	int nhlen = skb->data - skb_network_header(skb);
 
 	skb_network_header(skb)[IP6CB(skb)->nhoff] =
 		XFRM_MODE_SKB_CB(skb)->protocol;
@@ -43,8 +45,9 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 		return 1;
 #endif
 
-	__skb_push(skb, skb->data - skb_network_header(skb));
+	__skb_push(skb, nhlen);
 	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	skb_postpush_rcsum(skb, skb_network_header(skb), nhlen);
 
 	if (xo && (xo->flags & XFRM_GRO)) {
 		skb_mac_header_rebuild(skb);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 79651bc71bf0..885ade234a49 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * xfrm6_policy.c: based on xfrm4_policy.c
  *
@@ -27,7 +28,8 @@
 
 static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
 					  const xfrm_address_t *saddr,
-					  const xfrm_address_t *daddr)
+					  const xfrm_address_t *daddr,
+					  u32 mark)
 {
 	struct flowi6 fl6;
 	struct dst_entry *dst;
@@ -36,6 +38,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
 	fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+	fl6.flowi6_mark = mark;
 	memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
 	if (saddr)
 		memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
@@ -52,12 +55,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
 }
 
 static int xfrm6_get_saddr(struct net *net, int oif,
-			   xfrm_address_t *saddr, xfrm_address_t *daddr)
+			   xfrm_address_t *saddr, xfrm_address_t *daddr,
+			   u32 mark)
 {
 	struct dst_entry *dst;
 	struct net_device *dev;
 
-	dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr);
+	dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark);
 	if (IS_ERR(dst))
 		return -EHOSTUNREACH;
 
@@ -149,6 +153,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 		switch (nexthdr) {
 		case NEXTHDR_FRAGMENT:
 			onlyproto = 1;
+			/* fall through */
 		case NEXTHDR_ROUTING:
 		case NEXTHDR_HOP:
 		case NEXTHDR_DEST:
@@ -214,14 +219,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	}
 }
 
-static inline int xfrm6_garbage_collect(struct dst_ops *ops)
-{
-	struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
-
-	xfrm_garbage_collect_deferred(net);
-	return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
-}
-
 static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
 			      struct sk_buff *skb, u32 mtu)
 {
@@ -279,14 +276,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static struct dst_ops xfrm6_dst_ops_template = {
 	.family =		AF_INET6,
-	.gc =			xfrm6_garbage_collect,
 	.update_pmtu =		xfrm6_update_pmtu,
 	.redirect =		xfrm6_redirect,
 	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		xfrm6_dst_destroy,
 	.ifdown =		xfrm6_dst_ifdown,
 	.local_out =		__ip6_local_out,
-	.gc_thresh =		INT_MAX,
+	.gc_thresh =		32768,
 };
 
 static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 8a1f9c0d2a13..b15075a5c227 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * xfrm6_state.c: based on xfrm4_state.c
  *
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 4e438bc7ee87..f85f0d7480ac 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -338,6 +338,14 @@ static int __net_init xfrm6_tunnel_net_init(struct net *net)
 
 static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
 {
+	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
+	unsigned int i;
+
+	for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
+		WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
+
+	for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
+		WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byspi[i]));
 }
 
 static struct pernet_operations xfrm6_tunnel_net_ops = {
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index ac598ec90589..d21a9d128d3e 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1867,6 +1867,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		rc = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			break;
+		/* fall through */
 	case SIOCGIFADDR:
 		rc = ipxitf_ioctl(cmd, argp);
 		break;
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 7d75e4c5c75d..38a3d51d9ead 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *	IPX proc routines
  *
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index b5d91447f3dc..3cf93aa9f284 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *	Implements the IPX routing routines.
  *	Code moved from af_ipx.c.
diff --git a/net/ipx/pe2.c b/net/ipx/pe2.c
index 32dcd601ab32..ba7d4214bbff 100644
--- a/net/ipx/pe2.c
+++ b/net/ipx/pe2.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/in.h>
 #include <linux/mm.h>
 #include <linux/module.h>
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
index 0dafcc561ed6..c3eef457db88 100644
--- a/net/ipx/sysctl_net_ipx.c
+++ b/net/ipx/sysctl_net_ipx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* -*- linux-c -*-
  * sysctl_net_ipx.c: sysctl interface to net IPX subsystem.
  *
diff --git a/net/irda/Kconfig b/net/irda/Kconfig
deleted file mode 100644
index c8671a7ffb3c..000000000000
--- a/net/irda/Kconfig
+++ /dev/null
@@ -1,96 +0,0 @@
-#
-# IrDA protocol configuration
-#
-
-menuconfig IRDA
-	depends on NET && !S390
-	tristate "IrDA (infrared) subsystem support"
-	select CRC_CCITT
-	---help---
-	  Say Y here if you want to build support for the IrDA (TM) protocols.
-	  The Infrared Data Associations (tm) specifies standards for wireless
-	  infrared communication and is supported by most laptops and PDA's.
-
-	  To use Linux support for the IrDA (tm) protocols, you will also need
-	  some user-space utilities like irattach.  For more information, see
-	  the file <file:Documentation/networking/irda.txt>.  You also want to
-	  read the IR-HOWTO, available at
-	  <http://www.tldp.org/docs.html#howto>.
-
-	  If you want to exchange bits of data (vCal, vCard) with a PDA, you
-	  will need to install some OBEX application, such as OpenObex :
-	  <http://sourceforge.net/projects/openobex/>
-
-	  To compile this support as a module, choose M here: the module will
-	  be called irda.
-
-comment "IrDA protocols"
-	depends on IRDA
-
-source "net/irda/irlan/Kconfig"
-
-source "net/irda/irnet/Kconfig"
-
-source "net/irda/ircomm/Kconfig"
-
-config IRDA_ULTRA
-	bool "Ultra (connectionless) protocol"
-	depends on IRDA
-	help
-	  Say Y here to support the connectionless Ultra IRDA protocol.
-	  Ultra allows to exchange data over IrDA with really simple devices
-	  (watch, beacon) without the overhead of the IrDA protocol (no handshaking,
-	  no management frames, simple fixed header).
-	  Ultra is available as a special socket : socket(AF_IRDA, SOCK_DGRAM, 1);
-
-comment "IrDA options"
-	depends on IRDA
-
-config IRDA_CACHE_LAST_LSAP
-	bool "Cache last LSAP"
-	depends on IRDA
-	help
-	  Say Y here if you want IrLMP to cache the last LSAP used.  This
-	  makes sense since most frames will be sent/received on the same
-	  connection.  Enabling this option will save a hash-lookup per frame.
-
-	  If unsure, say Y.
-
-config IRDA_FAST_RR
-	bool "Fast RRs (low latency)"
-	depends on IRDA
-	---help---
-	  Say Y here is you want IrLAP to send fast RR (Receive Ready) frames
-	  when acting as a primary station.
-	  Disabling this option will make latency over IrDA very bad. Enabling
-	  this option will make the IrDA stack send more packet than strictly
-	  necessary, thus reduce your battery life (but not that much).
-
-	  Fast RR will make IrLAP send out a RR frame immediately when
-	  receiving a frame if its own transmit queue is currently empty. This
-	  will give a lot of speed improvement when receiving much data since
-	  the secondary station will not have to wait the max. turn around
-	  time (usually 500ms) before it is allowed to transmit the next time.
-	  If the transmit queue of the secondary is also empty, the primary will
-	  start backing-off before sending another RR frame, waiting longer
-	  each time until the back-off reaches the max. turn around time.
-	  This back-off increase in controlled via
-	  /proc/sys/net/irda/fast_poll_increase
-
-	  If unsure, say Y.
-
-config IRDA_DEBUG
-	bool "Debug information"
-	depends on IRDA
-	help
-	  Say Y here if you want the IrDA subsystem to write debug information
-	  to your syslog. You can change the debug level in
-	  /proc/sys/net/irda/debug .
-	  When this option is enabled, the IrDA also perform many extra internal
-	  verifications which will usually prevent the kernel to crash in case of
-	  bugs.
-
-	  If unsure, say Y (since it makes it easier to find the bugs).
-
-source "drivers/net/irda/Kconfig"
-
diff --git a/net/irda/Makefile b/net/irda/Makefile
deleted file mode 100644
index 187f6c563a4b..000000000000
--- a/net/irda/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-#
-# Makefile for the Linux IrDA protocol layer.
-#
-
-obj-$(CONFIG_IRDA) += irda.o
-obj-$(CONFIG_IRLAN) += irlan/
-obj-$(CONFIG_IRNET) += irnet/
-obj-$(CONFIG_IRCOMM) += ircomm/
-
-irda-y := iriap.o iriap_event.o irlmp.o irlmp_event.o irlmp_frame.o \
-          irlap.o irlap_event.o irlap_frame.o timer.o qos.o irqueue.o \
-          irttp.o irda_device.o irias_object.o wrapper.o af_irda.o \
-	  discovery.o parameters.o irnetlink.o irmod.o
-irda-$(CONFIG_PROC_FS) += irproc.o
-irda-$(CONFIG_SYSCTL) += irsysctl.o
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
deleted file mode 100644
index 2e6990f8b80b..000000000000
--- a/net/irda/af_irda.c
+++ /dev/null
@@ -1,2695 +0,0 @@
-/*********************************************************************
- *
- * Filename:      af_irda.c
- * Version:       0.9
- * Description:   IrDA sockets implementation
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun May 31 10:12:43 1998
- * Modified at:   Sat Dec 25 21:10:23 1999
- * Modified by:   Dag Brattli <dag@brattli.net>
- * Sources:       af_netroom.c, af_ax25.c, af_rose.c, af_x25.c etc.
- *
- *     Copyright (c) 1999 Dag Brattli <dagb@cs.uit.no>
- *     Copyright (c) 1999-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *     All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- *     Linux-IrDA now supports four different types of IrDA sockets:
- *
- *     o SOCK_STREAM:    TinyTP connections with SAR disabled. The
- *                       max SDU size is 0 for conn. of this type
- *     o SOCK_SEQPACKET: TinyTP connections with SAR enabled. TTP may
- *                       fragment the messages, but will preserve
- *                       the message boundaries
- *     o SOCK_DGRAM:     IRDAPROTO_UNITDATA: TinyTP connections with Unitdata
- *                       (unreliable) transfers
- *                       IRDAPROTO_ULTRA: Connectionless and unreliable data
- *
- ********************************************************************/
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/slab.h>
-#include <linux/sched/signal.h>
-#include <linux/init.h>
-#include <linux/net.h>
-#include <linux/irda.h>
-#include <linux/poll.h>
-
-#include <asm/ioctls.h>		/* TIOCOUTQ, TIOCINQ */
-#include <linux/uaccess.h>
-
-#include <net/sock.h>
-#include <net/tcp_states.h>
-
-#include <net/irda/af_irda.h>
-
-static int irda_create(struct net *net, struct socket *sock, int protocol, int kern);
-
-static const struct proto_ops irda_stream_ops;
-static const struct proto_ops irda_seqpacket_ops;
-static const struct proto_ops irda_dgram_ops;
-
-#ifdef CONFIG_IRDA_ULTRA
-static const struct proto_ops irda_ultra_ops;
-#define ULTRA_MAX_DATA 382
-#endif /* CONFIG_IRDA_ULTRA */
-
-#define IRDA_MAX_HEADER (TTP_MAX_HEADER)
-
-/*
- * Function irda_data_indication (instance, sap, skb)
- *
- *    Received some data from TinyTP. Just queue it on the receive queue
- *
- */
-static int irda_data_indication(void *instance, void *sap, struct sk_buff *skb)
-{
-	struct irda_sock *self;
-	struct sock *sk;
-	int err;
-
-	self = instance;
-	sk = instance;
-
-	err = sock_queue_rcv_skb(sk, skb);
-	if (err) {
-		pr_debug("%s(), error: no more mem!\n", __func__);
-		self->rx_flow = FLOW_STOP;
-
-		/* When we return error, TTP will need to requeue the skb */
-		return err;
-	}
-
-	return 0;
-}
-
-/*
- * Function irda_disconnect_indication (instance, sap, reason, skb)
- *
- *    Connection has been closed. Check reason to find out why
- *
- */
-static void irda_disconnect_indication(void *instance, void *sap,
-				       LM_REASON reason, struct sk_buff *skb)
-{
-	struct irda_sock *self;
-	struct sock *sk;
-
-	self = instance;
-
-	pr_debug("%s(%p)\n", __func__, self);
-
-	/* Don't care about it, but let's not leak it */
-	if(skb)
-		dev_kfree_skb(skb);
-
-	sk = instance;
-	if (sk == NULL) {
-		pr_debug("%s(%p) : BUG : sk is NULL\n",
-			 __func__, self);
-		return;
-	}
-
-	/* Prevent race conditions with irda_release() and irda_shutdown() */
-	bh_lock_sock(sk);
-	if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) {
-		sk->sk_state     = TCP_CLOSE;
-		sk->sk_shutdown |= SEND_SHUTDOWN;
-
-		sk->sk_state_change(sk);
-
-		/* Close our TSAP.
-		 * If we leave it open, IrLMP put it back into the list of
-		 * unconnected LSAPs. The problem is that any incoming request
-		 * can then be matched to this socket (and it will be, because
-		 * it is at the head of the list). This would prevent any
-		 * listening socket waiting on the same TSAP to get those
-		 * requests. Some apps forget to close sockets, or hang to it
-		 * a bit too long, so we may stay in this dead state long
-		 * enough to be noticed...
-		 * Note : all socket function do check sk->sk_state, so we are
-		 * safe...
-		 * Jean II
-		 */
-		if (self->tsap) {
-			irttp_close_tsap(self->tsap);
-			self->tsap = NULL;
-		}
-	}
-	bh_unlock_sock(sk);
-
-	/* Note : once we are there, there is not much you want to do
-	 * with the socket anymore, apart from closing it.
-	 * For example, bind() and connect() won't reset sk->sk_err,
-	 * sk->sk_shutdown and sk->sk_flags to valid values...
-	 * Jean II
-	 */
-}
-
-/*
- * Function irda_connect_confirm (instance, sap, qos, max_sdu_size, skb)
- *
- *    Connections has been confirmed by the remote device
- *
- */
-static void irda_connect_confirm(void *instance, void *sap,
-				 struct qos_info *qos,
-				 __u32 max_sdu_size, __u8 max_header_size,
-				 struct sk_buff *skb)
-{
-	struct irda_sock *self;
-	struct sock *sk;
-
-	self = instance;
-
-	pr_debug("%s(%p)\n", __func__, self);
-
-	sk = instance;
-	if (sk == NULL) {
-		dev_kfree_skb(skb);
-		return;
-	}
-
-	dev_kfree_skb(skb);
-	// Should be ??? skb_queue_tail(&sk->sk_receive_queue, skb);
-
-	/* How much header space do we need to reserve */
-	self->max_header_size = max_header_size;
-
-	/* IrTTP max SDU size in transmit direction */
-	self->max_sdu_size_tx = max_sdu_size;
-
-	/* Find out what the largest chunk of data that we can transmit is */
-	switch (sk->sk_type) {
-	case SOCK_STREAM:
-		if (max_sdu_size != 0) {
-			net_err_ratelimited("%s: max_sdu_size must be 0\n",
-					    __func__);
-			return;
-		}
-		self->max_data_size = irttp_get_max_seg_size(self->tsap);
-		break;
-	case SOCK_SEQPACKET:
-		if (max_sdu_size == 0) {
-			net_err_ratelimited("%s: max_sdu_size cannot be 0\n",
-					    __func__);
-			return;
-		}
-		self->max_data_size = max_sdu_size;
-		break;
-	default:
-		self->max_data_size = irttp_get_max_seg_size(self->tsap);
-	}
-
-	pr_debug("%s(), max_data_size=%d\n", __func__,
-		 self->max_data_size);
-
-	memcpy(&self->qos_tx, qos, sizeof(struct qos_info));
-
-	/* We are now connected! */
-	sk->sk_state = TCP_ESTABLISHED;
-	sk->sk_state_change(sk);
-}
-
-/*
- * Function irda_connect_indication(instance, sap, qos, max_sdu_size, userdata)
- *
- *    Incoming connection
- *
- */
-static void irda_connect_indication(void *instance, void *sap,
-				    struct qos_info *qos, __u32 max_sdu_size,
-				    __u8 max_header_size, struct sk_buff *skb)
-{
-	struct irda_sock *self;
-	struct sock *sk;
-
-	self = instance;
-
-	pr_debug("%s(%p)\n", __func__, self);
-
-	sk = instance;
-	if (sk == NULL) {
-		dev_kfree_skb(skb);
-		return;
-	}
-
-	/* How much header space do we need to reserve */
-	self->max_header_size = max_header_size;
-
-	/* IrTTP max SDU size in transmit direction */
-	self->max_sdu_size_tx = max_sdu_size;
-
-	/* Find out what the largest chunk of data that we can transmit is */
-	switch (sk->sk_type) {
-	case SOCK_STREAM:
-		if (max_sdu_size != 0) {
-			net_err_ratelimited("%s: max_sdu_size must be 0\n",
-					    __func__);
-			kfree_skb(skb);
-			return;
-		}
-		self->max_data_size = irttp_get_max_seg_size(self->tsap);
-		break;
-	case SOCK_SEQPACKET:
-		if (max_sdu_size == 0) {
-			net_err_ratelimited("%s: max_sdu_size cannot be 0\n",
-					    __func__);
-			kfree_skb(skb);
-			return;
-		}
-		self->max_data_size = max_sdu_size;
-		break;
-	default:
-		self->max_data_size = irttp_get_max_seg_size(self->tsap);
-	}
-
-	pr_debug("%s(), max_data_size=%d\n", __func__,
-		 self->max_data_size);
-
-	memcpy(&self->qos_tx, qos, sizeof(struct qos_info));
-
-	skb_queue_tail(&sk->sk_receive_queue, skb);
-	sk->sk_state_change(sk);
-}
-
-/*
- * Function irda_connect_response (handle)
- *
- *    Accept incoming connection
- *
- */
-static void irda_connect_response(struct irda_sock *self)
-{
-	struct sk_buff *skb;
-
-	skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL);
-	if (skb == NULL) {
-		pr_debug("%s() Unable to allocate sk_buff!\n",
-			 __func__);
-		return;
-	}
-
-	/* Reserve space for MUX_CONTROL and LAP header */
-	skb_reserve(skb, IRDA_MAX_HEADER);
-
-	irttp_connect_response(self->tsap, self->max_sdu_size_rx, skb);
-}
-
-/*
- * Function irda_flow_indication (instance, sap, flow)
- *
- *    Used by TinyTP to tell us if it can accept more data or not
- *
- */
-static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
-{
-	struct irda_sock *self;
-	struct sock *sk;
-
-	self = instance;
-	sk = instance;
-	BUG_ON(sk == NULL);
-
-	switch (flow) {
-	case FLOW_STOP:
-		pr_debug("%s(), IrTTP wants us to slow down\n",
-			 __func__);
-		self->tx_flow = flow;
-		break;
-	case FLOW_START:
-		self->tx_flow = flow;
-		pr_debug("%s(), IrTTP wants us to start again\n",
-			 __func__);
-		wake_up_interruptible(sk_sleep(sk));
-		break;
-	default:
-		pr_debug("%s(), Unknown flow command!\n", __func__);
-		/* Unknown flow command, better stop */
-		self->tx_flow = flow;
-		break;
-	}
-}
-
-/*
- * Function irda_getvalue_confirm (obj_id, value, priv)
- *
- *    Got answer from remote LM-IAS, just pass object to requester...
- *
- * Note : duplicate from above, but we need our own version that
- * doesn't touch the dtsap_sel and save the full value structure...
- */
-static void irda_getvalue_confirm(int result, __u16 obj_id,
-				  struct ias_value *value, void *priv)
-{
-	struct irda_sock *self;
-
-	self = priv;
-	if (!self) {
-		net_warn_ratelimited("%s: lost myself!\n", __func__);
-		return;
-	}
-
-	pr_debug("%s(%p)\n", __func__, self);
-
-	/* We probably don't need to make any more queries */
-	iriap_close(self->iriap);
-	self->iriap = NULL;
-
-	/* Check if request succeeded */
-	if (result != IAS_SUCCESS) {
-		pr_debug("%s(), IAS query failed! (%d)\n", __func__,
-			 result);
-
-		self->errno = result;	/* We really need it later */
-
-		/* Wake up any processes waiting for result */
-		wake_up_interruptible(&self->query_wait);
-
-		return;
-	}
-
-	/* Pass the object to the caller (so the caller must delete it) */
-	self->ias_result = value;
-	self->errno = 0;
-
-	/* Wake up any processes waiting for result */
-	wake_up_interruptible(&self->query_wait);
-}
-
-/*
- * Function irda_selective_discovery_indication (discovery)
- *
- *    Got a selective discovery indication from IrLMP.
- *
- * IrLMP is telling us that this node is new and matching our hint bit
- * filter. Wake up any process waiting for answer...
- */
-static void irda_selective_discovery_indication(discinfo_t *discovery,
-						DISCOVERY_MODE mode,
-						void *priv)
-{
-	struct irda_sock *self;
-
-	self = priv;
-	if (!self) {
-		net_warn_ratelimited("%s: lost myself!\n", __func__);
-		return;
-	}
-
-	/* Pass parameter to the caller */
-	self->cachedaddr = discovery->daddr;
-
-	/* Wake up process if its waiting for device to be discovered */
-	wake_up_interruptible(&self->query_wait);
-}
-
-/*
- * Function irda_discovery_timeout (priv)
- *
- *    Timeout in the selective discovery process
- *
- * We were waiting for a node to be discovered, but nothing has come up
- * so far. Wake up the user and tell him that we failed...
- */
-static void irda_discovery_timeout(u_long priv)
-{
-	struct irda_sock *self;
-
-	self = (struct irda_sock *) priv;
-	BUG_ON(self == NULL);
-
-	/* Nothing for the caller */
-	self->cachelog = NULL;
-	self->cachedaddr = 0;
-	self->errno = -ETIME;
-
-	/* Wake up process if its still waiting... */
-	wake_up_interruptible(&self->query_wait);
-}
-
-/*
- * Function irda_open_tsap (self)
- *
- *    Open local Transport Service Access Point (TSAP)
- *
- */
-static int irda_open_tsap(struct irda_sock *self, __u8 tsap_sel, char *name)
-{
-	notify_t notify;
-
-	if (self->tsap) {
-		pr_debug("%s: busy!\n", __func__);
-		return -EBUSY;
-	}
-
-	/* Initialize callbacks to be used by the IrDA stack */
-	irda_notify_init(&notify);
-	notify.connect_confirm       = irda_connect_confirm;
-	notify.connect_indication    = irda_connect_indication;
-	notify.disconnect_indication = irda_disconnect_indication;
-	notify.data_indication       = irda_data_indication;
-	notify.udata_indication	     = irda_data_indication;
-	notify.flow_indication       = irda_flow_indication;
-	notify.instance = self;
-	strncpy(notify.name, name, NOTIFY_MAX_NAME);
-
-	self->tsap = irttp_open_tsap(tsap_sel, DEFAULT_INITIAL_CREDIT,
-				     &notify);
-	if (self->tsap == NULL) {
-		pr_debug("%s(), Unable to allocate TSAP!\n",
-			 __func__);
-		return -ENOMEM;
-	}
-	/* Remember which TSAP selector we actually got */
-	self->stsap_sel = self->tsap->stsap_sel;
-
-	return 0;
-}
-
-/*
- * Function irda_open_lsap (self)
- *
- *    Open local Link Service Access Point (LSAP). Used for opening Ultra
- *    sockets
- */
-#ifdef CONFIG_IRDA_ULTRA
-static int irda_open_lsap(struct irda_sock *self, int pid)
-{
-	notify_t notify;
-
-	if (self->lsap) {
-		net_warn_ratelimited("%s(), busy!\n", __func__);
-		return -EBUSY;
-	}
-
-	/* Initialize callbacks to be used by the IrDA stack */
-	irda_notify_init(&notify);
-	notify.udata_indication	= irda_data_indication;
-	notify.instance = self;
-	strncpy(notify.name, "Ultra", NOTIFY_MAX_NAME);
-
-	self->lsap = irlmp_open_lsap(LSAP_CONNLESS, &notify, pid);
-	if (self->lsap == NULL) {
-		pr_debug("%s(), Unable to allocate LSAP!\n", __func__);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irda_find_lsap_sel (self, name)
- *
- *    Try to lookup LSAP selector in remote LM-IAS
- *
- * Basically, we start a IAP query, and then go to sleep. When the query
- * return, irda_getvalue_confirm will wake us up, and we can examine the
- * result of the query...
- * Note that in some case, the query fail even before we go to sleep,
- * creating some races...
- */
-static int irda_find_lsap_sel(struct irda_sock *self, char *name)
-{
-	pr_debug("%s(%p, %s)\n", __func__, self, name);
-
-	if (self->iriap) {
-		net_warn_ratelimited("%s(): busy with a previous query\n",
-				     __func__);
-		return -EBUSY;
-	}
-
-	self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
-				 irda_getvalue_confirm);
-	if(self->iriap == NULL)
-		return -ENOMEM;
-
-	/* Treat unexpected wakeup as disconnect */
-	self->errno = -EHOSTUNREACH;
-
-	/* Query remote LM-IAS */
-	iriap_getvaluebyclass_request(self->iriap, self->saddr, self->daddr,
-				      name, "IrDA:TinyTP:LsapSel");
-
-	/* Wait for answer, if not yet finished (or failed) */
-	if (wait_event_interruptible(self->query_wait, (self->iriap==NULL)))
-		/* Treat signals as disconnect */
-		return -EHOSTUNREACH;
-
-	/* Check what happened */
-	if (self->errno)
-	{
-		/* Requested object/attribute doesn't exist */
-		if((self->errno == IAS_CLASS_UNKNOWN) ||
-		   (self->errno == IAS_ATTRIB_UNKNOWN))
-			return -EADDRNOTAVAIL;
-		else
-			return -EHOSTUNREACH;
-	}
-
-	/* Get the remote TSAP selector */
-	switch (self->ias_result->type) {
-	case IAS_INTEGER:
-		pr_debug("%s() int=%d\n",
-			 __func__, self->ias_result->t.integer);
-
-		if (self->ias_result->t.integer != -1)
-			self->dtsap_sel = self->ias_result->t.integer;
-		else
-			self->dtsap_sel = 0;
-		break;
-	default:
-		self->dtsap_sel = 0;
-		pr_debug("%s(), bad type!\n", __func__);
-		break;
-	}
-	if (self->ias_result)
-		irias_delete_value(self->ias_result);
-
-	if (self->dtsap_sel)
-		return 0;
-
-	return -EADDRNOTAVAIL;
-}
-
-/*
- * Function irda_discover_daddr_and_lsap_sel (self, name)
- *
- *    This try to find a device with the requested service.
- *
- * It basically look into the discovery log. For each address in the list,
- * it queries the LM-IAS of the device to find if this device offer
- * the requested service.
- * If there is more than one node supporting the service, we complain
- * to the user (it should move devices around).
- * The, we set both the destination address and the lsap selector to point
- * on the service on the unique device we have found.
- *
- * Note : this function fails if there is more than one device in range,
- * because IrLMP doesn't disconnect the LAP when the last LSAP is closed.
- * Moreover, we would need to wait the LAP disconnection...
- */
-static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
-{
-	discinfo_t *discoveries;	/* Copy of the discovery log */
-	int	number;			/* Number of nodes in the log */
-	int	i;
-	int	err = -ENETUNREACH;
-	__u32	daddr = DEV_ADDR_ANY;	/* Address we found the service on */
-	__u8	dtsap_sel = 0x0;	/* TSAP associated with it */
-
-	pr_debug("%s(), name=%s\n", __func__, name);
-
-	/* Ask lmp for the current discovery log
-	 * Note : we have to use irlmp_get_discoveries(), as opposed
-	 * to play with the cachelog directly, because while we are
-	 * making our ias query, le log might change... */
-	discoveries = irlmp_get_discoveries(&number, self->mask.word,
-					    self->nslots);
-	/* Check if the we got some results */
-	if (discoveries == NULL)
-		return -ENETUNREACH;	/* No nodes discovered */
-
-	/*
-	 * Now, check all discovered devices (if any), and connect
-	 * client only about the services that the client is
-	 * interested in...
-	 */
-	for(i = 0; i < number; i++) {
-		/* Try the address in the log */
-		self->daddr = discoveries[i].daddr;
-		self->saddr = 0x0;
-		pr_debug("%s(), trying daddr = %08x\n",
-			 __func__, self->daddr);
-
-		/* Query remote LM-IAS for this service */
-		err = irda_find_lsap_sel(self, name);
-		switch (err) {
-		case 0:
-			/* We found the requested service */
-			if(daddr != DEV_ADDR_ANY) {
-				pr_debug("%s(), discovered service ''%s'' in two different devices !!!\n",
-					 __func__, name);
-				self->daddr = DEV_ADDR_ANY;
-				kfree(discoveries);
-				return -ENOTUNIQ;
-			}
-			/* First time we found that one, save it ! */
-			daddr = self->daddr;
-			dtsap_sel = self->dtsap_sel;
-			break;
-		case -EADDRNOTAVAIL:
-			/* Requested service simply doesn't exist on this node */
-			break;
-		default:
-			/* Something bad did happen :-( */
-			pr_debug("%s(), unexpected IAS query failure\n",
-				 __func__);
-			self->daddr = DEV_ADDR_ANY;
-			kfree(discoveries);
-			return -EHOSTUNREACH;
-		}
-	}
-	/* Cleanup our copy of the discovery log */
-	kfree(discoveries);
-
-	/* Check out what we found */
-	if(daddr == DEV_ADDR_ANY) {
-		pr_debug("%s(), cannot discover service ''%s'' in any device !!!\n",
-			 __func__, name);
-		self->daddr = DEV_ADDR_ANY;
-		return -EADDRNOTAVAIL;
-	}
-
-	/* Revert back to discovered device & service */
-	self->daddr = daddr;
-	self->saddr = 0x0;
-	self->dtsap_sel = dtsap_sel;
-
-	pr_debug("%s(), discovered requested service ''%s'' at address %08x\n",
-		 __func__, name, self->daddr);
-
-	return 0;
-}
-
-/*
- * Function irda_getname (sock, uaddr, uaddr_len, peer)
- *
- *    Return the our own, or peers socket address (sockaddr_irda)
- *
- */
-static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
-			int *uaddr_len, int peer)
-{
-	struct sockaddr_irda saddr;
-	struct sock *sk = sock->sk;
-	struct irda_sock *self = irda_sk(sk);
-
-	memset(&saddr, 0, sizeof(saddr));
-	if (peer) {
-		if (sk->sk_state != TCP_ESTABLISHED)
-			return -ENOTCONN;
-
-		saddr.sir_family = AF_IRDA;
-		saddr.sir_lsap_sel = self->dtsap_sel;
-		saddr.sir_addr = self->daddr;
-	} else {
-		saddr.sir_family = AF_IRDA;
-		saddr.sir_lsap_sel = self->stsap_sel;
-		saddr.sir_addr = self->saddr;
-	}
-
-	pr_debug("%s(), tsap_sel = %#x\n", __func__, saddr.sir_lsap_sel);
-	pr_debug("%s(), addr = %08x\n", __func__, saddr.sir_addr);
-
-	/* uaddr_len come to us uninitialised */
-	*uaddr_len = sizeof (struct sockaddr_irda);
-	memcpy(uaddr, &saddr, *uaddr_len);
-
-	return 0;
-}
-
-/*
- * Function irda_listen (sock, backlog)
- *
- *    Just move to the listen state
- *
- */
-static int irda_listen(struct socket *sock, int backlog)
-{
-	struct sock *sk = sock->sk;
-	int err = -EOPNOTSUPP;
-
-	lock_sock(sk);
-
-	if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
-	    (sk->sk_type != SOCK_DGRAM))
-		goto out;
-
-	if (sk->sk_state != TCP_LISTEN) {
-		sk->sk_max_ack_backlog = backlog;
-		sk->sk_state           = TCP_LISTEN;
-
-		err = 0;
-	}
-out:
-	release_sock(sk);
-
-	return err;
-}
-
-/*
- * Function irda_bind (sock, uaddr, addr_len)
- *
- *    Used by servers to register their well known TSAP
- *
- */
-static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
-{
-	struct sock *sk = sock->sk;
-	struct sockaddr_irda *addr = (struct sockaddr_irda *) uaddr;
-	struct irda_sock *self = irda_sk(sk);
-	int err;
-
-	pr_debug("%s(%p)\n", __func__, self);
-
-	if (addr_len != sizeof(struct sockaddr_irda))
-		return -EINVAL;
-
-	lock_sock(sk);
-#ifdef CONFIG_IRDA_ULTRA
-	/* Special care for Ultra sockets */
-	if ((sk->sk_type == SOCK_DGRAM) &&
-	    (sk->sk_protocol == IRDAPROTO_ULTRA)) {
-		self->pid = addr->sir_lsap_sel;
-		err = -EOPNOTSUPP;
-		if (self->pid & 0x80) {
-			pr_debug("%s(), extension in PID not supp!\n",
-				 __func__);
-			goto out;
-		}
-		err = irda_open_lsap(self, self->pid);
-		if (err < 0)
-			goto out;
-
-		/* Pretend we are connected */
-		sock->state = SS_CONNECTED;
-		sk->sk_state   = TCP_ESTABLISHED;
-		err = 0;
-
-		goto out;
-	}
-#endif /* CONFIG_IRDA_ULTRA */
-
-	self->ias_obj = irias_new_object(addr->sir_name, jiffies);
-	err = -ENOMEM;
-	if (self->ias_obj == NULL)
-		goto out;
-
-	err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name);
-	if (err < 0) {
-		irias_delete_object(self->ias_obj);
-		self->ias_obj = NULL;
-		goto out;
-	}
-
-	/*  Register with LM-IAS */
-	irias_add_integer_attrib(self->ias_obj, "IrDA:TinyTP:LsapSel",
-				 self->stsap_sel, IAS_KERNEL_ATTR);
-	irias_insert_object(self->ias_obj);
-
-	err = 0;
-out:
-	release_sock(sk);
-	return err;
-}
-
-/*
- * Function irda_accept (sock, newsock, flags)
- *
- *    Wait for incoming connection
- *
- */
-static int irda_accept(struct socket *sock, struct socket *newsock, int flags,
-		       bool kern)
-{
-	struct sock *sk = sock->sk;
-	struct irda_sock *new, *self = irda_sk(sk);
-	struct sock *newsk;
-	struct sk_buff *skb = NULL;
-	int err;
-
-	err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern);
-	if (err)
-		return err;
-
-	err = -EINVAL;
-
-	lock_sock(sk);
-	if (sock->state != SS_UNCONNECTED)
-		goto out;
-
-	err = -EOPNOTSUPP;
-	if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
-	    (sk->sk_type != SOCK_DGRAM))
-		goto out;
-
-	err = -EINVAL;
-	if (sk->sk_state != TCP_LISTEN)
-		goto out;
-
-	/*
-	 *	The read queue this time is holding sockets ready to use
-	 *	hooked into the SABM we saved
-	 */
-
-	/*
-	 * We can perform the accept only if there is incoming data
-	 * on the listening socket.
-	 * So, we will block the caller until we receive any data.
-	 * If the caller was waiting on select() or poll() before
-	 * calling us, the data is waiting for us ;-)
-	 * Jean II
-	 */
-	while (1) {
-		skb = skb_dequeue(&sk->sk_receive_queue);
-		if (skb)
-			break;
-
-		/* Non blocking operation */
-		err = -EWOULDBLOCK;
-		if (flags & O_NONBLOCK)
-			goto out;
-
-		err = wait_event_interruptible(*(sk_sleep(sk)),
-					skb_peek(&sk->sk_receive_queue));
-		if (err)
-			goto out;
-	}
-
-	newsk = newsock->sk;
-	err = -EIO;
-	if (newsk == NULL)
-		goto out;
-
-	newsk->sk_state = TCP_ESTABLISHED;
-
-	new = irda_sk(newsk);
-
-	/* Now attach up the new socket */
-	new->tsap = irttp_dup(self->tsap, new);
-	err = -EPERM; /* value does not seem to make sense. -arnd */
-	if (!new->tsap) {
-		pr_debug("%s(), dup failed!\n", __func__);
-		goto out;
-	}
-
-	new->stsap_sel = new->tsap->stsap_sel;
-	new->dtsap_sel = new->tsap->dtsap_sel;
-	new->saddr = irttp_get_saddr(new->tsap);
-	new->daddr = irttp_get_daddr(new->tsap);
-
-	new->max_sdu_size_tx = self->max_sdu_size_tx;
-	new->max_sdu_size_rx = self->max_sdu_size_rx;
-	new->max_data_size   = self->max_data_size;
-	new->max_header_size = self->max_header_size;
-
-	memcpy(&new->qos_tx, &self->qos_tx, sizeof(struct qos_info));
-
-	/* Clean up the original one to keep it in listen state */
-	irttp_listen(self->tsap);
-
-	sk->sk_ack_backlog--;
-
-	newsock->state = SS_CONNECTED;
-
-	irda_connect_response(new);
-	err = 0;
-out:
-	kfree_skb(skb);
-	release_sock(sk);
-	return err;
-}
-
-/*
- * Function irda_connect (sock, uaddr, addr_len, flags)
- *
- *    Connect to a IrDA device
- *
- * The main difference with a "standard" connect is that with IrDA we need
- * to resolve the service name into a TSAP selector (in TCP, port number
- * doesn't have to be resolved).
- * Because of this service name resolution, we can offer "auto-connect",
- * where we connect to a service without specifying a destination address.
- *
- * Note : by consulting "errno", the user space caller may learn the cause
- * of the failure. Most of them are visible in the function, others may come
- * from subroutines called and are listed here :
- *	o EBUSY : already processing a connect
- *	o EHOSTUNREACH : bad addr->sir_addr argument
- *	o EADDRNOTAVAIL : bad addr->sir_name argument
- *	o ENOTUNIQ : more than one node has addr->sir_name (auto-connect)
- *	o ENETUNREACH : no node found on the network (auto-connect)
- */
-static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
-			int addr_len, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct sockaddr_irda *addr = (struct sockaddr_irda *) uaddr;
-	struct irda_sock *self = irda_sk(sk);
-	int err;
-
-	pr_debug("%s(%p)\n", __func__, self);
-
-	lock_sock(sk);
-	/* Don't allow connect for Ultra sockets */
-	err = -ESOCKTNOSUPPORT;
-	if ((sk->sk_type == SOCK_DGRAM) && (sk->sk_protocol == IRDAPROTO_ULTRA))
-		goto out;
-
-	if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
-		sock->state = SS_CONNECTED;
-		err = 0;
-		goto out;   /* Connect completed during a ERESTARTSYS event */
-	}
-
-	if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
-		sock->state = SS_UNCONNECTED;
-		err = -ECONNREFUSED;
-		goto out;
-	}
-
-	err = -EISCONN;      /* No reconnect on a seqpacket socket */
-	if (sk->sk_state == TCP_ESTABLISHED)
-		goto out;
-
-	sk->sk_state   = TCP_CLOSE;
-	sock->state = SS_UNCONNECTED;
-
-	err = -EINVAL;
-	if (addr_len != sizeof(struct sockaddr_irda))
-		goto out;
-
-	/* Check if user supplied any destination device address */
-	if ((!addr->sir_addr) || (addr->sir_addr == DEV_ADDR_ANY)) {
-		/* Try to find one suitable */
-		err = irda_discover_daddr_and_lsap_sel(self, addr->sir_name);
-		if (err) {
-			pr_debug("%s(), auto-connect failed!\n", __func__);
-			goto out;
-		}
-	} else {
-		/* Use the one provided by the user */
-		self->daddr = addr->sir_addr;
-		pr_debug("%s(), daddr = %08x\n", __func__, self->daddr);
-
-		/* If we don't have a valid service name, we assume the
-		 * user want to connect on a specific LSAP. Prevent
-		 * the use of invalid LSAPs (IrLMP 1.1 p10). Jean II */
-		if((addr->sir_name[0] != '\0') ||
-		   (addr->sir_lsap_sel >= 0x70)) {
-			/* Query remote LM-IAS using service name */
-			err = irda_find_lsap_sel(self, addr->sir_name);
-			if (err) {
-				pr_debug("%s(), connect failed!\n", __func__);
-				goto out;
-			}
-		} else {
-			/* Directly connect to the remote LSAP
-			 * specified by the sir_lsap field.
-			 * Please use with caution, in IrDA LSAPs are
-			 * dynamic and there is no "well-known" LSAP. */
-			self->dtsap_sel = addr->sir_lsap_sel;
-		}
-	}
-
-	/* Check if we have opened a local TSAP */
-	if (!self->tsap) {
-		err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
-		if (err)
-			goto out;
-	}
-
-	/* Move to connecting socket, start sending Connect Requests */
-	sock->state = SS_CONNECTING;
-	sk->sk_state   = TCP_SYN_SENT;
-
-	/* Connect to remote device */
-	err = irttp_connect_request(self->tsap, self->dtsap_sel,
-				    self->saddr, self->daddr, NULL,
-				    self->max_sdu_size_rx, NULL);
-	if (err) {
-		pr_debug("%s(), connect failed!\n", __func__);
-		goto out;
-	}
-
-	/* Now the loop */
-	err = -EINPROGRESS;
-	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
-		goto out;
-
-	err = -ERESTARTSYS;
-	if (wait_event_interruptible(*(sk_sleep(sk)),
-				     (sk->sk_state != TCP_SYN_SENT)))
-		goto out;
-
-	if (sk->sk_state != TCP_ESTABLISHED) {
-		sock->state = SS_UNCONNECTED;
-		err = sock_error(sk);
-		if (!err)
-			err = -ECONNRESET;
-		goto out;
-	}
-
-	sock->state = SS_CONNECTED;
-
-	/* At this point, IrLMP has assigned our source address */
-	self->saddr = irttp_get_saddr(self->tsap);
-	err = 0;
-out:
-	release_sock(sk);
-	return err;
-}
-
-static struct proto irda_proto = {
-	.name	  = "IRDA",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct irda_sock),
-};
-
-/*
- * Function irda_create (sock, protocol)
- *
- *    Create IrDA socket
- *
- */
-static int irda_create(struct net *net, struct socket *sock, int protocol,
-		       int kern)
-{
-	struct sock *sk;
-	struct irda_sock *self;
-
-	if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
-		return -EINVAL;
-
-	if (net != &init_net)
-		return -EAFNOSUPPORT;
-
-	/* Check for valid socket type */
-	switch (sock->type) {
-	case SOCK_STREAM:     /* For TTP connections with SAR disabled */
-	case SOCK_SEQPACKET:  /* For TTP connections with SAR enabled */
-	case SOCK_DGRAM:      /* For TTP Unitdata or LMP Ultra transfers */
-		break;
-	default:
-		return -ESOCKTNOSUPPORT;
-	}
-
-	/* Allocate networking socket */
-	sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto, kern);
-	if (sk == NULL)
-		return -ENOMEM;
-
-	self = irda_sk(sk);
-	pr_debug("%s() : self is %p\n", __func__, self);
-
-	init_waitqueue_head(&self->query_wait);
-
-	switch (sock->type) {
-	case SOCK_STREAM:
-		sock->ops = &irda_stream_ops;
-		self->max_sdu_size_rx = TTP_SAR_DISABLE;
-		break;
-	case SOCK_SEQPACKET:
-		sock->ops = &irda_seqpacket_ops;
-		self->max_sdu_size_rx = TTP_SAR_UNBOUND;
-		break;
-	case SOCK_DGRAM:
-		switch (protocol) {
-#ifdef CONFIG_IRDA_ULTRA
-		case IRDAPROTO_ULTRA:
-			sock->ops = &irda_ultra_ops;
-			/* Initialise now, because we may send on unbound
-			 * sockets. Jean II */
-			self->max_data_size = ULTRA_MAX_DATA - LMP_PID_HEADER;
-			self->max_header_size = IRDA_MAX_HEADER + LMP_PID_HEADER;
-			break;
-#endif /* CONFIG_IRDA_ULTRA */
-		case IRDAPROTO_UNITDATA:
-			sock->ops = &irda_dgram_ops;
-			/* We let Unitdata conn. be like seqpack conn. */
-			self->max_sdu_size_rx = TTP_SAR_UNBOUND;
-			break;
-		default:
-			sk_free(sk);
-			return -ESOCKTNOSUPPORT;
-		}
-		break;
-	default:
-		sk_free(sk);
-		return -ESOCKTNOSUPPORT;
-	}
-
-	/* Initialise networking socket struct */
-	sock_init_data(sock, sk);	/* Note : set sk->sk_refcnt to 1 */
-	sk->sk_family = PF_IRDA;
-	sk->sk_protocol = protocol;
-
-	/* Register as a client with IrLMP */
-	self->ckey = irlmp_register_client(0, NULL, NULL, NULL);
-	self->mask.word = 0xffff;
-	self->rx_flow = self->tx_flow = FLOW_START;
-	self->nslots = DISCOVERY_DEFAULT_SLOTS;
-	self->daddr = DEV_ADDR_ANY;	/* Until we get connected */
-	self->saddr = 0x0;		/* so IrLMP assign us any link */
-	return 0;
-}
-
-/*
- * Function irda_destroy_socket (self)
- *
- *    Destroy socket
- *
- */
-static void irda_destroy_socket(struct irda_sock *self)
-{
-	pr_debug("%s(%p)\n", __func__, self);
-
-	/* Unregister with IrLMP */
-	irlmp_unregister_client(self->ckey);
-	irlmp_unregister_service(self->skey);
-
-	/* Unregister with LM-IAS */
-	if (self->ias_obj) {
-		irias_delete_object(self->ias_obj);
-		self->ias_obj = NULL;
-	}
-
-	if (self->iriap) {
-		iriap_close(self->iriap);
-		self->iriap = NULL;
-	}
-
-	if (self->tsap) {
-		irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
-		irttp_close_tsap(self->tsap);
-		self->tsap = NULL;
-	}
-#ifdef CONFIG_IRDA_ULTRA
-	if (self->lsap) {
-		irlmp_close_lsap(self->lsap);
-		self->lsap = NULL;
-	}
-#endif /* CONFIG_IRDA_ULTRA */
-}
-
-/*
- * Function irda_release (sock)
- */
-static int irda_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-
-	if (sk == NULL)
-		return 0;
-
-	lock_sock(sk);
-	sk->sk_state       = TCP_CLOSE;
-	sk->sk_shutdown   |= SEND_SHUTDOWN;
-	sk->sk_state_change(sk);
-
-	/* Destroy IrDA socket */
-	irda_destroy_socket(irda_sk(sk));
-
-	sock_orphan(sk);
-	sock->sk   = NULL;
-	release_sock(sk);
-
-	/* Purge queues (see sock_init_data()) */
-	skb_queue_purge(&sk->sk_receive_queue);
-
-	/* Destroy networking socket if we are the last reference on it,
-	 * i.e. if(sk->sk_refcnt == 0) -> sk_free(sk) */
-	sock_put(sk);
-
-	/* Notes on socket locking and deallocation... - Jean II
-	 * In theory we should put pairs of sock_hold() / sock_put() to
-	 * prevent the socket to be destroyed whenever there is an
-	 * outstanding request or outstanding incoming packet or event.
-	 *
-	 * 1) This may include IAS request, both in connect and getsockopt.
-	 * Unfortunately, the situation is a bit more messy than it looks,
-	 * because we close iriap and kfree(self) above.
-	 *
-	 * 2) This may include selective discovery in getsockopt.
-	 * Same stuff as above, irlmp registration and self are gone.
-	 *
-	 * Probably 1 and 2 may not matter, because it's all triggered
-	 * by a process and the socket layer already prevent the
-	 * socket to go away while a process is holding it, through
-	 * sockfd_put() and fput()...
-	 *
-	 * 3) This may include deferred TSAP closure. In particular,
-	 * we may receive a late irda_disconnect_indication()
-	 * Fortunately, (tsap_cb *)->close_pend should protect us
-	 * from that.
-	 *
-	 * I did some testing on SMP, and it looks solid. And the socket
-	 * memory leak is now gone... - Jean II
-	 */
-
-	return 0;
-}
-
-/*
- * Function irda_sendmsg (sock, msg, len)
- *
- *    Send message down to TinyTP. This function is used for both STREAM and
- *    SEQPACK services. This is possible since it forces the client to
- *    fragment the message if necessary
- */
-static int irda_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
-{
-	struct sock *sk = sock->sk;
-	struct irda_sock *self;
-	struct sk_buff *skb;
-	int err = -EPIPE;
-
-	pr_debug("%s(), len=%zd\n", __func__, len);
-
-	/* Note : socket.c set MSG_EOR on SEQPACKET sockets */
-	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT |
-			       MSG_NOSIGNAL)) {
-		return -EINVAL;
-	}
-
-	lock_sock(sk);
-
-	if (sk->sk_shutdown & SEND_SHUTDOWN)
-		goto out_err;
-
-	if (sk->sk_state != TCP_ESTABLISHED) {
-		err = -ENOTCONN;
-		goto out;
-	}
-
-	self = irda_sk(sk);
-
-	/* Check if IrTTP is wants us to slow down */
-
-	if (wait_event_interruptible(*(sk_sleep(sk)),
-	    (self->tx_flow != FLOW_STOP  ||  sk->sk_state != TCP_ESTABLISHED))) {
-		err = -ERESTARTSYS;
-		goto out;
-	}
-
-	/* Check if we are still connected */
-	if (sk->sk_state != TCP_ESTABLISHED) {
-		err = -ENOTCONN;
-		goto out;
-	}
-
-	/* Check that we don't send out too big frames */
-	if (len > self->max_data_size) {
-		pr_debug("%s(), Chopping frame from %zd to %d bytes!\n",
-			 __func__, len, self->max_data_size);
-		len = self->max_data_size;
-	}
-
-	skb = sock_alloc_send_skb(sk, len + self->max_header_size + 16,
-				  msg->msg_flags & MSG_DONTWAIT, &err);
-	if (!skb)
-		goto out_err;
-
-	skb_reserve(skb, self->max_header_size + 16);
-	skb_reset_transport_header(skb);
-	skb_put(skb, len);
-	err = memcpy_from_msg(skb_transport_header(skb), msg, len);
-	if (err) {
-		kfree_skb(skb);
-		goto out_err;
-	}
-
-	/*
-	 * Just send the message to TinyTP, and let it deal with possible
-	 * errors. No need to duplicate all that here
-	 */
-	err = irttp_data_request(self->tsap, skb);
-	if (err) {
-		pr_debug("%s(), err=%d\n", __func__, err);
-		goto out_err;
-	}
-
-	release_sock(sk);
-	/* Tell client how much data we actually sent */
-	return len;
-
-out_err:
-	err = sk_stream_error(sk, msg->msg_flags, err);
-out:
-	release_sock(sk);
-	return err;
-
-}
-
-/*
- * Function irda_recvmsg_dgram (sock, msg, size, flags)
- *
- *    Try to receive message and copy it to user. The frame is discarded
- *    after being read, regardless of how much the user actually read
- */
-static int irda_recvmsg_dgram(struct socket *sock, struct msghdr *msg,
-			      size_t size, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct irda_sock *self = irda_sk(sk);
-	struct sk_buff *skb;
-	size_t copied;
-	int err;
-
-	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
-				flags & MSG_DONTWAIT, &err);
-	if (!skb)
-		return err;
-
-	skb_reset_transport_header(skb);
-	copied = skb->len;
-
-	if (copied > size) {
-		pr_debug("%s(), Received truncated frame (%zd < %zd)!\n",
-			 __func__, copied, size);
-		copied = size;
-		msg->msg_flags |= MSG_TRUNC;
-	}
-	skb_copy_datagram_msg(skb, 0, msg, copied);
-
-	skb_free_datagram(sk, skb);
-
-	/*
-	 *  Check if we have previously stopped IrTTP and we know
-	 *  have more free space in our rx_queue. If so tell IrTTP
-	 *  to start delivering frames again before our rx_queue gets
-	 *  empty
-	 */
-	if (self->rx_flow == FLOW_STOP) {
-		if ((atomic_read(&sk->sk_rmem_alloc) << 2) <= sk->sk_rcvbuf) {
-			pr_debug("%s(), Starting IrTTP\n", __func__);
-			self->rx_flow = FLOW_START;
-			irttp_flow_request(self->tsap, FLOW_START);
-		}
-	}
-
-	return copied;
-}
-
-/*
- * Function irda_recvmsg_stream (sock, msg, size, flags)
- */
-static int irda_recvmsg_stream(struct socket *sock, struct msghdr *msg,
-			       size_t size, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct irda_sock *self = irda_sk(sk);
-	int noblock = flags & MSG_DONTWAIT;
-	size_t copied = 0;
-	int target, err;
-	long timeo;
-
-	if ((err = sock_error(sk)) < 0)
-		return err;
-
-	if (sock->flags & __SO_ACCEPTCON)
-		return -EINVAL;
-
-	err =-EOPNOTSUPP;
-	if (flags & MSG_OOB)
-		return -EOPNOTSUPP;
-
-	err = 0;
-	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
-	timeo = sock_rcvtimeo(sk, noblock);
-
-	do {
-		int chunk;
-		struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
-
-		if (skb == NULL) {
-			DEFINE_WAIT(wait);
-			err = 0;
-
-			if (copied >= target)
-				break;
-
-			prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
-			/*
-			 *	POSIX 1003.1g mandates this order.
-			 */
-			err = sock_error(sk);
-			if (err)
-				;
-			else if (sk->sk_shutdown & RCV_SHUTDOWN)
-				;
-			else if (noblock)
-				err = -EAGAIN;
-			else if (signal_pending(current))
-				err = sock_intr_errno(timeo);
-			else if (sk->sk_state != TCP_ESTABLISHED)
-				err = -ENOTCONN;
-			else if (skb_peek(&sk->sk_receive_queue) == NULL)
-				/* Wait process until data arrives */
-				schedule();
-
-			finish_wait(sk_sleep(sk), &wait);
-
-			if (err)
-				return err;
-			if (sk->sk_shutdown & RCV_SHUTDOWN)
-				break;
-
-			continue;
-		}
-
-		chunk = min_t(unsigned int, skb->len, size);
-		if (memcpy_to_msg(msg, skb->data, chunk)) {
-			skb_queue_head(&sk->sk_receive_queue, skb);
-			if (copied == 0)
-				copied = -EFAULT;
-			break;
-		}
-		copied += chunk;
-		size -= chunk;
-
-		/* Mark read part of skb as used */
-		if (!(flags & MSG_PEEK)) {
-			skb_pull(skb, chunk);
-
-			/* put the skb back if we didn't use it up.. */
-			if (skb->len) {
-				pr_debug("%s(), back on q!\n",
-					 __func__);
-				skb_queue_head(&sk->sk_receive_queue, skb);
-				break;
-			}
-
-			kfree_skb(skb);
-		} else {
-			pr_debug("%s() questionable!?\n", __func__);
-
-			/* put message back and return */
-			skb_queue_head(&sk->sk_receive_queue, skb);
-			break;
-		}
-	} while (size);
-
-	/*
-	 *  Check if we have previously stopped IrTTP and we know
-	 *  have more free space in our rx_queue. If so tell IrTTP
-	 *  to start delivering frames again before our rx_queue gets
-	 *  empty
-	 */
-	if (self->rx_flow == FLOW_STOP) {
-		if ((atomic_read(&sk->sk_rmem_alloc) << 2) <= sk->sk_rcvbuf) {
-			pr_debug("%s(), Starting IrTTP\n", __func__);
-			self->rx_flow = FLOW_START;
-			irttp_flow_request(self->tsap, FLOW_START);
-		}
-	}
-
-	return copied;
-}
-
-/*
- * Function irda_sendmsg_dgram (sock, msg, len)
- *
- *    Send message down to TinyTP for the unreliable sequenced
- *    packet service...
- *
- */
-static int irda_sendmsg_dgram(struct socket *sock, struct msghdr *msg,
-			      size_t len)
-{
-	struct sock *sk = sock->sk;
-	struct irda_sock *self;
-	struct sk_buff *skb;
-	int err;
-
-	pr_debug("%s(), len=%zd\n", __func__, len);
-
-	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
-		return -EINVAL;
-
-	lock_sock(sk);
-
-	if (sk->sk_shutdown & SEND_SHUTDOWN) {
-		send_sig(SIGPIPE, current, 0);
-		err = -EPIPE;
-		goto out;
-	}
-
-	err = -ENOTCONN;
-	if (sk->sk_state != TCP_ESTABLISHED)
-		goto out;
-
-	self = irda_sk(sk);
-
-	/*
-	 * Check that we don't send out too big frames. This is an unreliable
-	 * service, so we have no fragmentation and no coalescence
-	 */
-	if (len > self->max_data_size) {
-		pr_debug("%s(), Warning too much data! Chopping frame from %zd to %d bytes!\n",
-			 __func__, len, self->max_data_size);
-		len = self->max_data_size;
-	}
-
-	skb = sock_alloc_send_skb(sk, len + self->max_header_size,
-				  msg->msg_flags & MSG_DONTWAIT, &err);
-	err = -ENOBUFS;
-	if (!skb)
-		goto out;
-
-	skb_reserve(skb, self->max_header_size);
-	skb_reset_transport_header(skb);
-
-	pr_debug("%s(), appending user data\n", __func__);
-	skb_put(skb, len);
-	err = memcpy_from_msg(skb_transport_header(skb), msg, len);
-	if (err) {
-		kfree_skb(skb);
-		goto out;
-	}
-
-	/*
-	 * Just send the message to TinyTP, and let it deal with possible
-	 * errors. No need to duplicate all that here
-	 */
-	err = irttp_udata_request(self->tsap, skb);
-	if (err) {
-		pr_debug("%s(), err=%d\n", __func__, err);
-		goto out;
-	}
-
-	release_sock(sk);
-	return len;
-
-out:
-	release_sock(sk);
-	return err;
-}
-
-/*
- * Function irda_sendmsg_ultra (sock, msg, len)
- *
- *    Send message down to IrLMP for the unreliable Ultra
- *    packet service...
- */
-#ifdef CONFIG_IRDA_ULTRA
-static int irda_sendmsg_ultra(struct socket *sock, struct msghdr *msg,
-			      size_t len)
-{
-	struct sock *sk = sock->sk;
-	struct irda_sock *self;
-	__u8 pid = 0;
-	int bound = 0;
-	struct sk_buff *skb;
-	int err;
-
-	pr_debug("%s(), len=%zd\n", __func__, len);
-
-	err = -EINVAL;
-	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
-		return -EINVAL;
-
-	lock_sock(sk);
-
-	err = -EPIPE;
-	if (sk->sk_shutdown & SEND_SHUTDOWN) {
-		send_sig(SIGPIPE, current, 0);
-		goto out;
-	}
-
-	self = irda_sk(sk);
-
-	/* Check if an address was specified with sendto. Jean II */
-	if (msg->msg_name) {
-		DECLARE_SOCKADDR(struct sockaddr_irda *, addr, msg->msg_name);
-		err = -EINVAL;
-		/* Check address, extract pid. Jean II */
-		if (msg->msg_namelen < sizeof(*addr))
-			goto out;
-		if (addr->sir_family != AF_IRDA)
-			goto out;
-
-		pid = addr->sir_lsap_sel;
-		if (pid & 0x80) {
-			pr_debug("%s(), extension in PID not supp!\n",
-				 __func__);
-			err = -EOPNOTSUPP;
-			goto out;
-		}
-	} else {
-		/* Check that the socket is properly bound to an Ultra
-		 * port. Jean II */
-		if ((self->lsap == NULL) ||
-		    (sk->sk_state != TCP_ESTABLISHED)) {
-			pr_debug("%s(), socket not bound to Ultra PID.\n",
-				 __func__);
-			err = -ENOTCONN;
-			goto out;
-		}
-		/* Use PID from socket */
-		bound = 1;
-	}
-
-	/*
-	 * Check that we don't send out too big frames. This is an unreliable
-	 * service, so we have no fragmentation and no coalescence
-	 */
-	if (len > self->max_data_size) {
-		pr_debug("%s(), Warning too much data! Chopping frame from %zd to %d bytes!\n",
-			 __func__, len, self->max_data_size);
-		len = self->max_data_size;
-	}
-
-	skb = sock_alloc_send_skb(sk, len + self->max_header_size,
-				  msg->msg_flags & MSG_DONTWAIT, &err);
-	err = -ENOBUFS;
-	if (!skb)
-		goto out;
-
-	skb_reserve(skb, self->max_header_size);
-	skb_reset_transport_header(skb);
-
-	pr_debug("%s(), appending user data\n", __func__);
-	skb_put(skb, len);
-	err = memcpy_from_msg(skb_transport_header(skb), msg, len);
-	if (err) {
-		kfree_skb(skb);
-		goto out;
-	}
-
-	err = irlmp_connless_data_request((bound ? self->lsap : NULL),
-					  skb, pid);
-	if (err)
-		pr_debug("%s(), err=%d\n", __func__, err);
-out:
-	release_sock(sk);
-	return err ? : len;
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irda_shutdown (sk, how)
- */
-static int irda_shutdown(struct socket *sock, int how)
-{
-	struct sock *sk = sock->sk;
-	struct irda_sock *self = irda_sk(sk);
-
-	pr_debug("%s(%p)\n", __func__, self);
-
-	lock_sock(sk);
-
-	sk->sk_state       = TCP_CLOSE;
-	sk->sk_shutdown   |= SEND_SHUTDOWN;
-	sk->sk_state_change(sk);
-
-	if (self->iriap) {
-		iriap_close(self->iriap);
-		self->iriap = NULL;
-	}
-
-	if (self->tsap) {
-		irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
-		irttp_close_tsap(self->tsap);
-		self->tsap = NULL;
-	}
-
-	/* A few cleanup so the socket look as good as new... */
-	self->rx_flow = self->tx_flow = FLOW_START;	/* needed ??? */
-	self->daddr = DEV_ADDR_ANY;	/* Until we get re-connected */
-	self->saddr = 0x0;		/* so IrLMP assign us any link */
-
-	release_sock(sk);
-
-	return 0;
-}
-
-/*
- * Function irda_poll (file, sock, wait)
- */
-static unsigned int irda_poll(struct file * file, struct socket *sock,
-			      poll_table *wait)
-{
-	struct sock *sk = sock->sk;
-	struct irda_sock *self = irda_sk(sk);
-	unsigned int mask;
-
-	poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
-
-	/* Exceptional events? */
-	if (sk->sk_err)
-		mask |= POLLERR;
-	if (sk->sk_shutdown & RCV_SHUTDOWN) {
-		pr_debug("%s(), POLLHUP\n", __func__);
-		mask |= POLLHUP;
-	}
-
-	/* Readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue)) {
-		pr_debug("Socket is readable\n");
-		mask |= POLLIN | POLLRDNORM;
-	}
-
-	/* Connection-based need to check for termination and startup */
-	switch (sk->sk_type) {
-	case SOCK_STREAM:
-		if (sk->sk_state == TCP_CLOSE) {
-			pr_debug("%s(), POLLHUP\n", __func__);
-			mask |= POLLHUP;
-		}
-
-		if (sk->sk_state == TCP_ESTABLISHED) {
-			if ((self->tx_flow == FLOW_START) &&
-			    sock_writeable(sk))
-			{
-				mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
-			}
-		}
-		break;
-	case SOCK_SEQPACKET:
-		if ((self->tx_flow == FLOW_START) &&
-		    sock_writeable(sk))
-		{
-			mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
-		}
-		break;
-	case SOCK_DGRAM:
-		if (sock_writeable(sk))
-			mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
-		break;
-	default:
-		break;
-	}
-
-	return mask;
-}
-
-/*
- * Function irda_ioctl (sock, cmd, arg)
- */
-static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	struct sock *sk = sock->sk;
-	int err;
-
-	pr_debug("%s(), cmd=%#x\n", __func__, cmd);
-
-	err = -EINVAL;
-	switch (cmd) {
-	case TIOCOUTQ: {
-		long amount;
-
-		amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
-		if (amount < 0)
-			amount = 0;
-		err = put_user(amount, (unsigned int __user *)arg);
-		break;
-	}
-
-	case TIOCINQ: {
-		struct sk_buff *skb;
-		long amount = 0L;
-		/* These two are safe on a single CPU system as only user tasks fiddle here */
-		if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
-			amount = skb->len;
-		err = put_user(amount, (unsigned int __user *)arg);
-		break;
-	}
-
-	case SIOCGSTAMP:
-		if (sk != NULL)
-			err = sock_get_timestamp(sk, (struct timeval __user *)arg);
-		break;
-
-	case SIOCGIFADDR:
-	case SIOCSIFADDR:
-	case SIOCGIFDSTADDR:
-	case SIOCSIFDSTADDR:
-	case SIOCGIFBRDADDR:
-	case SIOCSIFBRDADDR:
-	case SIOCGIFNETMASK:
-	case SIOCSIFNETMASK:
-	case SIOCGIFMETRIC:
-	case SIOCSIFMETRIC:
-		break;
-	default:
-		pr_debug("%s(), doing device ioctl!\n", __func__);
-		err = -ENOIOCTLCMD;
-	}
-
-	return err;
-}
-
-#ifdef CONFIG_COMPAT
-/*
- * Function irda_ioctl (sock, cmd, arg)
- */
-static int irda_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	/*
-	 * All IRDA's ioctl are standard ones.
-	 */
-	return -ENOIOCTLCMD;
-}
-#endif
-
-/*
- * Function irda_setsockopt (sock, level, optname, optval, optlen)
- *
- *    Set some options for the socket
- *
- */
-static int irda_setsockopt(struct socket *sock, int level, int optname,
-			   char __user *optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-	struct irda_sock *self = irda_sk(sk);
-	struct irda_ias_set    *ias_opt;
-	struct ias_object      *ias_obj;
-	struct ias_attrib *	ias_attr;	/* Attribute in IAS object */
-	int opt, free_ias = 0, err = 0;
-
-	pr_debug("%s(%p)\n", __func__, self);
-
-	if (level != SOL_IRLMP)
-		return -ENOPROTOOPT;
-
-	lock_sock(sk);
-
-	switch (optname) {
-	case IRLMP_IAS_SET:
-		/* The user want to add an attribute to an existing IAS object
-		 * (in the IAS database) or to create a new object with this
-		 * attribute.
-		 * We first query IAS to know if the object exist, and then
-		 * create the right attribute...
-		 */
-
-		if (optlen != sizeof(struct irda_ias_set)) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* Copy query to the driver. */
-		ias_opt = memdup_user(optval, optlen);
-		if (IS_ERR(ias_opt)) {
-			err = PTR_ERR(ias_opt);
-			goto out;
-		}
-
-		/* Find the object we target.
-		 * If the user gives us an empty string, we use the object
-		 * associated with this socket. This will workaround
-		 * duplicated class name - Jean II */
-		if(ias_opt->irda_class_name[0] == '\0') {
-			if(self->ias_obj == NULL) {
-				kfree(ias_opt);
-				err = -EINVAL;
-				goto out;
-			}
-			ias_obj = self->ias_obj;
-		} else
-			ias_obj = irias_find_object(ias_opt->irda_class_name);
-
-		/* Only ROOT can mess with the global IAS database.
-		 * Users can only add attributes to the object associated
-		 * with the socket they own - Jean II */
-		if((!capable(CAP_NET_ADMIN)) &&
-		   ((ias_obj == NULL) || (ias_obj != self->ias_obj))) {
-			kfree(ias_opt);
-			err = -EPERM;
-			goto out;
-		}
-
-		/* If the object doesn't exist, create it */
-		if(ias_obj == (struct ias_object *) NULL) {
-			/* Create a new object */
-			ias_obj = irias_new_object(ias_opt->irda_class_name,
-						   jiffies);
-			if (ias_obj == NULL) {
-				kfree(ias_opt);
-				err = -ENOMEM;
-				goto out;
-			}
-			free_ias = 1;
-		}
-
-		/* Do we have the attribute already ? */
-		if(irias_find_attrib(ias_obj, ias_opt->irda_attrib_name)) {
-			kfree(ias_opt);
-			if (free_ias) {
-				kfree(ias_obj->name);
-				kfree(ias_obj);
-			}
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* Look at the type */
-		switch(ias_opt->irda_attrib_type) {
-		case IAS_INTEGER:
-			/* Add an integer attribute */
-			irias_add_integer_attrib(
-				ias_obj,
-				ias_opt->irda_attrib_name,
-				ias_opt->attribute.irda_attrib_int,
-				IAS_USER_ATTR);
-			break;
-		case IAS_OCT_SEQ:
-			/* Check length */
-			if(ias_opt->attribute.irda_attrib_octet_seq.len >
-			   IAS_MAX_OCTET_STRING) {
-				kfree(ias_opt);
-				if (free_ias) {
-					kfree(ias_obj->name);
-					kfree(ias_obj);
-				}
-
-				err = -EINVAL;
-				goto out;
-			}
-			/* Add an octet sequence attribute */
-			irias_add_octseq_attrib(
-			      ias_obj,
-			      ias_opt->irda_attrib_name,
-			      ias_opt->attribute.irda_attrib_octet_seq.octet_seq,
-			      ias_opt->attribute.irda_attrib_octet_seq.len,
-			      IAS_USER_ATTR);
-			break;
-		case IAS_STRING:
-			/* Should check charset & co */
-			/* Check length */
-			/* The length is encoded in a __u8, and
-			 * IAS_MAX_STRING == 256, so there is no way
-			 * userspace can pass us a string too large.
-			 * Jean II */
-			/* NULL terminate the string (avoid troubles) */
-			ias_opt->attribute.irda_attrib_string.string[ias_opt->attribute.irda_attrib_string.len] = '\0';
-			/* Add a string attribute */
-			irias_add_string_attrib(
-				ias_obj,
-				ias_opt->irda_attrib_name,
-				ias_opt->attribute.irda_attrib_string.string,
-				IAS_USER_ATTR);
-			break;
-		default :
-			kfree(ias_opt);
-			if (free_ias) {
-				kfree(ias_obj->name);
-				kfree(ias_obj);
-			}
-			err = -EINVAL;
-			goto out;
-		}
-		irias_insert_object(ias_obj);
-		kfree(ias_opt);
-		break;
-	case IRLMP_IAS_DEL:
-		/* The user want to delete an object from our local IAS
-		 * database. We just need to query the IAS, check is the
-		 * object is not owned by the kernel and delete it.
-		 */
-
-		if (optlen != sizeof(struct irda_ias_set)) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* Copy query to the driver. */
-		ias_opt = memdup_user(optval, optlen);
-		if (IS_ERR(ias_opt)) {
-			err = PTR_ERR(ias_opt);
-			goto out;
-		}
-
-		/* Find the object we target.
-		 * If the user gives us an empty string, we use the object
-		 * associated with this socket. This will workaround
-		 * duplicated class name - Jean II */
-		if(ias_opt->irda_class_name[0] == '\0')
-			ias_obj = self->ias_obj;
-		else
-			ias_obj = irias_find_object(ias_opt->irda_class_name);
-		if(ias_obj == (struct ias_object *) NULL) {
-			kfree(ias_opt);
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* Only ROOT can mess with the global IAS database.
-		 * Users can only del attributes from the object associated
-		 * with the socket they own - Jean II */
-		if((!capable(CAP_NET_ADMIN)) &&
-		   ((ias_obj == NULL) || (ias_obj != self->ias_obj))) {
-			kfree(ias_opt);
-			err = -EPERM;
-			goto out;
-		}
-
-		/* Find the attribute (in the object) we target */
-		ias_attr = irias_find_attrib(ias_obj,
-					     ias_opt->irda_attrib_name);
-		if(ias_attr == (struct ias_attrib *) NULL) {
-			kfree(ias_opt);
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* Check is the user space own the object */
-		if(ias_attr->value->owner != IAS_USER_ATTR) {
-			pr_debug("%s(), attempting to delete a kernel attribute\n",
-				 __func__);
-			kfree(ias_opt);
-			err = -EPERM;
-			goto out;
-		}
-
-		/* Remove the attribute (and maybe the object) */
-		irias_delete_attrib(ias_obj, ias_attr, 1);
-		kfree(ias_opt);
-		break;
-	case IRLMP_MAX_SDU_SIZE:
-		if (optlen < sizeof(int)) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		if (get_user(opt, (int __user *)optval)) {
-			err = -EFAULT;
-			goto out;
-		}
-
-		/* Only possible for a seqpacket service (TTP with SAR) */
-		if (sk->sk_type != SOCK_SEQPACKET) {
-			pr_debug("%s(), setting max_sdu_size = %d\n",
-				 __func__, opt);
-			self->max_sdu_size_rx = opt;
-		} else {
-			net_warn_ratelimited("%s: not allowed to set MAXSDUSIZE for this socket type!\n",
-					     __func__);
-			err = -ENOPROTOOPT;
-			goto out;
-		}
-		break;
-	case IRLMP_HINTS_SET:
-		if (optlen < sizeof(int)) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* The input is really a (__u8 hints[2]), easier as an int */
-		if (get_user(opt, (int __user *)optval)) {
-			err = -EFAULT;
-			goto out;
-		}
-
-		/* Unregister any old registration */
-		irlmp_unregister_service(self->skey);
-
-		self->skey = irlmp_register_service((__u16) opt);
-		break;
-	case IRLMP_HINT_MASK_SET:
-		/* As opposed to the previous case which set the hint bits
-		 * that we advertise, this one set the filter we use when
-		 * making a discovery (nodes which don't match any hint
-		 * bit in the mask are not reported).
-		 */
-		if (optlen < sizeof(int)) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* The input is really a (__u8 hints[2]), easier as an int */
-		if (get_user(opt, (int __user *)optval)) {
-			err = -EFAULT;
-			goto out;
-		}
-
-		/* Set the new hint mask */
-		self->mask.word = (__u16) opt;
-		/* Mask out extension bits */
-		self->mask.word &= 0x7f7f;
-		/* Check if no bits */
-		if(!self->mask.word)
-			self->mask.word = 0xFFFF;
-
-		break;
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-out:
-	release_sock(sk);
-
-	return err;
-}
-
-/*
- * Function irda_extract_ias_value(ias_opt, ias_value)
- *
- *    Translate internal IAS value structure to the user space representation
- *
- * The external representation of IAS values, as we exchange them with
- * user space program is quite different from the internal representation,
- * as stored in the IAS database (because we need a flat structure for
- * crossing kernel boundary).
- * This function transform the former in the latter. We also check
- * that the value type is valid.
- */
-static int irda_extract_ias_value(struct irda_ias_set *ias_opt,
-				  struct ias_value *ias_value)
-{
-	/* Look at the type */
-	switch (ias_value->type) {
-	case IAS_INTEGER:
-		/* Copy the integer */
-		ias_opt->attribute.irda_attrib_int = ias_value->t.integer;
-		break;
-	case IAS_OCT_SEQ:
-		/* Set length */
-		ias_opt->attribute.irda_attrib_octet_seq.len = ias_value->len;
-		/* Copy over */
-		memcpy(ias_opt->attribute.irda_attrib_octet_seq.octet_seq,
-		       ias_value->t.oct_seq, ias_value->len);
-		break;
-	case IAS_STRING:
-		/* Set length */
-		ias_opt->attribute.irda_attrib_string.len = ias_value->len;
-		ias_opt->attribute.irda_attrib_string.charset = ias_value->charset;
-		/* Copy over */
-		memcpy(ias_opt->attribute.irda_attrib_string.string,
-		       ias_value->t.string, ias_value->len);
-		/* NULL terminate the string (avoid troubles) */
-		ias_opt->attribute.irda_attrib_string.string[ias_value->len] = '\0';
-		break;
-	case IAS_MISSING:
-	default :
-		return -EINVAL;
-	}
-
-	/* Copy type over */
-	ias_opt->irda_attrib_type = ias_value->type;
-
-	return 0;
-}
-
-/*
- * Function irda_getsockopt (sock, level, optname, optval, optlen)
- */
-static int irda_getsockopt(struct socket *sock, int level, int optname,
-			   char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-	struct irda_sock *self = irda_sk(sk);
-	struct irda_device_list list;
-	struct irda_device_info *discoveries;
-	struct irda_ias_set *	ias_opt;	/* IAS get/query params */
-	struct ias_object *	ias_obj;	/* Object in IAS */
-	struct ias_attrib *	ias_attr;	/* Attribute in IAS object */
-	int daddr = DEV_ADDR_ANY;	/* Dest address for IAS queries */
-	int val = 0;
-	int len = 0;
-	int err = 0;
-	int offset, total;
-
-	pr_debug("%s(%p)\n", __func__, self);
-
-	if (level != SOL_IRLMP)
-		return -ENOPROTOOPT;
-
-	if (get_user(len, optlen))
-		return -EFAULT;
-
-	if(len < 0)
-		return -EINVAL;
-
-	lock_sock(sk);
-
-	switch (optname) {
-	case IRLMP_ENUMDEVICES:
-
-		/* Offset to first device entry */
-		offset = sizeof(struct irda_device_list) -
-			sizeof(struct irda_device_info);
-
-		if (len < offset) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* Ask lmp for the current discovery log */
-		discoveries = irlmp_get_discoveries(&list.len, self->mask.word,
-						    self->nslots);
-		/* Check if the we got some results */
-		if (discoveries == NULL) {
-			err = -EAGAIN;
-			goto out;		/* Didn't find any devices */
-		}
-
-		/* Write total list length back to client */
-		if (copy_to_user(optval, &list, offset))
-			err = -EFAULT;
-
-		/* Copy the list itself - watch for overflow */
-		if (list.len > 2048) {
-			err = -EINVAL;
-			goto bed;
-		}
-		total = offset + (list.len * sizeof(struct irda_device_info));
-		if (total > len)
-			total = len;
-		if (copy_to_user(optval+offset, discoveries, total - offset))
-			err = -EFAULT;
-
-		/* Write total number of bytes used back to client */
-		if (put_user(total, optlen))
-			err = -EFAULT;
-bed:
-		/* Free up our buffer */
-		kfree(discoveries);
-		break;
-	case IRLMP_MAX_SDU_SIZE:
-		val = self->max_data_size;
-		len = sizeof(int);
-		if (put_user(len, optlen)) {
-			err = -EFAULT;
-			goto out;
-		}
-
-		if (copy_to_user(optval, &val, len)) {
-			err = -EFAULT;
-			goto out;
-		}
-
-		break;
-	case IRLMP_IAS_GET:
-		/* The user want an object from our local IAS database.
-		 * We just need to query the IAS and return the value
-		 * that we found */
-
-		/* Check that the user has allocated the right space for us */
-		if (len != sizeof(struct irda_ias_set)) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* Copy query to the driver. */
-		ias_opt = memdup_user(optval, len);
-		if (IS_ERR(ias_opt)) {
-			err = PTR_ERR(ias_opt);
-			goto out;
-		}
-
-		/* Find the object we target.
-		 * If the user gives us an empty string, we use the object
-		 * associated with this socket. This will workaround
-		 * duplicated class name - Jean II */
-		if(ias_opt->irda_class_name[0] == '\0')
-			ias_obj = self->ias_obj;
-		else
-			ias_obj = irias_find_object(ias_opt->irda_class_name);
-		if(ias_obj == (struct ias_object *) NULL) {
-			kfree(ias_opt);
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* Find the attribute (in the object) we target */
-		ias_attr = irias_find_attrib(ias_obj,
-					     ias_opt->irda_attrib_name);
-		if(ias_attr == (struct ias_attrib *) NULL) {
-			kfree(ias_opt);
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* Translate from internal to user structure */
-		err = irda_extract_ias_value(ias_opt, ias_attr->value);
-		if(err) {
-			kfree(ias_opt);
-			goto out;
-		}
-
-		/* Copy reply to the user */
-		if (copy_to_user(optval, ias_opt,
-				 sizeof(struct irda_ias_set))) {
-			kfree(ias_opt);
-			err = -EFAULT;
-			goto out;
-		}
-		/* Note : don't need to put optlen, we checked it */
-		kfree(ias_opt);
-		break;
-	case IRLMP_IAS_QUERY:
-		/* The user want an object from a remote IAS database.
-		 * We need to use IAP to query the remote database and
-		 * then wait for the answer to come back. */
-
-		/* Check that the user has allocated the right space for us */
-		if (len != sizeof(struct irda_ias_set)) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* Copy query to the driver. */
-		ias_opt = memdup_user(optval, len);
-		if (IS_ERR(ias_opt)) {
-			err = PTR_ERR(ias_opt);
-			goto out;
-		}
-
-		/* At this point, there are two cases...
-		 * 1) the socket is connected - that's the easy case, we
-		 *	just query the device we are connected to...
-		 * 2) the socket is not connected - the user doesn't want
-		 *	to connect and/or may not have a valid service name
-		 *	(so can't create a fake connection). In this case,
-		 *	we assume that the user pass us a valid destination
-		 *	address in the requesting structure...
-		 */
-		if(self->daddr != DEV_ADDR_ANY) {
-			/* We are connected - reuse known daddr */
-			daddr = self->daddr;
-		} else {
-			/* We are not connected, we must specify a valid
-			 * destination address */
-			daddr = ias_opt->daddr;
-			if((!daddr) || (daddr == DEV_ADDR_ANY)) {
-				kfree(ias_opt);
-				err = -EINVAL;
-				goto out;
-			}
-		}
-
-		/* Check that we can proceed with IAP */
-		if (self->iriap) {
-			net_warn_ratelimited("%s: busy with a previous query\n",
-					     __func__);
-			kfree(ias_opt);
-			err = -EBUSY;
-			goto out;
-		}
-
-		self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
-					 irda_getvalue_confirm);
-
-		if (self->iriap == NULL) {
-			kfree(ias_opt);
-			err = -ENOMEM;
-			goto out;
-		}
-
-		/* Treat unexpected wakeup as disconnect */
-		self->errno = -EHOSTUNREACH;
-
-		/* Query remote LM-IAS */
-		iriap_getvaluebyclass_request(self->iriap,
-					      self->saddr, daddr,
-					      ias_opt->irda_class_name,
-					      ias_opt->irda_attrib_name);
-
-		/* Wait for answer, if not yet finished (or failed) */
-		if (wait_event_interruptible(self->query_wait,
-					     (self->iriap == NULL))) {
-			/* pending request uses copy of ias_opt-content
-			 * we can free it regardless! */
-			kfree(ias_opt);
-			/* Treat signals as disconnect */
-			err = -EHOSTUNREACH;
-			goto out;
-		}
-
-		/* Check what happened */
-		if (self->errno)
-		{
-			kfree(ias_opt);
-			/* Requested object/attribute doesn't exist */
-			if((self->errno == IAS_CLASS_UNKNOWN) ||
-			   (self->errno == IAS_ATTRIB_UNKNOWN))
-				err = -EADDRNOTAVAIL;
-			else
-				err = -EHOSTUNREACH;
-
-			goto out;
-		}
-
-		/* Translate from internal to user structure */
-		err = irda_extract_ias_value(ias_opt, self->ias_result);
-		if (self->ias_result)
-			irias_delete_value(self->ias_result);
-		if (err) {
-			kfree(ias_opt);
-			goto out;
-		}
-
-		/* Copy reply to the user */
-		if (copy_to_user(optval, ias_opt,
-				 sizeof(struct irda_ias_set))) {
-			kfree(ias_opt);
-			err = -EFAULT;
-			goto out;
-		}
-		/* Note : don't need to put optlen, we checked it */
-		kfree(ias_opt);
-		break;
-	case IRLMP_WAITDEVICE:
-		/* This function is just another way of seeing life ;-)
-		 * IRLMP_ENUMDEVICES assumes that you have a static network,
-		 * and that you just want to pick one of the devices present.
-		 * On the other hand, in here we assume that no device is
-		 * present and that at some point in the future a device will
-		 * come into range. When this device arrive, we just wake
-		 * up the caller, so that he has time to connect to it before
-		 * the device goes away...
-		 * Note : once the node has been discovered for more than a
-		 * few second, it won't trigger this function, unless it
-		 * goes away and come back changes its hint bits (so we
-		 * might call it IRLMP_WAITNEWDEVICE).
-		 */
-
-		/* Check that the user is passing us an int */
-		if (len != sizeof(int)) {
-			err = -EINVAL;
-			goto out;
-		}
-		/* Get timeout in ms (max time we block the caller) */
-		if (get_user(val, (int __user *)optval)) {
-			err = -EFAULT;
-			goto out;
-		}
-
-		/* Tell IrLMP we want to be notified */
-		irlmp_update_client(self->ckey, self->mask.word,
-				    irda_selective_discovery_indication,
-				    NULL, (void *) self);
-
-		/* Do some discovery (and also return cached results) */
-		irlmp_discovery_request(self->nslots);
-
-		/* Wait until a node is discovered */
-		if (!self->cachedaddr) {
-			pr_debug("%s(), nothing discovered yet, going to sleep...\n",
-				 __func__);
-
-			/* Set watchdog timer to expire in <val> ms. */
-			self->errno = 0;
-			setup_timer(&self->watchdog, irda_discovery_timeout,
-					(unsigned long)self);
-			mod_timer(&self->watchdog,
-				  jiffies + msecs_to_jiffies(val));
-
-			/* Wait for IR-LMP to call us back */
-			err = __wait_event_interruptible(self->query_wait,
-			      (self->cachedaddr != 0 || self->errno == -ETIME));
-
-			/* If watchdog is still activated, kill it! */
-			del_timer(&(self->watchdog));
-
-			pr_debug("%s(), ...waking up !\n", __func__);
-
-			if (err != 0)
-				goto out;
-		}
-		else
-			pr_debug("%s(), found immediately !\n",
-				 __func__);
-
-		/* Tell IrLMP that we have been notified */
-		irlmp_update_client(self->ckey, self->mask.word,
-				    NULL, NULL, NULL);
-
-		/* Check if the we got some results */
-		if (!self->cachedaddr) {
-			err = -EAGAIN;		/* Didn't find any devices */
-			goto out;
-		}
-		daddr = self->cachedaddr;
-		/* Cleanup */
-		self->cachedaddr = 0;
-
-		/* We return the daddr of the device that trigger the
-		 * wakeup. As irlmp pass us only the new devices, we
-		 * are sure that it's not an old device.
-		 * If the user want more details, he should query
-		 * the whole discovery log and pick one device...
-		 */
-		if (put_user(daddr, (int __user *)optval)) {
-			err = -EFAULT;
-			goto out;
-		}
-
-		break;
-	default:
-		err = -ENOPROTOOPT;
-	}
-
-out:
-
-	release_sock(sk);
-
-	return err;
-}
-
-static const struct net_proto_family irda_family_ops = {
-	.family = PF_IRDA,
-	.create = irda_create,
-	.owner	= THIS_MODULE,
-};
-
-static const struct proto_ops irda_stream_ops = {
-	.family =	PF_IRDA,
-	.owner =	THIS_MODULE,
-	.release =	irda_release,
-	.bind =		irda_bind,
-	.connect =	irda_connect,
-	.socketpair =	sock_no_socketpair,
-	.accept =	irda_accept,
-	.getname =	irda_getname,
-	.poll =		irda_poll,
-	.ioctl =	irda_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl =	irda_compat_ioctl,
-#endif
-	.listen =	irda_listen,
-	.shutdown =	irda_shutdown,
-	.setsockopt =	irda_setsockopt,
-	.getsockopt =	irda_getsockopt,
-	.sendmsg =	irda_sendmsg,
-	.recvmsg =	irda_recvmsg_stream,
-	.mmap =		sock_no_mmap,
-	.sendpage =	sock_no_sendpage,
-};
-
-static const struct proto_ops irda_seqpacket_ops = {
-	.family =	PF_IRDA,
-	.owner =	THIS_MODULE,
-	.release =	irda_release,
-	.bind =		irda_bind,
-	.connect =	irda_connect,
-	.socketpair =	sock_no_socketpair,
-	.accept =	irda_accept,
-	.getname =	irda_getname,
-	.poll =		datagram_poll,
-	.ioctl =	irda_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl =	irda_compat_ioctl,
-#endif
-	.listen =	irda_listen,
-	.shutdown =	irda_shutdown,
-	.setsockopt =	irda_setsockopt,
-	.getsockopt =	irda_getsockopt,
-	.sendmsg =	irda_sendmsg,
-	.recvmsg =	irda_recvmsg_dgram,
-	.mmap =		sock_no_mmap,
-	.sendpage =	sock_no_sendpage,
-};
-
-static const struct proto_ops irda_dgram_ops = {
-	.family =	PF_IRDA,
-	.owner =	THIS_MODULE,
-	.release =	irda_release,
-	.bind =		irda_bind,
-	.connect =	irda_connect,
-	.socketpair =	sock_no_socketpair,
-	.accept =	irda_accept,
-	.getname =	irda_getname,
-	.poll =		datagram_poll,
-	.ioctl =	irda_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl =	irda_compat_ioctl,
-#endif
-	.listen =	irda_listen,
-	.shutdown =	irda_shutdown,
-	.setsockopt =	irda_setsockopt,
-	.getsockopt =	irda_getsockopt,
-	.sendmsg =	irda_sendmsg_dgram,
-	.recvmsg =	irda_recvmsg_dgram,
-	.mmap =		sock_no_mmap,
-	.sendpage =	sock_no_sendpage,
-};
-
-#ifdef CONFIG_IRDA_ULTRA
-static const struct proto_ops irda_ultra_ops = {
-	.family =	PF_IRDA,
-	.owner =	THIS_MODULE,
-	.release =	irda_release,
-	.bind =		irda_bind,
-	.connect =	sock_no_connect,
-	.socketpair =	sock_no_socketpair,
-	.accept =	sock_no_accept,
-	.getname =	irda_getname,
-	.poll =		datagram_poll,
-	.ioctl =	irda_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl =	irda_compat_ioctl,
-#endif
-	.listen =	sock_no_listen,
-	.shutdown =	irda_shutdown,
-	.setsockopt =	irda_setsockopt,
-	.getsockopt =	irda_getsockopt,
-	.sendmsg =	irda_sendmsg_ultra,
-	.recvmsg =	irda_recvmsg_dgram,
-	.mmap =		sock_no_mmap,
-	.sendpage =	sock_no_sendpage,
-};
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irsock_init (pro)
- *
- *    Initialize IrDA protocol
- *
- */
-int __init irsock_init(void)
-{
-	int rc = proto_register(&irda_proto, 0);
-
-	if (rc == 0)
-		rc = sock_register(&irda_family_ops);
-
-	return rc;
-}
-
-/*
- * Function irsock_cleanup (void)
- *
- *    Remove IrDA protocol
- *
- */
-void irsock_cleanup(void)
-{
-	sock_unregister(PF_IRDA);
-	proto_unregister(&irda_proto);
-}
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
deleted file mode 100644
index 364d70aed068..000000000000
--- a/net/irda/discovery.c
+++ /dev/null
@@ -1,417 +0,0 @@
-/*********************************************************************
- *
- * Filename:      discovery.c
- * Version:       0.1
- * Description:   Routines for handling discoveries at the IrLMP layer
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Apr  6 15:33:50 1999
- * Modified at:   Sat Oct  9 17:11:31 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * Modified at:   Fri May 28  3:11 CST 1999
- * Modified by:   Horst von Brand <vonbrand@sleipnir.valparaiso.cl>
- *
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/fs.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-
-#include <net/irda/discovery.h>
-
-#include <asm/unaligned.h>
-
-/*
- * Function irlmp_add_discovery (cachelog, discovery)
- *
- *    Add a new discovery to the cachelog, and remove any old discoveries
- *    from the same device
- *
- * Note : we try to preserve the time this device was *first* discovered
- * (as opposed to the time of last discovery used for cleanup). This is
- * used by clients waiting for discovery events to tell if the device
- * discovered is "new" or just the same old one. They can't rely there
- * on a binary flag (new/old), because not all discovery events are
- * propagated to them, and they might not always listen, so they would
- * miss some new devices popping up...
- * Jean II
- */
-void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *new)
-{
-	discovery_t *discovery, *node;
-	unsigned long flags;
-
-	/* Set time of first discovery if node is new (see below) */
-	new->firststamp = new->timestamp;
-
-	spin_lock_irqsave(&cachelog->hb_spinlock, flags);
-
-	/*
-	 * Remove all discoveries of devices that has previously been
-	 * discovered on the same link with the same name (info), or the
-	 * same daddr. We do this since some devices (mostly PDAs) change
-	 * their device address between every discovery.
-	 */
-	discovery = (discovery_t *) hashbin_get_first(cachelog);
-	while (discovery != NULL ) {
-		node = discovery;
-
-		/* Be sure to stay one item ahead */
-		discovery = (discovery_t *) hashbin_get_next(cachelog);
-
-		if ((node->data.saddr == new->data.saddr) &&
-		    ((node->data.daddr == new->data.daddr) ||
-		     (strcmp(node->data.info, new->data.info) == 0)))
-		{
-			/* This discovery is a previous discovery
-			 * from the same device, so just remove it
-			 */
-			hashbin_remove_this(cachelog, (irda_queue_t *) node);
-			/* Check if hints bits are unchanged */
-			if (get_unaligned((__u16 *)node->data.hints) == get_unaligned((__u16 *)new->data.hints))
-				/* Set time of first discovery for this node */
-				new->firststamp = node->firststamp;
-			kfree(node);
-		}
-	}
-
-	/* Insert the new and updated version */
-	hashbin_insert(cachelog, (irda_queue_t *) new, new->data.daddr, NULL);
-
-	spin_unlock_irqrestore(&cachelog->hb_spinlock, flags);
-}
-
-/*
- * Function irlmp_add_discovery_log (cachelog, log)
- *
- *    Merge a disovery log into the cachelog.
- *
- */
-void irlmp_add_discovery_log(hashbin_t *cachelog, hashbin_t *log)
-{
-	discovery_t *discovery;
-
-	/*
-	 *  If log is missing this means that IrLAP was unable to perform the
-	 *  discovery, so restart discovery again with just the half timeout
-	 *  of the normal one.
-	 */
-	/* Well... It means that there was nobody out there - Jean II */
-	if (log == NULL) {
-		/* irlmp_start_discovery_timer(irlmp, 150); */
-		return;
-	}
-
-	/*
-	 * Locking : we are the only owner of this discovery log, so
-	 * no need to lock it.
-	 * We just need to lock the global log in irlmp_add_discovery().
-	 */
-	discovery = (discovery_t *) hashbin_remove_first(log);
-	while (discovery != NULL) {
-		irlmp_add_discovery(cachelog, discovery);
-
-		discovery = (discovery_t *) hashbin_remove_first(log);
-	}
-
-	/* Delete the now empty log */
-	hashbin_delete(log, (FREE_FUNC) kfree);
-}
-
-/*
- * Function irlmp_expire_discoveries (log, saddr, force)
- *
- *    Go through all discoveries and expire all that has stayed too long
- *
- * Note : this assume that IrLAP won't change its saddr, which
- * currently is a valid assumption...
- */
-void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force)
-{
-	discovery_t *		discovery;
-	discovery_t *		curr;
-	unsigned long		flags;
-	discinfo_t *		buffer = NULL;
-	int			n;		/* Size of the full log */
-	int			i = 0;		/* How many we expired */
-
-	IRDA_ASSERT(log != NULL, return;);
-	spin_lock_irqsave(&log->hb_spinlock, flags);
-
-	discovery = (discovery_t *) hashbin_get_first(log);
-	while (discovery != NULL) {
-		/* Be sure to be one item ahead */
-		curr = discovery;
-		discovery = (discovery_t *) hashbin_get_next(log);
-
-		/* Test if it's time to expire this discovery */
-		if ((curr->data.saddr == saddr) &&
-		    (force ||
-		     ((jiffies - curr->timestamp) > DISCOVERY_EXPIRE_TIMEOUT)))
-		{
-			/* Create buffer as needed.
-			 * As this function get called a lot and most time
-			 * we don't have anything to put in the log (we are
-			 * quite picky), we can save a lot of overhead
-			 * by not calling kmalloc. Jean II */
-			if(buffer == NULL) {
-				/* Create the client specific buffer */
-				n = HASHBIN_GET_SIZE(log);
-				buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC);
-				if (buffer == NULL) {
-					spin_unlock_irqrestore(&log->hb_spinlock, flags);
-					return;
-				}
-
-			}
-
-			/* Copy discovery information */
-			memcpy(&(buffer[i]), &(curr->data),
-			       sizeof(discinfo_t));
-			i++;
-
-			/* Remove it from the log */
-			curr = hashbin_remove_this(log, (irda_queue_t *) curr);
-			kfree(curr);
-		}
-	}
-
-	/* Drop the spinlock before calling the higher layers, as
-	 * we can't guarantee they won't call us back and create a
-	 * deadlock. We will work on our own private data, so we
-	 * don't care to be interrupted. - Jean II */
-	spin_unlock_irqrestore(&log->hb_spinlock, flags);
-
-	if(buffer == NULL)
-		return;
-
-	/* Tell IrLMP and registered clients about it */
-	irlmp_discovery_expiry(buffer, i);
-
-	/* Free up our buffer */
-	kfree(buffer);
-}
-
-#if 0
-/*
- * Function irlmp_dump_discoveries (log)
- *
- *    Print out all discoveries in log
- *
- */
-void irlmp_dump_discoveries(hashbin_t *log)
-{
-	discovery_t *discovery;
-
-	IRDA_ASSERT(log != NULL, return;);
-
-	discovery = (discovery_t *) hashbin_get_first(log);
-	while (discovery != NULL) {
-		pr_debug("Discovery:\n");
-		pr_debug("  daddr=%08x\n", discovery->data.daddr);
-		pr_debug("  saddr=%08x\n", discovery->data.saddr);
-		pr_debug("  nickname=%s\n", discovery->data.info);
-
-		discovery = (discovery_t *) hashbin_get_next(log);
-	}
-}
-#endif
-
-/*
- * Function irlmp_copy_discoveries (log, pn, mask)
- *
- *    Copy all discoveries in a buffer
- *
- * This function implement a safe way for lmp clients to access the
- * discovery log. The basic problem is that we don't want the log
- * to change (add/remove) while the client is reading it. If the
- * lmp client manipulate directly the hashbin, he is sure to get
- * into troubles...
- * The idea is that we copy all the current discovery log in a buffer
- * which is specific to the client and pass this copy to him. As we
- * do this operation with the spinlock grabbed, we are safe...
- * Note : we don't want those clients to grab the spinlock, because
- * we have no control on how long they will hold it...
- * Note : we choose to copy the log in "struct irda_device_info" to
- * save space...
- * Note : the client must kfree himself() the log...
- * Jean II
- */
-struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
-						__u16 mask, int old_entries)
-{
-	discovery_t *		discovery;
-	unsigned long		flags;
-	discinfo_t *		buffer = NULL;
-	int			j_timeout = (sysctl_discovery_timeout * HZ);
-	int			n;		/* Size of the full log */
-	int			i = 0;		/* How many we picked */
-
-	IRDA_ASSERT(pn != NULL, return NULL;);
-	IRDA_ASSERT(log != NULL, return NULL;);
-
-	/* Save spin lock */
-	spin_lock_irqsave(&log->hb_spinlock, flags);
-
-	discovery = (discovery_t *) hashbin_get_first(log);
-	while (discovery != NULL) {
-		/* Mask out the ones we don't want :
-		 * We want to match the discovery mask, and to get only
-		 * the most recent one (unless we want old ones) */
-		if ((get_unaligned((__u16 *)discovery->data.hints) & mask) &&
-		    ((old_entries) ||
-		     ((jiffies - discovery->firststamp) < j_timeout))) {
-			/* Create buffer as needed.
-			 * As this function get called a lot and most time
-			 * we don't have anything to put in the log (we are
-			 * quite picky), we can save a lot of overhead
-			 * by not calling kmalloc. Jean II */
-			if(buffer == NULL) {
-				/* Create the client specific buffer */
-				n = HASHBIN_GET_SIZE(log);
-				buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC);
-				if (buffer == NULL) {
-					spin_unlock_irqrestore(&log->hb_spinlock, flags);
-					return NULL;
-				}
-
-			}
-
-			/* Copy discovery information */
-			memcpy(&(buffer[i]), &(discovery->data),
-			       sizeof(discinfo_t));
-			i++;
-		}
-		discovery = (discovery_t *) hashbin_get_next(log);
-	}
-
-	spin_unlock_irqrestore(&log->hb_spinlock, flags);
-
-	/* Get the actual number of device in the buffer and return */
-	*pn = i;
-	return buffer;
-}
-
-#ifdef CONFIG_PROC_FS
-static inline discovery_t *discovery_seq_idx(loff_t pos)
-
-{
-	discovery_t *discovery;
-
-	for (discovery = (discovery_t *) hashbin_get_first(irlmp->cachelog);
-	     discovery != NULL;
-	     discovery = (discovery_t *) hashbin_get_next(irlmp->cachelog)) {
-		if (pos-- == 0)
-			break;
-	}
-
-	return discovery;
-}
-
-static void *discovery_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	spin_lock_irq(&irlmp->cachelog->hb_spinlock);
-	return *pos ? discovery_seq_idx(*pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *discovery_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	return (v == SEQ_START_TOKEN)
-		? (void *) hashbin_get_first(irlmp->cachelog)
-		: (void *) hashbin_get_next(irlmp->cachelog);
-}
-
-static void discovery_seq_stop(struct seq_file *seq, void *v)
-{
-	spin_unlock_irq(&irlmp->cachelog->hb_spinlock);
-}
-
-static int discovery_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "IrLMP: Discovery log:\n\n");
-	else {
-		const discovery_t *discovery = v;
-
-		seq_printf(seq, "nickname: %s, hint: 0x%02x%02x",
-			   discovery->data.info,
-			   discovery->data.hints[0],
-			   discovery->data.hints[1]);
-#if 0
-		if ( discovery->data.hints[0] & HINT_PNP)
-			seq_puts(seq, "PnP Compatible ");
-		if ( discovery->data.hints[0] & HINT_PDA)
-			seq_puts(seq, "PDA/Palmtop ");
-		if ( discovery->data.hints[0] & HINT_COMPUTER)
-			seq_puts(seq, "Computer ");
-		if ( discovery->data.hints[0] & HINT_PRINTER)
-			seq_puts(seq, "Printer ");
-		if ( discovery->data.hints[0] & HINT_MODEM)
-			seq_puts(seq, "Modem ");
-		if ( discovery->data.hints[0] & HINT_FAX)
-			seq_puts(seq, "Fax ");
-		if ( discovery->data.hints[0] & HINT_LAN)
-			seq_puts(seq, "LAN Access ");
-
-		if ( discovery->data.hints[1] & HINT_TELEPHONY)
-			seq_puts(seq, "Telephony ");
-		if ( discovery->data.hints[1] & HINT_FILE_SERVER)
-			seq_puts(seq, "File Server ");
-		if ( discovery->data.hints[1] & HINT_COMM)
-			seq_puts(seq, "IrCOMM ");
-		if ( discovery->data.hints[1] & HINT_OBEX)
-			seq_puts(seq, "IrOBEX ");
-#endif
-		seq_printf(seq,", saddr: 0x%08x, daddr: 0x%08x\n\n",
-			       discovery->data.saddr,
-			       discovery->data.daddr);
-
-		seq_putc(seq, '\n');
-	}
-	return 0;
-}
-
-static const struct seq_operations discovery_seq_ops = {
-	.start  = discovery_seq_start,
-	.next   = discovery_seq_next,
-	.stop   = discovery_seq_stop,
-	.show   = discovery_seq_show,
-};
-
-static int discovery_seq_open(struct inode *inode, struct file *file)
-{
-	IRDA_ASSERT(irlmp != NULL, return -EINVAL;);
-
-	return seq_open(file, &discovery_seq_ops);
-}
-
-const struct file_operations discovery_seq_fops = {
-	.owner		= THIS_MODULE,
-	.open           = discovery_seq_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release	= seq_release,
-};
-#endif
diff --git a/net/irda/ircomm/Kconfig b/net/irda/ircomm/Kconfig
deleted file mode 100644
index 19492c1707b7..000000000000
--- a/net/irda/ircomm/Kconfig
+++ /dev/null
@@ -1,12 +0,0 @@
-config IRCOMM
-	tristate "IrCOMM protocol"
-	depends on IRDA && TTY
-	help
-	  Say Y here if you want to build support for the IrCOMM protocol.
-	  To compile it as modules, choose M here: the modules will be
-	  called ircomm and ircomm_tty.
-	  IrCOMM implements serial port emulation, and makes it possible to
-	  use all existing applications that understands TTY's with an
-	  infrared link.  Thus you should be able to use application like PPP,
-	  minicom and others.
-
diff --git a/net/irda/ircomm/Makefile b/net/irda/ircomm/Makefile
deleted file mode 100644
index ab23b5ba7e33..000000000000
--- a/net/irda/ircomm/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-#
-# Makefile for the Linux IrDA IrCOMM protocol layer.
-#
-
-obj-$(CONFIG_IRCOMM) += ircomm.o ircomm-tty.o
-
-ircomm-y := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o
-ircomm-tty-y := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
deleted file mode 100644
index 3af219545f6d..000000000000
--- a/net/irda/ircomm/ircomm_core.c
+++ /dev/null
@@ -1,563 +0,0 @@
-/*********************************************************************
- *
- * Filename:      ircomm_core.c
- * Version:       1.0
- * Description:   IrCOMM service interface
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Jun  6 20:37:34 1999
- * Modified at:   Tue Dec 21 13:26:41 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irmod.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irias_object.h>
-
-#include <net/irda/ircomm_event.h>
-#include <net/irda/ircomm_lmp.h>
-#include <net/irda/ircomm_ttp.h>
-#include <net/irda/ircomm_param.h>
-#include <net/irda/ircomm_core.h>
-
-static int __ircomm_close(struct ircomm_cb *self);
-static void ircomm_control_indication(struct ircomm_cb *self,
-				      struct sk_buff *skb, int clen);
-
-#ifdef CONFIG_PROC_FS
-extern struct proc_dir_entry *proc_irda;
-static int ircomm_seq_open(struct inode *, struct file *);
-
-static const struct file_operations ircomm_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open           = ircomm_seq_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release	= seq_release,
-};
-#endif /* CONFIG_PROC_FS */
-
-hashbin_t *ircomm = NULL;
-
-static int __init ircomm_init(void)
-{
-	ircomm = hashbin_new(HB_LOCK);
-	if (ircomm == NULL) {
-		net_err_ratelimited("%s(), can't allocate hashbin!\n",
-				    __func__);
-		return -ENOMEM;
-	}
-
-#ifdef CONFIG_PROC_FS
-	{ struct proc_dir_entry *ent;
-	ent = proc_create("ircomm", 0, proc_irda, &ircomm_proc_fops);
-	if (!ent) {
-		printk(KERN_ERR "ircomm_init: can't create /proc entry!\n");
-		return -ENODEV;
-	}
-	}
-#endif /* CONFIG_PROC_FS */
-
-	net_info_ratelimited("IrCOMM protocol (Dag Brattli)\n");
-
-	return 0;
-}
-
-static void __exit ircomm_cleanup(void)
-{
-	hashbin_delete(ircomm, (FREE_FUNC) __ircomm_close);
-
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("ircomm", proc_irda);
-#endif /* CONFIG_PROC_FS */
-}
-
-/*
- * Function ircomm_open (client_notify)
- *
- *    Start a new IrCOMM instance
- *
- */
-struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line)
-{
-	struct ircomm_cb *self = NULL;
-	int ret;
-
-	pr_debug("%s(), service_type=0x%02x\n", __func__ ,
-		 service_type);
-
-	IRDA_ASSERT(ircomm != NULL, return NULL;);
-
-	self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL);
-	if (self == NULL)
-		return NULL;
-
-	self->notify = *notify;
-	self->magic = IRCOMM_MAGIC;
-
-	/* Check if we should use IrLMP or IrTTP */
-	if (service_type & IRCOMM_3_WIRE_RAW) {
-		self->flow_status = FLOW_START;
-		ret = ircomm_open_lsap(self);
-	} else
-		ret = ircomm_open_tsap(self);
-
-	if (ret < 0) {
-		kfree(self);
-		return NULL;
-	}
-
-	self->service_type = service_type;
-	self->line = line;
-
-	hashbin_insert(ircomm, (irda_queue_t *) self, line, NULL);
-
-	ircomm_next_state(self, IRCOMM_IDLE);
-
-	return self;
-}
-
-EXPORT_SYMBOL(ircomm_open);
-
-/*
- * Function ircomm_close_instance (self)
- *
- *    Remove IrCOMM instance
- *
- */
-static int __ircomm_close(struct ircomm_cb *self)
-{
-	/* Disconnect link if any */
-	ircomm_do_event(self, IRCOMM_DISCONNECT_REQUEST, NULL, NULL);
-
-	/* Remove TSAP */
-	if (self->tsap) {
-		irttp_close_tsap(self->tsap);
-		self->tsap = NULL;
-	}
-
-	/* Remove LSAP */
-	if (self->lsap) {
-		irlmp_close_lsap(self->lsap);
-		self->lsap = NULL;
-	}
-	self->magic = 0;
-
-	kfree(self);
-
-	return 0;
-}
-
-/*
- * Function ircomm_close (self)
- *
- *    Closes and removes the specified IrCOMM instance
- *
- */
-int ircomm_close(struct ircomm_cb *self)
-{
-	struct ircomm_cb *entry;
-
-	IRDA_ASSERT(self != NULL, return -EIO;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EIO;);
-
-	entry = hashbin_remove(ircomm, self->line, NULL);
-
-	IRDA_ASSERT(entry == self, return -1;);
-
-	return __ircomm_close(self);
-}
-
-EXPORT_SYMBOL(ircomm_close);
-
-/*
- * Function ircomm_connect_request (self, service_type)
- *
- *    Impl. of this function is differ from one of the reference. This
- *    function does discovery as well as sending connect request
- *
- */
-int ircomm_connect_request(struct ircomm_cb *self, __u8 dlsap_sel,
-			   __u32 saddr, __u32 daddr, struct sk_buff *skb,
-			   __u8 service_type)
-{
-	struct ircomm_info info;
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
-
-	self->service_type= service_type;
-
-	info.dlsap_sel = dlsap_sel;
-	info.saddr = saddr;
-	info.daddr = daddr;
-
-	ret = ircomm_do_event(self, IRCOMM_CONNECT_REQUEST, skb, &info);
-
-	return ret;
-}
-
-EXPORT_SYMBOL(ircomm_connect_request);
-
-/*
- * Function ircomm_connect_indication (self, qos, skb)
- *
- *    Notify user layer about the incoming connection
- *
- */
-void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb,
-			       struct ircomm_info *info)
-{
-	/*
-	 * If there are any data hiding in the control channel, we must
-	 * deliver it first. The side effect is that the control channel
-	 * will be removed from the skb
-	 */
-	if (self->notify.connect_indication)
-		self->notify.connect_indication(self->notify.instance, self,
-						info->qos, info->max_data_size,
-						info->max_header_size, skb);
-	else {
-		pr_debug("%s(), missing handler\n", __func__);
-	}
-}
-
-/*
- * Function ircomm_connect_response (self, userdata, max_sdu_size)
- *
- *    User accepts connection
- *
- */
-int ircomm_connect_response(struct ircomm_cb *self, struct sk_buff *userdata)
-{
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
-
-	ret = ircomm_do_event(self, IRCOMM_CONNECT_RESPONSE, userdata, NULL);
-
-	return ret;
-}
-
-EXPORT_SYMBOL(ircomm_connect_response);
-
-/*
- * Function connect_confirm (self, skb)
- *
- *    Notify user layer that the link is now connected
- *
- */
-void ircomm_connect_confirm(struct ircomm_cb *self, struct sk_buff *skb,
-			    struct ircomm_info *info)
-{
-	if (self->notify.connect_confirm )
-		self->notify.connect_confirm(self->notify.instance,
-					     self, info->qos,
-					     info->max_data_size,
-					     info->max_header_size, skb);
-	else {
-		pr_debug("%s(), missing handler\n", __func__);
-	}
-}
-
-/*
- * Function ircomm_data_request (self, userdata)
- *
- *    Send IrCOMM data to peer device
- *
- */
-int ircomm_data_request(struct ircomm_cb *self, struct sk_buff *skb)
-{
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -EFAULT;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EFAULT;);
-	IRDA_ASSERT(skb != NULL, return -EFAULT;);
-
-	ret = ircomm_do_event(self, IRCOMM_DATA_REQUEST, skb, NULL);
-
-	return ret;
-}
-
-EXPORT_SYMBOL(ircomm_data_request);
-
-/*
- * Function ircomm_data_indication (self, skb)
- *
- *    Data arrived, so deliver it to user
- *
- */
-void ircomm_data_indication(struct ircomm_cb *self, struct sk_buff *skb)
-{
-	IRDA_ASSERT(skb->len > 0, return;);
-
-	if (self->notify.data_indication)
-		self->notify.data_indication(self->notify.instance, self, skb);
-	else {
-		pr_debug("%s(), missing handler\n", __func__);
-	}
-}
-
-/*
- * Function ircomm_process_data (self, skb)
- *
- *    Data arrived which may contain control channel data
- *
- */
-void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb)
-{
-	int clen;
-
-	IRDA_ASSERT(skb->len > 0, return;);
-
-	clen = skb->data[0];
-
-	/*
-	 * Input validation check: a stir4200/mcp2150 combinations sometimes
-	 * results in frames with clen > remaining packet size. These are
-	 * illegal; if we throw away just this frame then it seems to carry on
-	 * fine
-	 */
-	if (unlikely(skb->len < (clen + 1))) {
-		pr_debug("%s() throwing away illegal frame\n",
-			 __func__);
-		return;
-	}
-
-	/*
-	 * If there are any data hiding in the control channel, we must
-	 * deliver it first. The side effect is that the control channel
-	 * will be removed from the skb
-	 */
-	if (clen > 0)
-		ircomm_control_indication(self, skb, clen);
-
-	/* Remove control channel from data channel */
-	skb_pull(skb, clen+1);
-
-	if (skb->len)
-		ircomm_data_indication(self, skb);
-	else {
-		pr_debug("%s(), data was control info only!\n",
-			 __func__);
-	}
-}
-
-/*
- * Function ircomm_control_request (self, params)
- *
- *    Send control data to peer device
- *
- */
-int ircomm_control_request(struct ircomm_cb *self, struct sk_buff *skb)
-{
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -EFAULT;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EFAULT;);
-	IRDA_ASSERT(skb != NULL, return -EFAULT;);
-
-	ret = ircomm_do_event(self, IRCOMM_CONTROL_REQUEST, skb, NULL);
-
-	return ret;
-}
-
-EXPORT_SYMBOL(ircomm_control_request);
-
-/*
- * Function ircomm_control_indication (self, skb)
- *
- *    Data has arrived on the control channel
- *
- */
-static void ircomm_control_indication(struct ircomm_cb *self,
-				      struct sk_buff *skb, int clen)
-{
-	/* Use udata for delivering data on the control channel */
-	if (self->notify.udata_indication) {
-		struct sk_buff *ctrl_skb;
-
-		/* We don't own the skb, so clone it */
-		ctrl_skb = skb_clone(skb, GFP_ATOMIC);
-		if (!ctrl_skb)
-			return;
-
-		/* Remove data channel from control channel */
-		skb_trim(ctrl_skb, clen+1);
-
-		self->notify.udata_indication(self->notify.instance, self,
-					      ctrl_skb);
-
-		/* Drop reference count -
-		 * see ircomm_tty_control_indication(). */
-		dev_kfree_skb(ctrl_skb);
-	} else {
-		pr_debug("%s(), missing handler\n", __func__);
-	}
-}
-
-/*
- * Function ircomm_disconnect_request (self, userdata, priority)
- *
- *    User layer wants to disconnect the IrCOMM connection
- *
- */
-int ircomm_disconnect_request(struct ircomm_cb *self, struct sk_buff *userdata)
-{
-	struct ircomm_info info;
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
-
-	ret = ircomm_do_event(self, IRCOMM_DISCONNECT_REQUEST, userdata,
-			      &info);
-	return ret;
-}
-
-EXPORT_SYMBOL(ircomm_disconnect_request);
-
-/*
- * Function disconnect_indication (self, skb)
- *
- *    Tell user that the link has been disconnected
- *
- */
-void ircomm_disconnect_indication(struct ircomm_cb *self, struct sk_buff *skb,
-				  struct ircomm_info *info)
-{
-	IRDA_ASSERT(info != NULL, return;);
-
-	if (self->notify.disconnect_indication) {
-		self->notify.disconnect_indication(self->notify.instance, self,
-						   info->reason, skb);
-	} else {
-		pr_debug("%s(), missing handler\n", __func__);
-	}
-}
-
-/*
- * Function ircomm_flow_request (self, flow)
- *
- *
- *
- */
-void ircomm_flow_request(struct ircomm_cb *self, LOCAL_FLOW flow)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-
-	if (self->service_type == IRCOMM_3_WIRE_RAW)
-		return;
-
-	irttp_flow_request(self->tsap, flow);
-}
-
-EXPORT_SYMBOL(ircomm_flow_request);
-
-#ifdef CONFIG_PROC_FS
-static void *ircomm_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	struct ircomm_cb *self;
-	loff_t off = 0;
-
-	spin_lock_irq(&ircomm->hb_spinlock);
-
-	for (self = (struct ircomm_cb *) hashbin_get_first(ircomm);
-	     self != NULL;
-	     self = (struct ircomm_cb *) hashbin_get_next(ircomm)) {
-		if (off++ == *pos)
-			break;
-
-	}
-	return self;
-}
-
-static void *ircomm_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-
-	return (void *) hashbin_get_next(ircomm);
-}
-
-static void ircomm_seq_stop(struct seq_file *seq, void *v)
-{
-	spin_unlock_irq(&ircomm->hb_spinlock);
-}
-
-static int ircomm_seq_show(struct seq_file *seq, void *v)
-{
-	const struct ircomm_cb *self = v;
-
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EINVAL; );
-
-	if(self->line < 0x10)
-		seq_printf(seq, "ircomm%d", self->line);
-	else
-		seq_printf(seq, "irlpt%d", self->line - 0x10);
-
-	seq_printf(seq,
-		   " state: %s, slsap_sel: %#02x, dlsap_sel: %#02x, mode:",
-		   ircomm_state[ self->state],
-		   self->slsap_sel, self->dlsap_sel);
-
-	if(self->service_type & IRCOMM_3_WIRE_RAW)
-		seq_printf(seq, " 3-wire-raw");
-	if(self->service_type & IRCOMM_3_WIRE)
-		seq_printf(seq, " 3-wire");
-	if(self->service_type & IRCOMM_9_WIRE)
-		seq_printf(seq, " 9-wire");
-	if(self->service_type & IRCOMM_CENTRONICS)
-		seq_printf(seq, " Centronics");
-	seq_putc(seq, '\n');
-
-	return 0;
-}
-
-static const struct seq_operations ircomm_seq_ops = {
-	.start  = ircomm_seq_start,
-	.next   = ircomm_seq_next,
-	.stop   = ircomm_seq_stop,
-	.show   = ircomm_seq_show,
-};
-
-static int ircomm_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ircomm_seq_ops);
-}
-#endif /* CONFIG_PROC_FS */
-
-MODULE_AUTHOR("Dag Brattli <dag@brattli.net>");
-MODULE_DESCRIPTION("IrCOMM protocol");
-MODULE_LICENSE("GPL");
-
-module_init(ircomm_init);
-module_exit(ircomm_cleanup);
diff --git a/net/irda/ircomm/ircomm_event.c b/net/irda/ircomm/ircomm_event.c
deleted file mode 100644
index b0730ac9f388..000000000000
--- a/net/irda/ircomm/ircomm_event.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/*********************************************************************
- *
- * Filename:      ircomm_event.c
- * Version:       1.0
- * Description:   IrCOMM layer state machine
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Jun  6 20:33:11 1999
- * Modified at:   Sun Dec 12 13:44:32 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irias_object.h>
-
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_event.h>
-
-static int ircomm_state_idle(struct ircomm_cb *self, IRCOMM_EVENT event,
-			     struct sk_buff *skb, struct ircomm_info *info);
-static int ircomm_state_waiti(struct ircomm_cb *self, IRCOMM_EVENT event,
-			      struct sk_buff *skb, struct ircomm_info *info);
-static int ircomm_state_waitr(struct ircomm_cb *self, IRCOMM_EVENT event,
-			      struct sk_buff *skb, struct ircomm_info *info);
-static int ircomm_state_conn(struct ircomm_cb *self, IRCOMM_EVENT event,
-			     struct sk_buff *skb, struct ircomm_info *info);
-
-const char *const ircomm_state[] = {
-	"IRCOMM_IDLE",
-	"IRCOMM_WAITI",
-	"IRCOMM_WAITR",
-	"IRCOMM_CONN",
-};
-
-static const char *const ircomm_event[] __maybe_unused = {
-	"IRCOMM_CONNECT_REQUEST",
-	"IRCOMM_CONNECT_RESPONSE",
-	"IRCOMM_TTP_CONNECT_INDICATION",
-	"IRCOMM_LMP_CONNECT_INDICATION",
-	"IRCOMM_TTP_CONNECT_CONFIRM",
-	"IRCOMM_LMP_CONNECT_CONFIRM",
-
-	"IRCOMM_LMP_DISCONNECT_INDICATION",
-	"IRCOMM_TTP_DISCONNECT_INDICATION",
-	"IRCOMM_DISCONNECT_REQUEST",
-
-	"IRCOMM_TTP_DATA_INDICATION",
-	"IRCOMM_LMP_DATA_INDICATION",
-	"IRCOMM_DATA_REQUEST",
-	"IRCOMM_CONTROL_REQUEST",
-	"IRCOMM_CONTROL_INDICATION",
-};
-
-static int (*state[])(struct ircomm_cb *self, IRCOMM_EVENT event,
-		      struct sk_buff *skb, struct ircomm_info *info) =
-{
-	ircomm_state_idle,
-	ircomm_state_waiti,
-	ircomm_state_waitr,
-	ircomm_state_conn,
-};
-
-/*
- * Function ircomm_state_idle (self, event, skb)
- *
- *    IrCOMM is currently idle
- *
- */
-static int ircomm_state_idle(struct ircomm_cb *self, IRCOMM_EVENT event,
-			     struct sk_buff *skb, struct ircomm_info *info)
-{
-	int ret = 0;
-
-	switch (event) {
-	case IRCOMM_CONNECT_REQUEST:
-		ircomm_next_state(self, IRCOMM_WAITI);
-		ret = self->issue.connect_request(self, skb, info);
-		break;
-	case IRCOMM_TTP_CONNECT_INDICATION:
-	case IRCOMM_LMP_CONNECT_INDICATION:
-		ircomm_next_state(self, IRCOMM_WAITR);
-		ircomm_connect_indication(self, skb, info);
-		break;
-	default:
-		pr_debug("%s(), unknown event: %s\n", __func__ ,
-			 ircomm_event[event]);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
-/*
- * Function ircomm_state_waiti (self, event, skb)
- *
- *    The IrCOMM user has requested an IrCOMM connection to the remote
- *    device and is awaiting confirmation
- */
-static int ircomm_state_waiti(struct ircomm_cb *self, IRCOMM_EVENT event,
-			      struct sk_buff *skb, struct ircomm_info *info)
-{
-	int ret = 0;
-
-	switch (event) {
-	case IRCOMM_TTP_CONNECT_CONFIRM:
-	case IRCOMM_LMP_CONNECT_CONFIRM:
-		ircomm_next_state(self, IRCOMM_CONN);
-		ircomm_connect_confirm(self, skb, info);
-		break;
-	case IRCOMM_TTP_DISCONNECT_INDICATION:
-	case IRCOMM_LMP_DISCONNECT_INDICATION:
-		ircomm_next_state(self, IRCOMM_IDLE);
-		ircomm_disconnect_indication(self, skb, info);
-		break;
-	default:
-		pr_debug("%s(), unknown event: %s\n", __func__ ,
-			 ircomm_event[event]);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
-/*
- * Function ircomm_state_waitr (self, event, skb)
- *
- *    IrCOMM has received an incoming connection request and is awaiting
- *    response from the user
- */
-static int ircomm_state_waitr(struct ircomm_cb *self, IRCOMM_EVENT event,
-			      struct sk_buff *skb, struct ircomm_info *info)
-{
-	int ret = 0;
-
-	switch (event) {
-	case IRCOMM_CONNECT_RESPONSE:
-		ircomm_next_state(self, IRCOMM_CONN);
-		ret = self->issue.connect_response(self, skb);
-		break;
-	case IRCOMM_DISCONNECT_REQUEST:
-		ircomm_next_state(self, IRCOMM_IDLE);
-		ret = self->issue.disconnect_request(self, skb, info);
-		break;
-	case IRCOMM_TTP_DISCONNECT_INDICATION:
-	case IRCOMM_LMP_DISCONNECT_INDICATION:
-		ircomm_next_state(self, IRCOMM_IDLE);
-		ircomm_disconnect_indication(self, skb, info);
-		break;
-	default:
-		pr_debug("%s(), unknown event = %s\n", __func__ ,
-			 ircomm_event[event]);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
-/*
- * Function ircomm_state_conn (self, event, skb)
- *
- *    IrCOMM is connected to the peer IrCOMM device
- *
- */
-static int ircomm_state_conn(struct ircomm_cb *self, IRCOMM_EVENT event,
-			     struct sk_buff *skb, struct ircomm_info *info)
-{
-	int ret = 0;
-
-	switch (event) {
-	case IRCOMM_DATA_REQUEST:
-		ret = self->issue.data_request(self, skb, 0);
-		break;
-	case IRCOMM_TTP_DATA_INDICATION:
-		ircomm_process_data(self, skb);
-		break;
-	case IRCOMM_LMP_DATA_INDICATION:
-		ircomm_data_indication(self, skb);
-		break;
-	case IRCOMM_CONTROL_REQUEST:
-		/* Just send a separate frame for now */
-		ret = self->issue.data_request(self, skb, skb->len);
-		break;
-	case IRCOMM_TTP_DISCONNECT_INDICATION:
-	case IRCOMM_LMP_DISCONNECT_INDICATION:
-		ircomm_next_state(self, IRCOMM_IDLE);
-		ircomm_disconnect_indication(self, skb, info);
-		break;
-	case IRCOMM_DISCONNECT_REQUEST:
-		ircomm_next_state(self, IRCOMM_IDLE);
-		ret = self->issue.disconnect_request(self, skb, info);
-		break;
-	default:
-		pr_debug("%s(), unknown event = %s\n", __func__ ,
-			 ircomm_event[event]);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
-/*
- * Function ircomm_do_event (self, event, skb)
- *
- *    Process event
- *
- */
-int ircomm_do_event(struct ircomm_cb *self, IRCOMM_EVENT event,
-		    struct sk_buff *skb, struct ircomm_info *info)
-{
-	pr_debug("%s: state=%s, event=%s\n", __func__ ,
-		 ircomm_state[self->state], ircomm_event[event]);
-
-	return (*state[self->state])(self, event, skb, info);
-}
-
-/*
- * Function ircomm_next_state (self, state)
- *
- *    Switch state
- *
- */
-void ircomm_next_state(struct ircomm_cb *self, IRCOMM_STATE state)
-{
-	self->state = state;
-
-	pr_debug("%s: next state=%s, service type=%d\n", __func__ ,
-		 ircomm_state[self->state], self->service_type);
-}
diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c
deleted file mode 100644
index e4cc847bb933..000000000000
--- a/net/irda/ircomm/ircomm_lmp.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*********************************************************************
- *
- * Filename:      ircomm_lmp.c
- * Version:       1.0
- * Description:   Interface between IrCOMM and IrLMP
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Jun  6 20:48:27 1999
- * Modified at:   Sun Dec 12 13:44:17 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * Sources:       Previous IrLPT work by Thomas Davis
- *
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/init.h>
-#include <linux/gfp.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irda_device.h>	/* struct irda_skb_cb */
-
-#include <net/irda/ircomm_event.h>
-#include <net/irda/ircomm_lmp.h>
-
-
-/*
- * Function ircomm_lmp_connect_request (self, userdata)
- *
- *
- *
- */
-static int ircomm_lmp_connect_request(struct ircomm_cb *self,
-				      struct sk_buff *userdata,
-				      struct ircomm_info *info)
-{
-	int ret = 0;
-
-	/* Don't forget to refcount it - should be NULL anyway */
-	if(userdata)
-		skb_get(userdata);
-
-	ret = irlmp_connect_request(self->lsap, info->dlsap_sel,
-				    info->saddr, info->daddr, NULL, userdata);
-	return ret;
-}
-
-/*
- * Function ircomm_lmp_connect_response (self, skb)
- *
- *
- *
- */
-static int ircomm_lmp_connect_response(struct ircomm_cb *self,
-				       struct sk_buff *userdata)
-{
-	struct sk_buff *tx_skb;
-
-	/* Any userdata supplied? */
-	if (userdata == NULL) {
-		tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
-		if (!tx_skb)
-			return -ENOMEM;
-
-		/* Reserve space for MUX and LAP header */
-		skb_reserve(tx_skb, LMP_MAX_HEADER);
-	} else {
-		/*
-		 *  Check that the client has reserved enough space for
-		 *  headers
-		 */
-		IRDA_ASSERT(skb_headroom(userdata) >= LMP_MAX_HEADER,
-			    return -1;);
-
-		/* Don't forget to refcount it - should be NULL anyway */
-		skb_get(userdata);
-		tx_skb = userdata;
-	}
-
-	return irlmp_connect_response(self->lsap, tx_skb);
-}
-
-static int ircomm_lmp_disconnect_request(struct ircomm_cb *self,
-					 struct sk_buff *userdata,
-					 struct ircomm_info *info)
-{
-	struct sk_buff *tx_skb;
-	int ret;
-
-	if (!userdata) {
-		tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
-		if (!tx_skb)
-			return -ENOMEM;
-
-		/*  Reserve space for MUX and LAP header */
-		skb_reserve(tx_skb, LMP_MAX_HEADER);
-		userdata = tx_skb;
-	} else {
-		/* Don't forget to refcount it - should be NULL anyway */
-		skb_get(userdata);
-	}
-
-	ret = irlmp_disconnect_request(self->lsap, userdata);
-
-	return ret;
-}
-
-/*
- * Function ircomm_lmp_flow_control (skb)
- *
- *    This function is called when a data frame we have sent to IrLAP has
- *    been deallocated. We do this to make sure we don't flood IrLAP with
- *    frames, since we are not using the IrTTP flow control mechanism
- */
-static void ircomm_lmp_flow_control(struct sk_buff *skb)
-{
-	struct irda_skb_cb *cb;
-	struct ircomm_cb *self;
-	int line;
-
-	IRDA_ASSERT(skb != NULL, return;);
-
-	cb = (struct irda_skb_cb *) skb->cb;
-
-	line = cb->line;
-
-	self = (struct ircomm_cb *) hashbin_lock_find(ircomm, line, NULL);
-	if (!self) {
-		pr_debug("%s(), didn't find myself\n", __func__);
-		return;
-	}
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-
-	self->pkt_count--;
-
-	if ((self->pkt_count < 2) && (self->flow_status == FLOW_STOP)) {
-		pr_debug("%s(), asking TTY to start again!\n", __func__);
-		self->flow_status = FLOW_START;
-		if (self->notify.flow_indication)
-			self->notify.flow_indication(self->notify.instance,
-						     self, FLOW_START);
-	}
-}
-
-/*
- * Function ircomm_lmp_data_request (self, userdata)
- *
- *    Send data frame to peer device
- *
- */
-static int ircomm_lmp_data_request(struct ircomm_cb *self,
-				   struct sk_buff *skb,
-				   int not_used)
-{
-	struct irda_skb_cb *cb;
-	int ret;
-
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	cb = (struct irda_skb_cb *) skb->cb;
-
-	cb->line = self->line;
-
-	pr_debug("%s(), sending frame\n", __func__);
-
-	/* Don't forget to refcount it - see ircomm_tty_do_softint() */
-	skb_get(skb);
-
-	skb_orphan(skb);
-	skb->destructor = ircomm_lmp_flow_control;
-
-	if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) {
-		pr_debug("%s(), asking TTY to slow down!\n", __func__);
-		self->flow_status = FLOW_STOP;
-		if (self->notify.flow_indication)
-			self->notify.flow_indication(self->notify.instance,
-						     self, FLOW_STOP);
-	}
-	ret = irlmp_data_request(self->lsap, skb);
-	if (ret) {
-		net_err_ratelimited("%s(), failed\n", __func__);
-		/* irlmp_data_request already free the packet */
-	}
-
-	return ret;
-}
-
-/*
- * Function ircomm_lmp_data_indication (instance, sap, skb)
- *
- *    Incoming data which we must deliver to the state machine, to check
- *    we are still connected.
- */
-static int ircomm_lmp_data_indication(void *instance, void *sap,
-				      struct sk_buff *skb)
-{
-	struct ircomm_cb *self = (struct ircomm_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	ircomm_do_event(self, IRCOMM_LMP_DATA_INDICATION, skb, NULL);
-
-	/* Drop reference count - see ircomm_tty_data_indication(). */
-	dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function ircomm_lmp_connect_confirm (instance, sap, qos, max_sdu_size,
- *                                       max_header_size, skb)
- *
- *    Connection has been confirmed by peer device
- *
- */
-static void ircomm_lmp_connect_confirm(void *instance, void *sap,
-				       struct qos_info *qos,
-				       __u32 max_seg_size,
-				       __u8 max_header_size,
-				       struct sk_buff *skb)
-{
-	struct ircomm_cb *self = (struct ircomm_cb *) instance;
-	struct ircomm_info info;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-	IRDA_ASSERT(qos != NULL, return;);
-
-	info.max_data_size = max_seg_size;
-	info.max_header_size = max_header_size;
-	info.qos = qos;
-
-	ircomm_do_event(self, IRCOMM_LMP_CONNECT_CONFIRM, skb, &info);
-
-	/* Drop reference count - see ircomm_tty_connect_confirm(). */
-	dev_kfree_skb(skb);
-}
-
-/*
- * Function ircomm_lmp_connect_indication (instance, sap, qos, max_sdu_size,
- *                                         max_header_size, skb)
- *
- *    Peer device wants to make a connection with us
- *
- */
-static void ircomm_lmp_connect_indication(void *instance, void *sap,
-					  struct qos_info *qos,
-					  __u32 max_seg_size,
-					  __u8 max_header_size,
-					  struct sk_buff *skb)
-{
-	struct ircomm_cb *self = (struct ircomm_cb *)instance;
-	struct ircomm_info info;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-	IRDA_ASSERT(qos != NULL, return;);
-
-	info.max_data_size = max_seg_size;
-	info.max_header_size = max_header_size;
-	info.qos = qos;
-
-	ircomm_do_event(self, IRCOMM_LMP_CONNECT_INDICATION, skb, &info);
-
-	/* Drop reference count - see ircomm_tty_connect_indication(). */
-	dev_kfree_skb(skb);
-}
-
-/*
- * Function ircomm_lmp_disconnect_indication (instance, sap, reason, skb)
- *
- *    Peer device has closed the connection, or the link went down for some
- *    other reason
- */
-static void ircomm_lmp_disconnect_indication(void *instance, void *sap,
-					     LM_REASON reason,
-					     struct sk_buff *skb)
-{
-	struct ircomm_cb *self = (struct ircomm_cb *) instance;
-	struct ircomm_info info;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-
-	info.reason = reason;
-
-	ircomm_do_event(self, IRCOMM_LMP_DISCONNECT_INDICATION, skb, &info);
-
-	/* Drop reference count - see ircomm_tty_disconnect_indication(). */
-	if(skb)
-		dev_kfree_skb(skb);
-}
-/*
- * Function ircomm_open_lsap (self)
- *
- *    Open LSAP. This function will only be used when using "raw" services
- *
- */
-int ircomm_open_lsap(struct ircomm_cb *self)
-{
-	notify_t notify;
-
-	/* Register callbacks */
-	irda_notify_init(&notify);
-	notify.data_indication       = ircomm_lmp_data_indication;
-	notify.connect_confirm       = ircomm_lmp_connect_confirm;
-	notify.connect_indication    = ircomm_lmp_connect_indication;
-	notify.disconnect_indication = ircomm_lmp_disconnect_indication;
-	notify.instance = self;
-	strlcpy(notify.name, "IrCOMM", sizeof(notify.name));
-
-	self->lsap = irlmp_open_lsap(LSAP_ANY, &notify, 0);
-	if (!self->lsap) {
-		pr_debug("%sfailed to allocate tsap\n", __func__);
-		return -1;
-	}
-	self->slsap_sel = self->lsap->slsap_sel;
-
-	/*
-	 *  Initialize the call-table for issuing commands
-	 */
-	self->issue.data_request       = ircomm_lmp_data_request;
-	self->issue.connect_request    = ircomm_lmp_connect_request;
-	self->issue.connect_response   = ircomm_lmp_connect_response;
-	self->issue.disconnect_request = ircomm_lmp_disconnect_request;
-
-	return 0;
-}
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
deleted file mode 100644
index 5728e76ca6d5..000000000000
--- a/net/irda/ircomm/ircomm_param.c
+++ /dev/null
@@ -1,501 +0,0 @@
-/*********************************************************************
- *
- * Filename:      ircomm_param.c
- * Version:       1.0
- * Description:   Parameter handling for the IrCOMM protocol
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Jun  7 10:25:11 1999
- * Modified at:   Sun Jan 30 14:32:03 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/gfp.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/parameters.h>
-
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_tty_attach.h>
-#include <net/irda/ircomm_tty.h>
-
-#include <net/irda/ircomm_param.h>
-
-static int ircomm_param_service_type(void *instance, irda_param_t *param,
-				     int get);
-static int ircomm_param_port_type(void *instance, irda_param_t *param,
-				  int get);
-static int ircomm_param_port_name(void *instance, irda_param_t *param,
-				  int get);
-static int ircomm_param_service_type(void *instance, irda_param_t *param,
-				     int get);
-static int ircomm_param_data_rate(void *instance, irda_param_t *param,
-				  int get);
-static int ircomm_param_data_format(void *instance, irda_param_t *param,
-				    int get);
-static int ircomm_param_flow_control(void *instance, irda_param_t *param,
-				     int get);
-static int ircomm_param_xon_xoff(void *instance, irda_param_t *param, int get);
-static int ircomm_param_enq_ack(void *instance, irda_param_t *param, int get);
-static int ircomm_param_line_status(void *instance, irda_param_t *param,
-				    int get);
-static int ircomm_param_dte(void *instance, irda_param_t *param, int get);
-static int ircomm_param_dce(void *instance, irda_param_t *param, int get);
-static int ircomm_param_poll(void *instance, irda_param_t *param, int get);
-
-static const pi_minor_info_t pi_minor_call_table_common[] = {
-	{ ircomm_param_service_type, PV_INT_8_BITS },
-	{ ircomm_param_port_type,    PV_INT_8_BITS },
-	{ ircomm_param_port_name,    PV_STRING }
-};
-static const pi_minor_info_t pi_minor_call_table_non_raw[] = {
-	{ ircomm_param_data_rate,    PV_INT_32_BITS | PV_BIG_ENDIAN },
-	{ ircomm_param_data_format,  PV_INT_8_BITS },
-	{ ircomm_param_flow_control, PV_INT_8_BITS },
-	{ ircomm_param_xon_xoff,     PV_INT_16_BITS },
-	{ ircomm_param_enq_ack,      PV_INT_16_BITS },
-	{ ircomm_param_line_status,  PV_INT_8_BITS }
-};
-static const pi_minor_info_t pi_minor_call_table_9_wire[] = {
-	{ ircomm_param_dte,          PV_INT_8_BITS },
-	{ ircomm_param_dce,          PV_INT_8_BITS },
-	{ ircomm_param_poll,         PV_NO_VALUE },
-};
-
-static const pi_major_info_t pi_major_call_table[] = {
-	{ pi_minor_call_table_common,  3 },
-	{ pi_minor_call_table_non_raw, 6 },
-	{ pi_minor_call_table_9_wire,  3 }
-/* 	{ pi_minor_call_table_centronics }  */
-};
-
-pi_param_info_t ircomm_param_info = { pi_major_call_table, 3, 0x0f, 4 };
-
-/*
- * Function ircomm_param_request (self, pi, flush)
- *
- *    Queue a parameter for the control channel
- *
- */
-int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
-{
-	unsigned long flags;
-	struct sk_buff *skb;
-	int count;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	/* Make sure we don't send parameters for raw mode */
-	if (self->service_type == IRCOMM_3_WIRE_RAW)
-		return 0;
-
-	spin_lock_irqsave(&self->spinlock, flags);
-
-	skb = self->ctrl_skb;
-	if (!skb) {
-		skb = alloc_skb(256, GFP_ATOMIC);
-		if (!skb) {
-			spin_unlock_irqrestore(&self->spinlock, flags);
-			return -ENOMEM;
-		}
-
-		skb_reserve(skb, self->max_header_size);
-		self->ctrl_skb = skb;
-	}
-	/*
-	 * Inserting is a little bit tricky since we don't know how much
-	 * room we will need. But this should hopefully work OK
-	 */
-	count = irda_param_insert(self, pi, skb_tail_pointer(skb),
-				  skb_tailroom(skb), &ircomm_param_info);
-	if (count < 0) {
-		net_warn_ratelimited("%s(), no room for parameter!\n",
-				     __func__);
-		spin_unlock_irqrestore(&self->spinlock, flags);
-		return -1;
-	}
-	skb_put(skb, count);
-	pr_debug("%s(), skb->len=%d\n", __func__, skb->len);
-
-	spin_unlock_irqrestore(&self->spinlock, flags);
-
-	if (flush) {
-		/* ircomm_tty_do_softint will take care of the rest */
-		schedule_work(&self->tqueue);
-	}
-
-	return count;
-}
-
-/*
- * Function ircomm_param_service_type (self, buf, len)
- *
- *    Handle service type, this function will both be called after the LM-IAS
- *    query and then the remote device sends its initial parameters
- *
- */
-static int ircomm_param_service_type(void *instance, irda_param_t *param,
-				     int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-	__u8 service_type = (__u8) param->pv.i;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (get) {
-		param->pv.i = self->settings.service_type;
-		return 0;
-	}
-
-	/* Find all common service types */
-	service_type &= self->service_type;
-	if (!service_type) {
-		pr_debug("%s(), No common service type to use!\n", __func__);
-		return -1;
-	}
-	pr_debug("%s(), services in common=%02x\n", __func__ ,
-		 service_type);
-
-	/*
-	 * Now choose a preferred service type of those available
-	 */
-	if (service_type & IRCOMM_CENTRONICS)
-		self->settings.service_type = IRCOMM_CENTRONICS;
-	else if (service_type & IRCOMM_9_WIRE)
-		self->settings.service_type = IRCOMM_9_WIRE;
-	else if (service_type & IRCOMM_3_WIRE)
-		self->settings.service_type = IRCOMM_3_WIRE;
-	else if (service_type & IRCOMM_3_WIRE_RAW)
-		self->settings.service_type = IRCOMM_3_WIRE_RAW;
-
-	pr_debug("%s(), resulting service type=0x%02x\n", __func__ ,
-		 self->settings.service_type);
-
-	/*
-	 * Now the line is ready for some communication. Check if we are a
-	 * server, and send over some initial parameters.
-	 * Client do it in ircomm_tty_state_setup().
-	 * Note : we may get called from ircomm_tty_getvalue_confirm(),
-	 * therefore before we even have open any socket. And self->client
-	 * is initialised to TRUE only later. So, we check if the link is
-	 * really initialised. - Jean II
-	 */
-	if ((self->max_header_size != IRCOMM_TTY_HDR_UNINITIALISED) &&
-	    (!self->client) &&
-	    (self->settings.service_type != IRCOMM_3_WIRE_RAW))
-	{
-		/* Init connection */
-		ircomm_tty_send_initial_parameters(self);
-		ircomm_tty_link_established(self);
-	}
-
-	return 0;
-}
-
-/*
- * Function ircomm_param_port_type (self, param)
- *
- *    The port type parameter tells if the devices are serial or parallel.
- *    Since we only advertise serial service, this parameter should only
- *    be equal to IRCOMM_SERIAL.
- */
-static int ircomm_param_port_type(void *instance, irda_param_t *param, int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = IRCOMM_SERIAL;
-	else {
-		self->settings.port_type = (__u8) param->pv.i;
-
-		pr_debug("%s(), port type=%d\n", __func__ ,
-			 self->settings.port_type);
-	}
-	return 0;
-}
-
-/*
- * Function ircomm_param_port_name (self, param)
- *
- *    Exchange port name
- *
- */
-static int ircomm_param_port_name(void *instance, irda_param_t *param, int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (get) {
-		pr_debug("%s(), not imp!\n", __func__);
-	} else {
-		pr_debug("%s(), port-name=%s\n", __func__ , param->pv.c);
-		strncpy(self->settings.port_name, param->pv.c, 32);
-	}
-
-	return 0;
-}
-
-/*
- * Function ircomm_param_data_rate (self, param)
- *
- *    Exchange data rate to be used in this settings
- *
- */
-static int ircomm_param_data_rate(void *instance, irda_param_t *param, int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->settings.data_rate;
-	else
-		self->settings.data_rate = param->pv.i;
-
-	pr_debug("%s(), data rate = %d\n", __func__ , param->pv.i);
-
-	return 0;
-}
-
-/*
- * Function ircomm_param_data_format (self, param)
- *
- *    Exchange data format to be used in this settings
- *
- */
-static int ircomm_param_data_format(void *instance, irda_param_t *param,
-				    int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->settings.data_format;
-	else
-		self->settings.data_format = (__u8) param->pv.i;
-
-	return 0;
-}
-
-/*
- * Function ircomm_param_flow_control (self, param)
- *
- *    Exchange flow control settings to be used in this settings
- *
- */
-static int ircomm_param_flow_control(void *instance, irda_param_t *param,
-				     int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->settings.flow_control;
-	else
-		self->settings.flow_control = (__u8) param->pv.i;
-
-	pr_debug("%s(), flow control = 0x%02x\n", __func__ , (__u8)param->pv.i);
-
-	return 0;
-}
-
-/*
- * Function ircomm_param_xon_xoff (self, param)
- *
- *    Exchange XON/XOFF characters
- *
- */
-static int ircomm_param_xon_xoff(void *instance, irda_param_t *param, int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (get) {
-		param->pv.i = self->settings.xonxoff[0];
-		param->pv.i |= self->settings.xonxoff[1] << 8;
-	} else {
-		self->settings.xonxoff[0] = (__u16) param->pv.i & 0xff;
-		self->settings.xonxoff[1] = (__u16) param->pv.i >> 8;
-	}
-
-	pr_debug("%s(), XON/XOFF = 0x%02x,0x%02x\n", __func__ ,
-		 param->pv.i & 0xff, param->pv.i >> 8);
-
-	return 0;
-}
-
-/*
- * Function ircomm_param_enq_ack (self, param)
- *
- *    Exchange ENQ/ACK characters
- *
- */
-static int ircomm_param_enq_ack(void *instance, irda_param_t *param, int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (get) {
-		param->pv.i = self->settings.enqack[0];
-		param->pv.i |= self->settings.enqack[1] << 8;
-	} else {
-		self->settings.enqack[0] = (__u16) param->pv.i & 0xff;
-		self->settings.enqack[1] = (__u16) param->pv.i >> 8;
-	}
-
-	pr_debug("%s(), ENQ/ACK = 0x%02x,0x%02x\n", __func__ ,
-		 param->pv.i & 0xff, param->pv.i >> 8);
-
-	return 0;
-}
-
-/*
- * Function ircomm_param_line_status (self, param)
- *
- *
- *
- */
-static int ircomm_param_line_status(void *instance, irda_param_t *param,
-				    int get)
-{
-	pr_debug("%s(), not impl.\n", __func__);
-
-	return 0;
-}
-
-/*
- * Function ircomm_param_dte (instance, param)
- *
- *    If we get here, there must be some sort of null-modem connection, and
- *    we are probably working in server mode as well.
- */
-static int ircomm_param_dte(void *instance, irda_param_t *param, int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-	__u8 dte;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->settings.dte;
-	else {
-		dte = (__u8) param->pv.i;
-
-		self->settings.dce = 0;
-
-		if (dte & IRCOMM_DELTA_DTR)
-			self->settings.dce |= (IRCOMM_DELTA_DSR|
-					      IRCOMM_DELTA_RI |
-					      IRCOMM_DELTA_CD);
-		if (dte & IRCOMM_DTR)
-			self->settings.dce |= (IRCOMM_DSR|
-					      IRCOMM_RI |
-					      IRCOMM_CD);
-
-		if (dte & IRCOMM_DELTA_RTS)
-			self->settings.dce |= IRCOMM_DELTA_CTS;
-		if (dte & IRCOMM_RTS)
-			self->settings.dce |= IRCOMM_CTS;
-
-		/* Take appropriate actions */
-		ircomm_tty_check_modem_status(self);
-
-		/* Null modem cable emulator */
-		self->settings.null_modem = TRUE;
-	}
-
-	return 0;
-}
-
-/*
- * Function ircomm_param_dce (instance, param)
- *
- *
- *
- */
-static int ircomm_param_dce(void *instance, irda_param_t *param, int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-	__u8 dce;
-
-	pr_debug("%s(), dce = 0x%02x\n", __func__ , (__u8)param->pv.i);
-
-	dce = (__u8) param->pv.i;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	self->settings.dce = dce;
-
-	/* Check if any of the settings have changed */
-	if (dce & 0x0f) {
-		if (dce & IRCOMM_DELTA_CTS) {
-			pr_debug("%s(), CTS\n", __func__);
-		}
-	}
-
-	ircomm_tty_check_modem_status(self);
-
-	return 0;
-}
-
-/*
- * Function ircomm_param_poll (instance, param)
- *
- *    Called when the peer device is polling for the line settings
- *
- */
-static int ircomm_param_poll(void *instance, irda_param_t *param, int get)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	/* Poll parameters are always of length 0 (just a signal) */
-	if (!get) {
-		/* Respond with DTE line settings */
-		ircomm_param_request(self, IRCOMM_DTE, TRUE);
-	}
-	return 0;
-}
-
-
-
-
-
diff --git a/net/irda/ircomm/ircomm_ttp.c b/net/irda/ircomm/ircomm_ttp.c
deleted file mode 100644
index 4b81e0934770..000000000000
--- a/net/irda/ircomm/ircomm_ttp.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*********************************************************************
- *
- * Filename:      ircomm_ttp.c
- * Version:       1.0
- * Description:   Interface between IrCOMM and IrTTP
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Jun  6 20:48:27 1999
- * Modified at:   Mon Dec 13 11:35:13 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1999 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/init.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irttp.h>
-
-#include <net/irda/ircomm_event.h>
-#include <net/irda/ircomm_ttp.h>
-
-static int ircomm_ttp_data_indication(void *instance, void *sap,
-				      struct sk_buff *skb);
-static void ircomm_ttp_connect_confirm(void *instance, void *sap,
-				       struct qos_info *qos,
-				       __u32 max_sdu_size,
-				       __u8 max_header_size,
-				       struct sk_buff *skb);
-static void ircomm_ttp_connect_indication(void *instance, void *sap,
-					  struct qos_info *qos,
-					  __u32 max_sdu_size,
-					  __u8 max_header_size,
-					  struct sk_buff *skb);
-static void ircomm_ttp_flow_indication(void *instance, void *sap,
-				       LOCAL_FLOW cmd);
-static void ircomm_ttp_disconnect_indication(void *instance, void *sap,
-					     LM_REASON reason,
-					     struct sk_buff *skb);
-static int ircomm_ttp_data_request(struct ircomm_cb *self,
-				   struct sk_buff *skb,
-				   int clen);
-static int ircomm_ttp_connect_request(struct ircomm_cb *self,
-				      struct sk_buff *userdata,
-				      struct ircomm_info *info);
-static int ircomm_ttp_connect_response(struct ircomm_cb *self,
-				       struct sk_buff *userdata);
-static int ircomm_ttp_disconnect_request(struct ircomm_cb *self,
-					 struct sk_buff *userdata,
-					 struct ircomm_info *info);
-
-/*
- * Function ircomm_open_tsap (self)
- *
- *
- *
- */
-int ircomm_open_tsap(struct ircomm_cb *self)
-{
-	notify_t notify;
-
-	/* Register callbacks */
-	irda_notify_init(&notify);
-	notify.data_indication       = ircomm_ttp_data_indication;
-	notify.connect_confirm       = ircomm_ttp_connect_confirm;
-	notify.connect_indication    = ircomm_ttp_connect_indication;
-	notify.flow_indication       = ircomm_ttp_flow_indication;
-	notify.disconnect_indication = ircomm_ttp_disconnect_indication;
-	notify.instance = self;
-	strlcpy(notify.name, "IrCOMM", sizeof(notify.name));
-
-	self->tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT,
-				     &notify);
-	if (!self->tsap) {
-		pr_debug("%sfailed to allocate tsap\n", __func__);
-		return -1;
-	}
-	self->slsap_sel = self->tsap->stsap_sel;
-
-	/*
-	 *  Initialize the call-table for issuing commands
-	 */
-	self->issue.data_request       = ircomm_ttp_data_request;
-	self->issue.connect_request    = ircomm_ttp_connect_request;
-	self->issue.connect_response   = ircomm_ttp_connect_response;
-	self->issue.disconnect_request = ircomm_ttp_disconnect_request;
-
-	return 0;
-}
-
-/*
- * Function ircomm_ttp_connect_request (self, userdata)
- *
- *
- *
- */
-static int ircomm_ttp_connect_request(struct ircomm_cb *self,
-				      struct sk_buff *userdata,
-				      struct ircomm_info *info)
-{
-	int ret = 0;
-
-	/* Don't forget to refcount it - should be NULL anyway */
-	if(userdata)
-		skb_get(userdata);
-
-	ret = irttp_connect_request(self->tsap, info->dlsap_sel,
-				    info->saddr, info->daddr, NULL,
-				    TTP_SAR_DISABLE, userdata);
-
-	return ret;
-}
-
-/*
- * Function ircomm_ttp_connect_response (self, skb)
- *
- *
- *
- */
-static int ircomm_ttp_connect_response(struct ircomm_cb *self,
-				       struct sk_buff *userdata)
-{
-	int ret;
-
-	/* Don't forget to refcount it - should be NULL anyway */
-	if(userdata)
-		skb_get(userdata);
-
-	ret = irttp_connect_response(self->tsap, TTP_SAR_DISABLE, userdata);
-
-	return ret;
-}
-
-/*
- * Function ircomm_ttp_data_request (self, userdata)
- *
- *    Send IrCOMM data to IrTTP layer. Currently we do not try to combine
- *    control data with pure data, so they will be sent as separate frames.
- *    Should not be a big problem though, since control frames are rare. But
- *    some of them are sent after connection establishment, so this can
- *    increase the latency a bit.
- */
-static int ircomm_ttp_data_request(struct ircomm_cb *self,
-				   struct sk_buff *skb,
-				   int clen)
-{
-	int ret;
-
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	pr_debug("%s(), clen=%d\n", __func__ , clen);
-
-	/*
-	 * Insert clen field, currently we either send data only, or control
-	 * only frames, to make things easier and avoid queueing
-	 */
-	IRDA_ASSERT(skb_headroom(skb) >= IRCOMM_HEADER_SIZE, return -1;);
-
-	/* Don't forget to refcount it - see ircomm_tty_do_softint() */
-	skb_get(skb);
-
-	skb_push(skb, IRCOMM_HEADER_SIZE);
-
-	skb->data[0] = clen;
-
-	ret = irttp_data_request(self->tsap, skb);
-	if (ret) {
-		net_err_ratelimited("%s(), failed\n", __func__);
-		/* irttp_data_request already free the packet */
-	}
-
-	return ret;
-}
-
-/*
- * Function ircomm_ttp_data_indication (instance, sap, skb)
- *
- *    Incoming data
- *
- */
-static int ircomm_ttp_data_indication(void *instance, void *sap,
-				      struct sk_buff *skb)
-{
-	struct ircomm_cb *self = (struct ircomm_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;);
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	ircomm_do_event(self, IRCOMM_TTP_DATA_INDICATION, skb, NULL);
-
-	/* Drop reference count - see ircomm_tty_data_indication(). */
-	dev_kfree_skb(skb);
-
-	return 0;
-}
-
-static void ircomm_ttp_connect_confirm(void *instance, void *sap,
-				       struct qos_info *qos,
-				       __u32 max_sdu_size,
-				       __u8 max_header_size,
-				       struct sk_buff *skb)
-{
-	struct ircomm_cb *self = (struct ircomm_cb *) instance;
-	struct ircomm_info info;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-	IRDA_ASSERT(qos != NULL, goto out;);
-
-	if (max_sdu_size != TTP_SAR_DISABLE) {
-		net_err_ratelimited("%s(), SAR not allowed for IrCOMM!\n",
-				    __func__);
-		goto out;
-	}
-
-	info.max_data_size = irttp_get_max_seg_size(self->tsap)
-		- IRCOMM_HEADER_SIZE;
-	info.max_header_size = max_header_size + IRCOMM_HEADER_SIZE;
-	info.qos = qos;
-
-	ircomm_do_event(self, IRCOMM_TTP_CONNECT_CONFIRM, skb, &info);
-
-out:
-	/* Drop reference count - see ircomm_tty_connect_confirm(). */
-	dev_kfree_skb(skb);
-}
-
-/*
- * Function ircomm_ttp_connect_indication (instance, sap, qos, max_sdu_size,
- *                                         max_header_size, skb)
- *
- *
- *
- */
-static void ircomm_ttp_connect_indication(void *instance, void *sap,
-					  struct qos_info *qos,
-					  __u32 max_sdu_size,
-					  __u8 max_header_size,
-					  struct sk_buff *skb)
-{
-	struct ircomm_cb *self = (struct ircomm_cb *)instance;
-	struct ircomm_info info;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-	IRDA_ASSERT(qos != NULL, goto out;);
-
-	if (max_sdu_size != TTP_SAR_DISABLE) {
-		net_err_ratelimited("%s(), SAR not allowed for IrCOMM!\n",
-				    __func__);
-		goto out;
-	}
-
-	info.max_data_size = irttp_get_max_seg_size(self->tsap)
-		- IRCOMM_HEADER_SIZE;
-	info.max_header_size = max_header_size + IRCOMM_HEADER_SIZE;
-	info.qos = qos;
-
-	ircomm_do_event(self, IRCOMM_TTP_CONNECT_INDICATION, skb, &info);
-
-out:
-	/* Drop reference count - see ircomm_tty_connect_indication(). */
-	dev_kfree_skb(skb);
-}
-
-/*
- * Function ircomm_ttp_disconnect_request (self, userdata, info)
- *
- *
- *
- */
-static int ircomm_ttp_disconnect_request(struct ircomm_cb *self,
-					 struct sk_buff *userdata,
-					 struct ircomm_info *info)
-{
-	int ret;
-
-	/* Don't forget to refcount it - should be NULL anyway */
-	if(userdata)
-		skb_get(userdata);
-
-	ret = irttp_disconnect_request(self->tsap, userdata, P_NORMAL);
-
-	return ret;
-}
-
-/*
- * Function ircomm_ttp_disconnect_indication (instance, sap, reason, skb)
- *
- *
- *
- */
-static void ircomm_ttp_disconnect_indication(void *instance, void *sap,
-					     LM_REASON reason,
-					     struct sk_buff *skb)
-{
-	struct ircomm_cb *self = (struct ircomm_cb *) instance;
-	struct ircomm_info info;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-
-	info.reason = reason;
-
-	ircomm_do_event(self, IRCOMM_TTP_DISCONNECT_INDICATION, skb, &info);
-
-	/* Drop reference count - see ircomm_tty_disconnect_indication(). */
-	if(skb)
-		dev_kfree_skb(skb);
-}
-
-/*
- * Function ircomm_ttp_flow_indication (instance, sap, cmd)
- *
- *    Layer below is telling us to start or stop the flow of data
- *
- */
-static void ircomm_ttp_flow_indication(void *instance, void *sap,
-				       LOCAL_FLOW cmd)
-{
-	struct ircomm_cb *self = (struct ircomm_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;);
-
-	if (self->notify.flow_indication)
-		self->notify.flow_indication(self->notify.instance, self, cmd);
-}
-
-
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
deleted file mode 100644
index ec157c3419b5..000000000000
--- a/net/irda/ircomm/ircomm_tty.c
+++ /dev/null
@@ -1,1329 +0,0 @@
-/*********************************************************************
- *
- * Filename:      ircomm_tty.c
- * Version:       1.0
- * Description:   IrCOMM serial TTY driver
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Jun  6 21:00:56 1999
- * Modified at:   Wed Feb 23 00:09:02 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * Sources:       serial.c and previous IrCOMM work by Takahide Higuchi
- *
- *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/sched/signal.h>
-#include <linux/seq_file.h>
-#include <linux/termios.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/interrupt.h>
-#include <linux/device.h>		/* for MODULE_ALIAS_CHARDEV_MAJOR */
-
-#include <linux/uaccess.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irmod.h>
-
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_param.h>
-#include <net/irda/ircomm_tty_attach.h>
-#include <net/irda/ircomm_tty.h>
-
-static int ircomm_tty_install(struct tty_driver *driver,
-		struct tty_struct *tty);
-static int  ircomm_tty_open(struct tty_struct *tty, struct file *filp);
-static void ircomm_tty_close(struct tty_struct * tty, struct file *filp);
-static int  ircomm_tty_write(struct tty_struct * tty,
-			     const unsigned char *buf, int count);
-static int  ircomm_tty_write_room(struct tty_struct *tty);
-static void ircomm_tty_throttle(struct tty_struct *tty);
-static void ircomm_tty_unthrottle(struct tty_struct *tty);
-static int  ircomm_tty_chars_in_buffer(struct tty_struct *tty);
-static void ircomm_tty_flush_buffer(struct tty_struct *tty);
-static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch);
-static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout);
-static void ircomm_tty_hangup(struct tty_struct *tty);
-static void ircomm_tty_do_softint(struct work_struct *work);
-static void ircomm_tty_shutdown(struct ircomm_tty_cb *self);
-static void ircomm_tty_stop(struct tty_struct *tty);
-
-static int ircomm_tty_data_indication(void *instance, void *sap,
-				      struct sk_buff *skb);
-static int ircomm_tty_control_indication(void *instance, void *sap,
-					 struct sk_buff *skb);
-static void ircomm_tty_flow_indication(void *instance, void *sap,
-				       LOCAL_FLOW cmd);
-#ifdef CONFIG_PROC_FS
-static const struct file_operations ircomm_tty_proc_fops;
-#endif /* CONFIG_PROC_FS */
-static struct tty_driver *driver;
-
-static hashbin_t *ircomm_tty = NULL;
-
-static const struct tty_operations ops = {
-	.install	 = ircomm_tty_install,
-	.open            = ircomm_tty_open,
-	.close           = ircomm_tty_close,
-	.write           = ircomm_tty_write,
-	.write_room      = ircomm_tty_write_room,
-	.chars_in_buffer = ircomm_tty_chars_in_buffer,
-	.flush_buffer    = ircomm_tty_flush_buffer,
-	.ioctl           = ircomm_tty_ioctl,	/* ircomm_tty_ioctl.c */
-	.tiocmget        = ircomm_tty_tiocmget,	/* ircomm_tty_ioctl.c */
-	.tiocmset        = ircomm_tty_tiocmset,	/* ircomm_tty_ioctl.c */
-	.throttle        = ircomm_tty_throttle,
-	.unthrottle      = ircomm_tty_unthrottle,
-	.send_xchar      = ircomm_tty_send_xchar,
-	.set_termios     = ircomm_tty_set_termios,
-	.stop            = ircomm_tty_stop,
-	.start           = ircomm_tty_start,
-	.hangup          = ircomm_tty_hangup,
-	.wait_until_sent = ircomm_tty_wait_until_sent,
-#ifdef CONFIG_PROC_FS
-	.proc_fops       = &ircomm_tty_proc_fops,
-#endif /* CONFIG_PROC_FS */
-};
-
-static void ircomm_port_raise_dtr_rts(struct tty_port *port, int raise)
-{
-	struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb,
-			port);
-	/*
-	 * Here, we use to lock those two guys, but as ircomm_param_request()
-	 * does it itself, I don't see the point (and I see the deadlock).
-	 * Jean II
-	 */
-	if (raise)
-		self->settings.dte |= IRCOMM_RTS | IRCOMM_DTR;
-	else
-		self->settings.dte &= ~(IRCOMM_RTS | IRCOMM_DTR);
-
-	ircomm_param_request(self, IRCOMM_DTE, TRUE);
-}
-
-static int ircomm_port_carrier_raised(struct tty_port *port)
-{
-	struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb,
-			port);
-	return self->settings.dce & IRCOMM_CD;
-}
-
-static const struct tty_port_operations ircomm_port_ops = {
-	.dtr_rts = ircomm_port_raise_dtr_rts,
-	.carrier_raised = ircomm_port_carrier_raised,
-};
-
-/*
- * Function ircomm_tty_init()
- *
- *    Init IrCOMM TTY layer/driver
- *
- */
-static int __init ircomm_tty_init(void)
-{
-	driver = alloc_tty_driver(IRCOMM_TTY_PORTS);
-	if (!driver)
-		return -ENOMEM;
-	ircomm_tty = hashbin_new(HB_LOCK);
-	if (ircomm_tty == NULL) {
-		net_err_ratelimited("%s(), can't allocate hashbin!\n",
-				    __func__);
-		put_tty_driver(driver);
-		return -ENOMEM;
-	}
-
-	driver->driver_name     = "ircomm";
-	driver->name            = "ircomm";
-	driver->major           = IRCOMM_TTY_MAJOR;
-	driver->minor_start     = IRCOMM_TTY_MINOR;
-	driver->type            = TTY_DRIVER_TYPE_SERIAL;
-	driver->subtype         = SERIAL_TYPE_NORMAL;
-	driver->init_termios    = tty_std_termios;
-	driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
-	driver->flags           = TTY_DRIVER_REAL_RAW;
-	tty_set_operations(driver, &ops);
-	if (tty_register_driver(driver)) {
-		net_err_ratelimited("%s(): Couldn't register serial driver\n",
-				    __func__);
-		put_tty_driver(driver);
-		return -1;
-	}
-	return 0;
-}
-
-static void __exit __ircomm_tty_cleanup(struct ircomm_tty_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	ircomm_tty_shutdown(self);
-
-	self->magic = 0;
-	tty_port_destroy(&self->port);
-	kfree(self);
-}
-
-/*
- * Function ircomm_tty_cleanup ()
- *
- *    Remove IrCOMM TTY layer/driver
- *
- */
-static void __exit ircomm_tty_cleanup(void)
-{
-	int ret;
-
-	ret = tty_unregister_driver(driver);
-	if (ret) {
-		net_err_ratelimited("%s(), failed to unregister driver\n",
-				    __func__);
-		return;
-	}
-
-	hashbin_delete(ircomm_tty, (FREE_FUNC) __ircomm_tty_cleanup);
-	put_tty_driver(driver);
-}
-
-/*
- * Function ircomm_startup (self)
- *
- *
- *
- */
-static int ircomm_tty_startup(struct ircomm_tty_cb *self)
-{
-	notify_t notify;
-	int ret = -ENODEV;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	/* Check if already open */
-	if (tty_port_initialized(&self->port)) {
-		pr_debug("%s(), already open so break out!\n", __func__);
-		return 0;
-	}
-	tty_port_set_initialized(&self->port, 1);
-
-	/* Register with IrCOMM */
-	irda_notify_init(&notify);
-	/* These callbacks we must handle ourselves */
-	notify.data_indication       = ircomm_tty_data_indication;
-	notify.udata_indication      = ircomm_tty_control_indication;
-	notify.flow_indication       = ircomm_tty_flow_indication;
-
-	/* Use the ircomm_tty interface for these ones */
-	notify.disconnect_indication = ircomm_tty_disconnect_indication;
-	notify.connect_confirm       = ircomm_tty_connect_confirm;
-	notify.connect_indication    = ircomm_tty_connect_indication;
-	strlcpy(notify.name, "ircomm_tty", sizeof(notify.name));
-	notify.instance = self;
-
-	if (!self->ircomm) {
-		self->ircomm = ircomm_open(&notify, self->service_type,
-					   self->line);
-	}
-	if (!self->ircomm)
-		goto err;
-
-	self->slsap_sel = self->ircomm->slsap_sel;
-
-	/* Connect IrCOMM link with remote device */
-	ret = ircomm_tty_attach_cable(self);
-	if (ret < 0) {
-		net_err_ratelimited("%s(), error attaching cable!\n", __func__);
-		goto err;
-	}
-
-	return 0;
-err:
-	tty_port_set_initialized(&self->port, 0);
-	return ret;
-}
-
-/*
- * Function ircomm_block_til_ready (self, filp)
- *
- *
- *
- */
-static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
-		struct tty_struct *tty, struct file *filp)
-{
-	struct tty_port *port = &self->port;
-	DECLARE_WAITQUEUE(wait, current);
-	int		retval;
-	int		do_clocal = 0;
-	unsigned long	flags;
-
-	/*
-	 * If non-blocking mode is set, or the port is not enabled,
-	 * then make the check up front and then exit.
-	 */
-	if (tty_io_error(tty)) {
-		tty_port_set_active(port, 1);
-		return 0;
-	}
-
-	if (filp->f_flags & O_NONBLOCK) {
-		/* nonblock mode is set */
-		if (C_BAUD(tty))
-			tty_port_raise_dtr_rts(port);
-		tty_port_set_active(port, 1);
-		pr_debug("%s(), O_NONBLOCK requested!\n", __func__);
-		return 0;
-	}
-
-	if (C_CLOCAL(tty)) {
-		pr_debug("%s(), doing CLOCAL!\n", __func__);
-		do_clocal = 1;
-	}
-
-	/* Wait for carrier detect and the line to become
-	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, port->count is dropped by one, so that
-	 * mgsl_close() knows when to free things.  We restore it upon
-	 * exit, either normal or abnormal.
-	 */
-
-	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
-
-	pr_debug("%s(%d):block_til_ready before block on %s open_count=%d\n",
-		 __FILE__, __LINE__, tty->driver->name, port->count);
-
-	spin_lock_irqsave(&port->lock, flags);
-	port->count--;
-	port->blocked_open++;
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	while (1) {
-		if (C_BAUD(tty) && tty_port_initialized(port))
-			tty_port_raise_dtr_rts(port);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (tty_hung_up_p(filp) || !tty_port_initialized(port)) {
-			retval = (port->flags & ASYNC_HUP_NOTIFY) ?
-					-EAGAIN : -ERESTARTSYS;
-			break;
-		}
-
-		/*
-		 * Check if link is ready now. Even if CLOCAL is
-		 * specified, we cannot return before the IrCOMM link is
-		 * ready
-		 */
-		if ((do_clocal || tty_port_carrier_raised(port)) &&
-		    self->state == IRCOMM_TTY_READY)
-		{
-			break;
-		}
-
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-
-		pr_debug("%s(%d):block_til_ready blocking on %s open_count=%d\n",
-			 __FILE__, __LINE__, tty->driver->name, port->count);
-
-		schedule();
-	}
-
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
-
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count++;
-	port->blocked_open--;
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	pr_debug("%s(%d):block_til_ready after blocking on %s open_count=%d\n",
-		 __FILE__, __LINE__, tty->driver->name, port->count);
-
-	if (!retval)
-		tty_port_set_active(port, 1);
-
-	return retval;
-}
-
-
-static int ircomm_tty_install(struct tty_driver *driver, struct tty_struct *tty)
-{
-	struct ircomm_tty_cb *self;
-	unsigned int line = tty->index;
-
-	/* Check if instance already exists */
-	self = hashbin_lock_find(ircomm_tty, line, NULL);
-	if (!self) {
-		/* No, so make new instance */
-		self = kzalloc(sizeof(struct ircomm_tty_cb), GFP_KERNEL);
-		if (self == NULL)
-			return -ENOMEM;
-
-		tty_port_init(&self->port);
-		self->port.ops = &ircomm_port_ops;
-		self->magic = IRCOMM_TTY_MAGIC;
-		self->flow = FLOW_STOP;
-
-		self->line = line;
-		INIT_WORK(&self->tqueue, ircomm_tty_do_softint);
-		self->max_header_size = IRCOMM_TTY_HDR_UNINITIALISED;
-		self->max_data_size = IRCOMM_TTY_DATA_UNINITIALISED;
-
-		/* Init some important stuff */
-		init_timer(&self->watchdog_timer);
-		spin_lock_init(&self->spinlock);
-
-		/*
-		 * Force TTY into raw mode by default which is usually what
-		 * we want for IrCOMM and IrLPT. This way applications will
-		 * not have to twiddle with printcap etc.
-		 *
-		 * Note this is completely usafe and doesn't work properly
-		 */
-		tty->termios.c_iflag = 0;
-		tty->termios.c_oflag = 0;
-
-		/* Insert into hash */
-		hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL);
-	}
-
-	tty->driver_data = self;
-
-	return tty_port_install(&self->port, driver, tty);
-}
-
-/*
- * Function ircomm_tty_open (tty, filp)
- *
- *    This routine is called when a particular tty device is opened. This
- *    routine is mandatory; if this routine is not filled in, the attempted
- *    open will fail with ENODEV.
- */
-static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
-{
-	struct ircomm_tty_cb *self = tty->driver_data;
-	unsigned long	flags;
-	int ret;
-
-	/* ++ is not atomic, so this should be protected - Jean II */
-	spin_lock_irqsave(&self->port.lock, flags);
-	self->port.count++;
-	spin_unlock_irqrestore(&self->port.lock, flags);
-	tty_port_tty_set(&self->port, tty);
-
-	pr_debug("%s(), %s%d, count = %d\n", __func__ , tty->driver->name,
-		 self->line, self->port.count);
-
-	/* Not really used by us, but lets do it anyway */
-	self->port.low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
-
-	/* Check if this is a "normal" ircomm device, or an irlpt device */
-	if (self->line < 0x10) {
-		self->service_type = IRCOMM_3_WIRE | IRCOMM_9_WIRE;
-		self->settings.service_type = IRCOMM_9_WIRE; /* 9 wire as default */
-		/* Jan Kiszka -> add DSR/RI -> Conform to IrCOMM spec */
-		self->settings.dce = IRCOMM_CTS | IRCOMM_CD | IRCOMM_DSR | IRCOMM_RI; /* Default line settings */
-		pr_debug("%s(), IrCOMM device\n", __func__);
-	} else {
-		pr_debug("%s(), IrLPT device\n", __func__);
-		self->service_type = IRCOMM_3_WIRE_RAW;
-		self->settings.service_type = IRCOMM_3_WIRE_RAW; /* Default */
-	}
-
-	ret = ircomm_tty_startup(self);
-	if (ret)
-		return ret;
-
-	ret = ircomm_tty_block_til_ready(self, tty, filp);
-	if (ret) {
-		pr_debug("%s(), returning after block_til_ready with %d\n",
-			 __func__, ret);
-
-		return ret;
-	}
-	return 0;
-}
-
-/*
- * Function ircomm_tty_close (tty, filp)
- *
- *    This routine is called when a particular tty device is closed.
- *
- */
-static void ircomm_tty_close(struct tty_struct *tty, struct file *filp)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-	struct tty_port *port = &self->port;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	if (tty_port_close_start(port, tty, filp) == 0)
-		return;
-
-	ircomm_tty_shutdown(self);
-
-	tty_driver_flush_buffer(tty);
-
-	tty_port_close_end(port, tty);
-	tty_port_tty_set(port, NULL);
-}
-
-/*
- * Function ircomm_tty_flush_buffer (tty)
- *
- *
- *
- */
-static void ircomm_tty_flush_buffer(struct tty_struct *tty)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	/*
-	 * Let do_softint() do this to avoid race condition with
-	 * do_softint() ;-)
-	 */
-	schedule_work(&self->tqueue);
-}
-
-/*
- * Function ircomm_tty_do_softint (work)
- *
- *    We use this routine to give the write wakeup to the user at at a
- *    safe time (as fast as possible after write have completed). This
- *    can be compared to the Tx interrupt.
- */
-static void ircomm_tty_do_softint(struct work_struct *work)
-{
-	struct ircomm_tty_cb *self =
-		container_of(work, struct ircomm_tty_cb, tqueue);
-	struct tty_struct *tty;
-	unsigned long flags;
-	struct sk_buff *skb, *ctrl_skb;
-
-	if (!self || self->magic != IRCOMM_TTY_MAGIC)
-		return;
-
-	tty = tty_port_tty_get(&self->port);
-	if (!tty)
-		return;
-
-	/* Unlink control buffer */
-	spin_lock_irqsave(&self->spinlock, flags);
-
-	ctrl_skb = self->ctrl_skb;
-	self->ctrl_skb = NULL;
-
-	spin_unlock_irqrestore(&self->spinlock, flags);
-
-	/* Flush control buffer if any */
-	if(ctrl_skb) {
-		if(self->flow == FLOW_START)
-			ircomm_control_request(self->ircomm, ctrl_skb);
-		/* Drop reference count - see ircomm_ttp_data_request(). */
-		dev_kfree_skb(ctrl_skb);
-	}
-
-	if (tty->hw_stopped)
-		goto put;
-
-	/* Unlink transmit buffer */
-	spin_lock_irqsave(&self->spinlock, flags);
-
-	skb = self->tx_skb;
-	self->tx_skb = NULL;
-
-	spin_unlock_irqrestore(&self->spinlock, flags);
-
-	/* Flush transmit buffer if any */
-	if (skb) {
-		ircomm_tty_do_event(self, IRCOMM_TTY_DATA_REQUEST, skb, NULL);
-		/* Drop reference count - see ircomm_ttp_data_request(). */
-		dev_kfree_skb(skb);
-	}
-
-	/* Check if user (still) wants to be waken up */
-	tty_wakeup(tty);
-put:
-	tty_kref_put(tty);
-}
-
-/*
- * Function ircomm_tty_write (tty, buf, count)
- *
- *    This routine is called by the kernel to write a series of characters
- *    to the tty device. The characters may come from user space or kernel
- *    space. This routine will return the number of characters actually
- *    accepted for writing. This routine is mandatory.
- */
-static int ircomm_tty_write(struct tty_struct *tty,
-			    const unsigned char *buf, int count)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-	unsigned long flags;
-	struct sk_buff *skb;
-	int tailroom = 0;
-	int len = 0;
-	int size;
-
-	pr_debug("%s(), count=%d, hw_stopped=%d\n", __func__ , count,
-		 tty->hw_stopped);
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	/* We may receive packets from the TTY even before we have finished
-	 * our setup. Not cool.
-	 * The problem is that we don't know the final header and data size
-	 * to create the proper skb, so any skb we would create would have
-	 * bogus header and data size, so need care.
-	 * We use a bogus header size to safely detect this condition.
-	 * Another problem is that hw_stopped was set to 0 way before it
-	 * should be, so we would drop this skb. It should now be fixed.
-	 * One option is to not accept data until we are properly setup.
-	 * But, I suspect that when it happens, the ppp line discipline
-	 * just "drops" the data, which might screw up connect scripts.
-	 * The second option is to create a "safe skb", with large header
-	 * and small size (see ircomm_tty_open() for values).
-	 * We just need to make sure that when the real values get filled,
-	 * we don't mess up the original "safe skb" (see tx_data_size).
-	 * Jean II */
-	if (self->max_header_size == IRCOMM_TTY_HDR_UNINITIALISED) {
-		pr_debug("%s() : not initialised\n", __func__);
-#ifdef IRCOMM_NO_TX_BEFORE_INIT
-		/* We didn't consume anything, TTY will retry */
-		return 0;
-#endif
-	}
-
-	if (count < 1)
-		return 0;
-
-	/* Protect our manipulation of self->tx_skb and related */
-	spin_lock_irqsave(&self->spinlock, flags);
-
-	/* Fetch current transmit buffer */
-	skb = self->tx_skb;
-
-	/*
-	 * Send out all the data we get, possibly as multiple fragmented
-	 * frames, but this will only happen if the data is larger than the
-	 * max data size. The normal case however is just the opposite, and
-	 * this function may be called multiple times, and will then actually
-	 * defragment the data and send it out as one packet as soon as
-	 * possible, but at a safer point in time
-	 */
-	while (count) {
-		size = count;
-
-		/* Adjust data size to the max data size */
-		if (size > self->max_data_size)
-			size = self->max_data_size;
-
-		/*
-		 * Do we already have a buffer ready for transmit, or do
-		 * we need to allocate a new frame
-		 */
-		if (skb) {
-			/*
-			 * Any room for more data at the end of the current
-			 * transmit buffer? Cannot use skb_tailroom, since
-			 * dev_alloc_skb gives us a larger skb than we
-			 * requested
-			 * Note : use tx_data_size, because max_data_size
-			 * may have changed and we don't want to overwrite
-			 * the skb. - Jean II
-			 */
-			if ((tailroom = (self->tx_data_size - skb->len)) > 0) {
-				/* Adjust data to tailroom */
-				if (size > tailroom)
-					size = tailroom;
-			} else {
-				/*
-				 * Current transmit frame is full, so break
-				 * out, so we can send it as soon as possible
-				 */
-				break;
-			}
-		} else {
-			/* Prepare a full sized frame */
-			skb = alloc_skb(self->max_data_size+
-					self->max_header_size,
-					GFP_ATOMIC);
-			if (!skb) {
-				spin_unlock_irqrestore(&self->spinlock, flags);
-				return -ENOBUFS;
-			}
-			skb_reserve(skb, self->max_header_size);
-			self->tx_skb = skb;
-			/* Remember skb size because max_data_size may
-			 * change later on - Jean II */
-			self->tx_data_size = self->max_data_size;
-		}
-
-		/* Copy data */
-		skb_put_data(skb, buf + len, size);
-
-		count -= size;
-		len += size;
-	}
-
-	spin_unlock_irqrestore(&self->spinlock, flags);
-
-	/*
-	 * Schedule a new thread which will transmit the frame as soon
-	 * as possible, but at a safe point in time. We do this so the
-	 * "user" can give us data multiple times, as PPP does (because of
-	 * its 256 byte tx buffer). We will then defragment and send out
-	 * all this data as one single packet.
-	 */
-	schedule_work(&self->tqueue);
-
-	return len;
-}
-
-/*
- * Function ircomm_tty_write_room (tty)
- *
- *    This routine returns the numbers of characters the tty driver will
- *    accept for queuing to be written. This number is subject to change as
- *    output buffers get emptied, or if the output flow control is acted.
- */
-static int ircomm_tty_write_room(struct tty_struct *tty)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-	unsigned long flags;
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-#ifdef IRCOMM_NO_TX_BEFORE_INIT
-	/* max_header_size tells us if the channel is initialised or not. */
-	if (self->max_header_size == IRCOMM_TTY_HDR_UNINITIALISED)
-		/* Don't bother us yet */
-		return 0;
-#endif
-
-	/* Check if we are allowed to transmit any data.
-	 * hw_stopped is the regular flow control.
-	 * Jean II */
-	if (tty->hw_stopped)
-		ret = 0;
-	else {
-		spin_lock_irqsave(&self->spinlock, flags);
-		if (self->tx_skb)
-			ret = self->tx_data_size - self->tx_skb->len;
-		else
-			ret = self->max_data_size;
-		spin_unlock_irqrestore(&self->spinlock, flags);
-	}
-	pr_debug("%s(), ret=%d\n", __func__ , ret);
-
-	return ret;
-}
-
-/*
- * Function ircomm_tty_wait_until_sent (tty, timeout)
- *
- *    This routine waits until the device has written out all of the
- *    characters in its transmitter FIFO.
- */
-static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-	unsigned long orig_jiffies, poll_time;
-	unsigned long flags;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	orig_jiffies = jiffies;
-
-	/* Set poll time to 200 ms */
-	poll_time = msecs_to_jiffies(200);
-	if (timeout)
-		poll_time = min_t(unsigned long, timeout, poll_time);
-
-	spin_lock_irqsave(&self->spinlock, flags);
-	while (self->tx_skb && self->tx_skb->len) {
-		spin_unlock_irqrestore(&self->spinlock, flags);
-		schedule_timeout_interruptible(poll_time);
-		spin_lock_irqsave(&self->spinlock, flags);
-		if (signal_pending(current))
-			break;
-		if (timeout && time_after(jiffies, orig_jiffies + timeout))
-			break;
-	}
-	spin_unlock_irqrestore(&self->spinlock, flags);
-	__set_current_state(TASK_RUNNING);
-}
-
-/*
- * Function ircomm_tty_throttle (tty)
- *
- *    This routine notifies the tty driver that input buffers for the line
- *    discipline are close to full, and it should somehow signal that no
- *    more characters should be sent to the tty.
- */
-static void ircomm_tty_throttle(struct tty_struct *tty)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	/* Software flow control? */
-	if (I_IXOFF(tty))
-		ircomm_tty_send_xchar(tty, STOP_CHAR(tty));
-
-	/* Hardware flow control? */
-	if (C_CRTSCTS(tty)) {
-		self->settings.dte &= ~IRCOMM_RTS;
-		self->settings.dte |= IRCOMM_DELTA_RTS;
-
-		ircomm_param_request(self, IRCOMM_DTE, TRUE);
-	}
-
-	ircomm_flow_request(self->ircomm, FLOW_STOP);
-}
-
-/*
- * Function ircomm_tty_unthrottle (tty)
- *
- *    This routine notifies the tty drivers that it should signals that
- *    characters can now be sent to the tty without fear of overrunning the
- *    input buffers of the line disciplines.
- */
-static void ircomm_tty_unthrottle(struct tty_struct *tty)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	/* Using software flow control? */
-	if (I_IXOFF(tty))
-		ircomm_tty_send_xchar(tty, START_CHAR(tty));
-
-	/* Using hardware flow control? */
-	if (C_CRTSCTS(tty)) {
-		self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS);
-
-		ircomm_param_request(self, IRCOMM_DTE, TRUE);
-		pr_debug("%s(), FLOW_START\n", __func__);
-	}
-	ircomm_flow_request(self->ircomm, FLOW_START);
-}
-
-/*
- * Function ircomm_tty_chars_in_buffer (tty)
- *
- *    Indicates if there are any data in the buffer
- *
- */
-static int ircomm_tty_chars_in_buffer(struct tty_struct *tty)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-	unsigned long flags;
-	int len = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	spin_lock_irqsave(&self->spinlock, flags);
-
-	if (self->tx_skb)
-		len = self->tx_skb->len;
-
-	spin_unlock_irqrestore(&self->spinlock, flags);
-
-	return len;
-}
-
-static void ircomm_tty_shutdown(struct ircomm_tty_cb *self)
-{
-	unsigned long flags;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	if (!tty_port_initialized(&self->port))
-		return;
-	tty_port_set_initialized(&self->port, 0);
-
-	ircomm_tty_detach_cable(self);
-
-	spin_lock_irqsave(&self->spinlock, flags);
-
-	del_timer(&self->watchdog_timer);
-
-	/* Free parameter buffer */
-	if (self->ctrl_skb) {
-		dev_kfree_skb(self->ctrl_skb);
-		self->ctrl_skb = NULL;
-	}
-
-	/* Free transmit buffer */
-	if (self->tx_skb) {
-		dev_kfree_skb(self->tx_skb);
-		self->tx_skb = NULL;
-	}
-
-	if (self->ircomm) {
-		ircomm_close(self->ircomm);
-		self->ircomm = NULL;
-	}
-
-	spin_unlock_irqrestore(&self->spinlock, flags);
-}
-
-/*
- * Function ircomm_tty_hangup (tty)
- *
- *    This routine notifies the tty driver that it should hangup the tty
- *    device.
- *
- */
-static void ircomm_tty_hangup(struct tty_struct *tty)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-	struct tty_port *port = &self->port;
-	unsigned long	flags;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	/* ircomm_tty_flush_buffer(tty); */
-	ircomm_tty_shutdown(self);
-
-	spin_lock_irqsave(&port->lock, flags);
-	if (port->tty) {
-		set_bit(TTY_IO_ERROR, &port->tty->flags);
-		tty_kref_put(port->tty);
-	}
-	port->tty = NULL;
-	port->count = 0;
-	spin_unlock_irqrestore(&port->lock, flags);
-	tty_port_set_active(port, 0);
-
-	wake_up_interruptible(&port->open_wait);
-}
-
-/*
- * Function ircomm_tty_send_xchar (tty, ch)
- *
- *    This routine is used to send a high-priority XON/XOFF character to
- *    the device.
- */
-static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch)
-{
-	pr_debug("%s(), not impl\n", __func__);
-}
-
-/*
- * Function ircomm_tty_start (tty)
- *
- *    This routine notifies the tty driver that it resume sending
- *    characters to the tty device.
- */
-void ircomm_tty_start(struct tty_struct *tty)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
-	ircomm_flow_request(self->ircomm, FLOW_START);
-}
-
-/*
- * Function ircomm_tty_stop (tty)
- *
- *     This routine notifies the tty driver that it should stop outputting
- *     characters to the tty device.
- */
-static void ircomm_tty_stop(struct tty_struct *tty)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	ircomm_flow_request(self->ircomm, FLOW_STOP);
-}
-
-/*
- * Function ircomm_check_modem_status (self)
- *
- *    Check for any changes in the DCE's line settings. This function should
- *    be called whenever the dce parameter settings changes, to update the
- *    flow control settings and other things
- */
-void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self)
-{
-	struct tty_struct *tty;
-	int status;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	tty = tty_port_tty_get(&self->port);
-
-	status = self->settings.dce;
-
-	if (status & IRCOMM_DCE_DELTA_ANY) {
-		/*wake_up_interruptible(&self->delta_msr_wait);*/
-	}
-	if (tty_port_check_carrier(&self->port) && (status & IRCOMM_DELTA_CD)) {
-		pr_debug("%s(), ircomm%d CD now %s...\n", __func__ , self->line,
-			 (status & IRCOMM_CD) ? "on" : "off");
-
-		if (status & IRCOMM_CD) {
-			wake_up_interruptible(&self->port.open_wait);
-		} else {
-			pr_debug("%s(), Doing serial hangup..\n", __func__);
-			if (tty)
-				tty_hangup(tty);
-
-			/* Hangup will remote the tty, so better break out */
-			goto put;
-		}
-	}
-	if (tty && tty_port_cts_enabled(&self->port)) {
-		if (tty->hw_stopped) {
-			if (status & IRCOMM_CTS) {
-				pr_debug("%s(), CTS tx start...\n", __func__);
-				tty->hw_stopped = 0;
-
-				/* Wake up processes blocked on open */
-				wake_up_interruptible(&self->port.open_wait);
-
-				schedule_work(&self->tqueue);
-				goto put;
-			}
-		} else {
-			if (!(status & IRCOMM_CTS)) {
-				pr_debug("%s(), CTS tx stop...\n", __func__);
-				tty->hw_stopped = 1;
-			}
-		}
-	}
-put:
-	tty_kref_put(tty);
-}
-
-/*
- * Function ircomm_tty_data_indication (instance, sap, skb)
- *
- *    Handle incoming data, and deliver it to the line discipline
- *
- */
-static int ircomm_tty_data_indication(void *instance, void *sap,
-				      struct sk_buff *skb)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-	struct tty_struct *tty;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	tty = tty_port_tty_get(&self->port);
-	if (!tty) {
-		pr_debug("%s(), no tty!\n", __func__);
-		return 0;
-	}
-
-	/*
-	 * If we receive data when hardware is stopped then something is wrong.
-	 * We try to poll the peers line settings to check if we are up todate.
-	 * Devices like WinCE can do this, and since they don't send any
-	 * params, we can just as well declare the hardware for running.
-	 */
-	if (tty->hw_stopped && (self->flow == FLOW_START)) {
-		pr_debug("%s(), polling for line settings!\n", __func__);
-		ircomm_param_request(self, IRCOMM_POLL, TRUE);
-
-		/* We can just as well declare the hardware for running */
-		ircomm_tty_send_initial_parameters(self);
-		ircomm_tty_link_established(self);
-	}
-	tty_kref_put(tty);
-
-	/*
-	 * Use flip buffer functions since the code may be called from interrupt
-	 * context
-	 */
-	tty_insert_flip_string(&self->port, skb->data, skb->len);
-	tty_flip_buffer_push(&self->port);
-
-	/* No need to kfree_skb - see ircomm_ttp_data_indication() */
-
-	return 0;
-}
-
-/*
- * Function ircomm_tty_control_indication (instance, sap, skb)
- *
- *    Parse all incoming parameters (easy!)
- *
- */
-static int ircomm_tty_control_indication(void *instance, void *sap,
-					 struct sk_buff *skb)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-	int clen;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	clen = skb->data[0];
-
-	irda_param_extract_all(self, skb->data+1, IRDA_MIN(skb->len-1, clen),
-			       &ircomm_param_info);
-
-	/* No need to kfree_skb - see ircomm_control_indication() */
-
-	return 0;
-}
-
-/*
- * Function ircomm_tty_flow_indication (instance, sap, cmd)
- *
- *    This function is called by IrTTP when it wants us to slow down the
- *    transmission of data. We just mark the hardware as stopped, and wait
- *    for IrTTP to notify us that things are OK again.
- */
-static void ircomm_tty_flow_indication(void *instance, void *sap,
-				       LOCAL_FLOW cmd)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-	struct tty_struct *tty;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	tty = tty_port_tty_get(&self->port);
-
-	switch (cmd) {
-	case FLOW_START:
-		pr_debug("%s(), hw start!\n", __func__);
-		if (tty)
-			tty->hw_stopped = 0;
-
-		/* ircomm_tty_do_softint will take care of the rest */
-		schedule_work(&self->tqueue);
-		break;
-	default:  /* If we get here, something is very wrong, better stop */
-	case FLOW_STOP:
-		pr_debug("%s(), hw stopped!\n", __func__);
-		if (tty)
-			tty->hw_stopped = 1;
-		break;
-	}
-
-	tty_kref_put(tty);
-	self->flow = cmd;
-}
-
-#ifdef CONFIG_PROC_FS
-static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m)
-{
-	struct tty_struct *tty;
-	char sep;
-
-	seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]);
-
-	seq_puts(m, "Service type: ");
-	if (self->service_type & IRCOMM_9_WIRE)
-		seq_puts(m, "9_WIRE");
-	else if (self->service_type & IRCOMM_3_WIRE)
-		seq_puts(m, "3_WIRE");
-	else if (self->service_type & IRCOMM_3_WIRE_RAW)
-		seq_puts(m, "3_WIRE_RAW");
-	else
-		seq_puts(m, "No common service type!\n");
-	seq_putc(m, '\n');
-
-	seq_printf(m, "Port name: %s\n", self->settings.port_name);
-
-	seq_printf(m, "DTE status:");
-	sep = ' ';
-	if (self->settings.dte & IRCOMM_RTS) {
-		seq_printf(m, "%cRTS", sep);
-		sep = '|';
-	}
-	if (self->settings.dte & IRCOMM_DTR) {
-		seq_printf(m, "%cDTR", sep);
-		sep = '|';
-	}
-	seq_putc(m, '\n');
-
-	seq_puts(m, "DCE status:");
-	sep = ' ';
-	if (self->settings.dce & IRCOMM_CTS) {
-		seq_printf(m, "%cCTS", sep);
-		sep = '|';
-	}
-	if (self->settings.dce & IRCOMM_DSR) {
-		seq_printf(m, "%cDSR", sep);
-		sep = '|';
-	}
-	if (self->settings.dce & IRCOMM_CD) {
-		seq_printf(m, "%cCD", sep);
-		sep = '|';
-	}
-	if (self->settings.dce & IRCOMM_RI) {
-		seq_printf(m, "%cRI", sep);
-		sep = '|';
-	}
-	seq_putc(m, '\n');
-
-	seq_puts(m, "Configuration: ");
-	if (!self->settings.null_modem)
-		seq_puts(m, "DTE <-> DCE\n");
-	else
-		seq_puts(m, "DTE <-> DTE (null modem emulation)\n");
-
-	seq_printf(m, "Data rate: %d\n", self->settings.data_rate);
-
-	seq_puts(m, "Flow control:");
-	sep = ' ';
-	if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) {
-		seq_printf(m, "%cXON_XOFF_IN", sep);
-		sep = '|';
-	}
-	if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) {
-		seq_printf(m, "%cXON_XOFF_OUT", sep);
-		sep = '|';
-	}
-	if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) {
-		seq_printf(m, "%cRTS_CTS_IN", sep);
-		sep = '|';
-	}
-	if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) {
-		seq_printf(m, "%cRTS_CTS_OUT", sep);
-		sep = '|';
-	}
-	if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) {
-		seq_printf(m, "%cDSR_DTR_IN", sep);
-		sep = '|';
-	}
-	if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) {
-		seq_printf(m, "%cDSR_DTR_OUT", sep);
-		sep = '|';
-	}
-	if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) {
-		seq_printf(m, "%cENQ_ACK_IN", sep);
-		sep = '|';
-	}
-	if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) {
-		seq_printf(m, "%cENQ_ACK_OUT", sep);
-		sep = '|';
-	}
-	seq_putc(m, '\n');
-
-	seq_puts(m, "Flags:");
-	sep = ' ';
-	if (tty_port_cts_enabled(&self->port)) {
-		seq_printf(m, "%cASYNC_CTS_FLOW", sep);
-		sep = '|';
-	}
-	if (tty_port_check_carrier(&self->port)) {
-		seq_printf(m, "%cASYNC_CHECK_CD", sep);
-		sep = '|';
-	}
-	if (tty_port_initialized(&self->port)) {
-		seq_printf(m, "%cASYNC_INITIALIZED", sep);
-		sep = '|';
-	}
-	if (self->port.flags & ASYNC_LOW_LATENCY) {
-		seq_printf(m, "%cASYNC_LOW_LATENCY", sep);
-		sep = '|';
-	}
-	if (tty_port_active(&self->port)) {
-		seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep);
-		sep = '|';
-	}
-	seq_putc(m, '\n');
-
-	seq_printf(m, "Role: %s\n", self->client ? "client" : "server");
-	seq_printf(m, "Open count: %d\n", self->port.count);
-	seq_printf(m, "Max data size: %d\n", self->max_data_size);
-	seq_printf(m, "Max header size: %d\n", self->max_header_size);
-
-	tty = tty_port_tty_get(&self->port);
-	if (tty) {
-		seq_printf(m, "Hardware: %s\n",
-			       tty->hw_stopped ? "Stopped" : "Running");
-		tty_kref_put(tty);
-	}
-}
-
-static int ircomm_tty_proc_show(struct seq_file *m, void *v)
-{
-	struct ircomm_tty_cb *self;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags);
-
-	self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty);
-	while (self != NULL) {
-		if (self->magic != IRCOMM_TTY_MAGIC)
-			break;
-
-		ircomm_tty_line_info(self, m);
-		self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty);
-	}
-	spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags);
-	return 0;
-}
-
-static int ircomm_tty_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ircomm_tty_proc_show, NULL);
-}
-
-static const struct file_operations ircomm_tty_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ircomm_tty_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-#endif /* CONFIG_PROC_FS */
-
-MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>");
-MODULE_DESCRIPTION("IrCOMM serial TTY driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CHARDEV_MAJOR(IRCOMM_TTY_MAJOR);
-
-module_init(ircomm_tty_init);
-module_exit(ircomm_tty_cleanup);
diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c
deleted file mode 100644
index 0a411019c098..000000000000
--- a/net/irda/ircomm/ircomm_tty_attach.c
+++ /dev/null
@@ -1,987 +0,0 @@
-/*********************************************************************
- *
- * Filename:      ircomm_tty_attach.c
- * Version:
- * Description:   Code for attaching the serial driver to IrCOMM
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sat Jun  5 17:42:00 1999
- * Modified at:   Tue Jan  4 14:20:49 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/init.h>
-#include <linux/sched.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/parameters.h>
-
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_param.h>
-#include <net/irda/ircomm_event.h>
-
-#include <net/irda/ircomm_tty.h>
-#include <net/irda/ircomm_tty_attach.h>
-
-static void ircomm_tty_ias_register(struct ircomm_tty_cb *self);
-static void ircomm_tty_discovery_indication(discinfo_t *discovery,
-					    DISCOVERY_MODE mode,
-					    void *priv);
-static void ircomm_tty_getvalue_confirm(int result, __u16 obj_id,
-					struct ias_value *value, void *priv);
-static void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self,
-					    int timeout);
-static void ircomm_tty_watchdog_timer_expired(void *data);
-
-static int ircomm_tty_state_idle(struct ircomm_tty_cb *self,
-				 IRCOMM_TTY_EVENT event,
-				 struct sk_buff *skb,
-				 struct ircomm_tty_info *info);
-static int ircomm_tty_state_search(struct ircomm_tty_cb *self,
-				   IRCOMM_TTY_EVENT event,
-				   struct sk_buff *skb,
-				   struct ircomm_tty_info *info);
-static int ircomm_tty_state_query_parameters(struct ircomm_tty_cb *self,
-					     IRCOMM_TTY_EVENT event,
-					     struct sk_buff *skb,
-					     struct ircomm_tty_info *info);
-static int ircomm_tty_state_query_lsap_sel(struct ircomm_tty_cb *self,
-					   IRCOMM_TTY_EVENT event,
-					   struct sk_buff *skb,
-					   struct ircomm_tty_info *info);
-static int ircomm_tty_state_setup(struct ircomm_tty_cb *self,
-				  IRCOMM_TTY_EVENT event,
-				  struct sk_buff *skb,
-				  struct ircomm_tty_info *info);
-static int ircomm_tty_state_ready(struct ircomm_tty_cb *self,
-				  IRCOMM_TTY_EVENT event,
-				  struct sk_buff *skb,
-				  struct ircomm_tty_info *info);
-
-const char *const ircomm_tty_state[] = {
-	"IRCOMM_TTY_IDLE",
-	"IRCOMM_TTY_SEARCH",
-	"IRCOMM_TTY_QUERY_PARAMETERS",
-	"IRCOMM_TTY_QUERY_LSAP_SEL",
-	"IRCOMM_TTY_SETUP",
-	"IRCOMM_TTY_READY",
-	"*** ERROR *** ",
-};
-
-static const char *const ircomm_tty_event[] __maybe_unused = {
-	"IRCOMM_TTY_ATTACH_CABLE",
-	"IRCOMM_TTY_DETACH_CABLE",
-	"IRCOMM_TTY_DATA_REQUEST",
-	"IRCOMM_TTY_DATA_INDICATION",
-	"IRCOMM_TTY_DISCOVERY_REQUEST",
-	"IRCOMM_TTY_DISCOVERY_INDICATION",
-	"IRCOMM_TTY_CONNECT_CONFIRM",
-	"IRCOMM_TTY_CONNECT_INDICATION",
-	"IRCOMM_TTY_DISCONNECT_REQUEST",
-	"IRCOMM_TTY_DISCONNECT_INDICATION",
-	"IRCOMM_TTY_WD_TIMER_EXPIRED",
-	"IRCOMM_TTY_GOT_PARAMETERS",
-	"IRCOMM_TTY_GOT_LSAPSEL",
-	"*** ERROR ****",
-};
-
-static int (*state[])(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event,
-		      struct sk_buff *skb, struct ircomm_tty_info *info) =
-{
-	ircomm_tty_state_idle,
-	ircomm_tty_state_search,
-	ircomm_tty_state_query_parameters,
-	ircomm_tty_state_query_lsap_sel,
-	ircomm_tty_state_setup,
-	ircomm_tty_state_ready,
-};
-
-/*
- * Function ircomm_tty_attach_cable (driver)
- *
- *    Try to attach cable (IrCOMM link). This function will only return
- *    when the link has been connected, or if an error condition occurs.
- *    If success, the return value is the resulting service type.
- */
-int ircomm_tty_attach_cable(struct ircomm_tty_cb *self)
-{
-	struct tty_struct *tty;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	/* Check if somebody has already connected to us */
-	if (ircomm_is_connected(self->ircomm)) {
-		pr_debug("%s(), already connected!\n", __func__);
-		return 0;
-	}
-
-	/* Make sure nobody tries to write before the link is up */
-	tty = tty_port_tty_get(&self->port);
-	if (tty) {
-		tty->hw_stopped = 1;
-		tty_kref_put(tty);
-	}
-
-	ircomm_tty_ias_register(self);
-
-	ircomm_tty_do_event(self, IRCOMM_TTY_ATTACH_CABLE, NULL, NULL);
-
-	return 0;
-}
-
-/*
- * Function ircomm_detach_cable (driver)
- *
- *    Detach cable, or cable has been detached by peer
- *
- */
-void ircomm_tty_detach_cable(struct ircomm_tty_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	del_timer(&self->watchdog_timer);
-
-	/* Remove discovery handler */
-	if (self->ckey) {
-		irlmp_unregister_client(self->ckey);
-		self->ckey = NULL;
-	}
-	/* Remove IrCOMM hint bits */
-	if (self->skey) {
-		irlmp_unregister_service(self->skey);
-		self->skey = NULL;
-	}
-
-	if (self->iriap) {
-		iriap_close(self->iriap);
-		self->iriap = NULL;
-	}
-
-	/* Remove LM-IAS object */
-	if (self->obj) {
-		irias_delete_object(self->obj);
-		self->obj = NULL;
-	}
-
-	ircomm_tty_do_event(self, IRCOMM_TTY_DETACH_CABLE, NULL, NULL);
-
-	/* Reset some values */
-	self->daddr = self->saddr = 0;
-	self->dlsap_sel = self->slsap_sel = 0;
-
-	memset(&self->settings, 0, sizeof(struct ircomm_params));
-}
-
-/*
- * Function ircomm_tty_ias_register (self)
- *
- *    Register with LM-IAS depending on which service type we are
- *
- */
-static void ircomm_tty_ias_register(struct ircomm_tty_cb *self)
-{
-	__u8 oct_seq[6];
-	__u16 hints;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	/* Compute hint bits based on service */
-	hints = irlmp_service_to_hint(S_COMM);
-	if (self->service_type & IRCOMM_3_WIRE_RAW)
-		hints |= irlmp_service_to_hint(S_PRINTER);
-
-	/* Advertise IrCOMM hint bit in discovery */
-	if (!self->skey)
-		self->skey = irlmp_register_service(hints);
-	/* Set up a discovery handler */
-	if (!self->ckey)
-		self->ckey = irlmp_register_client(hints,
-						   ircomm_tty_discovery_indication,
-						   NULL, (void *) self);
-
-	/* If already done, no need to do it again */
-	if (self->obj)
-		return;
-
-	if (self->service_type & IRCOMM_3_WIRE_RAW) {
-		/* Register IrLPT with LM-IAS */
-		self->obj = irias_new_object("IrLPT", IAS_IRLPT_ID);
-		irias_add_integer_attrib(self->obj, "IrDA:IrLMP:LsapSel",
-					 self->slsap_sel, IAS_KERNEL_ATTR);
-	} else {
-		/* Register IrCOMM with LM-IAS */
-		self->obj = irias_new_object("IrDA:IrCOMM", IAS_IRCOMM_ID);
-		irias_add_integer_attrib(self->obj, "IrDA:TinyTP:LsapSel",
-					 self->slsap_sel, IAS_KERNEL_ATTR);
-
-		/* Code the parameters into the buffer */
-		irda_param_pack(oct_seq, "bbbbbb",
-				IRCOMM_SERVICE_TYPE, 1, self->service_type,
-				IRCOMM_PORT_TYPE,    1, IRCOMM_SERIAL);
-
-		/* Register parameters with LM-IAS */
-		irias_add_octseq_attrib(self->obj, "Parameters", oct_seq, 6,
-					IAS_KERNEL_ATTR);
-	}
-	irias_insert_object(self->obj);
-}
-
-/*
- * Function ircomm_tty_ias_unregister (self)
- *
- *    Remove our IAS object and client hook while connected.
- *
- */
-static void ircomm_tty_ias_unregister(struct ircomm_tty_cb *self)
-{
-	/* Remove LM-IAS object now so it is not reused.
-	 * IrCOMM deals very poorly with multiple incoming connections.
-	 * It should looks a lot more like IrNET, and "dup" a server TSAP
-	 * to the application TSAP (based on various rules).
-	 * This is a cheap workaround allowing multiple clients to
-	 * connect to us. It will not always work.
-	 * Each IrCOMM socket has an IAS entry. Incoming connection will
-	 * pick the first one found. So, when we are fully connected,
-	 * we remove our IAS entries so that the next IAS entry is used.
-	 * We do that for *both* client and server, because a server
-	 * can also create client instances.
-	 * Jean II */
-	if (self->obj) {
-		irias_delete_object(self->obj);
-		self->obj = NULL;
-	}
-
-#if 0
-	/* Remove discovery handler.
-	 * While we are connected, we no longer need to receive
-	 * discovery events. This would be the case if there is
-	 * multiple IrLAP interfaces. Jean II */
-	if (self->ckey) {
-		irlmp_unregister_client(self->ckey);
-		self->ckey = NULL;
-	}
-#endif
-}
-
-/*
- * Function ircomm_send_initial_parameters (self)
- *
- *    Send initial parameters to the remote IrCOMM device. These parameters
- *    must be sent before any data.
- */
-int ircomm_tty_send_initial_parameters(struct ircomm_tty_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (self->service_type & IRCOMM_3_WIRE_RAW)
-		return 0;
-
-	/*
-	 * Set default values, but only if the application for some reason
-	 * haven't set them already
-	 */
-	pr_debug("%s(), data-rate = %d\n", __func__ ,
-		 self->settings.data_rate);
-	if (!self->settings.data_rate)
-		self->settings.data_rate = 9600;
-	pr_debug("%s(), data-format = %d\n", __func__ ,
-		 self->settings.data_format);
-	if (!self->settings.data_format)
-		self->settings.data_format = IRCOMM_WSIZE_8;  /* 8N1 */
-
-	pr_debug("%s(), flow-control = %d\n", __func__ ,
-		 self->settings.flow_control);
-	/*self->settings.flow_control = IRCOMM_RTS_CTS_IN|IRCOMM_RTS_CTS_OUT;*/
-
-	/* Do not set delta values for the initial parameters */
-	self->settings.dte = IRCOMM_DTR | IRCOMM_RTS;
-
-	/* Only send service type parameter when we are the client */
-	if (self->client)
-		ircomm_param_request(self, IRCOMM_SERVICE_TYPE, FALSE);
-	ircomm_param_request(self, IRCOMM_DATA_RATE, FALSE);
-	ircomm_param_request(self, IRCOMM_DATA_FORMAT, FALSE);
-
-	/* For a 3 wire service, we just flush the last parameter and return */
-	if (self->settings.service_type == IRCOMM_3_WIRE) {
-		ircomm_param_request(self, IRCOMM_FLOW_CONTROL, TRUE);
-		return 0;
-	}
-
-	/* Only 9-wire service types continue here */
-	ircomm_param_request(self, IRCOMM_FLOW_CONTROL, FALSE);
-#if 0
-	ircomm_param_request(self, IRCOMM_XON_XOFF, FALSE);
-	ircomm_param_request(self, IRCOMM_ENQ_ACK, FALSE);
-#endif
-	/* Notify peer that we are ready to receive data */
-	ircomm_param_request(self, IRCOMM_DTE, TRUE);
-
-	return 0;
-}
-
-/*
- * Function ircomm_tty_discovery_indication (discovery)
- *
- *    Remote device is discovered, try query the remote IAS to see which
- *    device it is, and which services it has.
- *
- */
-static void ircomm_tty_discovery_indication(discinfo_t *discovery,
-					    DISCOVERY_MODE mode,
-					    void *priv)
-{
-	struct ircomm_tty_cb *self;
-	struct ircomm_tty_info info;
-
-	/* Important note :
-	 * We need to drop all passive discoveries.
-	 * The LSAP management of IrComm is deficient and doesn't deal
-	 * with the case of two instance connecting to each other
-	 * simultaneously (it will deadlock in LMP).
-	 * The proper fix would be to use the same technique as in IrNET,
-	 * to have one server socket and separate instances for the
-	 * connecting/connected socket.
-	 * The workaround is to drop passive discovery, which drastically
-	 * reduce the probability of this happening.
-	 * Jean II */
-	if(mode == DISCOVERY_PASSIVE)
-		return;
-
-	info.daddr = discovery->daddr;
-	info.saddr = discovery->saddr;
-
-	self = priv;
-	ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION,
-			    NULL, &info);
-}
-
-/*
- * Function ircomm_tty_disconnect_indication (instance, sap, reason, skb)
- *
- *    Link disconnected
- *
- */
-void ircomm_tty_disconnect_indication(void *instance, void *sap,
-				      LM_REASON reason,
-				      struct sk_buff *skb)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-	struct tty_struct *tty;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	tty = tty_port_tty_get(&self->port);
-	if (!tty)
-		return;
-
-	/* This will stop control data transfers */
-	self->flow = FLOW_STOP;
-
-	/* Stop data transfers */
-	tty->hw_stopped = 1;
-
-	ircomm_tty_do_event(self, IRCOMM_TTY_DISCONNECT_INDICATION, NULL,
-			    NULL);
-	tty_kref_put(tty);
-}
-
-/*
- * Function ircomm_tty_getvalue_confirm (result, obj_id, value, priv)
- *
- *    Got result from the IAS query we make
- *
- */
-static void ircomm_tty_getvalue_confirm(int result, __u16 obj_id,
-					struct ias_value *value,
-					void *priv)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) priv;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	/* We probably don't need to make any more queries */
-	iriap_close(self->iriap);
-	self->iriap = NULL;
-
-	/* Check if request succeeded */
-	if (result != IAS_SUCCESS) {
-		pr_debug("%s(), got NULL value!\n", __func__);
-		return;
-	}
-
-	switch (value->type) {
-	case IAS_OCT_SEQ:
-		pr_debug("%s(), got octet sequence\n", __func__);
-
-		irda_param_extract_all(self, value->t.oct_seq, value->len,
-				       &ircomm_param_info);
-
-		ircomm_tty_do_event(self, IRCOMM_TTY_GOT_PARAMETERS, NULL,
-				    NULL);
-		break;
-	case IAS_INTEGER:
-		/* Got LSAP selector */
-		pr_debug("%s(), got lsapsel = %d\n", __func__ ,
-			 value->t.integer);
-
-		if (value->t.integer == -1) {
-			pr_debug("%s(), invalid value!\n", __func__);
-		} else
-			self->dlsap_sel = value->t.integer;
-
-		ircomm_tty_do_event(self, IRCOMM_TTY_GOT_LSAPSEL, NULL, NULL);
-		break;
-	case IAS_MISSING:
-		pr_debug("%s(), got IAS_MISSING\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), got unknown type!\n", __func__);
-		break;
-	}
-	irias_delete_value(value);
-}
-
-/*
- * Function ircomm_tty_connect_confirm (instance, sap, qos, max_sdu_size, skb)
- *
- *    Connection confirmed
- *
- */
-void ircomm_tty_connect_confirm(void *instance, void *sap,
-				struct qos_info *qos,
-				__u32 max_data_size,
-				__u8 max_header_size,
-				struct sk_buff *skb)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	self->client = TRUE;
-	self->max_data_size = max_data_size;
-	self->max_header_size = max_header_size;
-	self->flow = FLOW_START;
-
-	ircomm_tty_do_event(self, IRCOMM_TTY_CONNECT_CONFIRM, NULL, NULL);
-
-	/* No need to kfree_skb - see ircomm_ttp_connect_confirm() */
-}
-
-/*
- * Function ircomm_tty_connect_indication (instance, sap, qos, max_sdu_size,
- *                                         skb)
- *
- *    we are discovered and being requested to connect by remote device !
- *
- */
-void ircomm_tty_connect_indication(void *instance, void *sap,
-				   struct qos_info *qos,
-				   __u32 max_data_size,
-				   __u8 max_header_size,
-				   struct sk_buff *skb)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
-	int clen;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	self->client = FALSE;
-	self->max_data_size = max_data_size;
-	self->max_header_size = max_header_size;
-	self->flow = FLOW_START;
-
-	clen = skb->data[0];
-	if (clen)
-		irda_param_extract_all(self, skb->data+1,
-				       IRDA_MIN(skb->len, clen),
-				       &ircomm_param_info);
-
-	ircomm_tty_do_event(self, IRCOMM_TTY_CONNECT_INDICATION, NULL, NULL);
-
-	/* No need to kfree_skb - see ircomm_ttp_connect_indication() */
-}
-
-/*
- * Function ircomm_tty_link_established (self)
- *
- *    Called when the IrCOMM link is established
- *
- */
-void ircomm_tty_link_established(struct ircomm_tty_cb *self)
-{
-	struct tty_struct *tty;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	tty = tty_port_tty_get(&self->port);
-	if (!tty)
-		return;
-
-	del_timer(&self->watchdog_timer);
-
-	/*
-	 * IrCOMM link is now up, and if we are not using hardware
-	 * flow-control, then declare the hardware as running. Otherwise we
-	 * will have to wait for the peer device (DCE) to raise the CTS
-	 * line.
-	 */
-	if (tty_port_cts_enabled(&self->port) &&
-			((self->settings.dce & IRCOMM_CTS) == 0)) {
-		pr_debug("%s(), waiting for CTS ...\n", __func__);
-		goto put;
-	} else {
-		pr_debug("%s(), starting hardware!\n", __func__);
-
-		tty->hw_stopped = 0;
-
-		/* Wake up processes blocked on open */
-		wake_up_interruptible(&self->port.open_wait);
-	}
-
-	schedule_work(&self->tqueue);
-put:
-	tty_kref_put(tty);
-}
-
-/*
- * Function ircomm_tty_start_watchdog_timer (self, timeout)
- *
- *    Start the watchdog timer. This timer is used to make sure that any
- *    connection attempt is successful, and if not, we will retry after
- *    the timeout
- */
-static void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self,
-					    int timeout)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	irda_start_timer(&self->watchdog_timer, timeout, (void *) self,
-			 ircomm_tty_watchdog_timer_expired);
-}
-
-/*
- * Function ircomm_tty_watchdog_timer_expired (data)
- *
- *    Called when the connect procedure have taken to much time.
- *
- */
-static void ircomm_tty_watchdog_timer_expired(void *data)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	ircomm_tty_do_event(self, IRCOMM_TTY_WD_TIMER_EXPIRED, NULL, NULL);
-}
-
-
-/*
- * Function ircomm_tty_do_event (self, event, skb)
- *
- *    Process event
- *
- */
-int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event,
-			struct sk_buff *skb, struct ircomm_tty_info *info)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	pr_debug("%s: state=%s, event=%s\n", __func__ ,
-		 ircomm_tty_state[self->state], ircomm_tty_event[event]);
-
-	return (*state[self->state])(self, event, skb, info);
-}
-
-/*
- * Function ircomm_tty_next_state (self, state)
- *
- *    Switch state
- *
- */
-static inline void ircomm_tty_next_state(struct ircomm_tty_cb *self, IRCOMM_TTY_STATE state)
-{
-	/*
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
-
-	pr_debug("%s: next state=%s, service type=%d\n", __func__ ,
-	ircomm_tty_state[self->state], self->service_type);
-	*/
-	self->state = state;
-}
-
-/*
- * Function ircomm_tty_state_idle (self, event, skb, info)
- *
- *    Just hanging around
- *
- */
-static int ircomm_tty_state_idle(struct ircomm_tty_cb *self,
-				 IRCOMM_TTY_EVENT event,
-				 struct sk_buff *skb,
-				 struct ircomm_tty_info *info)
-{
-	int ret = 0;
-
-	pr_debug("%s: state=%s, event=%s\n", __func__ ,
-		 ircomm_tty_state[self->state], ircomm_tty_event[event]);
-	switch (event) {
-	case IRCOMM_TTY_ATTACH_CABLE:
-		/* Try to discover any remote devices */
-		ircomm_tty_start_watchdog_timer(self, 3*HZ);
-		ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
-
-		irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS);
-		break;
-	case IRCOMM_TTY_DISCOVERY_INDICATION:
-		self->daddr = info->daddr;
-		self->saddr = info->saddr;
-
-		if (self->iriap) {
-			net_warn_ratelimited("%s(), busy with a previous query\n",
-					     __func__);
-			return -EBUSY;
-		}
-
-		self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
-					 ircomm_tty_getvalue_confirm);
-
-		iriap_getvaluebyclass_request(self->iriap,
-					      self->saddr, self->daddr,
-					      "IrDA:IrCOMM", "Parameters");
-
-		ircomm_tty_start_watchdog_timer(self, 3*HZ);
-		ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_PARAMETERS);
-		break;
-	case IRCOMM_TTY_CONNECT_INDICATION:
-		del_timer(&self->watchdog_timer);
-
-		/* Accept connection */
-		ircomm_connect_response(self->ircomm, NULL);
-		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
-		break;
-	case IRCOMM_TTY_WD_TIMER_EXPIRED:
-		/* Just stay idle */
-		break;
-	case IRCOMM_TTY_DETACH_CABLE:
-		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
-		break;
-	default:
-		pr_debug("%s(), unknown event: %s\n", __func__ ,
-			 ircomm_tty_event[event]);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
-/*
- * Function ircomm_tty_state_search (self, event, skb, info)
- *
- *    Trying to discover an IrCOMM device
- *
- */
-static int ircomm_tty_state_search(struct ircomm_tty_cb *self,
-				   IRCOMM_TTY_EVENT event,
-				   struct sk_buff *skb,
-				   struct ircomm_tty_info *info)
-{
-	int ret = 0;
-
-	pr_debug("%s: state=%s, event=%s\n", __func__ ,
-		 ircomm_tty_state[self->state], ircomm_tty_event[event]);
-
-	switch (event) {
-	case IRCOMM_TTY_DISCOVERY_INDICATION:
-		self->daddr = info->daddr;
-		self->saddr = info->saddr;
-
-		if (self->iriap) {
-			net_warn_ratelimited("%s(), busy with a previous query\n",
-					     __func__);
-			return -EBUSY;
-		}
-
-		self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
-					 ircomm_tty_getvalue_confirm);
-
-		if (self->service_type == IRCOMM_3_WIRE_RAW) {
-			iriap_getvaluebyclass_request(self->iriap, self->saddr,
-						      self->daddr, "IrLPT",
-						      "IrDA:IrLMP:LsapSel");
-			ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_LSAP_SEL);
-		} else {
-			iriap_getvaluebyclass_request(self->iriap, self->saddr,
-						      self->daddr,
-						      "IrDA:IrCOMM",
-						      "Parameters");
-
-			ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_PARAMETERS);
-		}
-		ircomm_tty_start_watchdog_timer(self, 3*HZ);
-		break;
-	case IRCOMM_TTY_CONNECT_INDICATION:
-		del_timer(&self->watchdog_timer);
-		ircomm_tty_ias_unregister(self);
-
-		/* Accept connection */
-		ircomm_connect_response(self->ircomm, NULL);
-		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
-		break;
-	case IRCOMM_TTY_WD_TIMER_EXPIRED:
-#if 1
-		/* Give up */
-#else
-		/* Try to discover any remote devices */
-		ircomm_tty_start_watchdog_timer(self, 3*HZ);
-		irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS);
-#endif
-		break;
-	case IRCOMM_TTY_DETACH_CABLE:
-		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
-		break;
-	default:
-		pr_debug("%s(), unknown event: %s\n", __func__ ,
-			 ircomm_tty_event[event]);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
-/*
- * Function ircomm_tty_state_query (self, event, skb, info)
- *
- *    Querying the remote LM-IAS for IrCOMM parameters
- *
- */
-static int ircomm_tty_state_query_parameters(struct ircomm_tty_cb *self,
-					     IRCOMM_TTY_EVENT event,
-					     struct sk_buff *skb,
-					     struct ircomm_tty_info *info)
-{
-	int ret = 0;
-
-	pr_debug("%s: state=%s, event=%s\n", __func__ ,
-		 ircomm_tty_state[self->state], ircomm_tty_event[event]);
-
-	switch (event) {
-	case IRCOMM_TTY_GOT_PARAMETERS:
-		if (self->iriap) {
-			net_warn_ratelimited("%s(), busy with a previous query\n",
-					     __func__);
-			return -EBUSY;
-		}
-
-		self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
-					 ircomm_tty_getvalue_confirm);
-
-		iriap_getvaluebyclass_request(self->iriap, self->saddr,
-					      self->daddr, "IrDA:IrCOMM",
-					      "IrDA:TinyTP:LsapSel");
-
-		ircomm_tty_start_watchdog_timer(self, 3*HZ);
-		ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_LSAP_SEL);
-		break;
-	case IRCOMM_TTY_WD_TIMER_EXPIRED:
-		/* Go back to search mode */
-		ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
-		ircomm_tty_start_watchdog_timer(self, 3*HZ);
-		break;
-	case IRCOMM_TTY_CONNECT_INDICATION:
-		del_timer(&self->watchdog_timer);
-		ircomm_tty_ias_unregister(self);
-
-		/* Accept connection */
-		ircomm_connect_response(self->ircomm, NULL);
-		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
-		break;
-	case IRCOMM_TTY_DETACH_CABLE:
-		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
-		break;
-	default:
-		pr_debug("%s(), unknown event: %s\n", __func__ ,
-			 ircomm_tty_event[event]);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
-/*
- * Function ircomm_tty_state_query_lsap_sel (self, event, skb, info)
- *
- *    Query remote LM-IAS for the LSAP selector which we can connect to
- *
- */
-static int ircomm_tty_state_query_lsap_sel(struct ircomm_tty_cb *self,
-					   IRCOMM_TTY_EVENT event,
-					   struct sk_buff *skb,
-					   struct ircomm_tty_info *info)
-{
-	int ret = 0;
-
-	pr_debug("%s: state=%s, event=%s\n", __func__ ,
-		 ircomm_tty_state[self->state], ircomm_tty_event[event]);
-
-	switch (event) {
-	case IRCOMM_TTY_GOT_LSAPSEL:
-		/* Connect to remote device */
-		ret = ircomm_connect_request(self->ircomm, self->dlsap_sel,
-					     self->saddr, self->daddr,
-					     NULL, self->service_type);
-		ircomm_tty_start_watchdog_timer(self, 3*HZ);
-		ircomm_tty_next_state(self, IRCOMM_TTY_SETUP);
-		break;
-	case IRCOMM_TTY_WD_TIMER_EXPIRED:
-		/* Go back to search mode */
-		ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
-		ircomm_tty_start_watchdog_timer(self, 3*HZ);
-		break;
-	case IRCOMM_TTY_CONNECT_INDICATION:
-		del_timer(&self->watchdog_timer);
-		ircomm_tty_ias_unregister(self);
-
-		/* Accept connection */
-		ircomm_connect_response(self->ircomm, NULL);
-		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
-		break;
-	case IRCOMM_TTY_DETACH_CABLE:
-		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
-		break;
-	default:
-		pr_debug("%s(), unknown event: %s\n", __func__ ,
-			 ircomm_tty_event[event]);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
-/*
- * Function ircomm_tty_state_setup (self, event, skb, info)
- *
- *    Trying to connect
- *
- */
-static int ircomm_tty_state_setup(struct ircomm_tty_cb *self,
-				  IRCOMM_TTY_EVENT event,
-				  struct sk_buff *skb,
-				  struct ircomm_tty_info *info)
-{
-	int ret = 0;
-
-	pr_debug("%s: state=%s, event=%s\n", __func__ ,
-		 ircomm_tty_state[self->state], ircomm_tty_event[event]);
-
-	switch (event) {
-	case IRCOMM_TTY_CONNECT_CONFIRM:
-		del_timer(&self->watchdog_timer);
-		ircomm_tty_ias_unregister(self);
-
-		/*
-		 * Send initial parameters. This will also send out queued
-		 * parameters waiting for the connection to come up
-		 */
-		ircomm_tty_send_initial_parameters(self);
-		ircomm_tty_link_established(self);
-		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
-		break;
-	case IRCOMM_TTY_CONNECT_INDICATION:
-		del_timer(&self->watchdog_timer);
-		ircomm_tty_ias_unregister(self);
-
-		/* Accept connection */
-		ircomm_connect_response(self->ircomm, NULL);
-		ircomm_tty_next_state(self, IRCOMM_TTY_READY);
-		break;
-	case IRCOMM_TTY_WD_TIMER_EXPIRED:
-		/* Go back to search mode */
-		ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
-		ircomm_tty_start_watchdog_timer(self, 3*HZ);
-		break;
-	case IRCOMM_TTY_DETACH_CABLE:
-		/* ircomm_disconnect_request(self->ircomm, NULL); */
-		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
-		break;
-	default:
-		pr_debug("%s(), unknown event: %s\n", __func__ ,
-			 ircomm_tty_event[event]);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
-/*
- * Function ircomm_tty_state_ready (self, event, skb, info)
- *
- *    IrCOMM is now connected
- *
- */
-static int ircomm_tty_state_ready(struct ircomm_tty_cb *self,
-				  IRCOMM_TTY_EVENT event,
-				  struct sk_buff *skb,
-				  struct ircomm_tty_info *info)
-{
-	int ret = 0;
-
-	switch (event) {
-	case IRCOMM_TTY_DATA_REQUEST:
-		ret = ircomm_data_request(self->ircomm, skb);
-		break;
-	case IRCOMM_TTY_DETACH_CABLE:
-		ircomm_disconnect_request(self->ircomm, NULL);
-		ircomm_tty_next_state(self, IRCOMM_TTY_IDLE);
-		break;
-	case IRCOMM_TTY_DISCONNECT_INDICATION:
-		ircomm_tty_ias_register(self);
-		ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
-		ircomm_tty_start_watchdog_timer(self, 3*HZ);
-
-		if (tty_port_check_carrier(&self->port)) {
-			/* Drop carrier */
-			self->settings.dce = IRCOMM_DELTA_CD;
-			ircomm_tty_check_modem_status(self);
-		} else {
-			pr_debug("%s(), hanging up!\n", __func__);
-			tty_port_tty_hangup(&self->port, false);
-		}
-		break;
-	default:
-		pr_debug("%s(), unknown event: %s\n", __func__ ,
-			 ircomm_tty_event[event]);
-		ret = -EINVAL;
-	}
-	return ret;
-}
-
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
deleted file mode 100644
index 171c3dee760e..000000000000
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ /dev/null
@@ -1,291 +0,0 @@
-/*********************************************************************
- *
- * Filename:      ircomm_tty_ioctl.c
- * Version:
- * Description:
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Thu Jun 10 14:39:09 1999
- * Modified at:   Wed Jan  5 14:45:43 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/termios.h>
-#include <linux/tty.h>
-#include <linux/serial.h>
-
-#include <linux/uaccess.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irmod.h>
-
-#include <net/irda/ircomm_core.h>
-#include <net/irda/ircomm_param.h>
-#include <net/irda/ircomm_tty_attach.h>
-#include <net/irda/ircomm_tty.h>
-
-#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
-
-/*
- * Function ircomm_tty_change_speed (driver)
- *
- *    Change speed of the driver. If the remote device is a DCE, then this
- *    should make it change the speed of its serial port
- */
-static void ircomm_tty_change_speed(struct ircomm_tty_cb *self,
-		struct tty_struct *tty)
-{
-	unsigned int cflag, cval;
-	int baud;
-
-	if (!self->ircomm)
-		return;
-
-	cflag = tty->termios.c_cflag;
-
-	/*  byte size and parity */
-	switch (cflag & CSIZE) {
-	case CS5: cval = IRCOMM_WSIZE_5; break;
-	case CS6: cval = IRCOMM_WSIZE_6; break;
-	case CS7: cval = IRCOMM_WSIZE_7; break;
-	case CS8: cval = IRCOMM_WSIZE_8; break;
-	default:  cval = IRCOMM_WSIZE_5; break;
-	}
-	if (cflag & CSTOPB)
-		cval |= IRCOMM_2_STOP_BIT;
-
-	if (cflag & PARENB)
-		cval |= IRCOMM_PARITY_ENABLE;
-	if (!(cflag & PARODD))
-		cval |= IRCOMM_PARITY_EVEN;
-
-	/* Determine divisor based on baud rate */
-	baud = tty_get_baud_rate(tty);
-	if (!baud)
-		baud = 9600;	/* B0 transition handled in rs_set_termios */
-
-	self->settings.data_rate = baud;
-	ircomm_param_request(self, IRCOMM_DATA_RATE, FALSE);
-
-	/* CTS flow control flag and modem status interrupts */
-	tty_port_set_cts_flow(&self->port, cflag & CRTSCTS);
-	if (cflag & CRTSCTS) {
-		self->settings.flow_control |= IRCOMM_RTS_CTS_IN;
-		/* This got me. Bummer. Jean II */
-		if (self->service_type == IRCOMM_3_WIRE_RAW)
-			net_warn_ratelimited("%s(), enabling RTS/CTS on link that doesn't support it (3-wire-raw)\n",
-					     __func__);
-	} else {
-		self->settings.flow_control &= ~IRCOMM_RTS_CTS_IN;
-	}
-	tty_port_set_check_carrier(&self->port, ~cflag & CLOCAL);
-
-	self->settings.data_format = cval;
-
-	ircomm_param_request(self, IRCOMM_DATA_FORMAT, FALSE);
-	ircomm_param_request(self, IRCOMM_FLOW_CONTROL, TRUE);
-}
-
-/*
- * Function ircomm_tty_set_termios (tty, old_termios)
- *
- *    This routine allows the tty driver to be notified when device's
- *    termios settings have changed.  Note that a well-designed tty driver
- *    should be prepared to accept the case where old == NULL, and try to
- *    do something rational.
- */
-void ircomm_tty_set_termios(struct tty_struct *tty,
-			    struct ktermios *old_termios)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-	unsigned int cflag = tty->termios.c_cflag;
-
-	if ((cflag == old_termios->c_cflag) &&
-	    (RELEVANT_IFLAG(tty->termios.c_iflag) ==
-	     RELEVANT_IFLAG(old_termios->c_iflag)))
-	{
-		return;
-	}
-
-	ircomm_tty_change_speed(self, tty);
-
-	/* Handle transition to B0 status */
-	if ((old_termios->c_cflag & CBAUD) && !(cflag & CBAUD)) {
-		self->settings.dte &= ~(IRCOMM_DTR|IRCOMM_RTS);
-		ircomm_param_request(self, IRCOMM_DTE, TRUE);
-	}
-
-	/* Handle transition away from B0 status */
-	if (!(old_termios->c_cflag & CBAUD) && (cflag & CBAUD)) {
-		self->settings.dte |= IRCOMM_DTR;
-		if (!C_CRTSCTS(tty) || !tty_throttled(tty))
-			self->settings.dte |= IRCOMM_RTS;
-		ircomm_param_request(self, IRCOMM_DTE, TRUE);
-	}
-
-	/* Handle turning off CRTSCTS */
-	if ((old_termios->c_cflag & CRTSCTS) && !C_CRTSCTS(tty))
-	{
-		tty->hw_stopped = 0;
-		ircomm_tty_start(tty);
-	}
-}
-
-/*
- * Function ircomm_tty_tiocmget (tty)
- *
- *
- *
- */
-int ircomm_tty_tiocmget(struct tty_struct *tty)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-	unsigned int result;
-
-	if (tty_io_error(tty))
-		return -EIO;
-
-	result =  ((self->settings.dte & IRCOMM_RTS) ? TIOCM_RTS : 0)
-		| ((self->settings.dte & IRCOMM_DTR) ? TIOCM_DTR : 0)
-		| ((self->settings.dce & IRCOMM_CD)  ? TIOCM_CAR : 0)
-		| ((self->settings.dce & IRCOMM_RI)  ? TIOCM_RNG : 0)
-		| ((self->settings.dce & IRCOMM_DSR) ? TIOCM_DSR : 0)
-		| ((self->settings.dce & IRCOMM_CTS) ? TIOCM_CTS : 0);
-	return result;
-}
-
-/*
- * Function ircomm_tty_tiocmset (tty, set, clear)
- *
- *
- *
- */
-int ircomm_tty_tiocmset(struct tty_struct *tty,
-			unsigned int set, unsigned int clear)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-
-	if (tty_io_error(tty))
-		return -EIO;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
-
-	if (set & TIOCM_RTS)
-		self->settings.dte |= IRCOMM_RTS;
-	if (set & TIOCM_DTR)
-		self->settings.dte |= IRCOMM_DTR;
-
-	if (clear & TIOCM_RTS)
-		self->settings.dte &= ~IRCOMM_RTS;
-	if (clear & TIOCM_DTR)
-		self->settings.dte &= ~IRCOMM_DTR;
-
-	if ((set|clear) & TIOCM_RTS)
-		self->settings.dte |= IRCOMM_DELTA_RTS;
-	if ((set|clear) & TIOCM_DTR)
-		self->settings.dte |= IRCOMM_DELTA_DTR;
-
-	ircomm_param_request(self, IRCOMM_DTE, TRUE);
-
-	return 0;
-}
-
-/*
- * Function get_serial_info (driver, retinfo)
- *
- *
- *
- */
-static int ircomm_tty_get_serial_info(struct ircomm_tty_cb *self,
-				      struct serial_struct __user *retinfo)
-{
-	struct serial_struct info;
-
-	memset(&info, 0, sizeof(info));
-	info.line = self->line;
-	info.flags = self->port.flags;
-	info.baud_base = self->settings.data_rate;
-	info.close_delay = self->port.close_delay;
-	info.closing_wait = self->port.closing_wait;
-
-	/* For compatibility  */
-	info.type = PORT_16550A;
-
-	if (copy_to_user(retinfo, &info, sizeof(*retinfo)))
-		return -EFAULT;
-
-	return 0;
-}
-
-/*
- * Function set_serial_info (driver, new_info)
- *
- *
- *
- */
-static int ircomm_tty_set_serial_info(struct ircomm_tty_cb *self,
-				      struct serial_struct __user *new_info)
-{
-	return 0;
-}
-
-/*
- * Function ircomm_tty_ioctl (tty, cmd, arg)
- *
- *
- *
- */
-int ircomm_tty_ioctl(struct tty_struct *tty,
-		     unsigned int cmd, unsigned long arg)
-{
-	struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
-	int ret = 0;
-
-	if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) &&
-	    (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) &&
-	    (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) {
-		if (tty_io_error(tty))
-		    return -EIO;
-	}
-
-	switch (cmd) {
-	case TIOCGSERIAL:
-		ret = ircomm_tty_get_serial_info(self, (struct serial_struct __user *) arg);
-		break;
-	case TIOCSSERIAL:
-		ret = ircomm_tty_set_serial_info(self, (struct serial_struct __user *) arg);
-		break;
-	case TIOCMIWAIT:
-		pr_debug("(), TIOCMIWAIT, not impl!\n");
-		break;
-
-	case TIOCGICOUNT:
-		pr_debug("%s(), TIOCGICOUNT not impl!\n", __func__);
-		return 0;
-	default:
-		ret = -ENOIOCTLCMD;  /* ioctls which we must ignore */
-	}
-	return ret;
-}
-
-
-
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
deleted file mode 100644
index 890b90d055d5..000000000000
--- a/net/irda/irda_device.c
+++ /dev/null
@@ -1,316 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irda_device.c
- * Version:       0.9
- * Description:   Utility functions used by the device drivers
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sat Oct  9 09:22:27 1999
- * Modified at:   Sun Jan 23 17:41:24 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/string.h>
-#include <linux/proc_fs.h>
-#include <linux/skbuff.h>
-#include <linux/capability.h>
-#include <linux/if.h>
-#include <linux/if_ether.h>
-#include <linux/if_arp.h>
-#include <linux/netdevice.h>
-#include <linux/init.h>
-#include <linux/tty.h>
-#include <linux/kmod.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-#include <asm/ioctls.h>
-#include <linux/uaccess.h>
-#include <asm/dma.h>
-#include <asm/io.h>
-
-#include <net/irda/irda_device.h>
-#include <net/irda/irlap.h>
-#include <net/irda/timer.h>
-#include <net/irda/wrapper.h>
-
-static void __irda_task_delete(struct irda_task *task);
-
-static hashbin_t *dongles = NULL;
-static hashbin_t *tasks = NULL;
-
-static void irda_task_timer_expired(void *data);
-
-int __init irda_device_init( void)
-{
-	dongles = hashbin_new(HB_NOLOCK);
-	if (dongles == NULL) {
-		net_warn_ratelimited("IrDA: Can't allocate dongles hashbin!\n");
-		return -ENOMEM;
-	}
-	spin_lock_init(&dongles->hb_spinlock);
-
-	tasks = hashbin_new(HB_LOCK);
-	if (tasks == NULL) {
-		net_warn_ratelimited("IrDA: Can't allocate tasks hashbin!\n");
-		hashbin_delete(dongles, NULL);
-		return -ENOMEM;
-	}
-
-	/* We no longer initialise the driver ourselves here, we let
-	 * the system do it for us... - Jean II */
-
-	return 0;
-}
-
-static void leftover_dongle(void *arg)
-{
-	struct dongle_reg *reg = arg;
-	net_warn_ratelimited("IrDA: Dongle type %x not unregistered\n",
-			     reg->type);
-}
-
-void irda_device_cleanup(void)
-{
-	hashbin_delete(tasks, (FREE_FUNC) __irda_task_delete);
-
-	hashbin_delete(dongles, leftover_dongle);
-}
-
-/*
- * Function irda_device_set_media_busy (self, status)
- *
- *    Called when we have detected that another station is transmitting
- *    in contention mode.
- */
-void irda_device_set_media_busy(struct net_device *dev, int status)
-{
-	struct irlap_cb *self;
-
-	pr_debug("%s(%s)\n", __func__, status ? "TRUE" : "FALSE");
-
-	self = (struct irlap_cb *) dev->atalk_ptr;
-
-	/* Some drivers may enable the receive interrupt before calling
-	 * irlap_open(), or they may disable the receive interrupt
-	 * after calling irlap_close().
-	 * The IrDA stack is protected from this in irlap_driver_rcv().
-	 * However, the driver calls directly the wrapper, that calls
-	 * us directly. Make sure we protect ourselves.
-	 * Jean II */
-	if (!self || self->magic != LAP_MAGIC)
-		return;
-
-	if (status) {
-		self->media_busy = TRUE;
-		if (status == SMALL)
-			irlap_start_mbusy_timer(self, SMALLBUSY_TIMEOUT);
-		else
-			irlap_start_mbusy_timer(self, MEDIABUSY_TIMEOUT);
-		pr_debug("Media busy!\n");
-	} else {
-		self->media_busy = FALSE;
-		irlap_stop_mbusy_timer(self);
-	}
-}
-EXPORT_SYMBOL(irda_device_set_media_busy);
-
-
-/*
- * Function irda_device_is_receiving (dev)
- *
- *    Check if the device driver is currently receiving data
- *
- */
-int irda_device_is_receiving(struct net_device *dev)
-{
-	struct if_irda_req req;
-	int ret;
-
-	if (!dev->netdev_ops->ndo_do_ioctl) {
-		net_err_ratelimited("%s: do_ioctl not impl. by device driver\n",
-				    __func__);
-		return -1;
-	}
-
-	ret = (dev->netdev_ops->ndo_do_ioctl)(dev, (struct ifreq *) &req,
-					      SIOCGRECEIVING);
-	if (ret < 0)
-		return ret;
-
-	return req.ifr_receiving;
-}
-
-static void __irda_task_delete(struct irda_task *task)
-{
-	del_timer(&task->timer);
-
-	kfree(task);
-}
-
-static void irda_task_delete(struct irda_task *task)
-{
-	/* Unregister task */
-	hashbin_remove(tasks, (long) task, NULL);
-
-	__irda_task_delete(task);
-}
-
-/*
- * Function irda_task_kick (task)
- *
- *    Tries to execute a task possible multiple times until the task is either
- *    finished, or askes for a timeout. When a task is finished, we do post
- *    processing, and notify the parent task, that is waiting for this task
- *    to complete.
- */
-static int irda_task_kick(struct irda_task *task)
-{
-	int finished = TRUE;
-	int count = 0;
-	int timeout;
-
-	IRDA_ASSERT(task != NULL, return -1;);
-	IRDA_ASSERT(task->magic == IRDA_TASK_MAGIC, return -1;);
-
-	/* Execute task until it's finished, or askes for a timeout */
-	do {
-		timeout = task->function(task);
-		if (count++ > 100) {
-			net_err_ratelimited("%s: error in task handler!\n",
-					    __func__);
-			irda_task_delete(task);
-			return TRUE;
-		}
-	} while ((timeout == 0) && (task->state != IRDA_TASK_DONE));
-
-	if (timeout < 0) {
-		net_err_ratelimited("%s: Error executing task!\n", __func__);
-		irda_task_delete(task);
-		return TRUE;
-	}
-
-	/* Check if we are finished */
-	if (task->state == IRDA_TASK_DONE) {
-		del_timer(&task->timer);
-
-		/* Do post processing */
-		if (task->finished)
-			task->finished(task);
-
-		/* Notify parent */
-		if (task->parent) {
-			/* Check if parent is waiting for us to complete */
-			if (task->parent->state == IRDA_TASK_CHILD_WAIT) {
-				task->parent->state = IRDA_TASK_CHILD_DONE;
-
-				/* Stop timer now that we are here */
-				del_timer(&task->parent->timer);
-
-				/* Kick parent task */
-				irda_task_kick(task->parent);
-			}
-		}
-		irda_task_delete(task);
-	} else if (timeout > 0) {
-		irda_start_timer(&task->timer, timeout, (void *) task,
-				 irda_task_timer_expired);
-		finished = FALSE;
-	} else {
-		pr_debug("%s(), not finished, and no timeout!\n",
-			 __func__);
-		finished = FALSE;
-	}
-
-	return finished;
-}
-
-/*
- * Function irda_task_timer_expired (data)
- *
- *    Task time has expired. We now try to execute task (again), and restart
- *    the timer if the task has not finished yet
- */
-static void irda_task_timer_expired(void *data)
-{
-	struct irda_task *task;
-
-	task = data;
-
-	irda_task_kick(task);
-}
-
-/*
- * Function irda_device_setup (dev)
- *
- *    This function should be used by low level device drivers in a similar way
- *    as ether_setup() is used by normal network device drivers
- */
-static void irda_device_setup(struct net_device *dev)
-{
-	dev->hard_header_len = 0;
-	dev->addr_len        = LAP_ALEN;
-
-	dev->type            = ARPHRD_IRDA;
-	dev->tx_queue_len    = 8; /* Window size + 1 s-frame */
-
-	memset(dev->broadcast, 0xff, LAP_ALEN);
-
-	dev->mtu = 2048;
-	dev->flags = IFF_NOARP;
-}
-
-/*
- * Funciton  alloc_irdadev
- * 	Allocates and sets up an IRDA device in a manner similar to
- * 	alloc_etherdev.
- */
-struct net_device *alloc_irdadev(int sizeof_priv)
-{
-	return alloc_netdev(sizeof_priv, "irda%d", NET_NAME_UNKNOWN,
-			    irda_device_setup);
-}
-EXPORT_SYMBOL(alloc_irdadev);
-
-#ifdef CONFIG_ISA_DMA_API
-/*
- * Function setup_dma (idev, buffer, count, mode)
- *
- *    Setup the DMA channel. Commonly used by LPC FIR drivers
- *
- */
-void irda_setup_dma(int channel, dma_addr_t buffer, int count, int mode)
-{
-	unsigned long flags;
-
-	flags = claim_dma_lock();
-
-	disable_dma(channel);
-	clear_dma_ff(channel);
-	set_dma_mode(channel, mode);
-	set_dma_addr(channel, buffer);
-	set_dma_count(channel, count);
-	enable_dma(channel);
-
-	release_dma_lock(flags);
-}
-EXPORT_SYMBOL(irda_setup_dma);
-#endif
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
deleted file mode 100644
index 1138eaf5c682..000000000000
--- a/net/irda/iriap.c
+++ /dev/null
@@ -1,1085 +0,0 @@
-/*********************************************************************
- *
- * Filename:      iriap.c
- * Version:       0.8
- * Description:   Information Access Protocol (IAP)
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Thu Aug 21 00:02:07 1997
- * Modified at:   Sat Dec 25 16:42:42 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/iriap_event.h>
-#include <net/irda/iriap.h>
-
-/* FIXME: This one should go in irlmp.c */
-static const char *const ias_charset_types[] __maybe_unused = {
-	"CS_ASCII",
-	"CS_ISO_8859_1",
-	"CS_ISO_8859_2",
-	"CS_ISO_8859_3",
-	"CS_ISO_8859_4",
-	"CS_ISO_8859_5",
-	"CS_ISO_8859_6",
-	"CS_ISO_8859_7",
-	"CS_ISO_8859_8",
-	"CS_ISO_8859_9",
-	"CS_UNICODE"
-};
-
-static hashbin_t *iriap = NULL;
-static void *service_handle;
-
-static void __iriap_close(struct iriap_cb *self);
-static int iriap_register_lsap(struct iriap_cb *self, __u8 slsap_sel, int mode);
-static void iriap_disconnect_indication(void *instance, void *sap,
-					LM_REASON reason, struct sk_buff *skb);
-static void iriap_connect_indication(void *instance, void *sap,
-				     struct qos_info *qos, __u32 max_sdu_size,
-				     __u8 max_header_size,
-				     struct sk_buff *skb);
-static void iriap_connect_confirm(void *instance, void *sap,
-				  struct qos_info *qos,
-				  __u32 max_sdu_size, __u8 max_header_size,
-				  struct sk_buff *skb);
-static int iriap_data_indication(void *instance, void *sap,
-				 struct sk_buff *skb);
-
-static void iriap_watchdog_timer_expired(void *data);
-
-static inline void iriap_start_watchdog_timer(struct iriap_cb *self,
-					      int timeout)
-{
-	irda_start_timer(&self->watchdog_timer, timeout, self,
-			 iriap_watchdog_timer_expired);
-}
-
-static struct lock_class_key irias_objects_key;
-
-/*
- * Function iriap_init (void)
- *
- *    Initializes the IrIAP layer, called by the module initialization code
- *    in irmod.c
- */
-int __init iriap_init(void)
-{
-	struct ias_object *obj;
-	struct iriap_cb *server;
-	__u8 oct_seq[6];
-	__u16 hints;
-
-	/* Allocate master array */
-	iriap = hashbin_new(HB_LOCK);
-	if (!iriap)
-		return -ENOMEM;
-
-	/* Object repository - defined in irias_object.c */
-	irias_objects = hashbin_new(HB_LOCK);
-	if (!irias_objects) {
-		net_warn_ratelimited("%s: Can't allocate irias_objects hashbin!\n",
-				     __func__);
-		hashbin_delete(iriap, NULL);
-		return -ENOMEM;
-	}
-
-	lockdep_set_class_and_name(&irias_objects->hb_spinlock, &irias_objects_key,
-				   "irias_objects");
-
-	/*
-	 *  Register some default services for IrLMP
-	 */
-	hints  = irlmp_service_to_hint(S_COMPUTER);
-	service_handle = irlmp_register_service(hints);
-
-	/* Register the Device object with LM-IAS */
-	obj = irias_new_object("Device", IAS_DEVICE_ID);
-	irias_add_string_attrib(obj, "DeviceName", "Linux", IAS_KERNEL_ATTR);
-
-	oct_seq[0] = 0x01;  /* Version 1 */
-	oct_seq[1] = 0x00;  /* IAS support bits */
-	oct_seq[2] = 0x00;  /* LM-MUX support bits */
-#ifdef CONFIG_IRDA_ULTRA
-	oct_seq[2] |= 0x04; /* Connectionless Data support */
-#endif
-	irias_add_octseq_attrib(obj, "IrLMPSupport", oct_seq, 3,
-				IAS_KERNEL_ATTR);
-	irias_insert_object(obj);
-
-	/*
-	 *  Register server support with IrLMP so we can accept incoming
-	 *  connections
-	 */
-	server = iriap_open(LSAP_IAS, IAS_SERVER, NULL, NULL);
-	if (!server) {
-		pr_debug("%s(), unable to open server\n", __func__);
-		return -1;
-	}
-	iriap_register_lsap(server, LSAP_IAS, IAS_SERVER);
-
-	return 0;
-}
-
-/*
- * Function iriap_cleanup (void)
- *
- *    Initializes the IrIAP layer, called by the module cleanup code in
- *    irmod.c
- */
-void iriap_cleanup(void)
-{
-	irlmp_unregister_service(service_handle);
-
-	hashbin_delete(iriap, (FREE_FUNC) __iriap_close);
-	hashbin_delete(irias_objects, (FREE_FUNC) __irias_delete_object);
-}
-
-/*
- * Function iriap_open (void)
- *
- *    Opens an instance of the IrIAP layer, and registers with IrLMP
- */
-struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv,
-			    CONFIRM_CALLBACK callback)
-{
-	struct iriap_cb *self;
-
-	self = kzalloc(sizeof(*self), GFP_ATOMIC);
-	if (!self)
-		return NULL;
-
-	/*
-	 *  Initialize instance
-	 */
-
-	self->magic = IAS_MAGIC;
-	self->mode = mode;
-	if (mode == IAS_CLIENT) {
-		if (iriap_register_lsap(self, slsap_sel, mode)) {
-			kfree(self);
-			return NULL;
-		}
-	}
-
-	self->confirm = callback;
-	self->priv = priv;
-
-	/* iriap_getvaluebyclass_request() will construct packets before
-	 * we connect, so this must have a sane value... Jean II */
-	self->max_header_size = LMP_MAX_HEADER;
-
-	init_timer(&self->watchdog_timer);
-
-	hashbin_insert(iriap, (irda_queue_t *) self, (long) self, NULL);
-
-	/* Initialize state machines */
-	iriap_next_client_state(self, S_DISCONNECT);
-	iriap_next_call_state(self, S_MAKE_CALL);
-	iriap_next_server_state(self, R_DISCONNECT);
-	iriap_next_r_connect_state(self, R_WAITING);
-
-	return self;
-}
-EXPORT_SYMBOL(iriap_open);
-
-/*
- * Function __iriap_close (self)
- *
- *    Removes (deallocates) the IrIAP instance
- *
- */
-static void __iriap_close(struct iriap_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	del_timer(&self->watchdog_timer);
-
-	if (self->request_skb)
-		dev_kfree_skb(self->request_skb);
-
-	self->magic = 0;
-
-	kfree(self);
-}
-
-/*
- * Function iriap_close (void)
- *
- *    Closes IrIAP and deregisters with IrLMP
- */
-void iriap_close(struct iriap_cb *self)
-{
-	struct iriap_cb *entry;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	if (self->lsap) {
-		irlmp_close_lsap(self->lsap);
-		self->lsap = NULL;
-	}
-
-	entry = (struct iriap_cb *) hashbin_remove(iriap, (long) self, NULL);
-	IRDA_ASSERT(entry == self, return;);
-
-	__iriap_close(self);
-}
-EXPORT_SYMBOL(iriap_close);
-
-static int iriap_register_lsap(struct iriap_cb *self, __u8 slsap_sel, int mode)
-{
-	notify_t notify;
-
-	irda_notify_init(&notify);
-	notify.connect_confirm       = iriap_connect_confirm;
-	notify.connect_indication    = iriap_connect_indication;
-	notify.disconnect_indication = iriap_disconnect_indication;
-	notify.data_indication       = iriap_data_indication;
-	notify.instance = self;
-	if (mode == IAS_CLIENT)
-		strcpy(notify.name, "IrIAS cli");
-	else
-		strcpy(notify.name, "IrIAS srv");
-
-	self->lsap = irlmp_open_lsap(slsap_sel, &notify, 0);
-	if (self->lsap == NULL) {
-		net_err_ratelimited("%s: Unable to allocated LSAP!\n",
-				    __func__);
-		return -1;
-	}
-	self->slsap_sel = self->lsap->slsap_sel;
-
-	return 0;
-}
-
-/*
- * Function iriap_disconnect_indication (handle, reason)
- *
- *    Got disconnect, so clean up everything associated with this connection
- *
- */
-static void iriap_disconnect_indication(void *instance, void *sap,
-					LM_REASON reason,
-					struct sk_buff *skb)
-{
-	struct iriap_cb *self;
-
-	pr_debug("%s(), reason=%s [%d]\n", __func__,
-		 irlmp_reason_str(reason), reason);
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	IRDA_ASSERT(iriap != NULL, return;);
-
-	del_timer(&self->watchdog_timer);
-
-	/* Not needed */
-	if (skb)
-		dev_kfree_skb(skb);
-
-	if (self->mode == IAS_CLIENT) {
-		pr_debug("%s(), disconnect as client\n", __func__);
-
-
-		iriap_do_client_event(self, IAP_LM_DISCONNECT_INDICATION,
-				      NULL);
-		/*
-		 * Inform service user that the request failed by sending
-		 * it a NULL value. Warning, the client might close us, so
-		 * remember no to use self anymore after calling confirm
-		 */
-		if (self->confirm)
-			self->confirm(IAS_DISCONNECT, 0, NULL, self->priv);
-	} else {
-		pr_debug("%s(), disconnect as server\n", __func__);
-		iriap_do_server_event(self, IAP_LM_DISCONNECT_INDICATION,
-				      NULL);
-		iriap_close(self);
-	}
-}
-
-/*
- * Function iriap_disconnect_request (handle)
- */
-static void iriap_disconnect_request(struct iriap_cb *self)
-{
-	struct sk_buff *tx_skb;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
-	if (tx_skb == NULL) {
-		pr_debug("%s(), Could not allocate an sk_buff of length %d\n",
-			 __func__, LMP_MAX_HEADER);
-		return;
-	}
-
-	/*
-	 *  Reserve space for MUX control and LAP header
-	 */
-	skb_reserve(tx_skb, LMP_MAX_HEADER);
-
-	irlmp_disconnect_request(self->lsap, tx_skb);
-}
-
-/*
- * Function iriap_getvaluebyclass (addr, name, attr)
- *
- *    Retrieve all values from attribute in all objects with given class
- *    name
- */
-int iriap_getvaluebyclass_request(struct iriap_cb *self,
-				  __u32 saddr, __u32 daddr,
-				  char *name, char *attr)
-{
-	struct sk_buff *tx_skb;
-	int name_len, attr_len, skb_len;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return -1;);
-
-	/* Client must supply the destination device address */
-	if (!daddr)
-		return -1;
-
-	self->daddr = daddr;
-	self->saddr = saddr;
-
-	/*
-	 *  Save operation, so we know what the later indication is about
-	 */
-	self->operation = GET_VALUE_BY_CLASS;
-
-	/* Give ourselves 10 secs to finish this operation */
-	iriap_start_watchdog_timer(self, 10*HZ);
-
-	name_len = strlen(name);	/* Up to IAS_MAX_CLASSNAME = 60 */
-	attr_len = strlen(attr);	/* Up to IAS_MAX_ATTRIBNAME = 60 */
-
-	skb_len = self->max_header_size+2+name_len+1+attr_len+4;
-	tx_skb = alloc_skb(skb_len, GFP_ATOMIC);
-	if (!tx_skb)
-		return -ENOMEM;
-
-	/* Reserve space for MUX and LAP header */
-	skb_reserve(tx_skb, self->max_header_size);
-	skb_put(tx_skb, 3+name_len+attr_len);
-	frame = tx_skb->data;
-
-	/* Build frame */
-	frame[0] = IAP_LST | GET_VALUE_BY_CLASS;
-	frame[1] = name_len;                       /* Insert length of name */
-	memcpy(frame+2, name, name_len);           /* Insert name */
-	frame[2+name_len] = attr_len;              /* Insert length of attr */
-	memcpy(frame+3+name_len, attr, attr_len);  /* Insert attr */
-
-	iriap_do_client_event(self, IAP_CALL_REQUEST_GVBC, tx_skb);
-
-	/* Drop reference count - see state_s_disconnect(). */
-	dev_kfree_skb(tx_skb);
-
-	return 0;
-}
-EXPORT_SYMBOL(iriap_getvaluebyclass_request);
-
-/*
- * Function iriap_getvaluebyclass_confirm (self, skb)
- *
- *    Got result from GetValueByClass command. Parse it and return result
- *    to service user.
- *
- */
-static void iriap_getvaluebyclass_confirm(struct iriap_cb *self,
-					  struct sk_buff *skb)
-{
-	struct ias_value *value;
-	int charset;
-	__u32 value_len;
-	__u32 tmp_cpu32;
-	__u16 obj_id;
-	__u16 len;
-	__u8  type;
-	__u8 *fp;
-	int n;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	/* Initialize variables */
-	fp = skb->data;
-	n = 2;
-
-	/* Get length, MSB first */
-	len = get_unaligned_be16(fp + n);
-	n += 2;
-
-	pr_debug("%s(), len=%d\n", __func__, len);
-
-	/* Get object ID, MSB first */
-	obj_id = get_unaligned_be16(fp + n);
-	n += 2;
-
-	type = fp[n++];
-	pr_debug("%s(), Value type = %d\n", __func__, type);
-
-	switch (type) {
-	case IAS_INTEGER:
-		memcpy(&tmp_cpu32, fp+n, 4); n += 4;
-		be32_to_cpus(&tmp_cpu32);
-		value = irias_new_integer_value(tmp_cpu32);
-
-		/*  Legal values restricted to 0x01-0x6f, page 15 irttp */
-		pr_debug("%s(), lsap=%d\n", __func__, value->t.integer);
-		break;
-	case IAS_STRING:
-		charset = fp[n++];
-
-		switch (charset) {
-		case CS_ASCII:
-			break;
-/*		case CS_ISO_8859_1: */
-/*		case CS_ISO_8859_2: */
-/*		case CS_ISO_8859_3: */
-/*		case CS_ISO_8859_4: */
-/*		case CS_ISO_8859_5: */
-/*		case CS_ISO_8859_6: */
-/*		case CS_ISO_8859_7: */
-/*		case CS_ISO_8859_8: */
-/*		case CS_ISO_8859_9: */
-/*		case CS_UNICODE: */
-		default:
-			pr_debug("%s(), charset [%d] %s, not supported\n",
-				 __func__, charset,
-				 charset < ARRAY_SIZE(ias_charset_types) ?
-				 ias_charset_types[charset] :
-				 "(unknown)");
-
-			/* Aborting, close connection! */
-			iriap_disconnect_request(self);
-			return;
-			/* break; */
-		}
-		value_len = fp[n++];
-		pr_debug("%s(), strlen=%d\n", __func__, value_len);
-
-		/* Make sure the string is null-terminated */
-		if (n + value_len < skb->len)
-			fp[n + value_len] = 0x00;
-		pr_debug("Got string %s\n", fp+n);
-
-		/* Will truncate to IAS_MAX_STRING bytes */
-		value = irias_new_string_value(fp+n);
-		break;
-	case IAS_OCT_SEQ:
-		value_len = get_unaligned_be16(fp + n);
-		n += 2;
-
-		/* Will truncate to IAS_MAX_OCTET_STRING bytes */
-		value = irias_new_octseq_value(fp+n, value_len);
-		break;
-	default:
-		value = irias_new_missing_value();
-		break;
-	}
-
-	/* Finished, close connection! */
-	iriap_disconnect_request(self);
-
-	/* Warning, the client might close us, so remember no to use self
-	 * anymore after calling confirm
-	 */
-	if (self->confirm)
-		self->confirm(IAS_SUCCESS, obj_id, value, self->priv);
-	else {
-		pr_debug("%s(), missing handler!\n", __func__);
-		irias_delete_value(value);
-	}
-}
-
-/*
- * Function iriap_getvaluebyclass_response ()
- *
- *    Send answer back to remote LM-IAS
- *
- */
-static void iriap_getvaluebyclass_response(struct iriap_cb *self,
-					   __u16 obj_id,
-					   __u8 ret_code,
-					   struct ias_value *value)
-{
-	struct sk_buff *tx_skb;
-	int n;
-	__be32 tmp_be32;
-	__be16 tmp_be16;
-	__u8 *fp;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-	IRDA_ASSERT(value != NULL, return;);
-	IRDA_ASSERT(value->len <= 1024, return;);
-
-	/* Initialize variables */
-	n = 0;
-
-	/*
-	 *  We must adjust the size of the response after the length of the
-	 *  value. We add 32 bytes because of the 6 bytes for the frame and
-	 *  max 5 bytes for the value coding.
-	 */
-	tx_skb = alloc_skb(value->len + self->max_header_size + 32,
-			   GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	/* Reserve space for MUX and LAP header */
-	skb_reserve(tx_skb, self->max_header_size);
-	skb_put(tx_skb, 6);
-
-	fp = tx_skb->data;
-
-	/* Build frame */
-	fp[n++] = GET_VALUE_BY_CLASS | IAP_LST;
-	fp[n++] = ret_code;
-
-	/* Insert list length (MSB first) */
-	tmp_be16 = htons(0x0001);
-	memcpy(fp+n, &tmp_be16, 2);  n += 2;
-
-	/* Insert object identifier ( MSB first) */
-	tmp_be16 = cpu_to_be16(obj_id);
-	memcpy(fp+n, &tmp_be16, 2); n += 2;
-
-	switch (value->type) {
-	case IAS_STRING:
-		skb_put(tx_skb, 3 + value->len);
-		fp[n++] = value->type;
-		fp[n++] = 0; /* ASCII */
-		fp[n++] = (__u8) value->len;
-		memcpy(fp+n, value->t.string, value->len); n+=value->len;
-		break;
-	case IAS_INTEGER:
-		skb_put(tx_skb, 5);
-		fp[n++] = value->type;
-
-		tmp_be32 = cpu_to_be32(value->t.integer);
-		memcpy(fp+n, &tmp_be32, 4); n += 4;
-		break;
-	case IAS_OCT_SEQ:
-		skb_put(tx_skb, 3 + value->len);
-		fp[n++] = value->type;
-
-		tmp_be16 = cpu_to_be16(value->len);
-		memcpy(fp+n, &tmp_be16, 2); n += 2;
-		memcpy(fp+n, value->t.oct_seq, value->len); n+=value->len;
-		break;
-	case IAS_MISSING:
-		pr_debug("%s: sending IAS_MISSING\n", __func__);
-		skb_put(tx_skb, 1);
-		fp[n++] = value->type;
-		break;
-	default:
-		pr_debug("%s(), type not implemented!\n", __func__);
-		break;
-	}
-	iriap_do_r_connect_event(self, IAP_CALL_RESPONSE, tx_skb);
-
-	/* Drop reference count - see state_r_execute(). */
-	dev_kfree_skb(tx_skb);
-}
-
-/*
- * Function iriap_getvaluebyclass_indication (self, skb)
- *
- *    getvaluebyclass is requested from peer LM-IAS
- *
- */
-static void iriap_getvaluebyclass_indication(struct iriap_cb *self,
-					     struct sk_buff *skb)
-{
-	struct ias_object *obj;
-	struct ias_attrib *attrib;
-	int name_len;
-	int attr_len;
-	char name[IAS_MAX_CLASSNAME + 1];	/* 60 bytes */
-	char attr[IAS_MAX_ATTRIBNAME + 1];	/* 60 bytes */
-	__u8 *fp;
-	int n;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	fp = skb->data;
-	n = 1;
-
-	name_len = fp[n++];
-
-	IRDA_ASSERT(name_len < IAS_MAX_CLASSNAME + 1, return;);
-
-	memcpy(name, fp+n, name_len); n+=name_len;
-	name[name_len] = '\0';
-
-	attr_len = fp[n++];
-
-	IRDA_ASSERT(attr_len < IAS_MAX_ATTRIBNAME + 1, return;);
-
-	memcpy(attr, fp+n, attr_len); n+=attr_len;
-	attr[attr_len] = '\0';
-
-	pr_debug("LM-IAS: Looking up %s: %s\n", name, attr);
-	obj = irias_find_object(name);
-
-	if (obj == NULL) {
-		pr_debug("LM-IAS: Object %s not found\n", name);
-		iriap_getvaluebyclass_response(self, 0x1235, IAS_CLASS_UNKNOWN,
-					       &irias_missing);
-		return;
-	}
-	pr_debug("LM-IAS: found %s, id=%d\n", obj->name, obj->id);
-
-	attrib = irias_find_attrib(obj, attr);
-	if (attrib == NULL) {
-		pr_debug("LM-IAS: Attribute %s not found\n", attr);
-		iriap_getvaluebyclass_response(self, obj->id,
-					       IAS_ATTRIB_UNKNOWN,
-					       &irias_missing);
-		return;
-	}
-
-	/* We have a match; send the value.  */
-	iriap_getvaluebyclass_response(self, obj->id, IAS_SUCCESS,
-				       attrib->value);
-}
-
-/*
- * Function iriap_send_ack (void)
- *
- *    Currently not used
- *
- */
-void iriap_send_ack(struct iriap_cb *self)
-{
-	struct sk_buff *tx_skb;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	tx_skb = alloc_skb(LMP_MAX_HEADER + 1, GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	/* Reserve space for MUX and LAP header */
-	skb_reserve(tx_skb, self->max_header_size);
-	skb_put(tx_skb, 1);
-	frame = tx_skb->data;
-
-	/* Build frame */
-	frame[0] = IAP_LST | IAP_ACK | self->operation;
-
-	irlmp_data_request(self->lsap, tx_skb);
-}
-
-void iriap_connect_request(struct iriap_cb *self)
-{
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	ret = irlmp_connect_request(self->lsap, LSAP_IAS,
-				    self->saddr, self->daddr,
-				    NULL, NULL);
-	if (ret < 0) {
-		pr_debug("%s(), connect failed!\n", __func__);
-		self->confirm(IAS_DISCONNECT, 0, NULL, self->priv);
-	}
-}
-
-/*
- * Function iriap_connect_confirm (handle, skb)
- *
- *    LSAP connection confirmed!
- *
- */
-static void iriap_connect_confirm(void *instance, void *sap,
-				  struct qos_info *qos, __u32 max_seg_size,
-				  __u8 max_header_size,
-				  struct sk_buff *skb)
-{
-	struct iriap_cb *self;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	self->max_data_size = max_seg_size;
-	self->max_header_size = max_header_size;
-
-	del_timer(&self->watchdog_timer);
-
-	iriap_do_client_event(self, IAP_LM_CONNECT_CONFIRM, skb);
-
-	/* Drop reference count - see state_s_make_call(). */
-	dev_kfree_skb(skb);
-}
-
-/*
- * Function iriap_connect_indication ( handle, skb)
- *
- *    Remote LM-IAS is requesting connection
- *
- */
-static void iriap_connect_indication(void *instance, void *sap,
-				     struct qos_info *qos, __u32 max_seg_size,
-				     __u8 max_header_size,
-				     struct sk_buff *skb)
-{
-	struct iriap_cb *self, *new;
-
-	self = instance;
-
-	IRDA_ASSERT(skb != NULL, return;);
-	IRDA_ASSERT(self != NULL, goto out;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, goto out;);
-
-	/* Start new server */
-	new = iriap_open(LSAP_IAS, IAS_SERVER, NULL, NULL);
-	if (!new) {
-		pr_debug("%s(), open failed\n", __func__);
-		goto out;
-	}
-
-	/* Now attach up the new "socket" */
-	new->lsap = irlmp_dup(self->lsap, new);
-	if (!new->lsap) {
-		pr_debug("%s(), dup failed!\n", __func__);
-		goto out;
-	}
-
-	new->max_data_size = max_seg_size;
-	new->max_header_size = max_header_size;
-
-	/* Clean up the original one to keep it in listen state */
-	irlmp_listen(self->lsap);
-
-	iriap_do_server_event(new, IAP_LM_CONNECT_INDICATION, skb);
-
-out:
-	/* Drop reference count - see state_r_disconnect(). */
-	dev_kfree_skb(skb);
-}
-
-/*
- * Function iriap_data_indication (handle, skb)
- *
- *    Receives data from connection identified by handle from IrLMP
- *
- */
-static int iriap_data_indication(void *instance, void *sap,
-				 struct sk_buff *skb)
-{
-	struct iriap_cb *self;
-	__u8  *frame;
-	__u8  opcode;
-
-	self = instance;
-
-	IRDA_ASSERT(skb != NULL, return 0;);
-	IRDA_ASSERT(self != NULL, goto out;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, goto out;);
-
-	frame = skb->data;
-
-	if (self->mode == IAS_SERVER) {
-		/* Call server */
-		pr_debug("%s(), Calling server!\n", __func__);
-		iriap_do_r_connect_event(self, IAP_RECV_F_LST, skb);
-		goto out;
-	}
-	opcode = frame[0];
-	if (~opcode & IAP_LST) {
-		net_warn_ratelimited("%s:, IrIAS multiframe commands or results is not implemented yet!\n",
-				     __func__);
-		goto out;
-	}
-
-	/* Check for ack frames since they don't contain any data */
-	if (opcode & IAP_ACK) {
-		pr_debug("%s() Got ack frame!\n", __func__);
-		goto out;
-	}
-
-	opcode &= ~IAP_LST; /* Mask away LST bit */
-
-	switch (opcode) {
-	case GET_INFO_BASE:
-		pr_debug("IrLMP GetInfoBaseDetails not implemented!\n");
-		break;
-	case GET_VALUE_BY_CLASS:
-		iriap_do_call_event(self, IAP_RECV_F_LST, NULL);
-
-		switch (frame[1]) {
-		case IAS_SUCCESS:
-			iriap_getvaluebyclass_confirm(self, skb);
-			break;
-		case IAS_CLASS_UNKNOWN:
-			pr_debug("%s(), No such class!\n", __func__);
-			/* Finished, close connection! */
-			iriap_disconnect_request(self);
-
-			/*
-			 * Warning, the client might close us, so remember
-			 * no to use self anymore after calling confirm
-			 */
-			if (self->confirm)
-				self->confirm(IAS_CLASS_UNKNOWN, 0, NULL,
-					      self->priv);
-			break;
-		case IAS_ATTRIB_UNKNOWN:
-			pr_debug("%s(), No such attribute!\n", __func__);
-			/* Finished, close connection! */
-			iriap_disconnect_request(self);
-
-			/*
-			 * Warning, the client might close us, so remember
-			 * no to use self anymore after calling confirm
-			 */
-			if (self->confirm)
-				self->confirm(IAS_ATTRIB_UNKNOWN, 0, NULL,
-					      self->priv);
-			break;
-		}
-		break;
-	default:
-		pr_debug("%s(), Unknown op-code: %02x\n", __func__,
-			 opcode);
-		break;
-	}
-
-out:
-	/* Cleanup - sub-calls will have done skb_get() as needed. */
-	dev_kfree_skb(skb);
-	return 0;
-}
-
-/*
- * Function iriap_call_indication (self, skb)
- *
- *    Received call to server from peer LM-IAS
- *
- */
-void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb)
-{
-	__u8 *fp;
-	__u8 opcode;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	fp = skb->data;
-
-	opcode = fp[0];
-	if (~opcode & 0x80) {
-		net_warn_ratelimited("%s: IrIAS multiframe commands or results is not implemented yet!\n",
-				     __func__);
-		return;
-	}
-	opcode &= 0x7f; /* Mask away LST bit */
-
-	switch (opcode) {
-	case GET_INFO_BASE:
-		net_warn_ratelimited("%s: GetInfoBaseDetails not implemented yet!\n",
-				     __func__);
-		break;
-	case GET_VALUE_BY_CLASS:
-		iriap_getvaluebyclass_indication(self, skb);
-		break;
-	}
-	/* skb will be cleaned up in iriap_data_indication */
-}
-
-/*
- * Function iriap_watchdog_timer_expired (data)
- *
- *    Query has taken too long time, so abort
- *
- */
-static void iriap_watchdog_timer_expired(void *data)
-{
-	struct iriap_cb *self = (struct iriap_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	/* iriap_close(self); */
-}
-
-#ifdef CONFIG_PROC_FS
-
-static const char *const ias_value_types[] = {
-	"IAS_MISSING",
-	"IAS_INTEGER",
-	"IAS_OCT_SEQ",
-	"IAS_STRING"
-};
-
-static inline struct ias_object *irias_seq_idx(loff_t pos)
-{
-	struct ias_object *obj;
-
-	for (obj = (struct ias_object *) hashbin_get_first(irias_objects);
-	     obj; obj = (struct ias_object *) hashbin_get_next(irias_objects)) {
-		if (pos-- == 0)
-			break;
-	}
-
-	return obj;
-}
-
-static void *irias_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	spin_lock_irq(&irias_objects->hb_spinlock);
-
-	return *pos ? irias_seq_idx(*pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *irias_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-
-	return (v == SEQ_START_TOKEN)
-		? (void *) hashbin_get_first(irias_objects)
-		: (void *) hashbin_get_next(irias_objects);
-}
-
-static void irias_seq_stop(struct seq_file *seq, void *v)
-{
-	spin_unlock_irq(&irias_objects->hb_spinlock);
-}
-
-static int irias_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "LM-IAS Objects:\n");
-	else {
-		struct ias_object *obj = v;
-		struct ias_attrib *attrib;
-
-		IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -EINVAL;);
-
-		seq_printf(seq, "name: %s, id=%d\n",
-			   obj->name, obj->id);
-
-		/* Careful for priority inversions here !
-		 * All other uses of attrib spinlock are independent of
-		 * the object spinlock, so we are safe. Jean II */
-		spin_lock(&obj->attribs->hb_spinlock);
-
-		/* List all attributes for this object */
-		for (attrib = (struct ias_attrib *) hashbin_get_first(obj->attribs);
-		     attrib != NULL;
-		     attrib = (struct ias_attrib *) hashbin_get_next(obj->attribs)) {
-
-			IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC,
-				    goto outloop; );
-
-			seq_printf(seq, " - Attribute name: \"%s\", ",
-				   attrib->name);
-			seq_printf(seq, "value[%s]: ",
-				   ias_value_types[attrib->value->type]);
-
-			switch (attrib->value->type) {
-			case IAS_INTEGER:
-				seq_printf(seq, "%d\n",
-					   attrib->value->t.integer);
-				break;
-			case IAS_STRING:
-				seq_printf(seq, "\"%s\"\n",
-					   attrib->value->t.string);
-				break;
-			case IAS_OCT_SEQ:
-				seq_printf(seq, "octet sequence (%d bytes)\n",
-					   attrib->value->len);
-				break;
-			case IAS_MISSING:
-				seq_puts(seq, "missing\n");
-				break;
-			default:
-				seq_printf(seq, "type %d?\n",
-					   attrib->value->type);
-			}
-			seq_putc(seq, '\n');
-
-		}
-	IRDA_ASSERT_LABEL(outloop:)
-		spin_unlock(&obj->attribs->hb_spinlock);
-	}
-
-	return 0;
-}
-
-static const struct seq_operations irias_seq_ops = {
-	.start  = irias_seq_start,
-	.next   = irias_seq_next,
-	.stop   = irias_seq_stop,
-	.show   = irias_seq_show,
-};
-
-static int irias_seq_open(struct inode *inode, struct file *file)
-{
-	IRDA_ASSERT( irias_objects != NULL, return -EINVAL;);
-
-	return seq_open(file, &irias_seq_ops);
-}
-
-const struct file_operations irias_seq_fops = {
-	.owner		= THIS_MODULE,
-	.open           = irias_seq_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release	= seq_release,
-};
-
-#endif /* PROC_FS */
diff --git a/net/irda/iriap_event.c b/net/irda/iriap_event.c
deleted file mode 100644
index e6098b2e048a..000000000000
--- a/net/irda/iriap_event.c
+++ /dev/null
@@ -1,496 +0,0 @@
-/*********************************************************************
- *
- * Filename:      iriap_event.c
- * Version:       0.1
- * Description:   IAP Finite State Machine
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Thu Aug 21 00:02:07 1997
- * Modified at:   Wed Mar  1 11:28:34 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1997, 1999-2000 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/slab.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/iriap_event.h>
-
-static void state_s_disconnect   (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_s_connecting   (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_s_call         (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-
-static void state_s_make_call    (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_s_calling      (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_s_outstanding  (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_s_replying     (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_s_wait_for_call(struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_s_wait_active  (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-
-static void state_r_disconnect   (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_r_call         (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_r_waiting      (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_r_wait_active  (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_r_receiving    (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_r_execute      (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-static void state_r_returning    (struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb);
-
-static void (*iriap_state[])(struct iriap_cb *self, IRIAP_EVENT event,
-			     struct sk_buff *skb) = {
-	/* Client FSM */
-	state_s_disconnect,
-	state_s_connecting,
-	state_s_call,
-
-	/* S-Call FSM */
-	state_s_make_call,
-	state_s_calling,
-	state_s_outstanding,
-	state_s_replying,
-	state_s_wait_for_call,
-	state_s_wait_active,
-
-	/* Server FSM */
-	state_r_disconnect,
-	state_r_call,
-
-	/* R-Connect FSM */
-	state_r_waiting,
-	state_r_wait_active,
-	state_r_receiving,
-	state_r_execute,
-	state_r_returning,
-};
-
-void iriap_next_client_state(struct iriap_cb *self, IRIAP_STATE state)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	self->client_state = state;
-}
-
-void iriap_next_call_state(struct iriap_cb *self, IRIAP_STATE state)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	self->call_state = state;
-}
-
-void iriap_next_server_state(struct iriap_cb *self, IRIAP_STATE state)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	self->server_state = state;
-}
-
-void iriap_next_r_connect_state(struct iriap_cb *self, IRIAP_STATE state)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	self->r_connect_state = state;
-}
-
-void iriap_do_client_event(struct iriap_cb *self, IRIAP_EVENT event,
-			   struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	(*iriap_state[ self->client_state]) (self, event, skb);
-}
-
-void iriap_do_call_event(struct iriap_cb *self, IRIAP_EVENT event,
-			 struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	(*iriap_state[ self->call_state]) (self, event, skb);
-}
-
-void iriap_do_server_event(struct iriap_cb *self, IRIAP_EVENT event,
-			   struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	(*iriap_state[ self->server_state]) (self, event, skb);
-}
-
-void iriap_do_r_connect_event(struct iriap_cb *self, IRIAP_EVENT event,
-			      struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	(*iriap_state[ self->r_connect_state]) (self, event, skb);
-}
-
-
-/*
- * Function state_s_disconnect (event, skb)
- *
- *    S-Disconnect, The device has no LSAP connection to a particular
- *    remote device.
- */
-static void state_s_disconnect(struct iriap_cb *self, IRIAP_EVENT event,
-			       struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	switch (event) {
-	case IAP_CALL_REQUEST_GVBC:
-		iriap_next_client_state(self, S_CONNECTING);
-		IRDA_ASSERT(self->request_skb == NULL, return;);
-		/* Don't forget to refcount it -
-		 * see iriap_getvaluebyclass_request(). */
-		skb_get(skb);
-		self->request_skb = skb;
-		iriap_connect_request(self);
-		break;
-	case IAP_LM_DISCONNECT_INDICATION:
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__, event);
-		break;
-	}
-}
-
-/*
- * Function state_s_connecting (self, event, skb)
- *
- *    S-Connecting
- *
- */
-static void state_s_connecting(struct iriap_cb *self, IRIAP_EVENT event,
-			       struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	switch (event) {
-	case IAP_LM_CONNECT_CONFIRM:
-		/*
-		 *  Jump to S-Call FSM
-		 */
-		iriap_do_call_event(self, IAP_CALL_REQUEST, skb);
-		/* iriap_call_request(self, 0,0,0); */
-		iriap_next_client_state(self, S_CALL);
-		break;
-	case IAP_LM_DISCONNECT_INDICATION:
-		/* Abort calls */
-		iriap_next_call_state(self, S_MAKE_CALL);
-		iriap_next_client_state(self, S_DISCONNECT);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__, event);
-		break;
-	}
-}
-
-/*
- * Function state_s_call (self, event, skb)
- *
- *    S-Call, The device can process calls to a specific remote
- *    device. Whenever the LSAP connection is disconnected, this state
- *    catches that event and clears up
- */
-static void state_s_call(struct iriap_cb *self, IRIAP_EVENT event,
-			 struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-
-	switch (event) {
-	case IAP_LM_DISCONNECT_INDICATION:
-		/* Abort calls */
-		iriap_next_call_state(self, S_MAKE_CALL);
-		iriap_next_client_state(self, S_DISCONNECT);
-		break;
-	default:
-		pr_debug("state_s_call: Unknown event %d\n", event);
-		break;
-	}
-}
-
-/*
- * Function state_s_make_call (event, skb)
- *
- *    S-Make-Call
- *
- */
-static void state_s_make_call(struct iriap_cb *self, IRIAP_EVENT event,
-			      struct sk_buff *skb)
-{
-	struct sk_buff *tx_skb;
-
-	IRDA_ASSERT(self != NULL, return;);
-
-	switch (event) {
-	case IAP_CALL_REQUEST:
-		/* Already refcounted - see state_s_disconnect() */
-		tx_skb = self->request_skb;
-		self->request_skb = NULL;
-
-		irlmp_data_request(self->lsap, tx_skb);
-		iriap_next_call_state(self, S_OUTSTANDING);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__, event);
-		break;
-	}
-}
-
-/*
- * Function state_s_calling (event, skb)
- *
- *    S-Calling
- *
- */
-static void state_s_calling(struct iriap_cb *self, IRIAP_EVENT event,
-			    struct sk_buff *skb)
-{
-	pr_debug("%s(), Not implemented\n", __func__);
-}
-
-/*
- * Function state_s_outstanding (event, skb)
- *
- *    S-Outstanding, The device is waiting for a response to a command
- *
- */
-static void state_s_outstanding(struct iriap_cb *self, IRIAP_EVENT event,
-				struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-
-	switch (event) {
-	case IAP_RECV_F_LST:
-		/*iriap_send_ack(self);*/
-		/*LM_Idle_request(idle); */
-
-		iriap_next_call_state(self, S_WAIT_FOR_CALL);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__, event);
-		break;
-	}
-}
-
-/*
- * Function state_s_replying (event, skb)
- *
- *    S-Replying, The device is collecting a multiple part response
- */
-static void state_s_replying(struct iriap_cb *self, IRIAP_EVENT event,
-			     struct sk_buff *skb)
-{
-	pr_debug("%s(), Not implemented\n", __func__);
-}
-
-/*
- * Function state_s_wait_for_call (event, skb)
- *
- *    S-Wait-for-Call
- *
- */
-static void state_s_wait_for_call(struct iriap_cb *self, IRIAP_EVENT event,
-				  struct sk_buff *skb)
-{
-	pr_debug("%s(), Not implemented\n", __func__);
-}
-
-
-/*
- * Function state_s_wait_active (event, skb)
- *
- *    S-Wait-Active
- *
- */
-static void state_s_wait_active(struct iriap_cb *self, IRIAP_EVENT event,
-				struct sk_buff *skb)
-{
-	pr_debug("%s(), Not implemented\n", __func__);
-}
-
-/**************************************************************************
- *
- *  Server FSM
- *
- **************************************************************************/
-
-/*
- * Function state_r_disconnect (self, event, skb)
- *
- *    LM-IAS server is disconnected (not processing any requests!)
- *
- */
-static void state_r_disconnect(struct iriap_cb *self, IRIAP_EVENT event,
-			       struct sk_buff *skb)
-{
-	struct sk_buff *tx_skb;
-
-	switch (event) {
-	case IAP_LM_CONNECT_INDICATION:
-		tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
-		if (tx_skb == NULL)
-			return;
-
-		/* Reserve space for MUX_CONTROL and LAP header */
-		skb_reserve(tx_skb, LMP_MAX_HEADER);
-
-		irlmp_connect_response(self->lsap, tx_skb);
-		/*LM_Idle_request(idle); */
-
-		iriap_next_server_state(self, R_CALL);
-
-		/*
-		 *  Jump to R-Connect FSM, we skip R-Waiting since we do not
-		 *  care about LM_Idle_request()!
-		 */
-		iriap_next_r_connect_state(self, R_RECEIVING);
-		break;
-	default:
-		pr_debug("%s(), unknown event %d\n", __func__, event);
-		break;
-	}
-}
-
-/*
- * Function state_r_call (self, event, skb)
- */
-static void state_r_call(struct iriap_cb *self, IRIAP_EVENT event,
-			 struct sk_buff *skb)
-{
-	switch (event) {
-	case IAP_LM_DISCONNECT_INDICATION:
-		/* Abort call */
-		iriap_next_server_state(self, R_DISCONNECT);
-		iriap_next_r_connect_state(self, R_WAITING);
-		break;
-	default:
-		pr_debug("%s(), unknown event!\n", __func__);
-		break;
-	}
-}
-
-/*
- *  R-Connect FSM
- */
-
-/*
- * Function state_r_waiting (self, event, skb)
- */
-static void state_r_waiting(struct iriap_cb *self, IRIAP_EVENT event,
-			    struct sk_buff *skb)
-{
-	pr_debug("%s(), Not implemented\n", __func__);
-}
-
-static void state_r_wait_active(struct iriap_cb *self, IRIAP_EVENT event,
-				struct sk_buff *skb)
-{
-	pr_debug("%s(), Not implemented\n", __func__);
-}
-
-/*
- * Function state_r_receiving (self, event, skb)
- *
- *    We are receiving a command
- *
- */
-static void state_r_receiving(struct iriap_cb *self, IRIAP_EVENT event,
-			      struct sk_buff *skb)
-{
-	switch (event) {
-	case IAP_RECV_F_LST:
-		iriap_next_r_connect_state(self, R_EXECUTE);
-
-		iriap_call_indication(self, skb);
-		break;
-	default:
-		pr_debug("%s(), unknown event!\n", __func__);
-		break;
-	}
-}
-
-/*
- * Function state_r_execute (self, event, skb)
- *
- *    The server is processing the request
- *
- */
-static void state_r_execute(struct iriap_cb *self, IRIAP_EVENT event,
-			    struct sk_buff *skb)
-{
-	IRDA_ASSERT(skb != NULL, return;);
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IAS_MAGIC, return;);
-
-	switch (event) {
-	case IAP_CALL_RESPONSE:
-		/*
-		 *  Since we don't implement the Waiting state, we return
-		 *  to state Receiving instead, DB.
-		 */
-		iriap_next_r_connect_state(self, R_RECEIVING);
-
-		/* Don't forget to refcount it - see
-		 * iriap_getvaluebyclass_response(). */
-		skb_get(skb);
-
-		irlmp_data_request(self->lsap, skb);
-		break;
-	default:
-		pr_debug("%s(), unknown event!\n", __func__);
-		break;
-	}
-}
-
-static void state_r_returning(struct iriap_cb *self, IRIAP_EVENT event,
-			      struct sk_buff *skb)
-{
-	pr_debug("%s(), event=%d\n", __func__, event);
-
-	switch (event) {
-	case IAP_RECV_F_LST:
-		break;
-	default:
-		break;
-	}
-}
diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c
deleted file mode 100644
index 53b86d0e1630..000000000000
--- a/net/irda/irias_object.c
+++ /dev/null
@@ -1,555 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irias_object.c
- * Version:       0.3
- * Description:   IAS object database and functions
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Thu Oct  1 22:50:04 1998
- * Modified at:   Wed Dec 15 11:23:16 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/module.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irias_object.h>
-
-hashbin_t *irias_objects;
-
-/*
- *  Used when a missing value needs to be returned
- */
-struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}};
-
-
-/*
- * Function ias_new_object (name, id)
- *
- *    Create a new IAS object
- *
- */
-struct ias_object *irias_new_object( char *name, int id)
-{
-	struct ias_object *obj;
-
-	obj = kzalloc(sizeof(struct ias_object), GFP_ATOMIC);
-	if (obj == NULL) {
-		net_warn_ratelimited("%s(), Unable to allocate object!\n",
-				     __func__);
-		return NULL;
-	}
-
-	obj->magic = IAS_OBJECT_MAGIC;
-	obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC);
-	if (!obj->name) {
-		net_warn_ratelimited("%s(), Unable to allocate name!\n",
-				     __func__);
-		kfree(obj);
-		return NULL;
-	}
-	obj->id = id;
-
-	/* Locking notes : the attrib spinlock has lower precendence
-	 * than the objects spinlock. Never grap the objects spinlock
-	 * while holding any attrib spinlock (risk of deadlock). Jean II */
-	obj->attribs = hashbin_new(HB_LOCK);
-
-	if (obj->attribs == NULL) {
-		net_warn_ratelimited("%s(), Unable to allocate attribs!\n",
-				     __func__);
-		kfree(obj->name);
-		kfree(obj);
-		return NULL;
-	}
-
-	return obj;
-}
-EXPORT_SYMBOL(irias_new_object);
-
-/*
- * Function irias_delete_attrib (attrib)
- *
- *    Delete given attribute and deallocate all its memory
- *
- */
-static void __irias_delete_attrib(struct ias_attrib *attrib)
-{
-	IRDA_ASSERT(attrib != NULL, return;);
-	IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;);
-
-	kfree(attrib->name);
-
-	irias_delete_value(attrib->value);
-	attrib->magic = ~IAS_ATTRIB_MAGIC;
-
-	kfree(attrib);
-}
-
-void __irias_delete_object(struct ias_object *obj)
-{
-	IRDA_ASSERT(obj != NULL, return;);
-	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-
-	kfree(obj->name);
-
-	hashbin_delete(obj->attribs, (FREE_FUNC) __irias_delete_attrib);
-
-	obj->magic = ~IAS_OBJECT_MAGIC;
-
-	kfree(obj);
-}
-
-/*
- * Function irias_delete_object (obj)
- *
- *    Remove object from hashbin and deallocate all attributes associated with
- *    with this object and the object itself
- *
- */
-int irias_delete_object(struct ias_object *obj)
-{
-	struct ias_object *node;
-
-	IRDA_ASSERT(obj != NULL, return -1;);
-	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -1;);
-
-	/* Remove from list */
-	node = hashbin_remove_this(irias_objects, (irda_queue_t *) obj);
-	if (!node)
-		pr_debug("%s(), object already removed!\n",
-			 __func__);
-
-	/* Destroy */
-	__irias_delete_object(obj);
-
-	return 0;
-}
-EXPORT_SYMBOL(irias_delete_object);
-
-/*
- * Function irias_delete_attrib (obj)
- *
- *    Remove attribute from hashbin and, if it was the last attribute of
- *    the object, remove the object as well.
- *
- */
-int irias_delete_attrib(struct ias_object *obj, struct ias_attrib *attrib,
-			int cleanobject)
-{
-	struct ias_attrib *node;
-
-	IRDA_ASSERT(obj != NULL, return -1;);
-	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -1;);
-	IRDA_ASSERT(attrib != NULL, return -1;);
-
-	/* Remove attribute from object */
-	node = hashbin_remove_this(obj->attribs, (irda_queue_t *) attrib);
-	if (!node)
-		return 0; /* Already removed or non-existent */
-
-	/* Deallocate attribute */
-	__irias_delete_attrib(node);
-
-	/* Check if object has still some attributes, destroy it if none.
-	 * At first glance, this look dangerous, as the kernel reference
-	 * various IAS objects. However, we only use this function on
-	 * user attributes, not kernel attributes, so there is no risk
-	 * of deleting a kernel object this way. Jean II */
-	node = (struct ias_attrib *) hashbin_get_first(obj->attribs);
-	if (cleanobject && !node)
-		irias_delete_object(obj);
-
-	return 0;
-}
-
-/*
- * Function irias_insert_object (obj)
- *
- *    Insert an object into the LM-IAS database
- *
- */
-void irias_insert_object(struct ias_object *obj)
-{
-	IRDA_ASSERT(obj != NULL, return;);
-	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-
-	hashbin_insert(irias_objects, (irda_queue_t *) obj, 0, obj->name);
-}
-EXPORT_SYMBOL(irias_insert_object);
-
-/*
- * Function irias_find_object (name)
- *
- *    Find object with given name
- *
- */
-struct ias_object *irias_find_object(char *name)
-{
-	IRDA_ASSERT(name != NULL, return NULL;);
-
-	/* Unsafe (locking), object might change */
-	return hashbin_lock_find(irias_objects, 0, name);
-}
-EXPORT_SYMBOL(irias_find_object);
-
-/*
- * Function irias_find_attrib (obj, name)
- *
- *    Find named attribute in object
- *
- */
-struct ias_attrib *irias_find_attrib(struct ias_object *obj, char *name)
-{
-	struct ias_attrib *attrib;
-
-	IRDA_ASSERT(obj != NULL, return NULL;);
-	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return NULL;);
-	IRDA_ASSERT(name != NULL, return NULL;);
-
-	attrib = hashbin_lock_find(obj->attribs, 0, name);
-	if (attrib == NULL)
-		return NULL;
-
-	/* Unsafe (locking), attrib might change */
-	return attrib;
-}
-
-/*
- * Function irias_add_attribute (obj, attrib)
- *
- *    Add attribute to object
- *
- */
-static void irias_add_attrib(struct ias_object *obj, struct ias_attrib *attrib,
-			     int owner)
-{
-	IRDA_ASSERT(obj != NULL, return;);
-	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-
-	IRDA_ASSERT(attrib != NULL, return;);
-	IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;);
-
-	/* Set if attrib is owned by kernel or user space */
-	attrib->value->owner = owner;
-
-	hashbin_insert(obj->attribs, (irda_queue_t *) attrib, 0, attrib->name);
-}
-
-/*
- * Function irias_object_change_attribute (obj_name, attrib_name, new_value)
- *
- *    Change the value of an objects attribute.
- *
- */
-int irias_object_change_attribute(char *obj_name, char *attrib_name,
-				  struct ias_value *new_value)
-{
-	struct ias_object *obj;
-	struct ias_attrib *attrib;
-	unsigned long flags;
-
-	/* Find object */
-	obj = hashbin_lock_find(irias_objects, 0, obj_name);
-	if (obj == NULL) {
-		net_warn_ratelimited("%s: Unable to find object: %s\n",
-				     __func__, obj_name);
-		return -1;
-	}
-
-	/* Slightly unsafe (obj might get removed under us) */
-	spin_lock_irqsave(&obj->attribs->hb_spinlock, flags);
-
-	/* Find attribute */
-	attrib = hashbin_find(obj->attribs, 0, attrib_name);
-	if (attrib == NULL) {
-		net_warn_ratelimited("%s: Unable to find attribute: %s\n",
-				     __func__, attrib_name);
-		spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags);
-		return -1;
-	}
-
-	if ( attrib->value->type != new_value->type) {
-		pr_debug("%s(), changing value type not allowed!\n",
-			 __func__);
-		spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags);
-		return -1;
-	}
-
-	/* Delete old value */
-	irias_delete_value(attrib->value);
-
-	/* Insert new value */
-	attrib->value = new_value;
-
-	/* Success */
-	spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags);
-	return 0;
-}
-EXPORT_SYMBOL(irias_object_change_attribute);
-
-/*
- * Function irias_object_add_integer_attrib (obj, name, value)
- *
- *    Add an integer attribute to an LM-IAS object
- *
- */
-void irias_add_integer_attrib(struct ias_object *obj, char *name, int value,
-			      int owner)
-{
-	struct ias_attrib *attrib;
-
-	IRDA_ASSERT(obj != NULL, return;);
-	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-	IRDA_ASSERT(name != NULL, return;);
-
-	attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
-	if (attrib == NULL) {
-		net_warn_ratelimited("%s: Unable to allocate attribute!\n",
-				     __func__);
-		return;
-	}
-
-	attrib->magic = IAS_ATTRIB_MAGIC;
-	attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
-
-	/* Insert value */
-	attrib->value = irias_new_integer_value(value);
-	if (!attrib->name || !attrib->value) {
-		net_warn_ratelimited("%s: Unable to allocate attribute!\n",
-				     __func__);
-		if (attrib->value)
-			irias_delete_value(attrib->value);
-		kfree(attrib->name);
-		kfree(attrib);
-		return;
-	}
-
-	irias_add_attrib(obj, attrib, owner);
-}
-EXPORT_SYMBOL(irias_add_integer_attrib);
-
- /*
- * Function irias_add_octseq_attrib (obj, name, octet_seq, len)
- *
- *    Add a octet sequence attribute to an LM-IAS object
- *
- */
-
-void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets,
-			     int len, int owner)
-{
-	struct ias_attrib *attrib;
-
-	IRDA_ASSERT(obj != NULL, return;);
-	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-
-	IRDA_ASSERT(name != NULL, return;);
-	IRDA_ASSERT(octets != NULL, return;);
-
-	attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC);
-	if (attrib == NULL) {
-		net_warn_ratelimited("%s: Unable to allocate attribute!\n",
-				     __func__);
-		return;
-	}
-
-	attrib->magic = IAS_ATTRIB_MAGIC;
-	attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
-
-	attrib->value = irias_new_octseq_value( octets, len);
-	if (!attrib->name || !attrib->value) {
-		net_warn_ratelimited("%s: Unable to allocate attribute!\n",
-				     __func__);
-		if (attrib->value)
-			irias_delete_value(attrib->value);
-		kfree(attrib->name);
-		kfree(attrib);
-		return;
-	}
-
-	irias_add_attrib(obj, attrib, owner);
-}
-EXPORT_SYMBOL(irias_add_octseq_attrib);
-
-/*
- * Function irias_object_add_string_attrib (obj, string)
- *
- *    Add a string attribute to an LM-IAS object
- *
- */
-void irias_add_string_attrib(struct ias_object *obj, char *name, char *value,
-			     int owner)
-{
-	struct ias_attrib *attrib;
-
-	IRDA_ASSERT(obj != NULL, return;);
-	IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;);
-
-	IRDA_ASSERT(name != NULL, return;);
-	IRDA_ASSERT(value != NULL, return;);
-
-	attrib = kzalloc(sizeof( struct ias_attrib), GFP_ATOMIC);
-	if (attrib == NULL) {
-		net_warn_ratelimited("%s: Unable to allocate attribute!\n",
-				     __func__);
-		return;
-	}
-
-	attrib->magic = IAS_ATTRIB_MAGIC;
-	attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
-
-	attrib->value = irias_new_string_value(value);
-	if (!attrib->name || !attrib->value) {
-		net_warn_ratelimited("%s: Unable to allocate attribute!\n",
-				     __func__);
-		if (attrib->value)
-			irias_delete_value(attrib->value);
-		kfree(attrib->name);
-		kfree(attrib);
-		return;
-	}
-
-	irias_add_attrib(obj, attrib, owner);
-}
-EXPORT_SYMBOL(irias_add_string_attrib);
-
-/*
- * Function irias_new_integer_value (integer)
- *
- *    Create new IAS integer value
- *
- */
-struct ias_value *irias_new_integer_value(int integer)
-{
-	struct ias_value *value;
-
-	value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
-	if (value == NULL)
-		return NULL;
-
-	value->type = IAS_INTEGER;
-	value->len = 4;
-	value->t.integer = integer;
-
-	return value;
-}
-EXPORT_SYMBOL(irias_new_integer_value);
-
-/*
- * Function irias_new_string_value (string)
- *
- *    Create new IAS string value
- *
- * Per IrLMP 1.1, 4.3.3.2, strings are up to 256 chars - Jean II
- */
-struct ias_value *irias_new_string_value(char *string)
-{
-	struct ias_value *value;
-
-	value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
-	if (value == NULL)
-		return NULL;
-
-	value->type = IAS_STRING;
-	value->charset = CS_ASCII;
-	value->t.string = kstrndup(string, IAS_MAX_STRING, GFP_ATOMIC);
-	if (!value->t.string) {
-		net_warn_ratelimited("%s: Unable to kmalloc!\n", __func__);
-		kfree(value);
-		return NULL;
-	}
-
-	value->len = strlen(value->t.string);
-
-	return value;
-}
-
-/*
- * Function irias_new_octseq_value (octets, len)
- *
- *    Create new IAS octet-sequence value
- *
- * Per IrLMP 1.1, 4.3.3.2, octet-sequence are up to 1024 bytes - Jean II
- */
-struct ias_value *irias_new_octseq_value(__u8 *octseq , int len)
-{
-	struct ias_value *value;
-
-	value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
-	if (value == NULL)
-		return NULL;
-
-	value->type = IAS_OCT_SEQ;
-	/* Check length */
-	if(len > IAS_MAX_OCTET_STRING)
-		len = IAS_MAX_OCTET_STRING;
-	value->len = len;
-
-	value->t.oct_seq = kmemdup(octseq, len, GFP_ATOMIC);
-	if (value->t.oct_seq == NULL){
-		net_warn_ratelimited("%s: Unable to kmalloc!\n", __func__);
-		kfree(value);
-		return NULL;
-	}
-	return value;
-}
-
-struct ias_value *irias_new_missing_value(void)
-{
-	struct ias_value *value;
-
-	value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC);
-	if (value == NULL)
-		return NULL;
-
-	value->type = IAS_MISSING;
-
-	return value;
-}
-
-/*
- * Function irias_delete_value (value)
- *
- *    Delete IAS value
- *
- */
-void irias_delete_value(struct ias_value *value)
-{
-	IRDA_ASSERT(value != NULL, return;);
-
-	switch (value->type) {
-	case IAS_INTEGER: /* Fallthrough */
-	case IAS_MISSING:
-		/* No need to deallocate */
-		break;
-	case IAS_STRING:
-		/* Deallocate string */
-		kfree(value->t.string);
-		break;
-	case IAS_OCT_SEQ:
-		/* Deallocate byte stream */
-		 kfree(value->t.oct_seq);
-		 break;
-	default:
-		pr_debug("%s(), Unknown value type!\n", __func__);
-		break;
-	}
-	kfree(value);
-}
-EXPORT_SYMBOL(irias_delete_value);
diff --git a/net/irda/irlan/Kconfig b/net/irda/irlan/Kconfig
deleted file mode 100644
index 951abc2e3a7f..000000000000
--- a/net/irda/irlan/Kconfig
+++ /dev/null
@@ -1,14 +0,0 @@
-config IRLAN
-	tristate "IrLAN protocol"
-	depends on IRDA
-	help
-	  Say Y here if you want to build support for the IrLAN protocol.
-	  To compile it as a module, choose M here: the module will be called
-	  irlan.  IrLAN emulates an Ethernet and makes it possible to put up
-	  a wireless LAN using infrared beams.
-
-	  The IrLAN protocol can be used to talk with infrared access points
-	  like the HP NetbeamIR, or the ESI JetEye NET.  You can also connect
-	  to another Linux machine running the IrLAN protocol for ad-hoc
-	  networking!
-
diff --git a/net/irda/irlan/Makefile b/net/irda/irlan/Makefile
deleted file mode 100644
index 94eefbc8e6b9..000000000000
--- a/net/irda/irlan/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the Linux IrDA IrLAN protocol layer.
-#
-
-obj-$(CONFIG_IRLAN) += irlan.o
-
-irlan-y := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o
diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c
deleted file mode 100644
index c5837a40c78e..000000000000
--- a/net/irda/irlan/irlan_client.c
+++ /dev/null
@@ -1,559 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlan_client.c
- * Version:       0.9
- * Description:   IrDA LAN Access Protocol (IrLAN) Client
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:37 1997
- * Modified at:   Tue Dec 14 15:47:02 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * Sources:       skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov>
- *                slip.c by Laurence Culhane, <loz@holmes.demon.co.uk>
- *                          Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org>
- *
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
-#include <linux/bitops.h>
-#include <net/arp.h>
-
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/iriap.h>
-#include <net/irda/timer.h>
-
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_event.h>
-#include <net/irda/irlan_eth.h>
-#include <net/irda/irlan_provider.h>
-#include <net/irda/irlan_client.h>
-
-#undef CONFIG_IRLAN_GRATUITOUS_ARP
-
-static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap,
-						    LM_REASON reason,
-						    struct sk_buff *);
-static int irlan_client_ctrl_data_indication(void *instance, void *sap,
-					     struct sk_buff *skb);
-static void irlan_client_ctrl_connect_confirm(void *instance, void *sap,
-					      struct qos_info *qos,
-					      __u32 max_sdu_size,
-					      __u8 max_header_size,
-					      struct sk_buff *);
-static void irlan_check_response_param(struct irlan_cb *self, char *param,
-				       char *value, int val_len);
-static void irlan_client_open_ctrl_tsap(struct irlan_cb *self);
-
-static void irlan_client_kick_timer_expired(void *data)
-{
-	struct irlan_cb *self = (struct irlan_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	/*
-	 * If we are in peer mode, the client may not have got the discovery
-	 * indication it needs to make progress. If the client is still in
-	 * IDLE state, we must kick it to, but only if the provider is not IDLE
-	 */
-	if ((self->provider.access_type == ACCESS_PEER) &&
-	    (self->client.state == IRLAN_IDLE) &&
-	    (self->provider.state != IRLAN_IDLE)) {
-		irlan_client_wakeup(self, self->saddr, self->daddr);
-	}
-}
-
-static void irlan_client_start_kick_timer(struct irlan_cb *self, int timeout)
-{
-	irda_start_timer(&self->client.kick_timer, timeout, (void *) self,
-			 irlan_client_kick_timer_expired);
-}
-
-/*
- * Function irlan_client_wakeup (self, saddr, daddr)
- *
- *    Wake up client
- *
- */
-void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	/*
-	 * Check if we are already awake, or if we are a provider in direct
-	 * mode (in that case we must leave the client idle
-	 */
-	if ((self->client.state != IRLAN_IDLE) ||
-	    (self->provider.access_type == ACCESS_DIRECT))
-	{
-		pr_debug("%s(), already awake!\n", __func__);
-			return;
-	}
-
-	/* Addresses may have changed! */
-	self->saddr = saddr;
-	self->daddr = daddr;
-
-	if (self->disconnect_reason == LM_USER_REQUEST) {
-		pr_debug("%s(), still stopped by user\n", __func__);
-			return;
-	}
-
-	/* Open TSAPs */
-	irlan_client_open_ctrl_tsap(self);
-	irlan_open_data_tsap(self);
-
-	irlan_do_client_event(self, IRLAN_DISCOVERY_INDICATION, NULL);
-
-	/* Start kick timer */
-	irlan_client_start_kick_timer(self, 2*HZ);
-}
-
-/*
- * Function irlan_discovery_indication (daddr)
- *
- *    Remote device with IrLAN server support discovered
- *
- */
-void irlan_client_discovery_indication(discinfo_t *discovery,
-				       DISCOVERY_MODE mode,
-				       void *priv)
-{
-	struct irlan_cb *self;
-	__u32 saddr, daddr;
-
-	IRDA_ASSERT(discovery != NULL, return;);
-
-	/*
-	 * I didn't check it, but I bet that IrLAN suffer from the same
-	 * deficiency as IrComm and doesn't handle two instances
-	 * simultaneously connecting to each other.
-	 * Same workaround, drop passive discoveries.
-	 * Jean II */
-	if(mode == DISCOVERY_PASSIVE)
-		return;
-
-	saddr = discovery->saddr;
-	daddr = discovery->daddr;
-
-	/* Find instance */
-	rcu_read_lock();
-	self = irlan_get_any();
-	if (self) {
-		IRDA_ASSERT(self->magic == IRLAN_MAGIC, goto out;);
-
-		pr_debug("%s(), Found instance (%08x)!\n", __func__ ,
-			 daddr);
-
-		irlan_client_wakeup(self, saddr, daddr);
-	}
-IRDA_ASSERT_LABEL(out:)
-	rcu_read_unlock();
-}
-
-/*
- * Function irlan_client_data_indication (handle, skb)
- *
- *    This function gets the data that is received on the control channel
- *
- */
-static int irlan_client_ctrl_data_indication(void *instance, void *sap,
-					     struct sk_buff *skb)
-{
-	struct irlan_cb *self;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	irlan_do_client_event(self, IRLAN_DATA_INDICATION, skb);
-
-	/* Ready for a new command */
-	pr_debug("%s(), clearing tx_busy\n", __func__);
-	self->client.tx_busy = FALSE;
-
-	/* Check if we have some queued commands waiting to be sent */
-	irlan_run_ctrl_tx_queue(self);
-
-	return 0;
-}
-
-static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap,
-						    LM_REASON reason,
-						    struct sk_buff *userdata)
-{
-	struct irlan_cb *self;
-	struct tsap_cb *tsap;
-	struct sk_buff *skb;
-
-	pr_debug("%s(), reason=%d\n", __func__ , reason);
-
-	self = instance;
-	tsap = sap;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-	IRDA_ASSERT(tsap != NULL, return;);
-	IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;);
-
-	IRDA_ASSERT(tsap == self->client.tsap_ctrl, return;);
-
-	/* Remove frames queued on the control channel */
-	while ((skb = skb_dequeue(&self->client.txq)) != NULL) {
-		dev_kfree_skb(skb);
-	}
-	self->client.tx_busy = FALSE;
-
-	irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL);
-}
-
-/*
- * Function irlan_client_open_tsaps (self)
- *
- *    Initialize callbacks and open IrTTP TSAPs
- *
- */
-static void irlan_client_open_ctrl_tsap(struct irlan_cb *self)
-{
-	struct tsap_cb *tsap;
-	notify_t notify;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	/* Check if already open */
-	if (self->client.tsap_ctrl)
-		return;
-
-	irda_notify_init(&notify);
-
-	/* Set up callbacks */
-	notify.data_indication       = irlan_client_ctrl_data_indication;
-	notify.connect_confirm       = irlan_client_ctrl_connect_confirm;
-	notify.disconnect_indication = irlan_client_ctrl_disconnect_indication;
-	notify.instance = self;
-	strlcpy(notify.name, "IrLAN ctrl (c)", sizeof(notify.name));
-
-	tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT, &notify);
-	if (!tsap) {
-		pr_debug("%s(), Got no tsap!\n", __func__);
-		return;
-	}
-	self->client.tsap_ctrl = tsap;
-}
-
-/*
- * Function irlan_client_connect_confirm (handle, skb)
- *
- *    Connection to peer IrLAN laye confirmed
- *
- */
-static void irlan_client_ctrl_connect_confirm(void *instance, void *sap,
-					      struct qos_info *qos,
-					      __u32 max_sdu_size,
-					      __u8 max_header_size,
-					      struct sk_buff *skb)
-{
-	struct irlan_cb *self;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	self->client.max_sdu_size = max_sdu_size;
-	self->client.max_header_size = max_header_size;
-
-	/* TODO: we could set the MTU depending on the max_sdu_size */
-
-	irlan_do_client_event(self, IRLAN_CONNECT_COMPLETE, NULL);
-}
-
-/*
- * Function print_ret_code (code)
- *
- *    Print return code of request to peer IrLAN layer.
- *
- */
-static void print_ret_code(__u8 code)
-{
-	switch(code) {
-	case 0:
-		printk(KERN_INFO "Success\n");
-		break;
-	case 1:
-		net_warn_ratelimited("IrLAN: Insufficient resources\n");
-		break;
-	case 2:
-		net_warn_ratelimited("IrLAN: Invalid command format\n");
-		break;
-	case 3:
-		net_warn_ratelimited("IrLAN: Command not supported\n");
-		break;
-	case 4:
-		net_warn_ratelimited("IrLAN: Parameter not supported\n");
-		break;
-	case 5:
-		net_warn_ratelimited("IrLAN: Value not supported\n");
-		break;
-	case 6:
-		net_warn_ratelimited("IrLAN: Not open\n");
-		break;
-	case 7:
-		net_warn_ratelimited("IrLAN: Authentication required\n");
-		break;
-	case 8:
-		net_warn_ratelimited("IrLAN: Invalid password\n");
-		break;
-	case 9:
-		net_warn_ratelimited("IrLAN: Protocol error\n");
-		break;
-	case 255:
-		net_warn_ratelimited("IrLAN: Asynchronous status\n");
-		break;
-	}
-}
-
-/*
- * Function irlan_client_parse_response (self, skb)
- *
- *    Extract all parameters from received buffer, then feed them to
- *    check_params for parsing
- */
-void irlan_client_parse_response(struct irlan_cb *self, struct sk_buff *skb)
-{
-	__u8 *frame;
-	__u8 *ptr;
-	int count;
-	int ret;
-	__u16 val_len;
-	int i;
-	char *name;
-	char *value;
-
-	IRDA_ASSERT(skb != NULL, return;);
-
-	pr_debug("%s() skb->len=%d\n", __func__ , (int)skb->len);
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	if (!skb) {
-		net_err_ratelimited("%s(), Got NULL skb!\n", __func__);
-		return;
-	}
-	frame = skb->data;
-
-	/*
-	 *  Check return code and print it if not success
-	 */
-	if (frame[0]) {
-		print_ret_code(frame[0]);
-		return;
-	}
-
-	name = kmalloc(255, GFP_ATOMIC);
-	if (!name)
-		return;
-	value = kmalloc(1016, GFP_ATOMIC);
-	if (!value) {
-		kfree(name);
-		return;
-	}
-
-	/* How many parameters? */
-	count = frame[1];
-
-	pr_debug("%s(), got %d parameters\n", __func__ , count);
-
-	ptr = frame+2;
-
-	/* For all parameters */
-	for (i=0; i<count;i++) {
-		ret = irlan_extract_param(ptr, name, value, &val_len);
-		if (ret < 0) {
-			pr_debug("%s(), IrLAN, Error!\n", __func__);
-			break;
-		}
-		ptr += ret;
-		irlan_check_response_param(self, name, value, val_len);
-	}
-	/* Cleanup */
-	kfree(name);
-	kfree(value);
-}
-
-/*
- * Function irlan_check_response_param (self, param, value, val_len)
- *
- *     Check which parameter is received and update local variables
- *
- */
-static void irlan_check_response_param(struct irlan_cb *self, char *param,
-				       char *value, int val_len)
-{
-	__u16 tmp_cpu; /* Temporary value in host order */
-	__u8 *bytes;
-	int i;
-
-	pr_debug("%s(), parm=%s\n", __func__ , param);
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	/* Media type */
-	if (strcmp(param, "MEDIA") == 0) {
-		if (strcmp(value, "802.3") == 0)
-			self->media = MEDIA_802_3;
-		else
-			self->media = MEDIA_802_5;
-		return;
-	}
-	if (strcmp(param, "FILTER_TYPE") == 0) {
-		if (strcmp(value, "DIRECTED") == 0)
-			self->client.filter_type |= IRLAN_DIRECTED;
-		else if (strcmp(value, "FUNCTIONAL") == 0)
-			self->client.filter_type |= IRLAN_FUNCTIONAL;
-		else if (strcmp(value, "GROUP") == 0)
-			self->client.filter_type |= IRLAN_GROUP;
-		else if (strcmp(value, "MAC_FRAME") == 0)
-			self->client.filter_type |= IRLAN_MAC_FRAME;
-		else if (strcmp(value, "MULTICAST") == 0)
-			self->client.filter_type |= IRLAN_MULTICAST;
-		else if (strcmp(value, "BROADCAST") == 0)
-			self->client.filter_type |= IRLAN_BROADCAST;
-		else if (strcmp(value, "IPX_SOCKET") == 0)
-			self->client.filter_type |= IRLAN_IPX_SOCKET;
-
-	}
-	if (strcmp(param, "ACCESS_TYPE") == 0) {
-		if (strcmp(value, "DIRECT") == 0)
-			self->client.access_type = ACCESS_DIRECT;
-		else if (strcmp(value, "PEER") == 0)
-			self->client.access_type = ACCESS_PEER;
-		else if (strcmp(value, "HOSTED") == 0)
-			self->client.access_type = ACCESS_HOSTED;
-		else {
-			pr_debug("%s(), unknown access type!\n", __func__);
-		}
-	}
-	/* IRLAN version */
-	if (strcmp(param, "IRLAN_VER") == 0) {
-		pr_debug("IrLAN version %d.%d\n", (__u8)value[0],
-			 (__u8)value[1]);
-
-		self->version[0] = value[0];
-		self->version[1] = value[1];
-		return;
-	}
-	/* Which remote TSAP to use for data channel */
-	if (strcmp(param, "DATA_CHAN") == 0) {
-		self->dtsap_sel_data = value[0];
-		pr_debug("Data TSAP = %02x\n", self->dtsap_sel_data);
-		return;
-	}
-	if (strcmp(param, "CON_ARB") == 0) {
-		memcpy(&tmp_cpu, value, 2); /* Align value */
-		le16_to_cpus(&tmp_cpu);     /* Convert to host order */
-		self->client.recv_arb_val = tmp_cpu;
-		pr_debug("%s(), receive arb val=%d\n", __func__ ,
-			 self->client.recv_arb_val);
-	}
-	if (strcmp(param, "MAX_FRAME") == 0) {
-		memcpy(&tmp_cpu, value, 2); /* Align value */
-		le16_to_cpus(&tmp_cpu);     /* Convert to host order */
-		self->client.max_frame = tmp_cpu;
-		pr_debug("%s(), max frame=%d\n", __func__ ,
-			 self->client.max_frame);
-	}
-
-	/* RECONNECT_KEY, in case the link goes down! */
-	if (strcmp(param, "RECONNECT_KEY") == 0) {
-		pr_debug("Got reconnect key: ");
-		/* for (i = 0; i < val_len; i++) */
-/* 			printk("%02x", value[i]); */
-		memcpy(self->client.reconnect_key, value, val_len);
-		self->client.key_len = val_len;
-		pr_debug("\n");
-	}
-	/* FILTER_ENTRY, have we got an ethernet address? */
-	if (strcmp(param, "FILTER_ENTRY") == 0) {
-		bytes = value;
-		pr_debug("Ethernet address = %pM\n", bytes);
-		for (i = 0; i < 6; i++)
-			self->dev->dev_addr[i] = bytes[i];
-	}
-}
-
-/*
- * Function irlan_client_get_value_confirm (obj_id, value)
- *
- *    Got results from remote LM-IAS
- *
- */
-void irlan_client_get_value_confirm(int result, __u16 obj_id,
-				    struct ias_value *value, void *priv)
-{
-	struct irlan_cb *self;
-
-	IRDA_ASSERT(priv != NULL, return;);
-
-	self = priv;
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	/* We probably don't need to make any more queries */
-	iriap_close(self->client.iriap);
-	self->client.iriap = NULL;
-
-	/* Check if request succeeded */
-	if (result != IAS_SUCCESS) {
-		pr_debug("%s(), got NULL value!\n", __func__);
-		irlan_do_client_event(self, IRLAN_IAS_PROVIDER_NOT_AVAIL,
-				      NULL);
-		return;
-	}
-
-	switch (value->type) {
-	case IAS_INTEGER:
-		self->dtsap_sel_ctrl = value->t.integer;
-
-		if (value->t.integer != -1) {
-			irlan_do_client_event(self, IRLAN_IAS_PROVIDER_AVAIL,
-					      NULL);
-			return;
-		}
-		irias_delete_value(value);
-		break;
-	default:
-		pr_debug("%s(), unknown type!\n", __func__);
-		break;
-	}
-	irlan_do_client_event(self, IRLAN_IAS_PROVIDER_NOT_AVAIL, NULL);
-}
diff --git a/net/irda/irlan/irlan_client_event.c b/net/irda/irlan/irlan_client_event.c
deleted file mode 100644
index cc93fabbbb19..000000000000
--- a/net/irda/irlan/irlan_client_event.c
+++ /dev/null
@@ -1,511 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlan_client_event.c
- * Version:       0.9
- * Description:   IrLAN client state machine
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:37 1997
- * Modified at:   Sun Dec 26 21:52:24 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/timer.h>
-#include <net/irda/irmod.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irttp.h>
-
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_client.h>
-#include <net/irda/irlan_event.h>
-
-static int irlan_client_state_idle (struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-static int irlan_client_state_query(struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-static int irlan_client_state_conn (struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-static int irlan_client_state_info (struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-static int irlan_client_state_media(struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-static int irlan_client_state_open (struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-static int irlan_client_state_wait (struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-static int irlan_client_state_arb  (struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-static int irlan_client_state_data (struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-static int irlan_client_state_close(struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-static int irlan_client_state_sync (struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb);
-
-static int (*state[])(struct irlan_cb *, IRLAN_EVENT event, struct sk_buff *) =
-{
-	irlan_client_state_idle,
-	irlan_client_state_query,
-	irlan_client_state_conn,
-	irlan_client_state_info,
-	irlan_client_state_media,
-	irlan_client_state_open,
-	irlan_client_state_wait,
-	irlan_client_state_arb,
-	irlan_client_state_data,
-	irlan_client_state_close,
-	irlan_client_state_sync
-};
-
-void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event,
-			   struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	(*state[ self->client.state]) (self, event, skb);
-}
-
-/*
- * Function irlan_client_state_idle (event, skb, info)
- *
- *    IDLE, We are waiting for an indication that there is a provider
- *    available.
- */
-static int irlan_client_state_idle(struct irlan_cb *self, IRLAN_EVENT event,
-				   struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
-	switch (event) {
-	case IRLAN_DISCOVERY_INDICATION:
-		if (self->client.iriap) {
-			net_warn_ratelimited("%s(), busy with a previous query\n",
-					     __func__);
-			return -EBUSY;
-		}
-
-		self->client.iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
-						irlan_client_get_value_confirm);
-		/* Get some values from peer IAS */
-		irlan_next_client_state(self, IRLAN_QUERY);
-		iriap_getvaluebyclass_request(self->client.iriap,
-					      self->saddr, self->daddr,
-					      "IrLAN", "IrDA:TinyTP:LsapSel");
-		break;
-	case IRLAN_WATCHDOG_TIMEOUT:
-		pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_client_state_query (event, skb, info)
- *
- *    QUERY, We have queryed the remote IAS and is ready to connect
- *    to provider, just waiting for the confirm.
- *
- */
-static int irlan_client_state_query(struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
-	switch(event) {
-	case IRLAN_IAS_PROVIDER_AVAIL:
-		IRDA_ASSERT(self->dtsap_sel_ctrl != 0, return -1;);
-
-		self->client.open_retries = 0;
-
-		irttp_connect_request(self->client.tsap_ctrl,
-				      self->dtsap_sel_ctrl,
-				      self->saddr, self->daddr, NULL,
-				      IRLAN_MTU, NULL);
-		irlan_next_client_state(self, IRLAN_CONN);
-		break;
-	case IRLAN_IAS_PROVIDER_NOT_AVAIL:
-		pr_debug("%s(), IAS_PROVIDER_NOT_AVAIL\n", __func__);
-		irlan_next_client_state(self, IRLAN_IDLE);
-
-		/* Give the client a kick! */
-		if ((self->provider.access_type == ACCESS_PEER) &&
-		    (self->provider.state != IRLAN_IDLE))
-			irlan_client_wakeup(self, self->saddr, self->daddr);
-		break;
-	case IRLAN_LMP_DISCONNECT:
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_client_state(self, IRLAN_IDLE);
-		break;
-	case IRLAN_WATCHDOG_TIMEOUT:
-		pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_client_state_conn (event, skb, info)
- *
- *    CONN, We have connected to a provider but has not issued any
- *    commands yet.
- *
- */
-static int irlan_client_state_conn(struct irlan_cb *self, IRLAN_EVENT event,
-				   struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-
-	switch (event) {
-	case IRLAN_CONNECT_COMPLETE:
-		/* Send getinfo cmd */
-		irlan_get_provider_info(self);
-		irlan_next_client_state(self, IRLAN_INFO);
-		break;
-	case IRLAN_LMP_DISCONNECT:
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_client_state(self, IRLAN_IDLE);
-		break;
-	case IRLAN_WATCHDOG_TIMEOUT:
-		pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_client_state_info (self, event, skb, info)
- *
- *    INFO, We have issued a GetInfo command and is awaiting a reply.
- */
-static int irlan_client_state_info(struct irlan_cb *self, IRLAN_EVENT event,
-				   struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-
-	switch (event) {
-	case IRLAN_DATA_INDICATION:
-		IRDA_ASSERT(skb != NULL, return -1;);
-
-		irlan_client_parse_response(self, skb);
-
-		irlan_next_client_state(self, IRLAN_MEDIA);
-
-		irlan_get_media_char(self);
-		break;
-
-	case IRLAN_LMP_DISCONNECT:
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_client_state(self, IRLAN_IDLE);
-		break;
-	case IRLAN_WATCHDOG_TIMEOUT:
-		pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_client_state_media (self, event, skb, info)
- *
- *    MEDIA, The irlan_client has issued a GetMedia command and is awaiting a
- *    reply.
- *
- */
-static int irlan_client_state_media(struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-
-	switch(event) {
-	case IRLAN_DATA_INDICATION:
-		irlan_client_parse_response(self, skb);
-		irlan_open_data_channel(self);
-		irlan_next_client_state(self, IRLAN_OPEN);
-		break;
-	case IRLAN_LMP_DISCONNECT:
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_client_state(self, IRLAN_IDLE);
-		break;
-	case IRLAN_WATCHDOG_TIMEOUT:
-		pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_client_state_open (self, event, skb, info)
- *
- *    OPEN, The irlan_client has issued a OpenData command and is awaiting a
- *    reply
- *
- */
-static int irlan_client_state_open(struct irlan_cb *self, IRLAN_EVENT event,
-				   struct sk_buff *skb)
-{
-	struct qos_info qos;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-
-	switch(event) {
-	case IRLAN_DATA_INDICATION:
-		irlan_client_parse_response(self, skb);
-
-		/*
-		 *  Check if we have got the remote TSAP for data
-		 *  communications
-		 */
-		IRDA_ASSERT(self->dtsap_sel_data != 0, return -1;);
-
-		/* Check which access type we are dealing with */
-		switch (self->client.access_type) {
-		case ACCESS_PEER:
-		    if (self->provider.state == IRLAN_OPEN) {
-
-			    irlan_next_client_state(self, IRLAN_ARB);
-			    irlan_do_client_event(self, IRLAN_CHECK_CON_ARB,
-						  NULL);
-		    } else {
-
-			    irlan_next_client_state(self, IRLAN_WAIT);
-		    }
-		    break;
-		case ACCESS_DIRECT:
-		case ACCESS_HOSTED:
-			qos.link_disc_time.bits = 0x01; /* 3 secs */
-
-			irttp_connect_request(self->tsap_data,
-					      self->dtsap_sel_data,
-					      self->saddr, self->daddr, &qos,
-					      IRLAN_MTU, NULL);
-
-			irlan_next_client_state(self, IRLAN_DATA);
-			break;
-		default:
-			pr_debug("%s(), unknown access type!\n", __func__);
-			break;
-		}
-		break;
-	case IRLAN_LMP_DISCONNECT:
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_client_state(self, IRLAN_IDLE);
-		break;
-	case IRLAN_WATCHDOG_TIMEOUT:
-		pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_client_state_wait (self, event, skb, info)
- *
- *    WAIT, The irlan_client is waiting for the local provider to enter the
- *    provider OPEN state.
- *
- */
-static int irlan_client_state_wait(struct irlan_cb *self, IRLAN_EVENT event,
-				   struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-
-	switch(event) {
-	case IRLAN_PROVIDER_SIGNAL:
-		irlan_next_client_state(self, IRLAN_ARB);
-		irlan_do_client_event(self, IRLAN_CHECK_CON_ARB, NULL);
-		break;
-	case IRLAN_LMP_DISCONNECT:
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_client_state(self, IRLAN_IDLE);
-		break;
-	case IRLAN_WATCHDOG_TIMEOUT:
-		pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-static int irlan_client_state_arb(struct irlan_cb *self, IRLAN_EVENT event,
-				  struct sk_buff *skb)
-{
-	struct qos_info qos;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-
-	switch(event) {
-	case IRLAN_CHECK_CON_ARB:
-		if (self->client.recv_arb_val == self->provider.send_arb_val) {
-			irlan_next_client_state(self, IRLAN_CLOSE);
-			irlan_close_data_channel(self);
-		} else if (self->client.recv_arb_val <
-			   self->provider.send_arb_val)
-		{
-			qos.link_disc_time.bits = 0x01; /* 3 secs */
-
-			irlan_next_client_state(self, IRLAN_DATA);
-			irttp_connect_request(self->tsap_data,
-					      self->dtsap_sel_data,
-					      self->saddr, self->daddr, &qos,
-					      IRLAN_MTU, NULL);
-		} else if (self->client.recv_arb_val >
-			   self->provider.send_arb_val)
-		{
-			pr_debug("%s(), lost the battle :-(\n", __func__);
-		}
-		break;
-	case IRLAN_DATA_CONNECT_INDICATION:
-		irlan_next_client_state(self, IRLAN_DATA);
-		break;
-	case IRLAN_LMP_DISCONNECT:
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_client_state(self, IRLAN_IDLE);
-		break;
-	case IRLAN_WATCHDOG_TIMEOUT:
-		pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_client_state_data (self, event, skb, info)
- *
- *    DATA, The data channel is connected, allowing data transfers between
- *    the local and remote machines.
- *
- */
-static int irlan_client_state_data(struct irlan_cb *self, IRLAN_EVENT event,
-				   struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
-	switch(event) {
-	case IRLAN_DATA_INDICATION:
-		irlan_client_parse_response(self, skb);
-		break;
-	case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_client_state(self, IRLAN_IDLE);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_client_state_close (self, event, skb, info)
- *
- *
- *
- */
-static int irlan_client_state_close(struct irlan_cb *self, IRLAN_EVENT event,
-				    struct sk_buff *skb)
-{
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_client_state_sync (self, event, skb, info)
- *
- *
- *
- */
-static int irlan_client_state_sync(struct irlan_cb *self, IRLAN_EVENT event,
-				   struct sk_buff *skb)
-{
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
deleted file mode 100644
index 481bbc2a4349..000000000000
--- a/net/irda/irlan/irlan_common.c
+++ /dev/null
@@ -1,1176 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlan_common.c
- * Version:       0.9
- * Description:   IrDA LAN Access Protocol Implementation
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:37 1997
- * Modified at:   Sun Dec 26 21:53:10 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/random.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/rtnetlink.h>
-#include <linux/moduleparam.h>
-#include <linux/bitops.h>
-
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/iriap.h>
-#include <net/irda/timer.h>
-
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_client.h>
-#include <net/irda/irlan_provider.h>
-#include <net/irda/irlan_eth.h>
-#include <net/irda/irlan_filter.h>
-
-
-/* extern char sysctl_devname[]; */
-
-/*
- *  Master structure
- */
-static LIST_HEAD(irlans);
-
-static void *ckey;
-static void *skey;
-
-/* Module parameters */
-static bool eth;   /* Use "eth" or "irlan" name for devices */
-static int access = ACCESS_PEER; /* PEER, DIRECT or HOSTED */
-
-#ifdef CONFIG_PROC_FS
-static const char *const irlan_access[] = {
-	"UNKNOWN",
-	"DIRECT",
-	"PEER",
-	"HOSTED"
-};
-
-static const char *const irlan_media[] = {
-	"UNKNOWN",
-	"802.3",
-	"802.5"
-};
-
-extern struct proc_dir_entry *proc_irda;
-
-static int irlan_seq_open(struct inode *inode, struct file *file);
-
-static const struct file_operations irlan_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = irlan_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
-extern struct proc_dir_entry *proc_irda;
-#endif /* CONFIG_PROC_FS */
-
-static struct irlan_cb __init *irlan_open(__u32 saddr, __u32 daddr);
-static void __irlan_close(struct irlan_cb *self);
-static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
-				__u8 value_byte, __u16 value_short,
-				__u8 *value_array, __u16 value_len);
-static void irlan_open_unicast_addr(struct irlan_cb *self);
-static void irlan_get_unicast_addr(struct irlan_cb *self);
-void irlan_close_tsaps(struct irlan_cb *self);
-
-/*
- * Function irlan_init (void)
- *
- *    Initialize IrLAN layer
- *
- */
-static int __init irlan_init(void)
-{
-	struct irlan_cb *new;
-	__u16 hints;
-
-#ifdef CONFIG_PROC_FS
-	{ struct proc_dir_entry *proc;
-	proc = proc_create("irlan", 0, proc_irda, &irlan_fops);
-	if (!proc) {
-		printk(KERN_ERR "irlan_init: can't create /proc entry!\n");
-		return -ENODEV;
-	}
-	}
-#endif /* CONFIG_PROC_FS */
-
-	hints = irlmp_service_to_hint(S_LAN);
-
-	/* Register with IrLMP as a client */
-	ckey = irlmp_register_client(hints, &irlan_client_discovery_indication,
-				     NULL, NULL);
-	if (!ckey)
-		goto err_ckey;
-
-	/* Register with IrLMP as a service */
-	skey = irlmp_register_service(hints);
-	if (!skey)
-		goto err_skey;
-
-	/* Start the master IrLAN instance (the only one for now) */
-	new = irlan_open(DEV_ADDR_ANY, DEV_ADDR_ANY);
-	if (!new)
-		goto err_open;
-
-	/* The master will only open its (listen) control TSAP */
-	irlan_provider_open_ctrl_tsap(new);
-
-	/* Do some fast discovery! */
-	irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS);
-
-	return 0;
-
-err_open:
-	irlmp_unregister_service(skey);
-err_skey:
-	irlmp_unregister_client(ckey);
-err_ckey:
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("irlan", proc_irda);
-#endif /* CONFIG_PROC_FS */
-
-	return -ENOMEM;
-}
-
-static void __exit irlan_cleanup(void)
-{
-	struct irlan_cb *self, *next;
-
-	irlmp_unregister_client(ckey);
-	irlmp_unregister_service(skey);
-
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("irlan", proc_irda);
-#endif /* CONFIG_PROC_FS */
-
-	/* Cleanup any leftover network devices */
-	rtnl_lock();
-	list_for_each_entry_safe(self, next, &irlans, dev_list) {
-		__irlan_close(self);
-	}
-	rtnl_unlock();
-}
-
-/*
- * Function irlan_open (void)
- *
- *    Open new instance of a client/provider, we should only register the
- *    network device if this instance is ment for a particular client/provider
- */
-static struct irlan_cb __init *irlan_open(__u32 saddr, __u32 daddr)
-{
-	struct net_device *dev;
-	struct irlan_cb *self;
-
-	/* Create network device with irlan */
-	dev = alloc_irlandev(eth ? "eth%d" : "irlan%d");
-	if (!dev)
-		return NULL;
-
-	self = netdev_priv(dev);
-	self->dev = dev;
-
-	/*
-	 *  Initialize local device structure
-	 */
-	self->magic = IRLAN_MAGIC;
-	self->saddr = saddr;
-	self->daddr = daddr;
-
-	/* Provider access can only be PEER, DIRECT, or HOSTED */
-	self->provider.access_type = access;
-	if (access == ACCESS_DIRECT) {
-		/*
-		 * Since we are emulating an IrLAN sever we will have to
-		 * give ourself an ethernet address!
-		 */
-		dev->dev_addr[0] = 0x40;
-		dev->dev_addr[1] = 0x00;
-		dev->dev_addr[2] = 0x00;
-		dev->dev_addr[3] = 0x00;
-		get_random_bytes(dev->dev_addr+4, 1);
-		get_random_bytes(dev->dev_addr+5, 1);
-	}
-
-	self->media = MEDIA_802_3;
-	self->disconnect_reason = LM_USER_REQUEST;
-	init_timer(&self->watchdog_timer);
-	init_timer(&self->client.kick_timer);
-	init_waitqueue_head(&self->open_wait);
-
-	skb_queue_head_init(&self->client.txq);
-
-	irlan_next_client_state(self, IRLAN_IDLE);
-	irlan_next_provider_state(self, IRLAN_IDLE);
-
-	if (register_netdev(dev)) {
-		pr_debug("%s(), register_netdev() failed!\n",
-			 __func__);
-		self = NULL;
-		free_netdev(dev);
-	} else {
-		rtnl_lock();
-		list_add_rcu(&self->dev_list, &irlans);
-		rtnl_unlock();
-	}
-
-	return self;
-}
-/*
- * Function __irlan_close (self)
- *
- *    This function closes and deallocates the IrLAN client instances. Be
- *    aware that other functions which calls client_close() must
- *    remove self from irlans list first.
- */
-static void __irlan_close(struct irlan_cb *self)
-{
-	ASSERT_RTNL();
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	del_timer_sync(&self->watchdog_timer);
-	del_timer_sync(&self->client.kick_timer);
-
-	/* Close all open connections and remove TSAPs */
-	irlan_close_tsaps(self);
-
-	if (self->client.iriap)
-		iriap_close(self->client.iriap);
-
-	/* Remove frames queued on the control channel */
-	skb_queue_purge(&self->client.txq);
-
-	/* Unregister and free self via destructor */
-	unregister_netdevice(self->dev);
-}
-
-/* Find any instance of irlan, used for client discovery wakeup */
-struct irlan_cb *irlan_get_any(void)
-{
-	struct irlan_cb *self;
-
-	list_for_each_entry_rcu(self, &irlans, dev_list) {
-		return self;
-	}
-	return NULL;
-}
-
-/*
- * Function irlan_connect_indication (instance, sap, qos, max_sdu_size, skb)
- *
- *    Here we receive the connect indication for the data channel
- *
- */
-static void irlan_connect_indication(void *instance, void *sap,
-				     struct qos_info *qos,
-				     __u32 max_sdu_size,
-				     __u8 max_header_size,
-				     struct sk_buff *skb)
-{
-	struct irlan_cb *self;
-	struct tsap_cb *tsap;
-
-	self = instance;
-	tsap = sap;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-	IRDA_ASSERT(tsap == self->tsap_data,return;);
-
-	self->max_sdu_size = max_sdu_size;
-	self->max_header_size = max_header_size;
-
-	pr_debug("%s: We are now connected!\n", __func__);
-
-	del_timer(&self->watchdog_timer);
-
-	/* If you want to pass the skb to *both* state machines, you will
-	 * need to skb_clone() it, so that you don't free it twice.
-	 * As the state machines don't need it, git rid of it here...
-	 * Jean II */
-	if (skb)
-		dev_kfree_skb(skb);
-
-	irlan_do_provider_event(self, IRLAN_DATA_CONNECT_INDICATION, NULL);
-	irlan_do_client_event(self, IRLAN_DATA_CONNECT_INDICATION, NULL);
-
-	if (self->provider.access_type == ACCESS_PEER) {
-		/*
-		 * Data channel is open, so we are now allowed to
-		 * configure the remote filter
-		 */
-		irlan_get_unicast_addr(self);
-		irlan_open_unicast_addr(self);
-	}
-	/* Ready to transfer Ethernet frames (at last) */
-	netif_start_queue(self->dev); /* Clear reason */
-}
-
-static void irlan_connect_confirm(void *instance, void *sap,
-				  struct qos_info *qos,
-				  __u32 max_sdu_size,
-				  __u8 max_header_size,
-				  struct sk_buff *skb)
-{
-	struct irlan_cb *self;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	self->max_sdu_size = max_sdu_size;
-	self->max_header_size = max_header_size;
-
-	/* TODO: we could set the MTU depending on the max_sdu_size */
-
-	pr_debug("%s: We are now connected!\n", __func__);
-	del_timer(&self->watchdog_timer);
-
-	/*
-	 * Data channel is open, so we are now allowed to configure the remote
-	 * filter
-	 */
-	irlan_get_unicast_addr(self);
-	irlan_open_unicast_addr(self);
-
-	/* Open broadcast and multicast filter by default */
-	irlan_set_broadcast_filter(self, TRUE);
-	irlan_set_multicast_filter(self, TRUE);
-
-	/* Ready to transfer Ethernet frames */
-	netif_start_queue(self->dev);
-	self->disconnect_reason = 0; /* Clear reason */
-	wake_up_interruptible(&self->open_wait);
-}
-
-/*
- * Function irlan_client_disconnect_indication (handle)
- *
- *    Callback function for the IrTTP layer. Indicates a disconnection of
- *    the specified connection (handle)
- */
-static void irlan_disconnect_indication(void *instance,
-					void *sap, LM_REASON reason,
-					struct sk_buff *userdata)
-{
-	struct irlan_cb *self;
-	struct tsap_cb *tsap;
-
-	pr_debug("%s(), reason=%d\n", __func__ , reason);
-
-	self = instance;
-	tsap = sap;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-	IRDA_ASSERT(tsap != NULL, return;);
-	IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;);
-
-	IRDA_ASSERT(tsap == self->tsap_data, return;);
-
-	pr_debug("IrLAN, data channel disconnected by peer!\n");
-
-	/* Save reason so we know if we should try to reconnect or not */
-	self->disconnect_reason = reason;
-
-	switch (reason) {
-	case LM_USER_REQUEST: /* User request */
-		pr_debug("%s(), User requested\n", __func__);
-		break;
-	case LM_LAP_DISCONNECT: /* Unexpected IrLAP disconnect */
-		pr_debug("%s(), Unexpected IrLAP disconnect\n", __func__);
-		break;
-	case LM_CONNECT_FAILURE: /* Failed to establish IrLAP connection */
-		pr_debug("%s(), IrLAP connect failed\n", __func__);
-		break;
-	case LM_LAP_RESET:  /* IrLAP reset */
-		pr_debug("%s(), IrLAP reset\n", __func__);
-		break;
-	case LM_INIT_DISCONNECT:
-		pr_debug("%s(), IrLMP connect failed\n", __func__);
-		break;
-	default:
-		net_err_ratelimited("%s(), Unknown disconnect reason\n",
-				    __func__);
-		break;
-	}
-
-	/* If you want to pass the skb to *both* state machines, you will
-	 * need to skb_clone() it, so that you don't free it twice.
-	 * As the state machines don't need it, git rid of it here...
-	 * Jean II */
-	if (userdata)
-		dev_kfree_skb(userdata);
-
-	irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL);
-	irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL);
-
-	wake_up_interruptible(&self->open_wait);
-}
-
-void irlan_open_data_tsap(struct irlan_cb *self)
-{
-	struct tsap_cb *tsap;
-	notify_t notify;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	/* Check if already open */
-	if (self->tsap_data)
-		return;
-
-	irda_notify_init(&notify);
-
-	notify.data_indication       = irlan_eth_receive;
-	notify.udata_indication      = irlan_eth_receive;
-	notify.connect_indication    = irlan_connect_indication;
-	notify.connect_confirm       = irlan_connect_confirm;
-	notify.flow_indication       = irlan_eth_flow_indication;
-	notify.disconnect_indication = irlan_disconnect_indication;
-	notify.instance              = self;
-	strlcpy(notify.name, "IrLAN data", sizeof(notify.name));
-
-	tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT, &notify);
-	if (!tsap) {
-		pr_debug("%s(), Got no tsap!\n", __func__);
-		return;
-	}
-	self->tsap_data = tsap;
-
-	/*
-	 *  This is the data TSAP selector which we will pass to the client
-	 *  when the client ask for it.
-	 */
-	self->stsap_sel_data = self->tsap_data->stsap_sel;
-}
-
-void irlan_close_tsaps(struct irlan_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	/* Disconnect and close all open TSAP connections */
-	if (self->tsap_data) {
-		irttp_disconnect_request(self->tsap_data, NULL, P_NORMAL);
-		irttp_close_tsap(self->tsap_data);
-		self->tsap_data = NULL;
-	}
-	if (self->client.tsap_ctrl) {
-		irttp_disconnect_request(self->client.tsap_ctrl, NULL,
-					 P_NORMAL);
-		irttp_close_tsap(self->client.tsap_ctrl);
-		self->client.tsap_ctrl = NULL;
-	}
-	if (self->provider.tsap_ctrl) {
-		irttp_disconnect_request(self->provider.tsap_ctrl, NULL,
-					 P_NORMAL);
-		irttp_close_tsap(self->provider.tsap_ctrl);
-		self->provider.tsap_ctrl = NULL;
-	}
-	self->disconnect_reason = LM_USER_REQUEST;
-}
-
-/*
- * Function irlan_ias_register (self, tsap_sel)
- *
- *    Register with LM-IAS
- *
- */
-void irlan_ias_register(struct irlan_cb *self, __u8 tsap_sel)
-{
-	struct ias_object *obj;
-	struct ias_value *new_value;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	/*
-	 * Check if object has already been registered by a previous provider.
-	 * If that is the case, we just change the value of the attribute
-	 */
-	if (!irias_find_object("IrLAN")) {
-		obj = irias_new_object("IrLAN", IAS_IRLAN_ID);
-		irias_add_integer_attrib(obj, "IrDA:TinyTP:LsapSel", tsap_sel,
-					 IAS_KERNEL_ATTR);
-		irias_insert_object(obj);
-	} else {
-		new_value = irias_new_integer_value(tsap_sel);
-		irias_object_change_attribute("IrLAN", "IrDA:TinyTP:LsapSel",
-					      new_value);
-	}
-
-	/* Register PnP object only if not registered before */
-	if (!irias_find_object("PnP")) {
-		obj = irias_new_object("PnP", IAS_PNP_ID);
-#if 0
-		irias_add_string_attrib(obj, "Name", sysctl_devname,
-					IAS_KERNEL_ATTR);
-#else
-		irias_add_string_attrib(obj, "Name", "Linux", IAS_KERNEL_ATTR);
-#endif
-		irias_add_string_attrib(obj, "DeviceID", "HWP19F0",
-					IAS_KERNEL_ATTR);
-		irias_add_integer_attrib(obj, "CompCnt", 1, IAS_KERNEL_ATTR);
-		if (self->provider.access_type == ACCESS_PEER)
-			irias_add_string_attrib(obj, "Comp#01", "PNP8389",
-						IAS_KERNEL_ATTR);
-		else
-			irias_add_string_attrib(obj, "Comp#01", "PNP8294",
-						IAS_KERNEL_ATTR);
-
-		irias_add_string_attrib(obj, "Manufacturer",
-					"Linux-IrDA Project", IAS_KERNEL_ATTR);
-		irias_insert_object(obj);
-	}
-}
-
-/*
- * Function irlan_run_ctrl_tx_queue (self)
- *
- *    Try to send the next command in the control transmit queue
- *
- */
-int irlan_run_ctrl_tx_queue(struct irlan_cb *self)
-{
-	struct sk_buff *skb;
-
-	if (irda_lock(&self->client.tx_busy) == FALSE)
-		return -EBUSY;
-
-	skb = skb_dequeue(&self->client.txq);
-	if (!skb) {
-		self->client.tx_busy = FALSE;
-		return 0;
-	}
-
-	/* Check that it's really possible to send commands */
-	if ((self->client.tsap_ctrl == NULL) ||
-	    (self->client.state == IRLAN_IDLE))
-	{
-		self->client.tx_busy = FALSE;
-		dev_kfree_skb(skb);
-		return -1;
-	}
-	pr_debug("%s(), sending ...\n", __func__);
-
-	return irttp_data_request(self->client.tsap_ctrl, skb);
-}
-
-/*
- * Function irlan_ctrl_data_request (self, skb)
- *
- *    This function makes sure that commands on the control channel is being
- *    sent in a command/response fashion
- */
-static void irlan_ctrl_data_request(struct irlan_cb *self, struct sk_buff *skb)
-{
-	/* Queue command */
-	skb_queue_tail(&self->client.txq, skb);
-
-	/* Try to send command */
-	irlan_run_ctrl_tx_queue(self);
-}
-
-/*
- * Function irlan_get_provider_info (self)
- *
- *    Send Get Provider Information command to peer IrLAN layer
- *
- */
-void irlan_get_provider_info(struct irlan_cb *self)
-{
-	struct sk_buff *skb;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER,
-			GFP_ATOMIC);
-	if (!skb)
-		return;
-
-	/* Reserve space for TTP, LMP, and LAP header */
-	skb_reserve(skb, self->client.max_header_size);
-	skb_put(skb, 2);
-
-	frame = skb->data;
-
-	frame[0] = CMD_GET_PROVIDER_INFO;
-	frame[1] = 0x00;                 /* Zero parameters */
-
-	irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_open_data_channel (self)
- *
- *    Send an Open Data Command to provider
- *
- */
-void irlan_open_data_channel(struct irlan_cb *self)
-{
-	struct sk_buff *skb;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
-			IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3") +
-			IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "DIRECT"),
-			GFP_ATOMIC);
-	if (!skb)
-		return;
-
-	skb_reserve(skb, self->client.max_header_size);
-	skb_put(skb, 2);
-
-	frame = skb->data;
-
-	/* Build frame */
-	frame[0] = CMD_OPEN_DATA_CHANNEL;
-	frame[1] = 0x02; /* Two parameters */
-
-	irlan_insert_string_param(skb, "MEDIA", "802.3");
-	irlan_insert_string_param(skb, "ACCESS_TYPE", "DIRECT");
-	/* irlan_insert_string_param(skb, "MODE", "UNRELIABLE"); */
-
-/* 	self->use_udata = TRUE; */
-
-	irlan_ctrl_data_request(self, skb);
-}
-
-void irlan_close_data_channel(struct irlan_cb *self)
-{
-	struct sk_buff *skb;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	/* Check if the TSAP is still there */
-	if (self->client.tsap_ctrl == NULL)
-		return;
-
-	skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
-			IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN"),
-			GFP_ATOMIC);
-	if (!skb)
-		return;
-
-	skb_reserve(skb, self->client.max_header_size);
-	skb_put(skb, 2);
-
-	frame = skb->data;
-
-	/* Build frame */
-	frame[0] = CMD_CLOSE_DATA_CHAN;
-	frame[1] = 0x01; /* One parameter */
-
-	irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
-
-	irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_open_unicast_addr (self)
- *
- *    Make IrLAN provider accept ethernet frames addressed to the unicast
- *    address.
- *
- */
-static void irlan_open_unicast_addr(struct irlan_cb *self)
-{
-	struct sk_buff *skb;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
-			IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
-			IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") +
-			IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"),
-			GFP_ATOMIC);
-	if (!skb)
-		return;
-
-	/* Reserve space for TTP, LMP, and LAP header */
-	skb_reserve(skb, self->max_header_size);
-	skb_put(skb, 2);
-
-	frame = skb->data;
-
-	frame[0] = CMD_FILTER_OPERATION;
-	frame[1] = 0x03;                 /* Three parameters */
-	irlan_insert_byte_param(skb, "DATA_CHAN" , self->dtsap_sel_data);
-	irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED");
-	irlan_insert_string_param(skb, "FILTER_MODE", "FILTER");
-
-	irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_set_broadcast_filter (self, status)
- *
- *    Make IrLAN provider accept ethernet frames addressed to the broadcast
- *    address. Be careful with the use of this one, since there may be a lot
- *    of broadcast traffic out there. We can still function without this
- *    one but then _we_ have to initiate all communication with other
- *    hosts, since ARP request for this host will not be answered.
- */
-void irlan_set_broadcast_filter(struct irlan_cb *self, int status)
-{
-	struct sk_buff *skb;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
-			IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
-			IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BROADCAST") +
-			/* We may waste one byte here...*/
-			IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"),
-			GFP_ATOMIC);
-	if (!skb)
-		return;
-
-	/* Reserve space for TTP, LMP, and LAP header */
-	skb_reserve(skb, self->client.max_header_size);
-	skb_put(skb, 2);
-
-	frame = skb->data;
-
-	frame[0] = CMD_FILTER_OPERATION;
-	frame[1] = 0x03;                 /* Three parameters */
-	irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
-	irlan_insert_string_param(skb, "FILTER_TYPE", "BROADCAST");
-	if (status)
-		irlan_insert_string_param(skb, "FILTER_MODE", "FILTER");
-	else
-		irlan_insert_string_param(skb, "FILTER_MODE", "NONE");
-
-	irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_set_multicast_filter (self, status)
- *
- *    Make IrLAN provider accept ethernet frames addressed to the multicast
- *    address.
- *
- */
-void irlan_set_multicast_filter(struct irlan_cb *self, int status)
-{
-	struct sk_buff *skb;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
-			IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
-			IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") +
-			/* We may waste one byte here...*/
-			IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "NONE"),
-			GFP_ATOMIC);
-	if (!skb)
-		return;
-
-	/* Reserve space for TTP, LMP, and LAP header */
-	skb_reserve(skb, self->client.max_header_size);
-	skb_put(skb, 2);
-
-	frame = skb->data;
-
-	frame[0] = CMD_FILTER_OPERATION;
-	frame[1] = 0x03;                 /* Three parameters */
-	irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
-	irlan_insert_string_param(skb, "FILTER_TYPE", "MULTICAST");
-	if (status)
-		irlan_insert_string_param(skb, "FILTER_MODE", "ALL");
-	else
-		irlan_insert_string_param(skb, "FILTER_MODE", "NONE");
-
-	irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_get_unicast_addr (self)
- *
- *    Retrieves the unicast address from the IrLAN provider. This address
- *    will be inserted into the devices structure, so the ethernet layer
- *    can construct its packets.
- *
- */
-static void irlan_get_unicast_addr(struct irlan_cb *self)
-{
-	struct sk_buff *skb;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
-			IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") +
-			IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") +
-			IRLAN_STRING_PARAMETER_LEN("FILTER_OPERATION",
-						   "DYNAMIC"),
-			GFP_ATOMIC);
-	if (!skb)
-		return;
-
-	/* Reserve space for TTP, LMP, and LAP header */
-	skb_reserve(skb, self->client.max_header_size);
-	skb_put(skb, 2);
-
-	frame = skb->data;
-
-	frame[0] = CMD_FILTER_OPERATION;
-	frame[1] = 0x03;                 /* Three parameters */
-	irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data);
-	irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED");
-	irlan_insert_string_param(skb, "FILTER_OPERATION", "DYNAMIC");
-
-	irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function irlan_get_media_char (self)
- *
- *
- *
- */
-void irlan_get_media_char(struct irlan_cb *self)
-{
-	struct sk_buff *skb;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
-			IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3"),
-			GFP_ATOMIC);
-
-	if (!skb)
-		return;
-
-	/* Reserve space for TTP, LMP, and LAP header */
-	skb_reserve(skb, self->client.max_header_size);
-	skb_put(skb, 2);
-
-	frame = skb->data;
-
-	/* Build frame */
-	frame[0] = CMD_GET_MEDIA_CHAR;
-	frame[1] = 0x01; /* One parameter */
-
-	irlan_insert_string_param(skb, "MEDIA", "802.3");
-	irlan_ctrl_data_request(self, skb);
-}
-
-/*
- * Function insert_byte_param (skb, param, value)
- *
- *    Insert byte parameter into frame
- *
- */
-int irlan_insert_byte_param(struct sk_buff *skb, char *param, __u8 value)
-{
-	return __irlan_insert_param(skb, param, IRLAN_BYTE, value, 0, NULL, 0);
-}
-
-int irlan_insert_short_param(struct sk_buff *skb, char *param, __u16 value)
-{
-	return __irlan_insert_param(skb, param, IRLAN_SHORT, 0, value, NULL, 0);
-}
-
-/*
- * Function insert_string (skb, param, value)
- *
- *    Insert string parameter into frame
- *
- */
-int irlan_insert_string_param(struct sk_buff *skb, char *param, char *string)
-{
-	int string_len = strlen(string);
-
-	return __irlan_insert_param(skb, param, IRLAN_ARRAY, 0, 0, string,
-				    string_len);
-}
-
-/*
- * Function insert_array_param(skb, param, value, len_value)
- *
- *    Insert array parameter into frame
- *
- */
-int irlan_insert_array_param(struct sk_buff *skb, char *name, __u8 *array,
-			     __u16 array_len)
-{
-	return __irlan_insert_param(skb, name, IRLAN_ARRAY, 0, 0, array,
-				    array_len);
-}
-
-/*
- * Function insert_param (skb, param, value, byte)
- *
- *    Insert parameter at end of buffer, structure of a parameter is:
- *
- *    -----------------------------------------------------------------------
- *    | Name Length[1] | Param Name[1..255] | Val Length[2] | Value[0..1016]|
- *    -----------------------------------------------------------------------
- */
-static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
-				__u8 value_byte, __u16 value_short,
-				__u8 *value_array, __u16 value_len)
-{
-	__u8 *frame;
-	__u8 param_len;
-	__le16 tmp_le; /* Temporary value in little endian format */
-	int n=0;
-
-	if (skb == NULL) {
-		pr_debug("%s(), Got NULL skb\n", __func__);
-		return 0;
-	}
-
-	param_len = strlen(param);
-	switch (type) {
-	case IRLAN_BYTE:
-		value_len = 1;
-		break;
-	case IRLAN_SHORT:
-		value_len = 2;
-		break;
-	case IRLAN_ARRAY:
-		IRDA_ASSERT(value_array != NULL, return 0;);
-		IRDA_ASSERT(value_len > 0, return 0;);
-		break;
-	default:
-		pr_debug("%s(), Unknown parameter type!\n", __func__);
-		return 0;
-	}
-
-	/* Insert at end of sk-buffer */
-	frame = skb_tail_pointer(skb);
-
-	/* Make space for data */
-	if (skb_tailroom(skb) < (param_len+value_len+3)) {
-		pr_debug("%s(), No more space at end of skb\n", __func__);
-		return 0;
-	}
-	skb_put(skb, param_len+value_len+3);
-
-	/* Insert parameter length */
-	frame[n++] = param_len;
-
-	/* Insert parameter */
-	memcpy(frame+n, param, param_len); n += param_len;
-
-	/* Insert value length (2 byte little endian format, LSB first) */
-	tmp_le = cpu_to_le16(value_len);
-	memcpy(frame+n, &tmp_le, 2); n += 2; /* To avoid alignment problems */
-
-	/* Insert value */
-	switch (type) {
-	case IRLAN_BYTE:
-		frame[n++] = value_byte;
-		break;
-	case IRLAN_SHORT:
-		tmp_le = cpu_to_le16(value_short);
-		memcpy(frame+n, &tmp_le, 2); n += 2;
-		break;
-	case IRLAN_ARRAY:
-		memcpy(frame+n, value_array, value_len); n+=value_len;
-		break;
-	default:
-		break;
-	}
-	IRDA_ASSERT(n == (param_len+value_len+3), return 0;);
-
-	return param_len+value_len+3;
-}
-
-/*
- * Function irlan_extract_param (buf, name, value, len)
- *
- *    Extracts a single parameter name/value pair from buffer and updates
- *    the buffer pointer to point to the next name/value pair.
- */
-int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len)
-{
-	__u8 name_len;
-	__u16 val_len;
-	int n=0;
-
-	/* get length of parameter name (1 byte) */
-	name_len = buf[n++];
-
-	if (name_len > 254) {
-		pr_debug("%s(), name_len > 254\n", __func__);
-		return -RSP_INVALID_COMMAND_FORMAT;
-	}
-
-	/* get parameter name */
-	memcpy(name, buf+n, name_len);
-	name[name_len] = '\0';
-	n+=name_len;
-
-	/*
-	 *  Get length of parameter value (2 bytes in little endian
-	 *  format)
-	 */
-	memcpy(&val_len, buf+n, 2); /* To avoid alignment problems */
-	le16_to_cpus(&val_len); n+=2;
-
-	if (val_len >= 1016) {
-		pr_debug("%s(), parameter length to long\n", __func__);
-		return -RSP_INVALID_COMMAND_FORMAT;
-	}
-	*len = val_len;
-
-	/* get parameter value */
-	memcpy(value, buf+n, val_len);
-	value[val_len] = '\0';
-	n+=val_len;
-
-	pr_debug("Parameter: %s ", name);
-	pr_debug("Value: %s\n", value);
-
-	return n;
-}
-
-#ifdef CONFIG_PROC_FS
-
-/*
- * Start of reading /proc entries.
- * Return entry at pos,
- *	or start_token to indicate print header line
- *	or NULL if end of file
- */
-static void *irlan_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	rcu_read_lock();
-	return seq_list_start_head(&irlans, *pos);
-}
-
-/* Return entry after v, and increment pos */
-static void *irlan_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	return seq_list_next(v, &irlans, pos);
-}
-
-/* End of reading /proc file */
-static void irlan_seq_stop(struct seq_file *seq, void *v)
-{
-	rcu_read_unlock();
-}
-
-
-/*
- * Show one entry in /proc file.
- */
-static int irlan_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == &irlans)
-		seq_puts(seq, "IrLAN instances:\n");
-	else {
-		struct irlan_cb *self = list_entry(v, struct irlan_cb, dev_list);
-
-		IRDA_ASSERT(self != NULL, return -1;);
-		IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
-		seq_printf(seq,"ifname: %s,\n",
-			       self->dev->name);
-		seq_printf(seq,"client state: %s, ",
-			       irlan_state[ self->client.state]);
-		seq_printf(seq,"provider state: %s,\n",
-			       irlan_state[ self->provider.state]);
-		seq_printf(seq,"saddr: %#08x, ",
-			       self->saddr);
-		seq_printf(seq,"daddr: %#08x\n",
-			       self->daddr);
-		seq_printf(seq,"version: %d.%d,\n",
-			       self->version[1], self->version[0]);
-		seq_printf(seq,"access type: %s\n",
-			       irlan_access[self->client.access_type]);
-		seq_printf(seq,"media: %s\n",
-			       irlan_media[self->media]);
-
-		seq_printf(seq,"local filter:\n");
-		seq_printf(seq,"remote filter: ");
-		irlan_print_filter(seq, self->client.filter_type);
-		seq_printf(seq,"tx busy: %s\n",
-			       netif_queue_stopped(self->dev) ? "TRUE" : "FALSE");
-
-		seq_putc(seq,'\n');
-	}
-	return 0;
-}
-
-static const struct seq_operations irlan_seq_ops = {
-	.start = irlan_seq_start,
-	.next  = irlan_seq_next,
-	.stop  = irlan_seq_stop,
-	.show  = irlan_seq_show,
-};
-
-static int irlan_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &irlan_seq_ops);
-}
-#endif
-
-MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>");
-MODULE_DESCRIPTION("The Linux IrDA LAN protocol");
-MODULE_LICENSE("GPL");
-
-module_param(eth, bool, 0);
-MODULE_PARM_DESC(eth, "Name devices ethX (0) or irlanX (1)");
-module_param(access, int, 0);
-MODULE_PARM_DESC(access, "Access type DIRECT=1, PEER=2, HOSTED=3");
-
-module_init(irlan_init);
-module_exit(irlan_cleanup);
-
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
deleted file mode 100644
index 3be852808a9d..000000000000
--- a/net/irda/irlan/irlan_eth.c
+++ /dev/null
@@ -1,340 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlan_eth.c
- * Version:
- * Description:
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Thu Oct 15 08:37:58 1998
- * Modified at:   Tue Mar 21 09:06:41 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * Sources:       skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov>
- *                slip.c by Laurence Culhane,   <loz@holmes.demon.co.uk>
- *                          Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org>
- *
- *     Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_arp.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <net/arp.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irmod.h>
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_client.h>
-#include <net/irda/irlan_event.h>
-#include <net/irda/irlan_eth.h>
-
-static int  irlan_eth_open(struct net_device *dev);
-static int  irlan_eth_close(struct net_device *dev);
-static netdev_tx_t  irlan_eth_xmit(struct sk_buff *skb,
-					 struct net_device *dev);
-static void irlan_eth_set_multicast_list(struct net_device *dev);
-
-static const struct net_device_ops irlan_eth_netdev_ops = {
-	.ndo_open		= irlan_eth_open,
-	.ndo_stop		= irlan_eth_close,
-	.ndo_start_xmit		= irlan_eth_xmit,
-	.ndo_set_rx_mode	= irlan_eth_set_multicast_list,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-/*
- * Function irlan_eth_setup (dev)
- *
- *    The network device initialization function.
- *
- */
-static void irlan_eth_setup(struct net_device *dev)
-{
-	ether_setup(dev);
-
-	dev->netdev_ops		= &irlan_eth_netdev_ops;
-	dev->needs_free_netdev	= true;
-	dev->min_mtu		= 0;
-	dev->max_mtu		= ETH_MAX_MTU;
-
-	/*
-	 * Lets do all queueing in IrTTP instead of this device driver.
-	 * Queueing here as well can introduce some strange latency
-	 * problems, which we will avoid by setting the queue size to 0.
-	 */
-	/*
-	 * The bugs in IrTTP and IrLAN that created this latency issue
-	 * have now been fixed, and we can propagate flow control properly
-	 * to the network layer. However, this requires a minimal queue of
-	 * packets for the device.
-	 * Without flow control, the Tx Queue is 14 (ttp) + 0 (dev) = 14
-	 * With flow control, the Tx Queue is 7 (ttp) + 4 (dev) = 11
-	 * See irlan_eth_flow_indication()...
-	 * Note : this number was randomly selected and would need to
-	 * be adjusted.
-	 * Jean II */
-	dev->tx_queue_len = 4;
-}
-
-/*
- * Function alloc_irlandev
- *
- *    Allocate network device and control block
- *
- */
-struct net_device *alloc_irlandev(const char *name)
-{
-	return alloc_netdev(sizeof(struct irlan_cb), name, NET_NAME_UNKNOWN,
-			    irlan_eth_setup);
-}
-
-/*
- * Function irlan_eth_open (dev)
- *
- *    Network device has been opened by user
- *
- */
-static int irlan_eth_open(struct net_device *dev)
-{
-	struct irlan_cb *self = netdev_priv(dev);
-
-	/* Ready to play! */
-	netif_stop_queue(dev); /* Wait until data link is ready */
-
-	/* We are now open, so time to do some work */
-	self->disconnect_reason = 0;
-	irlan_client_wakeup(self, self->saddr, self->daddr);
-
-	/* Make sure we have a hardware address before we return,
-	   so DHCP clients gets happy */
-	return wait_event_interruptible(self->open_wait,
-					!self->tsap_data->connected);
-}
-
-/*
- * Function irlan_eth_close (dev)
- *
- *    Stop the ether network device, his function will usually be called by
- *    ifconfig down. We should now disconnect the link, We start the
- *    close timer, so that the instance will be removed if we are unable
- *    to discover the remote device after the disconnect.
- */
-static int irlan_eth_close(struct net_device *dev)
-{
-	struct irlan_cb *self = netdev_priv(dev);
-
-	/* Stop device */
-	netif_stop_queue(dev);
-
-	irlan_close_data_channel(self);
-	irlan_close_tsaps(self);
-
-	irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL);
-	irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL);
-
-	/* Remove frames queued on the control channel */
-	skb_queue_purge(&self->client.txq);
-
-	self->client.tx_busy = 0;
-
-	return 0;
-}
-
-/*
- * Function irlan_eth_tx (skb)
- *
- *    Transmits ethernet frames over IrDA link.
- *
- */
-static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
-					struct net_device *dev)
-{
-	struct irlan_cb *self = netdev_priv(dev);
-	int ret;
-	unsigned int len;
-
-	/* skb headroom large enough to contain all IrDA-headers? */
-	if ((skb_headroom(skb) < self->max_header_size) || (skb_shared(skb))) {
-		struct sk_buff *new_skb =
-			skb_realloc_headroom(skb, self->max_header_size);
-
-		/*  We have to free the original skb anyway */
-		dev_kfree_skb(skb);
-
-		/* Did the realloc succeed? */
-		if (new_skb == NULL)
-			return NETDEV_TX_OK;
-
-		/* Use the new skb instead */
-		skb = new_skb;
-	}
-
-	netif_trans_update(dev);
-
-	len = skb->len;
-	/* Now queue the packet in the transport layer */
-	if (self->use_udata)
-		ret = irttp_udata_request(self->tsap_data, skb);
-	else
-		ret = irttp_data_request(self->tsap_data, skb);
-
-	if (ret < 0) {
-		/*
-		 * IrTTPs tx queue is full, so we just have to
-		 * drop the frame! You might think that we should
-		 * just return -1 and don't deallocate the frame,
-		 * but that is dangerous since it's possible that
-		 * we have replaced the original skb with a new
-		 * one with larger headroom, and that would really
-		 * confuse do_dev_queue_xmit() in dev.c! I have
-		 * tried :-) DB
-		 */
-		/* irttp_data_request already free the packet */
-		dev->stats.tx_dropped++;
-	} else {
-		dev->stats.tx_packets++;
-		dev->stats.tx_bytes += len;
-	}
-
-	return NETDEV_TX_OK;
-}
-
-/*
- * Function irlan_eth_receive (handle, skb)
- *
- *    This function gets the data that is received on the data channel
- *
- */
-int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
-{
-	struct irlan_cb *self = instance;
-	struct net_device *dev = self->dev;
-
-	if (skb == NULL) {
-		dev->stats.rx_dropped++;
-		return 0;
-	}
-	if (skb->len < ETH_HLEN) {
-		pr_debug("%s() : IrLAN frame too short (%d)\n",
-			 __func__, skb->len);
-		dev->stats.rx_dropped++;
-		dev_kfree_skb(skb);
-		return 0;
-	}
-
-	/*
-	 * Adopt this frame! Important to set all these fields since they
-	 * might have been previously set by the low level IrDA network
-	 * device driver
-	 */
-	skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */
-
-	dev->stats.rx_packets++;
-	dev->stats.rx_bytes += skb->len;
-
-	netif_rx(skb);   /* Eat it! */
-
-	return 0;
-}
-
-/*
- * Function irlan_eth_flow (status)
- *
- *    Do flow control between IP/Ethernet and IrLAN/IrTTP. This is done by
- *    controlling the queue stop/start.
- *
- * The IrDA link layer has the advantage to have flow control, and
- * IrTTP now properly handles that. Flow controlling the higher layers
- * prevent us to drop Tx packets in here (up to 15% for a TCP socket,
- * more for UDP socket).
- * Also, this allow us to reduce the overall transmit queue, which means
- * less latency in case of mixed traffic.
- * Jean II
- */
-void irlan_eth_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
-{
-	struct irlan_cb *self;
-	struct net_device *dev;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	dev = self->dev;
-
-	IRDA_ASSERT(dev != NULL, return;);
-
-	pr_debug("%s() : flow %s ; running %d\n", __func__,
-		 flow == FLOW_STOP ? "FLOW_STOP" : "FLOW_START",
-		 netif_running(dev));
-
-	switch (flow) {
-	case FLOW_STOP:
-		/* IrTTP is full, stop higher layers */
-		netif_stop_queue(dev);
-		break;
-	case FLOW_START:
-	default:
-		/* Tell upper layers that its time to transmit frames again */
-		/* Schedule network layer */
-		netif_wake_queue(dev);
-		break;
-	}
-}
-
-/*
- * Function set_multicast_list (dev)
- *
- *    Configure the filtering of the device
- *
- */
-#define HW_MAX_ADDRS 4 /* Must query to get it! */
-static void irlan_eth_set_multicast_list(struct net_device *dev)
-{
-	struct irlan_cb *self = netdev_priv(dev);
-
-	/* Check if data channel has been connected yet */
-	if (self->client.state != IRLAN_DATA) {
-		pr_debug("%s(), delaying!\n", __func__);
-		return;
-	}
-
-	if (dev->flags & IFF_PROMISC) {
-		/* Enable promiscuous mode */
-		net_warn_ratelimited("Promiscuous mode not implemented by IrLAN!\n");
-	} else if ((dev->flags & IFF_ALLMULTI) ||
-		 netdev_mc_count(dev) > HW_MAX_ADDRS) {
-		/* Disable promiscuous mode, use normal mode. */
-		pr_debug("%s(), Setting multicast filter\n", __func__);
-		/* hardware_set_filter(NULL); */
-
-		irlan_set_multicast_filter(self, TRUE);
-	} else if (!netdev_mc_empty(dev)) {
-		pr_debug("%s(), Setting multicast filter\n", __func__);
-		/* Walk the address list, and load the filter */
-		/* hardware_set_filter(dev->mc_list); */
-
-		irlan_set_multicast_filter(self, TRUE);
-	} else {
-		pr_debug("%s(), Clearing multicast filter\n", __func__);
-		irlan_set_multicast_filter(self, FALSE);
-	}
-
-	if (dev->flags & IFF_BROADCAST)
-		irlan_set_broadcast_filter(self, TRUE);
-	else
-		irlan_set_broadcast_filter(self, FALSE);
-}
diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c
deleted file mode 100644
index 9a1cc11c16f6..000000000000
--- a/net/irda/irlan/irlan_event.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlan_event.c
- * Version:
- * Description:
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Oct 20 09:10:16 1998
- * Modified at:   Sat Oct 30 12:59:01 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <net/irda/irlan_event.h>
-
-const char * const irlan_state[] = {
-	"IRLAN_IDLE",
-	"IRLAN_QUERY",
-	"IRLAN_CONN",
-	"IRLAN_INFO",
-	"IRLAN_MEDIA",
-	"IRLAN_OPEN",
-	"IRLAN_WAIT",
-	"IRLAN_ARB",
-	"IRLAN_DATA",
-	"IRLAN_CLOSE",
-	"IRLAN_SYNC",
-};
-
-void irlan_next_client_state(struct irlan_cb *self, IRLAN_STATE state)
-{
-	pr_debug("%s(), %s\n", __func__ , irlan_state[state]);
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	self->client.state = state;
-}
-
-void irlan_next_provider_state(struct irlan_cb *self, IRLAN_STATE state)
-{
-	pr_debug("%s(), %s\n", __func__ , irlan_state[state]);
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	self->provider.state = state;
-}
-
diff --git a/net/irda/irlan/irlan_filter.c b/net/irda/irlan/irlan_filter.c
deleted file mode 100644
index e755e90b2f26..000000000000
--- a/net/irda/irlan/irlan_filter.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlan_filter.c
- * Version:
- * Description:
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Fri Jan 29 11:16:38 1999
- * Modified at:   Sat Oct 30 12:58:45 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-#include <linux/random.h>
-#include <linux/seq_file.h>
-
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_filter.h>
-
-/*
- * Function irlan_filter_request (self, skb)
- *
- *    Handle filter request from client peer device
- *
- */
-void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	if ((self->provider.filter_type == IRLAN_DIRECTED) &&
-	    (self->provider.filter_operation == DYNAMIC))
-	{
-		pr_debug("Giving peer a dynamic Ethernet address\n");
-		self->provider.mac_address[0] = 0x40;
-		self->provider.mac_address[1] = 0x00;
-		self->provider.mac_address[2] = 0x00;
-		self->provider.mac_address[3] = 0x00;
-
-		/* Use arbitration value to generate MAC address */
-		if (self->provider.access_type == ACCESS_PEER) {
-			self->provider.mac_address[4] =
-				self->provider.send_arb_val & 0xff;
-			self->provider.mac_address[5] =
-				(self->provider.send_arb_val >> 8) & 0xff;
-		} else {
-			/* Just generate something for now */
-			get_random_bytes(self->provider.mac_address+4, 1);
-			get_random_bytes(self->provider.mac_address+5, 1);
-		}
-
-		skb->data[0] = 0x00; /* Success */
-		skb->data[1] = 0x03;
-		irlan_insert_string_param(skb, "FILTER_MODE", "NONE");
-		irlan_insert_short_param(skb, "MAX_ENTRY", 0x0001);
-		irlan_insert_array_param(skb, "FILTER_ENTRY",
-					 self->provider.mac_address, 6);
-		return;
-	}
-
-	if ((self->provider.filter_type == IRLAN_DIRECTED) &&
-	    (self->provider.filter_mode == FILTER))
-	{
-		pr_debug("Directed filter on\n");
-		skb->data[0] = 0x00; /* Success */
-		skb->data[1] = 0x00;
-		return;
-	}
-	if ((self->provider.filter_type == IRLAN_DIRECTED) &&
-	    (self->provider.filter_mode == NONE))
-	{
-		pr_debug("Directed filter off\n");
-		skb->data[0] = 0x00; /* Success */
-		skb->data[1] = 0x00;
-		return;
-	}
-
-	if ((self->provider.filter_type == IRLAN_BROADCAST) &&
-	    (self->provider.filter_mode == FILTER))
-	{
-		pr_debug("Broadcast filter on\n");
-		skb->data[0] = 0x00; /* Success */
-		skb->data[1] = 0x00;
-		return;
-	}
-	if ((self->provider.filter_type == IRLAN_BROADCAST) &&
-	    (self->provider.filter_mode == NONE))
-	{
-		pr_debug("Broadcast filter off\n");
-		skb->data[0] = 0x00; /* Success */
-		skb->data[1] = 0x00;
-		return;
-	}
-	if ((self->provider.filter_type == IRLAN_MULTICAST) &&
-	    (self->provider.filter_mode == FILTER))
-	{
-		pr_debug("Multicast filter on\n");
-		skb->data[0] = 0x00; /* Success */
-		skb->data[1] = 0x00;
-		return;
-	}
-	if ((self->provider.filter_type == IRLAN_MULTICAST) &&
-	    (self->provider.filter_mode == NONE))
-	{
-		pr_debug("Multicast filter off\n");
-		skb->data[0] = 0x00; /* Success */
-		skb->data[1] = 0x00;
-		return;
-	}
-	if ((self->provider.filter_type == IRLAN_MULTICAST) &&
-	    (self->provider.filter_operation == GET))
-	{
-		pr_debug("Multicast filter get\n");
-		skb->data[0] = 0x00; /* Success? */
-		skb->data[1] = 0x02;
-		irlan_insert_string_param(skb, "FILTER_MODE", "NONE");
-		irlan_insert_short_param(skb, "MAX_ENTRY", 16);
-		return;
-	}
-	skb->data[0] = 0x00; /* Command not supported */
-	skb->data[1] = 0x00;
-
-	pr_debug("Not implemented!\n");
-}
-
-/*
- * Function check_request_param (self, param, value)
- *
- *    Check parameters in request from peer device
- *
- */
-void irlan_check_command_param(struct irlan_cb *self, char *param, char *value)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	pr_debug("%s, %s\n", param, value);
-
-	/*
-	 *  This is experimental!! DB.
-	 */
-	 if (strcmp(param, "MODE") == 0) {
-		self->use_udata = TRUE;
-		return;
-	}
-
-	/*
-	 *  FILTER_TYPE
-	 */
-	if (strcmp(param, "FILTER_TYPE") == 0) {
-		if (strcmp(value, "DIRECTED") == 0) {
-			self->provider.filter_type = IRLAN_DIRECTED;
-			return;
-		}
-		if (strcmp(value, "MULTICAST") == 0) {
-			self->provider.filter_type = IRLAN_MULTICAST;
-			return;
-		}
-		if (strcmp(value, "BROADCAST") == 0) {
-			self->provider.filter_type = IRLAN_BROADCAST;
-			return;
-		}
-	}
-	/*
-	 *  FILTER_MODE
-	 */
-	if (strcmp(param, "FILTER_MODE") == 0) {
-		if (strcmp(value, "ALL") == 0) {
-			self->provider.filter_mode = ALL;
-			return;
-		}
-		if (strcmp(value, "FILTER") == 0) {
-			self->provider.filter_mode = FILTER;
-			return;
-		}
-		if (strcmp(value, "NONE") == 0) {
-			self->provider.filter_mode = FILTER;
-			return;
-		}
-	}
-	/*
-	 *  FILTER_OPERATION
-	 */
-	if (strcmp(param, "FILTER_OPERATION") == 0) {
-		if (strcmp(value, "DYNAMIC") == 0) {
-			self->provider.filter_operation = DYNAMIC;
-			return;
-		}
-		if (strcmp(value, "GET") == 0) {
-			self->provider.filter_operation = GET;
-			return;
-		}
-	}
-}
-
-/*
- * Function irlan_print_filter (filter_type, buf)
- *
- *    Print status of filter. Used by /proc file system
- *
- */
-#ifdef CONFIG_PROC_FS
-#define MASK2STR(m,s)	{ .mask = m, .str = s }
-
-void irlan_print_filter(struct seq_file *seq, int filter_type)
-{
-	static struct {
-		int mask;
-		const char *str;
-	} filter_mask2str[] = {
-		MASK2STR(IRLAN_DIRECTED,	"DIRECTED"),
-		MASK2STR(IRLAN_FUNCTIONAL,	"FUNCTIONAL"),
-		MASK2STR(IRLAN_GROUP,		"GROUP"),
-		MASK2STR(IRLAN_MAC_FRAME,	"MAC_FRAME"),
-		MASK2STR(IRLAN_MULTICAST,	"MULTICAST"),
-		MASK2STR(IRLAN_BROADCAST,	"BROADCAST"),
-		MASK2STR(IRLAN_IPX_SOCKET,	"IPX_SOCKET"),
-		MASK2STR(0,			NULL)
-	}, *p;
-
-	for (p = filter_mask2str; p->str; p++) {
-		if (filter_type & p->mask)
-			seq_printf(seq, "%s ", p->str);
-	}
-	seq_putc(seq, '\n');
-}
-#undef MASK2STR
-#endif
diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c
deleted file mode 100644
index 15c292cf2644..000000000000
--- a/net/irda/irlan/irlan_provider.c
+++ /dev/null
@@ -1,408 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlan_provider.c
- * Version:       0.9
- * Description:   IrDA LAN Access Protocol Implementation
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:37 1997
- * Modified at:   Sat Oct 30 12:52:10 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * Sources:       skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov>
- *                slip.c by Laurence Culhane,   <loz@holmes.demon.co.uk>
- *                          Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org>
- *
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/bitops.h>
-#include <linux/slab.h>
-
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irttp.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/iriap.h>
-#include <net/irda/timer.h>
-
-#include <net/irda/irlan_common.h>
-#include <net/irda/irlan_eth.h>
-#include <net/irda/irlan_event.h>
-#include <net/irda/irlan_provider.h>
-#include <net/irda/irlan_filter.h>
-#include <net/irda/irlan_client.h>
-
-static void irlan_provider_connect_indication(void *instance, void *sap,
-					      struct qos_info *qos,
-					      __u32 max_sdu_size,
-					      __u8 max_header_size,
-					      struct sk_buff *skb);
-
-/*
- * Function irlan_provider_control_data_indication (handle, skb)
- *
- *    This function gets the data that is received on the control channel
- *
- */
-static int irlan_provider_data_indication(void *instance, void *sap,
-					  struct sk_buff *skb)
-{
-	struct irlan_cb *self;
-	__u8 code;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	code = skb->data[0];
-	switch(code) {
-	case CMD_GET_PROVIDER_INFO:
-		pr_debug("Got GET_PROVIDER_INFO command!\n");
-		irlan_do_provider_event(self, IRLAN_GET_INFO_CMD, skb);
-		break;
-
-	case CMD_GET_MEDIA_CHAR:
-		pr_debug("Got GET_MEDIA_CHAR command!\n");
-		irlan_do_provider_event(self, IRLAN_GET_MEDIA_CMD, skb);
-		break;
-	case CMD_OPEN_DATA_CHANNEL:
-		pr_debug("Got OPEN_DATA_CHANNEL command!\n");
-		irlan_do_provider_event(self, IRLAN_OPEN_DATA_CMD, skb);
-		break;
-	case CMD_FILTER_OPERATION:
-		pr_debug("Got FILTER_OPERATION command!\n");
-		irlan_do_provider_event(self, IRLAN_FILTER_CONFIG_CMD, skb);
-		break;
-	case CMD_RECONNECT_DATA_CHAN:
-		pr_debug("%s(), Got RECONNECT_DATA_CHAN command\n", __func__);
-		pr_debug("%s(), NOT IMPLEMENTED\n", __func__);
-		break;
-	case CMD_CLOSE_DATA_CHAN:
-		pr_debug("Got CLOSE_DATA_CHAN command!\n");
-		pr_debug("%s(), NOT IMPLEMENTED\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), Unknown command!\n", __func__);
-		break;
-	}
-	return 0;
-}
-
-/*
- * Function irlan_provider_connect_indication (handle, skb, priv)
- *
- *    Got connection from peer IrLAN client
- *
- */
-static void irlan_provider_connect_indication(void *instance, void *sap,
-					      struct qos_info *qos,
-					      __u32 max_sdu_size,
-					      __u8 max_header_size,
-					      struct sk_buff *skb)
-{
-	struct irlan_cb *self;
-	struct tsap_cb *tsap;
-
-	self = instance;
-	tsap = sap;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	IRDA_ASSERT(tsap == self->provider.tsap_ctrl,return;);
-	IRDA_ASSERT(self->provider.state == IRLAN_IDLE, return;);
-
-	self->provider.max_sdu_size = max_sdu_size;
-	self->provider.max_header_size = max_header_size;
-
-	irlan_do_provider_event(self, IRLAN_CONNECT_INDICATION, NULL);
-
-	/*
-	 * If we are in peer mode, the client may not have got the discovery
-	 * indication it needs to make progress. If the client is still in
-	 * IDLE state, we must kick it.
-	 */
-	if ((self->provider.access_type == ACCESS_PEER) &&
-	    (self->client.state == IRLAN_IDLE))
-	{
-		irlan_client_wakeup(self, self->saddr, self->daddr);
-	}
-}
-
-/*
- * Function irlan_provider_connect_response (handle)
- *
- *    Accept incoming connection
- *
- */
-void irlan_provider_connect_response(struct irlan_cb *self,
-				     struct tsap_cb *tsap)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	/* Just accept */
-	irttp_connect_response(tsap, IRLAN_MTU, NULL);
-}
-
-static void irlan_provider_disconnect_indication(void *instance, void *sap,
-						 LM_REASON reason,
-						 struct sk_buff *userdata)
-{
-	struct irlan_cb *self;
-	struct tsap_cb *tsap;
-
-	pr_debug("%s(), reason=%d\n", __func__ , reason);
-
-	self = instance;
-	tsap = sap;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-	IRDA_ASSERT(tsap != NULL, return;);
-	IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;);
-
-	IRDA_ASSERT(tsap == self->provider.tsap_ctrl, return;);
-
-	irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL);
-}
-
-/*
- * Function irlan_parse_open_data_cmd (self, skb)
- *
- *
- *
- */
-int irlan_parse_open_data_cmd(struct irlan_cb *self, struct sk_buff *skb)
-{
-	int ret;
-
-	ret = irlan_provider_parse_command(self, CMD_OPEN_DATA_CHANNEL, skb);
-
-	/* Open data channel */
-	irlan_open_data_tsap(self);
-
-	return ret;
-}
-
-/*
- * Function parse_command (skb)
- *
- *    Extract all parameters from received buffer, then feed them to
- *    check_params for parsing
- *
- */
-int irlan_provider_parse_command(struct irlan_cb *self, int cmd,
-				 struct sk_buff *skb)
-{
-	__u8 *frame;
-	__u8 *ptr;
-	int count;
-	__u16 val_len;
-	int i;
-	char *name;
-	char *value;
-	int ret = RSP_SUCCESS;
-
-	IRDA_ASSERT(skb != NULL, return -RSP_PROTOCOL_ERROR;);
-
-	pr_debug("%s(), skb->len=%d\n", __func__ , (int)skb->len);
-
-	IRDA_ASSERT(self != NULL, return -RSP_PROTOCOL_ERROR;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -RSP_PROTOCOL_ERROR;);
-
-	if (!skb)
-		return -RSP_PROTOCOL_ERROR;
-
-	frame = skb->data;
-
-	name = kmalloc(255, GFP_ATOMIC);
-	if (!name)
-		return -RSP_INSUFFICIENT_RESOURCES;
-	value = kmalloc(1016, GFP_ATOMIC);
-	if (!value) {
-		kfree(name);
-		return -RSP_INSUFFICIENT_RESOURCES;
-	}
-
-	/* How many parameters? */
-	count = frame[1];
-
-	pr_debug("Got %d parameters\n", count);
-
-	ptr = frame+2;
-
-	/* For all parameters */
-	for (i=0; i<count;i++) {
-		ret = irlan_extract_param(ptr, name, value, &val_len);
-		if (ret < 0) {
-			pr_debug("%s(), IrLAN, Error!\n", __func__);
-			break;
-		}
-		ptr+=ret;
-		ret = RSP_SUCCESS;
-		irlan_check_command_param(self, name, value);
-	}
-	/* Cleanup */
-	kfree(name);
-	kfree(value);
-
-	return ret;
-}
-
-/*
- * Function irlan_provider_send_reply (self, info)
- *
- *    Send reply to query to peer IrLAN layer
- *
- */
-void irlan_provider_send_reply(struct irlan_cb *self, int command,
-			       int ret_code)
-{
-	struct sk_buff *skb;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;);
-
-	skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER +
-			/* Bigger param length comes from CMD_GET_MEDIA_CHAR */
-			IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") +
-			IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BROADCAST") +
-			IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") +
-			IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "HOSTED"),
-			GFP_ATOMIC);
-
-	if (!skb)
-		return;
-
-	/* Reserve space for TTP, LMP, and LAP header */
-	skb_reserve(skb, self->provider.max_header_size);
-	skb_put(skb, 2);
-
-	switch (command) {
-	case CMD_GET_PROVIDER_INFO:
-		skb->data[0] = 0x00; /* Success */
-		skb->data[1] = 0x02; /* 2 parameters */
-		switch (self->media) {
-		case MEDIA_802_3:
-			irlan_insert_string_param(skb, "MEDIA", "802.3");
-			break;
-		case MEDIA_802_5:
-			irlan_insert_string_param(skb, "MEDIA", "802.5");
-			break;
-		default:
-			pr_debug("%s(), unknown media type!\n", __func__);
-			break;
-		}
-		irlan_insert_short_param(skb, "IRLAN_VER", 0x0101);
-		break;
-
-	case CMD_GET_MEDIA_CHAR:
-		skb->data[0] = 0x00; /* Success */
-		skb->data[1] = 0x05; /* 5 parameters */
-		irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED");
-		irlan_insert_string_param(skb, "FILTER_TYPE", "BROADCAST");
-		irlan_insert_string_param(skb, "FILTER_TYPE", "MULTICAST");
-
-		switch (self->provider.access_type) {
-		case ACCESS_DIRECT:
-			irlan_insert_string_param(skb, "ACCESS_TYPE", "DIRECT");
-			break;
-		case ACCESS_PEER:
-			irlan_insert_string_param(skb, "ACCESS_TYPE", "PEER");
-			break;
-		case ACCESS_HOSTED:
-			irlan_insert_string_param(skb, "ACCESS_TYPE", "HOSTED");
-			break;
-		default:
-			pr_debug("%s(), Unknown access type\n", __func__);
-			break;
-		}
-		irlan_insert_short_param(skb, "MAX_FRAME", 0x05ee);
-		break;
-	case CMD_OPEN_DATA_CHANNEL:
-		skb->data[0] = 0x00; /* Success */
-		if (self->provider.send_arb_val) {
-			skb->data[1] = 0x03; /* 3 parameters */
-			irlan_insert_short_param(skb, "CON_ARB",
-						 self->provider.send_arb_val);
-		} else
-			skb->data[1] = 0x02; /* 2 parameters */
-		irlan_insert_byte_param(skb, "DATA_CHAN", self->stsap_sel_data);
-		irlan_insert_string_param(skb, "RECONNECT_KEY", "LINUX RULES!");
-		break;
-	case CMD_FILTER_OPERATION:
-		irlan_filter_request(self, skb);
-		break;
-	default:
-		pr_debug("%s(), Unknown command!\n", __func__);
-		break;
-	}
-
-	irttp_data_request(self->provider.tsap_ctrl, skb);
-}
-
-/*
- * Function irlan_provider_register(void)
- *
- *    Register provider support so we can accept incoming connections.
- *
- */
-int irlan_provider_open_ctrl_tsap(struct irlan_cb *self)
-{
-	struct tsap_cb *tsap;
-	notify_t notify;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
-	/* Check if already open */
-	if (self->provider.tsap_ctrl)
-		return -1;
-
-	/*
-	 *  First register well known control TSAP
-	 */
-	irda_notify_init(&notify);
-	notify.data_indication       = irlan_provider_data_indication;
-	notify.connect_indication    = irlan_provider_connect_indication;
-	notify.disconnect_indication = irlan_provider_disconnect_indication;
-	notify.instance = self;
-	strlcpy(notify.name, "IrLAN ctrl (p)", sizeof(notify.name));
-
-	tsap = irttp_open_tsap(LSAP_ANY, 1, &notify);
-	if (!tsap) {
-		pr_debug("%s(), Got no tsap!\n", __func__);
-		return -1;
-	}
-	self->provider.tsap_ctrl = tsap;
-
-	/* Register with LM-IAS */
-	irlan_ias_register(self, tsap->stsap_sel);
-
-	return 0;
-}
-
diff --git a/net/irda/irlan/irlan_provider_event.c b/net/irda/irlan/irlan_provider_event.c
deleted file mode 100644
index 9c4f7f51d6b5..000000000000
--- a/net/irda/irlan/irlan_provider_event.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlan_provider_event.c
- * Version:       0.9
- * Description:   IrLAN provider state machine)
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:37 1997
- * Modified at:   Sat Oct 30 12:52:41 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <net/irda/irda.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irttp.h>
-
-#include <net/irda/irlan_provider.h>
-#include <net/irda/irlan_event.h>
-
-static int irlan_provider_state_idle(struct irlan_cb *self, IRLAN_EVENT event,
-				     struct sk_buff *skb);
-static int irlan_provider_state_info(struct irlan_cb *self, IRLAN_EVENT event,
-				     struct sk_buff *skb);
-static int irlan_provider_state_open(struct irlan_cb *self, IRLAN_EVENT event,
-				     struct sk_buff *skb);
-static int irlan_provider_state_data(struct irlan_cb *self, IRLAN_EVENT event,
-				     struct sk_buff *skb);
-
-static int (*state[])(struct irlan_cb *self, IRLAN_EVENT event,
-		      struct sk_buff *skb) =
-{
-	irlan_provider_state_idle,
-	NULL, /* Query */
-	NULL, /* Info */
-	irlan_provider_state_info,
-	NULL, /* Media */
-	irlan_provider_state_open,
-	NULL, /* Wait */
-	NULL, /* Arb */
-	irlan_provider_state_data,
-	NULL, /* Close */
-	NULL, /* Sync */
-};
-
-void irlan_do_provider_event(struct irlan_cb *self, IRLAN_EVENT event,
-			     struct sk_buff *skb)
-{
-	IRDA_ASSERT(*state[ self->provider.state] != NULL, return;);
-
-	(*state[self->provider.state]) (self, event, skb);
-}
-
-/*
- * Function irlan_provider_state_idle (event, skb, info)
- *
- *    IDLE, We are waiting for an indication that there is a provider
- *    available.
- */
-static int irlan_provider_state_idle(struct irlan_cb *self, IRLAN_EVENT event,
-				     struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-
-	switch(event) {
-	case IRLAN_CONNECT_INDICATION:
-	     irlan_provider_connect_response( self, self->provider.tsap_ctrl);
-	     irlan_next_provider_state( self, IRLAN_INFO);
-	     break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_provider_state_info (self, event, skb, info)
- *
- *    INFO, We have issued a GetInfo command and is awaiting a reply.
- */
-static int irlan_provider_state_info(struct irlan_cb *self, IRLAN_EVENT event,
-				     struct sk_buff *skb)
-{
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-
-	switch(event) {
-	case IRLAN_GET_INFO_CMD:
-		/* Be sure to use 802.3 in case of peer mode */
-		if (self->provider.access_type == ACCESS_PEER) {
-			self->media = MEDIA_802_3;
-
-			/* Check if client has started yet */
-			if (self->client.state == IRLAN_IDLE) {
-				/* This should get the client going */
-				irlmp_discovery_request(8);
-			}
-		}
-
-		irlan_provider_send_reply(self, CMD_GET_PROVIDER_INFO,
-					  RSP_SUCCESS);
-		/* Keep state */
-		break;
-	case IRLAN_GET_MEDIA_CMD:
-		irlan_provider_send_reply(self, CMD_GET_MEDIA_CHAR,
-					  RSP_SUCCESS);
-		/* Keep state */
-		break;
-	case IRLAN_OPEN_DATA_CMD:
-		ret = irlan_parse_open_data_cmd(self, skb);
-		if (self->provider.access_type == ACCESS_PEER) {
-			/* FIXME: make use of random functions! */
-			self->provider.send_arb_val = (jiffies & 0xffff);
-		}
-		irlan_provider_send_reply(self, CMD_OPEN_DATA_CHANNEL, ret);
-
-		if (ret == RSP_SUCCESS) {
-			irlan_next_provider_state(self, IRLAN_OPEN);
-
-			/* Signal client that we are now open */
-			irlan_do_client_event(self, IRLAN_PROVIDER_SIGNAL, NULL);
-		}
-		break;
-	case IRLAN_LMP_DISCONNECT:  /* FALLTHROUGH */
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_provider_state(self, IRLAN_IDLE);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_provider_state_open (self, event, skb, info)
- *
- *    OPEN, The client has issued a OpenData command and is awaiting a
- *    reply
- *
- */
-static int irlan_provider_state_open(struct irlan_cb *self, IRLAN_EVENT event,
-				     struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-
-	switch(event) {
-	case IRLAN_FILTER_CONFIG_CMD:
-		irlan_provider_parse_command(self, CMD_FILTER_OPERATION, skb);
-		irlan_provider_send_reply(self, CMD_FILTER_OPERATION,
-					  RSP_SUCCESS);
-		/* Keep state */
-		break;
-	case IRLAN_DATA_CONNECT_INDICATION:
-		irlan_next_provider_state(self, IRLAN_DATA);
-		irlan_provider_connect_response(self, self->tsap_data);
-		break;
-	case IRLAN_LMP_DISCONNECT:  /* FALLTHROUGH */
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_provider_state(self, IRLAN_IDLE);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irlan_provider_state_data (self, event, skb, info)
- *
- *    DATA, The data channel is connected, allowing data transfers between
- *    the local and remote machines.
- *
- */
-static int irlan_provider_state_data(struct irlan_cb *self, IRLAN_EVENT event,
-				     struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;);
-
-	switch(event) {
-	case IRLAN_FILTER_CONFIG_CMD:
-		irlan_provider_parse_command(self, CMD_FILTER_OPERATION, skb);
-		irlan_provider_send_reply(self, CMD_FILTER_OPERATION,
-					  RSP_SUCCESS);
-		break;
-	case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */
-	case IRLAN_LAP_DISCONNECT:
-		irlan_next_provider_state(self, IRLAN_IDLE);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__ , event);
-		break;
-	}
-	if (skb)
-		dev_kfree_skb(skb);
-
-	return 0;
-}
-
-
-
-
-
-
-
-
-
-
diff --git a/net/irda/irlap.c b/net/irda/irlap.c
deleted file mode 100644
index 1cde711bcab5..000000000000
--- a/net/irda/irlap.c
+++ /dev/null
@@ -1,1207 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlap.c
- * Version:       1.0
- * Description:   IrLAP implementation for Linux
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Aug  4 20:40:53 1997
- * Modified at:   Tue Dec 14 09:26:44 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/skbuff.h>
-#include <linux/delay.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irda_device.h>
-#include <net/irda/irqueue.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irlmp_frame.h>
-#include <net/irda/irlap_frame.h>
-#include <net/irda/irlap.h>
-#include <net/irda/timer.h>
-#include <net/irda/qos.h>
-
-static hashbin_t *irlap = NULL;
-int sysctl_slot_timeout = SLOT_TIMEOUT * 1000 / HZ;
-
-/* This is the delay of missed pf period before generating an event
- * to the application. The spec mandate 3 seconds, but in some cases
- * it's way too long. - Jean II */
-int sysctl_warn_noreply_time = 3;
-
-extern void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb);
-static void __irlap_close(struct irlap_cb *self);
-static void irlap_init_qos_capabilities(struct irlap_cb *self,
-					struct qos_info *qos_user);
-
-static const char *const lap_reasons[] __maybe_unused = {
-	"ERROR, NOT USED",
-	"LAP_DISC_INDICATION",
-	"LAP_NO_RESPONSE",
-	"LAP_RESET_INDICATION",
-	"LAP_FOUND_NONE",
-	"LAP_MEDIA_BUSY",
-	"LAP_PRIMARY_CONFLICT",
-	"ERROR, NOT USED",
-};
-
-int __init irlap_init(void)
-{
-	/* Check if the compiler did its job properly.
-	 * May happen on some ARM configuration, check with Russell King. */
-	IRDA_ASSERT(sizeof(struct xid_frame) == 14, ;);
-	IRDA_ASSERT(sizeof(struct test_frame) == 10, ;);
-	IRDA_ASSERT(sizeof(struct ua_frame) == 10, ;);
-	IRDA_ASSERT(sizeof(struct snrm_frame) == 11, ;);
-
-	/* Allocate master array */
-	irlap = hashbin_new(HB_LOCK);
-	if (irlap == NULL) {
-		net_err_ratelimited("%s: can't allocate irlap hashbin!\n",
-				    __func__);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-void irlap_cleanup(void)
-{
-	IRDA_ASSERT(irlap != NULL, return;);
-
-	hashbin_delete(irlap, (FREE_FUNC) __irlap_close);
-}
-
-/*
- * Function irlap_open (driver)
- *
- *    Initialize IrLAP layer
- *
- */
-struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
-			    const char *hw_name)
-{
-	struct irlap_cb *self;
-
-	/* Initialize the irlap structure. */
-	self = kzalloc(sizeof(struct irlap_cb), GFP_KERNEL);
-	if (self == NULL)
-		return NULL;
-
-	self->magic = LAP_MAGIC;
-
-	/* Make a binding between the layers */
-	self->netdev = dev;
-	self->qos_dev = qos;
-	/* Copy hardware name */
-	if(hw_name != NULL) {
-		strlcpy(self->hw_name, hw_name, sizeof(self->hw_name));
-	} else {
-		self->hw_name[0] = '\0';
-	}
-
-	/* FIXME: should we get our own field? */
-	dev->atalk_ptr = self;
-
-	self->state = LAP_OFFLINE;
-
-	/* Initialize transmit queue */
-	skb_queue_head_init(&self->txq);
-	skb_queue_head_init(&self->txq_ultra);
-	skb_queue_head_init(&self->wx_list);
-
-	/* My unique IrLAP device address! */
-	/* We don't want the broadcast address, neither the NULL address
-	 * (most often used to signify "invalid"), and we don't want an
-	 * address already in use (otherwise connect won't be able
-	 * to select the proper link). - Jean II */
-	do {
-		get_random_bytes(&self->saddr, sizeof(self->saddr));
-	} while ((self->saddr == 0x0) || (self->saddr == BROADCAST) ||
-		 (hashbin_lock_find(irlap, self->saddr, NULL)) );
-	/* Copy to the driver */
-	memcpy(dev->dev_addr, &self->saddr, 4);
-
-	init_timer(&self->slot_timer);
-	init_timer(&self->query_timer);
-	init_timer(&self->discovery_timer);
-	init_timer(&self->final_timer);
-	init_timer(&self->poll_timer);
-	init_timer(&self->wd_timer);
-	init_timer(&self->backoff_timer);
-	init_timer(&self->media_busy_timer);
-
-	irlap_apply_default_connection_parameters(self);
-
-	self->N3 = 3; /* # connections attempts to try before giving up */
-
-	self->state = LAP_NDM;
-
-	hashbin_insert(irlap, (irda_queue_t *) self, self->saddr, NULL);
-
-	irlmp_register_link(self, self->saddr, &self->notify);
-
-	return self;
-}
-EXPORT_SYMBOL(irlap_open);
-
-/*
- * Function __irlap_close (self)
- *
- *    Remove IrLAP and all allocated memory. Stop any pending timers.
- *
- */
-static void __irlap_close(struct irlap_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/* Stop timers */
-	del_timer(&self->slot_timer);
-	del_timer(&self->query_timer);
-	del_timer(&self->discovery_timer);
-	del_timer(&self->final_timer);
-	del_timer(&self->poll_timer);
-	del_timer(&self->wd_timer);
-	del_timer(&self->backoff_timer);
-	del_timer(&self->media_busy_timer);
-
-	irlap_flush_all_queues(self);
-
-	self->magic = 0;
-
-	kfree(self);
-}
-
-/*
- * Function irlap_close (self)
- *
- *    Remove IrLAP instance
- *
- */
-void irlap_close(struct irlap_cb *self)
-{
-	struct irlap_cb *lap;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/* We used to send a LAP_DISC_INDICATION here, but this was
-	 * racy. This has been move within irlmp_unregister_link()
-	 * itself. Jean II */
-
-	/* Kill the LAP and all LSAPs on top of it */
-	irlmp_unregister_link(self->saddr);
-	self->notify.instance = NULL;
-
-	/* Be sure that we manage to remove ourself from the hash */
-	lap = hashbin_remove(irlap, self->saddr, NULL);
-	if (!lap) {
-		pr_debug("%s(), Didn't find myself!\n", __func__);
-		return;
-	}
-	__irlap_close(lap);
-}
-EXPORT_SYMBOL(irlap_close);
-
-/*
- * Function irlap_connect_indication (self, skb)
- *
- *    Another device is attempting to make a connection
- *
- */
-void irlap_connect_indication(struct irlap_cb *self, struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	irlap_init_qos_capabilities(self, NULL); /* No user QoS! */
-
-	irlmp_link_connect_indication(self->notify.instance, self->saddr,
-				      self->daddr, &self->qos_tx, skb);
-}
-
-/*
- * Function irlap_connect_response (self, skb)
- *
- *    Service user has accepted incoming connection
- *
- */
-void irlap_connect_response(struct irlap_cb *self, struct sk_buff *userdata)
-{
-	irlap_do_event(self, CONNECT_RESPONSE, userdata, NULL);
-}
-
-/*
- * Function irlap_connect_request (self, daddr, qos_user, sniff)
- *
- *    Request connection with another device, sniffing is not implemented
- *    yet.
- *
- */
-void irlap_connect_request(struct irlap_cb *self, __u32 daddr,
-			   struct qos_info *qos_user, int sniff)
-{
-	pr_debug("%s(), daddr=0x%08x\n", __func__, daddr);
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	self->daddr = daddr;
-
-	/*
-	 *  If the service user specifies QoS values for this connection,
-	 *  then use them
-	 */
-	irlap_init_qos_capabilities(self, qos_user);
-
-	if ((self->state == LAP_NDM) && !self->media_busy)
-		irlap_do_event(self, CONNECT_REQUEST, NULL, NULL);
-	else
-		self->connect_pending = TRUE;
-}
-
-/*
- * Function irlap_connect_confirm (self, skb)
- *
- *    Connection request has been accepted
- *
- */
-void irlap_connect_confirm(struct irlap_cb *self, struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	irlmp_link_connect_confirm(self->notify.instance, &self->qos_tx, skb);
-}
-
-/*
- * Function irlap_data_indication (self, skb)
- *
- *    Received data frames from IR-port, so we just pass them up to
- *    IrLMP for further processing
- *
- */
-void irlap_data_indication(struct irlap_cb *self, struct sk_buff *skb,
-			   int unreliable)
-{
-	/* Hide LAP header from IrLMP layer */
-	skb_pull(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
-
-	irlmp_link_data_indication(self->notify.instance, skb, unreliable);
-}
-
-
-/*
- * Function irlap_data_request (self, skb)
- *
- *    Queue data for transmission, must wait until XMIT state
- *
- */
-void irlap_data_request(struct irlap_cb *self, struct sk_buff *skb,
-			int unreliable)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	IRDA_ASSERT(skb_headroom(skb) >= (LAP_ADDR_HEADER+LAP_CTRL_HEADER),
-		    return;);
-	skb_push(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
-
-	/*
-	 *  Must set frame format now so that the rest of the code knows
-	 *  if its dealing with an I or an UI frame
-	 */
-	if (unreliable)
-		skb->data[1] = UI_FRAME;
-	else
-		skb->data[1] = I_FRAME;
-
-	/* Don't forget to refcount it - see irlmp_connect_request(). */
-	skb_get(skb);
-
-	/* Add at the end of the queue (keep ordering) - Jean II */
-	skb_queue_tail(&self->txq, skb);
-
-	/*
-	 *  Send event if this frame only if we are in the right state
-	 *  FIXME: udata should be sent first! (skb_queue_head?)
-	 */
-	if ((self->state == LAP_XMIT_P) || (self->state == LAP_XMIT_S)) {
-		/* If we are not already processing the Tx queue, trigger
-		 * transmission immediately - Jean II */
-		if((skb_queue_len(&self->txq) <= 1) && (!self->local_busy))
-			irlap_do_event(self, DATA_REQUEST, skb, NULL);
-		/* Otherwise, the packets will be sent normally at the
-		 * next pf-poll - Jean II */
-	}
-}
-
-/*
- * Function irlap_unitdata_request (self, skb)
- *
- *    Send Ultra data. This is data that must be sent outside any connection
- *
- */
-#ifdef CONFIG_IRDA_ULTRA
-void irlap_unitdata_request(struct irlap_cb *self, struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	IRDA_ASSERT(skb_headroom(skb) >= (LAP_ADDR_HEADER+LAP_CTRL_HEADER),
-	       return;);
-	skb_push(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
-
-	skb->data[0] = CBROADCAST;
-	skb->data[1] = UI_FRAME;
-
-	/* Don't need to refcount, see irlmp_connless_data_request() */
-
-	skb_queue_tail(&self->txq_ultra, skb);
-
-	irlap_do_event(self, SEND_UI_FRAME, NULL, NULL);
-}
-#endif /*CONFIG_IRDA_ULTRA */
-
-/*
- * Function irlap_udata_indication (self, skb)
- *
- *    Receive Ultra data. This is data that is received outside any connection
- *
- */
-#ifdef CONFIG_IRDA_ULTRA
-void irlap_unitdata_indication(struct irlap_cb *self, struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	/* Hide LAP header from IrLMP layer */
-	skb_pull(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER);
-
-	irlmp_link_unitdata_indication(self->notify.instance, skb);
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irlap_disconnect_request (void)
- *
- *    Request to disconnect connection by service user
- */
-void irlap_disconnect_request(struct irlap_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/* Don't disconnect until all data frames are successfully sent */
-	if (!skb_queue_empty(&self->txq)) {
-		self->disconnect_pending = TRUE;
-		return;
-	}
-
-	/* Check if we are in the right state for disconnecting */
-	switch (self->state) {
-	case LAP_XMIT_P:        /* FALLTHROUGH */
-	case LAP_XMIT_S:        /* FALLTHROUGH */
-	case LAP_CONN:          /* FALLTHROUGH */
-	case LAP_RESET_WAIT:    /* FALLTHROUGH */
-	case LAP_RESET_CHECK:
-		irlap_do_event(self, DISCONNECT_REQUEST, NULL, NULL);
-		break;
-	default:
-		pr_debug("%s(), disconnect pending!\n", __func__);
-		self->disconnect_pending = TRUE;
-		break;
-	}
-}
-
-/*
- * Function irlap_disconnect_indication (void)
- *
- *    Disconnect request from other device
- *
- */
-void irlap_disconnect_indication(struct irlap_cb *self, LAP_REASON reason)
-{
-	pr_debug("%s(), reason=%s\n", __func__, lap_reasons[reason]);
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/* Flush queues */
-	irlap_flush_all_queues(self);
-
-	switch (reason) {
-	case LAP_RESET_INDICATION:
-		pr_debug("%s(), Sending reset request!\n", __func__);
-		irlap_do_event(self, RESET_REQUEST, NULL, NULL);
-		break;
-	case LAP_NO_RESPONSE:	   /* FALLTHROUGH */
-	case LAP_DISC_INDICATION:  /* FALLTHROUGH */
-	case LAP_FOUND_NONE:       /* FALLTHROUGH */
-	case LAP_MEDIA_BUSY:
-		irlmp_link_disconnect_indication(self->notify.instance, self,
-						 reason, NULL);
-		break;
-	default:
-		net_err_ratelimited("%s: Unknown reason %d\n",
-				    __func__, reason);
-	}
-}
-
-/*
- * Function irlap_discovery_request (gen_addr_bit)
- *
- *    Start one single discovery operation.
- *
- */
-void irlap_discovery_request(struct irlap_cb *self, discovery_t *discovery)
-{
-	struct irlap_info info;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-	IRDA_ASSERT(discovery != NULL, return;);
-
-	pr_debug("%s(), nslots = %d\n", __func__, discovery->nslots);
-
-	IRDA_ASSERT((discovery->nslots == 1) || (discovery->nslots == 6) ||
-		    (discovery->nslots == 8) || (discovery->nslots == 16),
-		    return;);
-
-	/* Discovery is only possible in NDM mode */
-	if (self->state != LAP_NDM) {
-		pr_debug("%s(), discovery only possible in NDM mode\n",
-			 __func__);
-		irlap_discovery_confirm(self, NULL);
-		/* Note : in theory, if we are not in NDM, we could postpone
-		 * the discovery like we do for connection request.
-		 * In practice, it's not worth it. If the media was busy,
-		 * it's likely next time around it won't be busy. If we are
-		 * in REPLY state, we will get passive discovery info & event.
-		 * Jean II */
-		return;
-	}
-
-	/* Check if last discovery request finished in time, or if
-	 * it was aborted due to the media busy flag. */
-	if (self->discovery_log != NULL) {
-		hashbin_delete(self->discovery_log, (FREE_FUNC) kfree);
-		self->discovery_log = NULL;
-	}
-
-	/* All operations will occur at predictable time, no need to lock */
-	self->discovery_log = hashbin_new(HB_NOLOCK);
-
-	if (self->discovery_log == NULL) {
-		net_warn_ratelimited("%s(), Unable to allocate discovery log!\n",
-				     __func__);
-		return;
-	}
-
-	info.S = discovery->nslots; /* Number of slots */
-	info.s = 0; /* Current slot */
-
-	self->discovery_cmd = discovery;
-	info.discovery = discovery;
-
-	/* sysctl_slot_timeout bounds are checked in irsysctl.c - Jean II */
-	self->slot_timeout = msecs_to_jiffies(sysctl_slot_timeout);
-
-	irlap_do_event(self, DISCOVERY_REQUEST, NULL, &info);
-}
-
-/*
- * Function irlap_discovery_confirm (log)
- *
- *    A device has been discovered in front of this station, we
- *    report directly to LMP.
- */
-void irlap_discovery_confirm(struct irlap_cb *self, hashbin_t *discovery_log)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	IRDA_ASSERT(self->notify.instance != NULL, return;);
-
-	/*
-	 * Check for successful discovery, since we are then allowed to clear
-	 * the media busy condition (IrLAP 6.13.4 - p.94). This should allow
-	 * us to make connection attempts much faster and easier (i.e. no
-	 * collisions).
-	 * Setting media busy to false will also generate an event allowing
-	 * to process pending events in NDM state machine.
-	 * Note : the spec doesn't define what's a successful discovery is.
-	 * If we want Ultra to work, it's successful even if there is
-	 * nobody discovered - Jean II
-	 */
-	if (discovery_log)
-		irda_device_set_media_busy(self->netdev, FALSE);
-
-	/* Inform IrLMP */
-	irlmp_link_discovery_confirm(self->notify.instance, discovery_log);
-}
-
-/*
- * Function irlap_discovery_indication (log)
- *
- *    Somebody is trying to discover us!
- *
- */
-void irlap_discovery_indication(struct irlap_cb *self, discovery_t *discovery)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-	IRDA_ASSERT(discovery != NULL, return;);
-
-	IRDA_ASSERT(self->notify.instance != NULL, return;);
-
-	/* A device is very likely to connect immediately after it performs
-	 * a successful discovery. This means that in our case, we are much
-	 * more likely to receive a connection request over the medium.
-	 * So, we backoff to avoid collisions.
-	 * IrLAP spec 6.13.4 suggest 100ms...
-	 * Note : this little trick actually make a *BIG* difference. If I set
-	 * my Linux box with discovery enabled and one Ultra frame sent every
-	 * second, my Palm has no trouble connecting to it every time !
-	 * Jean II */
-	irda_device_set_media_busy(self->netdev, SMALL);
-
-	irlmp_link_discovery_indication(self->notify.instance, discovery);
-}
-
-/*
- * Function irlap_status_indication (quality_of_link)
- */
-void irlap_status_indication(struct irlap_cb *self, int quality_of_link)
-{
-	switch (quality_of_link) {
-	case STATUS_NO_ACTIVITY:
-		net_info_ratelimited("IrLAP, no activity on link!\n");
-		break;
-	case STATUS_NOISY:
-		net_info_ratelimited("IrLAP, noisy link!\n");
-		break;
-	default:
-		break;
-	}
-	irlmp_status_indication(self->notify.instance,
-				quality_of_link, LOCK_NO_CHANGE);
-}
-
-/*
- * Function irlap_reset_indication (void)
- */
-void irlap_reset_indication(struct irlap_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	if (self->state == LAP_RESET_WAIT)
-		irlap_do_event(self, RESET_REQUEST, NULL, NULL);
-	else
-		irlap_do_event(self, RESET_RESPONSE, NULL, NULL);
-}
-
-/*
- * Function irlap_reset_confirm (void)
- */
-void irlap_reset_confirm(void)
-{
-}
-
-/*
- * Function irlap_generate_rand_time_slot (S, s)
- *
- *    Generate a random time slot between s and S-1 where
- *    S = Number of slots (0 -> S-1)
- *    s = Current slot
- */
-int irlap_generate_rand_time_slot(int S, int s)
-{
-	static int rand;
-	int slot;
-
-	IRDA_ASSERT((S - s) > 0, return 0;);
-
-	rand += jiffies;
-	rand ^= (rand << 12);
-	rand ^= (rand >> 20);
-
-	slot = s + rand % (S-s);
-
-	IRDA_ASSERT((slot >= s) || (slot < S), return 0;);
-
-	return slot;
-}
-
-/*
- * Function irlap_update_nr_received (nr)
- *
- *    Remove all acknowledged frames in current window queue. This code is
- *    not intuitive and you should not try to change it. If you think it
- *    contains bugs, please mail a patch to the author instead.
- */
-void irlap_update_nr_received(struct irlap_cb *self, int nr)
-{
-	struct sk_buff *skb = NULL;
-	int count = 0;
-
-	/*
-	 * Remove all the ack-ed frames from the window queue.
-	 */
-
-	/*
-	 *  Optimize for the common case. It is most likely that the receiver
-	 *  will acknowledge all the frames we have sent! So in that case we
-	 *  delete all frames stored in window.
-	 */
-	if (nr == self->vs) {
-		while ((skb = skb_dequeue(&self->wx_list)) != NULL) {
-			dev_kfree_skb(skb);
-		}
-		/* The last acked frame is the next to send minus one */
-		self->va = nr - 1;
-	} else {
-		/* Remove all acknowledged frames in current window */
-		while ((skb_peek(&self->wx_list) != NULL) &&
-		       (((self->va+1) % 8) != nr))
-		{
-			skb = skb_dequeue(&self->wx_list);
-			dev_kfree_skb(skb);
-
-			self->va = (self->va + 1) % 8;
-			count++;
-		}
-	}
-
-	/* Advance window */
-	self->window = self->window_size - skb_queue_len(&self->wx_list);
-}
-
-/*
- * Function irlap_validate_ns_received (ns)
- *
- *    Validate the next to send (ns) field from received frame.
- */
-int irlap_validate_ns_received(struct irlap_cb *self, int ns)
-{
-	/*  ns as expected?  */
-	if (ns == self->vr)
-		return NS_EXPECTED;
-	/*
-	 *  Stations are allowed to treat invalid NS as unexpected NS
-	 *  IrLAP, Recv ... with-invalid-Ns. p. 84
-	 */
-	return NS_UNEXPECTED;
-
-	/* return NR_INVALID; */
-}
-/*
- * Function irlap_validate_nr_received (nr)
- *
- *    Validate the next to receive (nr) field from received frame.
- *
- */
-int irlap_validate_nr_received(struct irlap_cb *self, int nr)
-{
-	/*  nr as expected?  */
-	if (nr == self->vs) {
-		pr_debug("%s(), expected!\n", __func__);
-		return NR_EXPECTED;
-	}
-
-	/*
-	 *  unexpected nr? (but within current window), first we check if the
-	 *  ns numbers of the frames in the current window wrap.
-	 */
-	if (self->va < self->vs) {
-		if ((nr >= self->va) && (nr <= self->vs))
-			return NR_UNEXPECTED;
-	} else {
-		if ((nr >= self->va) || (nr <= self->vs))
-			return NR_UNEXPECTED;
-	}
-
-	/* Invalid nr!  */
-	return NR_INVALID;
-}
-
-/*
- * Function irlap_initiate_connection_state ()
- *
- *    Initialize the connection state parameters
- *
- */
-void irlap_initiate_connection_state(struct irlap_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/* Next to send and next to receive */
-	self->vs = self->vr = 0;
-
-	/* Last frame which got acked (0 - 1) % 8 */
-	self->va = 7;
-
-	self->window = 1;
-
-	self->remote_busy = FALSE;
-	self->retry_count = 0;
-}
-
-/*
- * Function irlap_wait_min_turn_around (self, qos)
- *
- *    Wait negotiated minimum turn around time, this function actually sets
- *    the number of BOS's that must be sent before the next transmitted
- *    frame in order to delay for the specified amount of time. This is
- *    done to avoid using timers, and the forbidden udelay!
- */
-void irlap_wait_min_turn_around(struct irlap_cb *self, struct qos_info *qos)
-{
-	__u32 min_turn_time;
-	__u32 speed;
-
-	/* Get QoS values.  */
-	speed = qos->baud_rate.value;
-	min_turn_time = qos->min_turn_time.value;
-
-	/* No need to calculate XBOFs for speeds over 115200 bps */
-	if (speed > 115200) {
-		self->mtt_required = min_turn_time;
-		return;
-	}
-
-	/*
-	 *  Send additional BOF's for the next frame for the requested
-	 *  min turn time, so now we must calculate how many chars (XBOF's) we
-	 *  must send for the requested time period (min turn time)
-	 */
-	self->xbofs_delay = irlap_min_turn_time_in_bytes(speed, min_turn_time);
-}
-
-/*
- * Function irlap_flush_all_queues (void)
- *
- *    Flush all queues
- *
- */
-void irlap_flush_all_queues(struct irlap_cb *self)
-{
-	struct sk_buff* skb;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/* Free transmission queue */
-	while ((skb = skb_dequeue(&self->txq)) != NULL)
-		dev_kfree_skb(skb);
-
-	while ((skb = skb_dequeue(&self->txq_ultra)) != NULL)
-		dev_kfree_skb(skb);
-
-	/* Free sliding window buffered packets */
-	while ((skb = skb_dequeue(&self->wx_list)) != NULL)
-		dev_kfree_skb(skb);
-}
-
-/*
- * Function irlap_setspeed (self, speed)
- *
- *    Change the speed of the IrDA port
- *
- */
-static void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now)
-{
-	struct sk_buff *skb;
-
-	pr_debug("%s(), setting speed to %d\n", __func__, speed);
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	self->speed = speed;
-
-	/* Change speed now, or just piggyback speed on frames */
-	if (now) {
-		/* Send down empty frame to trigger speed change */
-		skb = alloc_skb(0, GFP_ATOMIC);
-		if (skb)
-			irlap_queue_xmit(self, skb);
-	}
-}
-
-/*
- * Function irlap_init_qos_capabilities (self, qos)
- *
- *    Initialize QoS for this IrLAP session, What we do is to compute the
- *    intersection of the QoS capabilities for the user, driver and for
- *    IrLAP itself. Normally, IrLAP will not specify any values, but it can
- *    be used to restrict certain values.
- */
-static void irlap_init_qos_capabilities(struct irlap_cb *self,
-					struct qos_info *qos_user)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-	IRDA_ASSERT(self->netdev != NULL, return;);
-
-	/* Start out with the maximum QoS support possible */
-	irda_init_max_qos_capabilies(&self->qos_rx);
-
-	/* Apply drivers QoS capabilities */
-	irda_qos_compute_intersection(&self->qos_rx, self->qos_dev);
-
-	/*
-	 *  Check for user supplied QoS parameters. The service user is only
-	 *  allowed to supply these values. We check each parameter since the
-	 *  user may not have set all of them.
-	 */
-	if (qos_user) {
-		pr_debug("%s(), Found user specified QoS!\n", __func__);
-
-		if (qos_user->baud_rate.bits)
-			self->qos_rx.baud_rate.bits &= qos_user->baud_rate.bits;
-
-		if (qos_user->max_turn_time.bits)
-			self->qos_rx.max_turn_time.bits &= qos_user->max_turn_time.bits;
-		if (qos_user->data_size.bits)
-			self->qos_rx.data_size.bits &= qos_user->data_size.bits;
-
-		if (qos_user->link_disc_time.bits)
-			self->qos_rx.link_disc_time.bits &= qos_user->link_disc_time.bits;
-	}
-
-	/* Use 500ms in IrLAP for now */
-	self->qos_rx.max_turn_time.bits &= 0x01;
-
-	/* Set data size */
-	/*self->qos_rx.data_size.bits &= 0x03;*/
-
-	irda_qos_bits_to_value(&self->qos_rx);
-}
-
-/*
- * Function irlap_apply_default_connection_parameters (void, now)
- *
- *    Use the default connection and transmission parameters
- */
-void irlap_apply_default_connection_parameters(struct irlap_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/* xbofs : Default value in NDM */
-	self->next_bofs   = 12;
-	self->bofs_count  = 12;
-
-	/* NDM Speed is 9600 */
-	irlap_change_speed(self, 9600, TRUE);
-
-	/* Set mbusy when going to NDM state */
-	irda_device_set_media_busy(self->netdev, TRUE);
-
-	/*
-	 * Generate random connection address for this session, which must
-	 * be 7 bits wide and different from 0x00 and 0xfe
-	 */
-	while ((self->caddr == 0x00) || (self->caddr == 0xfe)) {
-		get_random_bytes(&self->caddr, sizeof(self->caddr));
-		self->caddr &= 0xfe;
-	}
-
-	/* Use default values until connection has been negitiated */
-	self->slot_timeout = sysctl_slot_timeout;
-	self->final_timeout = FINAL_TIMEOUT;
-	self->poll_timeout = POLL_TIMEOUT;
-	self->wd_timeout = WD_TIMEOUT;
-
-	/* Set some default values */
-	self->qos_tx.baud_rate.value = 9600;
-	self->qos_rx.baud_rate.value = 9600;
-	self->qos_tx.max_turn_time.value = 0;
-	self->qos_rx.max_turn_time.value = 0;
-	self->qos_tx.min_turn_time.value = 0;
-	self->qos_rx.min_turn_time.value = 0;
-	self->qos_tx.data_size.value = 64;
-	self->qos_rx.data_size.value = 64;
-	self->qos_tx.window_size.value = 1;
-	self->qos_rx.window_size.value = 1;
-	self->qos_tx.additional_bofs.value = 12;
-	self->qos_rx.additional_bofs.value = 12;
-	self->qos_tx.link_disc_time.value = 0;
-	self->qos_rx.link_disc_time.value = 0;
-
-	irlap_flush_all_queues(self);
-
-	self->disconnect_pending = FALSE;
-	self->connect_pending = FALSE;
-}
-
-/*
- * Function irlap_apply_connection_parameters (qos, now)
- *
- *    Initialize IrLAP with the negotiated QoS values
- *
- * If 'now' is false, the speed and xbofs will be changed after the next
- * frame is sent.
- * If 'now' is true, the speed and xbofs is changed immediately
- */
-void irlap_apply_connection_parameters(struct irlap_cb *self, int now)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/* Set the negotiated xbofs value */
-	self->next_bofs   = self->qos_tx.additional_bofs.value;
-	if (now)
-		self->bofs_count = self->next_bofs;
-
-	/* Set the negotiated link speed (may need the new xbofs value) */
-	irlap_change_speed(self, self->qos_tx.baud_rate.value, now);
-
-	self->window_size = self->qos_tx.window_size.value;
-	self->window      = self->qos_tx.window_size.value;
-
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
-	/*
-	 *  Calculate how many bytes it is possible to transmit before the
-	 *  link must be turned around
-	 */
-	self->line_capacity =
-		irlap_max_line_capacity(self->qos_tx.baud_rate.value,
-					self->qos_tx.max_turn_time.value);
-	self->bytes_left = self->line_capacity;
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-
-
-	/*
-	 *  Initialize timeout values, some of the rules are listed on
-	 *  page 92 in IrLAP.
-	 */
-	IRDA_ASSERT(self->qos_tx.max_turn_time.value != 0, return;);
-	IRDA_ASSERT(self->qos_rx.max_turn_time.value != 0, return;);
-	/* The poll timeout applies only to the primary station.
-	 * It defines the maximum time the primary stay in XMIT mode
-	 * before timeout and turning the link around (sending a RR).
-	 * Or, this is how much we can keep the pf bit in primary mode.
-	 * Therefore, it must be lower or equal than our *OWN* max turn around.
-	 * Jean II */
-	self->poll_timeout = msecs_to_jiffies(
-				self->qos_tx.max_turn_time.value);
-	/* The Final timeout applies only to the primary station.
-	 * It defines the maximum time the primary wait (mostly in RECV mode)
-	 * for an answer from the secondary station before polling it again.
-	 * Therefore, it must be greater or equal than our *PARTNER*
-	 * max turn around time - Jean II */
-	self->final_timeout = msecs_to_jiffies(
-				self->qos_rx.max_turn_time.value);
-	/* The Watchdog Bit timeout applies only to the secondary station.
-	 * It defines the maximum time the secondary wait (mostly in RECV mode)
-	 * for poll from the primary station before getting annoyed.
-	 * Therefore, it must be greater or equal than our *PARTNER*
-	 * max turn around time - Jean II */
-	self->wd_timeout = self->final_timeout * 2;
-
-	/*
-	 * N1 and N2 are maximum retry count for *both* the final timer
-	 * and the wd timer (with a factor 2) as defined above.
-	 * After N1 retry of a timer, we give a warning to the user.
-	 * After N2 retry, we consider the link dead and disconnect it.
-	 * Jean II
-	 */
-
-	/*
-	 *  Set N1 to 0 if Link Disconnect/Threshold Time = 3 and set it to
-	 *  3 seconds otherwise. See page 71 in IrLAP for more details.
-	 *  Actually, it's not always 3 seconds, as we allow to set
-	 *  it via sysctl... Max maxtt is 500ms, and N1 need to be multiple
-	 *  of 2, so 1 second is minimum we can allow. - Jean II
-	 */
-	if (self->qos_tx.link_disc_time.value == sysctl_warn_noreply_time)
-		/*
-		 * If we set N1 to 0, it will trigger immediately, which is
-		 * not what we want. What we really want is to disable it,
-		 * Jean II
-		 */
-		self->N1 = -2; /* Disable - Need to be multiple of 2*/
-	else
-		self->N1 = sysctl_warn_noreply_time * 1000 /
-		  self->qos_rx.max_turn_time.value;
-
-	pr_debug("Setting N1 = %d\n", self->N1);
-
-	/* Set N2 to match our own disconnect time */
-	self->N2 = self->qos_tx.link_disc_time.value * 1000 /
-		self->qos_rx.max_turn_time.value;
-	pr_debug("Setting N2 = %d\n", self->N2);
-}
-
-#ifdef CONFIG_PROC_FS
-struct irlap_iter_state {
-	int id;
-};
-
-static void *irlap_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	struct irlap_iter_state *iter = seq->private;
-	struct irlap_cb *self;
-
-	/* Protect our access to the tsap list */
-	spin_lock_irq(&irlap->hb_spinlock);
-	iter->id = 0;
-
-	for (self = (struct irlap_cb *) hashbin_get_first(irlap);
-	     self; self = (struct irlap_cb *) hashbin_get_next(irlap)) {
-		if (iter->id == *pos)
-			break;
-		++iter->id;
-	}
-
-	return self;
-}
-
-static void *irlap_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct irlap_iter_state *iter = seq->private;
-
-	++*pos;
-	++iter->id;
-	return (void *) hashbin_get_next(irlap);
-}
-
-static void irlap_seq_stop(struct seq_file *seq, void *v)
-{
-	spin_unlock_irq(&irlap->hb_spinlock);
-}
-
-static int irlap_seq_show(struct seq_file *seq, void *v)
-{
-	const struct irlap_iter_state *iter = seq->private;
-	const struct irlap_cb *self = v;
-
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -EINVAL;);
-
-	seq_printf(seq, "irlap%d ", iter->id);
-	seq_printf(seq, "state: %s\n",
-		   irlap_state[self->state]);
-
-	seq_printf(seq, "  device name: %s, ",
-		   (self->netdev) ? self->netdev->name : "bug");
-	seq_printf(seq, "hardware name: %s\n", self->hw_name);
-
-	seq_printf(seq, "  caddr: %#02x, ", self->caddr);
-	seq_printf(seq, "saddr: %#08x, ", self->saddr);
-	seq_printf(seq, "daddr: %#08x\n", self->daddr);
-
-	seq_printf(seq, "  win size: %d, ",
-		   self->window_size);
-	seq_printf(seq, "win: %d, ", self->window);
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
-	seq_printf(seq, "line capacity: %d, ",
-		   self->line_capacity);
-	seq_printf(seq, "bytes left: %d\n", self->bytes_left);
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-	seq_printf(seq, "  tx queue len: %d ",
-		   skb_queue_len(&self->txq));
-	seq_printf(seq, "win queue len: %d ",
-		   skb_queue_len(&self->wx_list));
-	seq_printf(seq, "rbusy: %s", self->remote_busy ?
-		   "TRUE" : "FALSE");
-	seq_printf(seq, " mbusy: %s\n", self->media_busy ?
-		   "TRUE" : "FALSE");
-
-	seq_printf(seq, "  retrans: %d ", self->retry_count);
-	seq_printf(seq, "vs: %d ", self->vs);
-	seq_printf(seq, "vr: %d ", self->vr);
-	seq_printf(seq, "va: %d\n", self->va);
-
-	seq_printf(seq, "  qos\tbps\tmaxtt\tdsize\twinsize\taddbofs\tmintt\tldisc\tcomp\n");
-
-	seq_printf(seq, "  tx\t%d\t",
-		   self->qos_tx.baud_rate.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_tx.max_turn_time.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_tx.data_size.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_tx.window_size.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_tx.additional_bofs.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_tx.min_turn_time.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_tx.link_disc_time.value);
-	seq_printf(seq, "\n");
-
-	seq_printf(seq, "  rx\t%d\t",
-		   self->qos_rx.baud_rate.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_rx.max_turn_time.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_rx.data_size.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_rx.window_size.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_rx.additional_bofs.value);
-	seq_printf(seq, "%d\t",
-		   self->qos_rx.min_turn_time.value);
-	seq_printf(seq, "%d\n",
-		   self->qos_rx.link_disc_time.value);
-
-	return 0;
-}
-
-static const struct seq_operations irlap_seq_ops = {
-	.start  = irlap_seq_start,
-	.next   = irlap_seq_next,
-	.stop   = irlap_seq_stop,
-	.show   = irlap_seq_show,
-};
-
-static int irlap_seq_open(struct inode *inode, struct file *file)
-{
-	if (irlap == NULL)
-		return -EINVAL;
-
-	return seq_open_private(file, &irlap_seq_ops,
-			sizeof(struct irlap_iter_state));
-}
-
-const struct file_operations irlap_seq_fops = {
-	.owner		= THIS_MODULE,
-	.open           = irlap_seq_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release	= seq_release_private,
-};
-
-#endif /* CONFIG_PROC_FS */
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
deleted file mode 100644
index 0e1b4d79f745..000000000000
--- a/net/irda/irlap_event.c
+++ /dev/null
@@ -1,2316 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlap_event.c
- * Version:       0.9
- * Description:   IrLAP state machine implementation
- * Status:        Experimental.
- * Author:        Dag Brattli <dag@brattli.net>
- * Created at:    Sat Aug 16 00:59:29 1997
- * Modified at:   Sat Dec 25 21:07:57 1999
- * Modified by:   Dag Brattli <dag@brattli.net>
- *
- *     Copyright (c) 1998-2000 Dag Brattli <dag@brattli.net>,
- *     Copyright (c) 1998      Thomas Davis <ratbert@radiks.net>
- *     All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlap_event.h>
-
-#include <net/irda/timer.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlap_frame.h>
-#include <net/irda/qos.h>
-#include <net/irda/parameters.h>
-#include <net/irda/irlmp.h>		/* irlmp_flow_indication(), ... */
-
-#include <net/irda/irda_device.h>
-
-#ifdef CONFIG_IRDA_FAST_RR
-int sysctl_fast_poll_increase = 50;
-#endif
-
-static int irlap_state_ndm    (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_query  (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_reply  (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_conn   (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_setup  (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_offline(struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_xmit_p (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_pclose (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_nrm_p  (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_reset_wait(struct irlap_cb *self, IRLAP_EVENT event,
-				  struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_reset  (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_nrm_s  (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_xmit_s (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_sclose (struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info);
-static int irlap_state_reset_check(struct irlap_cb *, IRLAP_EVENT event,
-				   struct sk_buff *, struct irlap_info *);
-
-static const char *const irlap_event[] __maybe_unused = {
-	"DISCOVERY_REQUEST",
-	"CONNECT_REQUEST",
-	"CONNECT_RESPONSE",
-	"DISCONNECT_REQUEST",
-	"DATA_REQUEST",
-	"RESET_REQUEST",
-	"RESET_RESPONSE",
-	"SEND_I_CMD",
-	"SEND_UI_FRAME",
-	"RECV_DISCOVERY_XID_CMD",
-	"RECV_DISCOVERY_XID_RSP",
-	"RECV_SNRM_CMD",
-	"RECV_TEST_CMD",
-	"RECV_TEST_RSP",
-	"RECV_UA_RSP",
-	"RECV_DM_RSP",
-	"RECV_RD_RSP",
-	"RECV_I_CMD",
-	"RECV_I_RSP",
-	"RECV_UI_FRAME",
-	"RECV_FRMR_RSP",
-	"RECV_RR_CMD",
-	"RECV_RR_RSP",
-	"RECV_RNR_CMD",
-	"RECV_RNR_RSP",
-	"RECV_REJ_CMD",
-	"RECV_REJ_RSP",
-	"RECV_SREJ_CMD",
-	"RECV_SREJ_RSP",
-	"RECV_DISC_CMD",
-	"SLOT_TIMER_EXPIRED",
-	"QUERY_TIMER_EXPIRED",
-	"FINAL_TIMER_EXPIRED",
-	"POLL_TIMER_EXPIRED",
-	"DISCOVERY_TIMER_EXPIRED",
-	"WD_TIMER_EXPIRED",
-	"BACKOFF_TIMER_EXPIRED",
-	"MEDIA_BUSY_TIMER_EXPIRED",
-};
-
-const char *const irlap_state[] = {
-	"LAP_NDM",
-	"LAP_QUERY",
-	"LAP_REPLY",
-	"LAP_CONN",
-	"LAP_SETUP",
-	"LAP_OFFLINE",
-	"LAP_XMIT_P",
-	"LAP_PCLOSE",
-	"LAP_NRM_P",
-	"LAP_RESET_WAIT",
-	"LAP_RESET",
-	"LAP_NRM_S",
-	"LAP_XMIT_S",
-	"LAP_SCLOSE",
-	"LAP_RESET_CHECK",
-};
-
-static int (*state[])(struct irlap_cb *self, IRLAP_EVENT event,
-		      struct sk_buff *skb, struct irlap_info *info) =
-{
-	irlap_state_ndm,
-	irlap_state_query,
-	irlap_state_reply,
-	irlap_state_conn,
-	irlap_state_setup,
-	irlap_state_offline,
-	irlap_state_xmit_p,
-	irlap_state_pclose,
-	irlap_state_nrm_p,
-	irlap_state_reset_wait,
-	irlap_state_reset,
-	irlap_state_nrm_s,
-	irlap_state_xmit_s,
-	irlap_state_sclose,
-	irlap_state_reset_check,
-};
-
-/*
- * Function irda_poll_timer_expired (data)
- *
- *    Poll timer has expired. Normally we must now send a RR frame to the
- *    remote device
- */
-static void irlap_poll_timer_expired(void *data)
-{
-	struct irlap_cb *self = (struct irlap_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	irlap_do_event(self, POLL_TIMER_EXPIRED, NULL, NULL);
-}
-
-/*
- * Calculate and set time before we will have to send back the pf bit
- * to the peer. Use in primary.
- * Make sure that state is XMIT_P/XMIT_S when calling this function
- * (and that nobody messed up with the state). - Jean II
- */
-static void irlap_start_poll_timer(struct irlap_cb *self, int timeout)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-#ifdef CONFIG_IRDA_FAST_RR
-	/*
-	 * Send out the RR frames faster if our own transmit queue is empty, or
-	 * if the peer is busy. The effect is a much faster conversation
-	 */
-	if (skb_queue_empty(&self->txq) || self->remote_busy) {
-		if (self->fast_RR == TRUE) {
-			/*
-			 *  Assert that the fast poll timer has not reached the
-			 *  normal poll timer yet
-			 */
-			if (self->fast_RR_timeout < timeout) {
-				/*
-				 *  FIXME: this should be a more configurable
-				 *         function
-				 */
-				self->fast_RR_timeout +=
-					(sysctl_fast_poll_increase * HZ/1000);
-
-				/* Use this fast(er) timeout instead */
-				timeout = self->fast_RR_timeout;
-			}
-		} else {
-			self->fast_RR = TRUE;
-
-			/* Start with just 0 ms */
-			self->fast_RR_timeout = 0;
-			timeout = 0;
-		}
-	} else
-		self->fast_RR = FALSE;
-
-	pr_debug("%s(), timeout=%d (%ld)\n", __func__, timeout, jiffies);
-#endif /* CONFIG_IRDA_FAST_RR */
-
-	if (timeout == 0)
-		irlap_do_event(self, POLL_TIMER_EXPIRED, NULL, NULL);
-	else
-		irda_start_timer(&self->poll_timer, timeout, self,
-				 irlap_poll_timer_expired);
-}
-
-/*
- * Function irlap_do_event (event, skb, info)
- *
- *    Rushes through the state machine without any delay. If state == XMIT
- *    then send queued data frames.
- */
-void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event,
-		    struct sk_buff *skb, struct irlap_info *info)
-{
-	int ret;
-
-	if (!self || self->magic != LAP_MAGIC)
-		return;
-
-	pr_debug("%s(), event = %s, state = %s\n", __func__,
-		 irlap_event[event], irlap_state[self->state]);
-
-	ret = (*state[self->state])(self, event, skb, info);
-
-	/*
-	 *  Check if there are any pending events that needs to be executed
-	 */
-	switch (self->state) {
-	case LAP_XMIT_P: /* FALLTHROUGH */
-	case LAP_XMIT_S:
-		/*
-		 * We just received the pf bit and are at the beginning
-		 * of a new LAP transmit window.
-		 * Check if there are any queued data frames, and do not
-		 * try to disconnect link if we send any data frames, since
-		 * that will change the state away form XMIT
-		 */
-		pr_debug("%s() : queue len = %d\n", __func__,
-			 skb_queue_len(&self->txq));
-
-		if (!skb_queue_empty(&self->txq)) {
-			/* Prevent race conditions with irlap_data_request() */
-			self->local_busy = TRUE;
-
-			/* Theory of operation.
-			 * We send frames up to when we fill the window or
-			 * reach line capacity. Those frames will queue up
-			 * in the device queue, and the driver will slowly
-			 * send them.
-			 * After each frame that we send, we poll the higher
-			 * layer for more data. It's the right time to do
-			 * that because the link layer need to perform the mtt
-			 * and then send the first frame, so we can afford
-			 * to send a bit of time in kernel space.
-			 * The explicit flow indication allow to minimise
-			 * buffers (== lower latency), to avoid higher layer
-			 * polling via timers (== less context switches) and
-			 * to implement a crude scheduler - Jean II */
-
-			/* Try to send away all queued data frames */
-			while ((skb = skb_dequeue(&self->txq)) != NULL) {
-				/* Send one frame */
-				ret = (*state[self->state])(self, SEND_I_CMD,
-							    skb, NULL);
-				/* Drop reference count.
-				 * It will be increase as needed in
-				 * irlap_send_data_xxx() */
-				kfree_skb(skb);
-
-				/* Poll the higher layers for one more frame */
-				irlmp_flow_indication(self->notify.instance,
-						      FLOW_START);
-
-				if (ret == -EPROTO)
-					break; /* Try again later! */
-			}
-			/* Finished transmitting */
-			self->local_busy = FALSE;
-		} else if (self->disconnect_pending) {
-			self->disconnect_pending = FALSE;
-
-			ret = (*state[self->state])(self, DISCONNECT_REQUEST,
-						    NULL, NULL);
-		}
-		break;
-/*	case LAP_NDM: */
-/*	case LAP_CONN: */
-/*	case LAP_RESET_WAIT: */
-/*	case LAP_RESET_CHECK: */
-	default:
-		break;
-	}
-}
-
-/*
- * Function irlap_state_ndm (event, skb, frame)
- *
- *    NDM (Normal Disconnected Mode) state
- *
- */
-static int irlap_state_ndm(struct irlap_cb *self, IRLAP_EVENT event,
-			   struct sk_buff *skb, struct irlap_info *info)
-{
-	discovery_t *discovery_rsp;
-	int ret = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	switch (event) {
-	case CONNECT_REQUEST:
-		IRDA_ASSERT(self->netdev != NULL, return -1;);
-
-		if (self->media_busy) {
-			/* Note : this will never happen, because we test
-			 * media busy in irlap_connect_request() and
-			 * postpone the event... - Jean II */
-			pr_debug("%s(), CONNECT_REQUEST: media busy!\n",
-				 __func__);
-
-			/* Always switch state before calling upper layers */
-			irlap_next_state(self, LAP_NDM);
-
-			irlap_disconnect_indication(self, LAP_MEDIA_BUSY);
-		} else {
-			irlap_send_snrm_frame(self, &self->qos_rx);
-
-			/* Start Final-bit timer */
-			irlap_start_final_timer(self, self->final_timeout);
-
-			self->retry_count = 0;
-			irlap_next_state(self, LAP_SETUP);
-		}
-		break;
-	case RECV_SNRM_CMD:
-		/* Check if the frame contains and I field */
-		if (info) {
-			self->daddr = info->daddr;
-			self->caddr = info->caddr;
-
-			irlap_next_state(self, LAP_CONN);
-
-			irlap_connect_indication(self, skb);
-		} else {
-			pr_debug("%s(), SNRM frame does not contain an I field!\n",
-				 __func__);
-		}
-		break;
-	case DISCOVERY_REQUEST:
-		IRDA_ASSERT(info != NULL, return -1;);
-
-		if (self->media_busy) {
-			pr_debug("%s(), DISCOVERY_REQUEST: media busy!\n",
-				 __func__);
-			/* irlap->log.condition = MEDIA_BUSY; */
-
-			/* This will make IrLMP try again */
-			irlap_discovery_confirm(self, NULL);
-			/* Note : the discovery log is not cleaned up here,
-			 * it will be done in irlap_discovery_request()
-			 * Jean II */
-			return 0;
-		}
-
-		self->S = info->S;
-		self->s = info->s;
-		irlap_send_discovery_xid_frame(self, info->S, info->s, TRUE,
-					       info->discovery);
-		self->frame_sent = FALSE;
-		self->s++;
-
-		irlap_start_slot_timer(self, self->slot_timeout);
-		irlap_next_state(self, LAP_QUERY);
-		break;
-	case RECV_DISCOVERY_XID_CMD:
-		IRDA_ASSERT(info != NULL, return -1;);
-
-		/* Assert that this is not the final slot */
-		if (info->s <= info->S) {
-			self->slot = irlap_generate_rand_time_slot(info->S,
-								   info->s);
-			if (self->slot == info->s) {
-				discovery_rsp = irlmp_get_discovery_response();
-				discovery_rsp->data.daddr = info->daddr;
-
-				irlap_send_discovery_xid_frame(self, info->S,
-							       self->slot,
-							       FALSE,
-							       discovery_rsp);
-				self->frame_sent = TRUE;
-			} else
-				self->frame_sent = FALSE;
-
-			/*
-			 * Go to reply state until end of discovery to
-			 * inhibit our own transmissions. Set the timer
-			 * to not stay forever there... Jean II
-			 */
-			irlap_start_query_timer(self, info->S, info->s);
-			irlap_next_state(self, LAP_REPLY);
-		} else {
-		/* This is the final slot. How is it possible ?
-		 * This would happen is both discoveries are just slightly
-		 * offset (if they are in sync, all packets are lost).
-		 * Most often, all the discovery requests will be received
-		 * in QUERY state (see my comment there), except for the
-		 * last frame that will come here.
-		 * The big trouble when it happen is that active discovery
-		 * doesn't happen, because nobody answer the discoveries
-		 * frame of the other guy, so the log shows up empty.
-		 * What should we do ?
-		 * Not much. It's too late to answer those discovery frames,
-		 * so we just pass the info to IrLMP who will put it in the
-		 * log (and post an event).
-		 * Another cause would be devices that do discovery much
-		 * slower than us, however the latest fixes should minimise
-		 * those cases...
-		 * Jean II
-		 */
-			pr_debug("%s(), Receiving final discovery request, missed the discovery slots :-(\n",
-				 __func__);
-
-			/* Last discovery request -> in the log */
-			irlap_discovery_indication(self, info->discovery);
-		}
-		break;
-	case MEDIA_BUSY_TIMER_EXPIRED:
-		/* A bunch of events may be postponed because the media is
-		 * busy (usually immediately after we close a connection),
-		 * or while we are doing discovery (state query/reply).
-		 * In all those cases, the media busy flag will be cleared
-		 * when it's OK for us to process those postponed events.
-		 * This event is not mentioned in the state machines in the
-		 * IrLAP spec. It's because they didn't consider Ultra and
-		 * postponing connection request is optional.
-		 * Jean II */
-#ifdef CONFIG_IRDA_ULTRA
-		/* Send any pending Ultra frames if any */
-		if (!skb_queue_empty(&self->txq_ultra)) {
-			/* We don't send the frame, just post an event.
-			 * Also, previously this code was in timer.c...
-			 * Jean II */
-			ret = (*state[self->state])(self, SEND_UI_FRAME,
-						    NULL, NULL);
-		}
-#endif /* CONFIG_IRDA_ULTRA */
-		/* Check if we should try to connect.
-		 * This code was previously in irlap_do_event() */
-		if (self->connect_pending) {
-			self->connect_pending = FALSE;
-
-			/* This one *should* not pend in this state, except
-			 * if a socket try to connect and immediately
-			 * disconnect. - clear - Jean II */
-			if (self->disconnect_pending)
-				irlap_disconnect_indication(self, LAP_DISC_INDICATION);
-			else
-				ret = (*state[self->state])(self,
-							    CONNECT_REQUEST,
-							    NULL, NULL);
-			self->disconnect_pending = FALSE;
-		}
-		/* Note : one way to test if this code works well (including
-		 * media busy and small busy) is to create a user space
-		 * application generating an Ultra packet every 3.05 sec (or
-		 * 2.95 sec) and to see how it interact with discovery.
-		 * It's fairly easy to check that no packet is lost, that the
-		 * packets are postponed during discovery and that after
-		 * discovery indication you have a 100ms "gap".
-		 * As connection request and Ultra are now processed the same
-		 * way, this avoid the tedious job of trying IrLAP connection
-		 * in all those cases...
-		 * Jean II */
-		break;
-#ifdef CONFIG_IRDA_ULTRA
-	case SEND_UI_FRAME:
-	{
-		int i;
-		/* Only allowed to repeat an operation twice */
-		for (i=0; ((i<2) && (self->media_busy == FALSE)); i++) {
-			skb = skb_dequeue(&self->txq_ultra);
-			if (skb)
-				irlap_send_ui_frame(self, skb, CBROADCAST,
-						    CMD_FRAME);
-			else
-				break;
-			/* irlap_send_ui_frame() won't increase skb reference
-			 * count, so no dev_kfree_skb() - Jean II */
-		}
-		if (i == 2) {
-			/* Force us to listen 500 ms again */
-			irda_device_set_media_busy(self->netdev, TRUE);
-		}
-		break;
-	}
-	case RECV_UI_FRAME:
-		/* Only accept broadcast frames in NDM mode */
-		if (info->caddr != CBROADCAST) {
-			pr_debug("%s(), not a broadcast frame!\n",
-				 __func__);
-		} else
-			irlap_unitdata_indication(self, skb);
-		break;
-#endif /* CONFIG_IRDA_ULTRA */
-	case RECV_TEST_CMD:
-		/* Remove test frame header */
-		skb_pull(skb, sizeof(struct test_frame));
-
-		/*
-		 * Send response. This skb will not be sent out again, and
-		 * will only be used to send out the same info as the cmd
-		 */
-		irlap_send_test_frame(self, CBROADCAST, info->daddr, skb);
-		break;
-	case RECV_TEST_RSP:
-		pr_debug("%s() not implemented!\n", __func__);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s\n", __func__,
-			 irlap_event[event]);
-
-		ret = -1;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_query (event, skb, info)
- *
- *    QUERY state
- *
- */
-static int irlap_state_query(struct irlap_cb *self, IRLAP_EVENT event,
-			     struct sk_buff *skb, struct irlap_info *info)
-{
-	int ret = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	switch (event) {
-	case RECV_DISCOVERY_XID_RSP:
-		IRDA_ASSERT(info != NULL, return -1;);
-		IRDA_ASSERT(info->discovery != NULL, return -1;);
-
-		pr_debug("%s(), daddr=%08x\n", __func__,
-			 info->discovery->data.daddr);
-
-		if (!self->discovery_log) {
-			net_warn_ratelimited("%s: discovery log is gone! maybe the discovery timeout has been set too short?\n",
-					     __func__);
-			break;
-		}
-		hashbin_insert(self->discovery_log,
-			       (irda_queue_t *) info->discovery,
-			       info->discovery->data.daddr, NULL);
-
-		/* Keep state */
-		/* irlap_next_state(self, LAP_QUERY);  */
-
-		break;
-	case RECV_DISCOVERY_XID_CMD:
-		/* Yes, it is possible to receive those frames in this mode.
-		 * Note that most often the last discovery request won't
-		 * occur here but in NDM state (see my comment there).
-		 * What should we do ?
-		 * Not much. We are currently performing our own discovery,
-		 * therefore we can't answer those frames. We don't want
-		 * to change state either. We just pass the info to
-		 * IrLMP who will put it in the log (and post an event).
-		 * Jean II
-		 */
-
-		IRDA_ASSERT(info != NULL, return -1;);
-
-		pr_debug("%s(), Receiving discovery request (s = %d) while performing discovery :-(\n",
-			 __func__, info->s);
-
-		/* Last discovery request ? */
-		if (info->s == 0xff)
-			irlap_discovery_indication(self, info->discovery);
-		break;
-	case SLOT_TIMER_EXPIRED:
-		/*
-		 * Wait a little longer if we detect an incoming frame. This
-		 * is not mentioned in the spec, but is a good thing to do,
-		 * since we want to work even with devices that violate the
-		 * timing requirements.
-		 */
-		if (irda_device_is_receiving(self->netdev) && !self->add_wait) {
-			pr_debug("%s(), device is slow to answer, waiting some more!\n",
-				 __func__);
-			irlap_start_slot_timer(self, msecs_to_jiffies(10));
-			self->add_wait = TRUE;
-			return ret;
-		}
-		self->add_wait = FALSE;
-
-		if (self->s < self->S) {
-			irlap_send_discovery_xid_frame(self, self->S,
-						       self->s, TRUE,
-						       self->discovery_cmd);
-			self->s++;
-			irlap_start_slot_timer(self, self->slot_timeout);
-
-			/* Keep state */
-			irlap_next_state(self, LAP_QUERY);
-		} else {
-			/* This is the final slot! */
-			irlap_send_discovery_xid_frame(self, self->S, 0xff,
-						       TRUE,
-						       self->discovery_cmd);
-
-			/* Always switch state before calling upper layers */
-			irlap_next_state(self, LAP_NDM);
-
-			/*
-			 *  We are now finished with the discovery procedure,
-			 *  so now we must return the results
-			 */
-			irlap_discovery_confirm(self, self->discovery_log);
-
-			/* IrLMP should now have taken care of the log */
-			self->discovery_log = NULL;
-		}
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s\n", __func__,
-			 irlap_event[event]);
-
-		ret = -1;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_reply (self, event, skb, info)
- *
- *    REPLY, we have received a XID discovery frame from a device and we
- *    are waiting for the right time slot to send a response XID frame
- *
- */
-static int irlap_state_reply(struct irlap_cb *self, IRLAP_EVENT event,
-			     struct sk_buff *skb, struct irlap_info *info)
-{
-	discovery_t *discovery_rsp;
-	int ret=0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	switch (event) {
-	case QUERY_TIMER_EXPIRED:
-		pr_debug("%s(), QUERY_TIMER_EXPIRED <%ld>\n",
-			 __func__, jiffies);
-		irlap_next_state(self, LAP_NDM);
-		break;
-	case RECV_DISCOVERY_XID_CMD:
-		IRDA_ASSERT(info != NULL, return -1;);
-		/* Last frame? */
-		if (info->s == 0xff) {
-			del_timer(&self->query_timer);
-
-			/* info->log.condition = REMOTE; */
-
-			/* Always switch state before calling upper layers */
-			irlap_next_state(self, LAP_NDM);
-
-			irlap_discovery_indication(self, info->discovery);
-		} else {
-			/* If it's our slot, send our reply */
-			if ((info->s >= self->slot) && (!self->frame_sent)) {
-				discovery_rsp = irlmp_get_discovery_response();
-				discovery_rsp->data.daddr = info->daddr;
-
-				irlap_send_discovery_xid_frame(self, info->S,
-							       self->slot,
-							       FALSE,
-							       discovery_rsp);
-
-				self->frame_sent = TRUE;
-			}
-			/* Readjust our timer to accommodate devices
-			 * doing faster or slower discovery than us...
-			 * Jean II */
-			irlap_start_query_timer(self, info->S, info->s);
-
-			/* Keep state */
-			//irlap_next_state(self, LAP_REPLY);
-		}
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d, %s\n", __func__,
-			 event, irlap_event[event]);
-
-		ret = -1;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_conn (event, skb, info)
- *
- *    CONN, we have received a SNRM command and is waiting for the upper
- *    layer to accept or refuse connection
- *
- */
-static int irlap_state_conn(struct irlap_cb *self, IRLAP_EVENT event,
-			    struct sk_buff *skb, struct irlap_info *info)
-{
-	int ret = 0;
-
-	pr_debug("%s(), event=%s\n", __func__, irlap_event[event]);
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	switch (event) {
-	case CONNECT_RESPONSE:
-		skb_pull(skb, sizeof(struct snrm_frame));
-
-		IRDA_ASSERT(self->netdev != NULL, return -1;);
-
-		irlap_qos_negotiate(self, skb);
-
-		irlap_initiate_connection_state(self);
-
-		/*
-		 * Applying the parameters now will make sure we change speed
-		 * *after* we have sent the next frame
-		 */
-		irlap_apply_connection_parameters(self, FALSE);
-
-		/*
-		 * Sending this frame will force a speed change after it has
-		 * been sent (i.e. the frame will be sent at 9600).
-		 */
-		irlap_send_ua_response_frame(self, &self->qos_rx);
-
-#if 0
-		/*
-		 * We are allowed to send two frames, but this may increase
-		 * the connect latency, so lets not do it for now.
-		 */
-		/* This is full of good intentions, but doesn't work in
-		 * practice.
-		 * After sending the first UA response, we switch the
-		 * dongle to the negotiated speed, which is usually
-		 * different than 9600 kb/s.
-		 * From there, there is two solutions :
-		 * 1) The other end has received the first UA response :
-		 * it will set up the connection, move to state LAP_NRM_P,
-		 * and will ignore and drop the second UA response.
-		 * Actually, it's even worse : the other side will almost
-		 * immediately send a RR that will likely collide with the
-		 * UA response (depending on negotiated turnaround).
-		 * 2) The other end has not received the first UA response,
-		 * will stay at 9600 and will never see the second UA response.
-		 * Jean II */
-		irlap_send_ua_response_frame(self, &self->qos_rx);
-#endif
-
-		/*
-		 *  The WD-timer could be set to the duration of the P-timer
-		 *  for this case, but it is recommended to use twice the
-		 *  value (note 3 IrLAP p. 60).
-		 */
-		irlap_start_wd_timer(self, self->wd_timeout);
-		irlap_next_state(self, LAP_NRM_S);
-
-		break;
-	case RECV_DISCOVERY_XID_CMD:
-		pr_debug("%s(), event RECV_DISCOVER_XID_CMD!\n",
-			 __func__);
-		irlap_next_state(self, LAP_NDM);
-
-		break;
-	case DISCONNECT_REQUEST:
-		pr_debug("%s(), Disconnect request!\n", __func__);
-		irlap_send_dm_frame(self);
-		irlap_next_state( self, LAP_NDM);
-		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d, %s\n", __func__,
-			 event, irlap_event[event]);
-
-		ret = -1;
-		break;
-	}
-
-	return ret;
-}
-
-/*
- * Function irlap_state_setup (event, skb, frame)
- *
- *    SETUP state, The local layer has transmitted a SNRM command frame to
- *    a remote peer layer and is awaiting a reply .
- *
- */
-static int irlap_state_setup(struct irlap_cb *self, IRLAP_EVENT event,
-			     struct sk_buff *skb, struct irlap_info *info)
-{
-	int ret = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	switch (event) {
-	case FINAL_TIMER_EXPIRED:
-		if (self->retry_count < self->N3) {
-/*
- *  Perform random backoff, Wait a random number of time units, minimum
- *  duration half the time taken to transmitt a SNRM frame, maximum duration
- *  1.5 times the time taken to transmit a SNRM frame. So this time should
- *  between 15 msecs and 45 msecs.
- */
-			irlap_start_backoff_timer(self, msecs_to_jiffies(20 +
-							(jiffies % 30)));
-		} else {
-			/* Always switch state before calling upper layers */
-			irlap_next_state(self, LAP_NDM);
-
-			irlap_disconnect_indication(self, LAP_FOUND_NONE);
-		}
-		break;
-	case BACKOFF_TIMER_EXPIRED:
-		irlap_send_snrm_frame(self, &self->qos_rx);
-		irlap_start_final_timer(self, self->final_timeout);
-		self->retry_count++;
-		break;
-	case RECV_SNRM_CMD:
-		pr_debug("%s(), SNRM battle!\n", __func__);
-
-		IRDA_ASSERT(skb != NULL, return 0;);
-		IRDA_ASSERT(info != NULL, return 0;);
-
-		/*
-		 *  The device with the largest device address wins the battle
-		 *  (both have sent a SNRM command!)
-		 */
-		if (info &&(info->daddr > self->saddr)) {
-			del_timer(&self->final_timer);
-			irlap_initiate_connection_state(self);
-
-			IRDA_ASSERT(self->netdev != NULL, return -1;);
-
-			skb_pull(skb, sizeof(struct snrm_frame));
-
-			irlap_qos_negotiate(self, skb);
-
-			/* Send UA frame and then change link settings */
-			irlap_apply_connection_parameters(self, FALSE);
-			irlap_send_ua_response_frame(self, &self->qos_rx);
-
-			irlap_next_state(self, LAP_NRM_S);
-			irlap_connect_confirm(self, skb);
-
-			/*
-			 *  The WD-timer could be set to the duration of the
-			 *  P-timer for this case, but it is recommended
-			 *  to use twice the value (note 3 IrLAP p. 60).
-			 */
-			irlap_start_wd_timer(self, self->wd_timeout);
-		} else {
-			/* We just ignore the other device! */
-			irlap_next_state(self, LAP_SETUP);
-		}
-		break;
-	case RECV_UA_RSP:
-		/* Stop F-timer */
-		del_timer(&self->final_timer);
-
-		/* Initiate connection state */
-		irlap_initiate_connection_state(self);
-
-		/* Negotiate connection parameters */
-		IRDA_ASSERT(skb->len > 10, return -1;);
-
-		skb_pull(skb, sizeof(struct ua_frame));
-
-		IRDA_ASSERT(self->netdev != NULL, return -1;);
-
-		irlap_qos_negotiate(self, skb);
-
-		/* Set the new link setting *now* (before the rr frame) */
-		irlap_apply_connection_parameters(self, TRUE);
-		self->retry_count = 0;
-
-		/* Wait for turnaround time to give a chance to the other
-		 * device to be ready to receive us.
-		 * Note : the time to switch speed is typically larger
-		 * than the turnaround time, but as we don't have the other
-		 * side speed switch time, that's our best guess...
-		 * Jean II */
-		irlap_wait_min_turn_around(self, &self->qos_tx);
-
-		/* This frame will actually be sent at the new speed */
-		irlap_send_rr_frame(self, CMD_FRAME);
-
-		/* The timer is set to half the normal timer to quickly
-		 * detect a failure to negotiate the new connection
-		 * parameters. IrLAP 6.11.3.2, note 3.
-		 * Note that currently we don't process this failure
-		 * properly, as we should do a quick disconnect.
-		 * Jean II */
-		irlap_start_final_timer(self, self->final_timeout/2);
-		irlap_next_state(self, LAP_NRM_P);
-
-		irlap_connect_confirm(self, skb);
-		break;
-	case RECV_DM_RSP:     /* FALLTHROUGH */
-	case RECV_DISC_CMD:
-		del_timer(&self->final_timer);
-		irlap_next_state(self, LAP_NDM);
-
-		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d, %s\n", __func__,
-			 event, irlap_event[event]);
-
-		ret = -1;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_offline (self, event, skb, info)
- *
- *    OFFLINE state, not used for now!
- *
- */
-static int irlap_state_offline(struct irlap_cb *self, IRLAP_EVENT event,
-			       struct sk_buff *skb, struct irlap_info *info)
-{
-	pr_debug("%s(), Unknown event\n", __func__);
-
-	return -1;
-}
-
-/*
- * Function irlap_state_xmit_p (self, event, skb, info)
- *
- *    XMIT, Only the primary station has right to transmit, and we
- *    therefore do not expect to receive any transmissions from other
- *    stations.
- *
- */
-static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT event,
-			      struct sk_buff *skb, struct irlap_info *info)
-{
-	int ret = 0;
-
-	switch (event) {
-	case SEND_I_CMD:
-		/*
-		 *  Only send frame if send-window > 0.
-		 */
-		if ((self->window > 0) && (!self->remote_busy)) {
-			int nextfit;
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
-			struct sk_buff *skb_next;
-
-			/* With DYNAMIC_WINDOW, we keep the window size
-			 * maximum, and adapt on the packets we are sending.
-			 * At 115k, we can send only 2 packets of 2048 bytes
-			 * in a 500 ms turnaround. Without this option, we
-			 * would always limit the window to 2. With this
-			 * option, if we send smaller packets, we can send
-			 * up to 7 of them (always depending on QoS).
-			 * Jean II */
-
-			/* Look at the next skb. This is safe, as we are
-			 * the only consumer of the Tx queue (if we are not,
-			 * we have other problems) - Jean II */
-			skb_next = skb_peek(&self->txq);
-
-			/* Check if a subsequent skb exist and would fit in
-			 * the current window (with respect to turnaround
-			 * time).
-			 * This allow us to properly mark the current packet
-			 * with the pf bit, to avoid falling back on the
-			 * second test below, and avoid waiting the
-			 * end of the window and sending a extra RR.
-			 * Note : (skb_next != NULL) <=> (skb_queue_len() > 0)
-			 * Jean II */
-			nextfit = ((skb_next != NULL) &&
-				   ((skb_next->len + skb->len) <=
-				    self->bytes_left));
-
-			/*
-			 * The current packet may not fit ! Because of test
-			 * above, this should not happen any more !!!
-			 *  Test if we have transmitted more bytes over the
-			 *  link than its possible to do with the current
-			 *  speed and turn-around-time.
-			 */
-			if((!nextfit) && (skb->len > self->bytes_left)) {
-				pr_debug("%s(), Not allowed to transmit more bytes!\n",
-					 __func__);
-				/* Requeue the skb */
-				skb_queue_head(&self->txq, skb_get(skb));
-				/*
-				 *  We should switch state to LAP_NRM_P, but
-				 *  that is not possible since we must be sure
-				 *  that we poll the other side. Since we have
-				 *  used up our time, the poll timer should
-				 *  trigger anyway now, so we just wait for it
-				 *  DB
-				 */
-				/*
-				 * Sorry, but that's not totally true. If
-				 * we send 2000B packets, we may wait another
-				 * 1000B until our turnaround expire. That's
-				 * why we need to be proactive in avoiding
-				 * coming here. - Jean II
-				 */
-				return -EPROTO;
-			}
-
-			/* Subtract space used by this skb */
-			self->bytes_left -= skb->len;
-#else	/* CONFIG_IRDA_DYNAMIC_WINDOW */
-			/* Window has been adjusted for the max packet
-			 * size, so much simpler... - Jean II */
-			nextfit = !skb_queue_empty(&self->txq);
-#endif	/* CONFIG_IRDA_DYNAMIC_WINDOW */
-			/*
-			 *  Send data with poll bit cleared only if window > 1
-			 *  and there is more frames after this one to be sent
-			 */
-			if ((self->window > 1) && (nextfit)) {
-				/* More packet to send in current window */
-				irlap_send_data_primary(self, skb);
-				irlap_next_state(self, LAP_XMIT_P);
-			} else {
-				/* Final packet of window */
-				irlap_send_data_primary_poll(self, skb);
-
-				/*
-				 * Make sure state machine does not try to send
-				 * any more frames
-				 */
-				ret = -EPROTO;
-			}
-#ifdef CONFIG_IRDA_FAST_RR
-			/* Peer may want to reply immediately */
-			self->fast_RR = FALSE;
-#endif /* CONFIG_IRDA_FAST_RR */
-		} else {
-			pr_debug("%s(), Unable to send! remote busy?\n",
-				 __func__);
-			skb_queue_head(&self->txq, skb_get(skb));
-
-			/*
-			 *  The next ret is important, because it tells
-			 *  irlap_next_state _not_ to deliver more frames
-			 */
-			ret = -EPROTO;
-		}
-		break;
-	case POLL_TIMER_EXPIRED:
-		pr_debug("%s(), POLL_TIMER_EXPIRED <%ld>\n",
-			 __func__, jiffies);
-		irlap_send_rr_frame(self, CMD_FRAME);
-		/* Return to NRM properly - Jean II  */
-		self->window = self->window_size;
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
-		/* Allowed to transmit a maximum number of bytes again. */
-		self->bytes_left = self->line_capacity;
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-		irlap_start_final_timer(self, self->final_timeout);
-		irlap_next_state(self, LAP_NRM_P);
-		break;
-	case DISCONNECT_REQUEST:
-		del_timer(&self->poll_timer);
-		irlap_wait_min_turn_around(self, &self->qos_tx);
-		irlap_send_disc_frame(self);
-		irlap_flush_all_queues(self);
-		irlap_start_final_timer(self, self->final_timeout);
-		self->retry_count = 0;
-		irlap_next_state(self, LAP_PCLOSE);
-		break;
-	case DATA_REQUEST:
-		/* Nothing to do, irlap_do_event() will send the packet
-		 * when we return... - Jean II */
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s\n",
-			 __func__, irlap_event[event]);
-
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_pclose (event, skb, info)
- *
- *    PCLOSE state
- */
-static int irlap_state_pclose(struct irlap_cb *self, IRLAP_EVENT event,
-			      struct sk_buff *skb, struct irlap_info *info)
-{
-	int ret = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	switch (event) {
-	case RECV_UA_RSP: /* FALLTHROUGH */
-	case RECV_DM_RSP:
-		del_timer(&self->final_timer);
-
-		/* Set new link parameters */
-		irlap_apply_default_connection_parameters(self);
-
-		/* Always switch state before calling upper layers */
-		irlap_next_state(self, LAP_NDM);
-
-		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
-		break;
-	case FINAL_TIMER_EXPIRED:
-		if (self->retry_count < self->N3) {
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-			irlap_send_disc_frame(self);
-			irlap_start_final_timer(self, self->final_timeout);
-			self->retry_count++;
-			/* Keep state */
-		} else {
-			irlap_apply_default_connection_parameters(self);
-
-			/*  Always switch state before calling upper layers */
-			irlap_next_state(self, LAP_NDM);
-
-			irlap_disconnect_indication(self, LAP_NO_RESPONSE);
-		}
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d\n", __func__, event);
-
-		ret = -1;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_nrm_p (self, event, skb, info)
- *
- *   NRM_P (Normal Response Mode as Primary), The primary station has given
- *   permissions to a secondary station to transmit IrLAP resonse frames
- *   (by sending a frame with the P bit set). The primary station will not
- *   transmit any frames and is expecting to receive frames only from the
- *   secondary to which transmission permissions has been given.
- */
-static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event,
-			     struct sk_buff *skb, struct irlap_info *info)
-{
-	int ret = 0;
-	int ns_status;
-	int nr_status;
-
-	switch (event) {
-	case RECV_I_RSP: /* Optimize for the common case */
-		if (unlikely(skb->len <= LAP_ADDR_HEADER + LAP_CTRL_HEADER)) {
-			/*
-			 * Input validation check: a stir4200/mcp2150
-			 * combination sometimes results in an empty i:rsp.
-			 * This makes no sense; we can just ignore the frame
-			 * and send an rr:cmd immediately. This happens before
-			 * changing nr or ns so triggers a retransmit
-			 */
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-			irlap_send_rr_frame(self, CMD_FRAME);
-			/* Keep state */
-			break;
-		}
-		/* FIXME: must check for remote_busy below */
-#ifdef CONFIG_IRDA_FAST_RR
-		/*
-		 *  Reset the fast_RR so we can use the fast RR code with
-		 *  full speed the next time since peer may have more frames
-		 *  to transmitt
-		 */
-		self->fast_RR = FALSE;
-#endif /* CONFIG_IRDA_FAST_RR */
-		IRDA_ASSERT( info != NULL, return -1;);
-
-		ns_status = irlap_validate_ns_received(self, info->ns);
-		nr_status = irlap_validate_nr_received(self, info->nr);
-
-		/*
-		 *  Check for expected I(nformation) frame
-		 */
-		if ((ns_status == NS_EXPECTED) && (nr_status == NR_EXPECTED)) {
-
-			/* Update Vr (next frame for us to receive) */
-			self->vr = (self->vr + 1) % 8;
-
-			/* Update Nr received, cleanup our retry queue */
-			irlap_update_nr_received(self, info->nr);
-
-			/*
-			 *  Got expected NR, so reset the
-			 *  retry_count. This is not done by IrLAP spec,
-			 *  which is strange!
-			 */
-			self->retry_count = 0;
-			self->ack_required = TRUE;
-
-			/*  poll bit cleared?  */
-			if (!info->pf) {
-				/* Keep state, do not move this line */
-				irlap_next_state(self, LAP_NRM_P);
-
-				irlap_data_indication(self, skb, FALSE);
-			} else {
-				/* No longer waiting for pf */
-				del_timer(&self->final_timer);
-
-				irlap_wait_min_turn_around(self, &self->qos_tx);
-
-				/* Call higher layer *before* changing state
-				 * to give them a chance to send data in the
-				 * next LAP frame.
-				 * Jean II */
-				irlap_data_indication(self, skb, FALSE);
-
-				/* XMIT states are the most dangerous state
-				 * to be in, because user requests are
-				 * processed directly and may change state.
-				 * On the other hand, in NDM_P, those
-				 * requests are queued and we will process
-				 * them when we return to irlap_do_event().
-				 * Jean II
-				 */
-				irlap_next_state(self, LAP_XMIT_P);
-
-				/* This is the last frame.
-				 * Make sure it's always called in XMIT state.
-				 * - Jean II */
-				irlap_start_poll_timer(self, self->poll_timeout);
-			}
-			break;
-
-		}
-		/* Unexpected next to send (Ns) */
-		if ((ns_status == NS_UNEXPECTED) && (nr_status == NR_EXPECTED))
-		{
-			if (!info->pf) {
-				irlap_update_nr_received(self, info->nr);
-
-				/*
-				 *  Wait until the last frame before doing
-				 *  anything
-				 */
-
-				/* Keep state */
-				irlap_next_state(self, LAP_NRM_P);
-			} else {
-				pr_debug("%s(), missing or duplicate frame!\n",
-					 __func__);
-
-				/* Update Nr received */
-				irlap_update_nr_received(self, info->nr);
-
-				irlap_wait_min_turn_around(self, &self->qos_tx);
-				irlap_send_rr_frame(self, CMD_FRAME);
-
-				self->ack_required = FALSE;
-
-				irlap_start_final_timer(self, self->final_timeout);
-				irlap_next_state(self, LAP_NRM_P);
-			}
-			break;
-		}
-		/*
-		 *  Unexpected next to receive (Nr)
-		 */
-		if ((ns_status == NS_EXPECTED) && (nr_status == NR_UNEXPECTED))
-		{
-			if (info->pf) {
-				self->vr = (self->vr + 1) % 8;
-
-				/* Update Nr received */
-				irlap_update_nr_received(self, info->nr);
-
-				/* Resend rejected frames */
-				irlap_resend_rejected_frames(self, CMD_FRAME);
-
-				self->ack_required = FALSE;
-
-				/* Make sure we account for the time
-				 * to transmit our frames. See comemnts
-				 * in irlap_send_data_primary_poll().
-				 * Jean II */
-				irlap_start_final_timer(self, 2 * self->final_timeout);
-
-				/* Keep state, do not move this line */
-				irlap_next_state(self, LAP_NRM_P);
-
-				irlap_data_indication(self, skb, FALSE);
-			} else {
-				/*
-				 *  Do not resend frames until the last
-				 *  frame has arrived from the other
-				 *  device. This is not documented in
-				 *  IrLAP!!
-				 */
-				self->vr = (self->vr + 1) % 8;
-
-				/* Update Nr received */
-				irlap_update_nr_received(self, info->nr);
-
-				self->ack_required = FALSE;
-
-				/* Keep state, do not move this line!*/
-				irlap_next_state(self, LAP_NRM_P);
-
-				irlap_data_indication(self, skb, FALSE);
-			}
-			break;
-		}
-		/*
-		 *  Unexpected next to send (Ns) and next to receive (Nr)
-		 *  Not documented by IrLAP!
-		 */
-		if ((ns_status == NS_UNEXPECTED) &&
-		    (nr_status == NR_UNEXPECTED))
-		{
-			pr_debug("%s(), unexpected nr and ns!\n",
-				 __func__);
-			if (info->pf) {
-				/* Resend rejected frames */
-				irlap_resend_rejected_frames(self, CMD_FRAME);
-
-				/* Give peer some time to retransmit!
-				 * But account for our own Tx. */
-				irlap_start_final_timer(self, 2 * self->final_timeout);
-
-				/* Keep state, do not move this line */
-				irlap_next_state(self, LAP_NRM_P);
-			} else {
-				/* Update Nr received */
-				/* irlap_update_nr_received( info->nr); */
-
-				self->ack_required = FALSE;
-			}
-			break;
-		}
-
-		/*
-		 *  Invalid NR or NS
-		 */
-		if ((nr_status == NR_INVALID) || (ns_status == NS_INVALID)) {
-			if (info->pf) {
-				del_timer(&self->final_timer);
-
-				irlap_next_state(self, LAP_RESET_WAIT);
-
-				irlap_disconnect_indication(self, LAP_RESET_INDICATION);
-				self->xmitflag = TRUE;
-			} else {
-				del_timer(&self->final_timer);
-
-				irlap_disconnect_indication(self, LAP_RESET_INDICATION);
-
-				self->xmitflag = FALSE;
-			}
-			break;
-		}
-		pr_debug("%s(), Not implemented!\n", __func__);
-		pr_debug("%s(), event=%s, ns_status=%d, nr_status=%d\n",
-			 __func__, irlap_event[event], ns_status, nr_status);
-		break;
-	case RECV_UI_FRAME:
-		/* Poll bit cleared? */
-		if (!info->pf) {
-			irlap_data_indication(self, skb, TRUE);
-			irlap_next_state(self, LAP_NRM_P);
-		} else {
-			del_timer(&self->final_timer);
-			irlap_data_indication(self, skb, TRUE);
-			irlap_next_state(self, LAP_XMIT_P);
-			pr_debug("%s: RECV_UI_FRAME: next state %s\n",
-				 __func__, irlap_state[self->state]);
-			irlap_start_poll_timer(self, self->poll_timeout);
-		}
-		break;
-	case RECV_RR_RSP:
-		/*
-		 *  If you get a RR, the remote isn't busy anymore,
-		 *  no matter what the NR
-		 */
-		self->remote_busy = FALSE;
-
-		/* Stop final timer */
-		del_timer(&self->final_timer);
-
-		/*
-		 *  Nr as expected?
-		 */
-		ret = irlap_validate_nr_received(self, info->nr);
-		if (ret == NR_EXPECTED) {
-			/* Update Nr received */
-			irlap_update_nr_received(self, info->nr);
-
-			/*
-			 *  Got expected NR, so reset the retry_count. This
-			 *  is not done by the IrLAP standard , which is
-			 *  strange! DB.
-			 */
-			self->retry_count = 0;
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-
-			irlap_next_state(self, LAP_XMIT_P);
-
-			/* Start poll timer */
-			irlap_start_poll_timer(self, self->poll_timeout);
-		} else if (ret == NR_UNEXPECTED) {
-			IRDA_ASSERT(info != NULL, return -1;);
-			/*
-			 *  Unexpected nr!
-			 */
-
-			/* Update Nr received */
-			irlap_update_nr_received(self, info->nr);
-
-			pr_debug("RECV_RR_FRAME: Retrans:%d, nr=%d, va=%d, vs=%d, vr=%d\n",
-				 self->retry_count, info->nr, self->va,
-				 self->vs, self->vr);
-
-			/* Resend rejected frames */
-			irlap_resend_rejected_frames(self, CMD_FRAME);
-			irlap_start_final_timer(self, self->final_timeout * 2);
-
-			irlap_next_state(self, LAP_NRM_P);
-		} else if (ret == NR_INVALID) {
-			pr_debug("%s(), Received RR with invalid nr !\n",
-				 __func__);
-
-			irlap_next_state(self, LAP_RESET_WAIT);
-
-			irlap_disconnect_indication(self, LAP_RESET_INDICATION);
-			self->xmitflag = TRUE;
-		}
-		break;
-	case RECV_RNR_RSP:
-		IRDA_ASSERT(info != NULL, return -1;);
-
-		/* Stop final timer */
-		del_timer(&self->final_timer);
-		self->remote_busy = TRUE;
-
-		/* Update Nr received */
-		irlap_update_nr_received(self, info->nr);
-		irlap_next_state(self, LAP_XMIT_P);
-
-		/* Start poll timer */
-		irlap_start_poll_timer(self, self->poll_timeout);
-		break;
-	case RECV_FRMR_RSP:
-		del_timer(&self->final_timer);
-		self->xmitflag = TRUE;
-		irlap_next_state(self, LAP_RESET_WAIT);
-		irlap_reset_indication(self);
-		break;
-	case FINAL_TIMER_EXPIRED:
-		/*
-		 *  We are allowed to wait for additional 300 ms if
-		 *  final timer expires when we are in the middle
-		 *  of receiving a frame (page 45, IrLAP). Check that
-		 *  we only do this once for each frame.
-		 */
-		if (irda_device_is_receiving(self->netdev) && !self->add_wait) {
-			pr_debug("FINAL_TIMER_EXPIRED when receiving a frame! Waiting a little bit more!\n");
-			irlap_start_final_timer(self, msecs_to_jiffies(300));
-
-			/*
-			 *  Don't allow this to happen one more time in a row,
-			 *  or else we can get a pretty tight loop here if
-			 *  if we only receive half a frame. DB.
-			 */
-			self->add_wait = TRUE;
-			break;
-		}
-		self->add_wait = FALSE;
-
-		/* N2 is the disconnect timer. Until we reach it, we retry */
-		if (self->retry_count < self->N2) {
-			if (skb_peek(&self->wx_list) == NULL) {
-				/* Retry sending the pf bit to the secondary */
-				pr_debug("nrm_p: resending rr");
-				irlap_wait_min_turn_around(self, &self->qos_tx);
-				irlap_send_rr_frame(self, CMD_FRAME);
-			} else {
-				pr_debug("nrm_p: resend frames");
-				irlap_resend_rejected_frames(self, CMD_FRAME);
-			}
-
-			irlap_start_final_timer(self, self->final_timeout);
-			self->retry_count++;
-			pr_debug("irlap_state_nrm_p: FINAL_TIMER_EXPIRED: retry_count=%d\n",
-				 self->retry_count);
-
-			/* Early warning event. I'm using a pretty liberal
-			 * interpretation of the spec and generate an event
-			 * every time the timer is multiple of N1 (and not
-			 * only the first time). This allow application
-			 * to know precisely if connectivity restart...
-			 * Jean II */
-			if((self->retry_count % self->N1) == 0)
-				irlap_status_indication(self,
-							STATUS_NO_ACTIVITY);
-
-			/* Keep state */
-		} else {
-			irlap_apply_default_connection_parameters(self);
-
-			/* Always switch state before calling upper layers */
-			irlap_next_state(self, LAP_NDM);
-			irlap_disconnect_indication(self, LAP_NO_RESPONSE);
-		}
-		break;
-	case RECV_REJ_RSP:
-		irlap_update_nr_received(self, info->nr);
-		if (self->remote_busy) {
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-			irlap_send_rr_frame(self, CMD_FRAME);
-		} else
-			irlap_resend_rejected_frames(self, CMD_FRAME);
-		irlap_start_final_timer(self, 2 * self->final_timeout);
-		break;
-	case RECV_SREJ_RSP:
-		irlap_update_nr_received(self, info->nr);
-		if (self->remote_busy) {
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-			irlap_send_rr_frame(self, CMD_FRAME);
-		} else
-			irlap_resend_rejected_frame(self, CMD_FRAME);
-		irlap_start_final_timer(self, 2 * self->final_timeout);
-		break;
-	case RECV_RD_RSP:
-		pr_debug("%s(), RECV_RD_RSP\n", __func__);
-
-		irlap_flush_all_queues(self);
-		irlap_next_state(self, LAP_XMIT_P);
-		/* Call back the LAP state machine to do a proper disconnect */
-		irlap_disconnect_request(self);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s\n",
-			 __func__, irlap_event[event]);
-
-		ret = -1;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_reset_wait (event, skb, info)
- *
- *    We have informed the service user of a reset condition, and is
- *    awaiting reset of disconnect request.
- *
- */
-static int irlap_state_reset_wait(struct irlap_cb *self, IRLAP_EVENT event,
-				  struct sk_buff *skb, struct irlap_info *info)
-{
-	int ret = 0;
-
-	pr_debug("%s(), event = %s\n", __func__, irlap_event[event]);
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	switch (event) {
-	case RESET_REQUEST:
-		if (self->xmitflag) {
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-			irlap_send_snrm_frame(self, NULL);
-			irlap_start_final_timer(self, self->final_timeout);
-			irlap_next_state(self, LAP_RESET);
-		} else {
-			irlap_start_final_timer(self, self->final_timeout);
-			irlap_next_state(self, LAP_RESET);
-		}
-		break;
-	case DISCONNECT_REQUEST:
-		irlap_wait_min_turn_around( self, &self->qos_tx);
-		irlap_send_disc_frame( self);
-		irlap_flush_all_queues( self);
-		irlap_start_final_timer( self, self->final_timeout);
-		self->retry_count = 0;
-		irlap_next_state( self, LAP_PCLOSE);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s\n", __func__,
-			 irlap_event[event]);
-
-		ret = -1;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_reset (self, event, skb, info)
- *
- *    We have sent a SNRM reset command to the peer layer, and is awaiting
- *    reply.
- *
- */
-static int irlap_state_reset(struct irlap_cb *self, IRLAP_EVENT event,
-			     struct sk_buff *skb, struct irlap_info *info)
-{
-	int ret = 0;
-
-	pr_debug("%s(), event = %s\n", __func__, irlap_event[event]);
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	switch (event) {
-	case RECV_DISC_CMD:
-		del_timer(&self->final_timer);
-
-		irlap_apply_default_connection_parameters(self);
-
-		/* Always switch state before calling upper layers */
-		irlap_next_state(self, LAP_NDM);
-
-		irlap_disconnect_indication(self, LAP_NO_RESPONSE);
-
-		break;
-	case RECV_UA_RSP:
-		del_timer(&self->final_timer);
-
-		/* Initiate connection state */
-		irlap_initiate_connection_state(self);
-
-		irlap_reset_confirm();
-
-		self->remote_busy = FALSE;
-
-		irlap_next_state(self, LAP_XMIT_P);
-
-		irlap_start_poll_timer(self, self->poll_timeout);
-
-		break;
-	case FINAL_TIMER_EXPIRED:
-		if (self->retry_count < 3) {
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-
-			IRDA_ASSERT(self->netdev != NULL, return -1;);
-			irlap_send_snrm_frame(self, self->qos_dev);
-
-			self->retry_count++; /* Experimental!! */
-
-			irlap_start_final_timer(self, self->final_timeout);
-			irlap_next_state(self, LAP_RESET);
-		} else if (self->retry_count >= self->N3) {
-			irlap_apply_default_connection_parameters(self);
-
-			/* Always switch state before calling upper layers */
-			irlap_next_state(self, LAP_NDM);
-
-			irlap_disconnect_indication(self, LAP_NO_RESPONSE);
-		}
-		break;
-	case RECV_SNRM_CMD:
-		/*
-		 * SNRM frame is not allowed to contain an I-field in this
-		 * state
-		 */
-		if (!info) {
-			pr_debug("%s(), RECV_SNRM_CMD\n", __func__);
-			irlap_initiate_connection_state(self);
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-			irlap_send_ua_response_frame(self, &self->qos_rx);
-			irlap_reset_confirm();
-			irlap_start_wd_timer(self, self->wd_timeout);
-			irlap_next_state(self, LAP_NDM);
-		} else {
-			pr_debug("%s(), SNRM frame contained an I field!\n",
-				 __func__);
-		}
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s\n",
-			 __func__, irlap_event[event]);
-
-		ret = -1;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_xmit_s (event, skb, info)
- *
- *   XMIT_S, The secondary station has been given the right to transmit,
- *   and we therefore do not expect to receive any transmissions from other
- *   stations.
- */
-static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event,
-			      struct sk_buff *skb, struct irlap_info *info)
-{
-	int ret = 0;
-
-	pr_debug("%s(), event=%s\n", __func__, irlap_event[event]);
-
-	IRDA_ASSERT(self != NULL, return -ENODEV;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;);
-
-	switch (event) {
-	case SEND_I_CMD:
-		/*
-		 *  Send frame only if send window > 0
-		 */
-		if ((self->window > 0) && (!self->remote_busy)) {
-			int nextfit;
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
-			struct sk_buff *skb_next;
-
-			/*
-			 * Same deal as in irlap_state_xmit_p(), so see
-			 * the comments at that point.
-			 * We are the secondary, so there are only subtle
-			 * differences. - Jean II
-			 */
-
-			/* Check if a subsequent skb exist and would fit in
-			 * the current window (with respect to turnaround
-			 * time). - Jean II */
-			skb_next = skb_peek(&self->txq);
-			nextfit = ((skb_next != NULL) &&
-				   ((skb_next->len + skb->len) <=
-				    self->bytes_left));
-
-			/*
-			 *  Test if we have transmitted more bytes over the
-			 *  link than its possible to do with the current
-			 *  speed and turn-around-time.
-			 */
-			if((!nextfit) && (skb->len > self->bytes_left)) {
-				pr_debug("%s(), Not allowed to transmit more bytes!\n",
-					 __func__);
-				/* Requeue the skb */
-				skb_queue_head(&self->txq, skb_get(skb));
-
-				/*
-				 *  Switch to NRM_S, this is only possible
-				 *  when we are in secondary mode, since we
-				 *  must be sure that we don't miss any RR
-				 *  frames
-				 */
-				self->window = self->window_size;
-				self->bytes_left = self->line_capacity;
-				irlap_start_wd_timer(self, self->wd_timeout);
-
-				irlap_next_state(self, LAP_NRM_S);
-				/* Slight difference with primary :
-				 * here we would wait for the other side to
-				 * expire the turnaround. - Jean II */
-
-				return -EPROTO; /* Try again later */
-			}
-			/* Subtract space used by this skb */
-			self->bytes_left -= skb->len;
-#else	/* CONFIG_IRDA_DYNAMIC_WINDOW */
-			/* Window has been adjusted for the max packet
-			 * size, so much simpler... - Jean II */
-			nextfit = !skb_queue_empty(&self->txq);
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-			/*
-			 *  Send data with final bit cleared only if window > 1
-			 *  and there is more frames to be sent
-			 */
-			if ((self->window > 1) && (nextfit)) {
-				irlap_send_data_secondary(self, skb);
-				irlap_next_state(self, LAP_XMIT_S);
-			} else {
-				irlap_send_data_secondary_final(self, skb);
-				irlap_next_state(self, LAP_NRM_S);
-
-				/*
-				 * Make sure state machine does not try to send
-				 * any more frames
-				 */
-				ret = -EPROTO;
-			}
-		} else {
-			pr_debug("%s(), Unable to send!\n", __func__);
-			skb_queue_head(&self->txq, skb_get(skb));
-			ret = -EPROTO;
-		}
-		break;
-	case DISCONNECT_REQUEST:
-		irlap_send_rd_frame(self);
-		irlap_flush_all_queues(self);
-		irlap_start_wd_timer(self, self->wd_timeout);
-		irlap_next_state(self, LAP_SCLOSE);
-		break;
-	case DATA_REQUEST:
-		/* Nothing to do, irlap_do_event() will send the packet
-		 * when we return... - Jean II */
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s\n", __func__,
-			 irlap_event[event]);
-
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_nrm_s (event, skb, info)
- *
- *    NRM_S (Normal Response Mode as Secondary) state, in this state we are
- *    expecting to receive frames from the primary station
- *
- */
-static int irlap_state_nrm_s(struct irlap_cb *self, IRLAP_EVENT event,
-			     struct sk_buff *skb, struct irlap_info *info)
-{
-	int ns_status;
-	int nr_status;
-	int ret = 0;
-
-	pr_debug("%s(), event=%s\n", __func__, irlap_event[event]);
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	switch (event) {
-	case RECV_I_CMD: /* Optimize for the common case */
-		/* FIXME: must check for remote_busy below */
-		pr_debug("%s(), event=%s nr=%d, vs=%d, ns=%d, vr=%d, pf=%d\n",
-			 __func__, irlap_event[event], info->nr,
-			 self->vs, info->ns, self->vr, info->pf);
-
-		self->retry_count = 0;
-
-		ns_status = irlap_validate_ns_received(self, info->ns);
-		nr_status = irlap_validate_nr_received(self, info->nr);
-		/*
-		 *  Check for expected I(nformation) frame
-		 */
-		if ((ns_status == NS_EXPECTED) && (nr_status == NR_EXPECTED)) {
-
-			/* Update Vr (next frame for us to receive) */
-			self->vr = (self->vr + 1) % 8;
-
-			/* Update Nr received */
-			irlap_update_nr_received(self, info->nr);
-
-			/*
-			 *  poll bit cleared?
-			 */
-			if (!info->pf) {
-
-				self->ack_required = TRUE;
-
-				/*
-				 *  Starting WD-timer here is optional, but
-				 *  not recommended. Note 6 IrLAP p. 83
-				 */
-#if 0
-				irda_start_timer(WD_TIMER, self->wd_timeout);
-#endif
-				/* Keep state, do not move this line */
-				irlap_next_state(self, LAP_NRM_S);
-
-				irlap_data_indication(self, skb, FALSE);
-				break;
-			} else {
-				/*
-				 *  We should wait before sending RR, and
-				 *  also before changing to XMIT_S
-				 *  state. (note 1, IrLAP p. 82)
-				 */
-				irlap_wait_min_turn_around(self, &self->qos_tx);
-
-				/*
-				 * Give higher layers a chance to
-				 * immediately reply with some data before
-				 * we decide if we should send a RR frame
-				 * or not
-				 */
-				irlap_data_indication(self, skb, FALSE);
-
-				/* Any pending data requests?  */
-				if (!skb_queue_empty(&self->txq) &&
-				    (self->window > 0))
-				{
-					self->ack_required = TRUE;
-
-					del_timer(&self->wd_timer);
-
-					irlap_next_state(self, LAP_XMIT_S);
-				} else {
-					irlap_send_rr_frame(self, RSP_FRAME);
-					irlap_start_wd_timer(self,
-							     self->wd_timeout);
-
-					/* Keep the state */
-					irlap_next_state(self, LAP_NRM_S);
-				}
-				break;
-			}
-		}
-		/*
-		 *  Check for Unexpected next to send (Ns)
-		 */
-		if ((ns_status == NS_UNEXPECTED) && (nr_status == NR_EXPECTED))
-		{
-			/* Unexpected next to send, with final bit cleared */
-			if (!info->pf) {
-				irlap_update_nr_received(self, info->nr);
-
-				irlap_start_wd_timer(self, self->wd_timeout);
-			} else {
-				/* Update Nr received */
-				irlap_update_nr_received(self, info->nr);
-
-				irlap_wait_min_turn_around(self, &self->qos_tx);
-				irlap_send_rr_frame(self, RSP_FRAME);
-
-				irlap_start_wd_timer(self, self->wd_timeout);
-			}
-			break;
-		}
-
-		/*
-		 *  Unexpected Next to Receive(NR) ?
-		 */
-		if ((ns_status == NS_EXPECTED) && (nr_status == NR_UNEXPECTED))
-		{
-			if (info->pf) {
-				pr_debug("RECV_I_RSP: frame(s) lost\n");
-
-				self->vr = (self->vr + 1) % 8;
-
-				/* Update Nr received */
-				irlap_update_nr_received(self, info->nr);
-
-				/* Resend rejected frames */
-				irlap_resend_rejected_frames(self, RSP_FRAME);
-
-				/* Keep state, do not move this line */
-				irlap_next_state(self, LAP_NRM_S);
-
-				irlap_data_indication(self, skb, FALSE);
-				irlap_start_wd_timer(self, self->wd_timeout);
-				break;
-			}
-			/*
-			 *  This is not documented in IrLAP!! Unexpected NR
-			 *  with poll bit cleared
-			 */
-			if (!info->pf) {
-				self->vr = (self->vr + 1) % 8;
-
-				/* Update Nr received */
-				irlap_update_nr_received(self, info->nr);
-
-				/* Keep state, do not move this line */
-				irlap_next_state(self, LAP_NRM_S);
-
-				irlap_data_indication(self, skb, FALSE);
-				irlap_start_wd_timer(self, self->wd_timeout);
-			}
-			break;
-		}
-
-		if (ret == NR_INVALID) {
-			pr_debug("NRM_S, NR_INVALID not implemented!\n");
-		}
-		if (ret == NS_INVALID) {
-			pr_debug("NRM_S, NS_INVALID not implemented!\n");
-		}
-		break;
-	case RECV_UI_FRAME:
-		/*
-		 *  poll bit cleared?
-		 */
-		if (!info->pf) {
-			irlap_data_indication(self, skb, TRUE);
-			irlap_next_state(self, LAP_NRM_S); /* Keep state */
-		} else {
-			/*
-			 *  Any pending data requests?
-			 */
-			if (!skb_queue_empty(&self->txq) &&
-			    (self->window > 0) && !self->remote_busy)
-			{
-				irlap_data_indication(self, skb, TRUE);
-
-				del_timer(&self->wd_timer);
-
-				irlap_next_state(self, LAP_XMIT_S);
-			} else {
-				irlap_data_indication(self, skb, TRUE);
-
-				irlap_wait_min_turn_around(self, &self->qos_tx);
-
-				irlap_send_rr_frame(self, RSP_FRAME);
-				self->ack_required = FALSE;
-
-				irlap_start_wd_timer(self, self->wd_timeout);
-
-				/* Keep the state */
-				irlap_next_state(self, LAP_NRM_S);
-			}
-		}
-		break;
-	case RECV_RR_CMD:
-		self->retry_count = 0;
-
-		/*
-		 *  Nr as expected?
-		 */
-		nr_status = irlap_validate_nr_received(self, info->nr);
-		if (nr_status == NR_EXPECTED) {
-			if (!skb_queue_empty(&self->txq) &&
-			    (self->window > 0)) {
-				self->remote_busy = FALSE;
-
-				/* Update Nr received */
-				irlap_update_nr_received(self, info->nr);
-				del_timer(&self->wd_timer);
-
-				irlap_wait_min_turn_around(self, &self->qos_tx);
-				irlap_next_state(self, LAP_XMIT_S);
-			} else {
-				self->remote_busy = FALSE;
-				/* Update Nr received */
-				irlap_update_nr_received(self, info->nr);
-				irlap_wait_min_turn_around(self, &self->qos_tx);
-				irlap_start_wd_timer(self, self->wd_timeout);
-
-				/* Note : if the link is idle (this case),
-				 * we never go in XMIT_S, so we never get a
-				 * chance to process any DISCONNECT_REQUEST.
-				 * Do it now ! - Jean II */
-				if (self->disconnect_pending) {
-					/* Disconnect */
-					irlap_send_rd_frame(self);
-					irlap_flush_all_queues(self);
-
-					irlap_next_state(self, LAP_SCLOSE);
-				} else {
-					/* Just send back pf bit */
-					irlap_send_rr_frame(self, RSP_FRAME);
-
-					irlap_next_state(self, LAP_NRM_S);
-				}
-			}
-		} else if (nr_status == NR_UNEXPECTED) {
-			self->remote_busy = FALSE;
-			irlap_update_nr_received(self, info->nr);
-			irlap_resend_rejected_frames(self, RSP_FRAME);
-
-			irlap_start_wd_timer(self, self->wd_timeout);
-
-			/* Keep state */
-			irlap_next_state(self, LAP_NRM_S);
-		} else {
-			pr_debug("%s(), invalid nr not implemented!\n",
-				 __func__);
-		}
-		break;
-	case RECV_SNRM_CMD:
-		/* SNRM frame is not allowed to contain an I-field */
-		if (!info) {
-			del_timer(&self->wd_timer);
-			pr_debug("%s(), received SNRM cmd\n", __func__);
-			irlap_next_state(self, LAP_RESET_CHECK);
-
-			irlap_reset_indication(self);
-		} else {
-			pr_debug("%s(), SNRM frame contained an I-field!\n",
-				 __func__);
-
-		}
-		break;
-	case RECV_REJ_CMD:
-		irlap_update_nr_received(self, info->nr);
-		if (self->remote_busy) {
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-			irlap_send_rr_frame(self, RSP_FRAME);
-		} else
-			irlap_resend_rejected_frames(self, RSP_FRAME);
-		irlap_start_wd_timer(self, self->wd_timeout);
-		break;
-	case RECV_SREJ_CMD:
-		irlap_update_nr_received(self, info->nr);
-		if (self->remote_busy) {
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-			irlap_send_rr_frame(self, RSP_FRAME);
-		} else
-			irlap_resend_rejected_frame(self, RSP_FRAME);
-		irlap_start_wd_timer(self, self->wd_timeout);
-		break;
-	case WD_TIMER_EXPIRED:
-		/*
-		 *  Wait until retry_count * n matches negotiated threshold/
-		 *  disconnect time (note 2 in IrLAP p. 82)
-		 *
-		 * Similar to irlap_state_nrm_p() -> FINAL_TIMER_EXPIRED
-		 * Note : self->wd_timeout = (self->final_timeout * 2),
-		 *   which explain why we use (self->N2 / 2) here !!!
-		 * Jean II
-		 */
-		pr_debug("%s(), retry_count = %d\n", __func__,
-			 self->retry_count);
-
-		if (self->retry_count < (self->N2 / 2)) {
-			/* No retry, just wait for primary */
-			irlap_start_wd_timer(self, self->wd_timeout);
-			self->retry_count++;
-
-			if((self->retry_count % (self->N1 / 2)) == 0)
-				irlap_status_indication(self,
-							STATUS_NO_ACTIVITY);
-		} else {
-			irlap_apply_default_connection_parameters(self);
-
-			/* Always switch state before calling upper layers */
-			irlap_next_state(self, LAP_NDM);
-			irlap_disconnect_indication(self, LAP_NO_RESPONSE);
-		}
-		break;
-	case RECV_DISC_CMD:
-		/* Always switch state before calling upper layers */
-		irlap_next_state(self, LAP_NDM);
-
-		/* Send disconnect response */
-		irlap_wait_min_turn_around(self, &self->qos_tx);
-		irlap_send_ua_response_frame(self, NULL);
-
-		del_timer(&self->wd_timer);
-		irlap_flush_all_queues(self);
-		/* Set default link parameters */
-		irlap_apply_default_connection_parameters(self);
-
-		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
-		break;
-	case RECV_DISCOVERY_XID_CMD:
-		irlap_wait_min_turn_around(self, &self->qos_tx);
-		irlap_send_rr_frame(self, RSP_FRAME);
-		self->ack_required = TRUE;
-		irlap_start_wd_timer(self, self->wd_timeout);
-		irlap_next_state(self, LAP_NRM_S);
-
-		break;
-	case RECV_TEST_CMD:
-		/* Remove test frame header (only LAP header in NRM) */
-		skb_pull(skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER);
-
-		irlap_wait_min_turn_around(self, &self->qos_tx);
-		irlap_start_wd_timer(self, self->wd_timeout);
-
-		/* Send response (info will be copied) */
-		irlap_send_test_frame(self, self->caddr, info->daddr, skb);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d, (%s)\n", __func__,
-			 event, irlap_event[event]);
-
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlap_state_sclose (self, event, skb, info)
- */
-static int irlap_state_sclose(struct irlap_cb *self, IRLAP_EVENT event,
-			      struct sk_buff *skb, struct irlap_info *info)
-{
-	IRDA_ASSERT(self != NULL, return -ENODEV;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;);
-
-	switch (event) {
-	case RECV_DISC_CMD:
-		/* Always switch state before calling upper layers */
-		irlap_next_state(self, LAP_NDM);
-
-		/* Send disconnect response */
-		irlap_wait_min_turn_around(self, &self->qos_tx);
-		irlap_send_ua_response_frame(self, NULL);
-
-		del_timer(&self->wd_timer);
-		/* Set default link parameters */
-		irlap_apply_default_connection_parameters(self);
-
-		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
-		break;
-	case RECV_DM_RSP:
-		/* IrLAP-1.1 p.82: in SCLOSE, S and I type RSP frames
-		 * shall take us down into default NDM state, like DM_RSP
-		 */
-	case RECV_RR_RSP:
-	case RECV_RNR_RSP:
-	case RECV_REJ_RSP:
-	case RECV_SREJ_RSP:
-	case RECV_I_RSP:
-		/* Always switch state before calling upper layers */
-		irlap_next_state(self, LAP_NDM);
-
-		del_timer(&self->wd_timer);
-		irlap_apply_default_connection_parameters(self);
-
-		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
-		break;
-	case WD_TIMER_EXPIRED:
-		/* Always switch state before calling upper layers */
-		irlap_next_state(self, LAP_NDM);
-
-		irlap_apply_default_connection_parameters(self);
-
-		irlap_disconnect_indication(self, LAP_DISC_INDICATION);
-		break;
-	default:
-		/* IrLAP-1.1 p.82: in SCLOSE, basically any received frame
-		 * with pf=1 shall restart the wd-timer and resend the rd:rsp
-		 */
-		if (info != NULL  &&  info->pf) {
-			del_timer(&self->wd_timer);
-			irlap_wait_min_turn_around(self, &self->qos_tx);
-			irlap_send_rd_frame(self);
-			irlap_start_wd_timer(self, self->wd_timeout);
-			break;		/* stay in SCLOSE */
-		}
-
-		pr_debug("%s(), Unknown event %d, (%s)\n", __func__,
-			 event, irlap_event[event]);
-
-		break;
-	}
-
-	return -1;
-}
-
-static int irlap_state_reset_check( struct irlap_cb *self, IRLAP_EVENT event,
-				   struct sk_buff *skb,
-				   struct irlap_info *info)
-{
-	int ret = 0;
-
-	pr_debug("%s(), event=%s\n", __func__, irlap_event[event]);
-
-	IRDA_ASSERT(self != NULL, return -ENODEV;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;);
-
-	switch (event) {
-	case RESET_RESPONSE:
-		irlap_send_ua_response_frame(self, &self->qos_rx);
-		irlap_initiate_connection_state(self);
-		irlap_start_wd_timer(self, WD_TIMEOUT);
-		irlap_flush_all_queues(self);
-
-		irlap_next_state(self, LAP_NRM_S);
-		break;
-	case DISCONNECT_REQUEST:
-		irlap_wait_min_turn_around(self, &self->qos_tx);
-		irlap_send_rd_frame(self);
-		irlap_start_wd_timer(self, WD_TIMEOUT);
-		irlap_next_state(self, LAP_SCLOSE);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %d, (%s)\n", __func__,
-			 event, irlap_event[event]);
-
-		ret = -EINVAL;
-		break;
-	}
-	return ret;
-}
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
deleted file mode 100644
index debda3de4726..000000000000
--- a/net/irda/irlap_frame.c
+++ /dev/null
@@ -1,1407 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlap_frame.c
- * Version:       1.0
- * Description:   Build and transmit IrLAP frames
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Aug 19 10:27:26 1997
- * Modified at:   Wed Jan  5 08:59:04 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-#include <linux/if.h>
-#include <linux/if_ether.h>
-#include <linux/netdevice.h>
-#include <linux/irda.h>
-#include <linux/slab.h>
-
-#include <net/pkt_sched.h>
-#include <net/sock.h>
-
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irda_device.h>
-#include <net/irda/irlap.h>
-#include <net/irda/wrapper.h>
-#include <net/irda/timer.h>
-#include <net/irda/irlap_frame.h>
-#include <net/irda/qos.h>
-
-static void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb,
-			       int command);
-
-/*
- * Function irlap_insert_info (self, skb)
- *
- *    Insert minimum turnaround time and speed information into the skb. We
- *    need to do this since it's per packet relevant information. Safe to
- *    have this function inlined since it's only called from one place
- */
-static inline void irlap_insert_info(struct irlap_cb *self,
-				     struct sk_buff *skb)
-{
-	struct irda_skb_cb *cb = (struct irda_skb_cb *) skb->cb;
-
-	/*
-	 * Insert MTT (min. turn time) and speed into skb, so that the
-	 * device driver knows which settings to use
-	 */
-	cb->magic = LAP_MAGIC;
-	cb->mtt = self->mtt_required;
-	cb->next_speed = self->speed;
-
-	/* Reset */
-	self->mtt_required = 0;
-
-	/*
-	 * Delay equals negotiated BOFs count, plus the number of BOFs to
-	 * force the negotiated minimum turnaround time
-	 */
-	cb->xbofs = self->bofs_count;
-	cb->next_xbofs = self->next_bofs;
-	cb->xbofs_delay = self->xbofs_delay;
-
-	/* Reset XBOF's delay (used only for getting min turn time) */
-	self->xbofs_delay = 0;
-	/* Put the correct xbofs value for the next packet */
-	self->bofs_count = self->next_bofs;
-}
-
-/*
- * Function irlap_queue_xmit (self, skb)
- *
- *    A little wrapper for dev_queue_xmit, so we can insert some common
- *    code into it.
- */
-void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
-{
-	/* Some common init stuff */
-	skb->dev = self->netdev;
-	skb_reset_mac_header(skb);
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	skb->protocol = htons(ETH_P_IRDA);
-	skb->priority = TC_PRIO_BESTEFFORT;
-
-	irlap_insert_info(self, skb);
-
-	if (unlikely(self->mode & IRDA_MODE_MONITOR)) {
-		pr_debug("%s(): %s is in monitor mode\n", __func__,
-			 self->netdev->name);
-		dev_kfree_skb(skb);
-		return;
-	}
-
-	dev_queue_xmit(skb);
-}
-
-/*
- * Function irlap_send_snrm_cmd (void)
- *
- *    Transmits a connect SNRM command frame
- */
-void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos)
-{
-	struct sk_buff *tx_skb;
-	struct snrm_frame *frame;
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/* Allocate frame */
-	tx_skb = alloc_skb(sizeof(struct snrm_frame) +
-			   IRLAP_NEGOCIATION_PARAMS_LEN,
-			   GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	frame = skb_put(tx_skb, 2);
-
-	/* Insert connection address field */
-	if (qos)
-		frame->caddr = CMD_FRAME | CBROADCAST;
-	else
-		frame->caddr = CMD_FRAME | self->caddr;
-
-	/* Insert control field */
-	frame->control = SNRM_CMD | PF_BIT;
-
-	/*
-	 *  If we are establishing a connection then insert QoS parameters
-	 */
-	if (qos) {
-		skb_put(tx_skb, 9); /* 25 left */
-		frame->saddr = cpu_to_le32(self->saddr);
-		frame->daddr = cpu_to_le32(self->daddr);
-
-		frame->ncaddr = self->caddr;
-
-		ret = irlap_insert_qos_negotiation_params(self, tx_skb);
-		if (ret < 0) {
-			dev_kfree_skb(tx_skb);
-			return;
-		}
-	}
-	irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_recv_snrm_cmd (skb, info)
- *
- *    Received SNRM (Set Normal Response Mode) command frame
- *
- */
-static void irlap_recv_snrm_cmd(struct irlap_cb *self, struct sk_buff *skb,
-				struct irlap_info *info)
-{
-	struct snrm_frame *frame;
-
-	if (pskb_may_pull(skb,sizeof(struct snrm_frame))) {
-		frame = (struct snrm_frame *) skb->data;
-
-		/* Copy the new connection address ignoring the C/R bit */
-		info->caddr = frame->ncaddr & 0xFE;
-
-		/* Check if the new connection address is valid */
-		if ((info->caddr == 0x00) || (info->caddr == 0xfe)) {
-			pr_debug("%s(), invalid connection address!\n",
-				 __func__);
-			return;
-		}
-
-		/* Copy peer device address */
-		info->daddr = le32_to_cpu(frame->saddr);
-		info->saddr = le32_to_cpu(frame->daddr);
-
-		/* Only accept if addressed directly to us */
-		if (info->saddr != self->saddr) {
-			pr_debug("%s(), not addressed to us!\n",
-				 __func__);
-			return;
-		}
-		irlap_do_event(self, RECV_SNRM_CMD, skb, info);
-	} else {
-		/* Signal that this SNRM frame does not contain and I-field */
-		irlap_do_event(self, RECV_SNRM_CMD, skb, NULL);
-	}
-}
-
-/*
- * Function irlap_send_ua_response_frame (qos)
- *
- *    Send UA (Unnumbered Acknowledgement) frame
- *
- */
-void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos)
-{
-	struct sk_buff *tx_skb;
-	struct ua_frame *frame;
-	int ret;
-
-	pr_debug("%s() <%ld>\n", __func__, jiffies);
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/* Allocate frame */
-	tx_skb = alloc_skb(sizeof(struct ua_frame) +
-			   IRLAP_NEGOCIATION_PARAMS_LEN,
-			   GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	frame = skb_put(tx_skb, 10);
-
-	/* Build UA response */
-	frame->caddr = self->caddr;
-	frame->control = UA_RSP | PF_BIT;
-
-	frame->saddr = cpu_to_le32(self->saddr);
-	frame->daddr = cpu_to_le32(self->daddr);
-
-	/* Should we send QoS negotiation parameters? */
-	if (qos) {
-		ret = irlap_insert_qos_negotiation_params(self, tx_skb);
-		if (ret < 0) {
-			dev_kfree_skb(tx_skb);
-			return;
-		}
-	}
-
-	irlap_queue_xmit(self, tx_skb);
-}
-
-
-/*
- * Function irlap_send_dm_frame (void)
- *
- *    Send disconnected mode (DM) frame
- *
- */
-void irlap_send_dm_frame( struct irlap_cb *self)
-{
-	struct sk_buff *tx_skb = NULL;
-	struct dm_frame *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	tx_skb = alloc_skb(sizeof(struct dm_frame), GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	frame = skb_put(tx_skb, 2);
-
-	if (self->state == LAP_NDM)
-		frame->caddr = CBROADCAST;
-	else
-		frame->caddr = self->caddr;
-
-	frame->control = DM_RSP | PF_BIT;
-
-	irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_send_disc_frame (void)
- *
- *    Send disconnect (DISC) frame
- *
- */
-void irlap_send_disc_frame(struct irlap_cb *self)
-{
-	struct sk_buff *tx_skb = NULL;
-	struct disc_frame *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	tx_skb = alloc_skb(sizeof(struct disc_frame), GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	frame = skb_put(tx_skb, 2);
-
-	frame->caddr = self->caddr | CMD_FRAME;
-	frame->control = DISC_CMD | PF_BIT;
-
-	irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_send_discovery_xid_frame (S, s, command)
- *
- *    Build and transmit a XID (eXchange station IDentifier) discovery
- *    frame.
- */
-void irlap_send_discovery_xid_frame(struct irlap_cb *self, int S, __u8 s,
-				    __u8 command, discovery_t *discovery)
-{
-	struct sk_buff *tx_skb = NULL;
-	struct xid_frame *frame;
-	__u32 bcast = BROADCAST;
-	__u8 *info;
-
-	pr_debug("%s(), s=%d, S=%d, command=%d\n", __func__,
-		 s, S, command);
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-	IRDA_ASSERT(discovery != NULL, return;);
-
-	tx_skb = alloc_skb(sizeof(struct xid_frame) + IRLAP_DISCOVERY_INFO_LEN,
-			   GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	skb_put(tx_skb, 14);
-	frame = (struct xid_frame *) tx_skb->data;
-
-	if (command) {
-		frame->caddr = CBROADCAST | CMD_FRAME;
-		frame->control =  XID_CMD | PF_BIT;
-	} else {
-		frame->caddr = CBROADCAST;
-		frame->control =  XID_RSP | PF_BIT;
-	}
-	frame->ident = XID_FORMAT;
-
-	frame->saddr = cpu_to_le32(self->saddr);
-
-	if (command)
-		frame->daddr = cpu_to_le32(bcast);
-	else
-		frame->daddr = cpu_to_le32(discovery->data.daddr);
-
-	switch (S) {
-	case 1:
-		frame->flags = 0x00;
-		break;
-	case 6:
-		frame->flags = 0x01;
-		break;
-	case 8:
-		frame->flags = 0x02;
-		break;
-	case 16:
-		frame->flags = 0x03;
-		break;
-	default:
-		frame->flags = 0x02;
-		break;
-	}
-
-	frame->slotnr = s;
-	frame->version = 0x00;
-
-	/*
-	 *  Provide info for final slot only in commands, and for all
-	 *  responses. Send the second byte of the hint only if the
-	 *  EXTENSION bit is set in the first byte.
-	 */
-	if (!command || (frame->slotnr == 0xff)) {
-		int len;
-
-		if (discovery->data.hints[0] & HINT_EXTENSION) {
-			info = skb_put(tx_skb, 2);
-			info[0] = discovery->data.hints[0];
-			info[1] = discovery->data.hints[1];
-		} else {
-			info = skb_put(tx_skb, 1);
-			info[0] = discovery->data.hints[0];
-		}
-		info = skb_put(tx_skb, 1);
-		info[0] = discovery->data.charset;
-
-		len = IRDA_MIN(discovery->name_len, skb_tailroom(tx_skb));
-		skb_put_data(tx_skb, discovery->data.info, len);
-	}
-	irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_recv_discovery_xid_rsp (skb, info)
- *
- *    Received a XID discovery response
- *
- */
-static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self,
-					 struct sk_buff *skb,
-					 struct irlap_info *info)
-{
-	struct xid_frame *xid;
-	discovery_t *discovery = NULL;
-	__u8 *discovery_info;
-	char *text;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
-		net_err_ratelimited("%s: frame too short!\n", __func__);
-		return;
-	}
-
-	xid = (struct xid_frame *) skb->data;
-
-	info->daddr = le32_to_cpu(xid->saddr);
-	info->saddr = le32_to_cpu(xid->daddr);
-
-	/* Make sure frame is addressed to us */
-	if ((info->saddr != self->saddr) && (info->saddr != BROADCAST)) {
-		pr_debug("%s(), frame is not addressed to us!\n",
-			 __func__);
-		return;
-	}
-
-	if ((discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC)) == NULL) {
-		net_warn_ratelimited("%s: kmalloc failed!\n", __func__);
-		return;
-	}
-
-	discovery->data.daddr = info->daddr;
-	discovery->data.saddr = self->saddr;
-	discovery->timestamp = jiffies;
-
-	pr_debug("%s(), daddr=%08x\n", __func__,
-		 discovery->data.daddr);
-
-	discovery_info = skb_pull(skb, sizeof(struct xid_frame));
-
-	/* Get info returned from peer */
-	discovery->data.hints[0] = discovery_info[0];
-	if (discovery_info[0] & HINT_EXTENSION) {
-		pr_debug("EXTENSION\n");
-		discovery->data.hints[1] = discovery_info[1];
-		discovery->data.charset = discovery_info[2];
-		text = (char *) &discovery_info[3];
-	} else {
-		discovery->data.hints[1] = 0;
-		discovery->data.charset = discovery_info[1];
-		text = (char *) &discovery_info[2];
-	}
-	/*
-	 *  Terminate info string, should be safe since this is where the
-	 *  FCS bytes resides.
-	 */
-	skb->data[skb->len] = '\0';
-	strncpy(discovery->data.info, text, NICKNAME_MAX_LEN);
-	discovery->name_len = strlen(discovery->data.info);
-
-	info->discovery = discovery;
-
-	irlap_do_event(self, RECV_DISCOVERY_XID_RSP, skb, info);
-}
-
-/*
- * Function irlap_recv_discovery_xid_cmd (skb, info)
- *
- *    Received a XID discovery command
- *
- */
-static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
-					 struct sk_buff *skb,
-					 struct irlap_info *info)
-{
-	struct xid_frame *xid;
-	discovery_t *discovery = NULL;
-	__u8 *discovery_info;
-	char *text;
-
-	if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
-		net_err_ratelimited("%s: frame too short!\n", __func__);
-		return;
-	}
-
-	xid = (struct xid_frame *) skb->data;
-
-	info->daddr = le32_to_cpu(xid->saddr);
-	info->saddr = le32_to_cpu(xid->daddr);
-
-	/* Make sure frame is addressed to us */
-	if ((info->saddr != self->saddr) && (info->saddr != BROADCAST)) {
-		pr_debug("%s(), frame is not addressed to us!\n",
-			 __func__);
-		return;
-	}
-
-	switch (xid->flags & 0x03) {
-	case 0x00:
-		info->S = 1;
-		break;
-	case 0x01:
-		info->S = 6;
-		break;
-	case 0x02:
-		info->S = 8;
-		break;
-	case 0x03:
-		info->S = 16;
-		break;
-	default:
-		/* Error!! */
-		return;
-	}
-	info->s = xid->slotnr;
-
-	discovery_info = skb_pull(skb, sizeof(struct xid_frame));
-
-	/*
-	 *  Check if last frame
-	 */
-	if (info->s == 0xff) {
-		/* Check if things are sane at this point... */
-		if((discovery_info == NULL) ||
-		   !pskb_may_pull(skb, 3)) {
-			net_err_ratelimited("%s: discovery frame too short!\n",
-					    __func__);
-			return;
-		}
-
-		/*
-		 *  We now have some discovery info to deliver!
-		 */
-		discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC);
-		if (!discovery)
-			return;
-
-		discovery->data.daddr = info->daddr;
-		discovery->data.saddr = self->saddr;
-		discovery->timestamp = jiffies;
-
-		discovery->data.hints[0] = discovery_info[0];
-		if (discovery_info[0] & HINT_EXTENSION) {
-			discovery->data.hints[1] = discovery_info[1];
-			discovery->data.charset = discovery_info[2];
-			text = (char *) &discovery_info[3];
-		} else {
-			discovery->data.hints[1] = 0;
-			discovery->data.charset = discovery_info[1];
-			text = (char *) &discovery_info[2];
-		}
-		/*
-		 *  Terminate string, should be safe since this is where the
-		 *  FCS bytes resides.
-		 */
-		skb->data[skb->len] = '\0';
-		strncpy(discovery->data.info, text, NICKNAME_MAX_LEN);
-		discovery->name_len = strlen(discovery->data.info);
-
-		info->discovery = discovery;
-	} else
-		info->discovery = NULL;
-
-	irlap_do_event(self, RECV_DISCOVERY_XID_CMD, skb, info);
-}
-
-/*
- * Function irlap_send_rr_frame (self, command)
- *
- *    Build and transmit RR (Receive Ready) frame. Notice that it is currently
- *    only possible to send RR frames with the poll bit set.
- */
-void irlap_send_rr_frame(struct irlap_cb *self, int command)
-{
-	struct sk_buff *tx_skb;
-	struct rr_frame *frame;
-
-	tx_skb = alloc_skb(sizeof(struct rr_frame), GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	frame = skb_put(tx_skb, 2);
-
-	frame->caddr = self->caddr;
-	frame->caddr |= (command) ? CMD_FRAME : 0;
-
-	frame->control = RR | PF_BIT | (self->vr << 5);
-
-	irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_send_rd_frame (self)
- *
- *    Request disconnect. Used by a secondary station to request the
- *    disconnection of the link.
- */
-void irlap_send_rd_frame(struct irlap_cb *self)
-{
-	struct sk_buff *tx_skb;
-	struct rd_frame *frame;
-
-	tx_skb = alloc_skb(sizeof(struct rd_frame), GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	frame = skb_put(tx_skb, 2);
-
-	frame->caddr = self->caddr;
-	frame->control = RD_RSP | PF_BIT;
-
-	irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_recv_rr_frame (skb, info)
- *
- *    Received RR (Receive Ready) frame from peer station, no harm in
- *    making it inline since its called only from one single place
- *    (irlap_driver_rcv).
- */
-static inline void irlap_recv_rr_frame(struct irlap_cb *self,
-				       struct sk_buff *skb,
-				       struct irlap_info *info, int command)
-{
-	info->nr = skb->data[1] >> 5;
-
-	/* Check if this is a command or a response frame */
-	if (command)
-		irlap_do_event(self, RECV_RR_CMD, skb, info);
-	else
-		irlap_do_event(self, RECV_RR_RSP, skb, info);
-}
-
-/*
- * Function irlap_recv_rnr_frame (self, skb, info)
- *
- *    Received RNR (Receive Not Ready) frame from peer station
- *
- */
-static void irlap_recv_rnr_frame(struct irlap_cb *self, struct sk_buff *skb,
-				 struct irlap_info *info, int command)
-{
-	info->nr = skb->data[1] >> 5;
-
-	pr_debug("%s(), nr=%d, %ld\n", __func__, info->nr, jiffies);
-
-	if (command)
-		irlap_do_event(self, RECV_RNR_CMD, skb, info);
-	else
-		irlap_do_event(self, RECV_RNR_RSP, skb, info);
-}
-
-static void irlap_recv_rej_frame(struct irlap_cb *self, struct sk_buff *skb,
-				 struct irlap_info *info, int command)
-{
-	info->nr = skb->data[1] >> 5;
-
-	/* Check if this is a command or a response frame */
-	if (command)
-		irlap_do_event(self, RECV_REJ_CMD, skb, info);
-	else
-		irlap_do_event(self, RECV_REJ_RSP, skb, info);
-}
-
-static void irlap_recv_srej_frame(struct irlap_cb *self, struct sk_buff *skb,
-				  struct irlap_info *info, int command)
-{
-	info->nr = skb->data[1] >> 5;
-
-	/* Check if this is a command or a response frame */
-	if (command)
-		irlap_do_event(self, RECV_SREJ_CMD, skb, info);
-	else
-		irlap_do_event(self, RECV_SREJ_RSP, skb, info);
-}
-
-static void irlap_recv_disc_frame(struct irlap_cb *self, struct sk_buff *skb,
-				  struct irlap_info *info, int command)
-{
-	/* Check if this is a command or a response frame */
-	if (command)
-		irlap_do_event(self, RECV_DISC_CMD, skb, info);
-	else
-		irlap_do_event(self, RECV_RD_RSP, skb, info);
-}
-
-/*
- * Function irlap_recv_ua_frame (skb, frame)
- *
- *    Received UA (Unnumbered Acknowledgement) frame
- *
- */
-static inline void irlap_recv_ua_frame(struct irlap_cb *self,
-				       struct sk_buff *skb,
-				       struct irlap_info *info)
-{
-	irlap_do_event(self, RECV_UA_RSP, skb, info);
-}
-
-/*
- * Function irlap_send_data_primary(self, skb)
- *
- *    Send I-frames as the primary station but without the poll bit set
- *
- */
-void irlap_send_data_primary(struct irlap_cb *self, struct sk_buff *skb)
-{
-	struct sk_buff *tx_skb;
-
-	if (skb->data[1] == I_FRAME) {
-
-		/*
-		 *  Insert frame sequence number (Vs) in control field before
-		 *  inserting into transmit window queue.
-		 */
-		skb->data[1] = I_FRAME | (self->vs << 1);
-
-		/*
-		 *  Insert frame in store, in case of retransmissions
-		 *  Increase skb reference count, see irlap_do_event()
-		 */
-		skb_get(skb);
-		skb_queue_tail(&self->wx_list, skb);
-
-		/* Copy buffer */
-		tx_skb = skb_clone(skb, GFP_ATOMIC);
-		if (tx_skb == NULL) {
-			return;
-		}
-
-		self->vs = (self->vs + 1) % 8;
-		self->ack_required = FALSE;
-		self->window -= 1;
-
-		irlap_send_i_frame( self, tx_skb, CMD_FRAME);
-	} else {
-		pr_debug("%s(), sending unreliable frame\n", __func__);
-		irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME);
-		self->window -= 1;
-	}
-}
-/*
- * Function irlap_send_data_primary_poll (self, skb)
- *
- *    Send I(nformation) frame as primary with poll bit set
- */
-void irlap_send_data_primary_poll(struct irlap_cb *self, struct sk_buff *skb)
-{
-	struct sk_buff *tx_skb;
-	int transmission_time;
-
-	/* Stop P timer */
-	del_timer(&self->poll_timer);
-
-	/* Is this reliable or unreliable data? */
-	if (skb->data[1] == I_FRAME) {
-
-		/*
-		 *  Insert frame sequence number (Vs) in control field before
-		 *  inserting into transmit window queue.
-		 */
-		skb->data[1] = I_FRAME | (self->vs << 1);
-
-		/*
-		 *  Insert frame in store, in case of retransmissions
-		 *  Increase skb reference count, see irlap_do_event()
-		 */
-		skb_get(skb);
-		skb_queue_tail(&self->wx_list, skb);
-
-		/* Copy buffer */
-		tx_skb = skb_clone(skb, GFP_ATOMIC);
-		if (tx_skb == NULL) {
-			return;
-		}
-
-		/*
-		 *  Set poll bit if necessary. We do this to the copied
-		 *  skb, since retransmitted need to set or clear the poll
-		 *  bit depending on when they are sent.
-		 */
-		tx_skb->data[1] |= PF_BIT;
-
-		self->vs = (self->vs + 1) % 8;
-		self->ack_required = FALSE;
-
-		irlap_next_state(self, LAP_NRM_P);
-		irlap_send_i_frame(self, tx_skb, CMD_FRAME);
-	} else {
-		pr_debug("%s(), sending unreliable frame\n", __func__);
-
-		if (self->ack_required) {
-			irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME);
-			irlap_next_state(self, LAP_NRM_P);
-			irlap_send_rr_frame(self, CMD_FRAME);
-			self->ack_required = FALSE;
-		} else {
-			skb->data[1] |= PF_BIT;
-			irlap_next_state(self, LAP_NRM_P);
-			irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME);
-		}
-	}
-
-	/* How much time we took for transmission of all frames.
-	 * We don't know, so let assume we used the full window. Jean II */
-	transmission_time = self->final_timeout;
-
-	/* Reset parameter so that we can fill next window */
-	self->window = self->window_size;
-
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
-	/* Remove what we have not used. Just do a prorata of the
-	 * bytes left in window to window capacity.
-	 * See max_line_capacities[][] in qos.c for details. Jean II */
-	transmission_time -= (self->final_timeout * self->bytes_left
-			      / self->line_capacity);
-	pr_debug("%s() adjusting transmission_time : ft=%d, bl=%d, lc=%d -> tt=%d\n",
-		 __func__, self->final_timeout, self->bytes_left,
-		 self->line_capacity, transmission_time);
-
-	/* We are allowed to transmit a maximum number of bytes again. */
-	self->bytes_left = self->line_capacity;
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-
-	/*
-	 * The network layer has a intermediate buffer between IrLAP
-	 * and the IrDA driver which can contain 8 frames. So, even
-	 * though IrLAP is currently sending the *last* frame of the
-	 * tx-window, the driver most likely has only just started
-	 * sending the *first* frame of the same tx-window.
-	 * I.e. we are always at the very beginning of or Tx window.
-	 * Now, we are supposed to set the final timer from the end
-	 * of our tx-window to let the other peer reply. So, we need
-	 * to add extra time to compensate for the fact that we
-	 * are really at the start of tx-window, otherwise the final timer
-	 * might expire before he can answer...
-	 * Jean II
-	 */
-	irlap_start_final_timer(self, self->final_timeout + transmission_time);
-
-	/*
-	 * The clever amongst you might ask why we do this adjustement
-	 * only here, and not in all the other cases in irlap_event.c.
-	 * In all those other case, we only send a very short management
-	 * frame (few bytes), so the adjustement would be lost in the
-	 * noise...
-	 * The exception of course is irlap_resend_rejected_frame().
-	 * Jean II */
-}
-
-/*
- * Function irlap_send_data_secondary_final (self, skb)
- *
- *    Send I(nformation) frame as secondary with final bit set
- *
- */
-void irlap_send_data_secondary_final(struct irlap_cb *self,
-				     struct sk_buff *skb)
-{
-	struct sk_buff *tx_skb = NULL;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	/* Is this reliable or unreliable data? */
-	if (skb->data[1] == I_FRAME) {
-
-		/*
-		 *  Insert frame sequence number (Vs) in control field before
-		 *  inserting into transmit window queue.
-		 */
-		skb->data[1] = I_FRAME | (self->vs << 1);
-
-		/*
-		 *  Insert frame in store, in case of retransmissions
-		 *  Increase skb reference count, see irlap_do_event()
-		 */
-		skb_get(skb);
-		skb_queue_tail(&self->wx_list, skb);
-
-		tx_skb = skb_clone(skb, GFP_ATOMIC);
-		if (tx_skb == NULL) {
-			return;
-		}
-
-		tx_skb->data[1] |= PF_BIT;
-
-		self->vs = (self->vs + 1) % 8;
-		self->ack_required = FALSE;
-
-		irlap_send_i_frame(self, tx_skb, RSP_FRAME);
-	} else {
-		if (self->ack_required) {
-			irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME);
-			irlap_send_rr_frame(self, RSP_FRAME);
-			self->ack_required = FALSE;
-		} else {
-			skb->data[1] |= PF_BIT;
-			irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME);
-		}
-	}
-
-	self->window = self->window_size;
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
-	/* We are allowed to transmit a maximum number of bytes again. */
-	self->bytes_left = self->line_capacity;
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-
-	irlap_start_wd_timer(self, self->wd_timeout);
-}
-
-/*
- * Function irlap_send_data_secondary (self, skb)
- *
- *    Send I(nformation) frame as secondary without final bit set
- *
- */
-void irlap_send_data_secondary(struct irlap_cb *self, struct sk_buff *skb)
-{
-	struct sk_buff *tx_skb = NULL;
-
-	/* Is this reliable or unreliable data? */
-	if (skb->data[1] == I_FRAME) {
-
-		/*
-		 *  Insert frame sequence number (Vs) in control field before
-		 *  inserting into transmit window queue.
-		 */
-		skb->data[1] = I_FRAME | (self->vs << 1);
-
-		/*
-		 *  Insert frame in store, in case of retransmissions
-		 *  Increase skb reference count, see irlap_do_event()
-		 */
-		skb_get(skb);
-		skb_queue_tail(&self->wx_list, skb);
-
-		tx_skb = skb_clone(skb, GFP_ATOMIC);
-		if (tx_skb == NULL) {
-			return;
-		}
-
-		self->vs = (self->vs + 1) % 8;
-		self->ack_required = FALSE;
-		self->window -= 1;
-
-		irlap_send_i_frame(self, tx_skb, RSP_FRAME);
-	} else {
-		irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME);
-		self->window -= 1;
-	}
-}
-
-/*
- * Function irlap_resend_rejected_frames (nr)
- *
- *    Resend frames which has not been acknowledged. Should be safe to
- *    traverse the list without locking it since this function will only be
- *    called from interrupt context (BH)
- */
-void irlap_resend_rejected_frames(struct irlap_cb *self, int command)
-{
-	struct sk_buff *tx_skb;
-	struct sk_buff *skb;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/*  Resend unacknowledged frame(s) */
-	skb_queue_walk(&self->wx_list, skb) {
-		irlap_wait_min_turn_around(self, &self->qos_tx);
-
-		/* We copy the skb to be retransmitted since we will have to
-		 * modify it. Cloning will confuse packet sniffers
-		 */
-		/* tx_skb = skb_clone( skb, GFP_ATOMIC); */
-		tx_skb = skb_copy(skb, GFP_ATOMIC);
-		if (!tx_skb) {
-			pr_debug("%s(), unable to copy\n", __func__);
-			return;
-		}
-
-		/* Clear old Nr field + poll bit */
-		tx_skb->data[1] &= 0x0f;
-
-		/*
-		 *  Set poll bit on the last frame retransmitted
-		 */
-		if (skb_queue_is_last(&self->wx_list, skb))
-			tx_skb->data[1] |= PF_BIT; /* Set p/f bit */
-		else
-			tx_skb->data[1] &= ~PF_BIT; /* Clear p/f bit */
-
-		irlap_send_i_frame(self, tx_skb, command);
-	}
-#if 0 /* Not yet */
-	/*
-	 *  We can now fill the window with additional data frames
-	 */
-	while (!skb_queue_empty(&self->txq)) {
-
-		pr_debug("%s(), sending additional frames!\n", __func__);
-		if (self->window > 0) {
-			skb = skb_dequeue( &self->txq);
-			IRDA_ASSERT(skb != NULL, return;);
-
-			/*
-			 *  If send window > 1 then send frame with pf
-			 *  bit cleared
-			 */
-			if ((self->window > 1) &&
-			    !skb_queue_empty(&self->txq)) {
-				irlap_send_data_primary(self, skb);
-			} else {
-				irlap_send_data_primary_poll(self, skb);
-			}
-			kfree_skb(skb);
-		}
-	}
-#endif
-}
-
-void irlap_resend_rejected_frame(struct irlap_cb *self, int command)
-{
-	struct sk_buff *tx_skb;
-	struct sk_buff *skb;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	/*  Resend unacknowledged frame(s) */
-	skb = skb_peek(&self->wx_list);
-	if (skb != NULL) {
-		irlap_wait_min_turn_around(self, &self->qos_tx);
-
-		/* We copy the skb to be retransmitted since we will have to
-		 * modify it. Cloning will confuse packet sniffers
-		 */
-		/* tx_skb = skb_clone( skb, GFP_ATOMIC); */
-		tx_skb = skb_copy(skb, GFP_ATOMIC);
-		if (!tx_skb) {
-			pr_debug("%s(), unable to copy\n", __func__);
-			return;
-		}
-
-		/* Clear old Nr field + poll bit */
-		tx_skb->data[1] &= 0x0f;
-
-		/*  Set poll/final bit */
-		tx_skb->data[1] |= PF_BIT; /* Set p/f bit */
-
-		irlap_send_i_frame(self, tx_skb, command);
-	}
-}
-
-/*
- * Function irlap_send_ui_frame (self, skb, command)
- *
- *    Contruct and transmit an Unnumbered Information (UI) frame
- *
- */
-void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb,
-			 __u8 caddr, int command)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	/* Insert connection address */
-	skb->data[0] = caddr | ((command) ? CMD_FRAME : 0);
-
-	irlap_queue_xmit(self, skb);
-}
-
-/*
- * Function irlap_send_i_frame (skb)
- *
- *    Contruct and transmit Information (I) frame
- */
-static void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb,
-			       int command)
-{
-	/* Insert connection address */
-	skb->data[0] = self->caddr;
-	skb->data[0] |= (command) ? CMD_FRAME : 0;
-
-	/* Insert next to receive (Vr) */
-	skb->data[1] |= (self->vr << 5);  /* insert nr */
-
-	irlap_queue_xmit(self, skb);
-}
-
-/*
- * Function irlap_recv_i_frame (skb, frame)
- *
- *    Receive and parse an I (Information) frame, no harm in making it inline
- *    since it's called only from one single place (irlap_driver_rcv).
- */
-static inline void irlap_recv_i_frame(struct irlap_cb *self,
-				      struct sk_buff *skb,
-				      struct irlap_info *info, int command)
-{
-	info->nr = skb->data[1] >> 5;          /* Next to receive */
-	info->pf = skb->data[1] & PF_BIT;      /* Final bit */
-	info->ns = (skb->data[1] >> 1) & 0x07; /* Next to send */
-
-	/* Check if this is a command or a response frame */
-	if (command)
-		irlap_do_event(self, RECV_I_CMD, skb, info);
-	else
-		irlap_do_event(self, RECV_I_RSP, skb, info);
-}
-
-/*
- * Function irlap_recv_ui_frame (self, skb, info)
- *
- *    Receive and parse an Unnumbered Information (UI) frame
- *
- */
-static void irlap_recv_ui_frame(struct irlap_cb *self, struct sk_buff *skb,
-				struct irlap_info *info)
-{
-	info->pf = skb->data[1] & PF_BIT;      /* Final bit */
-
-	irlap_do_event(self, RECV_UI_FRAME, skb, info);
-}
-
-/*
- * Function irlap_recv_frmr_frame (skb, frame)
- *
- *    Received Frame Reject response.
- *
- */
-static void irlap_recv_frmr_frame(struct irlap_cb *self, struct sk_buff *skb,
-				  struct irlap_info *info)
-{
-	__u8 *frame;
-	int w, x, y, z;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-	IRDA_ASSERT(info != NULL, return;);
-
-	if (!pskb_may_pull(skb, 4)) {
-		net_err_ratelimited("%s: frame too short!\n", __func__);
-		return;
-	}
-
-	frame = skb->data;
-
-	info->nr = frame[2] >> 5;          /* Next to receive */
-	info->pf = frame[2] & PF_BIT;      /* Final bit */
-	info->ns = (frame[2] >> 1) & 0x07; /* Next to send */
-
-	w = frame[3] & 0x01;
-	x = frame[3] & 0x02;
-	y = frame[3] & 0x04;
-	z = frame[3] & 0x08;
-
-	if (w) {
-		pr_debug("Rejected control field is undefined or not implemented\n");
-	}
-	if (x) {
-		pr_debug("Rejected control field was invalid because it contained a non permitted I field\n");
-	}
-	if (y) {
-		pr_debug("Received I field exceeded the maximum negotiated for the existing connection or exceeded the maximum this station supports if no connection exists\n");
-	}
-	if (z) {
-		pr_debug("Rejected control field control field contained an invalid Nr count\n");
-	}
-	irlap_do_event(self, RECV_FRMR_RSP, skb, info);
-}
-
-/*
- * Function irlap_send_test_frame (self, daddr)
- *
- *    Send a test frame response
- *
- */
-void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr,
-			   struct sk_buff *cmd)
-{
-	struct sk_buff *tx_skb;
-	struct test_frame *frame;
-
-	tx_skb = alloc_skb(cmd->len + sizeof(struct test_frame), GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	/* Broadcast frames must include saddr and daddr fields */
-	if (caddr == CBROADCAST) {
-		frame = skb_put(tx_skb, sizeof(struct test_frame));
-
-		/* Insert the swapped addresses */
-		frame->saddr = cpu_to_le32(self->saddr);
-		frame->daddr = cpu_to_le32(daddr);
-	} else
-		frame = skb_put(tx_skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER);
-
-	frame->caddr = caddr;
-	frame->control = TEST_RSP | PF_BIT;
-
-	/* Copy info */
-	skb_put_data(tx_skb, cmd->data, cmd->len);
-
-	/* Return to sender */
-	irlap_wait_min_turn_around(self, &self->qos_tx);
-	irlap_queue_xmit(self, tx_skb);
-}
-
-/*
- * Function irlap_recv_test_frame (self, skb)
- *
- *    Receive a test frame
- *
- */
-static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
-				  struct irlap_info *info, int command)
-{
-	struct test_frame *frame;
-
-	if (!pskb_may_pull(skb, sizeof(*frame))) {
-		net_err_ratelimited("%s: frame too short!\n", __func__);
-		return;
-	}
-	frame = (struct test_frame *) skb->data;
-
-	/* Broadcast frames must carry saddr and daddr fields */
-	if (info->caddr == CBROADCAST) {
-		if (skb->len < sizeof(struct test_frame)) {
-			pr_debug("%s() test frame too short!\n",
-				 __func__);
-			return;
-		}
-
-		/* Read and swap addresses */
-		info->daddr = le32_to_cpu(frame->saddr);
-		info->saddr = le32_to_cpu(frame->daddr);
-
-		/* Make sure frame is addressed to us */
-		if ((info->saddr != self->saddr) &&
-		    (info->saddr != BROADCAST)) {
-			return;
-		}
-	}
-
-	if (command)
-		irlap_do_event(self, RECV_TEST_CMD, skb, info);
-	else
-		irlap_do_event(self, RECV_TEST_RSP, skb, info);
-}
-
-/*
- * Function irlap_driver_rcv (skb, netdev, ptype)
- *
- *    Called when a frame is received. Dispatches the right receive function
- *    for processing of the frame.
- *
- * Note on skb management :
- * After calling the higher layers of the IrDA stack, we always
- * kfree() the skb, which drop the reference count (and potentially
- * destroy it).
- * If a higher layer of the stack want to keep the skb around (to put
- * in a queue or pass it to the higher layer), it will need to use
- * skb_get() to keep a reference on it. This is usually done at the
- * LMP level in irlmp.c.
- * Jean II
- */
-int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
-		     struct packet_type *ptype, struct net_device *orig_dev)
-{
-	struct irlap_info info;
-	struct irlap_cb *self;
-	int command;
-	__u8 control;
-	int ret = -1;
-
-	if (!net_eq(dev_net(dev), &init_net))
-		goto out;
-
-	/* FIXME: should we get our own field? */
-	self = (struct irlap_cb *) dev->atalk_ptr;
-
-	/* If the net device is down, then IrLAP is gone! */
-	if (!self || self->magic != LAP_MAGIC)
-		goto err;
-
-	/* We are no longer an "old" protocol, so we need to handle
-	 * share and non linear skbs. This should never happen, so
-	 * we don't need to be clever about it. Jean II */
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
-		net_err_ratelimited("%s: can't clone shared skb!\n", __func__);
-		goto err;
-	}
-
-	/* Check if frame is large enough for parsing */
-	if (!pskb_may_pull(skb, 2)) {
-		net_err_ratelimited("%s: frame too short!\n", __func__);
-		goto err;
-	}
-
-	command    = skb->data[0] & CMD_FRAME;
-	info.caddr = skb->data[0] & CBROADCAST;
-
-	info.pf      = skb->data[1] &  PF_BIT;
-	info.control = skb->data[1] & ~PF_BIT; /* Mask away poll/final bit */
-
-	control = info.control;
-
-	/*  First we check if this frame has a valid connection address */
-	if ((info.caddr != self->caddr) && (info.caddr != CBROADCAST)) {
-		pr_debug("%s(), wrong connection address!\n",
-			 __func__);
-		goto out;
-	}
-	/*
-	 *  Optimize for the common case and check if the frame is an
-	 *  I(nformation) frame. Only I-frames have bit 0 set to 0
-	 */
-	if (~control & 0x01) {
-		irlap_recv_i_frame(self, skb, &info, command);
-		goto out;
-	}
-	/*
-	 *  We now check is the frame is an S(upervisory) frame. Only
-	 *  S-frames have bit 0 set to 1 and bit 1 set to 0
-	 */
-	if (~control & 0x02) {
-		/*
-		 *  Received S(upervisory) frame, check which frame type it is
-		 *  only the first nibble is of interest
-		 */
-		switch (control & 0x0f) {
-		case RR:
-			irlap_recv_rr_frame(self, skb, &info, command);
-			break;
-		case RNR:
-			irlap_recv_rnr_frame(self, skb, &info, command);
-			break;
-		case REJ:
-			irlap_recv_rej_frame(self, skb, &info, command);
-			break;
-		case SREJ:
-			irlap_recv_srej_frame(self, skb, &info, command);
-			break;
-		default:
-			net_warn_ratelimited("%s: Unknown S-frame %02x received!\n",
-					     __func__, info.control);
-			break;
-		}
-		goto out;
-	}
-	/*
-	 *  This must be a C(ontrol) frame
-	 */
-	switch (control) {
-	case XID_RSP:
-		irlap_recv_discovery_xid_rsp(self, skb, &info);
-		break;
-	case XID_CMD:
-		irlap_recv_discovery_xid_cmd(self, skb, &info);
-		break;
-	case SNRM_CMD:
-		irlap_recv_snrm_cmd(self, skb, &info);
-		break;
-	case DM_RSP:
-		irlap_do_event(self, RECV_DM_RSP, skb, &info);
-		break;
-	case DISC_CMD: /* And RD_RSP since they have the same value */
-		irlap_recv_disc_frame(self, skb, &info, command);
-		break;
-	case TEST_CMD:
-		irlap_recv_test_frame(self, skb, &info, command);
-		break;
-	case UA_RSP:
-		irlap_recv_ua_frame(self, skb, &info);
-		break;
-	case FRMR_RSP:
-		irlap_recv_frmr_frame(self, skb, &info);
-		break;
-	case UI_FRAME:
-		irlap_recv_ui_frame(self, skb, &info);
-		break;
-	default:
-		net_warn_ratelimited("%s: Unknown frame %02x received!\n",
-				     __func__, info.control);
-		break;
-	}
-out:
-	ret = 0;
-err:
-	/* Always drop our reference on the skb */
-	dev_kfree_skb(skb);
-	return ret;
-}
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
deleted file mode 100644
index 43964594aa12..000000000000
--- a/net/irda/irlmp.c
+++ /dev/null
@@ -1,1996 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlmp.c
- * Version:       1.0
- * Description:   IrDA Link Management Protocol (LMP) layer
- * Status:        Stable.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 17 20:54:32 1997
- * Modified at:   Wed Jan  5 11:26:03 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/skbuff.h>
-#include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/kmod.h>
-#include <linux/random.h>
-#include <linux/seq_file.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/timer.h>
-#include <net/irda/qos.h>
-#include <net/irda/irlap.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irlmp_frame.h>
-
-#include <asm/unaligned.h>
-
-static __u8 irlmp_find_free_slsap(void);
-static int irlmp_slsap_inuse(__u8 slsap_sel);
-
-/* Master structure */
-struct irlmp_cb *irlmp = NULL;
-
-/* These can be altered by the sysctl interface */
-int  sysctl_discovery         = 0;
-int  sysctl_discovery_timeout = 3; /* 3 seconds by default */
-int  sysctl_discovery_slots   = 6; /* 6 slots by default */
-int  sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ;
-char sysctl_devname[65];
-
-static const char *irlmp_reasons[] = {
-	"ERROR, NOT USED",
-	"LM_USER_REQUEST",
-	"LM_LAP_DISCONNECT",
-	"LM_CONNECT_FAILURE",
-	"LM_LAP_RESET",
-	"LM_INIT_DISCONNECT",
-	"ERROR, NOT USED",
-	"UNKNOWN",
-};
-
-const char *irlmp_reason_str(LM_REASON reason)
-{
-	reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1);
-	return irlmp_reasons[reason];
-}
-
-/*
- * Function irlmp_init (void)
- *
- *    Create (allocate) the main IrLMP structure
- *
- */
-int __init irlmp_init(void)
-{
-	/* Initialize the irlmp structure. */
-	irlmp = kzalloc( sizeof(struct irlmp_cb), GFP_KERNEL);
-	if (irlmp == NULL)
-		return -ENOMEM;
-
-	irlmp->magic = LMP_MAGIC;
-
-	irlmp->clients = hashbin_new(HB_LOCK);
-	irlmp->services = hashbin_new(HB_LOCK);
-	irlmp->links = hashbin_new(HB_LOCK);
-	irlmp->unconnected_lsaps = hashbin_new(HB_LOCK);
-	irlmp->cachelog = hashbin_new(HB_NOLOCK);
-
-	if ((irlmp->clients == NULL) ||
-	    (irlmp->services == NULL) ||
-	    (irlmp->links == NULL) ||
-	    (irlmp->unconnected_lsaps == NULL) ||
-	    (irlmp->cachelog == NULL)) {
-		return -ENOMEM;
-	}
-
-	spin_lock_init(&irlmp->cachelog->hb_spinlock);
-
-	irlmp->last_lsap_sel = 0x0f; /* Reserved 0x00-0x0f */
-	strcpy(sysctl_devname, "Linux");
-
-	init_timer(&irlmp->discovery_timer);
-
-	/* Do discovery every 3 seconds, conditionally */
-	if (sysctl_discovery)
-		irlmp_start_discovery_timer(irlmp,
-					    sysctl_discovery_timeout*HZ);
-
-	return 0;
-}
-
-/*
- * Function irlmp_cleanup (void)
- *
- *    Remove IrLMP layer
- *
- */
-void irlmp_cleanup(void)
-{
-	/* Check for main structure */
-	IRDA_ASSERT(irlmp != NULL, return;);
-	IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return;);
-
-	del_timer(&irlmp->discovery_timer);
-
-	hashbin_delete(irlmp->links, (FREE_FUNC) kfree);
-	hashbin_delete(irlmp->unconnected_lsaps, (FREE_FUNC) kfree);
-	hashbin_delete(irlmp->clients, (FREE_FUNC) kfree);
-	hashbin_delete(irlmp->services, (FREE_FUNC) kfree);
-	hashbin_delete(irlmp->cachelog, (FREE_FUNC) kfree);
-
-	/* De-allocate main structure */
-	kfree(irlmp);
-	irlmp = NULL;
-}
-
-/*
- * Function irlmp_open_lsap (slsap, notify)
- *
- *   Register with IrLMP and create a local LSAP,
- *   returns handle to LSAP.
- */
-struct lsap_cb *irlmp_open_lsap(__u8 slsap_sel, notify_t *notify, __u8 pid)
-{
-	struct lsap_cb *self;
-
-	IRDA_ASSERT(notify != NULL, return NULL;);
-	IRDA_ASSERT(irlmp != NULL, return NULL;);
-	IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return NULL;);
-	IRDA_ASSERT(notify->instance != NULL, return NULL;);
-
-	/*  Does the client care which Source LSAP selector it gets?  */
-	if (slsap_sel == LSAP_ANY) {
-		slsap_sel = irlmp_find_free_slsap();
-		if (!slsap_sel)
-			return NULL;
-	} else if (irlmp_slsap_inuse(slsap_sel))
-		return NULL;
-
-	/* Allocate new instance of a LSAP connection */
-	self = kzalloc(sizeof(struct lsap_cb), GFP_ATOMIC);
-	if (self == NULL)
-		return NULL;
-
-	self->magic = LMP_LSAP_MAGIC;
-	self->slsap_sel = slsap_sel;
-
-	/* Fix connectionless LSAP's */
-	if (slsap_sel == LSAP_CONNLESS) {
-#ifdef CONFIG_IRDA_ULTRA
-		self->dlsap_sel = LSAP_CONNLESS;
-		self->pid = pid;
-#endif /* CONFIG_IRDA_ULTRA */
-	} else
-		self->dlsap_sel = LSAP_ANY;
-	/* self->connected = FALSE; -> already NULL via memset() */
-
-	init_timer(&self->watchdog_timer);
-
-	self->notify = *notify;
-
-	self->lsap_state = LSAP_DISCONNECTED;
-
-	/* Insert into queue of unconnected LSAPs */
-	hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) self,
-		       (long) self, NULL);
-
-	return self;
-}
-EXPORT_SYMBOL(irlmp_open_lsap);
-
-/*
- * Function __irlmp_close_lsap (self)
- *
- *    Remove an instance of LSAP
- */
-static void __irlmp_close_lsap(struct lsap_cb *self)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-
-	/*
-	 *  Set some of the variables to preset values
-	 */
-	self->magic = 0;
-	del_timer(&self->watchdog_timer); /* Important! */
-
-	if (self->conn_skb)
-		dev_kfree_skb(self->conn_skb);
-
-	kfree(self);
-}
-
-/*
- * Function irlmp_close_lsap (self)
- *
- *    Close and remove LSAP
- *
- */
-void irlmp_close_lsap(struct lsap_cb *self)
-{
-	struct lap_cb *lap;
-	struct lsap_cb *lsap = NULL;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-
-	/*
-	 *  Find out if we should remove this LSAP from a link or from the
-	 *  list of unconnected lsaps (not associated with a link)
-	 */
-	lap = self->lap;
-	if (lap) {
-		IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
-		/* We might close a LSAP before it has completed the
-		 * connection setup. In those case, higher layers won't
-		 * send a proper disconnect request. Harmless, except
-		 * that we will forget to close LAP... - Jean II */
-		if(self->lsap_state != LSAP_DISCONNECTED) {
-			self->lsap_state = LSAP_DISCONNECTED;
-			irlmp_do_lap_event(self->lap,
-					   LM_LAP_DISCONNECT_REQUEST, NULL);
-		}
-		/* Now, remove from the link */
-		lsap = hashbin_remove(lap->lsaps, (long) self, NULL);
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
-		lap->cache.valid = FALSE;
-#endif
-	}
-	self->lap = NULL;
-	/* Check if we found the LSAP! If not then try the unconnected lsaps */
-	if (!lsap) {
-		lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self,
-				      NULL);
-	}
-	if (!lsap) {
-		pr_debug("%s(), Looks like somebody has removed me already!\n",
-			 __func__);
-		return;
-	}
-	__irlmp_close_lsap(self);
-}
-EXPORT_SYMBOL(irlmp_close_lsap);
-
-/*
- * Function irlmp_register_irlap (saddr, notify)
- *
- *    Register IrLAP layer with IrLMP. There is possible to have multiple
- *    instances of the IrLAP layer, each connected to different IrDA ports
- *
- */
-void irlmp_register_link(struct irlap_cb *irlap, __u32 saddr, notify_t *notify)
-{
-	struct lap_cb *lap;
-
-	IRDA_ASSERT(irlmp != NULL, return;);
-	IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return;);
-	IRDA_ASSERT(notify != NULL, return;);
-
-	/*
-	 *  Allocate new instance of a LSAP connection
-	 */
-	lap = kzalloc(sizeof(struct lap_cb), GFP_KERNEL);
-	if (lap == NULL)
-		return;
-
-	lap->irlap = irlap;
-	lap->magic = LMP_LAP_MAGIC;
-	lap->saddr = saddr;
-	lap->daddr = DEV_ADDR_ANY;
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
-	lap->cache.valid = FALSE;
-#endif
-	lap->lsaps = hashbin_new(HB_LOCK);
-	if (lap->lsaps == NULL) {
-		net_warn_ratelimited("%s(), unable to kmalloc lsaps\n",
-				     __func__);
-		kfree(lap);
-		return;
-	}
-
-	lap->lap_state = LAP_STANDBY;
-
-	init_timer(&lap->idle_timer);
-
-	/*
-	 *  Insert into queue of LMP links
-	 */
-	hashbin_insert(irlmp->links, (irda_queue_t *) lap, lap->saddr, NULL);
-
-	/*
-	 *  We set only this variable so IrLAP can tell us on which link the
-	 *  different events happened on
-	 */
-	irda_notify_init(notify);
-	notify->instance = lap;
-}
-
-/*
- * Function irlmp_unregister_irlap (saddr)
- *
- *    IrLAP layer has been removed!
- *
- */
-void irlmp_unregister_link(__u32 saddr)
-{
-	struct lap_cb *link;
-
-	/* We must remove ourselves from the hashbin *first*. This ensure
-	 * that no more LSAPs will be open on this link and no discovery
-	 * will be triggered anymore. Jean II */
-	link = hashbin_remove(irlmp->links, saddr, NULL);
-	if (link) {
-		IRDA_ASSERT(link->magic == LMP_LAP_MAGIC, return;);
-
-		/* Kill all the LSAPs on this link. Jean II */
-		link->reason = LAP_DISC_INDICATION;
-		link->daddr = DEV_ADDR_ANY;
-		irlmp_do_lap_event(link, LM_LAP_DISCONNECT_INDICATION, NULL);
-
-		/* Remove all discoveries discovered at this link */
-		irlmp_expire_discoveries(irlmp->cachelog, link->saddr, TRUE);
-
-		/* Final cleanup */
-		del_timer(&link->idle_timer);
-		link->magic = 0;
-		hashbin_delete(link->lsaps, (FREE_FUNC) __irlmp_close_lsap);
-		kfree(link);
-	}
-}
-
-/*
- * Function irlmp_connect_request (handle, dlsap, userdata)
- *
- *    Connect with a peer LSAP
- *
- */
-int irlmp_connect_request(struct lsap_cb *self, __u8 dlsap_sel,
-			  __u32 saddr, __u32 daddr,
-			  struct qos_info *qos, struct sk_buff *userdata)
-{
-	struct sk_buff *tx_skb = userdata;
-	struct lap_cb *lap;
-	struct lsap_cb *lsap;
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -EBADR;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -EBADR;);
-
-	pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x, saddr=%08x, daddr=%08x\n",
-		 __func__, self->slsap_sel, dlsap_sel, saddr, daddr);
-
-	if (test_bit(0, &self->connected)) {
-		ret = -EISCONN;
-		goto err;
-	}
-
-	/* Client must supply destination device address */
-	if (!daddr) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	/* Any userdata? */
-	if (tx_skb == NULL) {
-		tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
-		if (!tx_skb)
-			return -ENOMEM;
-
-		skb_reserve(tx_skb, LMP_MAX_HEADER);
-	}
-
-	/* Make room for MUX control header (3 bytes) */
-	IRDA_ASSERT(skb_headroom(tx_skb) >= LMP_CONTROL_HEADER, return -1;);
-	skb_push(tx_skb, LMP_CONTROL_HEADER);
-
-	self->dlsap_sel = dlsap_sel;
-
-	/*
-	 * Find the link to where we should try to connect since there may
-	 * be more than one IrDA port on this machine. If the client has
-	 * passed us the saddr (and already knows which link to use), then
-	 * we use that to find the link, if not then we have to look in the
-	 * discovery log and check if any of the links has discovered a
-	 * device with the given daddr
-	 */
-	if ((!saddr) || (saddr == DEV_ADDR_ANY)) {
-		discovery_t *discovery;
-		unsigned long flags;
-
-		spin_lock_irqsave(&irlmp->cachelog->hb_spinlock, flags);
-		if (daddr != DEV_ADDR_ANY)
-			discovery = hashbin_find(irlmp->cachelog, daddr, NULL);
-		else {
-			pr_debug("%s(), no daddr\n", __func__);
-			discovery = (discovery_t *)
-				hashbin_get_first(irlmp->cachelog);
-		}
-
-		if (discovery) {
-			saddr = discovery->data.saddr;
-			daddr = discovery->data.daddr;
-		}
-		spin_unlock_irqrestore(&irlmp->cachelog->hb_spinlock, flags);
-	}
-	lap = hashbin_lock_find(irlmp->links, saddr, NULL);
-	if (lap == NULL) {
-		pr_debug("%s(), Unable to find a usable link!\n", __func__);
-		ret = -EHOSTUNREACH;
-		goto err;
-	}
-
-	/* Check if LAP is disconnected or already connected */
-	if (lap->daddr == DEV_ADDR_ANY)
-		lap->daddr = daddr;
-	else if (lap->daddr != daddr) {
-		/* Check if some LSAPs are active on this LAP */
-		if (HASHBIN_GET_SIZE(lap->lsaps) == 0) {
-			/* No active connection, but LAP hasn't been
-			 * disconnected yet (waiting for timeout in LAP).
-			 * Maybe we could give LAP a bit of help in this case.
-			 */
-			pr_debug("%s(), sorry, but I'm waiting for LAP to timeout!\n",
-				 __func__);
-			ret = -EAGAIN;
-			goto err;
-		}
-
-		/* LAP is already connected to a different node, and LAP
-		 * can only talk to one node at a time */
-		pr_debug("%s(), sorry, but link is busy!\n", __func__);
-		ret = -EBUSY;
-		goto err;
-	}
-
-	self->lap = lap;
-
-	/*
-	 *  Remove LSAP from list of unconnected LSAPs and insert it into the
-	 *  list of connected LSAPs for the particular link
-	 */
-	lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self, NULL);
-
-	IRDA_ASSERT(lsap != NULL, return -1;);
-	IRDA_ASSERT(lsap->magic == LMP_LSAP_MAGIC, return -1;);
-	IRDA_ASSERT(lsap->lap != NULL, return -1;);
-	IRDA_ASSERT(lsap->lap->magic == LMP_LAP_MAGIC, return -1;);
-
-	hashbin_insert(self->lap->lsaps, (irda_queue_t *) self, (long) self,
-		       NULL);
-
-	set_bit(0, &self->connected);	/* TRUE */
-
-	/*
-	 *  User supplied qos specifications?
-	 */
-	if (qos)
-		self->qos = *qos;
-
-	irlmp_do_lsap_event(self, LM_CONNECT_REQUEST, tx_skb);
-
-	/* Drop reference count - see irlap_data_request(). */
-	dev_kfree_skb(tx_skb);
-
-	return 0;
-
-err:
-	/* Cleanup */
-	if(tx_skb)
-		dev_kfree_skb(tx_skb);
-	return ret;
-}
-EXPORT_SYMBOL(irlmp_connect_request);
-
-/*
- * Function irlmp_connect_indication (self)
- *
- *    Incoming connection
- *
- */
-void irlmp_connect_indication(struct lsap_cb *self, struct sk_buff *skb)
-{
-	int max_seg_size;
-	int lap_header_size;
-	int max_header_size;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-	IRDA_ASSERT(self->lap != NULL, return;);
-
-	pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x\n",
-		 __func__, self->slsap_sel, self->dlsap_sel);
-
-	/* Note : self->lap is set in irlmp_link_data_indication(),
-	 * (case CONNECT_CMD:) because we have no way to set it here.
-	 * Similarly, self->dlsap_sel is usually set in irlmp_find_lsap().
-	 * Jean II */
-
-	self->qos = *self->lap->qos;
-
-	max_seg_size = self->lap->qos->data_size.value-LMP_HEADER;
-	lap_header_size = IRLAP_GET_HEADER_SIZE(self->lap->irlap);
-	max_header_size = LMP_HEADER + lap_header_size;
-
-	/* Hide LMP_CONTROL_HEADER header from layer above */
-	skb_pull(skb, LMP_CONTROL_HEADER);
-
-	if (self->notify.connect_indication) {
-		/* Don't forget to refcount it - see irlap_driver_rcv(). */
-		skb_get(skb);
-		self->notify.connect_indication(self->notify.instance, self,
-						&self->qos, max_seg_size,
-						max_header_size, skb);
-	}
-}
-
-/*
- * Function irlmp_connect_response (handle, userdata)
- *
- *    Service user is accepting connection
- *
- */
-int irlmp_connect_response(struct lsap_cb *self, struct sk_buff *userdata)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-	IRDA_ASSERT(userdata != NULL, return -1;);
-
-	/* We set the connected bit and move the lsap to the connected list
-	 * in the state machine itself. Jean II */
-
-	pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x\n",
-		 __func__, self->slsap_sel, self->dlsap_sel);
-
-	/* Make room for MUX control header (3 bytes) */
-	IRDA_ASSERT(skb_headroom(userdata) >= LMP_CONTROL_HEADER, return -1;);
-	skb_push(userdata, LMP_CONTROL_HEADER);
-
-	irlmp_do_lsap_event(self, LM_CONNECT_RESPONSE, userdata);
-
-	/* Drop reference count - see irlap_data_request(). */
-	dev_kfree_skb(userdata);
-
-	return 0;
-}
-EXPORT_SYMBOL(irlmp_connect_response);
-
-/*
- * Function irlmp_connect_confirm (handle, skb)
- *
- *    LSAP connection confirmed peer device!
- */
-void irlmp_connect_confirm(struct lsap_cb *self, struct sk_buff *skb)
-{
-	int max_header_size;
-	int lap_header_size;
-	int max_seg_size;
-
-	IRDA_ASSERT(skb != NULL, return;);
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-	IRDA_ASSERT(self->lap != NULL, return;);
-
-	self->qos = *self->lap->qos;
-
-	max_seg_size    = self->lap->qos->data_size.value-LMP_HEADER;
-	lap_header_size = IRLAP_GET_HEADER_SIZE(self->lap->irlap);
-	max_header_size = LMP_HEADER + lap_header_size;
-
-	pr_debug("%s(), max_header_size=%d\n",
-		 __func__, max_header_size);
-
-	/* Hide LMP_CONTROL_HEADER header from layer above */
-	skb_pull(skb, LMP_CONTROL_HEADER);
-
-	if (self->notify.connect_confirm) {
-		/* Don't forget to refcount it - see irlap_driver_rcv() */
-		skb_get(skb);
-		self->notify.connect_confirm(self->notify.instance, self,
-					     &self->qos, max_seg_size,
-					     max_header_size, skb);
-	}
-}
-
-/*
- * Function irlmp_dup (orig, instance)
- *
- *    Duplicate LSAP, can be used by servers to confirm a connection on a
- *    new LSAP so it can keep listening on the old one.
- *
- */
-struct lsap_cb *irlmp_dup(struct lsap_cb *orig, void *instance)
-{
-	struct lsap_cb *new;
-	unsigned long flags;
-
-	spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-
-	/* Only allowed to duplicate unconnected LSAP's, and only LSAPs
-	 * that have received a connect indication. Jean II */
-	if ((!hashbin_find(irlmp->unconnected_lsaps, (long) orig, NULL)) ||
-	    (orig->lap == NULL)) {
-		pr_debug("%s(), invalid LSAP (wrong state)\n",
-			 __func__);
-		spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock,
-				       flags);
-		return NULL;
-	}
-
-	/* Allocate a new instance */
-	new = kmemdup(orig, sizeof(*new), GFP_ATOMIC);
-	if (!new)  {
-		pr_debug("%s(), unable to kmalloc\n", __func__);
-		spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock,
-				       flags);
-		return NULL;
-	}
-	/* new->lap = orig->lap; => done in the memcpy() */
-	/* new->slsap_sel = orig->slsap_sel; => done in the memcpy() */
-	new->conn_skb = NULL;
-
-	spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-
-	/* Not everything is the same */
-	new->notify.instance = instance;
-
-	init_timer(&new->watchdog_timer);
-
-	hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) new,
-		       (long) new, NULL);
-
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
-	/* Make sure that we invalidate the LSAP cache */
-	new->lap->cache.valid = FALSE;
-#endif /* CONFIG_IRDA_CACHE_LAST_LSAP */
-
-	return new;
-}
-
-/*
- * Function irlmp_disconnect_request (handle, userdata)
- *
- *    The service user is requesting disconnection, this will not remove the
- *    LSAP, but only mark it as disconnected
- */
-int irlmp_disconnect_request(struct lsap_cb *self, struct sk_buff *userdata)
-{
-	struct lsap_cb *lsap;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-	IRDA_ASSERT(userdata != NULL, return -1;);
-
-	/* Already disconnected ?
-	 * There is a race condition between irlmp_disconnect_indication()
-	 * and us that might mess up the hashbins below. This fixes it.
-	 * Jean II */
-	if (! test_and_clear_bit(0, &self->connected)) {
-		pr_debug("%s(), already disconnected!\n", __func__);
-		dev_kfree_skb(userdata);
-		return -1;
-	}
-
-	skb_push(userdata, LMP_CONTROL_HEADER);
-
-	/*
-	 *  Do the event before the other stuff since we must know
-	 *  which lap layer that the frame should be transmitted on
-	 */
-	irlmp_do_lsap_event(self, LM_DISCONNECT_REQUEST, userdata);
-
-	/* Drop reference count - see irlap_data_request(). */
-	dev_kfree_skb(userdata);
-
-	/*
-	 *  Remove LSAP from list of connected LSAPs for the particular link
-	 *  and insert it into the list of unconnected LSAPs
-	 */
-	IRDA_ASSERT(self->lap != NULL, return -1;);
-	IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
-	IRDA_ASSERT(self->lap->lsaps != NULL, return -1;);
-
-	lsap = hashbin_remove(self->lap->lsaps, (long) self, NULL);
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
-	self->lap->cache.valid = FALSE;
-#endif
-
-	IRDA_ASSERT(lsap != NULL, return -1;);
-	IRDA_ASSERT(lsap->magic == LMP_LSAP_MAGIC, return -1;);
-	IRDA_ASSERT(lsap == self, return -1;);
-
-	hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) self,
-		       (long) self, NULL);
-
-	/* Reset some values */
-	self->dlsap_sel = LSAP_ANY;
-	self->lap = NULL;
-
-	return 0;
-}
-EXPORT_SYMBOL(irlmp_disconnect_request);
-
-/*
- * Function irlmp_disconnect_indication (reason, userdata)
- *
- *    LSAP is being closed!
- */
-void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason,
-				 struct sk_buff *skb)
-{
-	struct lsap_cb *lsap;
-
-	pr_debug("%s(), reason=%s [%d]\n", __func__,
-		 irlmp_reason_str(reason), reason);
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-
-	pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x\n",
-		 __func__, self->slsap_sel, self->dlsap_sel);
-
-	/* Already disconnected ?
-	 * There is a race condition between irlmp_disconnect_request()
-	 * and us that might mess up the hashbins below. This fixes it.
-	 * Jean II */
-	if (! test_and_clear_bit(0, &self->connected)) {
-		pr_debug("%s(), already disconnected!\n", __func__);
-		return;
-	}
-
-	/*
-	 *  Remove association between this LSAP and the link it used
-	 */
-	IRDA_ASSERT(self->lap != NULL, return;);
-	IRDA_ASSERT(self->lap->lsaps != NULL, return;);
-
-	lsap = hashbin_remove(self->lap->lsaps, (long) self, NULL);
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
-	self->lap->cache.valid = FALSE;
-#endif
-
-	IRDA_ASSERT(lsap != NULL, return;);
-	IRDA_ASSERT(lsap == self, return;);
-	hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) lsap,
-		       (long) lsap, NULL);
-
-	self->dlsap_sel = LSAP_ANY;
-	self->lap = NULL;
-
-	/*
-	 *  Inform service user
-	 */
-	if (self->notify.disconnect_indication) {
-		/* Don't forget to refcount it - see irlap_driver_rcv(). */
-		if(skb)
-			skb_get(skb);
-		self->notify.disconnect_indication(self->notify.instance,
-						   self, reason, skb);
-	} else {
-		pr_debug("%s(), no handler\n", __func__);
-	}
-}
-
-/*
- * Function irlmp_do_expiry (void)
- *
- *    Do a cleanup of the discovery log (remove old entries)
- *
- * Note : separate from irlmp_do_discovery() so that we can handle
- * passive discovery properly.
- */
-void irlmp_do_expiry(void)
-{
-	struct lap_cb *lap;
-
-	/*
-	 * Expire discovery on all links which are *not* connected.
-	 * On links which are connected, we can't do discovery
-	 * anymore and can't refresh the log, so we freeze the
-	 * discovery log to keep info about the device we are
-	 * connected to.
-	 * This info is mandatory if we want irlmp_connect_request()
-	 * to work properly. - Jean II
-	 */
-	lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
-	while (lap != NULL) {
-		IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
-
-		if (lap->lap_state == LAP_STANDBY) {
-			/* Expire discoveries discovered on this link */
-			irlmp_expire_discoveries(irlmp->cachelog, lap->saddr,
-						 FALSE);
-		}
-		lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
-	}
-}
-
-/*
- * Function irlmp_do_discovery (nslots)
- *
- *    Do some discovery on all links
- *
- * Note : log expiry is done above.
- */
-void irlmp_do_discovery(int nslots)
-{
-	struct lap_cb *lap;
-	__u16 *data_hintsp;
-
-	/* Make sure the value is sane */
-	if ((nslots != 1) && (nslots != 6) && (nslots != 8) && (nslots != 16)){
-		net_warn_ratelimited("%s: invalid value for number of slots!\n",
-				     __func__);
-		nslots = sysctl_discovery_slots = 8;
-	}
-
-	/* Construct new discovery info to be used by IrLAP, */
-	data_hintsp = (__u16 *) irlmp->discovery_cmd.data.hints;
-	put_unaligned(irlmp->hints.word, data_hintsp);
-
-	/*
-	 *  Set character set for device name (we use ASCII), and
-	 *  copy device name. Remember to make room for a \0 at the
-	 *  end
-	 */
-	irlmp->discovery_cmd.data.charset = CS_ASCII;
-	strncpy(irlmp->discovery_cmd.data.info, sysctl_devname,
-		NICKNAME_MAX_LEN);
-	irlmp->discovery_cmd.name_len = strlen(irlmp->discovery_cmd.data.info);
-	irlmp->discovery_cmd.nslots = nslots;
-
-	/*
-	 * Try to send discovery packets on all links
-	 */
-	lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
-	while (lap != NULL) {
-		IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
-
-		if (lap->lap_state == LAP_STANDBY) {
-			/* Try to discover */
-			irlmp_do_lap_event(lap, LM_LAP_DISCOVERY_REQUEST,
-					   NULL);
-		}
-		lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
-	}
-}
-
-/*
- * Function irlmp_discovery_request (nslots)
- *
- *    Do a discovery of devices in front of the computer
- *
- * If the caller has registered a client discovery callback, this
- * allow him to receive the full content of the discovery log through
- * this callback (as normally he will receive only new discoveries).
- */
-void irlmp_discovery_request(int nslots)
-{
-	/* Return current cached discovery log (in full) */
-	irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_LOG);
-
-	/*
-	 * Start a single discovery operation if discovery is not already
-	 * running
-	 */
-	if (!sysctl_discovery) {
-		/* Check if user wants to override the default */
-		if (nslots == DISCOVERY_DEFAULT_SLOTS)
-			nslots = sysctl_discovery_slots;
-
-		irlmp_do_discovery(nslots);
-		/* Note : we never do expiry here. Expiry will run on the
-		 * discovery timer regardless of the state of sysctl_discovery
-		 * Jean II */
-	}
-}
-EXPORT_SYMBOL(irlmp_discovery_request);
-
-/*
- * Function irlmp_get_discoveries (pn, mask, slots)
- *
- *    Return the current discovery log
- *
- * If discovery is not enabled, you should call this function again
- * after 1 or 2 seconds (i.e. after discovery has been done).
- */
-struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots)
-{
-	/* If discovery is not enabled, it's likely that the discovery log
-	 * will be empty. So, we trigger a single discovery, so that next
-	 * time the user call us there might be some results in the log.
-	 * Jean II
-	 */
-	if (!sysctl_discovery) {
-		/* Check if user wants to override the default */
-		if (nslots == DISCOVERY_DEFAULT_SLOTS)
-			nslots = sysctl_discovery_slots;
-
-		/* Start discovery - will complete sometime later */
-		irlmp_do_discovery(nslots);
-		/* Note : we never do expiry here. Expiry will run on the
-		 * discovery timer regardless of the state of sysctl_discovery
-		 * Jean II */
-	}
-
-	/* Return current cached discovery log */
-	return irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE);
-}
-EXPORT_SYMBOL(irlmp_get_discoveries);
-
-/*
- * Function irlmp_notify_client (log)
- *
- *    Notify all about discovered devices
- *
- * Clients registered with IrLMP are :
- *	o IrComm
- *	o IrLAN
- *	o Any socket (in any state - ouch, that may be a lot !)
- * The client may have defined a callback to be notified in case of
- * partial/selective discovery based on the hints that it passed to IrLMP.
- */
-static inline void
-irlmp_notify_client(irlmp_client_t *client,
-		    hashbin_t *log, DISCOVERY_MODE mode)
-{
-	discinfo_t *discoveries;	/* Copy of the discovery log */
-	int	number;			/* Number of nodes in the log */
-	int	i;
-
-	/* Check if client wants or not partial/selective log (optimisation) */
-	if (!client->disco_callback)
-		return;
-
-	/*
-	 * Locking notes :
-	 * the old code was manipulating the log directly, which was
-	 * very racy. Now, we use copy_discoveries, that protects
-	 * itself while dumping the log for us.
-	 * The overhead of the copy is compensated by the fact that
-	 * we only pass new discoveries in normal mode and don't
-	 * pass the same old entry every 3s to the caller as we used
-	 * to do (virtual function calling is expensive).
-	 * Jean II
-	 */
-
-	/*
-	 * Now, check all discovered devices (if any), and notify client
-	 * only about the services that the client is interested in
-	 * We also notify only about the new devices unless the caller
-	 * explicitly request a dump of the log. Jean II
-	 */
-	discoveries = irlmp_copy_discoveries(log, &number,
-					     client->hint_mask.word,
-					     (mode == DISCOVERY_LOG));
-	/* Check if the we got some results */
-	if (discoveries == NULL)
-		return;	/* No nodes discovered */
-
-	/* Pass all entries to the listener */
-	for(i = 0; i < number; i++)
-		client->disco_callback(&(discoveries[i]), mode, client->priv);
-
-	/* Free up our buffer */
-	kfree(discoveries);
-}
-
-/*
- * Function irlmp_discovery_confirm ( self, log)
- *
- *    Some device(s) answered to our discovery request! Check to see which
- *    device it is, and give indication to the client(s)
- *
- */
-void irlmp_discovery_confirm(hashbin_t *log, DISCOVERY_MODE mode)
-{
-	irlmp_client_t *client;
-	irlmp_client_t *client_next;
-
-	IRDA_ASSERT(log != NULL, return;);
-
-	if (!(HASHBIN_GET_SIZE(log)))
-		return;
-
-	/* For each client - notify callback may touch client list */
-	client = (irlmp_client_t *) hashbin_get_first(irlmp->clients);
-	while (NULL != hashbin_find_next(irlmp->clients, (long) client, NULL,
-					 (void *) &client_next) ) {
-		/* Check if we should notify client */
-		irlmp_notify_client(client, log, mode);
-
-		client = client_next;
-	}
-}
-
-/*
- * Function irlmp_discovery_expiry (expiry)
- *
- *	This device is no longer been discovered, and therefore it is being
- *	purged from the discovery log. Inform all clients who have
- *	registered for this event...
- *
- *	Note : called exclusively from discovery.c
- *	Note : this is no longer called under discovery spinlock, so the
- *		client can do whatever he wants in the callback.
- */
-void irlmp_discovery_expiry(discinfo_t *expiries, int number)
-{
-	irlmp_client_t *client;
-	irlmp_client_t *client_next;
-	int		i;
-
-	IRDA_ASSERT(expiries != NULL, return;);
-
-	/* For each client - notify callback may touch client list */
-	client = (irlmp_client_t *) hashbin_get_first(irlmp->clients);
-	while (NULL != hashbin_find_next(irlmp->clients, (long) client, NULL,
-					 (void *) &client_next) ) {
-
-		/* Pass all entries to the listener */
-		for(i = 0; i < number; i++) {
-			/* Check if we should notify client */
-			if ((client->expir_callback) &&
-			    (client->hint_mask.word &
-			     get_unaligned((__u16 *)expiries[i].hints)
-			     & 0x7f7f) )
-				client->expir_callback(&(expiries[i]),
-						       EXPIRY_TIMEOUT,
-						       client->priv);
-		}
-
-		/* Next client */
-		client = client_next;
-	}
-}
-
-/*
- * Function irlmp_get_discovery_response ()
- *
- *    Used by IrLAP to get the discovery info it needs when answering
- *    discovery requests by other devices.
- */
-discovery_t *irlmp_get_discovery_response(void)
-{
-	IRDA_ASSERT(irlmp != NULL, return NULL;);
-
-	put_unaligned(irlmp->hints.word, (__u16 *)irlmp->discovery_rsp.data.hints);
-
-	/*
-	 *  Set character set for device name (we use ASCII), and
-	 *  copy device name. Remember to make room for a \0 at the
-	 *  end
-	 */
-	irlmp->discovery_rsp.data.charset = CS_ASCII;
-
-	strncpy(irlmp->discovery_rsp.data.info, sysctl_devname,
-		NICKNAME_MAX_LEN);
-	irlmp->discovery_rsp.name_len = strlen(irlmp->discovery_rsp.data.info);
-
-	return &irlmp->discovery_rsp;
-}
-
-/*
- * Function irlmp_data_request (self, skb)
- *
- *    Send some data to peer device
- *
- * Note on skb management :
- * After calling the lower layers of the IrDA stack, we always
- * kfree() the skb, which drop the reference count (and potentially
- * destroy it).
- * IrLMP and IrLAP may queue the packet, and in those cases will need
- * to use skb_get() to keep it around.
- * Jean II
- */
-int irlmp_data_request(struct lsap_cb *self, struct sk_buff *userdata)
-{
-	int	ret;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
-	/* Make room for MUX header */
-	IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER, return -1;);
-	skb_push(userdata, LMP_HEADER);
-
-	ret = irlmp_do_lsap_event(self, LM_DATA_REQUEST, userdata);
-
-	/* Drop reference count - see irlap_data_request(). */
-	dev_kfree_skb(userdata);
-
-	return ret;
-}
-EXPORT_SYMBOL(irlmp_data_request);
-
-/*
- * Function irlmp_data_indication (handle, skb)
- *
- *    Got data from LAP layer so pass it up to upper layer
- *
- */
-void irlmp_data_indication(struct lsap_cb *self, struct sk_buff *skb)
-{
-	/* Hide LMP header from layer above */
-	skb_pull(skb, LMP_HEADER);
-
-	if (self->notify.data_indication) {
-		/* Don't forget to refcount it - see irlap_driver_rcv(). */
-		skb_get(skb);
-		self->notify.data_indication(self->notify.instance, self, skb);
-	}
-}
-
-/*
- * Function irlmp_udata_request (self, skb)
- */
-int irlmp_udata_request(struct lsap_cb *self, struct sk_buff *userdata)
-{
-	int	ret;
-
-	IRDA_ASSERT(userdata != NULL, return -1;);
-
-	/* Make room for MUX header */
-	IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER, return -1;);
-	skb_push(userdata, LMP_HEADER);
-
-	ret = irlmp_do_lsap_event(self, LM_UDATA_REQUEST, userdata);
-
-	/* Drop reference count - see irlap_data_request(). */
-	dev_kfree_skb(userdata);
-
-	return ret;
-}
-
-/*
- * Function irlmp_udata_indication (self, skb)
- *
- *    Send unreliable data (but still within the connection)
- *
- */
-void irlmp_udata_indication(struct lsap_cb *self, struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	/* Hide LMP header from layer above */
-	skb_pull(skb, LMP_HEADER);
-
-	if (self->notify.udata_indication) {
-		/* Don't forget to refcount it - see irlap_driver_rcv(). */
-		skb_get(skb);
-		self->notify.udata_indication(self->notify.instance, self,
-					      skb);
-	}
-}
-
-/*
- * Function irlmp_connless_data_request (self, skb)
- */
-#ifdef CONFIG_IRDA_ULTRA
-int irlmp_connless_data_request(struct lsap_cb *self, struct sk_buff *userdata,
-				__u8 pid)
-{
-	struct sk_buff *clone_skb;
-	struct lap_cb *lap;
-
-	IRDA_ASSERT(userdata != NULL, return -1;);
-
-	/* Make room for MUX and PID header */
-	IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER+LMP_PID_HEADER,
-		    return -1;);
-
-	/* Insert protocol identifier */
-	skb_push(userdata, LMP_PID_HEADER);
-	if(self != NULL)
-	  userdata->data[0] = self->pid;
-	else
-	  userdata->data[0] = pid;
-
-	/* Connectionless sockets must use 0x70 */
-	skb_push(userdata, LMP_HEADER);
-	userdata->data[0] = userdata->data[1] = LSAP_CONNLESS;
-
-	/* Try to send Connectionless  packets out on all links */
-	lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
-	while (lap != NULL) {
-		IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return -1;);
-
-		clone_skb = skb_clone(userdata, GFP_ATOMIC);
-		if (!clone_skb) {
-			dev_kfree_skb(userdata);
-			return -ENOMEM;
-		}
-
-		irlap_unitdata_request(lap->irlap, clone_skb);
-		/* irlap_unitdata_request() don't increase refcount,
-		 * so no dev_kfree_skb() - Jean II */
-
-		lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
-	}
-	dev_kfree_skb(userdata);
-
-	return 0;
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irlmp_connless_data_indication (self, skb)
- *
- *    Receive unreliable data outside any connection. Mostly used by Ultra
- *
- */
-#ifdef CONFIG_IRDA_ULTRA
-void irlmp_connless_data_indication(struct lsap_cb *self, struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	/* Hide LMP and PID header from layer above */
-	skb_pull(skb, LMP_HEADER+LMP_PID_HEADER);
-
-	if (self->notify.udata_indication) {
-		/* Don't forget to refcount it - see irlap_driver_rcv(). */
-		skb_get(skb);
-		self->notify.udata_indication(self->notify.instance, self,
-					      skb);
-	}
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Propagate status indication from LAP to LSAPs (via LMP)
- * This don't trigger any change of state in lap_cb, lmp_cb or lsap_cb,
- * and the event is stateless, therefore we can bypass both state machines
- * and send the event direct to the LSAP user.
- * Jean II
- */
-void irlmp_status_indication(struct lap_cb *self,
-			     LINK_STATUS link, LOCK_STATUS lock)
-{
-	struct lsap_cb *next;
-	struct lsap_cb *curr;
-
-	/* Send status_indication to all LSAPs using this link */
-	curr = (struct lsap_cb *) hashbin_get_first( self->lsaps);
-	while (NULL != hashbin_find_next(self->lsaps, (long) curr, NULL,
-					 (void *) &next) ) {
-		IRDA_ASSERT(curr->magic == LMP_LSAP_MAGIC, return;);
-		/*
-		 *  Inform service user if he has requested it
-		 */
-		if (curr->notify.status_indication != NULL)
-			curr->notify.status_indication(curr->notify.instance,
-						       link, lock);
-		else
-			pr_debug("%s(), no handler\n", __func__);
-
-		curr = next;
-	}
-}
-
-/*
- * Receive flow control indication from LAP.
- * LAP want us to send it one more frame. We implement a simple round
- * robin scheduler between the active sockets so that we get a bit of
- * fairness. Note that the round robin is far from perfect, but it's
- * better than nothing.
- * We then poll the selected socket so that we can do synchronous
- * refilling of IrLAP (which allow to minimise the number of buffers).
- * Jean II
- */
-void irlmp_flow_indication(struct lap_cb *self, LOCAL_FLOW flow)
-{
-	struct lsap_cb *next;
-	struct lsap_cb *curr;
-	int	lsap_todo;
-
-	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-	IRDA_ASSERT(flow == FLOW_START, return;);
-
-	/* Get the number of lsap. That's the only safe way to know
-	 * that we have looped around... - Jean II */
-	lsap_todo = HASHBIN_GET_SIZE(self->lsaps);
-	pr_debug("%s() : %d lsaps to scan\n", __func__, lsap_todo);
-
-	/* Poll lsap in order until the queue is full or until we
-	 * tried them all.
-	 * Most often, the current LSAP will have something to send,
-	 * so we will go through this loop only once. - Jean II */
-	while((lsap_todo--) &&
-	      (IRLAP_GET_TX_QUEUE_LEN(self->irlap) < LAP_HIGH_THRESHOLD)) {
-		/* Try to find the next lsap we should poll. */
-		next = self->flow_next;
-		/* If we have no lsap, restart from first one */
-		if(next == NULL)
-			next = (struct lsap_cb *) hashbin_get_first(self->lsaps);
-		/* Verify current one and find the next one */
-		curr = hashbin_find_next(self->lsaps, (long) next, NULL,
-					 (void *) &self->flow_next);
-		/* Uh-oh... Paranoia */
-		if(curr == NULL)
-			break;
-		pr_debug("%s() : curr is %p, next was %p and is now %p, still %d to go - queue len = %d\n",
-			 __func__, curr, next, self->flow_next, lsap_todo,
-			 IRLAP_GET_TX_QUEUE_LEN(self->irlap));
-
-		/* Inform lsap user that it can send one more packet. */
-		if (curr->notify.flow_indication != NULL)
-			curr->notify.flow_indication(curr->notify.instance,
-						     curr, flow);
-		else
-			pr_debug("%s(), no handler\n", __func__);
-	}
-}
-
-#if 0
-/*
- * Function irlmp_hint_to_service (hint)
- *
- *    Returns a list of all servics contained in the given hint bits. This
- *    function assumes that the hint bits have the size of two bytes only
- */
-__u8 *irlmp_hint_to_service(__u8 *hint)
-{
-	__u8 *service;
-	int i = 0;
-
-	/*
-	 * Allocate array to store services in. 16 entries should be safe
-	 * since we currently only support 2 hint bytes
-	 */
-	service = kmalloc(16, GFP_ATOMIC);
-	if (!service)
-		return NULL;
-
-	if (!hint[0]) {
-		pr_debug("<None>\n");
-		kfree(service);
-		return NULL;
-	}
-	if (hint[0] & HINT_PNP)
-		pr_debug("PnP Compatible ");
-	if (hint[0] & HINT_PDA)
-		pr_debug("PDA/Palmtop ");
-	if (hint[0] & HINT_COMPUTER)
-		pr_debug("Computer ");
-	if (hint[0] & HINT_PRINTER) {
-		pr_debug("Printer ");
-		service[i++] = S_PRINTER;
-	}
-	if (hint[0] & HINT_MODEM)
-		pr_debug("Modem ");
-	if (hint[0] & HINT_FAX)
-		pr_debug("Fax ");
-	if (hint[0] & HINT_LAN) {
-		pr_debug("LAN Access ");
-		service[i++] = S_LAN;
-	}
-	/*
-	 *  Test if extension byte exists. This byte will usually be
-	 *  there, but this is not really required by the standard.
-	 *  (IrLMP p. 29)
-	 */
-	if (hint[0] & HINT_EXTENSION) {
-		if (hint[1] & HINT_TELEPHONY) {
-			pr_debug("Telephony ");
-			service[i++] = S_TELEPHONY;
-		}
-		if (hint[1] & HINT_FILE_SERVER)
-			pr_debug("File Server ");
-
-		if (hint[1] & HINT_COMM) {
-			pr_debug("IrCOMM ");
-			service[i++] = S_COMM;
-		}
-		if (hint[1] & HINT_OBEX) {
-			pr_debug("IrOBEX ");
-			service[i++] = S_OBEX;
-		}
-	}
-	pr_debug("\n");
-
-	/* So that client can be notified about any discovery */
-	service[i++] = S_ANY;
-
-	service[i] = S_END;
-
-	return service;
-}
-#endif
-
-static const __u16 service_hint_mapping[S_END][2] = {
-	{ HINT_PNP,		0 },			/* S_PNP */
-	{ HINT_PDA,		0 },			/* S_PDA */
-	{ HINT_COMPUTER,	0 },			/* S_COMPUTER */
-	{ HINT_PRINTER,		0 },			/* S_PRINTER */
-	{ HINT_MODEM,		0 },			/* S_MODEM */
-	{ HINT_FAX,		0 },			/* S_FAX */
-	{ HINT_LAN,		0 },			/* S_LAN */
-	{ HINT_EXTENSION,	HINT_TELEPHONY },	/* S_TELEPHONY */
-	{ HINT_EXTENSION,	HINT_COMM },		/* S_COMM */
-	{ HINT_EXTENSION,	HINT_OBEX },		/* S_OBEX */
-	{ 0xFF,			0xFF },			/* S_ANY */
-};
-
-/*
- * Function irlmp_service_to_hint (service)
- *
- *    Converts a service type, to a hint bit
- *
- *    Returns: a 16 bit hint value, with the service bit set
- */
-__u16 irlmp_service_to_hint(int service)
-{
-	__u16_host_order hint;
-
-	hint.byte[0] = service_hint_mapping[service][0];
-	hint.byte[1] = service_hint_mapping[service][1];
-
-	return hint.word;
-}
-EXPORT_SYMBOL(irlmp_service_to_hint);
-
-/*
- * Function irlmp_register_service (service)
- *
- *    Register local service with IrLMP
- *
- */
-void *irlmp_register_service(__u16 hints)
-{
-	irlmp_service_t *service;
-
-	pr_debug("%s(), hints = %04x\n", __func__, hints);
-
-	/* Make a new registration */
-	service = kmalloc(sizeof(irlmp_service_t), GFP_ATOMIC);
-	if (!service)
-		return NULL;
-
-	service->hints.word = hints;
-	hashbin_insert(irlmp->services, (irda_queue_t *) service,
-		       (long) service, NULL);
-
-	irlmp->hints.word |= hints;
-
-	return (void *)service;
-}
-EXPORT_SYMBOL(irlmp_register_service);
-
-/*
- * Function irlmp_unregister_service (handle)
- *
- *    Unregister service with IrLMP.
- *
- *    Returns: 0 on success, -1 on error
- */
-int irlmp_unregister_service(void *handle)
-{
-	irlmp_service_t *service;
-	unsigned long flags;
-
-	if (!handle)
-		return -1;
-
-	/* Caller may call with invalid handle (it's legal) - Jean II */
-	service = hashbin_lock_find(irlmp->services, (long) handle, NULL);
-	if (!service) {
-		pr_debug("%s(), Unknown service!\n", __func__);
-		return -1;
-	}
-
-	hashbin_remove_this(irlmp->services, (irda_queue_t *) service);
-	kfree(service);
-
-	/* Remove old hint bits */
-	irlmp->hints.word = 0;
-
-	/* Refresh current hint bits */
-	spin_lock_irqsave(&irlmp->services->hb_spinlock, flags);
-	service = (irlmp_service_t *) hashbin_get_first(irlmp->services);
-	while (service) {
-		irlmp->hints.word |= service->hints.word;
-
-		service = (irlmp_service_t *)hashbin_get_next(irlmp->services);
-	}
-	spin_unlock_irqrestore(&irlmp->services->hb_spinlock, flags);
-	return 0;
-}
-EXPORT_SYMBOL(irlmp_unregister_service);
-
-/*
- * Function irlmp_register_client (hint_mask, callback1, callback2)
- *
- *    Register a local client with IrLMP
- *	First callback is selective discovery (based on hints)
- *	Second callback is for selective discovery expiries
- *
- *    Returns: handle > 0 on success, 0 on error
- */
-void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb,
-			    DISCOVERY_CALLBACK2 expir_clb, void *priv)
-{
-	irlmp_client_t *client;
-
-	IRDA_ASSERT(irlmp != NULL, return NULL;);
-
-	/* Make a new registration */
-	client = kmalloc(sizeof(irlmp_client_t), GFP_ATOMIC);
-	if (!client)
-		return NULL;
-
-	/* Register the details */
-	client->hint_mask.word = hint_mask;
-	client->disco_callback = disco_clb;
-	client->expir_callback = expir_clb;
-	client->priv = priv;
-
-	hashbin_insert(irlmp->clients, (irda_queue_t *) client,
-		       (long) client, NULL);
-
-	return (void *) client;
-}
-EXPORT_SYMBOL(irlmp_register_client);
-
-/*
- * Function irlmp_update_client (handle, hint_mask, callback1, callback2)
- *
- *    Updates specified client (handle) with possibly new hint_mask and
- *    callback
- *
- *    Returns: 0 on success, -1 on error
- */
-int irlmp_update_client(void *handle, __u16 hint_mask,
-			DISCOVERY_CALLBACK1 disco_clb,
-			DISCOVERY_CALLBACK2 expir_clb, void *priv)
-{
-	irlmp_client_t *client;
-
-	if (!handle)
-		return -1;
-
-	client = hashbin_lock_find(irlmp->clients, (long) handle, NULL);
-	if (!client) {
-		pr_debug("%s(), Unknown client!\n", __func__);
-		return -1;
-	}
-
-	client->hint_mask.word = hint_mask;
-	client->disco_callback = disco_clb;
-	client->expir_callback = expir_clb;
-	client->priv = priv;
-
-	return 0;
-}
-EXPORT_SYMBOL(irlmp_update_client);
-
-/*
- * Function irlmp_unregister_client (handle)
- *
- *    Returns: 0 on success, -1 on error
- *
- */
-int irlmp_unregister_client(void *handle)
-{
-	struct irlmp_client *client;
-
-	if (!handle)
-		return -1;
-
-	/* Caller may call with invalid handle (it's legal) - Jean II */
-	client = hashbin_lock_find(irlmp->clients, (long) handle, NULL);
-	if (!client) {
-		pr_debug("%s(), Unknown client!\n", __func__);
-		return -1;
-	}
-
-	pr_debug("%s(), removing client!\n", __func__);
-	hashbin_remove_this(irlmp->clients, (irda_queue_t *) client);
-	kfree(client);
-
-	return 0;
-}
-EXPORT_SYMBOL(irlmp_unregister_client);
-
-/*
- * Function irlmp_slsap_inuse (slsap)
- *
- *    Check if the given source LSAP selector is in use
- *
- * This function is clearly not very efficient. On the mitigating side, the
- * stack make sure that in 99% of the cases, we are called only once
- * for each socket allocation. We could probably keep a bitmap
- * of the allocated LSAP, but I'm not sure the complexity is worth it.
- * Jean II
- */
-static int irlmp_slsap_inuse(__u8 slsap_sel)
-{
-	struct lsap_cb *self;
-	struct lap_cb *lap;
-	unsigned long flags;
-
-	IRDA_ASSERT(irlmp != NULL, return TRUE;);
-	IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return TRUE;);
-	IRDA_ASSERT(slsap_sel != LSAP_ANY, return TRUE;);
-
-#ifdef CONFIG_IRDA_ULTRA
-	/* Accept all bindings to the connectionless LSAP */
-	if (slsap_sel == LSAP_CONNLESS)
-		return FALSE;
-#endif /* CONFIG_IRDA_ULTRA */
-
-	/* Valid values are between 0 and 127 (0x0-0x6F) */
-	if (slsap_sel > LSAP_MAX)
-		return TRUE;
-
-	/*
-	 *  Check if slsap is already in use. To do this we have to loop over
-	 *  every IrLAP connection and check every LSAP associated with each
-	 *  the connection.
-	 */
-	spin_lock_irqsave_nested(&irlmp->links->hb_spinlock, flags,
-			SINGLE_DEPTH_NESTING);
-	lap = (struct lap_cb *) hashbin_get_first(irlmp->links);
-	while (lap != NULL) {
-		IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, goto errlap;);
-
-		/* Careful for priority inversions here !
-		 * irlmp->links is never taken while another IrDA
-		 * spinlock is held, so we are safe. Jean II */
-		spin_lock(&lap->lsaps->hb_spinlock);
-
-		/* For this IrLAP, check all the LSAPs */
-		self = (struct lsap_cb *) hashbin_get_first(lap->lsaps);
-		while (self != NULL) {
-			IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC,
-				    goto errlsap;);
-
-			if ((self->slsap_sel == slsap_sel)) {
-				pr_debug("Source LSAP selector=%02x in use\n",
-					 self->slsap_sel);
-				goto errlsap;
-			}
-			self = (struct lsap_cb*) hashbin_get_next(lap->lsaps);
-		}
-		spin_unlock(&lap->lsaps->hb_spinlock);
-
-		/* Next LAP */
-		lap = (struct lap_cb *) hashbin_get_next(irlmp->links);
-	}
-	spin_unlock_irqrestore(&irlmp->links->hb_spinlock, flags);
-
-	/*
-	 * Server sockets are typically waiting for connections and
-	 * therefore reside in the unconnected list. We don't want
-	 * to give out their LSAPs for obvious reasons...
-	 * Jean II
-	 */
-	spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-
-	self = (struct lsap_cb *) hashbin_get_first(irlmp->unconnected_lsaps);
-	while (self != NULL) {
-		IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, goto erruncon;);
-		if ((self->slsap_sel == slsap_sel)) {
-			pr_debug("Source LSAP selector=%02x in use (unconnected)\n",
-				 self->slsap_sel);
-			goto erruncon;
-		}
-		self = (struct lsap_cb*) hashbin_get_next(irlmp->unconnected_lsaps);
-	}
-	spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-
-	return FALSE;
-
-	/* Error exit from within one of the two nested loops.
-	 * Make sure we release the right spinlock in the righ order.
-	 * Jean II */
-errlsap:
-	spin_unlock(&lap->lsaps->hb_spinlock);
-IRDA_ASSERT_LABEL(errlap:)
-	spin_unlock_irqrestore(&irlmp->links->hb_spinlock, flags);
-	return TRUE;
-
-	/* Error exit from within the unconnected loop.
-	 * Just one spinlock to release... Jean II */
-erruncon:
-	spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-	return TRUE;
-}
-
-/*
- * Function irlmp_find_free_slsap ()
- *
- *    Find a free source LSAP to use. This function is called if the service
- *    user has requested a source LSAP equal to LM_ANY
- */
-static __u8 irlmp_find_free_slsap(void)
-{
-	__u8 lsap_sel;
-	int wrapped = 0;
-
-	IRDA_ASSERT(irlmp != NULL, return -1;);
-	IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return -1;);
-
-	/* Most users don't really care which LSAPs they are given,
-	 * and therefore we automatically give them a free LSAP.
-	 * This function try to find a suitable LSAP, i.e. which is
-	 * not in use and is within the acceptable range. Jean II */
-
-	do {
-		/* Always increment to LSAP number before using it.
-		 * In theory, we could reuse the last LSAP number, as long
-		 * as it is no longer in use. Some IrDA stack do that.
-		 * However, the previous socket may be half closed, i.e.
-		 * we closed it, we think it's no longer in use, but the
-		 * other side did not receive our close and think it's
-		 * active and still send data on it.
-		 * This is similar to what is done with PIDs and TCP ports.
-		 * Also, this reduce the number of calls to irlmp_slsap_inuse()
-		 * which is an expensive function to call.
-		 * Jean II */
-		irlmp->last_lsap_sel++;
-
-		/* Check if we need to wraparound (0x70-0x7f are reserved) */
-		if (irlmp->last_lsap_sel > LSAP_MAX) {
-			/* 0x00-0x10 are also reserved for well know ports */
-			irlmp->last_lsap_sel = 0x10;
-
-			/* Make sure we terminate the loop */
-			if (wrapped++) {
-				net_err_ratelimited("%s: no more free LSAPs !\n",
-						    __func__);
-				return 0;
-			}
-		}
-
-		/* If the LSAP is in use, try the next one.
-		 * Despite the autoincrement, we need to check if the lsap
-		 * is really in use or not, first because LSAP may be
-		 * directly allocated in irlmp_open_lsap(), and also because
-		 * we may wraparound on old sockets. Jean II */
-	} while (irlmp_slsap_inuse(irlmp->last_lsap_sel));
-
-	/* Got it ! */
-	lsap_sel = irlmp->last_lsap_sel;
-	pr_debug("%s(), found free lsap_sel=%02x\n",
-		 __func__, lsap_sel);
-
-	return lsap_sel;
-}
-
-/*
- * Function irlmp_convert_lap_reason (lap_reason)
- *
- *    Converts IrLAP disconnect reason codes to IrLMP disconnect reason
- *    codes
- *
- */
-LM_REASON irlmp_convert_lap_reason( LAP_REASON lap_reason)
-{
-	int reason = LM_LAP_DISCONNECT;
-
-	switch (lap_reason) {
-	case LAP_DISC_INDICATION: /* Received a disconnect request from peer */
-		pr_debug("%s(), LAP_DISC_INDICATION\n", __func__);
-		reason = LM_USER_REQUEST;
-		break;
-	case LAP_NO_RESPONSE:    /* To many retransmits without response */
-		pr_debug("%s(), LAP_NO_RESPONSE\n", __func__);
-		reason = LM_LAP_DISCONNECT;
-		break;
-	case LAP_RESET_INDICATION:
-		pr_debug("%s(), LAP_RESET_INDICATION\n", __func__);
-		reason = LM_LAP_RESET;
-		break;
-	case LAP_FOUND_NONE:
-	case LAP_MEDIA_BUSY:
-	case LAP_PRIMARY_CONFLICT:
-		pr_debug("%s(), LAP_FOUND_NONE, LAP_MEDIA_BUSY or LAP_PRIMARY_CONFLICT\n",
-			 __func__);
-		reason = LM_CONNECT_FAILURE;
-		break;
-	default:
-		pr_debug("%s(), Unknown IrLAP disconnect reason %d!\n",
-			 __func__, lap_reason);
-		reason = LM_LAP_DISCONNECT;
-		break;
-	}
-
-	return reason;
-}
-
-#ifdef CONFIG_PROC_FS
-
-struct irlmp_iter_state {
-	hashbin_t *hashbin;
-};
-
-#define LSAP_START_TOKEN	((void *)1)
-#define LINK_START_TOKEN	((void *)2)
-
-static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off)
-{
-	void *element;
-
-	spin_lock_irq(&iter->hashbin->hb_spinlock);
-	for (element = hashbin_get_first(iter->hashbin);
-	     element != NULL;
-	     element = hashbin_get_next(iter->hashbin)) {
-		if (!off || (*off)-- == 0) {
-			/* NB: hashbin left locked */
-			return element;
-		}
-	}
-	spin_unlock_irq(&iter->hashbin->hb_spinlock);
-	iter->hashbin = NULL;
-	return NULL;
-}
-
-
-static void *irlmp_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	struct irlmp_iter_state *iter = seq->private;
-	void *v;
-	loff_t off = *pos;
-
-	iter->hashbin = NULL;
-	if (off-- == 0)
-		return LSAP_START_TOKEN;
-
-	iter->hashbin = irlmp->unconnected_lsaps;
-	v = irlmp_seq_hb_idx(iter, &off);
-	if (v)
-		return v;
-
-	if (off-- == 0)
-		return LINK_START_TOKEN;
-
-	iter->hashbin = irlmp->links;
-	return irlmp_seq_hb_idx(iter, &off);
-}
-
-static void *irlmp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct irlmp_iter_state *iter = seq->private;
-
-	++*pos;
-
-	if (v == LSAP_START_TOKEN) {		/* start of list of lsaps */
-		iter->hashbin = irlmp->unconnected_lsaps;
-		v = irlmp_seq_hb_idx(iter, NULL);
-		return v ? v : LINK_START_TOKEN;
-	}
-
-	if (v == LINK_START_TOKEN) {		/* start of list of links */
-		iter->hashbin = irlmp->links;
-		return irlmp_seq_hb_idx(iter, NULL);
-	}
-
-	v = hashbin_get_next(iter->hashbin);
-
-	if (v == NULL) {			/* no more in this hash bin */
-		spin_unlock_irq(&iter->hashbin->hb_spinlock);
-
-		if (iter->hashbin == irlmp->unconnected_lsaps)
-			v =  LINK_START_TOKEN;
-
-		iter->hashbin = NULL;
-	}
-	return v;
-}
-
-static void irlmp_seq_stop(struct seq_file *seq, void *v)
-{
-	struct irlmp_iter_state *iter = seq->private;
-
-	if (iter->hashbin)
-		spin_unlock_irq(&iter->hashbin->hb_spinlock);
-}
-
-static int irlmp_seq_show(struct seq_file *seq, void *v)
-{
-	const struct irlmp_iter_state *iter = seq->private;
-	struct lsap_cb *self = v;
-
-	if (v == LSAP_START_TOKEN)
-		seq_puts(seq, "Unconnected LSAPs:\n");
-	else if (v == LINK_START_TOKEN)
-		seq_puts(seq, "\nRegistered Link Layers:\n");
-	else if (iter->hashbin == irlmp->unconnected_lsaps) {
-		self = v;
-		IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -EINVAL; );
-		seq_printf(seq, "lsap state: %s, ",
-			   irlsap_state[ self->lsap_state]);
-		seq_printf(seq,
-			   "slsap_sel: %#02x, dlsap_sel: %#02x, ",
-			   self->slsap_sel, self->dlsap_sel);
-		seq_printf(seq, "(%s)", self->notify.name);
-		seq_printf(seq, "\n");
-	} else if (iter->hashbin == irlmp->links) {
-		struct lap_cb *lap = v;
-
-		seq_printf(seq, "lap state: %s, ",
-			   irlmp_state[lap->lap_state]);
-
-		seq_printf(seq, "saddr: %#08x, daddr: %#08x, ",
-			   lap->saddr, lap->daddr);
-		seq_printf(seq, "num lsaps: %d",
-			   HASHBIN_GET_SIZE(lap->lsaps));
-		seq_printf(seq, "\n");
-
-		/* Careful for priority inversions here !
-		 * All other uses of attrib spinlock are independent of
-		 * the object spinlock, so we are safe. Jean II */
-		spin_lock(&lap->lsaps->hb_spinlock);
-
-		seq_printf(seq, "\n  Connected LSAPs:\n");
-		for (self = (struct lsap_cb *) hashbin_get_first(lap->lsaps);
-		     self != NULL;
-		     self = (struct lsap_cb *)hashbin_get_next(lap->lsaps)) {
-			IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC,
-				    goto outloop;);
-			seq_printf(seq, "  lsap state: %s, ",
-				   irlsap_state[ self->lsap_state]);
-			seq_printf(seq,
-				   "slsap_sel: %#02x, dlsap_sel: %#02x, ",
-				   self->slsap_sel, self->dlsap_sel);
-			seq_printf(seq, "(%s)", self->notify.name);
-			seq_putc(seq, '\n');
-
-		}
-	IRDA_ASSERT_LABEL(outloop:)
-		spin_unlock(&lap->lsaps->hb_spinlock);
-		seq_putc(seq, '\n');
-	} else
-		return -EINVAL;
-
-	return 0;
-}
-
-static const struct seq_operations irlmp_seq_ops = {
-	.start  = irlmp_seq_start,
-	.next   = irlmp_seq_next,
-	.stop   = irlmp_seq_stop,
-	.show   = irlmp_seq_show,
-};
-
-static int irlmp_seq_open(struct inode *inode, struct file *file)
-{
-	IRDA_ASSERT(irlmp != NULL, return -EINVAL;);
-
-	return seq_open_private(file, &irlmp_seq_ops,
-			sizeof(struct irlmp_iter_state));
-}
-
-const struct file_operations irlmp_seq_fops = {
-	.owner		= THIS_MODULE,
-	.open           = irlmp_seq_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release	= seq_release_private,
-};
-
-#endif /* PROC_FS */
diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c
deleted file mode 100644
index e306cf2c1e04..000000000000
--- a/net/irda/irlmp_event.c
+++ /dev/null
@@ -1,886 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlmp_event.c
- * Version:       0.8
- * Description:   An IrDA LMP event driver for Linux
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Aug  4 20:40:53 1997
- * Modified at:   Tue Dec 14 23:04:16 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/kernel.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/timer.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irlmp_frame.h>
-#include <net/irda/irlmp_event.h>
-
-const char *const irlmp_state[] = {
-	"LAP_STANDBY",
-	"LAP_U_CONNECT",
-	"LAP_ACTIVE",
-};
-
-const char *const irlsap_state[] = {
-	"LSAP_DISCONNECTED",
-	"LSAP_CONNECT",
-	"LSAP_CONNECT_PEND",
-	"LSAP_DATA_TRANSFER_READY",
-	"LSAP_SETUP",
-	"LSAP_SETUP_PEND",
-};
-
-static const char *const irlmp_event[] __maybe_unused = {
-	"LM_CONNECT_REQUEST",
-	"LM_CONNECT_CONFIRM",
-	"LM_CONNECT_RESPONSE",
-	"LM_CONNECT_INDICATION",
-
-	"LM_DISCONNECT_INDICATION",
-	"LM_DISCONNECT_REQUEST",
-
-	"LM_DATA_REQUEST",
-	"LM_UDATA_REQUEST",
-	"LM_DATA_INDICATION",
-	"LM_UDATA_INDICATION",
-
-	"LM_WATCHDOG_TIMEOUT",
-
-	/* IrLAP events */
-	"LM_LAP_CONNECT_REQUEST",
-	"LM_LAP_CONNECT_INDICATION",
-	"LM_LAP_CONNECT_CONFIRM",
-	"LM_LAP_DISCONNECT_INDICATION",
-	"LM_LAP_DISCONNECT_REQUEST",
-	"LM_LAP_DISCOVERY_REQUEST",
-	"LM_LAP_DISCOVERY_CONFIRM",
-	"LM_LAP_IDLE_TIMEOUT",
-};
-
-/* LAP Connection control proto declarations */
-static void irlmp_state_standby  (struct lap_cb *, IRLMP_EVENT,
-				  struct sk_buff *);
-static void irlmp_state_u_connect(struct lap_cb *, IRLMP_EVENT,
-				  struct sk_buff *);
-static void irlmp_state_active   (struct lap_cb *, IRLMP_EVENT,
-				  struct sk_buff *);
-
-/* LSAP Connection control proto declarations */
-static int irlmp_state_disconnected(struct lsap_cb *, IRLMP_EVENT,
-				    struct sk_buff *);
-static int irlmp_state_connect     (struct lsap_cb *, IRLMP_EVENT,
-				    struct sk_buff *);
-static int irlmp_state_connect_pend(struct lsap_cb *, IRLMP_EVENT,
-				    struct sk_buff *);
-static int irlmp_state_dtr         (struct lsap_cb *, IRLMP_EVENT,
-				    struct sk_buff *);
-static int irlmp_state_setup       (struct lsap_cb *, IRLMP_EVENT,
-				    struct sk_buff *);
-static int irlmp_state_setup_pend  (struct lsap_cb *, IRLMP_EVENT,
-				    struct sk_buff *);
-
-static void (*lap_state[]) (struct lap_cb *, IRLMP_EVENT, struct sk_buff *) =
-{
-	irlmp_state_standby,
-	irlmp_state_u_connect,
-	irlmp_state_active,
-};
-
-static int (*lsap_state[])( struct lsap_cb *, IRLMP_EVENT, struct sk_buff *) =
-{
-	irlmp_state_disconnected,
-	irlmp_state_connect,
-	irlmp_state_connect_pend,
-	irlmp_state_dtr,
-	irlmp_state_setup,
-	irlmp_state_setup_pend
-};
-
-static inline void irlmp_next_lap_state(struct lap_cb *self,
-					IRLMP_STATE state)
-{
-	/*
-	  pr_debug("%s(), LMP LAP = %s\n", __func__, irlmp_state[state]);
-	*/
-	self->lap_state = state;
-}
-
-static inline void irlmp_next_lsap_state(struct lsap_cb *self,
-					 LSAP_STATE state)
-{
-	/*
-	IRDA_ASSERT(self != NULL, return;);
-	pr_debug("%s(), LMP LSAP = %s\n", __func__, irlsap_state[state]);
-	*/
-	self->lsap_state = state;
-}
-
-/* Do connection control events */
-int irlmp_do_lsap_event(struct lsap_cb *self, IRLMP_EVENT event,
-			struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
-	pr_debug("%s(), EVENT = %s, STATE = %s\n",
-		 __func__, irlmp_event[event], irlsap_state[self->lsap_state]);
-
-	return (*lsap_state[self->lsap_state]) (self, event, skb);
-}
-
-/*
- * Function do_lap_event (event, skb, info)
- *
- *    Do IrLAP control events
- *
- */
-void irlmp_do_lap_event(struct lap_cb *self, IRLMP_EVENT event,
-			struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-
-	pr_debug("%s(), EVENT = %s, STATE = %s\n", __func__,
-		 irlmp_event[event],
-		 irlmp_state[self->lap_state]);
-
-	(*lap_state[self->lap_state]) (self, event, skb);
-}
-
-void irlmp_discovery_timer_expired(void *data)
-{
-	/* We always cleanup the log (active & passive discovery) */
-	irlmp_do_expiry();
-
-	irlmp_do_discovery(sysctl_discovery_slots);
-
-	/* Restart timer */
-	irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout * HZ);
-}
-
-void irlmp_watchdog_timer_expired(void *data)
-{
-	struct lsap_cb *self = (struct lsap_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
-
-	irlmp_do_lsap_event(self, LM_WATCHDOG_TIMEOUT, NULL);
-}
-
-void irlmp_idle_timer_expired(void *data)
-{
-	struct lap_cb *self = (struct lap_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-
-	irlmp_do_lap_event(self, LM_LAP_IDLE_TIMEOUT, NULL);
-}
-
-/*
- * Send an event on all LSAPs attached to this LAP.
- */
-static inline void
-irlmp_do_all_lsap_event(hashbin_t *	lsap_hashbin,
-			IRLMP_EVENT	event)
-{
-	struct lsap_cb *lsap;
-	struct lsap_cb *lsap_next;
-
-	/* Note : this function use the new hashbin_find_next()
-	 * function, instead of the old hashbin_get_next().
-	 * This make sure that we are always pointing one lsap
-	 * ahead, so that if the current lsap is removed as the
-	 * result of sending the event, we don't care.
-	 * Also, as we store the context ourselves, if an enumeration
-	 * of the same lsap hashbin happens as the result of sending the
-	 * event, we don't care.
-	 * The only problem is if the next lsap is removed. In that case,
-	 * hashbin_find_next() will return NULL and we will abort the
-	 * enumeration. - Jean II */
-
-	/* Also : we don't accept any skb in input. We can *NOT* pass
-	 * the same skb to multiple clients safely, we would need to
-	 * skb_clone() it. - Jean II */
-
-	lsap = (struct lsap_cb *) hashbin_get_first(lsap_hashbin);
-
-	while (NULL != hashbin_find_next(lsap_hashbin,
-					 (long) lsap,
-					 NULL,
-					 (void *) &lsap_next) ) {
-		irlmp_do_lsap_event(lsap, event, NULL);
-		lsap = lsap_next;
-	}
-}
-
-/*********************************************************************
- *
- *    LAP connection control states
- *
- ********************************************************************/
-
-/*
- * Function irlmp_state_standby (event, skb, info)
- *
- *    STANDBY, The IrLAP connection does not exist.
- *
- */
-static void irlmp_state_standby(struct lap_cb *self, IRLMP_EVENT event,
-				struct sk_buff *skb)
-{
-	IRDA_ASSERT(self->irlap != NULL, return;);
-
-	switch (event) {
-	case LM_LAP_DISCOVERY_REQUEST:
-		/* irlmp_next_station_state( LMP_DISCOVER); */
-
-		irlap_discovery_request(self->irlap, &irlmp->discovery_cmd);
-		break;
-	case LM_LAP_CONNECT_INDICATION:
-		/*  It's important to switch state first, to avoid IrLMP to
-		 *  think that the link is free since IrLMP may then start
-		 *  discovery before the connection is properly set up. DB.
-		 */
-		irlmp_next_lap_state(self, LAP_ACTIVE);
-
-		/* Just accept connection TODO, this should be fixed */
-		irlap_connect_response(self->irlap, skb);
-		break;
-	case LM_LAP_CONNECT_REQUEST:
-		pr_debug("%s() LS_CONNECT_REQUEST\n", __func__);
-
-		irlmp_next_lap_state(self, LAP_U_CONNECT);
-
-		/* FIXME: need to set users requested QoS */
-		irlap_connect_request(self->irlap, self->daddr, NULL, 0);
-		break;
-	case LM_LAP_DISCONNECT_INDICATION:
-		pr_debug("%s(), Error LM_LAP_DISCONNECT_INDICATION\n",
-			 __func__);
-
-		irlmp_next_lap_state(self, LAP_STANDBY);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s\n",
-			 __func__, irlmp_event[event]);
-		break;
-	}
-}
-
-/*
- * Function irlmp_state_u_connect (event, skb, info)
- *
- *    U_CONNECT, The layer above has tried to open an LSAP connection but
- *    since the IrLAP connection does not exist, we must first start an
- *    IrLAP connection. We are now waiting response from IrLAP.
- * */
-static void irlmp_state_u_connect(struct lap_cb *self, IRLMP_EVENT event,
-				  struct sk_buff *skb)
-{
-	pr_debug("%s(), event=%s\n", __func__, irlmp_event[event]);
-
-	switch (event) {
-	case LM_LAP_CONNECT_INDICATION:
-		/*  It's important to switch state first, to avoid IrLMP to
-		 *  think that the link is free since IrLMP may then start
-		 *  discovery before the connection is properly set up. DB.
-		 */
-		irlmp_next_lap_state(self, LAP_ACTIVE);
-
-		/* Just accept connection TODO, this should be fixed */
-		irlap_connect_response(self->irlap, skb);
-
-		/* Tell LSAPs that they can start sending data */
-		irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM);
-
-		/* Note : by the time we get there (LAP retries and co),
-		 * the lsaps may already have gone. This avoid getting stuck
-		 * forever in LAP_ACTIVE state - Jean II */
-		if (HASHBIN_GET_SIZE(self->lsaps) == 0) {
-			pr_debug("%s() NO LSAPs !\n",  __func__);
-			irlmp_start_idle_timer(self, LM_IDLE_TIMEOUT);
-		}
-		break;
-	case LM_LAP_CONNECT_REQUEST:
-		/* Already trying to connect */
-		break;
-	case LM_LAP_CONNECT_CONFIRM:
-		/* For all lsap_ce E Associated do LS_Connect_confirm */
-		irlmp_next_lap_state(self, LAP_ACTIVE);
-
-		/* Tell LSAPs that they can start sending data */
-		irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM);
-
-		/* Note : by the time we get there (LAP retries and co),
-		 * the lsaps may already have gone. This avoid getting stuck
-		 * forever in LAP_ACTIVE state - Jean II */
-		if (HASHBIN_GET_SIZE(self->lsaps) == 0) {
-			pr_debug("%s() NO LSAPs !\n",  __func__);
-			irlmp_start_idle_timer(self, LM_IDLE_TIMEOUT);
-		}
-		break;
-	case LM_LAP_DISCONNECT_INDICATION:
-		pr_debug("%s(), LM_LAP_DISCONNECT_INDICATION\n",  __func__);
-		irlmp_next_lap_state(self, LAP_STANDBY);
-
-		/* Send disconnect event to all LSAPs using this link */
-		irlmp_do_all_lsap_event(self->lsaps,
-					LM_LAP_DISCONNECT_INDICATION);
-		break;
-	case LM_LAP_DISCONNECT_REQUEST:
-		pr_debug("%s(), LM_LAP_DISCONNECT_REQUEST\n",  __func__);
-
-		/* One of the LSAP did timeout or was closed, if it was
-		 * the last one, try to get out of here - Jean II */
-		if (HASHBIN_GET_SIZE(self->lsaps) <= 1) {
-			irlap_disconnect_request(self->irlap);
-		}
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s\n",
-			 __func__, irlmp_event[event]);
-		break;
-	}
-}
-
-/*
- * Function irlmp_state_active (event, skb, info)
- *
- *    ACTIVE, IrLAP connection is active
- *
- */
-static void irlmp_state_active(struct lap_cb *self, IRLMP_EVENT event,
-			       struct sk_buff *skb)
-{
-	switch (event) {
-	case LM_LAP_CONNECT_REQUEST:
-		pr_debug("%s(), LS_CONNECT_REQUEST\n", __func__);
-
-		/*
-		 * IrLAP may have a pending disconnect. We tried to close
-		 * IrLAP, but it was postponed because the link was
-		 * busy or we were still sending packets. As we now
-		 * need it, make sure it stays on. Jean II
-		 */
-		irlap_clear_disconnect(self->irlap);
-
-		/*
-		 *  LAP connection already active, just bounce back! Since we
-		 *  don't know which LSAP that tried to do this, we have to
-		 *  notify all LSAPs using this LAP, but that should be safe to
-		 *  do anyway.
-		 */
-		irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM);
-
-		/* Needed by connect indication */
-		irlmp_do_all_lsap_event(irlmp->unconnected_lsaps,
-					LM_LAP_CONNECT_CONFIRM);
-		/* Keep state */
-		break;
-	case LM_LAP_DISCONNECT_REQUEST:
-		/*
-		 *  Need to find out if we should close IrLAP or not. If there
-		 *  is only one LSAP connection left on this link, that LSAP
-		 *  must be the one that tries to close IrLAP. It will be
-		 *  removed later and moved to the list of unconnected LSAPs
-		 */
-		if (HASHBIN_GET_SIZE(self->lsaps) > 0) {
-			/* Timer value is checked in irsysctl - Jean II */
-			irlmp_start_idle_timer(self, sysctl_lap_keepalive_time * HZ / 1000);
-		} else {
-			/* No more connections, so close IrLAP */
-
-			/* We don't want to change state just yet, because
-			 * we want to reflect accurately the real state of
-			 * the LAP, not the state we wish it was in,
-			 * so that we don't lose LM_LAP_CONNECT_REQUEST.
-			 * In some cases, IrLAP won't close the LAP
-			 * immediately. For example, it might still be
-			 * retrying packets or waiting for the pf bit.
-			 * As the LAP always send a DISCONNECT_INDICATION
-			 * in PCLOSE or SCLOSE, just change state on that.
-			 * Jean II */
-			irlap_disconnect_request(self->irlap);
-		}
-		break;
-	case LM_LAP_IDLE_TIMEOUT:
-		if (HASHBIN_GET_SIZE(self->lsaps) == 0) {
-			/* Same reasoning as above - keep state */
-			irlap_disconnect_request(self->irlap);
-		}
-		break;
-	case LM_LAP_DISCONNECT_INDICATION:
-		irlmp_next_lap_state(self, LAP_STANDBY);
-
-		/* In some case, at this point our side has already closed
-		 * all lsaps, and we are waiting for the idle_timer to
-		 * expire. If another device reconnect immediately, the
-		 * idle timer will expire in the midle of the connection
-		 * initialisation, screwing up things a lot...
-		 * Therefore, we must stop the timer... */
-		irlmp_stop_idle_timer(self);
-
-		/*
-		 *  Inform all connected LSAP's using this link
-		 */
-		irlmp_do_all_lsap_event(self->lsaps,
-					LM_LAP_DISCONNECT_INDICATION);
-
-		/* Force an expiry of the discovery log.
-		 * Now that the LAP is free, the system may attempt to
-		 * connect to another device. Unfortunately, our entries
-		 * are stale. There is a small window (<3s) before the
-		 * normal discovery will run and where irlmp_connect_request()
-		 * can get the wrong info, so make sure things get
-		 * cleaned *NOW* ;-) - Jean II */
-		irlmp_do_expiry();
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s\n",
-			 __func__, irlmp_event[event]);
-		break;
-	}
-}
-
-/*********************************************************************
- *
- *    LSAP connection control states
- *
- ********************************************************************/
-
-/*
- * Function irlmp_state_disconnected (event, skb, info)
- *
- *    DISCONNECTED
- *
- */
-static int irlmp_state_disconnected(struct lsap_cb *self, IRLMP_EVENT event,
-				    struct sk_buff *skb)
-{
-	int ret = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
-	switch (event) {
-#ifdef CONFIG_IRDA_ULTRA
-	case LM_UDATA_INDICATION:
-		/* This is most bizarre. Those packets are  aka unreliable
-		 * connected, aka IrLPT or SOCK_DGRAM/IRDAPROTO_UNITDATA.
-		 * Why do we pass them as Ultra ??? Jean II */
-		irlmp_connless_data_indication(self, skb);
-		break;
-#endif /* CONFIG_IRDA_ULTRA */
-	case LM_CONNECT_REQUEST:
-		pr_debug("%s(), LM_CONNECT_REQUEST\n", __func__);
-
-		if (self->conn_skb) {
-			net_warn_ratelimited("%s: busy with another request!\n",
-					     __func__);
-			return -EBUSY;
-		}
-		/* Don't forget to refcount it (see irlmp_connect_request()) */
-		skb_get(skb);
-		self->conn_skb = skb;
-
-		irlmp_next_lsap_state(self, LSAP_SETUP_PEND);
-
-		/* Start watchdog timer (5 secs for now) */
-		irlmp_start_watchdog_timer(self, 5*HZ);
-
-		irlmp_do_lap_event(self->lap, LM_LAP_CONNECT_REQUEST, NULL);
-		break;
-	case LM_CONNECT_INDICATION:
-		if (self->conn_skb) {
-			net_warn_ratelimited("%s: busy with another request!\n",
-					     __func__);
-			return -EBUSY;
-		}
-		/* Don't forget to refcount it (see irlap_driver_rcv()) */
-		skb_get(skb);
-		self->conn_skb = skb;
-
-		irlmp_next_lsap_state(self, LSAP_CONNECT_PEND);
-
-		/* Start watchdog timer
-		 * This is not mentionned in the spec, but there is a rare
-		 * race condition that can get the socket stuck.
-		 * If we receive this event while our LAP is closing down,
-		 * the LM_LAP_CONNECT_REQUEST get lost and we get stuck in
-		 * CONNECT_PEND state forever.
-		 * The other cause of getting stuck down there is if the
-		 * higher layer never reply to the CONNECT_INDICATION.
-		 * Anyway, it make sense to make sure that we always have
-		 * a backup plan. 1 second is plenty (should be immediate).
-		 * Jean II */
-		irlmp_start_watchdog_timer(self, 1*HZ);
-
-		irlmp_do_lap_event(self->lap, LM_LAP_CONNECT_REQUEST, NULL);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
-			 __func__, irlmp_event[event], self->slsap_sel);
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlmp_state_connect (self, event, skb)
- *
- *    CONNECT
- *
- */
-static int irlmp_state_connect(struct lsap_cb *self, IRLMP_EVENT event,
-				struct sk_buff *skb)
-{
-	struct lsap_cb *lsap;
-	int ret = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
-	switch (event) {
-	case LM_CONNECT_RESPONSE:
-		/*
-		 *  Bind this LSAP to the IrLAP link where the connect was
-		 *  received
-		 */
-		lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self,
-				      NULL);
-
-		IRDA_ASSERT(lsap == self, return -1;);
-		IRDA_ASSERT(self->lap != NULL, return -1;);
-		IRDA_ASSERT(self->lap->lsaps != NULL, return -1;);
-
-		hashbin_insert(self->lap->lsaps, (irda_queue_t *) self,
-			       (long) self, NULL);
-
-		set_bit(0, &self->connected);	/* TRUE */
-
-		irlmp_send_lcf_pdu(self->lap, self->dlsap_sel,
-				   self->slsap_sel, CONNECT_CNF, skb);
-
-		del_timer(&self->watchdog_timer);
-
-		irlmp_next_lsap_state(self, LSAP_DATA_TRANSFER_READY);
-		break;
-	case LM_WATCHDOG_TIMEOUT:
-		/* May happen, who knows...
-		 * Jean II */
-		pr_debug("%s() WATCHDOG_TIMEOUT!\n",  __func__);
-
-		/* Disconnect, get out... - Jean II */
-		self->lap = NULL;
-		self->dlsap_sel = LSAP_ANY;
-		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-		break;
-	default:
-		/* LM_LAP_DISCONNECT_INDICATION : Should never happen, we
-		 * are *not* yet bound to the IrLAP link. Jean II */
-		pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
-			 __func__, irlmp_event[event], self->slsap_sel);
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlmp_state_connect_pend (event, skb, info)
- *
- *    CONNECT_PEND
- *
- */
-static int irlmp_state_connect_pend(struct lsap_cb *self, IRLMP_EVENT event,
-				    struct sk_buff *skb)
-{
-	struct sk_buff *tx_skb;
-	int ret = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
-	switch (event) {
-	case LM_CONNECT_REQUEST:
-		/* Keep state */
-		break;
-	case LM_CONNECT_RESPONSE:
-		pr_debug("%s(), LM_CONNECT_RESPONSE, no indication issued yet\n",
-			 __func__);
-		/* Keep state */
-		break;
-	case LM_DISCONNECT_REQUEST:
-		pr_debug("%s(), LM_DISCONNECT_REQUEST, not yet bound to IrLAP connection\n",
-			 __func__);
-		/* Keep state */
-		break;
-	case LM_LAP_CONNECT_CONFIRM:
-		pr_debug("%s(), LS_CONNECT_CONFIRM\n",  __func__);
-		irlmp_next_lsap_state(self, LSAP_CONNECT);
-
-		tx_skb = self->conn_skb;
-		self->conn_skb = NULL;
-
-		irlmp_connect_indication(self, tx_skb);
-		/* Drop reference count - see irlmp_connect_indication(). */
-		dev_kfree_skb(tx_skb);
-		break;
-	case LM_WATCHDOG_TIMEOUT:
-		/* Will happen in some rare cases because of a race condition.
-		 * Just make sure we don't stay there forever...
-		 * Jean II */
-		pr_debug("%s() WATCHDOG_TIMEOUT!\n",  __func__);
-
-		/* Go back to disconnected mode, keep the socket waiting */
-		self->lap = NULL;
-		self->dlsap_sel = LSAP_ANY;
-		if(self->conn_skb)
-			dev_kfree_skb(self->conn_skb);
-		self->conn_skb = NULL;
-		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-		break;
-	default:
-		/* LM_LAP_DISCONNECT_INDICATION : Should never happen, we
-		 * are *not* yet bound to the IrLAP link. Jean II */
-		pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
-			 __func__, irlmp_event[event], self->slsap_sel);
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlmp_state_dtr (self, event, skb)
- *
- *    DATA_TRANSFER_READY
- *
- */
-static int irlmp_state_dtr(struct lsap_cb *self, IRLMP_EVENT event,
-			   struct sk_buff *skb)
-{
-	LM_REASON reason;
-	int ret = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-	IRDA_ASSERT(self->lap != NULL, return -1;);
-
-	switch (event) {
-	case LM_DATA_REQUEST: /* Optimize for the common case */
-		irlmp_send_data_pdu(self->lap, self->dlsap_sel,
-				    self->slsap_sel, FALSE, skb);
-		break;
-	case LM_DATA_INDICATION: /* Optimize for the common case */
-		irlmp_data_indication(self, skb);
-		break;
-	case LM_UDATA_REQUEST:
-		IRDA_ASSERT(skb != NULL, return -1;);
-		irlmp_send_data_pdu(self->lap, self->dlsap_sel,
-				    self->slsap_sel, TRUE, skb);
-		break;
-	case LM_UDATA_INDICATION:
-		irlmp_udata_indication(self, skb);
-		break;
-	case LM_CONNECT_REQUEST:
-		pr_debug("%s(), LM_CONNECT_REQUEST, error, LSAP already connected\n",
-			 __func__);
-		/* Keep state */
-		break;
-	case LM_CONNECT_RESPONSE:
-		pr_debug("%s(), LM_CONNECT_RESPONSE, error, LSAP already connected\n",
-			 __func__);
-		/* Keep state */
-		break;
-	case LM_DISCONNECT_REQUEST:
-		irlmp_send_lcf_pdu(self->lap, self->dlsap_sel, self->slsap_sel,
-				   DISCONNECT, skb);
-		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-		/* Called only from irlmp_disconnect_request(), will
-		 * unbind from LAP over there. Jean II */
-
-		/* Try to close the LAP connection if its still there */
-		if (self->lap) {
-			pr_debug("%s(), trying to close IrLAP\n",
-				 __func__);
-			irlmp_do_lap_event(self->lap,
-					   LM_LAP_DISCONNECT_REQUEST,
-					   NULL);
-		}
-		break;
-	case LM_LAP_DISCONNECT_INDICATION:
-		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
-		reason = irlmp_convert_lap_reason(self->lap->reason);
-
-		irlmp_disconnect_indication(self, reason, NULL);
-		break;
-	case LM_DISCONNECT_INDICATION:
-		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
-		IRDA_ASSERT(self->lap != NULL, return -1;);
-		IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
-
-		IRDA_ASSERT(skb != NULL, return -1;);
-		IRDA_ASSERT(skb->len > 3, return -1;);
-		reason = skb->data[3];
-
-		 /* Try to close the LAP connection */
-		pr_debug("%s(), trying to close IrLAP\n", __func__);
-		irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
-
-		irlmp_disconnect_indication(self, reason, skb);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
-			 __func__, irlmp_event[event], self->slsap_sel);
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlmp_state_setup (event, skb, info)
- *
- *    SETUP, Station Control has set up the underlying IrLAP connection.
- *    An LSAP connection request has been transmitted to the peer
- *    LSAP-Connection Control FSM and we are awaiting reply.
- */
-static int irlmp_state_setup(struct lsap_cb *self, IRLMP_EVENT event,
-			     struct sk_buff *skb)
-{
-	LM_REASON reason;
-	int ret = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;);
-
-	switch (event) {
-	case LM_CONNECT_CONFIRM:
-		irlmp_next_lsap_state(self, LSAP_DATA_TRANSFER_READY);
-
-		del_timer(&self->watchdog_timer);
-
-		irlmp_connect_confirm(self, skb);
-		break;
-	case LM_DISCONNECT_INDICATION:
-		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
-		IRDA_ASSERT(self->lap != NULL, return -1;);
-		IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
-
-		IRDA_ASSERT(skb != NULL, return -1;);
-		IRDA_ASSERT(skb->len > 3, return -1;);
-		reason = skb->data[3];
-
-		 /* Try to close the LAP connection */
-		pr_debug("%s(), trying to close IrLAP\n",  __func__);
-		irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
-
-		irlmp_disconnect_indication(self, reason, skb);
-		break;
-	case LM_LAP_DISCONNECT_INDICATION:
-		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
-		del_timer(&self->watchdog_timer);
-
-		IRDA_ASSERT(self->lap != NULL, return -1;);
-		IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;);
-
-		reason = irlmp_convert_lap_reason(self->lap->reason);
-
-		irlmp_disconnect_indication(self, reason, skb);
-		break;
-	case LM_WATCHDOG_TIMEOUT:
-		pr_debug("%s() WATCHDOG_TIMEOUT!\n", __func__);
-
-		IRDA_ASSERT(self->lap != NULL, return -1;);
-		irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
-		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
-		irlmp_disconnect_indication(self, LM_CONNECT_FAILURE, NULL);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
-			 __func__, irlmp_event[event], self->slsap_sel);
-		break;
-	}
-	return ret;
-}
-
-/*
- * Function irlmp_state_setup_pend (event, skb, info)
- *
- *    SETUP_PEND, An LM_CONNECT_REQUEST has been received from the service
- *    user to set up an LSAP connection. A request has been sent to the
- *    LAP FSM to set up the underlying IrLAP connection, and we
- *    are awaiting confirm.
- */
-static int irlmp_state_setup_pend(struct lsap_cb *self, IRLMP_EVENT event,
-				  struct sk_buff *skb)
-{
-	struct sk_buff *tx_skb;
-	LM_REASON reason;
-	int ret = 0;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(irlmp != NULL, return -1;);
-
-	switch (event) {
-	case LM_LAP_CONNECT_CONFIRM:
-		IRDA_ASSERT(self->conn_skb != NULL, return -1;);
-
-		tx_skb = self->conn_skb;
-		self->conn_skb = NULL;
-
-		irlmp_send_lcf_pdu(self->lap, self->dlsap_sel,
-				   self->slsap_sel, CONNECT_CMD, tx_skb);
-		/* Drop reference count - see irlap_data_request(). */
-		dev_kfree_skb(tx_skb);
-
-		irlmp_next_lsap_state(self, LSAP_SETUP);
-		break;
-	case LM_WATCHDOG_TIMEOUT:
-		pr_debug("%s() : WATCHDOG_TIMEOUT !\n",  __func__);
-
-		IRDA_ASSERT(self->lap != NULL, return -1;);
-		irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL);
-		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
-		irlmp_disconnect_indication(self, LM_CONNECT_FAILURE, NULL);
-		break;
-	case LM_LAP_DISCONNECT_INDICATION: /* LS_Disconnect.indication */
-		del_timer( &self->watchdog_timer);
-
-		irlmp_next_lsap_state(self, LSAP_DISCONNECTED);
-
-		reason = irlmp_convert_lap_reason(self->lap->reason);
-
-		irlmp_disconnect_indication(self, reason, NULL);
-		break;
-	default:
-		pr_debug("%s(), Unknown event %s on LSAP %#02x\n",
-			 __func__, irlmp_event[event], self->slsap_sel);
-		break;
-	}
-	return ret;
-}
diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c
deleted file mode 100644
index 38b0f994bc7b..000000000000
--- a/net/irda/irlmp_frame.c
+++ /dev/null
@@ -1,476 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irlmp_frame.c
- * Version:       0.9
- * Description:   IrLMP frame implementation
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Aug 19 02:09:59 1997
- * Modified at:   Mon Dec 13 13:41:12 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>
- *     All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-#include <linux/kernel.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlap.h>
-#include <net/irda/timer.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irlmp_frame.h>
-#include <net/irda/discovery.h>
-
-static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap,
-				       __u8 slsap, int status, hashbin_t *);
-
-inline void irlmp_send_data_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap,
-				int expedited, struct sk_buff *skb)
-{
-	skb->data[0] = dlsap;
-	skb->data[1] = slsap;
-
-	if (expedited) {
-		pr_debug("%s(), sending expedited data\n", __func__);
-		irlap_data_request(self->irlap, skb, TRUE);
-	} else
-		irlap_data_request(self->irlap, skb, FALSE);
-}
-
-/*
- * Function irlmp_send_lcf_pdu (dlsap, slsap, opcode,skb)
- *
- *    Send Link Control Frame to IrLAP
- */
-void irlmp_send_lcf_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap,
-			__u8 opcode, struct sk_buff *skb)
-{
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	frame = skb->data;
-
-	frame[0] = dlsap | CONTROL_BIT;
-	frame[1] = slsap;
-
-	frame[2] = opcode;
-
-	if (opcode == DISCONNECT)
-		frame[3] = 0x01; /* Service user request */
-	else
-		frame[3] = 0x00; /* rsvd */
-
-	irlap_data_request(self->irlap, skb, FALSE);
-}
-
-/*
- * Function irlmp_input (skb)
- *
- *    Used by IrLAP to pass received data frames to IrLMP layer
- *
- */
-void irlmp_link_data_indication(struct lap_cb *self, struct sk_buff *skb,
-				int unreliable)
-{
-	struct lsap_cb *lsap;
-	__u8   slsap_sel;   /* Source (this) LSAP address */
-	__u8   dlsap_sel;   /* Destination LSAP address */
-	__u8   *fp;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-	IRDA_ASSERT(skb->len > 2, return;);
-
-	fp = skb->data;
-
-	/*
-	 *  The next statements may be confusing, but we do this so that
-	 *  destination LSAP of received frame is source LSAP in our view
-	 */
-	slsap_sel = fp[0] & LSAP_MASK;
-	dlsap_sel = fp[1];
-
-	/*
-	 *  Check if this is an incoming connection, since we must deal with
-	 *  it in a different way than other established connections.
-	 */
-	if ((fp[0] & CONTROL_BIT) && (fp[2] == CONNECT_CMD)) {
-		pr_debug("%s(), incoming connection, source LSAP=%d, dest LSAP=%d\n",
-			 __func__, slsap_sel, dlsap_sel);
-
-		/* Try to find LSAP among the unconnected LSAPs */
-		lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, CONNECT_CMD,
-				       irlmp->unconnected_lsaps);
-
-		/* Maybe LSAP was already connected, so try one more time */
-		if (!lsap) {
-			pr_debug("%s(), incoming connection for LSAP already connected\n",
-				 __func__);
-			lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, 0,
-					       self->lsaps);
-		}
-	} else
-		lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, 0,
-				       self->lsaps);
-
-	if (lsap == NULL) {
-		pr_debug("IrLMP, Sorry, no LSAP for received frame!\n");
-		pr_debug("%s(), slsap_sel = %02x, dlsap_sel = %02x\n",
-			 __func__, slsap_sel, dlsap_sel);
-		if (fp[0] & CONTROL_BIT) {
-			pr_debug("%s(), received control frame %02x\n",
-				 __func__, fp[2]);
-		} else {
-			pr_debug("%s(), received data frame\n", __func__);
-		}
-		return;
-	}
-
-	/*
-	 *  Check if we received a control frame?
-	 */
-	if (fp[0] & CONTROL_BIT) {
-		switch (fp[2]) {
-		case CONNECT_CMD:
-			lsap->lap = self;
-			irlmp_do_lsap_event(lsap, LM_CONNECT_INDICATION, skb);
-			break;
-		case CONNECT_CNF:
-			irlmp_do_lsap_event(lsap, LM_CONNECT_CONFIRM, skb);
-			break;
-		case DISCONNECT:
-			pr_debug("%s(), Disconnect indication!\n",
-				 __func__);
-			irlmp_do_lsap_event(lsap, LM_DISCONNECT_INDICATION,
-					    skb);
-			break;
-		case ACCESSMODE_CMD:
-			pr_debug("Access mode cmd not implemented!\n");
-			break;
-		case ACCESSMODE_CNF:
-			pr_debug("Access mode cnf not implemented!\n");
-			break;
-		default:
-			pr_debug("%s(), Unknown control frame %02x\n",
-				 __func__, fp[2]);
-			break;
-		}
-	} else if (unreliable) {
-		/* Optimize and bypass the state machine if possible */
-		if (lsap->lsap_state == LSAP_DATA_TRANSFER_READY)
-			irlmp_udata_indication(lsap, skb);
-		else
-			irlmp_do_lsap_event(lsap, LM_UDATA_INDICATION, skb);
-	} else {
-		/* Optimize and bypass the state machine if possible */
-		if (lsap->lsap_state == LSAP_DATA_TRANSFER_READY)
-			irlmp_data_indication(lsap, skb);
-		else
-			irlmp_do_lsap_event(lsap, LM_DATA_INDICATION, skb);
-	}
-}
-
-/*
- * Function irlmp_link_unitdata_indication (self, skb)
- *
- *
- *
- */
-#ifdef CONFIG_IRDA_ULTRA
-void irlmp_link_unitdata_indication(struct lap_cb *self, struct sk_buff *skb)
-{
-	struct lsap_cb *lsap;
-	__u8   slsap_sel;   /* Source (this) LSAP address */
-	__u8   dlsap_sel;   /* Destination LSAP address */
-	__u8   pid;         /* Protocol identifier */
-	__u8   *fp;
-	unsigned long flags;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-	IRDA_ASSERT(skb->len > 2, return;);
-
-	fp = skb->data;
-
-	/*
-	 *  The next statements may be confusing, but we do this so that
-	 *  destination LSAP of received frame is source LSAP in our view
-	 */
-	slsap_sel = fp[0] & LSAP_MASK;
-	dlsap_sel = fp[1];
-	pid       = fp[2];
-
-	if (pid & 0x80) {
-		pr_debug("%s(), extension in PID not supp!\n",
-			 __func__);
-		return;
-	}
-
-	/* Check if frame is addressed to the connectionless LSAP */
-	if ((slsap_sel != LSAP_CONNLESS) || (dlsap_sel != LSAP_CONNLESS)) {
-		pr_debug("%s(), dropping frame!\n", __func__);
-		return;
-	}
-
-	/* Search the connectionless LSAP */
-	spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-	lsap = (struct lsap_cb *) hashbin_get_first(irlmp->unconnected_lsaps);
-	while (lsap != NULL) {
-		/*
-		 *  Check if source LSAP and dest LSAP selectors and PID match.
-		 */
-		if ((lsap->slsap_sel == slsap_sel) &&
-		    (lsap->dlsap_sel == dlsap_sel) &&
-		    (lsap->pid == pid))
-		{
-			break;
-		}
-		lsap = (struct lsap_cb *) hashbin_get_next(irlmp->unconnected_lsaps);
-	}
-	spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags);
-
-	if (lsap)
-		irlmp_connless_data_indication(lsap, skb);
-	else {
-		pr_debug("%s(), found no matching LSAP!\n", __func__);
-	}
-}
-#endif /* CONFIG_IRDA_ULTRA */
-
-/*
- * Function irlmp_link_disconnect_indication (reason, userdata)
- *
- *    IrLAP has disconnected
- *
- */
-void irlmp_link_disconnect_indication(struct lap_cb *lap,
-				      struct irlap_cb *irlap,
-				      LAP_REASON reason,
-				      struct sk_buff *skb)
-{
-	IRDA_ASSERT(lap != NULL, return;);
-	IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;);
-
-	lap->reason = reason;
-	lap->daddr = DEV_ADDR_ANY;
-
-	/* FIXME: must do something with the skb if any */
-
-	/*
-	 *  Inform station state machine
-	 */
-	irlmp_do_lap_event(lap, LM_LAP_DISCONNECT_INDICATION, NULL);
-}
-
-/*
- * Function irlmp_link_connect_indication (qos)
- *
- *    Incoming LAP connection!
- *
- */
-void irlmp_link_connect_indication(struct lap_cb *self, __u32 saddr,
-				   __u32 daddr, struct qos_info *qos,
-				   struct sk_buff *skb)
-{
-	/* Copy QoS settings for this session */
-	self->qos = qos;
-
-	/* Update destination device address */
-	self->daddr = daddr;
-	IRDA_ASSERT(self->saddr == saddr, return;);
-
-	irlmp_do_lap_event(self, LM_LAP_CONNECT_INDICATION, skb);
-}
-
-/*
- * Function irlmp_link_connect_confirm (qos)
- *
- *    LAP connection confirmed!
- *
- */
-void irlmp_link_connect_confirm(struct lap_cb *self, struct qos_info *qos,
-				struct sk_buff *skb)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-	IRDA_ASSERT(qos != NULL, return;);
-
-	/* Don't need use the skb for now */
-
-	/* Copy QoS settings for this session */
-	self->qos = qos;
-
-	irlmp_do_lap_event(self, LM_LAP_CONNECT_CONFIRM, NULL);
-}
-
-/*
- * Function irlmp_link_discovery_indication (self, log)
- *
- *    Device is discovering us
- *
- * It's not an answer to our own discoveries, just another device trying
- * to perform discovery, but we don't want to miss the opportunity
- * to exploit this information, because :
- *	o We may not actively perform discovery (just passive discovery)
- *	o This type of discovery is much more reliable. In some cases, it
- *	  seem that less than 50% of our discoveries get an answer, while
- *	  we always get ~100% of these.
- *	o Make faster discovery, statistically divide time of discovery
- *	  events by 2 (important for the latency aspect and user feel)
- *	o Even is we do active discovery, the other node might not
- *	  answer our discoveries (ex: Palm). The Palm will just perform
- *	  one active discovery and connect directly to us.
- *
- * However, when both devices discover each other, they might attempt to
- * connect to each other following the discovery event, and it would create
- * collisions on the medium (SNRM battle).
- * The "fix" for that is to disable all connection requests in IrLAP
- * for 100ms after a discovery indication by setting the media_busy flag.
- * Previously, we used to postpone the event which was quite ugly. Now
- * that IrLAP takes care of this problem, just pass the event up...
- *
- * Jean II
- */
-void irlmp_link_discovery_indication(struct lap_cb *self,
-				     discovery_t *discovery)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-
-	/* Add to main log, cleanup */
-	irlmp_add_discovery(irlmp->cachelog, discovery);
-
-	/* Just handle it the same way as a discovery confirm,
-	 * bypass the LM_LAP state machine (see below) */
-	irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_PASSIVE);
-}
-
-/*
- * Function irlmp_link_discovery_confirm (self, log)
- *
- *    Called by IrLAP with a list of discoveries after the discovery
- *    request has been carried out. A NULL log is received if IrLAP
- *    was unable to carry out the discovery request
- *
- */
-void irlmp_link_discovery_confirm(struct lap_cb *self, hashbin_t *log)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;);
-
-	/* Add to main log, cleanup */
-	irlmp_add_discovery_log(irlmp->cachelog, log);
-
-	/* Propagate event to various LSAPs registered for it.
-	 * We bypass the LM_LAP state machine because
-	 *	1) We do it regardless of the LM_LAP state
-	 *	2) It doesn't affect the LM_LAP state
-	 *	3) Faster, slimer, simpler, ...
-	 * Jean II */
-	irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_ACTIVE);
-}
-
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
-static inline void irlmp_update_cache(struct lap_cb *lap,
-				      struct lsap_cb *lsap)
-{
-	/* Prevent concurrent read to get garbage */
-	lap->cache.valid = FALSE;
-	/* Update cache entry */
-	lap->cache.dlsap_sel = lsap->dlsap_sel;
-	lap->cache.slsap_sel = lsap->slsap_sel;
-	lap->cache.lsap = lsap;
-	lap->cache.valid = TRUE;
-}
-#endif
-
-/*
- * Function irlmp_find_handle (self, dlsap_sel, slsap_sel, status, queue)
- *
- *    Find handle associated with destination and source LSAP
- *
- * Any IrDA connection (LSAP/TSAP) is uniquely identified by
- * 3 parameters, the local lsap, the remote lsap and the remote address.
- * We may initiate multiple connections to the same remote service
- * (they will have different local lsap), a remote device may initiate
- * multiple connections to the same local service (they will have
- * different remote lsap), or multiple devices may connect to the same
- * service and may use the same remote lsap (and they will have
- * different remote address).
- * So, where is the remote address ? Each LAP connection is made with
- * a single remote device, so imply a specific remote address.
- * Jean II
- */
-static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap_sel,
-				       __u8 slsap_sel, int status,
-				       hashbin_t *queue)
-{
-	struct lsap_cb *lsap;
-	unsigned long flags;
-
-	/*
-	 *  Optimize for the common case. We assume that the last frame
-	 *  received is in the same connection as the last one, so check in
-	 *  cache first to avoid the linear search
-	 */
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
-	if ((self->cache.valid) &&
-	    (self->cache.slsap_sel == slsap_sel) &&
-	    (self->cache.dlsap_sel == dlsap_sel))
-	{
-		return self->cache.lsap;
-	}
-#endif
-
-	spin_lock_irqsave(&queue->hb_spinlock, flags);
-
-	lsap = (struct lsap_cb *) hashbin_get_first(queue);
-	while (lsap != NULL) {
-		/*
-		 *  If this is an incoming connection, then the destination
-		 *  LSAP selector may have been specified as LM_ANY so that
-		 *  any client can connect. In that case we only need to check
-		 *  if the source LSAP (in our view!) match!
-		 */
-		if ((status == CONNECT_CMD) &&
-		    (lsap->slsap_sel == slsap_sel) &&
-		    (lsap->dlsap_sel == LSAP_ANY)) {
-			/* This is where the dest lsap sel is set on incoming
-			 * lsaps */
-			lsap->dlsap_sel = dlsap_sel;
-			break;
-		}
-		/*
-		 *  Check if source LSAP and dest LSAP selectors match.
-		 */
-		if ((lsap->slsap_sel == slsap_sel) &&
-		    (lsap->dlsap_sel == dlsap_sel))
-			break;
-
-		lsap = (struct lsap_cb *) hashbin_get_next(queue);
-	}
-#ifdef CONFIG_IRDA_CACHE_LAST_LSAP
-	if(lsap)
-		irlmp_update_cache(self, lsap);
-#endif
-	spin_unlock_irqrestore(&queue->hb_spinlock, flags);
-
-	/* Return what we've found or NULL */
-	return lsap;
-}
diff --git a/net/irda/irmod.c b/net/irda/irmod.c
deleted file mode 100644
index c5e35b85c477..000000000000
--- a/net/irda/irmod.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irmod.c
- * Version:       0.9
- * Description:   IrDA stack main entry points
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Dec 15 13:55:39 1997
- * Modified at:   Wed Jan  5 15:12:41 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1997, 1999-2000 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2004 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-/*
- * This file contains the main entry points of the IrDA stack.
- * They are in this file and not af_irda.c because some developpers
- * are using the IrDA stack without the socket API (compiling out
- * af_irda.c).
- * Jean II
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irmod.h>		/* notify_t */
-#include <net/irda/irlap.h>		/* irlap_init */
-#include <net/irda/irlmp.h>		/* irlmp_init */
-#include <net/irda/iriap.h>		/* iriap_init */
-#include <net/irda/irttp.h>		/* irttp_init */
-#include <net/irda/irda_device.h>	/* irda_device_init */
-
-/* Packet type handler.
- * Tell the kernel how IrDA packets should be handled.
- */
-static struct packet_type irda_packet_type __read_mostly = {
-	.type	= cpu_to_be16(ETH_P_IRDA),
-	.func	= irlap_driver_rcv,	/* Packet type handler irlap_frame.c */
-};
-
-/*
- * Function irda_notify_init (notify)
- *
- *    Used for initializing the notify structure
- *
- */
-void irda_notify_init(notify_t *notify)
-{
-	notify->data_indication = NULL;
-	notify->udata_indication = NULL;
-	notify->connect_confirm = NULL;
-	notify->connect_indication = NULL;
-	notify->disconnect_indication = NULL;
-	notify->flow_indication = NULL;
-	notify->status_indication = NULL;
-	notify->instance = NULL;
-	strlcpy(notify->name, "Unknown", sizeof(notify->name));
-}
-EXPORT_SYMBOL(irda_notify_init);
-
-/*
- * Function irda_init (void)
- *
- *  Protocol stack initialisation entry point.
- *  Initialise the various components of the IrDA stack
- */
-static int __init irda_init(void)
-{
-	int ret = 0;
-
-	/* Lower layer of the stack */
-	irlmp_init();
-	irlap_init();
-
-	/* Driver/dongle support */
-	irda_device_init();
-
-	/* Higher layers of the stack */
-	iriap_init();
-	irttp_init();
-	ret = irsock_init();
-	if (ret < 0)
-		goto out_err_1;
-
-	/* Add IrDA packet type (Start receiving packets) */
-	dev_add_pack(&irda_packet_type);
-
-	/* External APIs */
-#ifdef CONFIG_PROC_FS
-	irda_proc_register();
-#endif
-#ifdef CONFIG_SYSCTL
-	ret = irda_sysctl_register();
-	if (ret < 0)
-		goto out_err_2;
-#endif
-
-	ret = irda_nl_register();
-	if (ret < 0)
-		goto out_err_3;
-
-	return 0;
-
- out_err_3:
-#ifdef CONFIG_SYSCTL
-	irda_sysctl_unregister();
- out_err_2:
-#endif
-#ifdef CONFIG_PROC_FS
-	irda_proc_unregister();
-#endif
-
-	/* Remove IrDA packet type (stop receiving packets) */
-	dev_remove_pack(&irda_packet_type);
-
-	/* Remove higher layers */
-	irsock_cleanup();
- out_err_1:
-	irttp_cleanup();
-	iriap_cleanup();
-
-	/* Remove lower layers */
-	irda_device_cleanup();
-	irlap_cleanup(); /* Must be done before irlmp_cleanup()! DB */
-
-	/* Remove middle layer */
-	irlmp_cleanup();
-
-
-	return ret;
-}
-
-/*
- * Function irda_cleanup (void)
- *
- *  Protocol stack cleanup/removal entry point.
- *  Cleanup the various components of the IrDA stack
- */
-static void __exit irda_cleanup(void)
-{
-	/* Remove External APIs */
-	irda_nl_unregister();
-
-#ifdef CONFIG_SYSCTL
-	irda_sysctl_unregister();
-#endif
-#ifdef CONFIG_PROC_FS
-	irda_proc_unregister();
-#endif
-
-	/* Remove IrDA packet type (stop receiving packets) */
-	dev_remove_pack(&irda_packet_type);
-
-	/* Remove higher layers */
-	irsock_cleanup();
-	irttp_cleanup();
-	iriap_cleanup();
-
-	/* Remove lower layers */
-	irda_device_cleanup();
-	irlap_cleanup(); /* Must be done before irlmp_cleanup()! DB */
-
-	/* Remove middle layer */
-	irlmp_cleanup();
-}
-
-/*
- * The IrDA stack must be initialised *before* drivers get initialised,
- * and *before* higher protocols (IrLAN/IrCOMM/IrNET) get initialised,
- * otherwise bad things will happen (hashbins will be NULL for example).
- * Those modules are at module_init()/device_initcall() level.
- *
- * On the other hand, it needs to be initialised *after* the basic
- * networking, the /proc/net filesystem and sysctl module. Those are
- * currently initialised in .../init/main.c (before initcalls).
- * Also, IrDA drivers needs to be initialised *after* the random number
- * generator (main stack and higher layer init don't need it anymore).
- *
- * Jean II
- */
-subsys_initcall(irda_init);
-module_exit(irda_cleanup);
-
-MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no> & Jean Tourrilhes <jt@hpl.hp.com>");
-MODULE_DESCRIPTION("The Linux IrDA Protocol Stack");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_IRDA);
diff --git a/net/irda/irnet/Kconfig b/net/irda/irnet/Kconfig
deleted file mode 100644
index 28c557f0fdd2..000000000000
--- a/net/irda/irnet/Kconfig
+++ /dev/null
@@ -1,13 +0,0 @@
-config IRNET
-	tristate "IrNET protocol"
-	depends on IRDA && PPP
-	help
-	  Say Y here if you want to build support for the IrNET protocol.
-	  To compile it as a module, choose M here: the module will be
-	  called irnet.  IrNET is a PPP driver, so you will also need a
-	  working PPP subsystem (driver, daemon and config)...
-
-	  IrNET is an alternate way to transfer TCP/IP traffic over IrDA.  It
-	  uses synchronous PPP over a set of point to point IrDA sockets.  You
-	  can use it between Linux machine or with W2k.
-
diff --git a/net/irda/irnet/Makefile b/net/irda/irnet/Makefile
deleted file mode 100644
index 61c365c8a2a0..000000000000
--- a/net/irda/irnet/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the Linux IrDA IrNET protocol layer.
-#
-
-obj-$(CONFIG_IRNET) += irnet.o
-
-irnet-y := irnet_ppp.o irnet_irda.o
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
deleted file mode 100644
index 9d451f8ed47a..000000000000
--- a/net/irda/irnet/irnet.h
+++ /dev/null
@@ -1,522 +0,0 @@
-/*
- *	IrNET protocol module : Synchronous PPP over an IrDA socket.
- *
- *		Jean II - HPL `00 - <jt@hpl.hp.com>
- *
- * This file contains definitions and declarations global to the IrNET module,
- * all grouped in one place...
- * This file is a *private* header, so other modules don't want to know
- * what's in there...
- *
- * Note : as most part of the Linux kernel, this module is available
- * under the GNU General Public License (GPL).
- */
-
-#ifndef IRNET_H
-#define IRNET_H
-
-/************************** DOCUMENTATION ***************************/
-/*
- * What is IrNET
- * -------------
- * IrNET is a protocol allowing to carry TCP/IP traffic between two
- * IrDA peers in an efficient fashion. It is a thin layer, passing PPP
- * packets to IrTTP and vice versa. It uses PPP in synchronous mode,
- * because IrTTP offer a reliable sequenced packet service (as opposed
- * to a byte stream). In fact, you could see IrNET as carrying TCP/IP
- * in a IrDA socket, using PPP to provide the glue.
- *
- * The main difference with traditional PPP over IrCOMM is that we
- * avoid the framing and serial emulation which are a performance
- * bottleneck. It also allows multipoint communications in a sensible
- * fashion.
- *
- * The main difference with IrLAN is that we use PPP for the link
- * management, which is more standard, interoperable and flexible than
- * the IrLAN protocol. For example, PPP adds authentication,
- * encryption, compression, header compression and automated routing
- * setup. And, as IrNET let PPP do the hard work, the implementation
- * is much simpler than IrLAN.
- *
- * The Linux implementation
- * ------------------------
- * IrNET is written on top of the Linux-IrDA stack, and interface with
- * the generic Linux PPP driver. Because IrNET depend on recent
- * changes of the PPP driver interface, IrNET will work only with very
- * recent kernel (2.3.99-pre6 and up).
- *
- * The present implementation offer the following features :
- *	o simple user interface using pppd
- *	o efficient implementation (interface directly to PPP and IrTTP)
- *	o addressing (you can specify the name of the IrNET recipient)
- *	o multipoint operation (limited by IrLAP specification)
- *	o information in /proc/net/irda/irnet
- *	o IrNET events on /dev/irnet (for user space daemon)
- *	o IrNET daemon (irnetd) to automatically handle incoming requests
- *	o Windows 2000 compatibility (tested, but need more work)
- * Currently missing :
- *	o Lot's of testing (that's your job)
- *	o Connection retries (may be too hard to do)
- *	o Check pppd persist mode
- *	o User space daemon (to automatically handle incoming requests)
- *
- * The setup is not currently the most easy, but this should get much
- * better when everything will get integrated...
- *
- * Acknowledgements
- * ----------------
- * This module is based on :
- *	o The PPP driver (ppp_synctty/ppp_generic) by Paul Mackerras
- *	o The IrLAN protocol (irlan_common/XXX) by Dag Brattli
- *	o The IrSock interface (af_irda) by Dag Brattli
- *	o Some other bits from the kernel and my drivers...
- * Infinite thanks to those brave souls for providing the infrastructure
- * upon which IrNET is built.
- *
- * Thanks to all my colleagues in HP for helping me. In particular,
- * thanks to Salil Pradhan and Bill Serra for W2k testing...
- * Thanks to Luiz Magalhaes for irnetd and much testing...
- *
- * Thanks to Alan Cox for answering lot's of my stupid questions, and
- * to Paul Mackerras answering my questions on how to best integrate
- * IrNET and pppd.
- *
- * Jean II
- *
- * Note on some implementations choices...
- * ------------------------------------
- *	1) Direct interface vs tty/socket
- * I could have used a tty interface to hook to ppp and use the full
- * socket API to connect to IrDA. The code would have been easier to
- * maintain, and maybe the code would have been smaller...
- * Instead, we hook directly to ppp_generic and to IrTTP, which make
- * things more complicated...
- *
- * The first reason is flexibility : this allow us to create IrNET
- * instances on demand (no /dev/ircommX crap) and to allow linkname
- * specification on pppd command line...
- *
- * Second reason is speed optimisation. If you look closely at the
- * transmit and receive paths, you will notice that they are "super lean"
- * (that's why they look ugly), with no function calls and as little data
- * copy and modification as I could...
- *
- *	2) irnetd in user space
- * irnetd is implemented in user space, which is necessary to call pppd.
- * This also give maximum benefits in term of flexibility and customability,
- * and allow to offer the event channel, useful for other stuff like debug.
- *
- * On the other hand, this require a loose coordination between the
- * present module and irnetd. One critical area is how incoming request
- * are handled.
- * When irnet receive an incoming request, it send an event to irnetd and
- * drop the incoming IrNET socket.
- * irnetd start a pppd instance, which create a new IrNET socket. This new
- * socket is then connected in the originating node to the pppd instance.
- * At this point, in the originating node, the first socket is closed.
- *
- * I admit, this is a bit messy and waste some resources. The alternative
- * is caching incoming socket, and that's also quite messy and waste
- * resources.
- * We also make connection time slower. For example, on a 115 kb/s link it
- * adds 60ms to the connection time (770 ms). However, this is slower than
- * the time it takes to fire up pppd on my P133...
- *
- *
- * History :
- * -------
- *
- * v1 - 15.5.00 - Jean II
- *	o Basic IrNET (hook to ppp_generic & IrTTP - incl. multipoint)
- *	o control channel on /dev/irnet (set name/address)
- *	o event channel on /dev/irnet (for user space daemon)
- *
- * v2 - 5.6.00 - Jean II
- *	o Enable DROP_NOT_READY to avoid PPP timeouts & other weirdness...
- *	o Add DISCONNECT_TO event and rename DISCONNECT_FROM.
- *	o Set official device number alloaction on /dev/irnet
- *
- * v3 - 30.8.00 - Jean II
- *	o Update to latest Linux-IrDA changes :
- *		- queue_t => irda_queue_t
- *	o Update to ppp-2.4.0 :
- *		- move irda_irnet_connect from PPPIOCATTACH to TIOCSETD
- *	o Add EXPIRE event (depend on new IrDA-Linux patch)
- *	o Switch from `hashbin_remove' to `hashbin_remove_this' to fix
- *	  a multilink bug... (depend on new IrDA-Linux patch)
- *	o fix a self->daddr to self->raddr in irda_irnet_connect to fix
- *	  another multilink bug (darn !)
- *	o Remove LINKNAME_IOCTL cruft
- *
- * v3b - 31.8.00 - Jean II
- *	o Dump discovery log at event channel startup
- *
- * v4 - 28.9.00 - Jean II
- *	o Fix interaction between poll/select and dump discovery log
- *	o Add IRNET_BLOCKED_LINK event (depend on new IrDA-Linux patch)
- *	o Add IRNET_NOANSWER_FROM event (mostly to help support)
- *	o Release flow control in disconnect_indication
- *	o Block packets while connecting (speed up connections)
- *
- * v5 - 11.01.01 - Jean II
- *	o Init self->max_header_size, just in case...
- *	o Set up ap->chan.hdrlen, to get zero copy on tx side working.
- *	o avoid tx->ttp->flow->ppp->tx->... loop, by checking flow state
- *		Thanks to Christian Gennerat for finding this bug !
- *	---
- *	o Declare the proper MTU/MRU that we can support
- *		(but PPP doesn't read the MTU value :-()
- *	o Declare hashbin HB_NOLOCK instead of HB_LOCAL to avoid
- *		disabling and enabling irq twice
- *
- * v6 - 31.05.01 - Jean II
- *	o Print source address in Found, Discovery, Expiry & Request events
- *	o Print requested source address in /proc/net/irnet
- *	o Change control channel input. Allow multiple commands in one line.
- *	o Add saddr command to change ap->rsaddr (and use that in IrDA)
- *	---
- *	o Make the IrDA connection procedure totally asynchronous.
- *	  Heavy rewrite of the IAS query code and the whole connection
- *	  procedure. Now, irnet_connect() no longer need to be called from
- *	  a process context...
- *	o Enable IrDA connect retries in ppp_irnet_send(). The good thing
- *	  is that IrDA connect retries are directly driven by PPP LCP
- *	  retries (we retry for each LCP packet), so that everything
- *	  is transparently controlled from pppd lcp-max-configure.
- *	o Add ttp_connect flag to prevent rentry on the connect procedure
- *	o Test and fixups to eliminate side effects of retries
- *
- * v7 - 22.08.01 - Jean II
- *	o Cleanup : Change "saddr = 0x0" to "saddr = DEV_ADDR_ANY"
- *	o Fix bug in BLOCK_WHEN_CONNECT introduced in v6 : due to the
- *	  asynchronous IAS query, self->tsap is NULL when PPP send the
- *	  first packet.  This was preventing "connect-delay 0" to work.
- *	  Change the test in ppp_irnet_send() to self->ttp_connect.
- *
- * v8 - 1.11.01 - Jean II
- *	o Tighten the use of self->ttp_connect and self->ttp_open to
- *	  prevent various race conditions.
- *	o Avoid leaking discovery log and skb
- *	o Replace "self" with "server" in irnet_connect_indication() to
- *	  better detect cut'n'paste error ;-)
- *
- * v9 - 29.11.01 - Jean II
- *	o Fix event generation in disconnect indication that I broke in v8
- *	  It was always generation "No-Answer" because I was testing ttp_open
- *	  just after clearing it. *blush*.
- *	o Use newly created irttp_listen() to fix potential crash when LAP
- *	  destroyed before irnet module removed.
- *
- * v10 - 4.3.2 - Jean II
- *	o When receiving a disconnect indication, don't reenable the
- *	  PPP Tx queue, this will trigger a reconnect. Instead, close
- *	  the channel, which will kill pppd...
- *
- * v11 - 20.3.02 - Jean II
- *	o Oops ! v10 fix disabled IrNET retries and passive behaviour.
- *	  Better fix in irnet_disconnect_indication() :
- *	  - if connected, kill pppd via hangup.
- *	  - if not connected, reenable ppp Tx, which trigger IrNET retry.
- *
- * v12 - 10.4.02 - Jean II
- *	o Fix race condition in irnet_connect_indication().
- *	  If the socket was already trying to connect, drop old connection
- *	  and use new one only if acting as primary. See comments.
- *
- * v13 - 30.5.02 - Jean II
- *	o Update module init code
- *
- * v14 - 20.2.03 - Jean II
- *	o Add discovery hint bits in the control channel.
- *	o Remove obsolete MOD_INC/DEC_USE_COUNT in favor of .owner
- *
- * v15 - 7.4.03 - Jean II
- *	o Replace spin_lock_irqsave() with spin_lock_bh() so that we can
- *	  use ppp_unit_number(). It's probably also better overall...
- *	o Disable call to ppp_unregister_channel(), because we can't do it.
- */
-
-/***************************** INCLUDES *****************************/
-
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/tty.h>
-#include <linux/proc_fs.h>
-#include <linux/netdevice.h>
-#include <linux/poll.h>
-#include <linux/capability.h>
-#include <linux/ctype.h>	/* isspace() */
-#include <linux/string.h>	/* skip_spaces() */
-#include <linux/uaccess.h>
-#include <linux/init.h>
-
-#include <linux/ppp_defs.h>
-#include <linux/ppp-ioctl.h>
-#include <linux/ppp_channel.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/iriap.h>
-#include <net/irda/irias_object.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/irttp.h>
-#include <net/irda/discovery.h>
-
-/***************************** OPTIONS *****************************/
-/*
- * Define or undefine to compile or not some optional part of the
- * IrNET driver...
- * Note : the present defaults make sense, play with that at your
- * own risk...
- */
-/* IrDA side of the business... */
-#define DISCOVERY_NOMASK	/* To enable W2k compatibility... */
-#define ADVERTISE_HINT		/* Advertise IrLAN hint bit */
-#define ALLOW_SIMULT_CONNECT	/* This seem to work, cross fingers... */
-#define DISCOVERY_EVENTS	/* Query the discovery log to post events */
-#define INITIAL_DISCOVERY	/* Dump current discovery log as events */
-#undef STREAM_COMPAT		/* Not needed - potentially messy */
-#undef CONNECT_INDIC_KICK	/* Might mess IrDA, not needed */
-#undef FAIL_SEND_DISCONNECT	/* Might mess IrDA, not needed */
-#undef PASS_CONNECT_PACKETS	/* Not needed ? Safe */
-#undef MISSING_PPP_API		/* Stuff I wish I could do */
-
-/* PPP side of the business */
-#define BLOCK_WHEN_CONNECT	/* Block packets when connecting */
-#define CONNECT_IN_SEND		/* Retry IrDA connection procedure */
-#undef FLUSH_TO_PPP		/* Not sure about this one, let's play safe */
-#undef SECURE_DEVIRNET		/* Bah... */
-
-/****************************** DEBUG ******************************/
-
-/*
- * This set of flags enable and disable all the various warning,
- * error and debug message of this driver.
- * Each section can be enabled and disabled independently
- */
-/* In the PPP part */
-#define DEBUG_CTRL_TRACE	0	/* Control channel */
-#define DEBUG_CTRL_INFO		0	/* various info */
-#define DEBUG_CTRL_ERROR	1	/* problems */
-#define DEBUG_FS_TRACE		0	/* filesystem callbacks */
-#define DEBUG_FS_INFO		0	/* various info */
-#define DEBUG_FS_ERROR		1	/* problems */
-#define DEBUG_PPP_TRACE		0	/* PPP related functions */
-#define DEBUG_PPP_INFO		0	/* various info */
-#define DEBUG_PPP_ERROR		1	/* problems */
-#define DEBUG_MODULE_TRACE	0	/* module insertion/removal */
-#define DEBUG_MODULE_ERROR	1	/* problems */
-
-/* In the IrDA part */
-#define DEBUG_IRDA_SR_TRACE	0	/* IRDA subroutines */
-#define DEBUG_IRDA_SR_INFO	0	/* various info */
-#define DEBUG_IRDA_SR_ERROR	1	/* problems */
-#define DEBUG_IRDA_SOCK_TRACE	0	/* IRDA main socket functions */
-#define DEBUG_IRDA_SOCK_INFO	0	/* various info */
-#define DEBUG_IRDA_SOCK_ERROR	1	/* problems */
-#define DEBUG_IRDA_SERV_TRACE	0	/* The IrNET server */
-#define DEBUG_IRDA_SERV_INFO	0	/* various info */
-#define DEBUG_IRDA_SERV_ERROR	1	/* problems */
-#define DEBUG_IRDA_TCB_TRACE	0	/* IRDA IrTTP callbacks */
-#define DEBUG_IRDA_CB_INFO	0	/* various info */
-#define DEBUG_IRDA_CB_ERROR	1	/* problems */
-#define DEBUG_IRDA_OCB_TRACE	0	/* IRDA other callbacks */
-#define DEBUG_IRDA_OCB_INFO	0	/* various info */
-#define DEBUG_IRDA_OCB_ERROR	1	/* problems */
-
-#define DEBUG_ASSERT		0	/* Verify all assertions */
-
-/*
- * These are the macros we are using to actually print the debug
- * statements. Don't look at it, it's ugly...
- *
- * One of the trick is that, as the DEBUG_XXX are constant, the
- * compiler will optimise away the if() in all cases.
- */
-/* All error messages (will show up in the normal logs) */
-#define DERROR(dbg, format, args...) \
-	{if(DEBUG_##dbg) \
-		printk(KERN_INFO "irnet: %s(): " format, __func__ , ##args);}
-
-/* Normal debug message (will show up in /var/log/debug) */
-#define DEBUG(dbg, format, args...) \
-	{if(DEBUG_##dbg) \
-		printk(KERN_DEBUG "irnet: %s(): " format, __func__ , ##args);}
-
-/* Entering a function (trace) */
-#define DENTER(dbg, format, args...) \
-	{if(DEBUG_##dbg) \
-		printk(KERN_DEBUG "irnet: -> %s" format, __func__ , ##args);}
-
-/* Entering and exiting a function in one go (trace) */
-#define DPASS(dbg, format, args...) \
-	{if(DEBUG_##dbg) \
-		printk(KERN_DEBUG "irnet: <>%s" format, __func__ , ##args);}
-
-/* Exiting a function (trace) */
-#define DEXIT(dbg, format, args...) \
-	{if(DEBUG_##dbg) \
-		printk(KERN_DEBUG "irnet: <-%s()" format, __func__ , ##args);}
-
-/* Exit a function with debug */
-#define DRETURN(ret, dbg, args...) \
-	{DEXIT(dbg, ": " args);\
-	return ret; }
-
-/* Exit a function on failed condition */
-#define DABORT(cond, ret, dbg, args...) \
-	{if(cond) {\
-		DERROR(dbg, args);\
-		return ret; }}
-
-/* Invalid assertion, print out an error and exit... */
-#define DASSERT(cond, ret, dbg, args...) \
-	{if((DEBUG_ASSERT) && !(cond)) {\
-		DERROR(dbg, "Invalid assertion: " args);\
-		return ret; }}
-
-/************************ CONSTANTS & MACROS ************************/
-
-/* Paranoia */
-#define IRNET_MAGIC	0xB00754
-
-/* Number of control events in the control channel buffer... */
-#define IRNET_MAX_EVENTS	8	/* Should be more than enough... */
-
-/****************************** TYPES ******************************/
-
-/*
- * This is the main structure where we store all the data pertaining to
- * one instance of irnet.
- * Note : in irnet functions, a pointer this structure is usually called
- * "ap" or "self". If the code is borrowed from the IrDA stack, it tend
- * to be called "self", and if it is borrowed from the PPP driver it is
- * "ap". Apart from that, it's exactly the same structure ;-)
- */
-typedef struct irnet_socket
-{
-  /* ------------------- Instance management ------------------- */
-  /* We manage a linked list of IrNET socket instances */
-  irda_queue_t		q;		/* Must be first - for hasbin */
-  int			magic;		/* Paranoia */
-
-  /* --------------------- FileSystem part --------------------- */
-  /* "pppd" interact directly with us on a /dev/ file */
-  struct file *		file;		/* File descriptor of this instance */
-  /* TTY stuff - to keep "pppd" happy */
-  struct ktermios	termios;	/* Various tty flags */
-  /* Stuff for the control channel */
-  int			event_index;	/* Last read in the event log */
-
-  /* ------------------------- PPP part ------------------------- */
-  /* We interface directly to the ppp_generic driver in the kernel */
-  int			ppp_open;	/* registered with ppp_generic */
-  struct ppp_channel	chan;		/* Interface to generic ppp layer */
-
-  int			mru;		/* Max size of PPP payload */
-  u32			xaccm[8];	/* Asynchronous character map (just */
-  u32			raccm;		/* to please pppd - dummy) */
-  unsigned int		flags;		/* PPP flags (compression, ...) */
-  unsigned int		rbits;		/* Unused receive flags ??? */
-  struct work_struct disconnect_work;   /* Process context disconnection */
-  /* ------------------------ IrTTP part ------------------------ */
-  /* We create a pseudo "socket" over the IrDA tranport */
-  unsigned long		ttp_open;	/* Set when IrTTP is ready */
-  unsigned long		ttp_connect;	/* Set when IrTTP is connecting */
-  struct tsap_cb *	tsap;		/* IrTTP instance (the connection) */
-
-  char			rname[NICKNAME_MAX_LEN + 1];
-					/* IrDA nickname of destination */
-  __u32			rdaddr;		/* Requested peer IrDA address */
-  __u32			rsaddr;		/* Requested local IrDA address */
-  __u32			daddr;		/* actual peer IrDA address */
-  __u32			saddr;		/* my local IrDA address */
-  __u8			dtsap_sel;	/* Remote TSAP selector */
-  __u8			stsap_sel;	/* Local TSAP selector */
-
-  __u32			max_sdu_size_rx;/* Socket parameters used for IrTTP */
-  __u32			max_sdu_size_tx;
-  __u32			max_data_size;
-  __u8			max_header_size;
-  LOCAL_FLOW		tx_flow;	/* State of the Tx path in IrTTP */
-
-  /* ------------------- IrLMP and IrIAS part ------------------- */
-  /* Used for IrDA Discovery and socket name resolution */
-  void *		ckey;		/* IrLMP client handle */
-  __u16			mask;		/* Hint bits mask (filter discov.)*/
-  int			nslots;		/* Number of slots for discovery */
-
-  struct iriap_cb *	iriap;		/* Used to query remote IAS */
-  int			errno;		/* status of the IAS query */
-
-  /* -------------------- Discovery log part -------------------- */
-  /* Used by initial discovery on the control channel
-   * and by irnet_discover_daddr_and_lsap_sel() */
-  struct irda_device_info *discoveries;	/* Copy of the discovery log */
-  int			disco_index;	/* Last read in the discovery log */
-  int			disco_number;	/* Size of the discovery log */
-
-  struct mutex		lock;
-
-} irnet_socket;
-
-/*
- * This is the various event that we will generate on the control channel
- */
-typedef enum irnet_event
-{
-  IRNET_DISCOVER,		/* New IrNET node discovered */
-  IRNET_EXPIRE,			/* IrNET node expired */
-  IRNET_CONNECT_TO,		/* IrNET socket has connected to other node */
-  IRNET_CONNECT_FROM,		/* Other node has connected to IrNET socket */
-  IRNET_REQUEST_FROM,		/* Non satisfied connection request */
-  IRNET_NOANSWER_FROM,		/* Failed connection request */
-  IRNET_BLOCKED_LINK,		/* Link (IrLAP) is blocked for > 3s */
-  IRNET_DISCONNECT_FROM,	/* IrNET socket has disconnected */
-  IRNET_DISCONNECT_TO		/* Closing IrNET socket */
-} irnet_event;
-
-/*
- * This is the storage for an event and its arguments
- */
-typedef struct irnet_log
-{
-  irnet_event	event;
-  int		unit;
-  __u32		saddr;
-  __u32		daddr;
-  char		name[NICKNAME_MAX_LEN + 1];	/* 21 + 1 */
-  __u16_host_order hints;			/* Discovery hint bits */
-} irnet_log;
-
-/*
- * This is the storage for all events and related stuff...
- */
-typedef struct irnet_ctrl_channel
-{
-  irnet_log	log[IRNET_MAX_EVENTS];	/* Event log */
-  int		index;		/* Current index in log */
-  spinlock_t	spinlock;	/* Serialize access to the event log */
-  wait_queue_head_t	rwait;	/* processes blocked on read (or poll) */
-} irnet_ctrl_channel;
-
-/**************************** PROTOTYPES ****************************/
-/*
- * Global functions of the IrNET module
- * Note : we list here also functions called from one file to the other.
- */
-
-/* -------------------------- IRDA PART -------------------------- */
-int irda_irnet_create(irnet_socket *);	/* Initialise an IrNET socket */
-int irda_irnet_connect(irnet_socket *);	/* Try to connect over IrDA */
-void irda_irnet_destroy(irnet_socket *);	/* Teardown an IrNET socket */
-int irda_irnet_init(void);		/* Initialise IrDA part of IrNET */
-void irda_irnet_cleanup(void);		/* Teardown IrDA part of IrNET */
-
-/**************************** VARIABLES ****************************/
-
-/* Control channel stuff - allocated in irnet_irda.h */
-extern struct irnet_ctrl_channel	irnet_events;
-
-#endif /* IRNET_H */
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
deleted file mode 100644
index e390bceeb2f8..000000000000
--- a/net/irda/irnet/irnet_irda.c
+++ /dev/null
@@ -1,1885 +0,0 @@
-/*
- *	IrNET protocol module : Synchronous PPP over an IrDA socket.
- *
- *		Jean II - HPL `00 - <jt@hpl.hp.com>
- *
- * This file implement the IRDA interface of IrNET.
- * Basically, we sit on top of IrTTP. We set up IrTTP, IrIAS properly,
- * and exchange frames with IrTTP.
- */
-
-#include "irnet_irda.h"		/* Private header */
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <asm/unaligned.h>
-
-/*
- * PPP disconnect work: we need to make sure we're in
- * process context when calling ppp_unregister_channel().
- */
-static void irnet_ppp_disconnect(struct work_struct *work)
-{
-	irnet_socket * self =
-		container_of(work, irnet_socket, disconnect_work);
-
-	if (self == NULL)
-		return;
-	/*
-	 * If we were connected, cleanup & close the PPP
-	 * channel, which will kill pppd (hangup) and the rest.
-	 */
-	if (self->ppp_open && !self->ttp_open && !self->ttp_connect) {
-		ppp_unregister_channel(&self->chan);
-		self->ppp_open = 0;
-	}
-}
-
-/************************* CONTROL CHANNEL *************************/
-/*
- * When ppp is not active, /dev/irnet act as a control channel.
- * Writing allow to set up the IrDA destination of the IrNET channel,
- * and any application may be read events happening on IrNET...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Post an event to the control channel...
- * Put the event in the log, and then wait all process blocked on read
- * so they can read the log...
- */
-static void
-irnet_post_event(irnet_socket *	ap,
-		 irnet_event	event,
-		 __u32		saddr,
-		 __u32		daddr,
-		 char *		name,
-		 __u16		hints)
-{
-  int			index;		/* In the log */
-
-  DENTER(CTRL_TRACE, "(ap=0x%p, event=%d, daddr=%08x, name=``%s'')\n",
-	 ap, event, daddr, name);
-
-  /* Protect this section via spinlock.
-   * Note : as we are the only event producer, we only need to exclude
-   * ourself when touching the log, which is nice and easy.
-   */
-  spin_lock_bh(&irnet_events.spinlock);
-
-  /* Copy the event in the log */
-  index = irnet_events.index;
-  irnet_events.log[index].event = event;
-  irnet_events.log[index].daddr = daddr;
-  irnet_events.log[index].saddr = saddr;
-  /* Try to copy IrDA nickname */
-  if(name)
-    strcpy(irnet_events.log[index].name, name);
-  else
-    irnet_events.log[index].name[0] = '\0';
-  /* Copy hints */
-  irnet_events.log[index].hints.word = hints;
-  /* Try to get ppp unit number */
-  if((ap != (irnet_socket *) NULL) && (ap->ppp_open))
-    irnet_events.log[index].unit = ppp_unit_number(&ap->chan);
-  else
-    irnet_events.log[index].unit = -1;
-
-  /* Increment the index
-   * Note that we increment the index only after the event is written,
-   * to make sure that the readers don't get garbage... */
-  irnet_events.index = (index + 1) % IRNET_MAX_EVENTS;
-
-  DEBUG(CTRL_INFO, "New event index is %d\n", irnet_events.index);
-
-  /* Spin lock end */
-  spin_unlock_bh(&irnet_events.spinlock);
-
-  /* Now : wake up everybody waiting for events... */
-  wake_up_interruptible_all(&irnet_events.rwait);
-
-  DEXIT(CTRL_TRACE, "\n");
-}
-
-/************************* IRDA SUBROUTINES *************************/
-/*
- * These are a bunch of subroutines called from other functions
- * down there, mostly common code or to improve readability...
- *
- * Note : we duplicate quite heavily some routines of af_irda.c,
- * because our input structure (self) is quite different
- * (struct irnet instead of struct irda_sock), which make sharing
- * the same code impossible (at least, without templates).
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_open_tsap (self)
- *
- *    Open local Transport Service Access Point (TSAP)
- *
- * Create a IrTTP instance for us and set all the IrTTP callbacks.
- */
-static inline int
-irnet_open_tsap(irnet_socket *	self)
-{
-  notify_t	notify;		/* Callback structure */
-
-  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
-  DABORT(self->tsap != NULL, -EBUSY, IRDA_SR_ERROR, "Already busy !\n");
-
-  /* Initialize IrTTP callbacks to be used by the IrDA stack */
-  irda_notify_init(&notify);
-  notify.connect_confirm	= irnet_connect_confirm;
-  notify.connect_indication	= irnet_connect_indication;
-  notify.disconnect_indication	= irnet_disconnect_indication;
-  notify.data_indication	= irnet_data_indication;
-  /*notify.udata_indication	= NULL;*/
-  notify.flow_indication	= irnet_flow_indication;
-  notify.status_indication	= irnet_status_indication;
-  notify.instance		= self;
-  strlcpy(notify.name, IRNET_NOTIFY_NAME, sizeof(notify.name));
-
-  /* Open an IrTTP instance */
-  self->tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT,
-			       &notify);
-  DABORT(self->tsap == NULL, -ENOMEM,
-	 IRDA_SR_ERROR, "Unable to allocate TSAP !\n");
-
-  /* Remember which TSAP selector we actually got */
-  self->stsap_sel = self->tsap->stsap_sel;
-
-  DEXIT(IRDA_SR_TRACE, " - tsap=0x%p, sel=0x%X\n",
-	self->tsap, self->stsap_sel);
-  return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_ias_to_tsap (self, result, value)
- *
- *    Examine an IAS object and extract TSAP
- *
- * We do an IAP query to find the TSAP associated with the IrNET service.
- * When IrIAP pass us the result of the query, this function look at
- * the return values to check for failures and extract the TSAP if
- * possible.
- * Also deallocate value
- * The failure is in self->errno
- * Return TSAP or -1
- */
-static inline __u8
-irnet_ias_to_tsap(irnet_socket *	self,
-		  int			result,
-		  struct ias_value *	value)
-{
-  __u8	dtsap_sel = 0;		/* TSAP we are looking for */
-
-  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
-  /* By default, no error */
-  self->errno = 0;
-
-  /* Check if request succeeded */
-  switch(result)
-    {
-      /* Standard errors : service not available */
-    case IAS_CLASS_UNKNOWN:
-    case IAS_ATTRIB_UNKNOWN:
-      DEBUG(IRDA_SR_INFO, "IAS object doesn't exist ! (%d)\n", result);
-      self->errno = -EADDRNOTAVAIL;
-      break;
-
-      /* Other errors, most likely IrDA stack failure */
-    default :
-      DEBUG(IRDA_SR_INFO, "IAS query failed ! (%d)\n", result);
-      self->errno = -EHOSTUNREACH;
-      break;
-
-      /* Success : we got what we wanted */
-    case IAS_SUCCESS:
-      break;
-    }
-
-  /* Check what was returned to us */
-  if(value != NULL)
-    {
-      /* What type of argument have we got ? */
-      switch(value->type)
-	{
-	case IAS_INTEGER:
-	  DEBUG(IRDA_SR_INFO, "result=%d\n", value->t.integer);
-	  if(value->t.integer != -1)
-	    /* Get the remote TSAP selector */
-	    dtsap_sel = value->t.integer;
-	  else
-	    self->errno = -EADDRNOTAVAIL;
-	  break;
-	default:
-	  self->errno = -EADDRNOTAVAIL;
-	  DERROR(IRDA_SR_ERROR, "bad type ! (0x%X)\n", value->type);
-	  break;
-	}
-
-      /* Cleanup */
-      irias_delete_value(value);
-    }
-  else	/* value == NULL */
-    {
-      /* Nothing returned to us - usually result != SUCCESS */
-      if(!(self->errno))
-	{
-	  DERROR(IRDA_SR_ERROR,
-		 "IrDA bug : result == SUCCESS && value == NULL\n");
-	  self->errno = -EHOSTUNREACH;
-	}
-    }
-  DEXIT(IRDA_SR_TRACE, "\n");
-
-  /* Return the TSAP */
-  return dtsap_sel;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_find_lsap_sel (self)
- *
- *    Try to lookup LSAP selector in remote LM-IAS
- *
- * Basically, we start a IAP query, and then go to sleep. When the query
- * return, irnet_getvalue_confirm will wake us up, and we can examine the
- * result of the query...
- * Note that in some case, the query fail even before we go to sleep,
- * creating some races...
- */
-static inline int
-irnet_find_lsap_sel(irnet_socket *	self)
-{
-  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
-  /* This should not happen */
-  DABORT(self->iriap, -EBUSY, IRDA_SR_ERROR, "busy with a previous query.\n");
-
-  /* Create an IAP instance, will be closed in irnet_getvalue_confirm() */
-  self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
-			   irnet_getvalue_confirm);
-
-  /* Treat unexpected signals as disconnect */
-  self->errno = -EHOSTUNREACH;
-
-  /* Query remote LM-IAS */
-  iriap_getvaluebyclass_request(self->iriap, self->rsaddr, self->daddr,
-				IRNET_SERVICE_NAME, IRNET_IAS_VALUE);
-
-  /* The above request is non-blocking.
-   * After a while, IrDA will call us back in irnet_getvalue_confirm()
-   * We will then call irnet_ias_to_tsap() and finish the
-   * connection procedure */
-
-  DEXIT(IRDA_SR_TRACE, "\n");
-  return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_connect_tsap (self)
- *
- *    Initialise the TTP socket and initiate TTP connection
- *
- */
-static inline int
-irnet_connect_tsap(irnet_socket *	self)
-{
-  int		err;
-
-  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
-  /* Open a local TSAP (an IrTTP instance) */
-  err = irnet_open_tsap(self);
-  if(err != 0)
-    {
-      clear_bit(0, &self->ttp_connect);
-      DERROR(IRDA_SR_ERROR, "connect aborted!\n");
-      return err;
-    }
-
-  /* Connect to remote device */
-  err = irttp_connect_request(self->tsap, self->dtsap_sel,
-			      self->rsaddr, self->daddr, NULL,
-			      self->max_sdu_size_rx, NULL);
-  if(err != 0)
-    {
-      clear_bit(0, &self->ttp_connect);
-      DERROR(IRDA_SR_ERROR, "connect aborted!\n");
-      return err;
-    }
-
-  /* The above call is non-blocking.
-   * After a while, the IrDA stack will either call us back in
-   * irnet_connect_confirm() or irnet_disconnect_indication()
-   * See you there ;-) */
-
-  DEXIT(IRDA_SR_TRACE, "\n");
-  return err;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_discover_next_daddr (self)
- *
- *    Query the IrNET TSAP of the next device in the log.
- *
- * Used in the TSAP discovery procedure.
- */
-static inline int
-irnet_discover_next_daddr(irnet_socket *	self)
-{
-  /* Close the last instance of IrIAP, and open a new one.
-   * We can't reuse the IrIAP instance in the IrIAP callback */
-  if(self->iriap)
-    {
-      iriap_close(self->iriap);
-      self->iriap = NULL;
-    }
-  /* Create a new IAP instance */
-  self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
-			   irnet_discovervalue_confirm);
-  if(self->iriap == NULL)
-    return -ENOMEM;
-
-  /* Next discovery - before the call to avoid races */
-  self->disco_index++;
-
-  /* Check if we have one more address to try */
-  if(self->disco_index < self->disco_number)
-    {
-      /* Query remote LM-IAS */
-      iriap_getvaluebyclass_request(self->iriap,
-				    self->discoveries[self->disco_index].saddr,
-				    self->discoveries[self->disco_index].daddr,
-				    IRNET_SERVICE_NAME, IRNET_IAS_VALUE);
-      /* The above request is non-blocking.
-       * After a while, IrDA will call us back in irnet_discovervalue_confirm()
-       * We will then call irnet_ias_to_tsap() and come back here again... */
-      return 0;
-    }
-  else
-    return 1;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_discover_daddr_and_lsap_sel (self)
- *
- *    This try to find a device with the requested service.
- *
- * Initiate a TSAP discovery procedure.
- * It basically look into the discovery log. For each address in the list,
- * it queries the LM-IAS of the device to find if this device offer
- * the requested service.
- * If there is more than one node supporting the service, we complain
- * to the user (it should move devices around).
- * If we find one node which have the requested TSAP, we connect to it.
- *
- * This function just start the whole procedure. It request the discovery
- * log and submit the first IAS query.
- * The bulk of the job is handled in irnet_discovervalue_confirm()
- *
- * Note : this procedure fails if there is more than one device in range
- * on the same dongle, because IrLMP doesn't disconnect the LAP when the
- * last LSAP is closed. Moreover, we would need to wait the LAP
- * disconnection...
- */
-static inline int
-irnet_discover_daddr_and_lsap_sel(irnet_socket *	self)
-{
-  int	ret;
-
-  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
-  /* Ask lmp for the current discovery log */
-  self->discoveries = irlmp_get_discoveries(&self->disco_number, self->mask,
-					    DISCOVERY_DEFAULT_SLOTS);
-
-  /* Check if the we got some results */
-  if(self->discoveries == NULL)
-    {
-      self->disco_number = -1;
-      clear_bit(0, &self->ttp_connect);
-      DRETURN(-ENETUNREACH, IRDA_SR_INFO, "No Cachelog...\n");
-    }
-  DEBUG(IRDA_SR_INFO, "Got the log (0x%p), size is %d\n",
-	self->discoveries, self->disco_number);
-
-  /* Start with the first discovery */
-  self->disco_index = -1;
-  self->daddr = DEV_ADDR_ANY;
-
-  /* This will fail if the log is empty - this is non-blocking */
-  ret = irnet_discover_next_daddr(self);
-  if(ret)
-    {
-      /* Close IAP */
-      if(self->iriap)
-	iriap_close(self->iriap);
-      self->iriap = NULL;
-
-      /* Cleanup our copy of the discovery log */
-      kfree(self->discoveries);
-      self->discoveries = NULL;
-
-      clear_bit(0, &self->ttp_connect);
-      DRETURN(-ENETUNREACH, IRDA_SR_INFO, "Cachelog empty...\n");
-    }
-
-  /* Follow me in irnet_discovervalue_confirm() */
-
-  DEXIT(IRDA_SR_TRACE, "\n");
-  return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_dname_to_daddr (self)
- *
- *    Convert an IrDA nickname to a valid IrDA address
- *
- * It basically look into the discovery log until there is a match.
- */
-static inline int
-irnet_dname_to_daddr(irnet_socket *	self)
-{
-  struct irda_device_info *discoveries;	/* Copy of the discovery log */
-  int	number;			/* Number of nodes in the log */
-  int	i;
-
-  DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self);
-
-  /* Ask lmp for the current discovery log */
-  discoveries = irlmp_get_discoveries(&number, 0xffff,
-				      DISCOVERY_DEFAULT_SLOTS);
-  /* Check if the we got some results */
-  if(discoveries == NULL)
-    DRETURN(-ENETUNREACH, IRDA_SR_INFO, "Cachelog empty...\n");
-
-  /*
-   * Now, check all discovered devices (if any), and connect
-   * client only about the services that the client is
-   * interested in...
-   */
-  for(i = 0; i < number; i++)
-    {
-      /* Does the name match ? */
-      if(!strncmp(discoveries[i].info, self->rname, NICKNAME_MAX_LEN))
-	{
-	  /* Yes !!! Get it.. */
-	  self->daddr = discoveries[i].daddr;
-	  DEBUG(IRDA_SR_INFO, "discovered device ``%s'' at address 0x%08x.\n",
-		self->rname, self->daddr);
-	  kfree(discoveries);
-	  DEXIT(IRDA_SR_TRACE, "\n");
-	  return 0;
-	}
-    }
-  /* No luck ! */
-  DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname);
-  kfree(discoveries);
-  return -EADDRNOTAVAIL;
-}
-
-
-/************************* SOCKET ROUTINES *************************/
-/*
- * This are the main operations on IrNET sockets, basically to create
- * and destroy IrNET sockets. These are called from the PPP part...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Create a IrNET instance : just initialise some parameters...
- */
-int
-irda_irnet_create(irnet_socket *	self)
-{
-  DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self);
-
-  self->magic = IRNET_MAGIC;	/* Paranoia */
-
-  self->ttp_open = 0;		/* Prevent higher layer from accessing IrTTP */
-  self->ttp_connect = 0;	/* Not connecting yet */
-  self->rname[0] = '\0';	/* May be set via control channel */
-  self->rdaddr = DEV_ADDR_ANY;	/* May be set via control channel */
-  self->rsaddr = DEV_ADDR_ANY;	/* May be set via control channel */
-  self->daddr = DEV_ADDR_ANY;	/* Until we get connected */
-  self->saddr = DEV_ADDR_ANY;	/* Until we get connected */
-  self->max_sdu_size_rx = TTP_SAR_UNBOUND;
-
-  /* Register as a client with IrLMP */
-  self->ckey = irlmp_register_client(0, NULL, NULL, NULL);
-#ifdef DISCOVERY_NOMASK
-  self->mask = 0xffff;		/* For W2k compatibility */
-#else /* DISCOVERY_NOMASK */
-  self->mask = irlmp_service_to_hint(S_LAN);
-#endif /* DISCOVERY_NOMASK */
-  self->tx_flow = FLOW_START;	/* Flow control from IrTTP */
-
-  INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect);
-
-  DEXIT(IRDA_SOCK_TRACE, "\n");
-  return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Connect to the other side :
- *	o convert device name to an address
- *	o find the socket number (dlsap)
- *	o Establish the connection
- *
- * Note : We no longer mimic af_irda. The IAS query for finding the TSAP
- * is done asynchronously, like the TTP connection. This allow us to
- * call this function from any context (not only process).
- * The downside is that following what's happening in there is tricky
- * because it involve various functions all over the place...
- */
-int
-irda_irnet_connect(irnet_socket *	self)
-{
-  int		err;
-
-  DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self);
-
-  /* Check if we are already trying to connect.
-   * Because irda_irnet_connect() can be called directly by pppd plus
-   * packet retries in ppp_generic and connect may take time, plus we may
-   * race with irnet_connect_indication(), we need to be careful there... */
-  if(test_and_set_bit(0, &self->ttp_connect))
-    DRETURN(-EBUSY, IRDA_SOCK_INFO, "Already connecting...\n");
-  if((self->iriap != NULL) || (self->tsap != NULL))
-    DERROR(IRDA_SOCK_ERROR, "Socket not cleaned up...\n");
-
-  /* Insert ourselves in the hashbin so that the IrNET server can find us.
-   * Notes : 4th arg is string of 32 char max and must be null terminated
-   *	     When 4th arg is used (string), 3rd arg isn't (int)
-   *	     Can't re-insert (MUST remove first) so check for that... */
-  if((irnet_server.running) && (self->q.q_next == NULL))
-    {
-      spin_lock_bh(&irnet_server.spinlock);
-      hashbin_insert(irnet_server.list, (irda_queue_t *) self, 0, self->rname);
-      spin_unlock_bh(&irnet_server.spinlock);
-      DEBUG(IRDA_SOCK_INFO, "Inserted ``%s'' in hashbin...\n", self->rname);
-    }
-
-  /* If we don't have anything (no address, no name) */
-  if((self->rdaddr == DEV_ADDR_ANY) && (self->rname[0] == '\0'))
-    {
-      /* Try to find a suitable address */
-      if((err = irnet_discover_daddr_and_lsap_sel(self)) != 0)
-	DRETURN(err, IRDA_SOCK_INFO, "auto-connect failed!\n");
-      /* In most cases, the call above is non-blocking */
-    }
-  else
-    {
-      /* If we have only the name (no address), try to get an address */
-      if(self->rdaddr == DEV_ADDR_ANY)
-	{
-	  if((err = irnet_dname_to_daddr(self)) != 0)
-	    DRETURN(err, IRDA_SOCK_INFO, "name connect failed!\n");
-	}
-      else
-	/* Use the requested destination address */
-	self->daddr = self->rdaddr;
-
-      /* Query remote LM-IAS to find LSAP selector */
-      irnet_find_lsap_sel(self);
-      /* The above call is non blocking */
-    }
-
-  /* At this point, we are waiting for the IrDA stack to call us back,
-   * or we have already failed.
-   * We will finish the connection procedure in irnet_connect_tsap().
-   */
-  DEXIT(IRDA_SOCK_TRACE, "\n");
-  return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_irnet_destroy(self)
- *
- *    Destroy irnet instance
- *
- * Note : this need to be called from a process context.
- */
-void
-irda_irnet_destroy(irnet_socket *	self)
-{
-  DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self);
-  if(self == NULL)
-    return;
-
-  /* Remove ourselves from hashbin (if we are queued in hashbin)
-   * Note : `irnet_server.running' protect us from calls in hashbin_delete() */
-  if((irnet_server.running) && (self->q.q_next != NULL))
-    {
-      struct irnet_socket *	entry;
-      DEBUG(IRDA_SOCK_INFO, "Removing from hash..\n");
-      spin_lock_bh(&irnet_server.spinlock);
-      entry = hashbin_remove_this(irnet_server.list, (irda_queue_t *) self);
-      self->q.q_next = NULL;
-      spin_unlock_bh(&irnet_server.spinlock);
-      DASSERT(entry == self, , IRDA_SOCK_ERROR, "Can't remove from hash.\n");
-    }
-
-  /* If we were connected, post a message */
-  if(test_bit(0, &self->ttp_open))
-    {
-      /* Note : as the disconnect comes from ppp_generic, the unit number
-       * doesn't exist anymore when we post the event, so we need to pass
-       * NULL as the first arg... */
-      irnet_post_event(NULL, IRNET_DISCONNECT_TO,
-		       self->saddr, self->daddr, self->rname, 0);
-    }
-
-  /* Prevent various IrDA callbacks from messing up things
-   * Need to be first */
-  clear_bit(0, &self->ttp_connect);
-
-  /* Prevent higher layer from accessing IrTTP */
-  clear_bit(0, &self->ttp_open);
-
-  /* Unregister with IrLMP */
-  irlmp_unregister_client(self->ckey);
-
-  /* Unregister with LM-IAS */
-  if(self->iriap)
-    {
-      iriap_close(self->iriap);
-      self->iriap = NULL;
-    }
-
-  /* Cleanup eventual discoveries from connection attempt or control channel */
-  if(self->discoveries != NULL)
-    {
-      /* Cleanup our copy of the discovery log */
-      kfree(self->discoveries);
-      self->discoveries = NULL;
-    }
-
-  /* Close our IrTTP connection */
-  if(self->tsap)
-    {
-      DEBUG(IRDA_SOCK_INFO, "Closing our TTP connection.\n");
-      irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
-      irttp_close_tsap(self->tsap);
-      self->tsap = NULL;
-    }
-  self->stsap_sel = 0;
-
-  DEXIT(IRDA_SOCK_TRACE, "\n");
-}
-
-
-/************************** SERVER SOCKET **************************/
-/*
- * The IrNET service is composed of one server socket and a variable
- * number of regular IrNET sockets. The server socket is supposed to
- * handle incoming connections and redirect them to one IrNET sockets.
- * It's a superset of the regular IrNET socket, but has a very distinct
- * behaviour...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_daddr_to_dname (self)
- *
- *    Convert an IrDA address to a IrDA nickname
- *
- * It basically look into the discovery log until there is a match.
- */
-static inline int
-irnet_daddr_to_dname(irnet_socket *	self)
-{
-  struct irda_device_info *discoveries;	/* Copy of the discovery log */
-  int	number;			/* Number of nodes in the log */
-  int	i;
-
-  DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self);
-
-  /* Ask lmp for the current discovery log */
-  discoveries = irlmp_get_discoveries(&number, 0xffff,
-				      DISCOVERY_DEFAULT_SLOTS);
-  /* Check if the we got some results */
-  if (discoveries == NULL)
-    DRETURN(-ENETUNREACH, IRDA_SERV_INFO, "Cachelog empty...\n");
-
-  /* Now, check all discovered devices (if any) */
-  for(i = 0; i < number; i++)
-    {
-      /* Does the name match ? */
-      if(discoveries[i].daddr == self->daddr)
-	{
-	  /* Yes !!! Get it.. */
-	  strlcpy(self->rname, discoveries[i].info, sizeof(self->rname));
-	  self->rname[sizeof(self->rname) - 1] = '\0';
-	  DEBUG(IRDA_SERV_INFO, "Device 0x%08x is in fact ``%s''.\n",
-		self->daddr, self->rname);
-	  kfree(discoveries);
-	  DEXIT(IRDA_SERV_TRACE, "\n");
-	  return 0;
-	}
-    }
-  /* No luck ! */
-  DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr);
-  kfree(discoveries);
-  return -EADDRNOTAVAIL;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_find_socket (self)
- *
- *    Find the correct IrNET socket
- *
- * Look into the list of IrNET sockets and finds one with the right
- * properties...
- */
-static inline irnet_socket *
-irnet_find_socket(irnet_socket *	self)
-{
-  irnet_socket *	new = (irnet_socket *) NULL;
-  int			err;
-
-  DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self);
-
-  /* Get the addresses of the requester */
-  self->daddr = irttp_get_daddr(self->tsap);
-  self->saddr = irttp_get_saddr(self->tsap);
-
-  /* Try to get the IrDA nickname of the requester */
-  err = irnet_daddr_to_dname(self);
-
-  /* Protect access to the instance list */
-  spin_lock_bh(&irnet_server.spinlock);
-
-  /* So now, try to get an socket having specifically
-   * requested that nickname */
-  if(err == 0)
-    {
-      new = (irnet_socket *) hashbin_find(irnet_server.list,
-					  0, self->rname);
-      if(new)
-	DEBUG(IRDA_SERV_INFO, "Socket 0x%p matches rname ``%s''.\n",
-	      new, new->rname);
-    }
-
-  /* If no name matches, try to find an socket by the destination address */
-  /* It can be either the requested destination address (set via the
-   * control channel), or the current destination address if the
-   * socket is in the middle of a connection request */
-  if(new == (irnet_socket *) NULL)
-    {
-      new = (irnet_socket *) hashbin_get_first(irnet_server.list);
-      while(new !=(irnet_socket *) NULL)
-	{
-	  /* Does it have the same address ? */
-	  if((new->rdaddr == self->daddr) || (new->daddr == self->daddr))
-	    {
-	      /* Yes !!! Get it.. */
-	      DEBUG(IRDA_SERV_INFO, "Socket 0x%p matches daddr %#08x.\n",
-		    new, self->daddr);
-	      break;
-	    }
-	  new = (irnet_socket *) hashbin_get_next(irnet_server.list);
-	}
-    }
-
-  /* If we don't have any socket, get the first unconnected socket */
-  if(new == (irnet_socket *) NULL)
-    {
-      new = (irnet_socket *) hashbin_get_first(irnet_server.list);
-      while(new !=(irnet_socket *) NULL)
-	{
-	  /* Is it available ? */
-	  if(!(test_bit(0, &new->ttp_open)) && (new->rdaddr == DEV_ADDR_ANY) &&
-	     (new->rname[0] == '\0') && (new->ppp_open))
-	    {
-	      /* Yes !!! Get it.. */
-	      DEBUG(IRDA_SERV_INFO, "Socket 0x%p is free.\n",
-		    new);
-	      break;
-	    }
-	  new = (irnet_socket *) hashbin_get_next(irnet_server.list);
-	}
-    }
-
-  /* Spin lock end */
-  spin_unlock_bh(&irnet_server.spinlock);
-
-  DEXIT(IRDA_SERV_TRACE, " - new = 0x%p\n", new);
-  return new;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_connect_socket (self)
- *
- *    Connect an incoming connection to the socket
- *
- */
-static inline int
-irnet_connect_socket(irnet_socket *	server,
-		     irnet_socket *	new,
-		     struct qos_info *	qos,
-		     __u32		max_sdu_size,
-		     __u8		max_header_size)
-{
-  DENTER(IRDA_SERV_TRACE, "(server=0x%p, new=0x%p)\n",
-	 server, new);
-
-  /* Now attach up the new socket */
-  new->tsap = irttp_dup(server->tsap, new);
-  DABORT(new->tsap == NULL, -1, IRDA_SERV_ERROR, "dup failed!\n");
-
-  /* Set up all the relevant parameters on the new socket */
-  new->stsap_sel = new->tsap->stsap_sel;
-  new->dtsap_sel = new->tsap->dtsap_sel;
-  new->saddr = irttp_get_saddr(new->tsap);
-  new->daddr = irttp_get_daddr(new->tsap);
-
-  new->max_header_size = max_header_size;
-  new->max_sdu_size_tx = max_sdu_size;
-  new->max_data_size   = max_sdu_size;
-#ifdef STREAM_COMPAT
-  /* If we want to receive "stream sockets" */
-  if(max_sdu_size == 0)
-    new->max_data_size = irttp_get_max_seg_size(new->tsap);
-#endif /* STREAM_COMPAT */
-
-  /* Clean up the original one to keep it in listen state */
-  irttp_listen(server->tsap);
-
-  /* Send a connection response on the new socket */
-  irttp_connect_response(new->tsap, new->max_sdu_size_rx, NULL);
-
-  /* Allow PPP to send its junk over the new socket... */
-  set_bit(0, &new->ttp_open);
-
-  /* Not connecting anymore, and clean up last possible remains
-   * of connection attempts on the socket */
-  clear_bit(0, &new->ttp_connect);
-  if(new->iriap)
-    {
-      iriap_close(new->iriap);
-      new->iriap = NULL;
-    }
-  if(new->discoveries != NULL)
-    {
-      kfree(new->discoveries);
-      new->discoveries = NULL;
-    }
-
-#ifdef CONNECT_INDIC_KICK
-  /* As currently we don't block packets in ppp_irnet_send() while passive,
-   * this is not really needed...
-   * Also, not doing it give IrDA a chance to finish the setup properly
-   * before being swamped with packets... */
-  ppp_output_wakeup(&new->chan);
-#endif /* CONNECT_INDIC_KICK */
-
-  /* Notify the control channel */
-  irnet_post_event(new, IRNET_CONNECT_FROM,
-		   new->saddr, new->daddr, server->rname, 0);
-
-  DEXIT(IRDA_SERV_TRACE, "\n");
-  return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_disconnect_server (self)
- *
- *    Cleanup the server socket when the incoming connection abort
- *
- */
-static inline void
-irnet_disconnect_server(irnet_socket *	self,
-			struct sk_buff *skb)
-{
-  DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self);
-
-  /* Put the received packet in the black hole */
-  kfree_skb(skb);
-
-#ifdef FAIL_SEND_DISCONNECT
-  /* Tell the other party we don't want to be connected */
-  /* Hum... Is it the right thing to do ? And do we need to send
-   * a connect response before ? It looks ok without this... */
-  irttp_disconnect_request(self->tsap, NULL, P_NORMAL);
-#endif /* FAIL_SEND_DISCONNECT */
-
-  /* Notify the control channel (see irnet_find_socket()) */
-  irnet_post_event(NULL, IRNET_REQUEST_FROM,
-		   self->saddr, self->daddr, self->rname, 0);
-
-  /* Clean up the server to keep it in listen state */
-  irttp_listen(self->tsap);
-
-  DEXIT(IRDA_SERV_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_setup_server (self)
- *
- *    Create a IrTTP server and set it up...
- *
- * Register the IrLAN hint bit, create a IrTTP instance for us,
- * set all the IrTTP callbacks and create an IrIAS entry...
- */
-static inline int
-irnet_setup_server(void)
-{
-  __u16		hints;
-
-  DENTER(IRDA_SERV_TRACE, "()\n");
-
-  /* Initialise the regular socket part of the server */
-  irda_irnet_create(&irnet_server.s);
-
-  /* Open a local TSAP (an IrTTP instance) for the server */
-  irnet_open_tsap(&irnet_server.s);
-
-  /* PPP part setup */
-  irnet_server.s.ppp_open = 0;
-  irnet_server.s.chan.private = NULL;
-  irnet_server.s.file = NULL;
-
-  /* Get the hint bit corresponding to IrLAN */
-  /* Note : we overload the IrLAN hint bit. As it is only a "hint", and as
-   * we provide roughly the same functionality as IrLAN, this is ok.
-   * In fact, the situation is similar as JetSend overloading the Obex hint
-   */
-  hints = irlmp_service_to_hint(S_LAN);
-
-#ifdef ADVERTISE_HINT
-  /* Register with IrLMP as a service (advertise our hint bit) */
-  irnet_server.skey = irlmp_register_service(hints);
-#endif /* ADVERTISE_HINT */
-
-  /* Register with LM-IAS (so that people can connect to us) */
-  irnet_server.ias_obj = irias_new_object(IRNET_SERVICE_NAME, jiffies);
-  irias_add_integer_attrib(irnet_server.ias_obj, IRNET_IAS_VALUE,
-			   irnet_server.s.stsap_sel, IAS_KERNEL_ATTR);
-  irias_insert_object(irnet_server.ias_obj);
-
-#ifdef DISCOVERY_EVENTS
-  /* Tell IrLMP we want to be notified of newly discovered nodes */
-  irlmp_update_client(irnet_server.s.ckey, hints,
-		      irnet_discovery_indication, irnet_expiry_indication,
-		      (void *) &irnet_server.s);
-#endif
-
-  DEXIT(IRDA_SERV_TRACE, " - self=0x%p\n", &irnet_server.s);
-  return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irda_destroy_server (self)
- *
- *    Destroy the IrTTP server...
- *
- * Reverse of the previous function...
- */
-static inline void
-irnet_destroy_server(void)
-{
-  DENTER(IRDA_SERV_TRACE, "()\n");
-
-#ifdef ADVERTISE_HINT
-  /* Unregister with IrLMP */
-  irlmp_unregister_service(irnet_server.skey);
-#endif /* ADVERTISE_HINT */
-
-  /* Unregister with LM-IAS */
-  if(irnet_server.ias_obj)
-    irias_delete_object(irnet_server.ias_obj);
-
-  /* Cleanup the socket part */
-  irda_irnet_destroy(&irnet_server.s);
-
-  DEXIT(IRDA_SERV_TRACE, "\n");
-}
-
-
-/************************ IRDA-TTP CALLBACKS ************************/
-/*
- * When we create a IrTTP instance, we pass to it a set of callbacks
- * that IrTTP will call in case of various events.
- * We take care of those events here.
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_data_indication (instance, sap, skb)
- *
- *    Received some data from TinyTP. Just queue it on the receive queue
- *
- */
-static int
-irnet_data_indication(void *	instance,
-		      void *	sap,
-		      struct sk_buff *skb)
-{
-  irnet_socket *	ap = (irnet_socket *) instance;
-  unsigned char *	p;
-  int			code = 0;
-
-  DENTER(IRDA_TCB_TRACE, "(self/ap=0x%p, skb=0x%p)\n",
-	 ap, skb);
-  DASSERT(skb != NULL, 0, IRDA_CB_ERROR, "skb is NULL !!!\n");
-
-  /* Check is ppp is ready to receive our packet */
-  if(!ap->ppp_open)
-    {
-      DERROR(IRDA_CB_ERROR, "PPP not ready, dropping packet...\n");
-      /* When we return error, TTP will need to requeue the skb and
-       * will stop the sender. IrTTP will stall until we send it a
-       * flow control request... */
-      return -ENOMEM;
-    }
-
-  /* strip address/control field if present */
-  p = skb->data;
-  if((p[0] == PPP_ALLSTATIONS) && (p[1] == PPP_UI))
-    {
-      /* chop off address/control */
-      if(skb->len < 3)
-	goto err_exit;
-      p = skb_pull(skb, 2);
-    }
-
-  /* decompress protocol field if compressed */
-  if(p[0] & 1)
-    {
-      /* protocol is compressed */
-      *(u8 *)skb_push(skb, 1) = 0;
-    }
-  else
-    if(skb->len < 2)
-      goto err_exit;
-
-  /* pass to generic ppp layer */
-  /* Note : how do I know if ppp can accept or not the packet ? This is
-   * essential if I want to manage flow control smoothly... */
-  ppp_input(&ap->chan, skb);
-
-  DEXIT(IRDA_TCB_TRACE, "\n");
-  return 0;
-
- err_exit:
-  DERROR(IRDA_CB_ERROR, "Packet too small, dropping...\n");
-  kfree_skb(skb);
-  ppp_input_error(&ap->chan, code);
-  return 0;	/* Don't return an error code, only for flow control... */
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_disconnect_indication (instance, sap, reason, skb)
- *
- *    Connection has been closed. Chech reason to find out why
- *
- * Note : there are many cases where we come here :
- *	o attempted to connect, timeout
- *	o connected, link is broken, LAP has timeout
- *	o connected, other side close the link
- *	o connection request on the server not handled
- */
-static void
-irnet_disconnect_indication(void *	instance,
-			    void *	sap,
-			    LM_REASON	reason,
-			    struct sk_buff *skb)
-{
-  irnet_socket *	self = (irnet_socket *) instance;
-  int			test_open;
-  int			test_connect;
-
-  DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self);
-  DASSERT(self != NULL, , IRDA_CB_ERROR, "Self is NULL !!!\n");
-
-  /* Don't care about it, but let's not leak it */
-  if(skb)
-    dev_kfree_skb(skb);
-
-  /* Prevent higher layer from accessing IrTTP */
-  test_open = test_and_clear_bit(0, &self->ttp_open);
-  /* Not connecting anymore...
-   * (note : TSAP is open, so IAP callbacks are no longer pending...) */
-  test_connect = test_and_clear_bit(0, &self->ttp_connect);
-
-  /* If both self->ttp_open and self->ttp_connect are NULL, it mean that we
-   * have a race condition with irda_irnet_destroy() or
-   * irnet_connect_indication(), so don't mess up tsap...
-   */
-  if(!(test_open || test_connect))
-    {
-      DERROR(IRDA_CB_ERROR, "Race condition detected...\n");
-      return;
-    }
-
-  /* If we were active, notify the control channel */
-  if(test_open)
-    irnet_post_event(self, IRNET_DISCONNECT_FROM,
-		     self->saddr, self->daddr, self->rname, 0);
-  else
-    /* If we were trying to connect, notify the control channel */
-    if((self->tsap) && (self != &irnet_server.s))
-      irnet_post_event(self, IRNET_NOANSWER_FROM,
-		       self->saddr, self->daddr, self->rname, 0);
-
-  /* Close our IrTTP connection, cleanup tsap */
-  if((self->tsap) && (self != &irnet_server.s))
-    {
-      DEBUG(IRDA_CB_INFO, "Closing our TTP connection.\n");
-      irttp_close_tsap(self->tsap);
-      self->tsap = NULL;
-    }
-  /* Cleanup the socket in case we want to reconnect in ppp_output_wakeup() */
-  self->stsap_sel = 0;
-  self->daddr = DEV_ADDR_ANY;
-  self->tx_flow = FLOW_START;
-
-  /* Deal with the ppp instance if it's still alive */
-  if(self->ppp_open)
-    {
-      if(test_open)
-	{
-	  /* ppp_unregister_channel() wants a user context. */
-	  schedule_work(&self->disconnect_work);
-	}
-      else
-	{
-	  /* If we were trying to connect, flush (drain) ppp_generic
-	   * Tx queue (most often we have blocked it), which will
-	   * trigger an other attempt to connect. If we are passive,
-	   * this will empty the Tx queue after last try. */
-	  ppp_output_wakeup(&self->chan);
-	}
-    }
-
-  DEXIT(IRDA_TCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_connect_confirm (instance, sap, qos, max_sdu_size, skb)
- *
- *    Connections has been confirmed by the remote device
- *
- */
-static void
-irnet_connect_confirm(void *	instance,
-		      void *	sap,
-		      struct qos_info *qos,
-		      __u32	max_sdu_size,
-		      __u8	max_header_size,
-		      struct sk_buff *skb)
-{
-  irnet_socket *	self = (irnet_socket *) instance;
-
-  DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self);
-
-  /* Check if socket is closing down (via irda_irnet_destroy()) */
-  if(! test_bit(0, &self->ttp_connect))
-    {
-      DERROR(IRDA_CB_ERROR, "Socket no longer connecting. Ouch !\n");
-      return;
-    }
-
-  /* How much header space do we need to reserve */
-  self->max_header_size = max_header_size;
-
-  /* IrTTP max SDU size in transmit direction */
-  self->max_sdu_size_tx = max_sdu_size;
-  self->max_data_size = max_sdu_size;
-#ifdef STREAM_COMPAT
-  if(max_sdu_size == 0)
-    self->max_data_size = irttp_get_max_seg_size(self->tsap);
-#endif /* STREAM_COMPAT */
-
-  /* At this point, IrLMP has assigned our source address */
-  self->saddr = irttp_get_saddr(self->tsap);
-
-  /* Allow higher layer to access IrTTP */
-  set_bit(0, &self->ttp_open);
-  clear_bit(0, &self->ttp_connect);	/* Not racy, IrDA traffic is serial */
-  /* Give a kick in the ass of ppp_generic so that he sends us some data */
-  ppp_output_wakeup(&self->chan);
-
-  /* Check size of received packet */
-  if(skb->len > 0)
-    {
-#ifdef PASS_CONNECT_PACKETS
-      DEBUG(IRDA_CB_INFO, "Passing connect packet to PPP.\n");
-      /* Try to pass it to PPP */
-      irnet_data_indication(instance, sap, skb);
-#else /* PASS_CONNECT_PACKETS */
-      DERROR(IRDA_CB_ERROR, "Dropping non empty packet.\n");
-      kfree_skb(skb);	/* Note : will be optimised with other kfree... */
-#endif /* PASS_CONNECT_PACKETS */
-    }
-  else
-    kfree_skb(skb);
-
-  /* Notify the control channel */
-  irnet_post_event(self, IRNET_CONNECT_TO,
-		   self->saddr, self->daddr, self->rname, 0);
-
-  DEXIT(IRDA_TCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_flow_indication (instance, sap, flow)
- *
- *    Used by TinyTP to tell us if it can accept more data or not
- *
- */
-static void
-irnet_flow_indication(void *	instance,
-		      void *	sap,
-		      LOCAL_FLOW flow)
-{
-  irnet_socket *	self = (irnet_socket *) instance;
-  LOCAL_FLOW		oldflow = self->tx_flow;
-
-  DENTER(IRDA_TCB_TRACE, "(self=0x%p, flow=%d)\n", self, flow);
-
-  /* Update our state */
-  self->tx_flow = flow;
-
-  /* Check what IrTTP want us to do... */
-  switch(flow)
-    {
-    case FLOW_START:
-      DEBUG(IRDA_CB_INFO, "IrTTP wants us to start again\n");
-      /* Check if we really need to wake up PPP */
-      if(oldflow == FLOW_STOP)
-	ppp_output_wakeup(&self->chan);
-      else
-	DEBUG(IRDA_CB_INFO, "But we were already transmitting !!!\n");
-      break;
-    case FLOW_STOP:
-      DEBUG(IRDA_CB_INFO, "IrTTP wants us to slow down\n");
-      break;
-    default:
-      DEBUG(IRDA_CB_INFO, "Unknown flow command!\n");
-      break;
-    }
-
-  DEXIT(IRDA_TCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_status_indication (instance, sap, reason, skb)
- *
- *    Link (IrLAP) status report.
- *
- */
-static void
-irnet_status_indication(void *	instance,
-			LINK_STATUS link,
-			LOCK_STATUS lock)
-{
-  irnet_socket *	self = (irnet_socket *) instance;
-
-  DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self);
-  DASSERT(self != NULL, , IRDA_CB_ERROR, "Self is NULL !!!\n");
-
-  /* We can only get this event if we are connected */
-  switch(link)
-    {
-    case STATUS_NO_ACTIVITY:
-      irnet_post_event(self, IRNET_BLOCKED_LINK,
-		       self->saddr, self->daddr, self->rname, 0);
-      break;
-    default:
-      DEBUG(IRDA_CB_INFO, "Unknown status...\n");
-    }
-
-  DEXIT(IRDA_TCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_connect_indication(instance, sap, qos, max_sdu_size, userdata)
- *
- *    Incoming connection
- *
- * In theory, this function is called only on the server socket.
- * Some other node is attempting to connect to the IrNET service, and has
- * sent a connection request on our server socket.
- * We just redirect the connection to the relevant IrNET socket.
- *
- * Note : we also make sure that between 2 irnet nodes, there can
- * exist only one irnet connection.
- */
-static void
-irnet_connect_indication(void *		instance,
-			 void *		sap,
-			 struct qos_info *qos,
-			 __u32		max_sdu_size,
-			 __u8		max_header_size,
-			 struct sk_buff *skb)
-{
-  irnet_socket *	server = &irnet_server.s;
-  irnet_socket *	new = (irnet_socket *) NULL;
-
-  DENTER(IRDA_TCB_TRACE, "(server=0x%p)\n", server);
-  DASSERT(instance == &irnet_server, , IRDA_CB_ERROR,
-	  "Invalid instance (0x%p) !!!\n", instance);
-  DASSERT(sap == irnet_server.s.tsap, , IRDA_CB_ERROR, "Invalid sap !!!\n");
-
-  /* Try to find the most appropriate IrNET socket */
-  new = irnet_find_socket(server);
-
-  /* After all this hard work, do we have an socket ? */
-  if(new == (irnet_socket *) NULL)
-    {
-      DEXIT(IRDA_CB_INFO, ": No socket waiting for this connection.\n");
-      irnet_disconnect_server(server, skb);
-      return;
-    }
-
-  /* Is the socket already busy ? */
-  if(test_bit(0, &new->ttp_open))
-    {
-      DEXIT(IRDA_CB_INFO, ": Socket already connected.\n");
-      irnet_disconnect_server(server, skb);
-      return;
-    }
-
-  /* The following code is a bit tricky, so need comments ;-)
-   */
-  /* If ttp_connect is set, the socket is trying to connect to the other
-   * end and may have sent a IrTTP connection request and is waiting for
-   * a connection response (that may never come).
-   * Now, the pain is that the socket may have opened a tsap and is
-   * waiting on it, while the other end is trying to connect to it on
-   * another tsap.
-   * Because IrNET can be peer to peer, we need to workaround this.
-   * Furthermore, the way the irnetd script is implemented, the
-   * target will create a second IrNET connection back to the
-   * originator and expect the originator to bind this new connection
-   * to the original PPPD instance.
-   * And of course, if we don't use irnetd, we can have a race when
-   * both side try to connect simultaneously, which could leave both
-   * connections half closed (yuck).
-   * Conclusions :
-   *	1) The "originator" must accept the new connection and get rid
-   *	   of the old one so that irnetd works
-   *	2) One side must deny the new connection to avoid races,
-   *	   but both side must agree on which side it is...
-   * Most often, the originator is primary at the LAP layer.
-   * Jean II
-   */
-  /* Now, let's look at the way I wrote the test...
-   * We need to clear up the ttp_connect flag atomically to prevent
-   * irnet_disconnect_indication() to mess up the tsap we are going to close.
-   * We want to clear the ttp_connect flag only if we close the tsap,
-   * otherwise we will never close it, so we need to check for primary
-   * *before* doing the test on the flag.
-   * And of course, ALLOW_SIMULT_CONNECT can disable this entirely...
-   * Jean II
-   */
-
-  /* Socket already connecting ? On primary ? */
-  if(0
-#ifdef ALLOW_SIMULT_CONNECT
-     || ((irttp_is_primary(server->tsap) == 1) &&	/* primary */
-	 (test_and_clear_bit(0, &new->ttp_connect)))
-#endif /* ALLOW_SIMULT_CONNECT */
-     )
-    {
-      DERROR(IRDA_CB_ERROR, "Socket already connecting, but going to reuse it !\n");
-
-      /* Cleanup the old TSAP if necessary - IrIAP will be cleaned up later */
-      if(new->tsap != NULL)
-	{
-	  /* Close the old connection the new socket was attempting,
-	   * so that we can hook it up to the new connection.
-	   * It's now safe to do it... */
-	  irttp_close_tsap(new->tsap);
-	  new->tsap = NULL;
-	}
-    }
-  else
-    {
-      /* Three options :
-       * 1) socket was not connecting or connected : ttp_connect should be 0.
-       * 2) we don't want to connect the socket because we are secondary or
-       * ALLOW_SIMULT_CONNECT is undefined. ttp_connect should be 1.
-       * 3) we are half way in irnet_disconnect_indication(), and it's a
-       * nice race condition... Fortunately, we can detect that by checking
-       * if tsap is still alive. On the other hand, we can't be in
-       * irda_irnet_destroy() otherwise we would not have found this
-       * socket in the hashbin.
-       * Jean II */
-      if((test_bit(0, &new->ttp_connect)) || (new->tsap != NULL))
-	{
-	  /* Don't mess this socket, somebody else in in charge... */
-	  DERROR(IRDA_CB_ERROR, "Race condition detected, socket in use, abort connect...\n");
-	  irnet_disconnect_server(server, skb);
-	  return;
-	}
-    }
-
-  /* So : at this point, we have a socket, and it is idle. Good ! */
-  irnet_connect_socket(server, new, qos, max_sdu_size, max_header_size);
-
-  /* Check size of received packet */
-  if(skb->len > 0)
-    {
-#ifdef PASS_CONNECT_PACKETS
-      DEBUG(IRDA_CB_INFO, "Passing connect packet to PPP.\n");
-      /* Try to pass it to PPP */
-      irnet_data_indication(new, new->tsap, skb);
-#else /* PASS_CONNECT_PACKETS */
-      DERROR(IRDA_CB_ERROR, "Dropping non empty packet.\n");
-      kfree_skb(skb);	/* Note : will be optimised with other kfree... */
-#endif /* PASS_CONNECT_PACKETS */
-    }
-  else
-    kfree_skb(skb);
-
-  DEXIT(IRDA_TCB_TRACE, "\n");
-}
-
-
-/********************** IRDA-IAS/LMP CALLBACKS **********************/
-/*
- * These are the callbacks called by other layers of the IrDA stack,
- * mainly LMP for discovery and IAS for name queries.
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_getvalue_confirm (result, obj_id, value, priv)
- *
- *    Got answer from remote LM-IAS, just connect
- *
- * This is the reply to a IAS query we were doing to find the TSAP of
- * the device we want to connect to.
- * If we have found a valid TSAP, just initiate the TTP connection
- * on this TSAP.
- */
-static void
-irnet_getvalue_confirm(int	result,
-		       __u16	obj_id,
-		       struct ias_value *value,
-		       void *	priv)
-{
-  irnet_socket *	self = (irnet_socket *) priv;
-
-  DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
-  DASSERT(self != NULL, , IRDA_OCB_ERROR, "Self is NULL !!!\n");
-
-  /* Check if already connected (via irnet_connect_socket())
-   * or socket is closing down (via irda_irnet_destroy()) */
-  if(! test_bit(0, &self->ttp_connect))
-    {
-      DERROR(IRDA_OCB_ERROR, "Socket no longer connecting. Ouch !\n");
-      return;
-    }
-
-  /* We probably don't need to make any more queries */
-  iriap_close(self->iriap);
-  self->iriap = NULL;
-
-  /* Post process the IAS reply */
-  self->dtsap_sel = irnet_ias_to_tsap(self, result, value);
-
-  /* If error, just go out */
-  if(self->errno)
-    {
-      clear_bit(0, &self->ttp_connect);
-      DERROR(IRDA_OCB_ERROR, "IAS connect failed ! (0x%X)\n", self->errno);
-      return;
-    }
-
-  DEBUG(IRDA_OCB_INFO, "daddr = %08x, lsap = %d, starting IrTTP connection\n",
-	self->daddr, self->dtsap_sel);
-
-  /* Start up TTP - non blocking */
-  irnet_connect_tsap(self);
-
-  DEXIT(IRDA_OCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_discovervalue_confirm (result, obj_id, value, priv)
- *
- *    Handle the TSAP discovery procedure state machine.
- *    Got answer from remote LM-IAS, try next device
- *
- * We are doing a  TSAP discovery procedure, and we got an answer to
- * a IAS query we were doing to find the TSAP on one of the address
- * in the discovery log.
- *
- * If we have found a valid TSAP for the first time, save it. If it's
- * not the first time we found one, complain.
- *
- * If we have more addresses in the log, just initiate a new query.
- * Note that those query may fail (see irnet_discover_daddr_and_lsap_sel())
- *
- * Otherwise, wrap up the procedure (cleanup), check if we have found
- * any device and connect to it.
- */
-static void
-irnet_discovervalue_confirm(int		result,
-			    __u16	obj_id,
-			    struct ias_value *value,
-			    void *	priv)
-{
-  irnet_socket *	self = (irnet_socket *) priv;
-  __u8			dtsap_sel;		/* TSAP we are looking for */
-
-  DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
-  DASSERT(self != NULL, , IRDA_OCB_ERROR, "Self is NULL !!!\n");
-
-  /* Check if already connected (via irnet_connect_socket())
-   * or socket is closing down (via irda_irnet_destroy()) */
-  if(! test_bit(0, &self->ttp_connect))
-    {
-      DERROR(IRDA_OCB_ERROR, "Socket no longer connecting. Ouch !\n");
-      return;
-    }
-
-  /* Post process the IAS reply */
-  dtsap_sel = irnet_ias_to_tsap(self, result, value);
-
-  /* Have we got something ? */
-  if(self->errno == 0)
-    {
-      /* We found the requested service */
-      if(self->daddr != DEV_ADDR_ANY)
-	{
-	  DERROR(IRDA_OCB_ERROR, "More than one device in range supports IrNET...\n");
-	}
-      else
-	{
-	  /* First time we found that one, save it ! */
-	  self->daddr = self->discoveries[self->disco_index].daddr;
-	  self->dtsap_sel = dtsap_sel;
-	}
-    }
-
-  /* If no failure */
-  if((self->errno == -EADDRNOTAVAIL) || (self->errno == 0))
-    {
-      int	ret;
-
-      /* Search the next node */
-      ret = irnet_discover_next_daddr(self);
-      if(!ret)
-	{
-	  /* In this case, the above request was non-blocking.
-	   * We will return here after a while... */
-	  return;
-	}
-      /* In this case, we have processed the last discovery item */
-    }
-
-  /* No more queries to be done (failure or last one) */
-
-  /* We probably don't need to make any more queries */
-  iriap_close(self->iriap);
-  self->iriap = NULL;
-
-  /* No more items : remove the log and signal termination */
-  DEBUG(IRDA_OCB_INFO, "Cleaning up log (0x%p)\n",
-	self->discoveries);
-  if(self->discoveries != NULL)
-    {
-      /* Cleanup our copy of the discovery log */
-      kfree(self->discoveries);
-      self->discoveries = NULL;
-    }
-  self->disco_number = -1;
-
-  /* Check out what we found */
-  if(self->daddr == DEV_ADDR_ANY)
-    {
-      self->daddr = DEV_ADDR_ANY;
-      clear_bit(0, &self->ttp_connect);
-      DEXIT(IRDA_OCB_TRACE, ": cannot discover IrNET in any device !!!\n");
-      return;
-    }
-
-  /* We have a valid address - just connect */
-
-  DEBUG(IRDA_OCB_INFO, "daddr = %08x, lsap = %d, starting IrTTP connection\n",
-	self->daddr, self->dtsap_sel);
-
-  /* Start up TTP - non blocking */
-  irnet_connect_tsap(self);
-
-  DEXIT(IRDA_OCB_TRACE, "\n");
-}
-
-#ifdef DISCOVERY_EVENTS
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_discovery_indication (discovery)
- *
- *    Got a discovery indication from IrLMP, post an event
- *
- * Note : IrLMP take care of matching the hint mask for us, and also
- * check if it is a "new" node for us...
- *
- * As IrLMP filter on the IrLAN hint bit, we get both IrLAN and IrNET
- * nodes, so it's only at connection time that we will know if the
- * node support IrNET, IrLAN or both. The other solution is to check
- * in IAS the PNP ids and service name.
- * Note : even if a node support IrNET (or IrLAN), it's no guarantee
- * that we will be able to connect to it, the node might already be
- * busy...
- *
- * One last thing : in some case, this function will trigger duplicate
- * discovery events. On the other hand, we should catch all
- * discoveries properly (i.e. not miss one). Filtering duplicate here
- * is to messy, so we leave that to user space...
- */
-static void
-irnet_discovery_indication(discinfo_t *		discovery,
-			   DISCOVERY_MODE	mode,
-			   void *		priv)
-{
-  irnet_socket *	self = &irnet_server.s;
-
-  DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
-  DASSERT(priv == &irnet_server, , IRDA_OCB_ERROR,
-	  "Invalid instance (0x%p) !!!\n", priv);
-
-  DEBUG(IRDA_OCB_INFO, "Discovered new IrNET/IrLAN node %s...\n",
-	discovery->info);
-
-  /* Notify the control channel */
-  irnet_post_event(NULL, IRNET_DISCOVER,
-		   discovery->saddr, discovery->daddr, discovery->info,
-		   get_unaligned((__u16 *)discovery->hints));
-
-  DEXIT(IRDA_OCB_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_expiry_indication (expiry)
- *
- *    Got a expiry indication from IrLMP, post an event
- *
- * Note : IrLMP take care of matching the hint mask for us, we only
- * check if it is a "new" node...
- */
-static void
-irnet_expiry_indication(discinfo_t *	expiry,
-			DISCOVERY_MODE	mode,
-			void *		priv)
-{
-  irnet_socket *	self = &irnet_server.s;
-
-  DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self);
-  DASSERT(priv == &irnet_server, , IRDA_OCB_ERROR,
-	  "Invalid instance (0x%p) !!!\n", priv);
-
-  DEBUG(IRDA_OCB_INFO, "IrNET/IrLAN node %s expired...\n",
-	expiry->info);
-
-  /* Notify the control channel */
-  irnet_post_event(NULL, IRNET_EXPIRE,
-		   expiry->saddr, expiry->daddr, expiry->info,
-		   get_unaligned((__u16 *)expiry->hints));
-
-  DEXIT(IRDA_OCB_TRACE, "\n");
-}
-#endif /* DISCOVERY_EVENTS */
-
-
-/*********************** PROC ENTRY CALLBACKS ***********************/
-/*
- * We create a instance in the /proc filesystem, and here we take care
- * of that...
- */
-
-#ifdef CONFIG_PROC_FS
-static int
-irnet_proc_show(struct seq_file *m, void *v)
-{
-  irnet_socket *	self;
-  char *		state;
-  int			i = 0;
-
-  /* Get the IrNET server information... */
-  seq_printf(m, "IrNET server - ");
-  seq_printf(m, "IrDA state: %s, ",
-		 (irnet_server.running ? "running" : "dead"));
-  seq_printf(m, "stsap_sel: %02x, ", irnet_server.s.stsap_sel);
-  seq_printf(m, "dtsap_sel: %02x\n", irnet_server.s.dtsap_sel);
-
-  /* Do we need to continue ? */
-  if(!irnet_server.running)
-    return 0;
-
-  /* Protect access to the instance list */
-  spin_lock_bh(&irnet_server.spinlock);
-
-  /* Get the sockets one by one... */
-  self = (irnet_socket *) hashbin_get_first(irnet_server.list);
-  while(self != NULL)
-    {
-      /* Start printing info about the socket. */
-      seq_printf(m, "\nIrNET socket %d - ", i++);
-
-      /* First, get the requested configuration */
-      seq_printf(m, "Requested IrDA name: \"%s\", ", self->rname);
-      seq_printf(m, "daddr: %08x, ", self->rdaddr);
-      seq_printf(m, "saddr: %08x\n", self->rsaddr);
-
-      /* Second, get all the PPP info */
-      seq_printf(m, "	PPP state: %s",
-		 (self->ppp_open ? "registered" : "unregistered"));
-      if(self->ppp_open)
-	{
-	  seq_printf(m, ", unit: ppp%d",
-			 ppp_unit_number(&self->chan));
-	  seq_printf(m, ", channel: %d",
-			 ppp_channel_index(&self->chan));
-	  seq_printf(m, ", mru: %d",
-			 self->mru);
-	  /* Maybe add self->flags ? Later... */
-	}
-
-      /* Then, get all the IrDA specific info... */
-      if(self->ttp_open)
-	state = "connected";
-      else
-	if(self->tsap != NULL)
-	  state = "connecting";
-	else
-	  if(self->iriap != NULL)
-	    state = "searching";
-	  else
-	    if(self->ttp_connect)
-	      state = "weird";
-	    else
-	      state = "idle";
-      seq_printf(m, "\n	IrDA state: %s, ", state);
-      seq_printf(m, "daddr: %08x, ", self->daddr);
-      seq_printf(m, "stsap_sel: %02x, ", self->stsap_sel);
-      seq_printf(m, "dtsap_sel: %02x\n", self->dtsap_sel);
-
-      /* Next socket, please... */
-      self = (irnet_socket *) hashbin_get_next(irnet_server.list);
-    }
-
-  /* Spin lock end */
-  spin_unlock_bh(&irnet_server.spinlock);
-
-  return 0;
-}
-
-static int irnet_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, irnet_proc_show, NULL);
-}
-
-static const struct file_operations irnet_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= irnet_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-#endif /* PROC_FS */
-
-
-/********************** CONFIGURATION/CLEANUP **********************/
-/*
- * Initialisation and teardown of the IrDA part, called at module
- * insertion and removal...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Prepare the IrNET layer for operation...
- */
-int __init
-irda_irnet_init(void)
-{
-  int		err = 0;
-
-  DENTER(MODULE_TRACE, "()\n");
-
-  /* Pure paranoia - should be redundant */
-  memset(&irnet_server, 0, sizeof(struct irnet_root));
-
-  /* Setup start of irnet instance list */
-  irnet_server.list = hashbin_new(HB_NOLOCK);
-  DABORT(irnet_server.list == NULL, -ENOMEM,
-	 MODULE_ERROR, "Can't allocate hashbin!\n");
-  /* Init spinlock for instance list */
-  spin_lock_init(&irnet_server.spinlock);
-
-  /* Initialise control channel */
-  init_waitqueue_head(&irnet_events.rwait);
-  irnet_events.index = 0;
-  /* Init spinlock for event logging */
-  spin_lock_init(&irnet_events.spinlock);
-
-#ifdef CONFIG_PROC_FS
-  /* Add a /proc file for irnet infos */
-  proc_create("irnet", 0, proc_irda, &irnet_proc_fops);
-#endif /* CONFIG_PROC_FS */
-
-  /* Setup the IrNET server */
-  err = irnet_setup_server();
-
-  if(!err)
-    /* We are no longer functional... */
-    irnet_server.running = 1;
-
-  DEXIT(MODULE_TRACE, "\n");
-  return err;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Cleanup at exit...
- */
-void __exit
-irda_irnet_cleanup(void)
-{
-  DENTER(MODULE_TRACE, "()\n");
-
-  /* We are no longer there... */
-  irnet_server.running = 0;
-
-#ifdef CONFIG_PROC_FS
-  /* Remove our /proc file */
-  remove_proc_entry("irnet", proc_irda);
-#endif /* CONFIG_PROC_FS */
-
-  /* Remove our IrNET server from existence */
-  irnet_destroy_server();
-
-  /* Remove all instances of IrNET socket still present */
-  hashbin_delete(irnet_server.list, (FREE_FUNC) irda_irnet_destroy);
-
-  DEXIT(MODULE_TRACE, "\n");
-}
diff --git a/net/irda/irnet/irnet_irda.h b/net/irda/irnet/irnet_irda.h
deleted file mode 100644
index 3e408952a3f1..000000000000
--- a/net/irda/irnet/irnet_irda.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- *	IrNET protocol module : Synchronous PPP over an IrDA socket.
- *
- *		Jean II - HPL `00 - <jt@hpl.hp.com>
- *
- * This file contains all definitions and declarations necessary for the
- * IRDA part of the IrNET module (dealing with IrTTP, IrIAS and co).
- * This file is a private header, so other modules don't want to know
- * what's in there...
- */
-
-#ifndef IRNET_IRDA_H
-#define IRNET_IRDA_H
-
-/***************************** INCLUDES *****************************/
-/* Please add other headers in irnet.h */
-
-#include "irnet.h"		/* Module global include */
-
-/************************ CONSTANTS & MACROS ************************/
-
-/*
- * Name of the service (socket name) used by IrNET
- */
-/* IAS object name (or part of it) */
-#define IRNET_SERVICE_NAME	"IrNetv1"
-/* IAS attribute */
-#define IRNET_IAS_VALUE		"IrDA:TinyTP:LsapSel"
-/* LMP notify name for client (only for /proc/net/irda/irlmp) */
-#define IRNET_NOTIFY_NAME	"IrNET socket"
-/* LMP notify name for server (only for /proc/net/irda/irlmp) */
-#define IRNET_NOTIFY_NAME_SERV	"IrNET server"
-
-/****************************** TYPES ******************************/
-
-/*
- * This is the main structure where we store all the data pertaining to
- * the IrNET server (listen for connection requests) and the root
- * of the IrNET socket list
- */
-typedef struct irnet_root
-{
-  irnet_socket		s;		/* To pretend we are a client... */
-
-  /* Generic stuff */
-  int			magic;		/* Paranoia */
-  int			running;	/* Are we operational ? */
-
-  /* Link list of all IrNET instances opened */
-  hashbin_t *		list;
-  spinlock_t		spinlock;	/* Serialize access to the list */
-  /* Note : the way hashbin has been designed is absolutely not
-   * reentrant, beware... So, we blindly protect all with spinlock */
-
-  /* Handle for the hint bit advertised in IrLMP */
-  void *		skey;
-
-  /* Server socket part */
-  struct ias_object *	ias_obj;	/* Our service name + lsap in IAS */
-
-} irnet_root;
-
-
-/**************************** PROTOTYPES ****************************/
-
-/* ----------------------- CONTROL CHANNEL ----------------------- */
-static void
-	irnet_post_event(irnet_socket *,
-			 irnet_event,
-			 __u32,
-			 __u32,
-			 char *,
-			 __u16);
-/* ----------------------- IRDA SUBROUTINES ----------------------- */
-static inline int
-	irnet_open_tsap(irnet_socket *);
-static inline __u8
-	irnet_ias_to_tsap(irnet_socket *,
-			  int,
-			  struct ias_value *);
-static inline int
-	irnet_find_lsap_sel(irnet_socket *);
-static inline int
-	irnet_connect_tsap(irnet_socket *);
-static inline int
-	irnet_discover_next_daddr(irnet_socket *);
-static inline int
-	irnet_discover_daddr_and_lsap_sel(irnet_socket *);
-static inline int
-	irnet_dname_to_daddr(irnet_socket *);
-/* ------------------------ SERVER SOCKET ------------------------ */
-static inline int
-	irnet_daddr_to_dname(irnet_socket *);
-static inline irnet_socket *
-	irnet_find_socket(irnet_socket *);
-static inline int
-	irnet_connect_socket(irnet_socket *,
-			     irnet_socket *,
-			     struct qos_info *,
-			     __u32,
-			     __u8);
-static inline void
-	irnet_disconnect_server(irnet_socket *,
-				struct sk_buff *);
-static inline int
-	irnet_setup_server(void);
-static inline void
-	irnet_destroy_server(void);
-/* ---------------------- IRDA-TTP CALLBACKS ---------------------- */
-static int
-	irnet_data_indication(void *,		/* instance */
-			      void *,		/* sap */
-			      struct sk_buff *);
-static void
-	irnet_disconnect_indication(void *,
-				    void *,
-				    LM_REASON,
-				    struct sk_buff *);
-static void
-	irnet_connect_confirm(void *,
-			      void *,
-			      struct qos_info *,
-			      __u32,
-			      __u8,
-			      struct sk_buff *);
-static void
-	irnet_flow_indication(void *,
-			      void *,
-			      LOCAL_FLOW);
-static void
-	irnet_status_indication(void *,
-				LINK_STATUS,
-				LOCK_STATUS);
-static void
-	irnet_connect_indication(void *,
-				 void *,
-				 struct qos_info *,
-				 __u32,
-				 __u8,
-				 struct sk_buff *);
-/* -------------------- IRDA-IAS/LMP CALLBACKS -------------------- */
-static void
-	irnet_getvalue_confirm(int,
-			       __u16,
-			       struct ias_value *,
-			       void *);
-static void
-	irnet_discovervalue_confirm(int,
-				    __u16,
-				    struct ias_value *,
-				    void *);
-#ifdef DISCOVERY_EVENTS
-static void
-	irnet_discovery_indication(discinfo_t *,
-				   DISCOVERY_MODE,
-				   void *);
-static void
-	irnet_expiry_indication(discinfo_t *,
-				DISCOVERY_MODE,
-				void *);
-#endif
-
-/**************************** VARIABLES ****************************/
-
-/*
- * The IrNET server. Listen to connection requests and co...
- */
-static struct irnet_root	irnet_server;
-
-/* Control channel stuff (note : extern) */
-struct irnet_ctrl_channel	irnet_events;
-
-/* The /proc/net/irda directory, defined elsewhere... */
-#ifdef CONFIG_PROC_FS
-extern struct proc_dir_entry *proc_irda;
-#endif /* CONFIG_PROC_FS */
-
-#endif /* IRNET_IRDA_H */
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
deleted file mode 100644
index 7025dcb853d0..000000000000
--- a/net/irda/irnet/irnet_ppp.c
+++ /dev/null
@@ -1,1189 +0,0 @@
-/*
- *	IrNET protocol module : Synchronous PPP over an IrDA socket.
- *
- *		Jean II - HPL `00 - <jt@hpl.hp.com>
- *
- * This file implement the PPP interface and /dev/irnet character device.
- * The PPP interface hook to the ppp_generic module, handle all our
- *	relationship to the PPP code in the kernel (and by extension to pppd),
- *	and exchange PPP frames with this module (send/receive).
- * The /dev/irnet device is used primarily for 2 functions :
- *	1) as a stub for pppd (the ppp daemon), so that we can appropriately
- *	generate PPP sessions (we pretend we are a tty).
- *	2) as a control channel (write commands, read events)
- */
-
-#include <linux/sched/signal.h>
-#include <linux/slab.h>
-
-#include "irnet_ppp.h"		/* Private header */
-/* Please put other headers in irnet.h - Thanks */
-
-/* Generic PPP callbacks (to call us) */
-static const struct ppp_channel_ops irnet_ppp_ops = {
-	.start_xmit = ppp_irnet_send,
-	.ioctl = ppp_irnet_ioctl
-};
-
-/************************* CONTROL CHANNEL *************************/
-/*
- * When a pppd instance is not active on /dev/irnet, it acts as a control
- * channel.
- * Writing allow to set up the IrDA destination of the IrNET channel,
- * and any application may be read events happening in IrNET...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Write is used to send a command to configure a IrNET channel
- * before it is open by pppd. The syntax is : "command argument"
- * Currently there is only two defined commands :
- *	o name : set the requested IrDA nickname of the IrNET peer.
- *	o addr : set the requested IrDA address of the IrNET peer.
- * Note : the code is crude, but effective...
- */
-static inline ssize_t
-irnet_ctrl_write(irnet_socket *	ap,
-		 const char __user *buf,
-		 size_t		count)
-{
-  char		command[IRNET_MAX_COMMAND];
-  char *	start;		/* Current command being processed */
-  char *	next;		/* Next command to process */
-  int		length;		/* Length of current command */
-
-  DENTER(CTRL_TRACE, "(ap=0x%p, count=%zd)\n", ap, count);
-
-  /* Check for overflow... */
-  DABORT(count >= IRNET_MAX_COMMAND, -ENOMEM,
-	 CTRL_ERROR, "Too much data !!!\n");
-
-  /* Get the data in the driver */
-  if(copy_from_user(command, buf, count))
-    {
-      DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
-      return -EFAULT;
-    }
-
-  /* Safe terminate the string */
-  command[count] = '\0';
-  DEBUG(CTRL_INFO, "Command line received is ``%s'' (%zd).\n",
-	command, count);
-
-  /* Check every commands in the command line */
-  next = command;
-  while(next != NULL)
-    {
-      /* Look at the next command */
-      start = next;
-
-	/* Scrap whitespaces before the command */
-	start = skip_spaces(start);
-
-      /* ',' is our command separator */
-      next = strchr(start, ',');
-      if(next)
-	{
-	  *next = '\0';			/* Terminate command */
-	  length = next - start;	/* Length */
-	  next++;			/* Skip the '\0' */
-	}
-      else
-	length = strlen(start);
-
-      DEBUG(CTRL_INFO, "Found command ``%s'' (%d).\n", start, length);
-
-      /* Check if we recognised one of the known command
-       * We can't use "switch" with strings, so hack with "continue" */
-
-      /* First command : name -> Requested IrDA nickname */
-      if(!strncmp(start, "name", 4))
-	{
-	  /* Copy the name only if is included and not "any" */
-	  if((length > 5) && (strcmp(start + 5, "any")))
-	    {
-	      /* Strip out trailing whitespaces */
-	      while(isspace(start[length - 1]))
-		length--;
-
-	      DABORT(length < 5 || length > NICKNAME_MAX_LEN + 5,
-		     -EINVAL, CTRL_ERROR, "Invalid nickname.\n");
-
-	      /* Copy the name for later reuse */
-	      memcpy(ap->rname, start + 5, length - 5);
-	      ap->rname[length - 5] = '\0';
-	    }
-	  else
-	    ap->rname[0] = '\0';
-	  DEBUG(CTRL_INFO, "Got rname = ``%s''\n", ap->rname);
-
-	  /* Restart the loop */
-	  continue;
-	}
-
-      /* Second command : addr, daddr -> Requested IrDA destination address
-       * Also process : saddr -> Requested IrDA source address */
-      if((!strncmp(start, "addr", 4)) ||
-	 (!strncmp(start, "daddr", 5)) ||
-	 (!strncmp(start, "saddr", 5)))
-	{
-	  __u32		addr = DEV_ADDR_ANY;
-
-	  /* Copy the address only if is included and not "any" */
-	  if((length > 5) && (strcmp(start + 5, "any")))
-	    {
-	      char *	begp = start + 5;
-	      char *	endp;
-
-	      /* Scrap whitespaces before the command */
-	      begp = skip_spaces(begp);
-
-	      /* Convert argument to a number (last arg is the base) */
-	      addr = simple_strtoul(begp, &endp, 16);
-	      /* Has it worked  ? (endp should be start + length) */
-	      DABORT(endp <= (start + 5), -EINVAL,
-		     CTRL_ERROR, "Invalid address.\n");
-	    }
-	  /* Which type of address ? */
-	  if(start[0] == 's')
-	    {
-	      /* Save it */
-	      ap->rsaddr = addr;
-	      DEBUG(CTRL_INFO, "Got rsaddr = %08x\n", ap->rsaddr);
-	    }
-	  else
-	    {
-	      /* Save it */
-	      ap->rdaddr = addr;
-	      DEBUG(CTRL_INFO, "Got rdaddr = %08x\n", ap->rdaddr);
-	    }
-
-	  /* Restart the loop */
-	  continue;
-	}
-
-      /* Other possible command : connect N (number of retries) */
-
-      /* No command matched -> Failed... */
-      DABORT(1, -EINVAL, CTRL_ERROR, "Not a recognised IrNET command.\n");
-    }
-
-  /* Success : we have parsed all commands successfully */
-  return count;
-}
-
-#ifdef INITIAL_DISCOVERY
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_get_discovery_log (self)
- *
- *    Query the content on the discovery log if not done
- *
- * This function query the current content of the discovery log
- * at the startup of the event channel and save it in the internal struct.
- */
-static void
-irnet_get_discovery_log(irnet_socket *	ap)
-{
-  __u16		mask = irlmp_service_to_hint(S_LAN);
-
-  /* Ask IrLMP for the current discovery log */
-  ap->discoveries = irlmp_get_discoveries(&ap->disco_number, mask,
-					  DISCOVERY_DEFAULT_SLOTS);
-
-  /* Check if the we got some results */
-  if(ap->discoveries == NULL)
-    ap->disco_number = -1;
-
-  DEBUG(CTRL_INFO, "Got the log (0x%p), size is %d\n",
-	ap->discoveries, ap->disco_number);
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Function irnet_read_discovery_log (self, event)
- *
- *    Read the content on the discovery log
- *
- * This function dump the current content of the discovery log
- * at the startup of the event channel.
- * Return 1 if wrote an event on the control channel...
- *
- * State of the ap->disco_XXX variables :
- * Socket creation :  discoveries = NULL ; disco_index = 0 ; disco_number = 0
- * While reading :    discoveries = ptr  ; disco_index = X ; disco_number = Y
- * After reading :    discoveries = NULL ; disco_index = Y ; disco_number = -1
- */
-static inline int
-irnet_read_discovery_log(irnet_socket *ap, char *event, int buf_size)
-{
-  int		done_event = 0;
-
-  DENTER(CTRL_TRACE, "(ap=0x%p, event=0x%p)\n",
-	 ap, event);
-
-  /* Test if we have some work to do or we have already finished */
-  if(ap->disco_number == -1)
-    {
-      DEBUG(CTRL_INFO, "Already done\n");
-      return 0;
-    }
-
-  /* Test if it's the first time and therefore we need to get the log */
-  if(ap->discoveries == NULL)
-    irnet_get_discovery_log(ap);
-
-  /* Check if we have more item to dump */
-  if(ap->disco_index < ap->disco_number)
-    {
-      /* Write an event */
-      snprintf(event, buf_size,
-	       "Found %08x (%s) behind %08x {hints %02X-%02X}\n",
-	       ap->discoveries[ap->disco_index].daddr,
-	       ap->discoveries[ap->disco_index].info,
-	       ap->discoveries[ap->disco_index].saddr,
-	       ap->discoveries[ap->disco_index].hints[0],
-	       ap->discoveries[ap->disco_index].hints[1]);
-      DEBUG(CTRL_INFO, "Writing discovery %d : %s\n",
-	    ap->disco_index, ap->discoveries[ap->disco_index].info);
-
-      /* We have an event */
-      done_event = 1;
-      /* Next discovery */
-      ap->disco_index++;
-    }
-
-  /* Check if we have done the last item */
-  if(ap->disco_index >= ap->disco_number)
-    {
-      /* No more items : remove the log and signal termination */
-      DEBUG(CTRL_INFO, "Cleaning up log (0x%p)\n",
-	    ap->discoveries);
-      if(ap->discoveries != NULL)
-	{
-	  /* Cleanup our copy of the discovery log */
-	  kfree(ap->discoveries);
-	  ap->discoveries = NULL;
-	}
-      ap->disco_number = -1;
-    }
-
-  return done_event;
-}
-#endif /* INITIAL_DISCOVERY */
-
-/*------------------------------------------------------------------*/
-/*
- * Read is used to get IrNET events
- */
-static inline ssize_t
-irnet_ctrl_read(irnet_socket *	ap,
-		struct file *	file,
-		char __user *	buf,
-		size_t		count)
-{
-  DECLARE_WAITQUEUE(wait, current);
-  char		event[75];
-  ssize_t	ret = 0;
-
-  DENTER(CTRL_TRACE, "(ap=0x%p, count=%zd)\n", ap, count);
-
-#ifdef INITIAL_DISCOVERY
-  /* Check if we have read the log */
-  if (irnet_read_discovery_log(ap, event, sizeof(event)))
-    {
-      count = min(strlen(event), count);
-      if (copy_to_user(buf, event, count))
-	{
-	  DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
-	  return -EFAULT;
-	}
-
-      DEXIT(CTRL_TRACE, "\n");
-      return count;
-    }
-#endif /* INITIAL_DISCOVERY */
-
-  /* Put ourselves on the wait queue to be woken up */
-  add_wait_queue(&irnet_events.rwait, &wait);
-  set_current_state(TASK_INTERRUPTIBLE);
-  for(;;)
-    {
-      /* If there is unread events */
-      ret = 0;
-      if(ap->event_index != irnet_events.index)
-	break;
-      ret = -EAGAIN;
-      if(file->f_flags & O_NONBLOCK)
-	break;
-      ret = -ERESTARTSYS;
-      if(signal_pending(current))
-	break;
-      /* Yield and wait to be woken up */
-      schedule();
-    }
-  __set_current_state(TASK_RUNNING);
-  remove_wait_queue(&irnet_events.rwait, &wait);
-
-  /* Did we got it ? */
-  if(ret != 0)
-    {
-      /* No, return the error code */
-      DEXIT(CTRL_TRACE, " - ret %zd\n", ret);
-      return ret;
-    }
-
-  /* Which event is it ? */
-  switch(irnet_events.log[ap->event_index].event)
-    {
-    case IRNET_DISCOVER:
-      snprintf(event, sizeof(event),
-	       "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n",
-	       irnet_events.log[ap->event_index].daddr,
-	       irnet_events.log[ap->event_index].name,
-	       irnet_events.log[ap->event_index].saddr,
-	       irnet_events.log[ap->event_index].hints.byte[0],
-	       irnet_events.log[ap->event_index].hints.byte[1]);
-      break;
-    case IRNET_EXPIRE:
-      snprintf(event, sizeof(event),
-	       "Expired %08x (%s) behind %08x {hints %02X-%02X}\n",
-	       irnet_events.log[ap->event_index].daddr,
-	       irnet_events.log[ap->event_index].name,
-	       irnet_events.log[ap->event_index].saddr,
-	       irnet_events.log[ap->event_index].hints.byte[0],
-	       irnet_events.log[ap->event_index].hints.byte[1]);
-      break;
-    case IRNET_CONNECT_TO:
-      snprintf(event, sizeof(event), "Connected to %08x (%s) on ppp%d\n",
-	       irnet_events.log[ap->event_index].daddr,
-	       irnet_events.log[ap->event_index].name,
-	       irnet_events.log[ap->event_index].unit);
-      break;
-    case IRNET_CONNECT_FROM:
-      snprintf(event, sizeof(event), "Connection from %08x (%s) on ppp%d\n",
-	       irnet_events.log[ap->event_index].daddr,
-	       irnet_events.log[ap->event_index].name,
-	       irnet_events.log[ap->event_index].unit);
-      break;
-    case IRNET_REQUEST_FROM:
-      snprintf(event, sizeof(event), "Request from %08x (%s) behind %08x\n",
-	       irnet_events.log[ap->event_index].daddr,
-	       irnet_events.log[ap->event_index].name,
-	       irnet_events.log[ap->event_index].saddr);
-      break;
-    case IRNET_NOANSWER_FROM:
-      snprintf(event, sizeof(event), "No-answer from %08x (%s) on ppp%d\n",
-	       irnet_events.log[ap->event_index].daddr,
-	       irnet_events.log[ap->event_index].name,
-	       irnet_events.log[ap->event_index].unit);
-      break;
-    case IRNET_BLOCKED_LINK:
-      snprintf(event, sizeof(event), "Blocked link with %08x (%s) on ppp%d\n",
-	       irnet_events.log[ap->event_index].daddr,
-	       irnet_events.log[ap->event_index].name,
-	       irnet_events.log[ap->event_index].unit);
-      break;
-    case IRNET_DISCONNECT_FROM:
-      snprintf(event, sizeof(event), "Disconnection from %08x (%s) on ppp%d\n",
-	       irnet_events.log[ap->event_index].daddr,
-	       irnet_events.log[ap->event_index].name,
-	       irnet_events.log[ap->event_index].unit);
-      break;
-    case IRNET_DISCONNECT_TO:
-      snprintf(event, sizeof(event), "Disconnected to %08x (%s)\n",
-	       irnet_events.log[ap->event_index].daddr,
-	       irnet_events.log[ap->event_index].name);
-      break;
-    default:
-      snprintf(event, sizeof(event), "Bug\n");
-    }
-  /* Increment our event index */
-  ap->event_index = (ap->event_index + 1) % IRNET_MAX_EVENTS;
-
-  DEBUG(CTRL_INFO, "Event is :%s", event);
-
-  count = min(strlen(event), count);
-  if (copy_to_user(buf, event, count))
-    {
-      DERROR(CTRL_ERROR, "Invalid user space pointer.\n");
-      return -EFAULT;
-    }
-
-  DEXIT(CTRL_TRACE, "\n");
-  return count;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Poll : called when someone do a select on /dev/irnet.
- * Just check if there are new events...
- */
-static inline unsigned int
-irnet_ctrl_poll(irnet_socket *	ap,
-		struct file *	file,
-		poll_table *	wait)
-{
-  unsigned int mask;
-
-  DENTER(CTRL_TRACE, "(ap=0x%p)\n", ap);
-
-  poll_wait(file, &irnet_events.rwait, wait);
-  mask = POLLOUT | POLLWRNORM;
-  /* If there is unread events */
-  if(ap->event_index != irnet_events.index)
-    mask |= POLLIN | POLLRDNORM;
-#ifdef INITIAL_DISCOVERY
-  if(ap->disco_number != -1)
-    {
-      /* Test if it's the first time and therefore we need to get the log */
-      if(ap->discoveries == NULL)
-	irnet_get_discovery_log(ap);
-      /* Recheck */
-      if(ap->disco_number != -1)
-	mask |= POLLIN | POLLRDNORM;
-    }
-#endif /* INITIAL_DISCOVERY */
-
-  DEXIT(CTRL_TRACE, " - mask=0x%X\n", mask);
-  return mask;
-}
-
-
-/*********************** FILESYSTEM CALLBACKS ***********************/
-/*
- * Implement the usual open, read, write functions that will be called
- * by the file system when some action is performed on /dev/irnet.
- * Most of those actions will in fact be performed by "pppd" or
- * the control channel, we just act as a redirector...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Open : when somebody open /dev/irnet
- * We basically create a new instance of irnet and initialise it.
- */
-static int
-dev_irnet_open(struct inode *	inode,
-	       struct file *	file)
-{
-  struct irnet_socket *	ap;
-  int			err;
-
-  DENTER(FS_TRACE, "(file=0x%p)\n", file);
-
-#ifdef SECURE_DEVIRNET
-  /* This could (should?) be enforced by the permissions on /dev/irnet. */
-  if(!capable(CAP_NET_ADMIN))
-    return -EPERM;
-#endif /* SECURE_DEVIRNET */
-
-  /* Allocate a private structure for this IrNET instance */
-  ap = kzalloc(sizeof(*ap), GFP_KERNEL);
-  DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n");
-
-  /* initialize the irnet structure */
-  ap->file = file;
-
-  /* PPP channel setup */
-  ap->ppp_open = 0;
-  ap->chan.private = ap;
-  ap->chan.ops = &irnet_ppp_ops;
-  ap->chan.mtu = (2048 - TTP_MAX_HEADER - 2 - PPP_HDRLEN);
-  ap->chan.hdrlen = 2 + TTP_MAX_HEADER;		/* for A/C + Max IrDA hdr */
-  /* PPP parameters */
-  ap->mru = (2048 - TTP_MAX_HEADER - 2 - PPP_HDRLEN);
-  ap->xaccm[0] = ~0U;
-  ap->xaccm[3] = 0x60000000U;
-  ap->raccm = ~0U;
-
-  /* Setup the IrDA part... */
-  err = irda_irnet_create(ap);
-  if(err)
-    {
-      DERROR(FS_ERROR, "Can't setup IrDA link...\n");
-      kfree(ap);
-
-      return err;
-    }
-
-  /* For the control channel */
-  ap->event_index = irnet_events.index;	/* Cancel all past events */
-
-  mutex_init(&ap->lock);
-
-  /* Put our stuff where we will be able to find it later */
-  file->private_data = ap;
-
-  DEXIT(FS_TRACE, " - ap=0x%p\n", ap);
-
-  return 0;
-}
-
-
-/*------------------------------------------------------------------*/
-/*
- * Close : when somebody close /dev/irnet
- * Destroy the instance of /dev/irnet
- */
-static int
-dev_irnet_close(struct inode *	inode,
-		struct file *	file)
-{
-  irnet_socket *	ap = file->private_data;
-
-  DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
-	 file, ap);
-  DABORT(ap == NULL, 0, FS_ERROR, "ap is NULL !!!\n");
-
-  /* Detach ourselves */
-  file->private_data = NULL;
-
-  /* Close IrDA stuff */
-  irda_irnet_destroy(ap);
-
-  /* Disconnect from the generic PPP layer if not already done */
-  if(ap->ppp_open)
-    {
-      DERROR(FS_ERROR, "Channel still registered - deregistering !\n");
-      ap->ppp_open = 0;
-      ppp_unregister_channel(&ap->chan);
-    }
-
-  kfree(ap);
-
-  DEXIT(FS_TRACE, "\n");
-  return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Write does nothing.
- * (we receive packet from ppp_generic through ppp_irnet_send())
- */
-static ssize_t
-dev_irnet_write(struct file *	file,
-		const char __user *buf,
-		size_t		count,
-		loff_t *	ppos)
-{
-  irnet_socket *	ap = file->private_data;
-
-  DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%zd)\n",
-	file, ap, count);
-  DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n");
-
-  /* If we are connected to ppp_generic, let it handle the job */
-  if(ap->ppp_open)
-    return -EAGAIN;
-  else
-    return irnet_ctrl_write(ap, buf, count);
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Read doesn't do much either.
- * (pppd poll us, but ultimately reads through /dev/ppp)
- */
-static ssize_t
-dev_irnet_read(struct file *	file,
-	       char __user *	buf,
-	       size_t		count,
-	       loff_t *		ppos)
-{
-  irnet_socket *	ap = file->private_data;
-
-  DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%zd)\n",
-	file, ap, count);
-  DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n");
-
-  /* If we are connected to ppp_generic, let it handle the job */
-  if(ap->ppp_open)
-    return -EAGAIN;
-  else
-    return irnet_ctrl_read(ap, file, buf, count);
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Poll : called when someone do a select on /dev/irnet
- */
-static unsigned int
-dev_irnet_poll(struct file *	file,
-	       poll_table *	wait)
-{
-  irnet_socket *	ap = file->private_data;
-  unsigned int		mask;
-
-  DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
-	 file, ap);
-
-  mask = POLLOUT | POLLWRNORM;
-  DABORT(ap == NULL, mask, FS_ERROR, "ap is NULL !!!\n");
-
-  /* If we are connected to ppp_generic, let it handle the job */
-  if(!ap->ppp_open)
-    mask |= irnet_ctrl_poll(ap, file, wait);
-
-  DEXIT(FS_TRACE, " - mask=0x%X\n", mask);
-  return mask;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * IOCtl : Called when someone does some ioctls on /dev/irnet
- * This is the way pppd configure us and control us while the PPP
- * instance is active.
- */
-static long
-dev_irnet_ioctl(
-		struct file *	file,
-		unsigned int	cmd,
-		unsigned long	arg)
-{
-  irnet_socket *	ap = file->private_data;
-  int			err;
-  int			val;
-  void __user *argp = (void __user *)arg;
-
-  DENTER(FS_TRACE, "(file=0x%p, ap=0x%p, cmd=0x%X)\n",
-	 file, ap, cmd);
-
-  /* Basic checks... */
-  DASSERT(ap != NULL, -ENXIO, PPP_ERROR, "ap is NULL...\n");
-#ifdef SECURE_DEVIRNET
-  if(!capable(CAP_NET_ADMIN))
-    return -EPERM;
-#endif /* SECURE_DEVIRNET */
-
-  err = -EFAULT;
-  switch(cmd)
-    {
-      /* Set discipline (should be N_SYNC_PPP or N_TTY) */
-    case TIOCSETD:
-      if(get_user(val, (int __user *)argp))
-	break;
-      if((val == N_SYNC_PPP) || (val == N_PPP))
-	{
-	  DEBUG(FS_INFO, "Entering PPP discipline.\n");
-	  /* PPP channel setup (ap->chan in configured in dev_irnet_open())*/
-	  if (mutex_lock_interruptible(&ap->lock))
-		  return -EINTR;
-
-	  err = ppp_register_channel(&ap->chan);
-	  if(err == 0)
-	    {
-	      /* Our ppp side is active */
-	      ap->ppp_open = 1;
-
-	      DEBUG(FS_INFO, "Trying to establish a connection.\n");
-	      /* Setup the IrDA link now - may fail... */
-	      irda_irnet_connect(ap);
-	    }
-	  else
-	    DERROR(FS_ERROR, "Can't setup PPP channel...\n");
-
-          mutex_unlock(&ap->lock);
-	}
-      else
-	{
-	  /* In theory, should be N_TTY */
-	  DEBUG(FS_INFO, "Exiting PPP discipline.\n");
-	  /* Disconnect from the generic PPP layer */
-	  if (mutex_lock_interruptible(&ap->lock))
-		  return -EINTR;
-
-	  if(ap->ppp_open)
-	    {
-	      ap->ppp_open = 0;
-	      ppp_unregister_channel(&ap->chan);
-	    }
-	  else
-	    DERROR(FS_ERROR, "Channel not registered !\n");
-	  err = 0;
-
-	  mutex_unlock(&ap->lock);
-	}
-      break;
-
-      /* Query PPP channel and unit number */
-    case PPPIOCGCHAN:
-      if (mutex_lock_interruptible(&ap->lock))
-	      return -EINTR;
-
-      if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan),
-						(int __user *)argp))
-	err = 0;
-
-      mutex_unlock(&ap->lock);
-      break;
-    case PPPIOCGUNIT:
-      if (mutex_lock_interruptible(&ap->lock))
-	      return -EINTR;
-
-      if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan),
-						(int __user *)argp))
-        err = 0;
-
-      mutex_unlock(&ap->lock);
-      break;
-
-      /* All these ioctls can be passed both directly and from ppp_generic,
-       * so we just deal with them in one place...
-       */
-    case PPPIOCGFLAGS:
-    case PPPIOCSFLAGS:
-    case PPPIOCGASYNCMAP:
-    case PPPIOCSASYNCMAP:
-    case PPPIOCGRASYNCMAP:
-    case PPPIOCSRASYNCMAP:
-    case PPPIOCGXASYNCMAP:
-    case PPPIOCSXASYNCMAP:
-    case PPPIOCGMRU:
-    case PPPIOCSMRU:
-      DEBUG(FS_INFO, "Standard PPP ioctl.\n");
-      if(!capable(CAP_NET_ADMIN))
-	err = -EPERM;
-      else {
-	if (mutex_lock_interruptible(&ap->lock))
-	      return -EINTR;
-
-	err = ppp_irnet_ioctl(&ap->chan, cmd, arg);
-
-	mutex_unlock(&ap->lock);
-      }
-      break;
-
-      /* TTY IOCTLs : Pretend that we are a tty, to keep pppd happy */
-      /* Get termios */
-    case TCGETS:
-      DEBUG(FS_INFO, "Get termios.\n");
-      if (mutex_lock_interruptible(&ap->lock))
-	      return -EINTR;
-
-#ifndef TCGETS2
-      if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios))
-	err = 0;
-#else
-      if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios))
-	err = 0;
-#endif
-
-      mutex_unlock(&ap->lock);
-      break;
-      /* Set termios */
-    case TCSETSF:
-      DEBUG(FS_INFO, "Set termios.\n");
-      if (mutex_lock_interruptible(&ap->lock))
-	      return -EINTR;
-
-#ifndef TCGETS2
-      if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp))
-	err = 0;
-#else
-      if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp))
-	err = 0;
-#endif
-
-      mutex_unlock(&ap->lock);
-      break;
-
-      /* Set DTR/RTS */
-    case TIOCMBIS:
-    case TIOCMBIC:
-      /* Set exclusive/non-exclusive mode */
-    case TIOCEXCL:
-    case TIOCNXCL:
-      DEBUG(FS_INFO, "TTY compatibility.\n");
-      err = 0;
-      break;
-
-    case TCGETA:
-      DEBUG(FS_INFO, "TCGETA\n");
-      break;
-
-    case TCFLSH:
-      DEBUG(FS_INFO, "TCFLSH\n");
-      /* Note : this will flush buffers in PPP, so it *must* be done
-       * We should also worry that we don't accept junk here and that
-       * we get rid of our own buffers */
-#ifdef FLUSH_TO_PPP
-      if (mutex_lock_interruptible(&ap->lock))
-	      return -EINTR;
-      ppp_output_wakeup(&ap->chan);
-      mutex_unlock(&ap->lock);
-#endif /* FLUSH_TO_PPP */
-      err = 0;
-      break;
-
-    case FIONREAD:
-      DEBUG(FS_INFO, "FIONREAD\n");
-      val = 0;
-      if(put_user(val, (int __user *)argp))
-	break;
-      err = 0;
-      break;
-
-    default:
-      DERROR(FS_ERROR, "Unsupported ioctl (0x%X)\n", cmd);
-      err = -ENOTTY;
-    }
-
-  DEXIT(FS_TRACE, " - err = 0x%X\n", err);
-  return err;
-}
-
-/************************** PPP CALLBACKS **************************/
-/*
- * This are the functions that the generic PPP driver in the kernel
- * will call to communicate to us.
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Prepare the ppp frame for transmission over the IrDA socket.
- * We make sure that the header space is enough, and we change ppp header
- * according to flags passed by pppd.
- * This is not a callback, but just a helper function used in ppp_irnet_send()
- */
-static inline struct sk_buff *
-irnet_prepare_skb(irnet_socket *	ap,
-		  struct sk_buff *	skb)
-{
-  unsigned char *	data;
-  int			proto;		/* PPP protocol */
-  int			islcp;		/* Protocol == LCP */
-  int			needaddr;	/* Need PPP address */
-
-  DENTER(PPP_TRACE, "(ap=0x%p, skb=0x%p)\n",
-	 ap, skb);
-
-  /* Extract PPP protocol from the frame */
-  data  = skb->data;
-  proto = (data[0] << 8) + data[1];
-
-  /* LCP packets with codes between 1 (configure-request)
-   * and 7 (code-reject) must be sent as though no options
-   * have been negotiated. */
-  islcp = (proto == PPP_LCP) && (1 <= data[2]) && (data[2] <= 7);
-
-  /* compress protocol field if option enabled */
-  if((data[0] == 0) && (ap->flags & SC_COMP_PROT) && (!islcp))
-    skb_pull(skb,1);
-
-  /* Check if we need address/control fields */
-  needaddr = 2*((ap->flags & SC_COMP_AC) == 0 || islcp);
-
-  /* Is the skb headroom large enough to contain all IrDA-headers? */
-  if((skb_headroom(skb) < (ap->max_header_size + needaddr)) ||
-      (skb_shared(skb)))
-    {
-      struct sk_buff *	new_skb;
-
-      DEBUG(PPP_INFO, "Reallocating skb\n");
-
-      /* Create a new skb */
-      new_skb = skb_realloc_headroom(skb, ap->max_header_size + needaddr);
-
-      /* We have to free the original skb anyway */
-      dev_kfree_skb(skb);
-
-      /* Did the realloc succeed ? */
-      DABORT(new_skb == NULL, NULL, PPP_ERROR, "Could not realloc skb\n");
-
-      /* Use the new skb instead */
-      skb = new_skb;
-    }
-
-  /* prepend address/control fields if necessary */
-  if(needaddr)
-    {
-      skb_push(skb, 2);
-      skb->data[0] = PPP_ALLSTATIONS;
-      skb->data[1] = PPP_UI;
-    }
-
-  DEXIT(PPP_TRACE, "\n");
-
-  return skb;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Send a packet to the peer over the IrTTP connection.
- * Returns 1 iff the packet was accepted.
- * Returns 0 iff packet was not consumed.
- * If the packet was not accepted, we will call ppp_output_wakeup
- * at some later time to reactivate flow control in ppp_generic.
- */
-static int
-ppp_irnet_send(struct ppp_channel *	chan,
-	       struct sk_buff *		skb)
-{
-  irnet_socket *	self = (struct irnet_socket *) chan->private;
-  int			ret;
-
-  DENTER(PPP_TRACE, "(channel=0x%p, ap/self=0x%p)\n",
-	 chan, self);
-
-  /* Check if things are somewhat valid... */
-  DASSERT(self != NULL, 0, PPP_ERROR, "Self is NULL !!!\n");
-
-  /* Check if we are connected */
-  if(!(test_bit(0, &self->ttp_open)))
-    {
-#ifdef CONNECT_IN_SEND
-      /* Let's try to connect one more time... */
-      /* Note : we won't be connected after this call, but we should be
-       * ready for next packet... */
-      /* If we are already connecting, this will fail */
-      irda_irnet_connect(self);
-#endif /* CONNECT_IN_SEND */
-
-      DEBUG(PPP_INFO, "IrTTP not ready ! (%ld-%ld)\n",
-	    self->ttp_open, self->ttp_connect);
-
-      /* Note : we can either drop the packet or block the packet.
-       *
-       * Blocking the packet allow us a better connection time,
-       * because by calling ppp_output_wakeup() we can have
-       * ppp_generic resending the LCP request immediately to us,
-       * rather than waiting for one of pppd periodic transmission of
-       * LCP request.
-       *
-       * On the other hand, if we block all packet, all those periodic
-       * transmissions of pppd accumulate in ppp_generic, creating a
-       * backlog of LCP request. When we eventually connect later on,
-       * we have to transmit all this backlog before we can connect
-       * proper (if we don't timeout before).
-       *
-       * The current strategy is as follow :
-       * While we are attempting to connect, we block packets to get
-       * a better connection time.
-       * If we fail to connect, we drain the queue and start dropping packets
-       */
-#ifdef BLOCK_WHEN_CONNECT
-      /* If we are attempting to connect */
-      if(test_bit(0, &self->ttp_connect))
-	{
-	  /* Blocking packet, ppp_generic will retry later */
-	  return 0;
-	}
-#endif /* BLOCK_WHEN_CONNECT */
-
-      /* Dropping packet, pppd will retry later */
-      dev_kfree_skb(skb);
-      return 1;
-    }
-
-  /* Check if the queue can accept any packet, otherwise block */
-  if(self->tx_flow != FLOW_START)
-    DRETURN(0, PPP_INFO, "IrTTP queue full (%d skbs)...\n",
-	    skb_queue_len(&self->tsap->tx_queue));
-
-  /* Prepare ppp frame for transmission */
-  skb = irnet_prepare_skb(self, skb);
-  DABORT(skb == NULL, 1, PPP_ERROR, "Prepare skb for Tx failed.\n");
-
-  /* Send the packet to IrTTP */
-  ret = irttp_data_request(self->tsap, skb);
-  if(ret < 0)
-    {
-      /*
-       * > IrTTPs tx queue is full, so we just have to
-       * > drop the frame! You might think that we should
-       * > just return -1 and don't deallocate the frame,
-       * > but that is dangerous since it's possible that
-       * > we have replaced the original skb with a new
-       * > one with larger headroom, and that would really
-       * > confuse do_dev_queue_xmit() in dev.c! I have
-       * > tried :-) DB
-       * Correction : we verify the flow control above (self->tx_flow),
-       * so we come here only if IrTTP doesn't like the packet (empty,
-       * too large, IrTTP not connected). In those rare cases, it's ok
-       * to drop it, we don't want to see it here again...
-       * Jean II
-       */
-      DERROR(PPP_ERROR, "IrTTP doesn't like this packet !!! (0x%X)\n", ret);
-      /* irttp_data_request already free the packet */
-    }
-
-  DEXIT(PPP_TRACE, "\n");
-  return 1;	/* Packet has been consumed */
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Take care of the ioctls that ppp_generic doesn't want to deal with...
- * Note : we are also called from dev_irnet_ioctl().
- */
-static int
-ppp_irnet_ioctl(struct ppp_channel *	chan,
-		unsigned int		cmd,
-		unsigned long		arg)
-{
-  irnet_socket *	ap = (struct irnet_socket *) chan->private;
-  int			err;
-  int			val;
-  u32			accm[8];
-  void __user *argp = (void __user *)arg;
-
-  DENTER(PPP_TRACE, "(channel=0x%p, ap=0x%p, cmd=0x%X)\n",
-	 chan, ap, cmd);
-
-  /* Basic checks... */
-  DASSERT(ap != NULL, -ENXIO, PPP_ERROR, "ap is NULL...\n");
-
-  err = -EFAULT;
-  switch(cmd)
-    {
-      /* PPP flags */
-    case PPPIOCGFLAGS:
-      val = ap->flags | ap->rbits;
-      if(put_user(val, (int __user *) argp))
-	break;
-      err = 0;
-      break;
-    case PPPIOCSFLAGS:
-      if(get_user(val, (int __user *) argp))
-	break;
-      ap->flags = val & ~SC_RCV_BITS;
-      ap->rbits = val & SC_RCV_BITS;
-      err = 0;
-      break;
-
-      /* Async map stuff - all dummy to please pppd */
-    case PPPIOCGASYNCMAP:
-      if(put_user(ap->xaccm[0], (u32 __user *) argp))
-	break;
-      err = 0;
-      break;
-    case PPPIOCSASYNCMAP:
-      if(get_user(ap->xaccm[0], (u32 __user *) argp))
-	break;
-      err = 0;
-      break;
-    case PPPIOCGRASYNCMAP:
-      if(put_user(ap->raccm, (u32 __user *) argp))
-	break;
-      err = 0;
-      break;
-    case PPPIOCSRASYNCMAP:
-      if(get_user(ap->raccm, (u32 __user *) argp))
-	break;
-      err = 0;
-      break;
-    case PPPIOCGXASYNCMAP:
-      if(copy_to_user(argp, ap->xaccm, sizeof(ap->xaccm)))
-	break;
-      err = 0;
-      break;
-    case PPPIOCSXASYNCMAP:
-      if(copy_from_user(accm, argp, sizeof(accm)))
-	break;
-      accm[2] &= ~0x40000000U;		/* can't escape 0x5e */
-      accm[3] |= 0x60000000U;		/* must escape 0x7d, 0x7e */
-      memcpy(ap->xaccm, accm, sizeof(ap->xaccm));
-      err = 0;
-      break;
-
-      /* Max PPP frame size */
-    case PPPIOCGMRU:
-      if(put_user(ap->mru, (int __user *) argp))
-	break;
-      err = 0;
-      break;
-    case PPPIOCSMRU:
-      if(get_user(val, (int __user *) argp))
-	break;
-      if(val < PPP_MRU)
-	val = PPP_MRU;
-      ap->mru = val;
-      err = 0;
-      break;
-
-    default:
-      DEBUG(PPP_INFO, "Unsupported ioctl (0x%X)\n", cmd);
-      err = -ENOIOCTLCMD;
-    }
-
-  DEXIT(PPP_TRACE, " - err = 0x%X\n", err);
-  return err;
-}
-
-/************************** INITIALISATION **************************/
-/*
- * Module initialisation and all that jazz...
- */
-
-/*------------------------------------------------------------------*/
-/*
- * Hook our device callbacks in the filesystem, to connect our code
- * to /dev/irnet
- */
-static inline int __init
-ppp_irnet_init(void)
-{
-  int err = 0;
-
-  DENTER(MODULE_TRACE, "()\n");
-
-  /* Allocate ourselves as a minor in the misc range */
-  err = misc_register(&irnet_misc_device);
-
-  DEXIT(MODULE_TRACE, "\n");
-  return err;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Cleanup at exit...
- */
-static inline void __exit
-ppp_irnet_cleanup(void)
-{
-  DENTER(MODULE_TRACE, "()\n");
-
-  /* De-allocate /dev/irnet minor in misc range */
-  misc_deregister(&irnet_misc_device);
-
-  DEXIT(MODULE_TRACE, "\n");
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Module main entry point
- */
-static int __init
-irnet_init(void)
-{
-  int err;
-
-  /* Initialise both parts... */
-  err = irda_irnet_init();
-  if(!err)
-    err = ppp_irnet_init();
-  return err;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Module exit
- */
-static void __exit
-irnet_cleanup(void)
-{
-  irda_irnet_cleanup();
-  ppp_irnet_cleanup();
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Module magic
- */
-module_init(irnet_init);
-module_exit(irnet_cleanup);
-MODULE_AUTHOR("Jean Tourrilhes <jt@hpl.hp.com>");
-MODULE_DESCRIPTION("IrNET : Synchronous PPP over IrDA");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CHARDEV(10, 187);
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
deleted file mode 100644
index 32061442cc8e..000000000000
--- a/net/irda/irnet/irnet_ppp.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- *	IrNET protocol module : Synchronous PPP over an IrDA socket.
- *
- *		Jean II - HPL `00 - <jt@hpl.hp.com>
- *
- * This file contains all definitions and declarations necessary for the
- * PPP part of the IrNET module.
- * This file is a private header, so other modules don't want to know
- * what's in there...
- */
-
-#ifndef IRNET_PPP_H
-#define IRNET_PPP_H
-
-/***************************** INCLUDES *****************************/
-
-#include "irnet.h"		/* Module global include */
-#include <linux/miscdevice.h>
-
-/************************ CONSTANTS & MACROS ************************/
-
-/* IrNET control channel stuff */
-#define IRNET_MAX_COMMAND	256	/* Max length of a command line */
-
-/* PPP hardcore stuff */
-
-/* Bits in rbits (PPP flags in irnet struct) */
-#define SC_RCV_BITS	(SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
-
-/* Bit numbers in busy */
-#define XMIT_BUSY	0
-#define RECV_BUSY	1
-#define XMIT_WAKEUP	2
-#define XMIT_FULL	3
-
-/* Queue management */
-#define PPPSYNC_MAX_RQLEN	32	/* arbitrary */
-
-/****************************** TYPES ******************************/
-
-
-/**************************** PROTOTYPES ****************************/
-
-/* ----------------------- CONTROL CHANNEL ----------------------- */
-static inline ssize_t
-	irnet_ctrl_write(irnet_socket *,
-			 const char *,
-			 size_t);
-static inline ssize_t
-	irnet_ctrl_read(irnet_socket *,
-			struct file *,
-			char *,
-			size_t);
-static inline unsigned int
-	irnet_ctrl_poll(irnet_socket *,
-			struct file *,
-			poll_table *);
-/* ----------------------- CHARACTER DEVICE ----------------------- */
-static int
-	dev_irnet_open(struct inode *,	/* fs callback : open */
-		       struct file *),
-	dev_irnet_close(struct inode *,
-			struct file *);
-static ssize_t
-	dev_irnet_write(struct file *,
-			const char __user *,
-			size_t,
-			loff_t *),
-	dev_irnet_read(struct file *,
-		       char __user *,
-		       size_t,
-		       loff_t *);
-static unsigned int
-	dev_irnet_poll(struct file *,
-		       poll_table *);
-static long
-	dev_irnet_ioctl(struct file *,
-			unsigned int,
-			unsigned long);
-/* ------------------------ PPP INTERFACE ------------------------ */
-static inline struct sk_buff *
-	irnet_prepare_skb(irnet_socket *,
-			  struct sk_buff *);
-static int
-	ppp_irnet_send(struct ppp_channel *,
-		      struct sk_buff *);
-static int
-	ppp_irnet_ioctl(struct ppp_channel *,
-			unsigned int,
-			unsigned long);
-
-/**************************** VARIABLES ****************************/
-
-/* Filesystem callbacks (to call us) */
-static const struct file_operations irnet_device_fops =
-{
-	.owner		= THIS_MODULE,
-	.read		= dev_irnet_read,
-	.write		= dev_irnet_write,
-	.poll		= dev_irnet_poll,
-	.unlocked_ioctl	= dev_irnet_ioctl,
-	.open		= dev_irnet_open,
-	.release	= dev_irnet_close,
-	.llseek		= noop_llseek,
-  /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */
-};
-
-/* Structure so that the misc major (drivers/char/misc.c) take care of us... */
-static struct miscdevice irnet_misc_device =
-{
-	.minor = IRNET_MINOR,
-	.name = "irnet",
-	.fops = &irnet_device_fops
-};
-
-#endif /* IRNET_PPP_H */
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
deleted file mode 100644
index 7fc340e574cf..000000000000
--- a/net/irda/irnetlink.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * IrDA netlink layer, for stack configuration.
- *
- * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org>
- *
- * Partly based on the 802.11 nelink implementation
- * (see net/wireless/nl80211.c) which is:
- * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/socket.h>
-#include <linux/irda.h>
-#include <linux/gfp.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <net/irda/irda.h>
-#include <net/irda/irlap.h>
-#include <net/genetlink.h>
-
-
-
-static struct genl_family irda_nl_family;
-
-static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info)
-{
-	char * ifname;
-
-	if (!info->attrs[IRDA_NL_ATTR_IFNAME])
-		return NULL;
-
-	ifname = nla_data(info->attrs[IRDA_NL_ATTR_IFNAME]);
-
-	pr_debug("%s(): Looking for %s\n", __func__, ifname);
-
-	return dev_get_by_name(net, ifname);
-}
-
-static int irda_nl_set_mode(struct sk_buff *skb, struct genl_info *info)
-{
-	struct net_device * dev;
-	struct irlap_cb * irlap;
-	u32 mode;
-
-	if (!info->attrs[IRDA_NL_ATTR_MODE])
-		return -EINVAL;
-
-	mode = nla_get_u32(info->attrs[IRDA_NL_ATTR_MODE]);
-
-	pr_debug("%s(): Switching to mode: %d\n", __func__, mode);
-
-	dev = ifname_to_netdev(&init_net, info);
-	if (!dev)
-		return -ENODEV;
-
-	irlap = (struct irlap_cb *)dev->atalk_ptr;
-	if (!irlap) {
-		dev_put(dev);
-		return -ENODEV;
-	}
-
-	irlap->mode = mode;
-
-	dev_put(dev);
-
-	return 0;
-}
-
-static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info)
-{
-	struct net_device * dev;
-	struct irlap_cb * irlap;
-	struct sk_buff *msg;
-	void *hdr;
-	int ret = -ENOBUFS;
-
-	dev = ifname_to_netdev(&init_net, info);
-	if (!dev)
-		return -ENODEV;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg) {
-		dev_put(dev);
-		return -ENOMEM;
-	}
-
-	irlap = (struct irlap_cb *)dev->atalk_ptr;
-	if (!irlap) {
-		ret = -ENODEV;
-		goto err_out;
-	}
-
-	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
-			  &irda_nl_family, 0,  IRDA_NL_CMD_GET_MODE);
-	if (hdr == NULL) {
-		ret = -EMSGSIZE;
-		goto err_out;
-	}
-
-	if(nla_put_string(msg, IRDA_NL_ATTR_IFNAME,
-			  dev->name))
-		goto err_out;
-
-	if(nla_put_u32(msg, IRDA_NL_ATTR_MODE, irlap->mode))
-		goto err_out;
-
-	genlmsg_end(msg, hdr);
-
-	return genlmsg_reply(msg, info);
-
- err_out:
-	nlmsg_free(msg);
-	dev_put(dev);
-
-	return ret;
-}
-
-static const struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = {
-	[IRDA_NL_ATTR_IFNAME] = { .type = NLA_NUL_STRING,
-				  .len = IFNAMSIZ-1 },
-	[IRDA_NL_ATTR_MODE] = { .type = NLA_U32 },
-};
-
-static const struct genl_ops irda_nl_ops[] = {
-	{
-		.cmd = IRDA_NL_CMD_SET_MODE,
-		.doit = irda_nl_set_mode,
-		.policy = irda_nl_policy,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = IRDA_NL_CMD_GET_MODE,
-		.doit = irda_nl_get_mode,
-		.policy = irda_nl_policy,
-		/* can be retrieved by unprivileged users */
-	},
-
-};
-
-static struct genl_family irda_nl_family __ro_after_init = {
-	.name = IRDA_NL_NAME,
-	.hdrsize = 0,
-	.version = IRDA_NL_VERSION,
-	.maxattr = IRDA_NL_CMD_MAX,
-	.module = THIS_MODULE,
-	.ops = irda_nl_ops,
-	.n_ops = ARRAY_SIZE(irda_nl_ops),
-};
-
-int __init irda_nl_register(void)
-{
-	return genl_register_family(&irda_nl_family);
-}
-
-void irda_nl_unregister(void)
-{
-	genl_unregister_family(&irda_nl_family);
-}
diff --git a/net/irda/irproc.c b/net/irda/irproc.c
deleted file mode 100644
index 77cfdde9d82f..000000000000
--- a/net/irda/irproc.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irproc.c
- * Version:       1.0
- * Description:   Various entries in the /proc file system
- * Status:        Experimental.
- * Author:        Thomas Davis, <ratbert@radiks.net>
- * Created at:    Sat Feb 21 21:33:24 1998
- * Modified at:   Sun Nov 14 08:54:54 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-1999, Dag Brattli <dagb@cs.uit.no>
- *     Copyright (c) 1998, Thomas Davis, <ratbert@radiks.net>,
- *     All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     I, Thomas Davis, provide no warranty for any of this software.
- *     This material is provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <net/net_namespace.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlmp.h>
-
-extern const struct file_operations discovery_seq_fops;
-extern const struct file_operations irlap_seq_fops;
-extern const struct file_operations irlmp_seq_fops;
-extern const struct file_operations irttp_seq_fops;
-extern const struct file_operations irias_seq_fops;
-
-struct irda_entry {
-	const char *name;
-	const struct file_operations *fops;
-};
-
-struct proc_dir_entry *proc_irda;
-EXPORT_SYMBOL(proc_irda);
-
-static const struct irda_entry irda_dirs[] = {
-	{"discovery",	&discovery_seq_fops},
-	{"irttp",	&irttp_seq_fops},
-	{"irlmp",	&irlmp_seq_fops},
-	{"irlap",	&irlap_seq_fops},
-	{"irias",	&irias_seq_fops},
-};
-
-/*
- * Function irda_proc_register (void)
- *
- *    Register irda entry in /proc file system
- *
- */
-void __init irda_proc_register(void)
-{
-	int i;
-
-	proc_irda = proc_mkdir("irda", init_net.proc_net);
-	if (proc_irda == NULL)
-		return;
-
-	for (i = 0; i < ARRAY_SIZE(irda_dirs); i++)
-		(void) proc_create(irda_dirs[i].name, 0, proc_irda,
-				   irda_dirs[i].fops);
-}
-
-/*
- * Function irda_proc_unregister (void)
- *
- *    Unregister irda entry in /proc file system
- *
- */
-void irda_proc_unregister(void)
-{
-	int i;
-
-	if (proc_irda) {
-		for (i=0; i<ARRAY_SIZE(irda_dirs); i++)
-			remove_proc_entry(irda_dirs[i].name, proc_irda);
-
-		remove_proc_entry("irda", init_net.proc_net);
-		proc_irda = NULL;
-	}
-}
-
-
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
deleted file mode 100644
index 160dc89335e2..000000000000
--- a/net/irda/irqueue.c
+++ /dev/null
@@ -1,911 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irqueue.c
- * Version:       0.3
- * Description:   General queue implementation
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Jun  9 13:29:31 1998
- * Modified at:   Sun Dec 12 13:48:22 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * Modified at:   Thu Jan  4 14:29:10 CET 2001
- * Modified by:   Marc Zyngier <mzyngier@freesurf.fr>
- *
- *     Copyright (C) 1998-1999, Aage Kvalnes <aage@cs.uit.no>
- *     Copyright (C) 1998, Dag Brattli,
- *     All Rights Reserved.
- *
- *     This code is taken from the Vortex Operating System written by Aage
- *     Kvalnes. Aage has agreed that this code can use the GPL licence,
- *     although he does not use that licence in his own code.
- *
- *     This copyright does however _not_ include the ELF hash() function
- *     which I currently don't know which licence or copyright it
- *     has. Please inform me if you know.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-/*
- * NOTE :
- * There are various problems with this package :
- *	o the hash function for ints is pathetic (but could be changed)
- *	o locking is sometime suspicious (especially during enumeration)
- *	o most users have only a few elements (== overhead)
- *	o most users never use search, so don't benefit from hashing
- * Problem already fixed :
- *	o not 64 bit compliant (most users do hashv = (int) self)
- *	o hashbin_remove() is broken => use hashbin_remove_this()
- * I think most users would be better served by a simple linked list
- * (like include/linux/list.h) with a global spinlock per list.
- * Jean II
- */
-
-/*
- * Notes on the concurrent access to hashbin and other SMP issues
- * -------------------------------------------------------------
- *	Hashbins are very often in the IrDA stack a global repository of
- * information, and therefore used in a very asynchronous manner following
- * various events (driver calls, timers, user calls...).
- *	Therefore, very often it is highly important to consider the
- * management of concurrent access to the hashbin and how to guarantee the
- * consistency of the operations on it.
- *
- *	First, we need to define the objective of locking :
- *		1) Protect user data (content pointed by the hashbin)
- *		2) Protect hashbin structure itself (linked list in each bin)
- *
- *			     OLD LOCKING
- *			     -----------
- *
- *	The previous locking strategy, either HB_LOCAL or HB_GLOBAL were
- * both inadequate in *both* aspect.
- *		o HB_GLOBAL was using a spinlock for each bin (local locking).
- *		o HB_LOCAL was disabling irq on *all* CPUs, so use a single
- *		  global semaphore.
- *	The problems were :
- *		A) Global irq disabling is no longer supported by the kernel
- *		B) No protection for the hashbin struct global data
- *			o hashbin_delete()
- *			o hb_current
- *		C) No protection for user data in some cases
- *
- *	A) HB_LOCAL use global irq disabling, so doesn't work on kernel
- * 2.5.X. Even when it is supported (kernel 2.4.X and earlier), its
- * performance is not satisfactory on SMP setups. Most hashbins were
- * HB_LOCAL, so (A) definitely need fixing.
- *	B) HB_LOCAL could be modified to fix (B). However, because HB_GLOBAL
- * lock only the individual bins, it will never be able to lock the
- * global data, so can't do (B).
- *	C) Some functions return pointer to data that is still in the
- * hashbin :
- *		o hashbin_find()
- *		o hashbin_get_first()
- *		o hashbin_get_next()
- *	As the data is still in the hashbin, it may be changed or free'd
- * while the caller is examinimg the data. In those case, locking can't
- * be done within the hashbin, but must include use of the data within
- * the caller.
- *	The caller can easily do this with HB_LOCAL (just disable irqs).
- * However, this is impossible with HB_GLOBAL because the caller has no
- * way to know the proper bin, so don't know which spinlock to use.
- *
- *	Quick summary : can no longer use HB_LOCAL, and HB_GLOBAL is
- * fundamentally broken and will never work.
- *
- *			     NEW LOCKING
- *			     -----------
- *
- *	To fix those problems, I've introduce a few changes in the
- * hashbin locking :
- *		1) New HB_LOCK scheme
- *		2) hashbin->hb_spinlock
- *		3) New hashbin usage policy
- *
- * HB_LOCK :
- * -------
- *	HB_LOCK is a locking scheme intermediate between the old HB_LOCAL
- * and HB_GLOBAL. It uses a single spinlock to protect the whole content
- * of the hashbin. As it is a single spinlock, it can protect the global
- * data of the hashbin and not only the bins themselves.
- *	HB_LOCK can only protect some of the hashbin calls, so it only lock
- * call that can be made 100% safe and leave other call unprotected.
- *	HB_LOCK in theory is slower than HB_GLOBAL, but as the hashbin
- * content is always small contention is not high, so it doesn't matter
- * much. HB_LOCK is probably faster than HB_LOCAL.
- *
- * hashbin->hb_spinlock :
- * --------------------
- *	The spinlock that HB_LOCK uses is available for caller, so that
- * the caller can protect unprotected calls (see below).
- *	If the caller want to do entirely its own locking (HB_NOLOCK), he
- * can do so and may use safely this spinlock.
- *	Locking is done like this :
- *		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
- *	Releasing the lock :
- *		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
- *
- * Safe & Protected calls :
- * ----------------------
- *	The following calls are safe or protected via HB_LOCK :
- *		o hashbin_new()		-> safe
- *		o hashbin_delete()
- *		o hashbin_insert()
- *		o hashbin_remove_first()
- *		o hashbin_remove()
- *		o hashbin_remove_this()
- *		o HASHBIN_GET_SIZE()	-> atomic
- *
- *	The following calls only protect the hashbin itself :
- *		o hashbin_lock_find()
- *		o hashbin_find_next()
- *
- * Unprotected calls :
- * -----------------
- *	The following calls need to be protected by the caller :
- *		o hashbin_find()
- *		o hashbin_get_first()
- *		o hashbin_get_next()
- *
- * Locking Policy :
- * --------------
- *	If the hashbin is used only in a single thread of execution
- * (explicitly or implicitely), you can use HB_NOLOCK
- *	If the calling module already provide concurrent access protection,
- * you may use HB_NOLOCK.
- *
- *	In all other cases, you need to use HB_LOCK and lock the hashbin
- * every time before calling one of the unprotected calls. You also must
- * use the pointer returned by the unprotected call within the locked
- * region.
- *
- * Extra care for enumeration :
- * --------------------------
- *	hashbin_get_first() and hashbin_get_next() use the hashbin to
- * store the current position, in hb_current.
- *	As long as the hashbin remains locked, this is safe. If you unlock
- * the hashbin, the current position may change if anybody else modify
- * or enumerate the hashbin.
- *	Summary : do the full enumeration while locked.
- *
- *	Alternatively, you may use hashbin_find_next(). But, this will
- * be slower, is more complex to use and doesn't protect the hashbin
- * content. So, care is needed here as well.
- *
- * Other issues :
- * ------------
- *	I believe that we are overdoing it by using spin_lock_irqsave()
- * and we should use only spin_lock_bh() or similar. But, I don't have
- * the balls to try it out.
- *	Don't believe that because hashbin are now (somewhat) SMP safe
- * that the rest of the code is. Higher layers tend to be safest,
- * but LAP and LMP would need some serious dedicated love.
- *
- * Jean II
- */
-#include <linux/module.h>
-#include <linux/slab.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irqueue.h>
-
-/************************ QUEUE SUBROUTINES ************************/
-
-/*
- * Hashbin
- */
-#define GET_HASHBIN(x) ( x & HASHBIN_MASK )
-
-/*
- * Function hash (name)
- *
- *    This function hash the input string 'name' using the ELF hash
- *    function for strings.
- */
-static __u32 hash( const char* name)
-{
-	__u32 h = 0;
-	__u32 g;
-
-	while(*name) {
-		h = (h<<4) + *name++;
-		if ((g = (h & 0xf0000000)))
-			h ^=g>>24;
-		h &=~g;
-	}
-	return h;
-}
-
-/*
- * Function enqueue_first (queue, proc)
- *
- *    Insert item first in queue.
- *
- */
-static void enqueue_first(irda_queue_t **queue, irda_queue_t* element)
-{
-
-	/*
-	 * Check if queue is empty.
-	 */
-	if ( *queue == NULL ) {
-		/*
-		 * Queue is empty.  Insert one element into the queue.
-		 */
-		element->q_next = element->q_prev = *queue = element;
-
-	} else {
-		/*
-		 * Queue is not empty.  Insert element into front of queue.
-		 */
-		element->q_next          = (*queue);
-		(*queue)->q_prev->q_next = element;
-		element->q_prev          = (*queue)->q_prev;
-		(*queue)->q_prev         = element;
-		(*queue)                 = element;
-	}
-}
-
-
-/*
- * Function dequeue (queue)
- *
- *    Remove first entry in queue
- *
- */
-static irda_queue_t *dequeue_first(irda_queue_t **queue)
-{
-	irda_queue_t *ret;
-
-	pr_debug("dequeue_first()\n");
-
-	/*
-	 * Set return value
-	 */
-	ret =  *queue;
-
-	if ( *queue == NULL ) {
-		/*
-		 * Queue was empty.
-		 */
-	} else if ( (*queue)->q_next == *queue ) {
-		/*
-		 *  Queue only contained a single element. It will now be
-		 *  empty.
-		 */
-		*queue = NULL;
-	} else {
-		/*
-		 * Queue contained several element.  Remove the first one.
-		 */
-		(*queue)->q_prev->q_next = (*queue)->q_next;
-		(*queue)->q_next->q_prev = (*queue)->q_prev;
-		*queue = (*queue)->q_next;
-	}
-
-	/*
-	 * Return the removed entry (or NULL of queue was empty).
-	 */
-	return ret;
-}
-
-/*
- * Function dequeue_general (queue, element)
- *
- *
- */
-static irda_queue_t *dequeue_general(irda_queue_t **queue, irda_queue_t* element)
-{
-	irda_queue_t *ret;
-
-	pr_debug("dequeue_general()\n");
-
-	/*
-	 * Set return value
-	 */
-	ret =  *queue;
-
-	if ( *queue == NULL ) {
-		/*
-		 * Queue was empty.
-		 */
-	} else if ( (*queue)->q_next == *queue ) {
-		/*
-		 *  Queue only contained a single element. It will now be
-		 *  empty.
-		 */
-		*queue = NULL;
-
-	} else {
-		/*
-		 *  Remove specific element.
-		 */
-		element->q_prev->q_next = element->q_next;
-		element->q_next->q_prev = element->q_prev;
-		if ( (*queue) == element)
-			(*queue) = element->q_next;
-	}
-
-	/*
-	 * Return the removed entry (or NULL of queue was empty).
-	 */
-	return ret;
-}
-
-/************************ HASHBIN MANAGEMENT ************************/
-
-/*
- * Function hashbin_create ( type, name )
- *
- *    Create hashbin!
- *
- */
-hashbin_t *hashbin_new(int type)
-{
-	hashbin_t* hashbin;
-
-	/*
-	 * Allocate new hashbin
-	 */
-	hashbin = kzalloc(sizeof(*hashbin), GFP_ATOMIC);
-	if (!hashbin)
-		return NULL;
-
-	/*
-	 * Initialize structure
-	 */
-	hashbin->hb_type = type;
-	hashbin->magic = HB_MAGIC;
-	//hashbin->hb_current = NULL;
-
-	/* Make sure all spinlock's are unlocked */
-	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_lock_init(&hashbin->hb_spinlock);
-	}
-
-	return hashbin;
-}
-EXPORT_SYMBOL(hashbin_new);
-
-
-/*
- * Function hashbin_delete (hashbin, free_func)
- *
- *    Destroy hashbin, the free_func can be a user supplied special routine
- *    for deallocating this structure if it's complex. If not the user can
- *    just supply kfree, which should take care of the job.
- */
-int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
-{
-	irda_queue_t* queue;
-	unsigned long flags = 0;
-	int i;
-
-	IRDA_ASSERT(hashbin != NULL, return -1;);
-	IRDA_ASSERT(hashbin->magic == HB_MAGIC, return -1;);
-
-	/* Synchronize */
-	if (hashbin->hb_type & HB_LOCK)
-		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-
-	/*
-	 *  Free the entries in the hashbin, TODO: use hashbin_clear when
-	 *  it has been shown to work
-	 */
-	for (i = 0; i < HASHBIN_SIZE; i ++ ) {
-		while (1) {
-			queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]);
-
-			if (!queue)
-				break;
-
-			if (free_func) {
-				if (hashbin->hb_type & HB_LOCK)
-					spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-				free_func(queue);
-				if (hashbin->hb_type & HB_LOCK)
-					spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-			}
-		}
-	}
-
-	/* Cleanup local data */
-	hashbin->hb_current = NULL;
-	hashbin->magic = ~HB_MAGIC;
-
-	/* Release lock */
-	if (hashbin->hb_type & HB_LOCK)
-		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-
-	/*
-	 *  Free the hashbin structure
-	 */
-	kfree(hashbin);
-
-	return 0;
-}
-EXPORT_SYMBOL(hashbin_delete);
-
-/********************* HASHBIN LIST OPERATIONS *********************/
-
-/*
- * Function hashbin_insert (hashbin, entry, name)
- *
- *    Insert an entry into the hashbin
- *
- */
-void hashbin_insert(hashbin_t* hashbin, irda_queue_t* entry, long hashv,
-		    const char* name)
-{
-	unsigned long flags = 0;
-	int bin;
-
-	IRDA_ASSERT( hashbin != NULL, return;);
-	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return;);
-
-	/*
-	 * Locate hashbin
-	 */
-	if ( name )
-		hashv = hash( name );
-	bin = GET_HASHBIN( hashv );
-
-	/* Synchronize */
-	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-	} /* Default is no-lock  */
-
-	/*
-	 * Store name and key
-	 */
-	entry->q_hash = hashv;
-	if ( name )
-		strlcpy( entry->q_name, name, sizeof(entry->q_name));
-
-	/*
-	 * Insert new entry first
-	 */
-	enqueue_first( (irda_queue_t**) &hashbin->hb_queue[ bin ],
-		       entry);
-	hashbin->hb_size++;
-
-	/* Release lock */
-	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-	} /* Default is no-lock  */
-}
-EXPORT_SYMBOL(hashbin_insert);
-
-/*
- *  Function hashbin_remove_first (hashbin)
- *
- *    Remove first entry of the hashbin
- *
- * Note : this function no longer use hashbin_remove(), but does things
- * similar to hashbin_remove_this(), so can be considered safe.
- * Jean II
- */
-void *hashbin_remove_first( hashbin_t *hashbin)
-{
-	unsigned long flags = 0;
-	irda_queue_t *entry = NULL;
-
-	/* Synchronize */
-	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-	} /* Default is no-lock  */
-
-	entry = hashbin_get_first( hashbin);
-	if ( entry != NULL) {
-		int	bin;
-		long	hashv;
-		/*
-		 * Locate hashbin
-		 */
-		hashv = entry->q_hash;
-		bin = GET_HASHBIN( hashv );
-
-		/*
-		 * Dequeue the entry...
-		 */
-		dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ],
-				 entry);
-		hashbin->hb_size--;
-		entry->q_next = NULL;
-		entry->q_prev = NULL;
-
-		/*
-		 *  Check if this item is the currently selected item, and in
-		 *  that case we must reset hb_current
-		 */
-		if ( entry == hashbin->hb_current)
-			hashbin->hb_current = NULL;
-	}
-
-	/* Release lock */
-	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-	} /* Default is no-lock  */
-
-	return entry;
-}
-
-
-/*
- *  Function hashbin_remove (hashbin, hashv, name)
- *
- *    Remove entry with the given name
- *
- *  The use of this function is highly discouraged, because the whole
- *  concept behind hashbin_remove() is broken. In many cases, it's not
- *  possible to guarantee the unicity of the index (either hashv or name),
- *  leading to removing the WRONG entry.
- *  The only simple safe use is :
- *		hashbin_remove(hasbin, (int) self, NULL);
- *  In other case, you must think hard to guarantee unicity of the index.
- *  Jean II
- */
-void* hashbin_remove( hashbin_t* hashbin, long hashv, const char* name)
-{
-	int bin, found = FALSE;
-	unsigned long flags = 0;
-	irda_queue_t* entry;
-
-	IRDA_ASSERT( hashbin != NULL, return NULL;);
-	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
-
-	/*
-	 * Locate hashbin
-	 */
-	if ( name )
-		hashv = hash( name );
-	bin = GET_HASHBIN( hashv );
-
-	/* Synchronize */
-	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-	} /* Default is no-lock  */
-
-	/*
-	 * Search for entry
-	 */
-	entry = hashbin->hb_queue[ bin ];
-	if ( entry ) {
-		do {
-			/*
-			 * Check for key
-			 */
-			if ( entry->q_hash == hashv ) {
-				/*
-				 * Name compare too?
-				 */
-				if ( name ) {
-					if ( strcmp( entry->q_name, name) == 0)
-					{
-						found = TRUE;
-						break;
-					}
-				} else {
-					found = TRUE;
-					break;
-				}
-			}
-			entry = entry->q_next;
-		} while ( entry != hashbin->hb_queue[ bin ] );
-	}
-
-	/*
-	 * If entry was found, dequeue it
-	 */
-	if ( found ) {
-		dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ],
-				 entry);
-		hashbin->hb_size--;
-
-		/*
-		 *  Check if this item is the currently selected item, and in
-		 *  that case we must reset hb_current
-		 */
-		if ( entry == hashbin->hb_current)
-			hashbin->hb_current = NULL;
-	}
-
-	/* Release lock */
-	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-	} /* Default is no-lock  */
-
-
-	/* Return */
-	if ( found )
-		return entry;
-	else
-		return NULL;
-
-}
-EXPORT_SYMBOL(hashbin_remove);
-
-/*
- *  Function hashbin_remove_this (hashbin, entry)
- *
- *    Remove entry with the given name
- *
- * In some cases, the user of hashbin can't guarantee the unicity
- * of either the hashv or name.
- * In those cases, using the above function is guaranteed to cause troubles,
- * so we use this one instead...
- * And by the way, it's also faster, because we skip the search phase ;-)
- */
-void* hashbin_remove_this( hashbin_t* hashbin, irda_queue_t* entry)
-{
-	unsigned long flags = 0;
-	int	bin;
-	long	hashv;
-
-	IRDA_ASSERT( hashbin != NULL, return NULL;);
-	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
-	IRDA_ASSERT( entry != NULL, return NULL;);
-
-	/* Synchronize */
-	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-	} /* Default is no-lock  */
-
-	/* Check if valid and not already removed... */
-	if((entry->q_next == NULL) || (entry->q_prev == NULL)) {
-		entry = NULL;
-		goto out;
-	}
-
-	/*
-	 * Locate hashbin
-	 */
-	hashv = entry->q_hash;
-	bin = GET_HASHBIN( hashv );
-
-	/*
-	 * Dequeue the entry...
-	 */
-	dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ],
-			 entry);
-	hashbin->hb_size--;
-	entry->q_next = NULL;
-	entry->q_prev = NULL;
-
-	/*
-	 *  Check if this item is the currently selected item, and in
-	 *  that case we must reset hb_current
-	 */
-	if ( entry == hashbin->hb_current)
-		hashbin->hb_current = NULL;
-out:
-	/* Release lock */
-	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-	} /* Default is no-lock  */
-
-	return entry;
-}
-EXPORT_SYMBOL(hashbin_remove_this);
-
-/*********************** HASHBIN ENUMERATION ***********************/
-
-/*
- * Function hashbin_common_find (hashbin, hashv, name)
- *
- *    Find item with the given hashv or name
- *
- */
-void* hashbin_find( hashbin_t* hashbin, long hashv, const char* name )
-{
-	int bin;
-	irda_queue_t* entry;
-
-	pr_debug("hashbin_find()\n");
-
-	IRDA_ASSERT( hashbin != NULL, return NULL;);
-	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
-
-	/*
-	 * Locate hashbin
-	 */
-	if ( name )
-		hashv = hash( name );
-	bin = GET_HASHBIN( hashv );
-
-	/*
-	 * Search for entry
-	 */
-	entry = hashbin->hb_queue[ bin];
-	if ( entry ) {
-		do {
-			/*
-			 * Check for key
-			 */
-			if ( entry->q_hash == hashv ) {
-				/*
-				 * Name compare too?
-				 */
-				if ( name ) {
-					if ( strcmp( entry->q_name, name ) == 0 ) {
-						return entry;
-					}
-				} else {
-					return entry;
-				}
-			}
-			entry = entry->q_next;
-		} while ( entry != hashbin->hb_queue[ bin ] );
-	}
-
-	return NULL;
-}
-EXPORT_SYMBOL(hashbin_find);
-
-/*
- * Function hashbin_lock_find (hashbin, hashv, name)
- *
- *    Find item with the given hashv or name
- *
- * Same, but with spinlock protection...
- * I call it safe, but it's only safe with respect to the hashbin, not its
- * content. - Jean II
- */
-void* hashbin_lock_find( hashbin_t* hashbin, long hashv, const char* name )
-{
-	unsigned long flags = 0;
-	irda_queue_t* entry;
-
-	/* Synchronize */
-	spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-
-	/*
-	 * Search for entry
-	 */
-	entry = hashbin_find(hashbin, hashv, name);
-
-	/* Release lock */
-	spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-
-	return entry;
-}
-EXPORT_SYMBOL(hashbin_lock_find);
-
-/*
- * Function hashbin_find (hashbin, hashv, name, pnext)
- *
- *    Find an item with the given hashv or name, and its successor
- *
- * This function allow to do concurrent enumerations without the
- * need to lock over the whole session, because the caller keep the
- * context of the search. On the other hand, it might fail and return
- * NULL if the entry is removed. - Jean II
- */
-void* hashbin_find_next( hashbin_t* hashbin, long hashv, const char* name,
-			 void ** pnext)
-{
-	unsigned long flags = 0;
-	irda_queue_t* entry;
-
-	/* Synchronize */
-	spin_lock_irqsave(&hashbin->hb_spinlock, flags);
-
-	/*
-	 * Search for current entry
-	 * This allow to check if the current item is still in the
-	 * hashbin or has been removed.
-	 */
-	entry = hashbin_find(hashbin, hashv, name);
-
-	/*
-	 * Trick hashbin_get_next() to return what we want
-	 */
-	if(entry) {
-		hashbin->hb_current = entry;
-		*pnext = hashbin_get_next( hashbin );
-	} else
-		*pnext = NULL;
-
-	/* Release lock */
-	spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-
-	return entry;
-}
-
-/*
- * Function hashbin_get_first (hashbin)
- *
- *    Get a pointer to first element in hashbin, this function must be
- *    called before any calls to hashbin_get_next()!
- *
- */
-irda_queue_t *hashbin_get_first( hashbin_t* hashbin)
-{
-	irda_queue_t *entry;
-	int i;
-
-	IRDA_ASSERT( hashbin != NULL, return NULL;);
-	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
-
-	if ( hashbin == NULL)
-		return NULL;
-
-	for ( i = 0; i < HASHBIN_SIZE; i ++ ) {
-		entry = hashbin->hb_queue[ i];
-		if ( entry) {
-			hashbin->hb_current = entry;
-			return entry;
-		}
-	}
-	/*
-	 *  Did not find any item in hashbin
-	 */
-	return NULL;
-}
-EXPORT_SYMBOL(hashbin_get_first);
-
-/*
- * Function hashbin_get_next (hashbin)
- *
- *    Get next item in hashbin. A series of hashbin_get_next() calls must
- *    be started by a call to hashbin_get_first(). The function returns
- *    NULL when all items have been traversed
- *
- * The context of the search is stored within the hashbin, so you must
- * protect yourself from concurrent enumerations. - Jean II
- */
-irda_queue_t *hashbin_get_next( hashbin_t *hashbin)
-{
-	irda_queue_t* entry;
-	int bin;
-	int i;
-
-	IRDA_ASSERT( hashbin != NULL, return NULL;);
-	IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;);
-
-	if ( hashbin->hb_current == NULL) {
-		IRDA_ASSERT( hashbin->hb_current != NULL, return NULL;);
-		return NULL;
-	}
-	entry = hashbin->hb_current->q_next;
-	bin = GET_HASHBIN( entry->q_hash);
-
-	/*
-	 *  Make sure that we are not back at the beginning of the queue
-	 *  again
-	 */
-	if ( entry != hashbin->hb_queue[ bin ]) {
-		hashbin->hb_current = entry;
-
-		return entry;
-	}
-
-	/*
-	 *  Check that this is not the last queue in hashbin
-	 */
-	if ( bin >= HASHBIN_SIZE)
-		return NULL;
-
-	/*
-	 *  Move to next queue in hashbin
-	 */
-	bin++;
-	for ( i = bin; i < HASHBIN_SIZE; i++ ) {
-		entry = hashbin->hb_queue[ i];
-		if ( entry) {
-			hashbin->hb_current = entry;
-
-			return entry;
-		}
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(hashbin_get_next);
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
deleted file mode 100644
index 873da5e7d428..000000000000
--- a/net/irda/irsysctl.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irsysctl.c
- * Version:       1.0
- * Description:   Sysctl interface for IrDA
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun May 24 22:12:06 1998
- * Modified at:   Fri Jun  4 02:50:15 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1997, 1999 Dag Brattli, All Rights Reserved.
- *     Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/mm.h>
-#include <linux/ctype.h>
-#include <linux/sysctl.h>
-#include <linux/init.h>
-
-#include <net/irda/irda.h>		/* irda_debug */
-#include <net/irda/irlmp.h>
-#include <net/irda/timer.h>
-#include <net/irda/irias_object.h>
-
-extern int  sysctl_discovery;
-extern int  sysctl_discovery_slots;
-extern int  sysctl_discovery_timeout;
-extern int  sysctl_slot_timeout;
-extern int  sysctl_fast_poll_increase;
-extern char sysctl_devname[];
-extern int  sysctl_max_baud_rate;
-extern unsigned int sysctl_min_tx_turn_time;
-extern unsigned int sysctl_max_tx_data_size;
-extern unsigned int sysctl_max_tx_window;
-extern int  sysctl_max_noreply_time;
-extern int  sysctl_warn_noreply_time;
-extern int  sysctl_lap_keepalive_time;
-
-extern struct irlmp_cb *irlmp;
-
-/* this is needed for the proc_dointvec_minmax - Jean II */
-static int max_discovery_slots = 16;		/* ??? */
-static int min_discovery_slots = 1;
-/* IrLAP 6.13.2 says 25ms to 10+70ms - allow higher since some devices
- * seems to require it. (from Dag's comment) */
-static int max_slot_timeout = 160;
-static int min_slot_timeout = 20;
-static int max_max_baud_rate = 16000000;	/* See qos.c - IrLAP spec */
-static int min_max_baud_rate = 2400;
-static int max_min_tx_turn_time = 10000;	/* See qos.c - IrLAP spec */
-static int min_min_tx_turn_time;
-static int max_max_tx_data_size = 2048;		/* See qos.c - IrLAP spec */
-static int min_max_tx_data_size = 64;
-static int max_max_tx_window = 7;		/* See qos.c - IrLAP spec */
-static int min_max_tx_window = 1;
-static int max_max_noreply_time = 40;		/* See qos.c - IrLAP spec */
-static int min_max_noreply_time = 3;
-static int max_warn_noreply_time = 3;		/* 3s == standard */
-static int min_warn_noreply_time = 1;		/* 1s == min WD_TIMER */
-static int max_lap_keepalive_time = 10000;	/* 10s */
-static int min_lap_keepalive_time = 100;	/* 100us */
-/* For other sysctl, I've no idea of the range. Maybe Dag could help
- * us on that - Jean II */
-
-static int do_devname(struct ctl_table *table, int write,
-		      void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	int ret;
-
-	ret = proc_dostring(table, write, buffer, lenp, ppos);
-	if (ret == 0 && write) {
-		struct ias_value *val;
-
-		val = irias_new_string_value(sysctl_devname);
-		if (val)
-			irias_object_change_attribute("Device", "DeviceName", val);
-	}
-	return ret;
-}
-
-
-static int do_discovery(struct ctl_table *table, int write,
-                    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-       int ret;
-
-       ret = proc_dointvec(table, write, buffer, lenp, ppos);
-       if (ret)
-	       return ret;
-
-       if (irlmp == NULL)
-	       return -ENODEV;
-
-       if (sysctl_discovery)
-	       irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ);
-       else
-	       del_timer_sync(&irlmp->discovery_timer);
-
-       return ret;
-}
-
-/* One file */
-static struct ctl_table irda_table[] = {
-	{
-		.procname	= "discovery",
-		.data		= &sysctl_discovery,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= do_discovery,
-	},
-	{
-		.procname	= "devname",
-		.data		= sysctl_devname,
-		.maxlen		= 65,
-		.mode		= 0644,
-		.proc_handler	= do_devname,
-	},
-#ifdef CONFIG_IRDA_FAST_RR
-	{
-		.procname	= "fast_poll_increase",
-		.data		= &sysctl_fast_poll_increase,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-#endif
-	{
-		.procname	= "discovery_slots",
-		.data		= &sysctl_discovery_slots,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_discovery_slots,
-		.extra2		= &max_discovery_slots
-	},
-	{
-		.procname	= "discovery_timeout",
-		.data		= &sysctl_discovery_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
-		.procname	= "slot_timeout",
-		.data		= &sysctl_slot_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_slot_timeout,
-		.extra2		= &max_slot_timeout
-	},
-	{
-		.procname	= "max_baud_rate",
-		.data		= &sysctl_max_baud_rate,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_max_baud_rate,
-		.extra2		= &max_max_baud_rate
-	},
-	{
-		.procname	= "min_tx_turn_time",
-		.data		= &sysctl_min_tx_turn_time,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_min_tx_turn_time,
-		.extra2		= &max_min_tx_turn_time
-	},
-	{
-		.procname	= "max_tx_data_size",
-		.data		= &sysctl_max_tx_data_size,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_max_tx_data_size,
-		.extra2		= &max_max_tx_data_size
-	},
-	{
-		.procname	= "max_tx_window",
-		.data		= &sysctl_max_tx_window,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_max_tx_window,
-		.extra2		= &max_max_tx_window
-	},
-	{
-		.procname	= "max_noreply_time",
-		.data		= &sysctl_max_noreply_time,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_max_noreply_time,
-		.extra2		= &max_max_noreply_time
-	},
-	{
-		.procname	= "warn_noreply_time",
-		.data		= &sysctl_warn_noreply_time,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_warn_noreply_time,
-		.extra2		= &max_warn_noreply_time
-	},
-	{
-		.procname	= "lap_keepalive_time",
-		.data		= &sysctl_lap_keepalive_time,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_lap_keepalive_time,
-		.extra2		= &max_lap_keepalive_time
-	},
-	{ }
-};
-
-static struct ctl_table_header *irda_table_header;
-
-/*
- * Function irda_sysctl_register (void)
- *
- *    Register our sysctl interface
- *
- */
-int __init irda_sysctl_register(void)
-{
-	irda_table_header = register_net_sysctl(&init_net, "net/irda", irda_table);
-	if (!irda_table_header)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/*
- * Function irda_sysctl_unregister (void)
- *
- *    Unregister our sysctl interface
- *
- */
-void irda_sysctl_unregister(void)
-{
-	unregister_net_sysctl_table(irda_table_header);
-}
-
-
-
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
deleted file mode 100644
index b6ab41d5b3a3..000000000000
--- a/net/irda/irttp.c
+++ /dev/null
@@ -1,1891 +0,0 @@
-/*********************************************************************
- *
- * Filename:      irttp.c
- * Version:       1.2
- * Description:   Tiny Transport Protocol (TTP) implementation
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sun Aug 31 20:14:31 1997
- * Modified at:   Wed Jan  5 11:31:27 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *     Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlmp.h>
-#include <net/irda/parameters.h>
-#include <net/irda/irttp.h>
-
-static struct irttp_cb *irttp;
-
-static void __irttp_close_tsap(struct tsap_cb *self);
-
-static int irttp_data_indication(void *instance, void *sap,
-				 struct sk_buff *skb);
-static int irttp_udata_indication(void *instance, void *sap,
-				  struct sk_buff *skb);
-static void irttp_disconnect_indication(void *instance, void *sap,
-					LM_REASON reason, struct sk_buff *);
-static void irttp_connect_indication(void *instance, void *sap,
-				     struct qos_info *qos, __u32 max_sdu_size,
-				     __u8 header_size, struct sk_buff *skb);
-static void irttp_connect_confirm(void *instance, void *sap,
-				  struct qos_info *qos, __u32 max_sdu_size,
-				  __u8 header_size, struct sk_buff *skb);
-static void irttp_run_tx_queue(struct tsap_cb *self);
-static void irttp_run_rx_queue(struct tsap_cb *self);
-
-static void irttp_flush_queues(struct tsap_cb *self);
-static void irttp_fragment_skb(struct tsap_cb *self, struct sk_buff *skb);
-static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self);
-static void irttp_todo_expired(unsigned long data);
-static int irttp_param_max_sdu_size(void *instance, irda_param_t *param,
-				    int get);
-
-static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow);
-static void irttp_status_indication(void *instance,
-				    LINK_STATUS link, LOCK_STATUS lock);
-
-/* Information for parsing parameters in IrTTP */
-static const pi_minor_info_t pi_minor_call_table[] = {
-	{ NULL, 0 },                                             /* 0x00 */
-	{ irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */
-};
-static const pi_major_info_t pi_major_call_table[] = {
-	{ pi_minor_call_table, 2 }
-};
-static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 };
-
-/************************ GLOBAL PROCEDURES ************************/
-
-/*
- * Function irttp_init (void)
- *
- *    Initialize the IrTTP layer. Called by module initialization code
- *
- */
-int __init irttp_init(void)
-{
-	irttp = kzalloc(sizeof(struct irttp_cb), GFP_KERNEL);
-	if (irttp == NULL)
-		return -ENOMEM;
-
-	irttp->magic = TTP_MAGIC;
-
-	irttp->tsaps = hashbin_new(HB_LOCK);
-	if (!irttp->tsaps) {
-		net_err_ratelimited("%s: can't allocate IrTTP hashbin!\n",
-				    __func__);
-		kfree(irttp);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-/*
- * Function irttp_cleanup (void)
- *
- *    Called by module destruction/cleanup code
- *
- */
-void irttp_cleanup(void)
-{
-	/* Check for main structure */
-	IRDA_ASSERT(irttp->magic == TTP_MAGIC, return;);
-
-	/*
-	 *  Delete hashbin and close all TSAP instances in it
-	 */
-	hashbin_delete(irttp->tsaps, (FREE_FUNC) __irttp_close_tsap);
-
-	irttp->magic = 0;
-
-	/* De-allocate main structure */
-	kfree(irttp);
-
-	irttp = NULL;
-}
-
-/*************************** SUBROUTINES ***************************/
-
-/*
- * Function irttp_start_todo_timer (self, timeout)
- *
- *    Start todo timer.
- *
- * Made it more effient and unsensitive to race conditions - Jean II
- */
-static inline void irttp_start_todo_timer(struct tsap_cb *self, int timeout)
-{
-	/* Set new value for timer */
-	mod_timer(&self->todo_timer, jiffies + timeout);
-}
-
-/*
- * Function irttp_todo_expired (data)
- *
- *    Todo timer has expired!
- *
- * One of the restriction of the timer is that it is run only on the timer
- * interrupt which run every 10ms. This mean that even if you set the timer
- * with a delay of 0, it may take up to 10ms before it's run.
- * So, to minimise latency and keep cache fresh, we try to avoid using
- * it as much as possible.
- * Note : we can't use tasklets, because they can't be asynchronously
- * killed (need user context), and we can't guarantee that here...
- * Jean II
- */
-static void irttp_todo_expired(unsigned long data)
-{
-	struct tsap_cb *self = (struct tsap_cb *) data;
-
-	/* Check that we still exist */
-	if (!self || self->magic != TTP_TSAP_MAGIC)
-		return;
-
-	pr_debug("%s(instance=%p)\n", __func__, self);
-
-	/* Try to make some progress, especially on Tx side - Jean II */
-	irttp_run_rx_queue(self);
-	irttp_run_tx_queue(self);
-
-	/* Check if time for disconnect */
-	if (test_bit(0, &self->disconnect_pend)) {
-		/* Check if it's possible to disconnect yet */
-		if (skb_queue_empty(&self->tx_queue)) {
-			/* Make sure disconnect is not pending anymore */
-			clear_bit(0, &self->disconnect_pend);	/* FALSE */
-
-			/* Note : self->disconnect_skb may be NULL */
-			irttp_disconnect_request(self, self->disconnect_skb,
-						 P_NORMAL);
-			self->disconnect_skb = NULL;
-		} else {
-			/* Try again later */
-			irttp_start_todo_timer(self, HZ/10);
-
-			/* No reason to try and close now */
-			return;
-		}
-	}
-
-	/* Check if it's closing time */
-	if (self->close_pend)
-		/* Finish cleanup */
-		irttp_close_tsap(self);
-}
-
-/*
- * Function irttp_flush_queues (self)
- *
- *     Flushes (removes all frames) in transitt-buffer (tx_list)
- */
-static void irttp_flush_queues(struct tsap_cb *self)
-{
-	struct sk_buff *skb;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
-	/* Deallocate frames waiting to be sent */
-	while ((skb = skb_dequeue(&self->tx_queue)) != NULL)
-		dev_kfree_skb(skb);
-
-	/* Deallocate received frames */
-	while ((skb = skb_dequeue(&self->rx_queue)) != NULL)
-		dev_kfree_skb(skb);
-
-	/* Deallocate received fragments */
-	while ((skb = skb_dequeue(&self->rx_fragments)) != NULL)
-		dev_kfree_skb(skb);
-}
-
-/*
- * Function irttp_reassemble (self)
- *
- *    Makes a new (continuous) skb of all the fragments in the fragment
- *    queue
- *
- */
-static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self)
-{
-	struct sk_buff *skb, *frag;
-	int n = 0;  /* Fragment index */
-
-	IRDA_ASSERT(self != NULL, return NULL;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return NULL;);
-
-	pr_debug("%s(), self->rx_sdu_size=%d\n", __func__,
-		 self->rx_sdu_size);
-
-	skb = dev_alloc_skb(TTP_HEADER + self->rx_sdu_size);
-	if (!skb)
-		return NULL;
-
-	/*
-	 * Need to reserve space for TTP header in case this skb needs to
-	 * be requeued in case delivery failes
-	 */
-	skb_reserve(skb, TTP_HEADER);
-	skb_put(skb, self->rx_sdu_size);
-
-	/*
-	 *  Copy all fragments to a new buffer
-	 */
-	while ((frag = skb_dequeue(&self->rx_fragments)) != NULL) {
-		skb_copy_to_linear_data_offset(skb, n, frag->data, frag->len);
-		n += frag->len;
-
-		dev_kfree_skb(frag);
-	}
-
-	pr_debug("%s(), frame len=%d, rx_sdu_size=%d, rx_max_sdu_size=%d\n",
-		 __func__, n, self->rx_sdu_size, self->rx_max_sdu_size);
-	/* Note : irttp_run_rx_queue() calculate self->rx_sdu_size
-	 * by summing the size of all fragments, so we should always
-	 * have n == self->rx_sdu_size, except in cases where we
-	 * droped the last fragment (when self->rx_sdu_size exceed
-	 * self->rx_max_sdu_size), where n < self->rx_sdu_size.
-	 * Jean II */
-	IRDA_ASSERT(n <= self->rx_sdu_size, n = self->rx_sdu_size;);
-
-	/* Set the new length */
-	skb_trim(skb, n);
-
-	self->rx_sdu_size = 0;
-
-	return skb;
-}
-
-/*
- * Function irttp_fragment_skb (skb)
- *
- *    Fragments a frame and queues all the fragments for transmission
- *
- */
-static inline void irttp_fragment_skb(struct tsap_cb *self,
-				      struct sk_buff *skb)
-{
-	struct sk_buff *frag;
-	__u8 *frame;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	/*
-	 *  Split frame into a number of segments
-	 */
-	while (skb->len > self->max_seg_size) {
-		pr_debug("%s(), fragmenting ...\n", __func__);
-
-		/* Make new segment */
-		frag = alloc_skb(self->max_seg_size+self->max_header_size,
-				 GFP_ATOMIC);
-		if (!frag)
-			return;
-
-		skb_reserve(frag, self->max_header_size);
-
-		/* Copy data from the original skb into this fragment. */
-		skb_copy_from_linear_data(skb, skb_put(frag, self->max_seg_size),
-			      self->max_seg_size);
-
-		/* Insert TTP header, with the more bit set */
-		frame = skb_push(frag, TTP_HEADER);
-		frame[0] = TTP_MORE;
-
-		/* Hide the copied data from the original skb */
-		skb_pull(skb, self->max_seg_size);
-
-		/* Queue fragment */
-		skb_queue_tail(&self->tx_queue, frag);
-	}
-	/* Queue what is left of the original skb */
-	pr_debug("%s(), queuing last segment\n", __func__);
-
-	frame = skb_push(skb, TTP_HEADER);
-	frame[0] = 0x00; /* Clear more bit */
-
-	/* Queue fragment */
-	skb_queue_tail(&self->tx_queue, skb);
-}
-
-/*
- * Function irttp_param_max_sdu_size (self, param)
- *
- *    Handle the MaxSduSize parameter in the connect frames, this function
- *    will be called both when this parameter needs to be inserted into, and
- *    extracted from the connect frames
- */
-static int irttp_param_max_sdu_size(void *instance, irda_param_t *param,
-				    int get)
-{
-	struct tsap_cb *self;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->tx_max_sdu_size;
-	else
-		self->tx_max_sdu_size = param->pv.i;
-
-	pr_debug("%s(), MaxSduSize=%d\n", __func__, param->pv.i);
-
-	return 0;
-}
-
-/*************************** CLIENT CALLS ***************************/
-/************************** LMP CALLBACKS **************************/
-/* Everything is happily mixed up. Waiting for next clean up - Jean II */
-
-/*
- * Initialization, that has to be done on new tsap
- * instance allocation and on duplication
- */
-static void irttp_init_tsap(struct tsap_cb *tsap)
-{
-	spin_lock_init(&tsap->lock);
-	init_timer(&tsap->todo_timer);
-
-	skb_queue_head_init(&tsap->rx_queue);
-	skb_queue_head_init(&tsap->tx_queue);
-	skb_queue_head_init(&tsap->rx_fragments);
-}
-
-/*
- * Function irttp_open_tsap (stsap, notify)
- *
- *    Create TSAP connection endpoint,
- */
-struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
-{
-	struct tsap_cb *self;
-	struct lsap_cb *lsap;
-	notify_t ttp_notify;
-
-	IRDA_ASSERT(irttp->magic == TTP_MAGIC, return NULL;);
-
-	/* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to
-	 * use only 0x01-0x6F. Of course, we can use LSAP_ANY as well.
-	 * JeanII */
-	if ((stsap_sel != LSAP_ANY) &&
-	   ((stsap_sel < 0x01) || (stsap_sel >= 0x70))) {
-		pr_debug("%s(), invalid tsap!\n", __func__);
-		return NULL;
-	}
-
-	self = kzalloc(sizeof(struct tsap_cb), GFP_ATOMIC);
-	if (self == NULL)
-		return NULL;
-
-	/* Initialize internal objects */
-	irttp_init_tsap(self);
-
-	/* Initialise todo timer */
-	self->todo_timer.data     = (unsigned long) self;
-	self->todo_timer.function = &irttp_todo_expired;
-
-	/* Initialize callbacks for IrLMP to use */
-	irda_notify_init(&ttp_notify);
-	ttp_notify.connect_confirm = irttp_connect_confirm;
-	ttp_notify.connect_indication = irttp_connect_indication;
-	ttp_notify.disconnect_indication = irttp_disconnect_indication;
-	ttp_notify.data_indication = irttp_data_indication;
-	ttp_notify.udata_indication = irttp_udata_indication;
-	ttp_notify.flow_indication = irttp_flow_indication;
-	if (notify->status_indication != NULL)
-		ttp_notify.status_indication = irttp_status_indication;
-	ttp_notify.instance = self;
-	strncpy(ttp_notify.name, notify->name, NOTIFY_MAX_NAME);
-
-	self->magic = TTP_TSAP_MAGIC;
-	self->connected = FALSE;
-
-	/*
-	 *  Create LSAP at IrLMP layer
-	 */
-	lsap = irlmp_open_lsap(stsap_sel, &ttp_notify, 0);
-	if (lsap == NULL) {
-		pr_debug("%s: unable to allocate LSAP!!\n", __func__);
-		__irttp_close_tsap(self);
-		return NULL;
-	}
-
-	/*
-	 *  If user specified LSAP_ANY as source TSAP selector, then IrLMP
-	 *  will replace it with whatever source selector which is free, so
-	 *  the stsap_sel we have might not be valid anymore
-	 */
-	self->stsap_sel = lsap->slsap_sel;
-	pr_debug("%s(), stsap_sel=%02x\n", __func__, self->stsap_sel);
-
-	self->notify = *notify;
-	self->lsap = lsap;
-
-	hashbin_insert(irttp->tsaps, (irda_queue_t *) self, (long) self, NULL);
-
-	if (credit > TTP_RX_MAX_CREDIT)
-		self->initial_credit = TTP_RX_MAX_CREDIT;
-	else
-		self->initial_credit = credit;
-
-	return self;
-}
-EXPORT_SYMBOL(irttp_open_tsap);
-
-/*
- * Function irttp_close (handle)
- *
- *    Remove an instance of a TSAP. This function should only deal with the
- *    deallocation of the TSAP, and resetting of the TSAPs values;
- *
- */
-static void __irttp_close_tsap(struct tsap_cb *self)
-{
-	/* First make sure we're connected. */
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
-	irttp_flush_queues(self);
-
-	del_timer(&self->todo_timer);
-
-	/* This one won't be cleaned up if we are disconnect_pend + close_pend
-	 * and we receive a disconnect_indication */
-	if (self->disconnect_skb)
-		dev_kfree_skb(self->disconnect_skb);
-
-	self->connected = FALSE;
-	self->magic = ~TTP_TSAP_MAGIC;
-
-	kfree(self);
-}
-
-/*
- * Function irttp_close (self)
- *
- *    Remove TSAP from list of all TSAPs and then deallocate all resources
- *    associated with this TSAP
- *
- * Note : because we *free* the tsap structure, it is the responsibility
- * of the caller to make sure we are called only once and to deal with
- * possible race conditions. - Jean II
- */
-int irttp_close_tsap(struct tsap_cb *self)
-{
-	struct tsap_cb *tsap;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-
-	/* Make sure tsap has been disconnected */
-	if (self->connected) {
-		/* Check if disconnect is not pending */
-		if (!test_bit(0, &self->disconnect_pend)) {
-			net_warn_ratelimited("%s: TSAP still connected!\n",
-					     __func__);
-			irttp_disconnect_request(self, NULL, P_NORMAL);
-		}
-		self->close_pend = TRUE;
-		irttp_start_todo_timer(self, HZ/10);
-
-		return 0; /* Will be back! */
-	}
-
-	tsap = hashbin_remove(irttp->tsaps, (long) self, NULL);
-
-	IRDA_ASSERT(tsap == self, return -1;);
-
-	/* Close corresponding LSAP */
-	if (self->lsap) {
-		irlmp_close_lsap(self->lsap);
-		self->lsap = NULL;
-	}
-
-	__irttp_close_tsap(self);
-
-	return 0;
-}
-EXPORT_SYMBOL(irttp_close_tsap);
-
-/*
- * Function irttp_udata_request (self, skb)
- *
- *    Send unreliable data on this TSAP
- *
- */
-int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
-{
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	/* Take shortcut on zero byte packets */
-	if (skb->len == 0) {
-		ret = 0;
-		goto err;
-	}
-
-	/* Check that nothing bad happens */
-	if (!self->connected) {
-		net_warn_ratelimited("%s(), Not connected\n", __func__);
-		ret = -ENOTCONN;
-		goto err;
-	}
-
-	if (skb->len > self->max_seg_size) {
-		net_err_ratelimited("%s(), UData is too large for IrLAP!\n",
-				    __func__);
-		ret = -EMSGSIZE;
-		goto err;
-	}
-
-	irlmp_udata_request(self->lsap, skb);
-	self->stats.tx_packets++;
-
-	return 0;
-
-err:
-	dev_kfree_skb(skb);
-	return ret;
-}
-EXPORT_SYMBOL(irttp_udata_request);
-
-
-/*
- * Function irttp_data_request (handle, skb)
- *
- *    Queue frame for transmission. If SAR is enabled, fragement the frame
- *    and queue the fragments for transmission
- */
-int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
-{
-	__u8 *frame;
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	pr_debug("%s() : queue len = %d\n", __func__,
-		 skb_queue_len(&self->tx_queue));
-
-	/* Take shortcut on zero byte packets */
-	if (skb->len == 0) {
-		ret = 0;
-		goto err;
-	}
-
-	/* Check that nothing bad happens */
-	if (!self->connected) {
-		net_warn_ratelimited("%s: Not connected\n", __func__);
-		ret = -ENOTCONN;
-		goto err;
-	}
-
-	/*
-	 *  Check if SAR is disabled, and the frame is larger than what fits
-	 *  inside an IrLAP frame
-	 */
-	if ((self->tx_max_sdu_size == 0) && (skb->len > self->max_seg_size)) {
-		net_err_ratelimited("%s: SAR disabled, and data is too large for IrLAP!\n",
-				    __func__);
-		ret = -EMSGSIZE;
-		goto err;
-	}
-
-	/*
-	 *  Check if SAR is enabled, and the frame is larger than the
-	 *  TxMaxSduSize
-	 */
-	if ((self->tx_max_sdu_size != 0) &&
-	    (self->tx_max_sdu_size != TTP_SAR_UNBOUND) &&
-	    (skb->len > self->tx_max_sdu_size)) {
-		net_err_ratelimited("%s: SAR enabled, but data is larger than TxMaxSduSize!\n",
-				    __func__);
-		ret = -EMSGSIZE;
-		goto err;
-	}
-	/*
-	 *  Check if transmit queue is full
-	 */
-	if (skb_queue_len(&self->tx_queue) >= TTP_TX_MAX_QUEUE) {
-		/*
-		 *  Give it a chance to empty itself
-		 */
-		irttp_run_tx_queue(self);
-
-		/* Drop packet. This error code should trigger the caller
-		 * to resend the data in the client code - Jean II */
-		ret = -ENOBUFS;
-		goto err;
-	}
-
-	/* Queue frame, or queue frame segments */
-	if ((self->tx_max_sdu_size == 0) || (skb->len < self->max_seg_size)) {
-		/* Queue frame */
-		IRDA_ASSERT(skb_headroom(skb) >= TTP_HEADER, return -1;);
-		frame = skb_push(skb, TTP_HEADER);
-		frame[0] = 0x00; /* Clear more bit */
-
-		skb_queue_tail(&self->tx_queue, skb);
-	} else {
-		/*
-		 *  Fragment the frame, this function will also queue the
-		 *  fragments, we don't care about the fact the transmit
-		 *  queue may be overfilled by all the segments for a little
-		 *  while
-		 */
-		irttp_fragment_skb(self, skb);
-	}
-
-	/* Check if we can accept more data from client */
-	if ((!self->tx_sdu_busy) &&
-	    (skb_queue_len(&self->tx_queue) > TTP_TX_HIGH_THRESHOLD)) {
-		/* Tx queue filling up, so stop client. */
-		if (self->notify.flow_indication) {
-			self->notify.flow_indication(self->notify.instance,
-						     self, FLOW_STOP);
-		}
-		/* self->tx_sdu_busy is the state of the client.
-		 * Update state after notifying client to avoid
-		 * race condition with irttp_flow_indication().
-		 * If the queue empty itself after our test but before
-		 * we set the flag, we will fix ourselves below in
-		 * irttp_run_tx_queue().
-		 * Jean II */
-		self->tx_sdu_busy = TRUE;
-	}
-
-	/* Try to make some progress */
-	irttp_run_tx_queue(self);
-
-	return 0;
-
-err:
-	dev_kfree_skb(skb);
-	return ret;
-}
-EXPORT_SYMBOL(irttp_data_request);
-
-/*
- * Function irttp_run_tx_queue (self)
- *
- *    Transmit packets queued for transmission (if possible)
- *
- */
-static void irttp_run_tx_queue(struct tsap_cb *self)
-{
-	struct sk_buff *skb;
-	unsigned long flags;
-	int n;
-
-	pr_debug("%s() : send_credit = %d, queue_len = %d\n",
-		 __func__,
-		 self->send_credit, skb_queue_len(&self->tx_queue));
-
-	/* Get exclusive access to the tx queue, otherwise don't touch it */
-	if (irda_lock(&self->tx_queue_lock) == FALSE)
-		return;
-
-	/* Try to send out frames as long as we have credits
-	 * and as long as LAP is not full. If LAP is full, it will
-	 * poll us through irttp_flow_indication() - Jean II */
-	while ((self->send_credit > 0) &&
-	       (!irlmp_lap_tx_queue_full(self->lsap)) &&
-	       (skb = skb_dequeue(&self->tx_queue))) {
-		/*
-		 *  Since we can transmit and receive frames concurrently,
-		 *  the code below is a critical region and we must assure that
-		 *  nobody messes with the credits while we update them.
-		 */
-		spin_lock_irqsave(&self->lock, flags);
-
-		n = self->avail_credit;
-		self->avail_credit = 0;
-
-		/* Only room for 127 credits in frame */
-		if (n > 127) {
-			self->avail_credit = n-127;
-			n = 127;
-		}
-		self->remote_credit += n;
-		self->send_credit--;
-
-		spin_unlock_irqrestore(&self->lock, flags);
-
-		/*
-		 *  More bit must be set by the data_request() or fragment()
-		 *  functions
-		 */
-		skb->data[0] |= (n & 0x7f);
-
-		/* Detach from socket.
-		 * The current skb has a reference to the socket that sent
-		 * it (skb->sk). When we pass it to IrLMP, the skb will be
-		 * stored in in IrLAP (self->wx_list). When we are within
-		 * IrLAP, we lose the notion of socket, so we should not
-		 * have a reference to a socket. So, we drop it here.
-		 *
-		 * Why does it matter ?
-		 * When the skb is freed (kfree_skb), if it is associated
-		 * with a socket, it release buffer space on the socket
-		 * (through sock_wfree() and sock_def_write_space()).
-		 * If the socket no longer exist, we may crash. Hard.
-		 * When we close a socket, we make sure that associated packets
-		 * in IrTTP are freed. However, we have no way to cancel
-		 * the packet that we have passed to IrLAP. So, if a packet
-		 * remains in IrLAP (retry on the link or else) after we
-		 * close the socket, we are dead !
-		 * Jean II */
-		if (skb->sk != NULL) {
-			/* IrSOCK application, IrOBEX, ... */
-			skb_orphan(skb);
-		}
-			/* IrCOMM over IrTTP, IrLAN, ... */
-
-		/* Pass the skb to IrLMP - done */
-		irlmp_data_request(self->lsap, skb);
-		self->stats.tx_packets++;
-	}
-
-	/* Check if we can accept more frames from client.
-	 * We don't want to wait until the todo timer to do that, and we
-	 * can't use tasklets (grr...), so we are obliged to give control
-	 * to client. That's ok, this test will be true not too often
-	 * (max once per LAP window) and we are called from places
-	 * where we can spend a bit of time doing stuff. - Jean II */
-	if ((self->tx_sdu_busy) &&
-	    (skb_queue_len(&self->tx_queue) < TTP_TX_LOW_THRESHOLD) &&
-	    (!self->close_pend)) {
-		if (self->notify.flow_indication)
-			self->notify.flow_indication(self->notify.instance,
-						     self, FLOW_START);
-
-		/* self->tx_sdu_busy is the state of the client.
-		 * We don't really have a race here, but it's always safer
-		 * to update our state after the client - Jean II */
-		self->tx_sdu_busy = FALSE;
-	}
-
-	/* Reset lock */
-	self->tx_queue_lock = 0;
-}
-
-/*
- * Function irttp_give_credit (self)
- *
- *    Send a dataless flowdata TTP-PDU and give available credit to peer
- *    TSAP
- */
-static inline void irttp_give_credit(struct tsap_cb *self)
-{
-	struct sk_buff *tx_skb = NULL;
-	unsigned long flags;
-	int n;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
-	pr_debug("%s() send=%d,avail=%d,remote=%d\n",
-		 __func__,
-		 self->send_credit, self->avail_credit, self->remote_credit);
-
-	/* Give credit to peer */
-	tx_skb = alloc_skb(TTP_MAX_HEADER, GFP_ATOMIC);
-	if (!tx_skb)
-		return;
-
-	/* Reserve space for LMP, and LAP header */
-	skb_reserve(tx_skb, LMP_MAX_HEADER);
-
-	/*
-	 *  Since we can transmit and receive frames concurrently,
-	 *  the code below is a critical region and we must assure that
-	 *  nobody messes with the credits while we update them.
-	 */
-	spin_lock_irqsave(&self->lock, flags);
-
-	n = self->avail_credit;
-	self->avail_credit = 0;
-
-	/* Only space for 127 credits in frame */
-	if (n > 127) {
-		self->avail_credit = n - 127;
-		n = 127;
-	}
-	self->remote_credit += n;
-
-	spin_unlock_irqrestore(&self->lock, flags);
-
-	skb_put(tx_skb, 1);
-	tx_skb->data[0] = (__u8) (n & 0x7f);
-
-	irlmp_data_request(self->lsap, tx_skb);
-	self->stats.tx_packets++;
-}
-
-/*
- * Function irttp_udata_indication (instance, sap, skb)
- *
- *    Received some unit-data (unreliable)
- *
- */
-static int irttp_udata_indication(void *instance, void *sap,
-				  struct sk_buff *skb)
-{
-	struct tsap_cb *self;
-	int err;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-	IRDA_ASSERT(skb != NULL, return -1;);
-
-	self->stats.rx_packets++;
-
-	/* Just pass data to layer above */
-	if (self->notify.udata_indication) {
-		err = self->notify.udata_indication(self->notify.instance,
-						    self, skb);
-		/* Same comment as in irttp_do_data_indication() */
-		if (!err)
-			return 0;
-	}
-	/* Either no handler, or handler returns an error */
-	dev_kfree_skb(skb);
-
-	return 0;
-}
-
-/*
- * Function irttp_data_indication (instance, sap, skb)
- *
- *    Receive segment from IrLMP.
- *
- */
-static int irttp_data_indication(void *instance, void *sap,
-				 struct sk_buff *skb)
-{
-	struct tsap_cb *self;
-	unsigned long flags;
-	int n;
-
-	self = instance;
-
-	n = skb->data[0] & 0x7f;     /* Extract the credits */
-
-	self->stats.rx_packets++;
-
-	/*  Deal with inbound credit
-	 *  Since we can transmit and receive frames concurrently,
-	 *  the code below is a critical region and we must assure that
-	 *  nobody messes with the credits while we update them.
-	 */
-	spin_lock_irqsave(&self->lock, flags);
-	self->send_credit += n;
-	if (skb->len > 1)
-		self->remote_credit--;
-	spin_unlock_irqrestore(&self->lock, flags);
-
-	/*
-	 *  Data or dataless packet? Dataless frames contains only the
-	 *  TTP_HEADER.
-	 */
-	if (skb->len > 1) {
-		/*
-		 *  We don't remove the TTP header, since we must preserve the
-		 *  more bit, so the defragment routing knows what to do
-		 */
-		skb_queue_tail(&self->rx_queue, skb);
-	} else {
-		/* Dataless flowdata TTP-PDU */
-		dev_kfree_skb(skb);
-	}
-
-
-	/* Push data to the higher layer.
-	 * We do it synchronously because running the todo timer for each
-	 * receive packet would be too much overhead and latency.
-	 * By passing control to the higher layer, we run the risk that
-	 * it may take time or grab a lock. Most often, the higher layer
-	 * will only put packet in a queue.
-	 * Anyway, packets are only dripping through the IrDA, so we can
-	 * have time before the next packet.
-	 * Further, we are run from NET_BH, so the worse that can happen is
-	 * us missing the optimal time to send back the PF bit in LAP.
-	 * Jean II */
-	irttp_run_rx_queue(self);
-
-	/* We now give credits to peer in irttp_run_rx_queue().
-	 * We need to send credit *NOW*, otherwise we are going
-	 * to miss the next Tx window. The todo timer may take
-	 * a while before it's run... - Jean II */
-
-	/*
-	 * If the peer device has given us some credits and we didn't have
-	 * anyone from before, then we need to shedule the tx queue.
-	 * We need to do that because our Tx have stopped (so we may not
-	 * get any LAP flow indication) and the user may be stopped as
-	 * well. - Jean II
-	 */
-	if (self->send_credit == n) {
-		/* Restart pushing stuff to LAP */
-		irttp_run_tx_queue(self);
-		/* Note : we don't want to schedule the todo timer
-		 * because it has horrible latency. No tasklets
-		 * because the tasklet API is broken. - Jean II */
-	}
-
-	return 0;
-}
-
-/*
- * Function irttp_status_indication (self, reason)
- *
- *    Status_indication, just pass to the higher layer...
- *
- */
-static void irttp_status_indication(void *instance,
-				    LINK_STATUS link, LOCK_STATUS lock)
-{
-	struct tsap_cb *self;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
-	/* Check if client has already closed the TSAP and gone away */
-	if (self->close_pend)
-		return;
-
-	/*
-	 *  Inform service user if he has requested it
-	 */
-	if (self->notify.status_indication != NULL)
-		self->notify.status_indication(self->notify.instance,
-					       link, lock);
-	else
-		pr_debug("%s(), no handler\n", __func__);
-}
-
-/*
- * Function irttp_flow_indication (self, reason)
- *
- *    Flow_indication : IrLAP tells us to send more data.
- *
- */
-static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
-{
-	struct tsap_cb *self;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
-	pr_debug("%s(instance=%p)\n", __func__, self);
-
-	/* We are "polled" directly from LAP, and the LAP want to fill
-	 * its Tx window. We want to do our best to send it data, so that
-	 * we maximise the window. On the other hand, we want to limit the
-	 * amount of work here so that LAP doesn't hang forever waiting
-	 * for packets. - Jean II */
-
-	/* Try to send some packets. Currently, LAP calls us every time
-	 * there is one free slot, so we will send only one packet.
-	 * This allow the scheduler to do its round robin - Jean II */
-	irttp_run_tx_queue(self);
-
-	/* Note regarding the interraction with higher layer.
-	 * irttp_run_tx_queue() may call the client when its queue
-	 * start to empty, via notify.flow_indication(). Initially.
-	 * I wanted this to happen in a tasklet, to avoid client
-	 * grabbing the CPU, but we can't use tasklets safely. And timer
-	 * is definitely too slow.
-	 * This will happen only once per LAP window, and usually at
-	 * the third packet (unless window is smaller). LAP is still
-	 * doing mtt and sending first packet so it's sort of OK
-	 * to do that. Jean II */
-
-	/* If we need to send disconnect. try to do it now */
-	if (self->disconnect_pend)
-		irttp_start_todo_timer(self, 0);
-}
-
-/*
- * Function irttp_flow_request (self, command)
- *
- *    This function could be used by the upper layers to tell IrTTP to stop
- *    delivering frames if the receive queues are starting to get full, or
- *    to tell IrTTP to start delivering frames again.
- */
-void irttp_flow_request(struct tsap_cb *self, LOCAL_FLOW flow)
-{
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
-	switch (flow) {
-	case FLOW_STOP:
-		pr_debug("%s(), flow stop\n", __func__);
-		self->rx_sdu_busy = TRUE;
-		break;
-	case FLOW_START:
-		pr_debug("%s(), flow start\n", __func__);
-		self->rx_sdu_busy = FALSE;
-
-		/* Client say he can accept more data, try to free our
-		 * queues ASAP - Jean II */
-		irttp_run_rx_queue(self);
-
-		break;
-	default:
-		pr_debug("%s(), Unknown flow command!\n", __func__);
-	}
-}
-EXPORT_SYMBOL(irttp_flow_request);
-
-/*
- * Function irttp_connect_request (self, dtsap_sel, daddr, qos)
- *
- *    Try to connect to remote destination TSAP selector
- *
- */
-int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
-			  __u32 saddr, __u32 daddr,
-			  struct qos_info *qos, __u32 max_sdu_size,
-			  struct sk_buff *userdata)
-{
-	struct sk_buff *tx_skb;
-	__u8 *frame;
-	__u8 n;
-
-	pr_debug("%s(), max_sdu_size=%d\n", __func__, max_sdu_size);
-
-	IRDA_ASSERT(self != NULL, return -EBADR;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -EBADR;);
-
-	if (self->connected) {
-		if (userdata)
-			dev_kfree_skb(userdata);
-		return -EISCONN;
-	}
-
-	/* Any userdata supplied? */
-	if (userdata == NULL) {
-		tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
-				   GFP_ATOMIC);
-		if (!tx_skb)
-			return -ENOMEM;
-
-		/* Reserve space for MUX_CONTROL and LAP header */
-		skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER);
-	} else {
-		tx_skb = userdata;
-		/*
-		 *  Check that the client has reserved enough space for
-		 *  headers
-		 */
-		IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
-			{ dev_kfree_skb(userdata); return -1; });
-	}
-
-	/* Initialize connection parameters */
-	self->connected = FALSE;
-	self->avail_credit = 0;
-	self->rx_max_sdu_size = max_sdu_size;
-	self->rx_sdu_size = 0;
-	self->rx_sdu_busy = FALSE;
-	self->dtsap_sel = dtsap_sel;
-
-	n = self->initial_credit;
-
-	self->remote_credit = 0;
-	self->send_credit = 0;
-
-	/*
-	 *  Give away max 127 credits for now
-	 */
-	if (n > 127) {
-		self->avail_credit = n - 127;
-		n = 127;
-	}
-
-	self->remote_credit = n;
-
-	/* SAR enabled? */
-	if (max_sdu_size > 0) {
-		IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
-			{ dev_kfree_skb(tx_skb); return -1; });
-
-		/* Insert SAR parameters */
-		frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER);
-
-		frame[0] = TTP_PARAMETERS | n;
-		frame[1] = 0x04; /* Length */
-		frame[2] = 0x01; /* MaxSduSize */
-		frame[3] = 0x02; /* Value length */
-
-		put_unaligned(cpu_to_be16((__u16) max_sdu_size),
-			      (__be16 *)(frame+4));
-	} else {
-		/* Insert plain TTP header */
-		frame = skb_push(tx_skb, TTP_HEADER);
-
-		/* Insert initial credit in frame */
-		frame[0] = n & 0x7f;
-	}
-
-	/* Connect with IrLMP. No QoS parameters for now */
-	return irlmp_connect_request(self->lsap, dtsap_sel, saddr, daddr, qos,
-				     tx_skb);
-}
-EXPORT_SYMBOL(irttp_connect_request);
-
-/*
- * Function irttp_connect_confirm (handle, qos, skb)
- *
- *    Service user confirms TSAP connection with peer.
- *
- */
-static void irttp_connect_confirm(void *instance, void *sap,
-				  struct qos_info *qos, __u32 max_seg_size,
-				  __u8 max_header_size, struct sk_buff *skb)
-{
-	struct tsap_cb *self;
-	int parameters;
-	int ret;
-	__u8 plen;
-	__u8 n;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	self->max_seg_size = max_seg_size - TTP_HEADER;
-	self->max_header_size = max_header_size + TTP_HEADER;
-
-	/*
-	 *  Check if we have got some QoS parameters back! This should be the
-	 *  negotiated QoS for the link.
-	 */
-	if (qos) {
-		pr_debug("IrTTP, Negotiated BAUD_RATE: %02x\n",
-			 qos->baud_rate.bits);
-		pr_debug("IrTTP, Negotiated BAUD_RATE: %d bps.\n",
-			 qos->baud_rate.value);
-	}
-
-	n = skb->data[0] & 0x7f;
-
-	pr_debug("%s(), Initial send_credit=%d\n", __func__, n);
-
-	self->send_credit = n;
-	self->tx_max_sdu_size = 0;
-	self->connected = TRUE;
-
-	parameters = skb->data[0] & 0x80;
-
-	IRDA_ASSERT(skb->len >= TTP_HEADER, return;);
-	skb_pull(skb, TTP_HEADER);
-
-	if (parameters) {
-		plen = skb->data[0];
-
-		ret = irda_param_extract_all(self, skb->data+1,
-					     IRDA_MIN(skb->len-1, plen),
-					     &param_info);
-
-		/* Any errors in the parameter list? */
-		if (ret < 0) {
-			net_warn_ratelimited("%s: error extracting parameters\n",
-					     __func__);
-			dev_kfree_skb(skb);
-
-			/* Do not accept this connection attempt */
-			return;
-		}
-		/* Remove parameters */
-		skb_pull(skb, IRDA_MIN(skb->len, plen+1));
-	}
-
-	pr_debug("%s() send=%d,avail=%d,remote=%d\n", __func__,
-		 self->send_credit, self->avail_credit, self->remote_credit);
-
-	pr_debug("%s(), MaxSduSize=%d\n", __func__,
-		 self->tx_max_sdu_size);
-
-	if (self->notify.connect_confirm) {
-		self->notify.connect_confirm(self->notify.instance, self, qos,
-					     self->tx_max_sdu_size,
-					     self->max_header_size, skb);
-	} else
-		dev_kfree_skb(skb);
-}
-
-/*
- * Function irttp_connect_indication (handle, skb)
- *
- *    Some other device is connecting to this TSAP
- *
- */
-static void irttp_connect_indication(void *instance, void *sap,
-		struct qos_info *qos, __u32 max_seg_size, __u8 max_header_size,
-		struct sk_buff *skb)
-{
-	struct tsap_cb *self;
-	struct lsap_cb *lsap;
-	int parameters;
-	int ret;
-	__u8 plen;
-	__u8 n;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-	IRDA_ASSERT(skb != NULL, return;);
-
-	lsap = sap;
-
-	self->max_seg_size = max_seg_size - TTP_HEADER;
-	self->max_header_size = max_header_size+TTP_HEADER;
-
-	pr_debug("%s(), TSAP sel=%02x\n", __func__, self->stsap_sel);
-
-	/* Need to update dtsap_sel if its equal to LSAP_ANY */
-	self->dtsap_sel = lsap->dlsap_sel;
-
-	n = skb->data[0] & 0x7f;
-
-	self->send_credit = n;
-	self->tx_max_sdu_size = 0;
-
-	parameters = skb->data[0] & 0x80;
-
-	IRDA_ASSERT(skb->len >= TTP_HEADER, return;);
-	skb_pull(skb, TTP_HEADER);
-
-	if (parameters) {
-		plen = skb->data[0];
-
-		ret = irda_param_extract_all(self, skb->data+1,
-					     IRDA_MIN(skb->len-1, plen),
-					     &param_info);
-
-		/* Any errors in the parameter list? */
-		if (ret < 0) {
-			net_warn_ratelimited("%s: error extracting parameters\n",
-					     __func__);
-			dev_kfree_skb(skb);
-
-			/* Do not accept this connection attempt */
-			return;
-		}
-
-		/* Remove parameters */
-		skb_pull(skb, IRDA_MIN(skb->len, plen+1));
-	}
-
-	if (self->notify.connect_indication) {
-		self->notify.connect_indication(self->notify.instance, self,
-						qos, self->tx_max_sdu_size,
-						self->max_header_size, skb);
-	} else
-		dev_kfree_skb(skb);
-}
-
-/*
- * Function irttp_connect_response (handle, userdata)
- *
- *    Service user is accepting the connection, just pass it down to
- *    IrLMP!
- *
- */
-int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
-			   struct sk_buff *userdata)
-{
-	struct sk_buff *tx_skb;
-	__u8 *frame;
-	int ret;
-	__u8 n;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-
-	pr_debug("%s(), Source TSAP selector=%02x\n", __func__,
-		 self->stsap_sel);
-
-	/* Any userdata supplied? */
-	if (userdata == NULL) {
-		tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
-				   GFP_ATOMIC);
-		if (!tx_skb)
-			return -ENOMEM;
-
-		/* Reserve space for MUX_CONTROL and LAP header */
-		skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER);
-	} else {
-		tx_skb = userdata;
-		/*
-		 *  Check that the client has reserved enough space for
-		 *  headers
-		 */
-		IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
-			{ dev_kfree_skb(userdata); return -1; });
-	}
-
-	self->avail_credit = 0;
-	self->remote_credit = 0;
-	self->rx_max_sdu_size = max_sdu_size;
-	self->rx_sdu_size = 0;
-	self->rx_sdu_busy = FALSE;
-
-	n = self->initial_credit;
-
-	/* Frame has only space for max 127 credits (7 bits) */
-	if (n > 127) {
-		self->avail_credit = n - 127;
-		n = 127;
-	}
-
-	self->remote_credit = n;
-	self->connected = TRUE;
-
-	/* SAR enabled? */
-	if (max_sdu_size > 0) {
-		IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
-			{ dev_kfree_skb(tx_skb); return -1; });
-
-		/* Insert TTP header with SAR parameters */
-		frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER);
-
-		frame[0] = TTP_PARAMETERS | n;
-		frame[1] = 0x04; /* Length */
-
-		/* irda_param_insert(self, IRTTP_MAX_SDU_SIZE, frame+1,  */
-/*				  TTP_SAR_HEADER, &param_info) */
-
-		frame[2] = 0x01; /* MaxSduSize */
-		frame[3] = 0x02; /* Value length */
-
-		put_unaligned(cpu_to_be16((__u16) max_sdu_size),
-			      (__be16 *)(frame+4));
-	} else {
-		/* Insert TTP header */
-		frame = skb_push(tx_skb, TTP_HEADER);
-
-		frame[0] = n & 0x7f;
-	}
-
-	ret = irlmp_connect_response(self->lsap, tx_skb);
-
-	return ret;
-}
-EXPORT_SYMBOL(irttp_connect_response);
-
-/*
- * Function irttp_dup (self, instance)
- *
- *    Duplicate TSAP, can be used by servers to confirm a connection on a
- *    new TSAP so it can keep listening on the old one.
- */
-struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance)
-{
-	struct tsap_cb *new;
-	unsigned long flags;
-
-	/* Protect our access to the old tsap instance */
-	spin_lock_irqsave(&irttp->tsaps->hb_spinlock, flags);
-
-	/* Find the old instance */
-	if (!hashbin_find(irttp->tsaps, (long) orig, NULL)) {
-		pr_debug("%s(), unable to find TSAP\n", __func__);
-		spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
-		return NULL;
-	}
-
-	/* Allocate a new instance */
-	new = kmemdup(orig, sizeof(struct tsap_cb), GFP_ATOMIC);
-	if (!new) {
-		pr_debug("%s(), unable to kmalloc\n", __func__);
-		spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
-		return NULL;
-	}
-	spin_lock_init(&new->lock);
-
-	/* We don't need the old instance any more */
-	spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags);
-
-	/* Try to dup the LSAP (may fail if we were too slow) */
-	new->lsap = irlmp_dup(orig->lsap, new);
-	if (!new->lsap) {
-		pr_debug("%s(), dup failed!\n", __func__);
-		kfree(new);
-		return NULL;
-	}
-
-	/* Not everything should be copied */
-	new->notify.instance = instance;
-
-	/* Initialize internal objects */
-	irttp_init_tsap(new);
-
-	/* This is locked */
-	hashbin_insert(irttp->tsaps, (irda_queue_t *) new, (long) new, NULL);
-
-	return new;
-}
-EXPORT_SYMBOL(irttp_dup);
-
-/*
- * Function irttp_disconnect_request (self)
- *
- *    Close this connection please! If priority is high, the queued data
- *    segments, if any, will be deallocated first
- *
- */
-int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata,
-			     int priority)
-{
-	int ret;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
-
-	/* Already disconnected? */
-	if (!self->connected) {
-		pr_debug("%s(), already disconnected!\n", __func__);
-		if (userdata)
-			dev_kfree_skb(userdata);
-		return -1;
-	}
-
-	/* Disconnect already pending ?
-	 * We need to use an atomic operation to prevent reentry. This
-	 * function may be called from various context, like user, timer
-	 * for following a disconnect_indication() (i.e. net_bh).
-	 * Jean II */
-	if (test_and_set_bit(0, &self->disconnect_pend)) {
-		pr_debug("%s(), disconnect already pending\n",
-			 __func__);
-		if (userdata)
-			dev_kfree_skb(userdata);
-
-		/* Try to make some progress */
-		irttp_run_tx_queue(self);
-		return -1;
-	}
-
-	/*
-	 *  Check if there is still data segments in the transmit queue
-	 */
-	if (!skb_queue_empty(&self->tx_queue)) {
-		if (priority == P_HIGH) {
-			/*
-			 *  No need to send the queued data, if we are
-			 *  disconnecting right now since the data will
-			 *  not have any usable connection to be sent on
-			 */
-			pr_debug("%s(): High priority!!()\n", __func__);
-			irttp_flush_queues(self);
-		} else if (priority == P_NORMAL) {
-			/*
-			 *  Must delay disconnect until after all data segments
-			 *  have been sent and the tx_queue is empty
-			 */
-			/* We'll reuse this one later for the disconnect */
-			self->disconnect_skb = userdata;  /* May be NULL */
-
-			irttp_run_tx_queue(self);
-
-			irttp_start_todo_timer(self, HZ/10);
-			return -1;
-		}
-	}
-	/* Note : we don't need to check if self->rx_queue is full and the
-	 * state of self->rx_sdu_busy because the disconnect response will
-	 * be sent at the LMP level (so even if the peer has its Tx queue
-	 * full of data). - Jean II */
-
-	pr_debug("%s(), Disconnecting ...\n", __func__);
-	self->connected = FALSE;
-
-	if (!userdata) {
-		struct sk_buff *tx_skb;
-		tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC);
-		if (!tx_skb)
-			return -ENOMEM;
-
-		/*
-		 *  Reserve space for MUX and LAP header
-		 */
-		skb_reserve(tx_skb, LMP_MAX_HEADER);
-
-		userdata = tx_skb;
-	}
-	ret = irlmp_disconnect_request(self->lsap, userdata);
-
-	/* The disconnect is no longer pending */
-	clear_bit(0, &self->disconnect_pend);	/* FALSE */
-
-	return ret;
-}
-EXPORT_SYMBOL(irttp_disconnect_request);
-
-/*
- * Function irttp_disconnect_indication (self, reason)
- *
- *    Disconnect indication, TSAP disconnected by peer?
- *
- */
-static void irttp_disconnect_indication(void *instance, void *sap,
-		LM_REASON reason, struct sk_buff *skb)
-{
-	struct tsap_cb *self;
-
-	self = instance;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;);
-
-	/* Prevent higher layer to send more data */
-	self->connected = FALSE;
-
-	/* Check if client has already tried to close the TSAP */
-	if (self->close_pend) {
-		/* In this case, the higher layer is probably gone. Don't
-		 * bother it and clean up the remains - Jean II */
-		if (skb)
-			dev_kfree_skb(skb);
-		irttp_close_tsap(self);
-		return;
-	}
-
-	/* If we are here, we assume that is the higher layer is still
-	 * waiting for the disconnect notification and able to process it,
-	 * even if he tried to disconnect. Otherwise, it would have already
-	 * attempted to close the tsap and self->close_pend would be TRUE.
-	 * Jean II */
-
-	/* No need to notify the client if has already tried to disconnect */
-	if (self->notify.disconnect_indication)
-		self->notify.disconnect_indication(self->notify.instance, self,
-						   reason, skb);
-	else
-		if (skb)
-			dev_kfree_skb(skb);
-}
-
-/*
- * Function irttp_do_data_indication (self, skb)
- *
- *    Try to deliver reassembled skb to layer above, and requeue it if that
- *    for some reason should fail. We mark rx sdu as busy to apply back
- *    pressure is necessary.
- */
-static void irttp_do_data_indication(struct tsap_cb *self, struct sk_buff *skb)
-{
-	int err;
-
-	/* Check if client has already closed the TSAP and gone away */
-	if (self->close_pend) {
-		dev_kfree_skb(skb);
-		return;
-	}
-
-	err = self->notify.data_indication(self->notify.instance, self, skb);
-
-	/* Usually the layer above will notify that it's input queue is
-	 * starting to get filled by using the flow request, but this may
-	 * be difficult, so it can instead just refuse to eat it and just
-	 * give an error back
-	 */
-	if (err) {
-		pr_debug("%s() requeueing skb!\n", __func__);
-
-		/* Make sure we take a break */
-		self->rx_sdu_busy = TRUE;
-
-		/* Need to push the header in again */
-		skb_push(skb, TTP_HEADER);
-		skb->data[0] = 0x00; /* Make sure MORE bit is cleared */
-
-		/* Put skb back on queue */
-		skb_queue_head(&self->rx_queue, skb);
-	}
-}
-
-/*
- * Function irttp_run_rx_queue (self)
- *
- *     Check if we have any frames to be transmitted, or if we have any
- *     available credit to give away.
- */
-static void irttp_run_rx_queue(struct tsap_cb *self)
-{
-	struct sk_buff *skb;
-	int more = 0;
-
-	pr_debug("%s() send=%d,avail=%d,remote=%d\n", __func__,
-		 self->send_credit, self->avail_credit, self->remote_credit);
-
-	/* Get exclusive access to the rx queue, otherwise don't touch it */
-	if (irda_lock(&self->rx_queue_lock) == FALSE)
-		return;
-
-	/*
-	 *  Reassemble all frames in receive queue and deliver them
-	 */
-	while (!self->rx_sdu_busy && (skb = skb_dequeue(&self->rx_queue))) {
-		/* This bit will tell us if it's the last fragment or not */
-		more = skb->data[0] & 0x80;
-
-		/* Remove TTP header */
-		skb_pull(skb, TTP_HEADER);
-
-		/* Add the length of the remaining data */
-		self->rx_sdu_size += skb->len;
-
-		/*
-		 * If SAR is disabled, or user has requested no reassembly
-		 * of received fragments then we just deliver them
-		 * immediately. This can be requested by clients that
-		 * implements byte streams without any message boundaries
-		 */
-		if (self->rx_max_sdu_size == TTP_SAR_DISABLE) {
-			irttp_do_data_indication(self, skb);
-			self->rx_sdu_size = 0;
-
-			continue;
-		}
-
-		/* Check if this is a fragment, and not the last fragment */
-		if (more) {
-			/*
-			 *  Queue the fragment if we still are within the
-			 *  limits of the maximum size of the rx_sdu
-			 */
-			if (self->rx_sdu_size <= self->rx_max_sdu_size) {
-				pr_debug("%s(), queueing frag\n",
-					 __func__);
-				skb_queue_tail(&self->rx_fragments, skb);
-			} else {
-				/* Free the part of the SDU that is too big */
-				dev_kfree_skb(skb);
-			}
-			continue;
-		}
-		/*
-		 *  This is the last fragment, so time to reassemble!
-		 */
-		if ((self->rx_sdu_size <= self->rx_max_sdu_size) ||
-		    (self->rx_max_sdu_size == TTP_SAR_UNBOUND)) {
-			/*
-			 * A little optimizing. Only queue the fragment if
-			 * there are other fragments. Since if this is the
-			 * last and only fragment, there is no need to
-			 * reassemble :-)
-			 */
-			if (!skb_queue_empty(&self->rx_fragments)) {
-				skb_queue_tail(&self->rx_fragments,
-					       skb);
-
-				skb = irttp_reassemble_skb(self);
-			}
-
-			/* Now we can deliver the reassembled skb */
-			irttp_do_data_indication(self, skb);
-		} else {
-			pr_debug("%s(), Truncated frame\n", __func__);
-
-			/* Free the part of the SDU that is too big */
-			dev_kfree_skb(skb);
-
-			/* Deliver only the valid but truncated part of SDU */
-			skb = irttp_reassemble_skb(self);
-
-			irttp_do_data_indication(self, skb);
-		}
-		self->rx_sdu_size = 0;
-	}
-
-	/*
-	 * It's not trivial to keep track of how many credits are available
-	 * by incrementing at each packet, because delivery may fail
-	 * (irttp_do_data_indication() may requeue the frame) and because
-	 * we need to take care of fragmentation.
-	 * We want the other side to send up to initial_credit packets.
-	 * We have some frames in our queues, and we have already allowed it
-	 * to send remote_credit.
-	 * No need to spinlock, write is atomic and self correcting...
-	 * Jean II
-	 */
-	self->avail_credit = (self->initial_credit -
-			      (self->remote_credit +
-			       skb_queue_len(&self->rx_queue) +
-			       skb_queue_len(&self->rx_fragments)));
-
-	/* Do we have too much credits to send to peer ? */
-	if ((self->remote_credit <= TTP_RX_MIN_CREDIT) &&
-	    (self->avail_credit > 0)) {
-		/* Send explicit credit frame */
-		irttp_give_credit(self);
-		/* Note : do *NOT* check if tx_queue is non-empty, that
-		 * will produce deadlocks. I repeat : send a credit frame
-		 * even if we have something to send in our Tx queue.
-		 * If we have credits, it means that our Tx queue is blocked.
-		 *
-		 * Let's suppose the peer can't keep up with our Tx. He will
-		 * flow control us by not sending us any credits, and we
-		 * will stop Tx and start accumulating credits here.
-		 * Up to the point where the peer will stop its Tx queue,
-		 * for lack of credits.
-		 * Let's assume the peer application is single threaded.
-		 * It will block on Tx and never consume any Rx buffer.
-		 * Deadlock. Guaranteed. - Jean II
-		 */
-	}
-
-	/* Reset lock */
-	self->rx_queue_lock = 0;
-}
-
-#ifdef CONFIG_PROC_FS
-struct irttp_iter_state {
-	int id;
-};
-
-static void *irttp_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	struct irttp_iter_state *iter = seq->private;
-	struct tsap_cb *self;
-
-	/* Protect our access to the tsap list */
-	spin_lock_irq(&irttp->tsaps->hb_spinlock);
-	iter->id = 0;
-
-	for (self = (struct tsap_cb *) hashbin_get_first(irttp->tsaps);
-	     self != NULL;
-	     self = (struct tsap_cb *) hashbin_get_next(irttp->tsaps)) {
-		if (iter->id == *pos)
-			break;
-		++iter->id;
-	}
-
-	return self;
-}
-
-static void *irttp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct irttp_iter_state *iter = seq->private;
-
-	++*pos;
-	++iter->id;
-	return (void *) hashbin_get_next(irttp->tsaps);
-}
-
-static void irttp_seq_stop(struct seq_file *seq, void *v)
-{
-	spin_unlock_irq(&irttp->tsaps->hb_spinlock);
-}
-
-static int irttp_seq_show(struct seq_file *seq, void *v)
-{
-	const struct irttp_iter_state *iter = seq->private;
-	const struct tsap_cb *self = v;
-
-	seq_printf(seq, "TSAP %d, ", iter->id);
-	seq_printf(seq, "stsap_sel: %02x, ",
-		   self->stsap_sel);
-	seq_printf(seq, "dtsap_sel: %02x\n",
-		   self->dtsap_sel);
-	seq_printf(seq, "  connected: %s, ",
-		   self->connected ? "TRUE" : "FALSE");
-	seq_printf(seq, "avail credit: %d, ",
-		   self->avail_credit);
-	seq_printf(seq, "remote credit: %d, ",
-		   self->remote_credit);
-	seq_printf(seq, "send credit: %d\n",
-		   self->send_credit);
-	seq_printf(seq, "  tx packets: %lu, ",
-		   self->stats.tx_packets);
-	seq_printf(seq, "rx packets: %lu, ",
-		   self->stats.rx_packets);
-	seq_printf(seq, "tx_queue len: %u ",
-		   skb_queue_len(&self->tx_queue));
-	seq_printf(seq, "rx_queue len: %u\n",
-		   skb_queue_len(&self->rx_queue));
-	seq_printf(seq, "  tx_sdu_busy: %s, ",
-		   self->tx_sdu_busy ? "TRUE" : "FALSE");
-	seq_printf(seq, "rx_sdu_busy: %s\n",
-		   self->rx_sdu_busy ? "TRUE" : "FALSE");
-	seq_printf(seq, "  max_seg_size: %u, ",
-		   self->max_seg_size);
-	seq_printf(seq, "tx_max_sdu_size: %u, ",
-		   self->tx_max_sdu_size);
-	seq_printf(seq, "rx_max_sdu_size: %u\n",
-		   self->rx_max_sdu_size);
-
-	seq_printf(seq, "  Used by (%s)\n\n",
-		   self->notify.name);
-	return 0;
-}
-
-static const struct seq_operations irttp_seq_ops = {
-	.start  = irttp_seq_start,
-	.next   = irttp_seq_next,
-	.stop   = irttp_seq_stop,
-	.show   = irttp_seq_show,
-};
-
-static int irttp_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_private(file, &irttp_seq_ops,
-			sizeof(struct irttp_iter_state));
-}
-
-const struct file_operations irttp_seq_fops = {
-	.owner		= THIS_MODULE,
-	.open           = irttp_seq_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release	= seq_release_private,
-};
-
-#endif /* PROC_FS */
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
deleted file mode 100644
index 16ce32ffe004..000000000000
--- a/net/irda/parameters.c
+++ /dev/null
@@ -1,584 +0,0 @@
-/*********************************************************************
- *
- * Filename:      parameters.c
- * Version:       1.0
- * Description:   A more general way to handle (pi,pl,pv) parameters
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Jun  7 10:25:11 1999
- * Modified at:   Sun Jan 30 14:08:39 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved.
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/types.h>
-#include <linux/module.h>
-
-#include <asm/unaligned.h>
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/parameters.h>
-
-static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
-				PV_TYPE type, PI_HANDLER func);
-static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
-			       PV_TYPE type, PI_HANDLER func);
-static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
-			       PV_TYPE type, PI_HANDLER func);
-static int irda_extract_no_value(void *self, __u8 *buf, int len, __u8 pi,
-				 PV_TYPE type, PI_HANDLER func);
-
-static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
-			       PV_TYPE type, PI_HANDLER func);
-static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi,
-				PV_TYPE type, PI_HANDLER func);
-
-static int irda_param_unpack(__u8 *buf, char *fmt, ...);
-
-/* Parameter value call table. Must match PV_TYPE */
-static const PV_HANDLER pv_extract_table[] = {
-	irda_extract_integer, /* Handler for any length integers */
-	irda_extract_integer, /* Handler for 8  bits integers */
-	irda_extract_integer, /* Handler for 16 bits integers */
-	irda_extract_string,  /* Handler for strings */
-	irda_extract_integer, /* Handler for 32 bits integers */
-	irda_extract_octseq,  /* Handler for octet sequences */
-	irda_extract_no_value /* Handler for no value parameters */
-};
-
-static const PV_HANDLER pv_insert_table[] = {
-	irda_insert_integer, /* Handler for any length integers */
-	irda_insert_integer, /* Handler for 8  bits integers */
-	irda_insert_integer, /* Handler for 16 bits integers */
-	NULL,                /* Handler for strings */
-	irda_insert_integer, /* Handler for 32 bits integers */
-	NULL,                /* Handler for octet sequences */
-	irda_insert_no_value /* Handler for no value parameters */
-};
-
-/*
- * Function irda_insert_no_value (self, buf, len, pi, type, func)
- */
-static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi,
-				PV_TYPE type, PI_HANDLER func)
-{
-	irda_param_t p;
-	int ret;
-
-	p.pi = pi;
-	p.pl = 0;
-
-	/* Call handler for this parameter */
-	ret = (*func)(self, &p, PV_GET);
-
-	/* Extract values anyway, since handler may need them */
-	irda_param_pack(buf, "bb", p.pi, p.pl);
-
-	if (ret < 0)
-		return ret;
-
-	return 2; /* Inserted pl+2 bytes */
-}
-
-/*
- * Function irda_extract_no_value (self, buf, len, type, func)
- *
- *    Extracts a parameter without a pv field (pl=0)
- *
- */
-static int irda_extract_no_value(void *self, __u8 *buf, int len, __u8 pi,
-				 PV_TYPE type, PI_HANDLER func)
-{
-	irda_param_t p;
-	int ret;
-
-	/* Extract values anyway, since handler may need them */
-	irda_param_unpack(buf, "bb", &p.pi, &p.pl);
-
-	/* Call handler for this parameter */
-	ret = (*func)(self, &p, PV_PUT);
-
-	if (ret < 0)
-		return ret;
-
-	return 2; /* Extracted pl+2 bytes */
-}
-
-/*
- * Function irda_insert_integer (self, buf, len, pi, type, func)
- */
-static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
-			       PV_TYPE type, PI_HANDLER func)
-{
-	irda_param_t p;
-	int n = 0;
-	int err;
-
-	p.pi = pi;             /* In case handler needs to know */
-	p.pl = type & PV_MASK; /* The integer type codes the length as well */
-	p.pv.i = 0;            /* Clear value */
-
-	/* Call handler for this parameter */
-	err = (*func)(self, &p, PV_GET);
-	if (err < 0)
-		return err;
-
-	/*
-	 * If parameter length is still 0, then (1) this is an any length
-	 * integer, and (2) the handler function does not care which length
-	 * we choose to use, so we pick the one the gives the fewest bytes.
-	 */
-	if (p.pl == 0) {
-		if (p.pv.i < 0xff) {
-			pr_debug("%s(), using 1 byte\n", __func__);
-			p.pl = 1;
-		} else if (p.pv.i < 0xffff) {
-			pr_debug("%s(), using 2 bytes\n", __func__);
-			p.pl = 2;
-		} else {
-			pr_debug("%s(), using 4 bytes\n", __func__);
-			p.pl = 4; /* Default length */
-		}
-	}
-	/* Check if buffer is long enough for insertion */
-	if (len < (2+p.pl)) {
-		net_warn_ratelimited("%s: buffer too short for insertion!\n",
-				     __func__);
-		return -1;
-	}
-	pr_debug("%s(), pi=%#x, pl=%d, pi=%d\n", __func__,
-		 p.pi, p.pl, p.pv.i);
-	switch (p.pl) {
-	case 1:
-		n += irda_param_pack(buf, "bbb", p.pi, p.pl, (__u8) p.pv.i);
-		break;
-	case 2:
-		if (type & PV_BIG_ENDIAN)
-			p.pv.i = cpu_to_be16((__u16) p.pv.i);
-		else
-			p.pv.i = cpu_to_le16((__u16) p.pv.i);
-		n += irda_param_pack(buf, "bbs", p.pi, p.pl, (__u16) p.pv.i);
-		break;
-	case 4:
-		if (type & PV_BIG_ENDIAN)
-			cpu_to_be32s(&p.pv.i);
-		else
-			cpu_to_le32s(&p.pv.i);
-		n += irda_param_pack(buf, "bbi", p.pi, p.pl, p.pv.i);
-
-		break;
-	default:
-		net_warn_ratelimited("%s: length %d not supported\n",
-				     __func__, p.pl);
-		/* Skip parameter */
-		return -1;
-	}
-
-	return p.pl+2; /* Inserted pl+2 bytes */
-}
-
-/*
- * Function irda_extract integer (self, buf, len, pi, type, func)
- *
- *    Extract a possibly variable length integer from buffer, and call
- *    handler for processing of the parameter
- */
-static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
-				PV_TYPE type, PI_HANDLER func)
-{
-	irda_param_t p;
-	int n = 0;
-	int extract_len;	/* Real length we extract */
-	int err;
-
-	p.pi = pi;     /* In case handler needs to know */
-	p.pl = buf[1]; /* Extract length of value */
-	p.pv.i = 0;    /* Clear value */
-	extract_len = p.pl;	/* Default : extract all */
-
-	/* Check if buffer is long enough for parsing */
-	if (len < (2+p.pl)) {
-		net_warn_ratelimited("%s: buffer too short for parsing! Need %d bytes, but len is only %d\n",
-				     __func__, p.pl, len);
-		return -1;
-	}
-
-	/*
-	 * Check that the integer length is what we expect it to be. If the
-	 * handler want a 16 bits integer then a 32 bits is not good enough
-	 * PV_INTEGER means that the handler is flexible.
-	 */
-	if (((type & PV_MASK) != PV_INTEGER) && ((type & PV_MASK) != p.pl)) {
-		net_err_ratelimited("%s: invalid parameter length! Expected %d bytes, but value had %d bytes!\n",
-				    __func__, type & PV_MASK, p.pl);
-
-		/* Most parameters are bit/byte fields or little endian,
-		 * so it's ok to only extract a subset of it (the subset
-		 * that the handler expect). This is necessary, as some
-		 * broken implementations seems to add extra undefined bits.
-		 * If the parameter is shorter than we expect or is big
-		 * endian, we can't play those tricks. Jean II */
-		if((p.pl < (type & PV_MASK)) || (type & PV_BIG_ENDIAN)) {
-			/* Skip parameter */
-			return p.pl+2;
-		} else {
-			/* Extract subset of it, fallthrough */
-			extract_len = type & PV_MASK;
-		}
-	}
-
-
-	switch (extract_len) {
-	case 1:
-		n += irda_param_unpack(buf+2, "b", &p.pv.i);
-		break;
-	case 2:
-		n += irda_param_unpack(buf+2, "s", &p.pv.i);
-		if (type & PV_BIG_ENDIAN)
-			p.pv.i = be16_to_cpu((__u16) p.pv.i);
-		else
-			p.pv.i = le16_to_cpu((__u16) p.pv.i);
-		break;
-	case 4:
-		n += irda_param_unpack(buf+2, "i", &p.pv.i);
-		if (type & PV_BIG_ENDIAN)
-			be32_to_cpus(&p.pv.i);
-		else
-			le32_to_cpus(&p.pv.i);
-		break;
-	default:
-		net_warn_ratelimited("%s: length %d not supported\n",
-				     __func__, p.pl);
-
-		/* Skip parameter */
-		return p.pl+2;
-	}
-
-	pr_debug("%s(), pi=%#x, pl=%d, pi=%d\n", __func__,
-		 p.pi, p.pl, p.pv.i);
-	/* Call handler for this parameter */
-	err = (*func)(self, &p, PV_PUT);
-	if (err < 0)
-		return err;
-
-	return p.pl+2; /* Extracted pl+2 bytes */
-}
-
-/*
- * Function irda_extract_string (self, buf, len, type, func)
- */
-static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
-			       PV_TYPE type, PI_HANDLER func)
-{
-	char str[33];
-	irda_param_t p;
-	int err;
-
-	p.pi = pi;     /* In case handler needs to know */
-	p.pl = buf[1]; /* Extract length of value */
-	if (p.pl > 32)
-		p.pl = 32;
-
-	pr_debug("%s(), pi=%#x, pl=%d\n", __func__,
-		 p.pi, p.pl);
-
-	/* Check if buffer is long enough for parsing */
-	if (len < (2+p.pl)) {
-		net_warn_ratelimited("%s: buffer too short for parsing! Need %d bytes, but len is only %d\n",
-				     __func__, p.pl, len);
-		return -1;
-	}
-
-	/* Should be safe to copy string like this since we have already
-	 * checked that the buffer is long enough */
-	strncpy(str, buf+2, p.pl);
-
-	pr_debug("%s(), str=0x%02x 0x%02x\n",
-		 __func__, (__u8)str[0], (__u8)str[1]);
-
-	/* Null terminate string */
-	str[p.pl] = '\0';
-
-	p.pv.c = str; /* Handler will need to take a copy */
-
-	/* Call handler for this parameter */
-	err = (*func)(self, &p, PV_PUT);
-	if (err < 0)
-		return err;
-
-	return p.pl+2; /* Extracted pl+2 bytes */
-}
-
-/*
- * Function irda_extract_octseq (self, buf, len, type, func)
- */
-static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
-			       PV_TYPE type, PI_HANDLER func)
-{
-	irda_param_t p;
-
-	p.pi = pi;     /* In case handler needs to know */
-	p.pl = buf[1]; /* Extract length of value */
-
-	/* Check if buffer is long enough for parsing */
-	if (len < (2+p.pl)) {
-		net_warn_ratelimited("%s: buffer too short for parsing! Need %d bytes, but len is only %d\n",
-				     __func__, p.pl, len);
-		return -1;
-	}
-
-	pr_debug("%s(), not impl\n", __func__);
-
-	return p.pl+2; /* Extracted pl+2 bytes */
-}
-
-/*
- * Function irda_param_pack (skb, fmt, ...)
- *
- *    Format:
- *        'i' = 32 bits integer
- *        's' = string
- *
- */
-int irda_param_pack(__u8 *buf, char *fmt, ...)
-{
-	irda_pv_t arg;
-	va_list args;
-	char *p;
-	int n = 0;
-
-	va_start(args, fmt);
-
-	for (p = fmt; *p != '\0'; p++) {
-		switch (*p) {
-		case 'b':  /* 8 bits unsigned byte */
-			buf[n++] = (__u8)va_arg(args, int);
-			break;
-		case 's':  /* 16 bits unsigned short */
-			arg.i = (__u16)va_arg(args, int);
-			put_unaligned((__u16)arg.i, (__u16 *)(buf+n)); n+=2;
-			break;
-		case 'i':  /* 32 bits unsigned integer */
-			arg.i = va_arg(args, __u32);
-			put_unaligned(arg.i, (__u32 *)(buf+n)); n+=4;
-			break;
-#if 0
-		case 'c': /* \0 terminated string */
-			arg.c = va_arg(args, char *);
-			strcpy(buf+n, arg.c);
-			n += strlen(arg.c) + 1;
-			break;
-#endif
-		default:
-			va_end(args);
-			return -1;
-		}
-	}
-	va_end(args);
-
-	return 0;
-}
-EXPORT_SYMBOL(irda_param_pack);
-
-/*
- * Function irda_param_unpack (skb, fmt, ...)
- */
-static int irda_param_unpack(__u8 *buf, char *fmt, ...)
-{
-	irda_pv_t arg;
-	va_list args;
-	char *p;
-	int n = 0;
-
-	va_start(args, fmt);
-
-	for (p = fmt; *p != '\0'; p++) {
-		switch (*p) {
-		case 'b':  /* 8 bits byte */
-			arg.ip = va_arg(args, __u32 *);
-			*arg.ip = buf[n++];
-			break;
-		case 's':  /* 16 bits short */
-			arg.ip = va_arg(args, __u32 *);
-			*arg.ip = get_unaligned((__u16 *)(buf+n)); n+=2;
-			break;
-		case 'i':  /* 32 bits unsigned integer */
-			arg.ip = va_arg(args, __u32 *);
-			*arg.ip = get_unaligned((__u32 *)(buf+n)); n+=4;
-			break;
-#if 0
-		case 'c':   /* \0 terminated string */
-			arg.c = va_arg(args, char *);
-			strcpy(arg.c, buf+n);
-			n += strlen(arg.c) + 1;
-			break;
-#endif
-		default:
-			va_end(args);
-			return -1;
-		}
-
-	}
-	va_end(args);
-
-	return 0;
-}
-
-/*
- * Function irda_param_insert (self, pi, buf, len, info)
- *
- *    Insert the specified parameter (pi) into buffer. Returns number of
- *    bytes inserted
- */
-int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len,
-		      pi_param_info_t *info)
-{
-	const pi_minor_info_t *pi_minor_info;
-	__u8 pi_minor;
-	__u8 pi_major;
-	int type;
-	int ret = -1;
-	int n = 0;
-
-	IRDA_ASSERT(buf != NULL, return ret;);
-	IRDA_ASSERT(info != NULL, return ret;);
-
-	pi_minor = pi & info->pi_mask;
-	pi_major = pi >> info->pi_major_offset;
-
-	/* Check if the identifier value (pi) is valid */
-	if ((pi_major > info->len-1) ||
-	    (pi_minor > info->tables[pi_major].len-1))
-	{
-		pr_debug("%s(), no handler for parameter=0x%02x\n",
-			 __func__, pi);
-
-		/* Skip this parameter */
-		return -1;
-	}
-
-	/* Lookup the info on how to parse this parameter */
-	pi_minor_info = &info->tables[pi_major].pi_minor_call_table[pi_minor];
-
-	/* Find expected data type for this parameter identifier (pi)*/
-	type = pi_minor_info->type;
-
-	/*  Check if handler has been implemented */
-	if (!pi_minor_info->func) {
-		net_info_ratelimited("%s: no handler for pi=%#x\n",
-				     __func__, pi);
-		/* Skip this parameter */
-		return -1;
-	}
-
-	/* Insert parameter value */
-	ret = (*pv_insert_table[type & PV_MASK])(self, buf+n, len, pi, type,
-						 pi_minor_info->func);
-	return ret;
-}
-EXPORT_SYMBOL(irda_param_insert);
-
-/*
- * Function irda_param_extract (self, buf, len, info)
- *
- *    Parse all parameters. If len is correct, then everything should be
- *    safe. Returns the number of bytes that was parsed
- *
- */
-static int irda_param_extract(void *self, __u8 *buf, int len,
-			      pi_param_info_t *info)
-{
-	const pi_minor_info_t *pi_minor_info;
-	__u8 pi_minor;
-	__u8 pi_major;
-	int type;
-	int ret = -1;
-	int n = 0;
-
-	IRDA_ASSERT(buf != NULL, return ret;);
-	IRDA_ASSERT(info != NULL, return ret;);
-
-	pi_minor = buf[n] & info->pi_mask;
-	pi_major = buf[n] >> info->pi_major_offset;
-
-	/* Check if the identifier value (pi) is valid */
-	if ((pi_major > info->len-1) ||
-	    (pi_minor > info->tables[pi_major].len-1))
-	{
-		pr_debug("%s(), no handler for parameter=0x%02x\n",
-			 __func__, buf[0]);
-
-		/* Skip this parameter */
-		return 2 + buf[n + 1];  /* Continue */
-	}
-
-	/* Lookup the info on how to parse this parameter */
-	pi_minor_info = &info->tables[pi_major].pi_minor_call_table[pi_minor];
-
-	/* Find expected data type for this parameter identifier (pi)*/
-	type = pi_minor_info->type;
-
-	pr_debug("%s(), pi=[%d,%d], type=%d\n", __func__,
-		 pi_major, pi_minor, type);
-
-	/*  Check if handler has been implemented */
-	if (!pi_minor_info->func) {
-		net_info_ratelimited("%s: no handler for pi=%#x\n",
-				     __func__, buf[n]);
-		/* Skip this parameter */
-		return 2 + buf[n + 1]; /* Continue */
-	}
-
-	/* Parse parameter value */
-	ret = (*pv_extract_table[type & PV_MASK])(self, buf+n, len, buf[n],
-						  type, pi_minor_info->func);
-	return ret;
-}
-
-/*
- * Function irda_param_extract_all (self, buf, len, info)
- *
- *    Parse all parameters. If len is correct, then everything should be
- *    safe. Returns the number of bytes that was parsed
- *
- */
-int irda_param_extract_all(void *self, __u8 *buf, int len,
-			   pi_param_info_t *info)
-{
-	int ret = -1;
-	int n = 0;
-
-	IRDA_ASSERT(buf != NULL, return ret;);
-	IRDA_ASSERT(info != NULL, return ret;);
-
-	/*
-	 * Parse all parameters. Each parameter must be at least two bytes
-	 * long or else there is no point in trying to parse it
-	 */
-	while (len > 2) {
-		ret = irda_param_extract(self, buf+n, len, info);
-		if (ret < 0)
-			return ret;
-
-		n += ret;
-		len -= ret;
-	}
-	return n;
-}
-EXPORT_SYMBOL(irda_param_extract_all);
diff --git a/net/irda/qos.c b/net/irda/qos.c
deleted file mode 100644
index 25ba8509ad3e..000000000000
--- a/net/irda/qos.c
+++ /dev/null
@@ -1,771 +0,0 @@
-/*********************************************************************
- *
- * Filename:      qos.c
- * Version:       1.0
- * Description:   IrLAP QoS parameter negotiation
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Tue Sep  9 00:00:26 1997
- * Modified at:   Sun Jan 30 14:29:16 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *     Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- ********************************************************************/
-
-#include <linux/export.h>
-
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/parameters.h>
-#include <net/irda/qos.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlap_frame.h>
-
-/*
- * Maximum values of the baud rate we negotiate with the other end.
- * Most often, you don't have to change that, because Linux-IrDA will
- * use the maximum offered by the link layer, which usually works fine.
- * In some very rare cases, you may want to limit it to lower speeds...
- */
-int sysctl_max_baud_rate = 16000000;
-/*
- * Maximum value of the lap disconnect timer we negotiate with the other end.
- * Most often, the value below represent the best compromise, but some user
- * may want to keep the LAP alive longer or shorter in case of link failure.
- * Remember that the threshold time (early warning) is fixed to 3s...
- */
-int sysctl_max_noreply_time = 12;
-/*
- * Minimum turn time to be applied before transmitting to the peer.
- * Nonzero values (usec) are used as lower limit to the per-connection
- * mtt value which was announced by the other end during negotiation.
- * Might be helpful if the peer device provides too short mtt.
- * Default is 10us which means using the unmodified value given by the
- * peer except if it's 0 (0 is likely a bug in the other stack).
- */
-unsigned int sysctl_min_tx_turn_time = 10;
-/*
- * Maximum data size to be used in transmission in payload of LAP frame.
- * There is a bit of confusion in the IrDA spec :
- * The LAP spec defines the payload of a LAP frame (I field) to be
- * 2048 bytes max (IrLAP 1.1, chapt 6.6.5, p40).
- * On the other hand, the PHY mention frames of 2048 bytes max (IrPHY
- * 1.2, chapt 5.3.2.1, p41). But, this number includes the LAP header
- * (2 bytes), and CRC (32 bits at 4 Mb/s). So, for the I field (LAP
- * payload), that's only 2042 bytes. Oups !
- * My nsc-ircc hardware has troubles receiving 2048 bytes frames at 4 Mb/s,
- * so adjust to 2042... I don't know if this bug applies only for 2048
- * bytes frames or all negotiated frame sizes, but you can use the sysctl
- * to play with this value anyway.
- * Jean II */
-unsigned int sysctl_max_tx_data_size = 2042;
-/*
- * Maximum transmit window, i.e. number of LAP frames between turn-around.
- * This allow to override what the peer told us. Some peers are buggy and
- * don't always support what they tell us.
- * Jean II */
-unsigned int sysctl_max_tx_window = 7;
-
-static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get);
-static int irlap_param_link_disconnect(void *instance, irda_param_t *parm,
-				       int get);
-static int irlap_param_max_turn_time(void *instance, irda_param_t *param,
-				     int get);
-static int irlap_param_data_size(void *instance, irda_param_t *param, int get);
-static int irlap_param_window_size(void *instance, irda_param_t *param,
-				   int get);
-static int irlap_param_additional_bofs(void *instance, irda_param_t *parm,
-				       int get);
-static int irlap_param_min_turn_time(void *instance, irda_param_t *param,
-				     int get);
-
-#ifndef CONFIG_IRDA_DYNAMIC_WINDOW
-static __u32 irlap_requested_line_capacity(struct qos_info *qos);
-#endif
-
-static __u32 min_turn_times[]  = { 10000, 5000, 1000, 500, 100, 50, 10, 0 }; /* us */
-static __u32 baud_rates[]      = { 2400, 9600, 19200, 38400, 57600, 115200, 576000,
-				   1152000, 4000000, 16000000 };           /* bps */
-static __u32 data_sizes[]      = { 64, 128, 256, 512, 1024, 2048 };        /* bytes */
-static __u32 add_bofs[]        = { 48, 24, 12, 5, 3, 2, 1, 0 };            /* bytes */
-static __u32 max_turn_times[]  = { 500, 250, 100, 50 };                    /* ms */
-static __u32 link_disc_times[] = { 3, 8, 12, 16, 20, 25, 30, 40 };         /* secs */
-
-static __u32 max_line_capacities[10][4] = {
-       /* 500 ms     250 ms  100 ms  50 ms (max turn time) */
-	{    100,      0,      0,     0 }, /*     2400 bps */
-	{    400,      0,      0,     0 }, /*     9600 bps */
-	{    800,      0,      0,     0 }, /*    19200 bps */
-	{   1600,      0,      0,     0 }, /*    38400 bps */
-	{   2360,      0,      0,     0 }, /*    57600 bps */
-	{   4800,   2400,    960,   480 }, /*   115200 bps */
-	{  28800,  11520,   5760,  2880 }, /*   576000 bps */
-	{  57600,  28800,  11520,  5760 }, /*  1152000 bps */
-	{ 200000, 100000,  40000, 20000 }, /*  4000000 bps */
-	{ 800000, 400000, 160000, 80000 }, /* 16000000 bps */
-};
-
-static const pi_minor_info_t pi_minor_call_table_type_0[] = {
-	{ NULL, 0 },
-/* 01 */{ irlap_param_baud_rate,       PV_INTEGER | PV_LITTLE_ENDIAN },
-	{ NULL, 0 },
-	{ NULL, 0 },
-	{ NULL, 0 },
-	{ NULL, 0 },
-	{ NULL, 0 },
-	{ NULL, 0 },
-/* 08 */{ irlap_param_link_disconnect, PV_INT_8_BITS }
-};
-
-static const pi_minor_info_t pi_minor_call_table_type_1[] = {
-	{ NULL, 0 },
-	{ NULL, 0 },
-/* 82 */{ irlap_param_max_turn_time,   PV_INT_8_BITS },
-/* 83 */{ irlap_param_data_size,       PV_INT_8_BITS },
-/* 84 */{ irlap_param_window_size,     PV_INT_8_BITS },
-/* 85 */{ irlap_param_additional_bofs, PV_INT_8_BITS },
-/* 86 */{ irlap_param_min_turn_time,   PV_INT_8_BITS },
-};
-
-static const pi_major_info_t pi_major_call_table[] = {
-	{ pi_minor_call_table_type_0, 9 },
-	{ pi_minor_call_table_type_1, 7 },
-};
-
-static pi_param_info_t irlap_param_info = { pi_major_call_table, 2, 0x7f, 7 };
-
-/* ---------------------- LOCAL SUBROUTINES ---------------------- */
-/* Note : we start with a bunch of local subroutines.
- * As the compiler is "one pass", this is the only way to get them to
- * inline properly...
- * Jean II
- */
-/*
- * Function value_index (value, array, size)
- *
- *    Returns the index to the value in the specified array
- */
-static inline int value_index(__u32 value, __u32 *array, int size)
-{
-	int i;
-
-	for (i=0; i < size; i++)
-		if (array[i] == value)
-			break;
-	return i;
-}
-
-/*
- * Function index_value (index, array)
- *
- *    Returns value to index in array, easy!
- *
- */
-static inline __u32 index_value(int index, __u32 *array)
-{
-	return array[index];
-}
-
-/*
- * Function msb_index (word)
- *
- *    Returns index to most significant bit (MSB) in word
- *
- */
-static int msb_index (__u16 word)
-{
-	__u16 msb = 0x8000;
-	int index = 15;   /* Current MSB */
-
-	/* Check for buggy peers.
-	 * Note : there is a small probability that it could be us, but I
-	 * would expect driver authors to catch that pretty early and be
-	 * able to check precisely what's going on. If a end user sees this,
-	 * it's very likely the peer. - Jean II */
-	if (word == 0) {
-		net_warn_ratelimited("%s(), Detected buggy peer, adjust null PV to 0x1!\n",
-				     __func__);
-		/* The only safe choice (we don't know the array size) */
-		word = 0x1;
-	}
-
-	while (msb) {
-		if (word & msb)
-			break;   /* Found it! */
-		msb >>=1;
-		index--;
-	}
-	return index;
-}
-
-/*
- * Function value_lower_bits (value, array)
- *
- *    Returns a bit field marking all possibility lower than value.
- */
-static inline int value_lower_bits(__u32 value, __u32 *array, int size, __u16 *field)
-{
-	int	i;
-	__u16	mask = 0x1;
-	__u16	result = 0x0;
-
-	for (i=0; i < size; i++) {
-		/* Add the current value to the bit field, shift mask */
-		result |= mask;
-		mask <<= 1;
-		/* Finished ? */
-		if (array[i] >= value)
-			break;
-	}
-	/* Send back a valid index */
-	if(i >= size)
-	  i = size - 1;	/* Last item */
-	*field = result;
-	return i;
-}
-
-/*
- * Function value_highest_bit (value, array)
- *
- *    Returns a bit field marking the highest possibility lower than value.
- */
-static inline int value_highest_bit(__u32 value, __u32 *array, int size, __u16 *field)
-{
-	int	i;
-	__u16	mask = 0x1;
-	__u16	result = 0x0;
-
-	for (i=0; i < size; i++) {
-		/* Finished ? */
-		if (array[i] <= value)
-			break;
-		/* Shift mask */
-		mask <<= 1;
-	}
-	/* Set the current value to the bit field */
-	result |= mask;
-	/* Send back a valid index */
-	if(i >= size)
-	  i = size - 1;	/* Last item */
-	*field = result;
-	return i;
-}
-
-/* -------------------------- MAIN CALLS -------------------------- */
-
-/*
- * Function irda_qos_compute_intersection (qos, new)
- *
- *    Compute the intersection of the old QoS capabilities with new ones
- *
- */
-void irda_qos_compute_intersection(struct qos_info *qos, struct qos_info *new)
-{
-	IRDA_ASSERT(qos != NULL, return;);
-	IRDA_ASSERT(new != NULL, return;);
-
-	/* Apply */
-	qos->baud_rate.bits       &= new->baud_rate.bits;
-	qos->window_size.bits     &= new->window_size.bits;
-	qos->min_turn_time.bits   &= new->min_turn_time.bits;
-	qos->max_turn_time.bits   &= new->max_turn_time.bits;
-	qos->data_size.bits       &= new->data_size.bits;
-	qos->link_disc_time.bits  &= new->link_disc_time.bits;
-	qos->additional_bofs.bits &= new->additional_bofs.bits;
-
-	irda_qos_bits_to_value(qos);
-}
-
-/*
- * Function irda_init_max_qos_capabilies (qos)
- *
- *    The purpose of this function is for layers and drivers to be able to
- *    set the maximum QoS possible and then "and in" their own limitations
- *
- */
-void irda_init_max_qos_capabilies(struct qos_info *qos)
-{
-	int i;
-	/*
-	 *  These are the maximum supported values as specified on pages
-	 *  39-43 in IrLAP
-	 */
-
-	/* Use sysctl to set some configurable values... */
-	/* Set configured max speed */
-	i = value_lower_bits(sysctl_max_baud_rate, baud_rates, 10,
-			     &qos->baud_rate.bits);
-	sysctl_max_baud_rate = index_value(i, baud_rates);
-
-	/* Set configured max disc time */
-	i = value_lower_bits(sysctl_max_noreply_time, link_disc_times, 8,
-			     &qos->link_disc_time.bits);
-	sysctl_max_noreply_time = index_value(i, link_disc_times);
-
-	/* LSB is first byte, MSB is second byte */
-	qos->baud_rate.bits    &= 0x03ff;
-
-	qos->window_size.bits     = 0x7f;
-	qos->min_turn_time.bits   = 0xff;
-	qos->max_turn_time.bits   = 0x0f;
-	qos->data_size.bits       = 0x3f;
-	qos->link_disc_time.bits &= 0xff;
-	qos->additional_bofs.bits = 0xff;
-}
-EXPORT_SYMBOL(irda_init_max_qos_capabilies);
-
-/*
- * Function irlap_adjust_qos_settings (qos)
- *
- *     Adjust QoS settings in case some values are not possible to use because
- *     of other settings
- */
-static void irlap_adjust_qos_settings(struct qos_info *qos)
-{
-	__u32 line_capacity;
-	int index;
-
-	/*
-	 * Make sure the mintt is sensible.
-	 * Main culprit : Ericsson T39. - Jean II
-	 */
-	if (sysctl_min_tx_turn_time > qos->min_turn_time.value) {
-		int i;
-
-		net_warn_ratelimited("%s(), Detected buggy peer, adjust mtt to %dus!\n",
-				     __func__, sysctl_min_tx_turn_time);
-
-		/* We don't really need bits, but easier this way */
-		i = value_highest_bit(sysctl_min_tx_turn_time, min_turn_times,
-				      8, &qos->min_turn_time.bits);
-		sysctl_min_tx_turn_time = index_value(i, min_turn_times);
-		qos->min_turn_time.value = sysctl_min_tx_turn_time;
-	}
-
-	/*
-	 * Not allowed to use a max turn time less than 500 ms if the baudrate
-	 * is less than 115200
-	 */
-	if ((qos->baud_rate.value < 115200) &&
-	    (qos->max_turn_time.value < 500))
-	{
-		pr_debug("%s(), adjusting max turn time from %d to 500 ms\n",
-			 __func__, qos->max_turn_time.value);
-		qos->max_turn_time.value = 500;
-	}
-
-	/*
-	 * The data size must be adjusted according to the baud rate and max
-	 * turn time
-	 */
-	index = value_index(qos->data_size.value, data_sizes, 6);
-	line_capacity = irlap_max_line_capacity(qos->baud_rate.value,
-						qos->max_turn_time.value);
-
-#ifdef CONFIG_IRDA_DYNAMIC_WINDOW
-	while ((qos->data_size.value > line_capacity) && (index > 0)) {
-		qos->data_size.value = data_sizes[index--];
-		pr_debug("%s(), reducing data size to %d\n",
-			 __func__, qos->data_size.value);
-	}
-#else /* Use method described in section 6.6.11 of IrLAP */
-	while (irlap_requested_line_capacity(qos) > line_capacity) {
-		IRDA_ASSERT(index != 0, return;);
-
-		/* Must be able to send at least one frame */
-		if (qos->window_size.value > 1) {
-			qos->window_size.value--;
-			pr_debug("%s(), reducing window size to %d\n",
-				 __func__, qos->window_size.value);
-		} else if (index > 1) {
-			qos->data_size.value = data_sizes[index--];
-			pr_debug("%s(), reducing data size to %d\n",
-				 __func__, qos->data_size.value);
-		} else {
-			net_warn_ratelimited("%s(), nothing more we can do!\n",
-					     __func__);
-		}
-	}
-#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
-	/*
-	 * Fix tx data size according to user limits - Jean II
-	 */
-	if (qos->data_size.value > sysctl_max_tx_data_size)
-		/* Allow non discrete adjustement to avoid losing capacity */
-		qos->data_size.value = sysctl_max_tx_data_size;
-	/*
-	 * Override Tx window if user request it. - Jean II
-	 */
-	if (qos->window_size.value > sysctl_max_tx_window)
-		qos->window_size.value = sysctl_max_tx_window;
-}
-
-/*
- * Function irlap_negotiate (qos_device, qos_session, skb)
- *
- *    Negotiate QoS values, not really that much negotiation :-)
- *    We just set the QoS capabilities for the peer station
- *
- */
-int irlap_qos_negotiate(struct irlap_cb *self, struct sk_buff *skb)
-{
-	int ret;
-
-	ret = irda_param_extract_all(self, skb->data, skb->len,
-				     &irlap_param_info);
-
-	/* Convert the negotiated bits to values */
-	irda_qos_bits_to_value(&self->qos_tx);
-	irda_qos_bits_to_value(&self->qos_rx);
-
-	irlap_adjust_qos_settings(&self->qos_tx);
-
-	pr_debug("Setting BAUD_RATE to %d bps.\n",
-		 self->qos_tx.baud_rate.value);
-	pr_debug("Setting DATA_SIZE to %d bytes\n",
-		 self->qos_tx.data_size.value);
-	pr_debug("Setting WINDOW_SIZE to %d\n",
-		 self->qos_tx.window_size.value);
-	pr_debug("Setting XBOFS to %d\n",
-		 self->qos_tx.additional_bofs.value);
-	pr_debug("Setting MAX_TURN_TIME to %d ms.\n",
-		 self->qos_tx.max_turn_time.value);
-	pr_debug("Setting MIN_TURN_TIME to %d usecs.\n",
-		 self->qos_tx.min_turn_time.value);
-	pr_debug("Setting LINK_DISC to %d secs.\n",
-		 self->qos_tx.link_disc_time.value);
-	return ret;
-}
-
-/*
- * Function irlap_insert_negotiation_params (qos, fp)
- *
- *    Insert QoS negotiaion pararameters into frame
- *
- */
-int irlap_insert_qos_negotiation_params(struct irlap_cb *self,
-					struct sk_buff *skb)
-{
-	int ret;
-
-	/* Insert data rate */
-	ret = irda_param_insert(self, PI_BAUD_RATE, skb_tail_pointer(skb),
-				skb_tailroom(skb), &irlap_param_info);
-	if (ret < 0)
-		return ret;
-	skb_put(skb, ret);
-
-	/* Insert max turnaround time */
-	ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb_tail_pointer(skb),
-				skb_tailroom(skb), &irlap_param_info);
-	if (ret < 0)
-		return ret;
-	skb_put(skb, ret);
-
-	/* Insert data size */
-	ret = irda_param_insert(self, PI_DATA_SIZE, skb_tail_pointer(skb),
-				skb_tailroom(skb), &irlap_param_info);
-	if (ret < 0)
-		return ret;
-	skb_put(skb, ret);
-
-	/* Insert window size */
-	ret = irda_param_insert(self, PI_WINDOW_SIZE, skb_tail_pointer(skb),
-				skb_tailroom(skb), &irlap_param_info);
-	if (ret < 0)
-		return ret;
-	skb_put(skb, ret);
-
-	/* Insert additional BOFs */
-	ret = irda_param_insert(self, PI_ADD_BOFS, skb_tail_pointer(skb),
-				skb_tailroom(skb), &irlap_param_info);
-	if (ret < 0)
-		return ret;
-	skb_put(skb, ret);
-
-	/* Insert minimum turnaround time */
-	ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb_tail_pointer(skb),
-				skb_tailroom(skb), &irlap_param_info);
-	if (ret < 0)
-		return ret;
-	skb_put(skb, ret);
-
-	/* Insert link disconnect/threshold time */
-	ret = irda_param_insert(self, PI_LINK_DISC, skb_tail_pointer(skb),
-				skb_tailroom(skb), &irlap_param_info);
-	if (ret < 0)
-		return ret;
-	skb_put(skb, ret);
-
-	return 0;
-}
-
-/*
- * Function irlap_param_baud_rate (instance, param, get)
- *
- *    Negotiate data-rate
- *
- */
-static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get)
-{
-	__u16 final;
-
-	struct irlap_cb *self = (struct irlap_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	if (get) {
-		param->pv.i = self->qos_rx.baud_rate.bits;
-		pr_debug("%s(), baud rate = 0x%02x\n",
-			 __func__, param->pv.i);
-	} else {
-		/*
-		 *  Stations must agree on baud rate, so calculate
-		 *  intersection
-		 */
-		pr_debug("Requested BAUD_RATE: 0x%04x\n", (__u16)param->pv.i);
-		final = (__u16) param->pv.i & self->qos_rx.baud_rate.bits;
-
-		pr_debug("Final BAUD_RATE: 0x%04x\n", final);
-		self->qos_tx.baud_rate.bits = final;
-		self->qos_rx.baud_rate.bits = final;
-	}
-
-	return 0;
-}
-
-/*
- * Function irlap_param_link_disconnect (instance, param, get)
- *
- *    Negotiate link disconnect/threshold time.
- *
- */
-static int irlap_param_link_disconnect(void *instance, irda_param_t *param,
-				       int get)
-{
-	__u16 final;
-
-	struct irlap_cb *self = (struct irlap_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->qos_rx.link_disc_time.bits;
-	else {
-		/*
-		 *  Stations must agree on link disconnect/threshold
-		 *  time.
-		 */
-		pr_debug("LINK_DISC: %02x\n", (__u8)param->pv.i);
-		final = (__u8) param->pv.i & self->qos_rx.link_disc_time.bits;
-
-		pr_debug("Final LINK_DISC: %02x\n", final);
-		self->qos_tx.link_disc_time.bits = final;
-		self->qos_rx.link_disc_time.bits = final;
-	}
-	return 0;
-}
-
-/*
- * Function irlap_param_max_turn_time (instance, param, get)
- *
- *    Negotiate the maximum turnaround time. This is a type 1 parameter and
- *    will be negotiated independently for each station
- *
- */
-static int irlap_param_max_turn_time(void *instance, irda_param_t *param,
-				     int get)
-{
-	struct irlap_cb *self = (struct irlap_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->qos_rx.max_turn_time.bits;
-	else
-		self->qos_tx.max_turn_time.bits = (__u8) param->pv.i;
-
-	return 0;
-}
-
-/*
- * Function irlap_param_data_size (instance, param, get)
- *
- *    Negotiate the data size. This is a type 1 parameter and
- *    will be negotiated independently for each station
- *
- */
-static int irlap_param_data_size(void *instance, irda_param_t *param, int get)
-{
-	struct irlap_cb *self = (struct irlap_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->qos_rx.data_size.bits;
-	else
-		self->qos_tx.data_size.bits = (__u8) param->pv.i;
-
-	return 0;
-}
-
-/*
- * Function irlap_param_window_size (instance, param, get)
- *
- *    Negotiate the window size. This is a type 1 parameter and
- *    will be negotiated independently for each station
- *
- */
-static int irlap_param_window_size(void *instance, irda_param_t *param,
-				   int get)
-{
-	struct irlap_cb *self = (struct irlap_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->qos_rx.window_size.bits;
-	else
-		self->qos_tx.window_size.bits = (__u8) param->pv.i;
-
-	return 0;
-}
-
-/*
- * Function irlap_param_additional_bofs (instance, param, get)
- *
- *    Negotiate additional BOF characters. This is a type 1 parameter and
- *    will be negotiated independently for each station.
- */
-static int irlap_param_additional_bofs(void *instance, irda_param_t *param, int get)
-{
-	struct irlap_cb *self = (struct irlap_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->qos_rx.additional_bofs.bits;
-	else
-		self->qos_tx.additional_bofs.bits = (__u8) param->pv.i;
-
-	return 0;
-}
-
-/*
- * Function irlap_param_min_turn_time (instance, param, get)
- *
- *    Negotiate the minimum turn around time. This is a type 1 parameter and
- *    will be negotiated independently for each station
- */
-static int irlap_param_min_turn_time(void *instance, irda_param_t *param,
-				     int get)
-{
-	struct irlap_cb *self = (struct irlap_cb *) instance;
-
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;);
-
-	if (get)
-		param->pv.i = self->qos_rx.min_turn_time.bits;
-	else
-		self->qos_tx.min_turn_time.bits = (__u8) param->pv.i;
-
-	return 0;
-}
-
-/*
- * Function irlap_max_line_capacity (speed, max_turn_time, min_turn_time)
- *
- *    Calculate the maximum line capacity
- *
- */
-__u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time)
-{
-	__u32 line_capacity;
-	int i,j;
-
-	pr_debug("%s(), speed=%d, max_turn_time=%d\n",
-		 __func__, speed, max_turn_time);
-
-	i = value_index(speed, baud_rates, 10);
-	j = value_index(max_turn_time, max_turn_times, 4);
-
-	IRDA_ASSERT(((i >=0) && (i <10)), return 0;);
-	IRDA_ASSERT(((j >=0) && (j <4)), return 0;);
-
-	line_capacity = max_line_capacities[i][j];
-
-	pr_debug("%s(), line capacity=%d bytes\n",
-		 __func__, line_capacity);
-
-	return line_capacity;
-}
-
-#ifndef CONFIG_IRDA_DYNAMIC_WINDOW
-static __u32 irlap_requested_line_capacity(struct qos_info *qos)
-{
-	__u32 line_capacity;
-
-	line_capacity = qos->window_size.value *
-		(qos->data_size.value + 6 + qos->additional_bofs.value) +
-		irlap_min_turn_time_in_bytes(qos->baud_rate.value,
-					     qos->min_turn_time.value);
-
-	pr_debug("%s(), requested line capacity=%d\n",
-		 __func__, line_capacity);
-
-	return line_capacity;
-}
-#endif
-
-void irda_qos_bits_to_value(struct qos_info *qos)
-{
-	int index;
-
-	IRDA_ASSERT(qos != NULL, return;);
-
-	index = msb_index(qos->baud_rate.bits);
-	qos->baud_rate.value = baud_rates[index];
-
-	index = msb_index(qos->data_size.bits);
-	qos->data_size.value = data_sizes[index];
-
-	index = msb_index(qos->window_size.bits);
-	qos->window_size.value = index+1;
-
-	index = msb_index(qos->min_turn_time.bits);
-	qos->min_turn_time.value = min_turn_times[index];
-
-	index = msb_index(qos->max_turn_time.bits);
-	qos->max_turn_time.value = max_turn_times[index];
-
-	index = msb_index(qos->link_disc_time.bits);
-	qos->link_disc_time.value = link_disc_times[index];
-
-	index = msb_index(qos->additional_bofs.bits);
-	qos->additional_bofs.value = add_bofs[index];
-}
-EXPORT_SYMBOL(irda_qos_bits_to_value);
diff --git a/net/irda/timer.c b/net/irda/timer.c
deleted file mode 100644
index f2280f73b057..000000000000
--- a/net/irda/timer.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*********************************************************************
- *
- * Filename:      timer.c
- * Version:
- * Description:
- * Status:        Experimental.
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Sat Aug 16 00:59:29 1997
- * Modified at:   Wed Dec  8 12:50:34 1999
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- *
- *     Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/delay.h>
-
-#include <net/irda/timer.h>
-#include <net/irda/irda.h>
-#include <net/irda/irda_device.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlmp.h>
-
-extern int  sysctl_slot_timeout;
-
-static void irlap_slot_timer_expired(void* data);
-static void irlap_query_timer_expired(void* data);
-static void irlap_final_timer_expired(void* data);
-static void irlap_wd_timer_expired(void* data);
-static void irlap_backoff_timer_expired(void* data);
-static void irlap_media_busy_expired(void* data);
-
-void irlap_start_slot_timer(struct irlap_cb *self, int timeout)
-{
-	irda_start_timer(&self->slot_timer, timeout, (void *) self,
-			 irlap_slot_timer_expired);
-}
-
-void irlap_start_query_timer(struct irlap_cb *self, int S, int s)
-{
-	int timeout;
-
-	/* Calculate when the peer discovery should end. Normally, we
-	 * get the end-of-discovery frame, so this is just in case
-	 * we miss it.
-	 * Basically, we multiply the number of remaining slots by our
-	 * slot time, plus add some extra time to properly receive the last
-	 * discovery packet (which is longer due to extra discovery info),
-	 * to avoid messing with for incoming connections requests and
-	 * to accommodate devices that perform discovery slower than us.
-	 * Jean II */
-	timeout = msecs_to_jiffies(sysctl_slot_timeout) * (S - s)
-		   + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT;
-
-	/* Set or re-set the timer. We reset the timer for each received
-	 * discovery query, which allow us to automatically adjust to
-	 * the speed of the peer discovery (faster or slower). Jean II */
-	irda_start_timer( &self->query_timer, timeout, (void *) self,
-			  irlap_query_timer_expired);
-}
-
-void irlap_start_final_timer(struct irlap_cb *self, int timeout)
-{
-	irda_start_timer(&self->final_timer, timeout, (void *) self,
-			 irlap_final_timer_expired);
-}
-
-void irlap_start_wd_timer(struct irlap_cb *self, int timeout)
-{
-	irda_start_timer(&self->wd_timer, timeout, (void *) self,
-			 irlap_wd_timer_expired);
-}
-
-void irlap_start_backoff_timer(struct irlap_cb *self, int timeout)
-{
-	irda_start_timer(&self->backoff_timer, timeout, (void *) self,
-			 irlap_backoff_timer_expired);
-}
-
-void irlap_start_mbusy_timer(struct irlap_cb *self, int timeout)
-{
-	irda_start_timer(&self->media_busy_timer, timeout,
-			 (void *) self, irlap_media_busy_expired);
-}
-
-void irlap_stop_mbusy_timer(struct irlap_cb *self)
-{
-	/* If timer is activated, kill it! */
-	del_timer(&self->media_busy_timer);
-
-	/* If we are in NDM, there is a bunch of events in LAP that
-	 * that be pending due to the media_busy condition, such as
-	 * CONNECT_REQUEST and SEND_UI_FRAME. If we don't generate
-	 * an event, they will wait forever...
-	 * Jean II */
-	if (self->state == LAP_NDM)
-		irlap_do_event(self, MEDIA_BUSY_TIMER_EXPIRED, NULL, NULL);
-}
-
-void irlmp_start_watchdog_timer(struct lsap_cb *self, int timeout)
-{
-	irda_start_timer(&self->watchdog_timer, timeout, (void *) self,
-			 irlmp_watchdog_timer_expired);
-}
-
-void irlmp_start_discovery_timer(struct irlmp_cb *self, int timeout)
-{
-	irda_start_timer(&self->discovery_timer, timeout, (void *) self,
-			 irlmp_discovery_timer_expired);
-}
-
-void irlmp_start_idle_timer(struct lap_cb *self, int timeout)
-{
-	irda_start_timer(&self->idle_timer, timeout, (void *) self,
-			 irlmp_idle_timer_expired);
-}
-
-void irlmp_stop_idle_timer(struct lap_cb *self)
-{
-	/* If timer is activated, kill it! */
-	del_timer(&self->idle_timer);
-}
-
-/*
- * Function irlap_slot_timer_expired (data)
- *
- *    IrLAP slot timer has expired
- *
- */
-static void irlap_slot_timer_expired(void *data)
-{
-	struct irlap_cb *self = (struct irlap_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	irlap_do_event(self, SLOT_TIMER_EXPIRED, NULL, NULL);
-}
-
-/*
- * Function irlap_query_timer_expired (data)
- *
- *    IrLAP query timer has expired
- *
- */
-static void irlap_query_timer_expired(void *data)
-{
-	struct irlap_cb *self = (struct irlap_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	irlap_do_event(self, QUERY_TIMER_EXPIRED, NULL, NULL);
-}
-
-/*
- * Function irda_final_timer_expired (data)
- *
- *
- *
- */
-static void irlap_final_timer_expired(void *data)
-{
-	struct irlap_cb *self = (struct irlap_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	irlap_do_event(self, FINAL_TIMER_EXPIRED, NULL, NULL);
-}
-
-/*
- * Function irda_wd_timer_expired (data)
- *
- *
- *
- */
-static void irlap_wd_timer_expired(void *data)
-{
-	struct irlap_cb *self = (struct irlap_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	irlap_do_event(self, WD_TIMER_EXPIRED, NULL, NULL);
-}
-
-/*
- * Function irda_backoff_timer_expired (data)
- *
- *
- *
- */
-static void irlap_backoff_timer_expired(void *data)
-{
-	struct irlap_cb *self = (struct irlap_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
-
-	irlap_do_event(self, BACKOFF_TIMER_EXPIRED, NULL, NULL);
-}
-
-
-/*
- * Function irtty_media_busy_expired (data)
- *
- *
- */
-static void irlap_media_busy_expired(void *data)
-{
-	struct irlap_cb *self = (struct irlap_cb *) data;
-
-	IRDA_ASSERT(self != NULL, return;);
-
-	irda_device_set_media_busy(self->netdev, FALSE);
-	/* Note : the LAP event will be send in irlap_stop_mbusy_timer(),
-	* to catch other cases where the flag is cleared (for example
-	* after a discovery) - Jean II */
-}
diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c
deleted file mode 100644
index 40a0f993bf13..000000000000
--- a/net/irda/wrapper.c
+++ /dev/null
@@ -1,492 +0,0 @@
-/*********************************************************************
- *
- * Filename:      wrapper.c
- * Version:       1.2
- * Description:   IrDA SIR async wrapper layer
- * Status:        Stable
- * Author:        Dag Brattli <dagb@cs.uit.no>
- * Created at:    Mon Aug  4 20:40:53 1997
- * Modified at:   Fri Jan 28 13:21:09 2000
- * Modified by:   Dag Brattli <dagb@cs.uit.no>
- * Modified at:   Fri May 28  3:11 CST 1999
- * Modified by:   Horst von Brand <vonbrand@sleipnir.valparaiso.cl>
- *
- *     Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>,
- *     All Rights Reserved.
- *     Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com>
- *
- *     This program is free software; you can redistribute it and/or
- *     modify it under the terms of the GNU General Public License as
- *     published by the Free Software Foundation; either version 2 of
- *     the License, or (at your option) any later version.
- *
- *     Neither Dag Brattli nor University of Tromsø admit liability nor
- *     provide warranty for any of this software. This material is
- *     provided "AS-IS" and at no charge.
- *
- ********************************************************************/
-
-#include <linux/skbuff.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <asm/byteorder.h>
-
-#include <net/irda/irda.h>
-#include <net/irda/wrapper.h>
-#include <net/irda/crc.h>
-#include <net/irda/irlap.h>
-#include <net/irda/irlap_frame.h>
-#include <net/irda/irda_device.h>
-
-/************************** FRAME WRAPPING **************************/
-/*
- * Unwrap and unstuff SIR frames
- *
- * Note : at FIR and MIR, HDLC framing is used and usually handled
- * by the controller, so we come here only for SIR... Jean II
- */
-
-/*
- * Function stuff_byte (byte, buf)
- *
- *    Byte stuff one single byte and put the result in buffer pointed to by
- *    buf. The buffer must at all times be able to have two bytes inserted.
- *
- * This is in a tight loop, better inline it, so need to be prior to callers.
- * (2000 bytes on P6 200MHz, non-inlined ~370us, inline ~170us) - Jean II
- */
-static inline int stuff_byte(__u8 byte, __u8 *buf)
-{
-	switch (byte) {
-	case BOF: /* FALLTHROUGH */
-	case EOF: /* FALLTHROUGH */
-	case CE:
-		/* Insert transparently coded */
-		buf[0] = CE;               /* Send link escape */
-		buf[1] = byte^IRDA_TRANS;    /* Complement bit 5 */
-		return 2;
-		/* break; */
-	default:
-		 /* Non-special value, no transparency required */
-		buf[0] = byte;
-		return 1;
-		/* break; */
-	}
-}
-
-/*
- * Function async_wrap (skb, *tx_buff, buffsize)
- *
- *    Makes a new buffer with wrapping and stuffing, should check that
- *    we don't get tx buffer overflow.
- */
-int async_wrap_skb(struct sk_buff *skb, __u8 *tx_buff, int buffsize)
-{
-	struct irda_skb_cb *cb = (struct irda_skb_cb *) skb->cb;
-	int xbofs;
-	int i;
-	int n;
-	union {
-		__u16 value;
-		__u8 bytes[2];
-	} fcs;
-
-	/* Initialize variables */
-	fcs.value = INIT_FCS;
-	n = 0;
-
-	/*
-	 *  Send  XBOF's for required min. turn time and for the negotiated
-	 *  additional XBOFS
-	 */
-
-	if (cb->magic != LAP_MAGIC) {
-		/*
-		 * This will happen for all frames sent from user-space.
-		 * Nothing to worry about, but we set the default number of
-		 * BOF's
-		 */
-		pr_debug("%s(), wrong magic in skb!\n", __func__);
-		xbofs = 10;
-	} else
-		xbofs = cb->xbofs + cb->xbofs_delay;
-
-	pr_debug("%s(), xbofs=%d\n", __func__, xbofs);
-
-	/* Check that we never use more than 115 + 48 xbofs */
-	if (xbofs > 163) {
-		pr_debug("%s(), too many xbofs (%d)\n", __func__,
-			 xbofs);
-		xbofs = 163;
-	}
-
-	memset(tx_buff + n, XBOF, xbofs);
-	n += xbofs;
-
-	/* Start of packet character BOF */
-	tx_buff[n++] = BOF;
-
-	/* Insert frame and calc CRC */
-	for (i=0; i < skb->len; i++) {
-		/*
-		 *  Check for the possibility of tx buffer overflow. We use
-		 *  bufsize-5 since the maximum number of bytes that can be
-		 *  transmitted after this point is 5.
-		 */
-		if(n >= (buffsize-5)) {
-			net_err_ratelimited("%s(), tx buffer overflow (n=%d)\n",
-					    __func__, n);
-			return n;
-		}
-
-		n += stuff_byte(skb->data[i], tx_buff+n);
-		fcs.value = irda_fcs(fcs.value, skb->data[i]);
-	}
-
-	/* Insert CRC in little endian format (LSB first) */
-	fcs.value = ~fcs.value;
-#ifdef __LITTLE_ENDIAN
-	n += stuff_byte(fcs.bytes[0], tx_buff+n);
-	n += stuff_byte(fcs.bytes[1], tx_buff+n);
-#else /* ifdef __BIG_ENDIAN */
-	n += stuff_byte(fcs.bytes[1], tx_buff+n);
-	n += stuff_byte(fcs.bytes[0], tx_buff+n);
-#endif
-	tx_buff[n++] = EOF;
-
-	return n;
-}
-EXPORT_SYMBOL(async_wrap_skb);
-
-/************************* FRAME UNWRAPPING *************************/
-/*
- * Unwrap and unstuff SIR frames
- *
- * Complete rewrite by Jean II :
- * More inline, faster, more compact, more logical. Jean II
- * (16 bytes on P6 200MHz, old 5 to 7 us, new 4 to 6 us)
- * (24 bytes on P6 200MHz, old 9 to 10 us, new 7 to 8 us)
- * (for reference, 115200 b/s is 1 byte every 69 us)
- * And reduce wrapper.o by ~900B in the process ;-)
- *
- * Then, we have the addition of ZeroCopy, which is optional
- * (i.e. the driver must initiate it) and improve final processing.
- * (2005 B frame + EOF on P6 200MHz, without 30 to 50 us, with 10 to 25 us)
- *
- * Note : at FIR and MIR, HDLC framing is used and usually handled
- * by the controller, so we come here only for SIR... Jean II
- */
-
-/*
- * We can also choose where we want to do the CRC calculation. We can
- * do it "inline", as we receive the bytes, or "postponed", when
- * receiving the End-Of-Frame.
- * (16 bytes on P6 200MHz, inlined 4 to 6 us, postponed 4 to 5 us)
- * (24 bytes on P6 200MHz, inlined 7 to 8 us, postponed 5 to 7 us)
- * With ZeroCopy :
- * (2005 B frame on P6 200MHz, inlined 10 to 25 us, postponed 140 to 180 us)
- * Without ZeroCopy :
- * (2005 B frame on P6 200MHz, inlined 30 to 50 us, postponed 150 to 180 us)
- * (Note : numbers taken with irq disabled)
- *
- * From those numbers, it's not clear which is the best strategy, because
- * we end up running through a lot of data one way or another (i.e. cache
- * misses). I personally prefer to avoid the huge latency spike of the
- * "postponed" solution, because it come just at the time when we have
- * lot's of protocol processing to do and it will hurt our ability to
- * reach low link turnaround times... Jean II
- */
-//#define POSTPONE_RX_CRC
-
-/*
- * Function async_bump (buf, len, stats)
- *
- *    Got a frame, make a copy of it, and pass it up the stack! We can try
- *    to inline it since it's only called from state_inside_frame
- */
-static inline void
-async_bump(struct net_device *dev,
-	   struct net_device_stats *stats,
-	   iobuff_t *rx_buff)
-{
-	struct sk_buff *newskb;
-	struct sk_buff *dataskb;
-	int		docopy;
-
-	/* Check if we need to copy the data to a new skb or not.
-	 * If the driver doesn't use ZeroCopy Rx, we have to do it.
-	 * With ZeroCopy Rx, the rx_buff already point to a valid
-	 * skb. But, if the frame is small, it is more efficient to
-	 * copy it to save memory (copy will be fast anyway - that's
-	 * called Rx-copy-break). Jean II */
-	docopy = ((rx_buff->skb == NULL) ||
-		  (rx_buff->len < IRDA_RX_COPY_THRESHOLD));
-
-	/* Allocate a new skb */
-	newskb = dev_alloc_skb(docopy ? rx_buff->len + 1 : rx_buff->truesize);
-	if (!newskb)  {
-		stats->rx_dropped++;
-		/* We could deliver the current skb if doing ZeroCopy Rx,
-		 * but this would stall the Rx path. Better drop the
-		 * packet... Jean II */
-		return;
-	}
-
-	/* Align IP header to 20 bytes (i.e. increase skb->data)
-	 * Note this is only useful with IrLAN, as PPP has a variable
-	 * header size (2 or 1 bytes) - Jean II */
-	skb_reserve(newskb, 1);
-
-	if(docopy) {
-		/* Copy data without CRC (length already checked) */
-		skb_copy_to_linear_data(newskb, rx_buff->data,
-					rx_buff->len - 2);
-		/* Deliver this skb */
-		dataskb = newskb;
-	} else {
-		/* We are using ZeroCopy. Deliver old skb */
-		dataskb = rx_buff->skb;
-		/* And hook the new skb to the rx_buff */
-		rx_buff->skb = newskb;
-		rx_buff->head = newskb->data;	/* NOT newskb->head */
-		//printk(KERN_DEBUG "ZeroCopy : len = %d, dataskb = %p, newskb = %p\n", rx_buff->len, dataskb, newskb);
-	}
-
-	/* Set proper length on skb (without CRC) */
-	skb_put(dataskb, rx_buff->len - 2);
-
-	/* Feed it to IrLAP layer */
-	dataskb->dev = dev;
-	skb_reset_mac_header(dataskb);
-	dataskb->protocol = htons(ETH_P_IRDA);
-
-	netif_rx(dataskb);
-
-	stats->rx_packets++;
-	stats->rx_bytes += rx_buff->len;
-
-	/* Clean up rx_buff (redundant with async_unwrap_bof() ???) */
-	rx_buff->data = rx_buff->head;
-	rx_buff->len = 0;
-}
-
-/*
- * Function async_unwrap_bof(dev, byte)
- *
- *    Handle Beginning Of Frame character received within a frame
- *
- */
-static inline void
-async_unwrap_bof(struct net_device *dev,
-		 struct net_device_stats *stats,
-		 iobuff_t *rx_buff, __u8 byte)
-{
-	switch(rx_buff->state) {
-	case LINK_ESCAPE:
-	case INSIDE_FRAME:
-		/* Not supposed to happen, the previous frame is not
-		 * finished - Jean II */
-		pr_debug("%s(), Discarding incomplete frame\n",
-			 __func__);
-		stats->rx_errors++;
-		stats->rx_missed_errors++;
-		irda_device_set_media_busy(dev, TRUE);
-		break;
-
-	case OUTSIDE_FRAME:
-	case BEGIN_FRAME:
-	default:
-		/* We may receive multiple BOF at the start of frame */
-		break;
-	}
-
-	/* Now receiving frame */
-	rx_buff->state = BEGIN_FRAME;
-	rx_buff->in_frame = TRUE;
-
-	/* Time to initialize receive buffer */
-	rx_buff->data = rx_buff->head;
-	rx_buff->len = 0;
-	rx_buff->fcs = INIT_FCS;
-}
-
-/*
- * Function async_unwrap_eof(dev, byte)
- *
- *    Handle End Of Frame character received within a frame
- *
- */
-static inline void
-async_unwrap_eof(struct net_device *dev,
-		 struct net_device_stats *stats,
-		 iobuff_t *rx_buff, __u8 byte)
-{
-#ifdef POSTPONE_RX_CRC
-	int	i;
-#endif
-
-	switch(rx_buff->state) {
-	case OUTSIDE_FRAME:
-		/* Probably missed the BOF */
-		stats->rx_errors++;
-		stats->rx_missed_errors++;
-		irda_device_set_media_busy(dev, TRUE);
-		break;
-
-	case BEGIN_FRAME:
-	case LINK_ESCAPE:
-	case INSIDE_FRAME:
-	default:
-		/* Note : in the case of BEGIN_FRAME and LINK_ESCAPE,
-		 * the fcs will most likely not match and generate an
-		 * error, as expected - Jean II */
-		rx_buff->state = OUTSIDE_FRAME;
-		rx_buff->in_frame = FALSE;
-
-#ifdef POSTPONE_RX_CRC
-		/* If we haven't done the CRC as we receive bytes, we
-		 * must do it now... Jean II */
-		for(i = 0; i < rx_buff->len; i++)
-			rx_buff->fcs = irda_fcs(rx_buff->fcs,
-						rx_buff->data[i]);
-#endif
-
-		/* Test FCS and signal success if the frame is good */
-		if (rx_buff->fcs == GOOD_FCS) {
-			/* Deliver frame */
-			async_bump(dev, stats, rx_buff);
-			break;
-		} else {
-			/* Wrong CRC, discard frame!  */
-			irda_device_set_media_busy(dev, TRUE);
-
-			pr_debug("%s(), crc error\n", __func__);
-			stats->rx_errors++;
-			stats->rx_crc_errors++;
-		}
-		break;
-	}
-}
-
-/*
- * Function async_unwrap_ce(dev, byte)
- *
- *    Handle Character Escape character received within a frame
- *
- */
-static inline void
-async_unwrap_ce(struct net_device *dev,
-		 struct net_device_stats *stats,
-		 iobuff_t *rx_buff, __u8 byte)
-{
-	switch(rx_buff->state) {
-	case OUTSIDE_FRAME:
-		/* Activate carrier sense */
-		irda_device_set_media_busy(dev, TRUE);
-		break;
-
-	case LINK_ESCAPE:
-		net_warn_ratelimited("%s: state not defined\n", __func__);
-		break;
-
-	case BEGIN_FRAME:
-	case INSIDE_FRAME:
-	default:
-		/* Stuffed byte coming */
-		rx_buff->state = LINK_ESCAPE;
-		break;
-	}
-}
-
-/*
- * Function async_unwrap_other(dev, byte)
- *
- *    Handle other characters received within a frame
- *
- */
-static inline void
-async_unwrap_other(struct net_device *dev,
-		   struct net_device_stats *stats,
-		   iobuff_t *rx_buff, __u8 byte)
-{
-	switch(rx_buff->state) {
-		/* This is on the critical path, case are ordered by
-		 * probability (most frequent first) - Jean II */
-	case INSIDE_FRAME:
-		/* Must be the next byte of the frame */
-		if (rx_buff->len < rx_buff->truesize)  {
-			rx_buff->data[rx_buff->len++] = byte;
-#ifndef POSTPONE_RX_CRC
-			rx_buff->fcs = irda_fcs(rx_buff->fcs, byte);
-#endif
-		} else {
-			pr_debug("%s(), Rx buffer overflow, aborting\n",
-				 __func__);
-			rx_buff->state = OUTSIDE_FRAME;
-		}
-		break;
-
-	case LINK_ESCAPE:
-		/*
-		 *  Stuffed char, complement bit 5 of byte
-		 *  following CE, IrLAP p.114
-		 */
-		byte ^= IRDA_TRANS;
-		if (rx_buff->len < rx_buff->truesize)  {
-			rx_buff->data[rx_buff->len++] = byte;
-#ifndef POSTPONE_RX_CRC
-			rx_buff->fcs = irda_fcs(rx_buff->fcs, byte);
-#endif
-			rx_buff->state = INSIDE_FRAME;
-		} else {
-			pr_debug("%s(), Rx buffer overflow, aborting\n",
-				 __func__);
-			rx_buff->state = OUTSIDE_FRAME;
-		}
-		break;
-
-	case OUTSIDE_FRAME:
-		/* Activate carrier sense */
-		if(byte != XBOF)
-			irda_device_set_media_busy(dev, TRUE);
-		break;
-
-	case BEGIN_FRAME:
-	default:
-		rx_buff->data[rx_buff->len++] = byte;
-#ifndef POSTPONE_RX_CRC
-		rx_buff->fcs = irda_fcs(rx_buff->fcs, byte);
-#endif
-		rx_buff->state = INSIDE_FRAME;
-		break;
-	}
-}
-
-/*
- * Function async_unwrap_char (dev, rx_buff, byte)
- *
- *    Parse and de-stuff frame received from the IrDA-port
- *
- * This is the main entry point for SIR drivers.
- */
-void async_unwrap_char(struct net_device *dev,
-		       struct net_device_stats *stats,
-		       iobuff_t *rx_buff, __u8 byte)
-{
-	switch(byte) {
-	case CE:
-		async_unwrap_ce(dev, stats, rx_buff, byte);
-		break;
-	case BOF:
-		async_unwrap_bof(dev, stats, rx_buff, byte);
-		break;
-	case EOF:
-		async_unwrap_eof(dev, stats, rx_buff, byte);
-		break;
-	default:
-		async_unwrap_other(dev, stats, rx_buff, byte);
-		break;
-	}
-}
-EXPORT_SYMBOL(async_unwrap_char);
-
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index c343ac60bf50..bd5723315069 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/list.h>
@@ -155,8 +156,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
 	seq_printf(seq,
 		   "   psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
 		   psock->index,
-		   psock->strp.stats.rx_msgs,
-		   psock->strp.stats.rx_bytes,
+		   psock->strp.stats.msgs,
+		   psock->strp.stats.bytes,
 		   psock->stats.tx_msgs,
 		   psock->stats.tx_bytes,
 		   psock->sk->sk_receive_queue.qlen,
@@ -170,22 +171,22 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
 	if (psock->tx_stopped)
 		seq_puts(seq, "TxStop ");
 
-	if (psock->strp.rx_stopped)
+	if (psock->strp.stopped)
 		seq_puts(seq, "RxStop ");
 
 	if (psock->tx_kcm)
 		seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
 
-	if (!psock->strp.rx_paused && !psock->ready_rx_msg) {
+	if (!psock->strp.paused && !psock->ready_rx_msg) {
 		if (psock->sk->sk_receive_queue.qlen) {
-			if (psock->strp.rx_need_bytes)
+			if (psock->strp.need_bytes)
 				seq_printf(seq, "RxWait=%u ",
-					   psock->strp.rx_need_bytes);
+					   psock->strp.need_bytes);
 			else
 				seq_printf(seq, "RxWait ");
 		}
 	} else  {
-		if (psock->strp.rx_paused)
+		if (psock->strp.paused)
 			seq_puts(seq, "RxPause ");
 
 		if (psock->ready_rx_msg)
@@ -371,20 +372,20 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
 		   "",
-		   strp_stats.rx_msgs,
-		   strp_stats.rx_bytes,
+		   strp_stats.msgs,
+		   strp_stats.bytes,
 		   psock_stats.tx_msgs,
 		   psock_stats.tx_bytes,
 		   psock_stats.reserved,
 		   psock_stats.unreserved,
-		   strp_stats.rx_aborts,
-		   strp_stats.rx_interrupted,
-		   strp_stats.rx_unrecov_intr,
-		   strp_stats.rx_mem_fail,
-		   strp_stats.rx_need_more_hdr,
-		   strp_stats.rx_bad_hdr_len,
-		   strp_stats.rx_msg_too_big,
-		   strp_stats.rx_msg_timeouts,
+		   strp_stats.aborts,
+		   strp_stats.interrupted,
+		   strp_stats.unrecov_intr,
+		   strp_stats.mem_fail,
+		   strp_stats.need_more_hdr,
+		   strp_stats.bad_hdr_len,
+		   strp_stats.msg_too_big,
+		   strp_stats.msg_timeouts,
 		   psock_stats.tx_aborts);
 
 	return 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index da49191f7ad0..0b750a22c4b9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -96,12 +96,12 @@ static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
 				    struct kcm_psock *psock)
 {
 	STRP_STATS_ADD(mux->stats.rx_bytes,
-		       psock->strp.stats.rx_bytes -
+		       psock->strp.stats.bytes -
 		       psock->saved_rx_bytes);
 	mux->stats.rx_msgs +=
-		psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
-	psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
-	psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
+		psock->strp.stats.msgs - psock->saved_rx_msgs;
+	psock->saved_rx_msgs = psock->strp.stats.msgs;
+	psock->saved_rx_bytes = psock->strp.stats.bytes;
 }
 
 static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@ -1118,7 +1118,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct kcm_sock *kcm = kcm_sk(sk);
 	int err = 0;
 	long timeo;
-	struct strp_rx_msg *rxm;
+	struct strp_msg *stm;
 	int copied = 0;
 	struct sk_buff *skb;
 
@@ -1132,26 +1132,26 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
 
 	/* Okay, have a message on the receive queue */
 
-	rxm = strp_rx_msg(skb);
+	stm = strp_msg(skb);
 
-	if (len > rxm->full_len)
-		len = rxm->full_len;
+	if (len > stm->full_len)
+		len = stm->full_len;
 
-	err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
+	err = skb_copy_datagram_msg(skb, stm->offset, msg, len);
 	if (err < 0)
 		goto out;
 
 	copied = len;
 	if (likely(!(flags & MSG_PEEK))) {
 		KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
-		if (copied < rxm->full_len) {
+		if (copied < stm->full_len) {
 			if (sock->type == SOCK_DGRAM) {
 				/* Truncated message */
 				msg->msg_flags |= MSG_TRUNC;
 				goto msg_finished;
 			}
-			rxm->offset += copied;
-			rxm->full_len -= copied;
+			stm->offset += copied;
+			stm->full_len -= copied;
 		} else {
 msg_finished:
 			/* Finished with message */
@@ -1175,7 +1175,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 	struct sock *sk = sock->sk;
 	struct kcm_sock *kcm = kcm_sk(sk);
 	long timeo;
-	struct strp_rx_msg *rxm;
+	struct strp_msg *stm;
 	int err = 0;
 	ssize_t copied;
 	struct sk_buff *skb;
@@ -1192,12 +1192,12 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 
 	/* Okay, have a message on the receive queue */
 
-	rxm = strp_rx_msg(skb);
+	stm = strp_msg(skb);
 
-	if (len > rxm->full_len)
-		len = rxm->full_len;
+	if (len > stm->full_len)
+		len = stm->full_len;
 
-	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
+	copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags);
 	if (copied < 0) {
 		err = copied;
 		goto err_out;
@@ -1205,8 +1205,8 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
 
 	KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
 
-	rxm->offset += copied;
-	rxm->full_len -= copied;
+	stm->offset += copied;
+	stm->full_len -= copied;
 
 	/* We have no way to return MSG_EOR. If all the bytes have been
 	 * read we still leave the message in the receive socket buffer.
@@ -1376,13 +1376,21 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 	struct kcm_psock *psock = NULL, *tpsock;
 	struct list_head *head;
 	int index = 0;
-	struct strp_callbacks cb;
+	static const struct strp_callbacks cb = {
+		.rcv_msg = kcm_rcv_strparser,
+		.parse_msg = kcm_parse_func_strparser,
+		.read_sock_done = kcm_read_sock_done,
+	};
 	int err;
 
 	csk = csock->sk;
 	if (!csk)
 		return -EINVAL;
 
+	/* We must prevent loops or risk deadlock ! */
+	if (csk->sk_family == PF_KCM)
+		return -EOPNOTSUPP;
+
 	psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
 	if (!psock)
 		return -ENOMEM;
@@ -1391,11 +1399,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 	psock->sk = csk;
 	psock->bpf_prog = prog;
 
-	cb.rcv_msg = kcm_rcv_strparser;
-	cb.abort_parser = NULL;
-	cb.parse_msg = kcm_parse_func_strparser;
-	cb.read_sock_done = kcm_read_sock_done;
-
 	err = strp_init(&psock->strp, csk, &cb);
 	if (err) {
 		kmem_cache_free(kcm_psockp, psock);
@@ -1647,7 +1650,7 @@ static int kcm_clone(struct socket *osock, struct kcm_clone *info,
 	}
 
 	newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
-	if (unlikely(IS_ERR(newfile))) {
+	if (IS_ERR(newfile)) {
 		err = PTR_ERR(newfile);
 		goto out_sock_alloc_fail;
 	}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 98f4d8211b9a..3dffb892d52c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2399,8 +2399,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 
 out:
 	xfrm_pol_put(xp);
-	if (err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
@@ -2651,8 +2649,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
 
 out:
 	xfrm_pol_put(xp);
-	if (delete && err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
@@ -2752,8 +2748,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
 	int err, err2;
 
 	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
-	if (!err)
-		xfrm_garbage_collect(net);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - old silent behavior */
@@ -3851,7 +3845,7 @@ static void __net_exit pfkey_net_exit(struct net *net)
 	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
 
 	pfkey_exit_proc(net);
-	BUG_ON(!hlist_empty(&net_pfkey->table));
+	WARN_ON(!hlist_empty(&net_pfkey->table));
 }
 
 static struct pernet_operations pfkey_net_ops = {
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index 2870f41ea44d..399a7e5db2f4 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the L2TP.
 #
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index b0c2d4ae781d..115918ad8eca 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -100,8 +100,6 @@ struct l2tp_skb_cb {
 
 #define L2TP_SKB_CB(skb)	((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
 
-static atomic_t l2tp_tunnel_count;
-static atomic_t l2tp_session_count;
 static struct workqueue_struct *l2tp_wq;
 
 /* per-net private data for this module */
@@ -113,7 +111,6 @@ struct l2tp_net {
 	spinlock_t l2tp_session_hlist_lock;
 };
 
-static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
 
 static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
 {
@@ -127,39 +124,6 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net)
 	return net_generic(net, l2tp_net_id);
 }
 
-/* Tunnel reference counts. Incremented per session that is added to
- * the tunnel.
- */
-static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
-{
-	refcount_inc(&tunnel->ref_count);
-}
-
-static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
-{
-	if (refcount_dec_and_test(&tunnel->ref_count))
-		l2tp_tunnel_free(tunnel);
-}
-#ifdef L2TP_REFCNT_DEBUG
-#define l2tp_tunnel_inc_refcount(_t)					\
-do {									\
-	pr_debug("l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n",	\
-		 __func__, __LINE__, (_t)->name,			\
-		 refcount_read(&_t->ref_count));			\
-	l2tp_tunnel_inc_refcount_1(_t);					\
-} while (0)
-#define l2tp_tunnel_dec_refcount(_t)					\
-do {									\
-	pr_debug("l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n",	\
-		 __func__, __LINE__, (_t)->name,			\
-		 refcount_read(&_t->ref_count));			\
-	l2tp_tunnel_dec_refcount_1(_t);					\
-} while (0)
-#else
-#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
-#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
-#endif
-
 /* Session hash global list for L2TPv3.
  * The session_id SHOULD be random according to RFC3931, but several
  * L2TP implementations use incrementing session_ids.  So we do a real
@@ -229,12 +193,31 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
 	return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
 }
 
-/* Lookup a session. A new reference is held on the returned session.
- * Optionally calls session->ref() too if do_ref is true.
- */
+/* Lookup a tunnel. A new reference is held on the returned tunnel. */
+struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
+{
+	const struct l2tp_net *pn = l2tp_pernet(net);
+	struct l2tp_tunnel *tunnel;
+
+	rcu_read_lock_bh();
+	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
+		if (tunnel->tunnel_id == tunnel_id) {
+			l2tp_tunnel_inc_refcount(tunnel);
+			rcu_read_unlock_bh();
+
+			return tunnel;
+		}
+	}
+	rcu_read_unlock_bh();
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
+
+/* Lookup a session. A new reference is held on the returned session. */
 struct l2tp_session *l2tp_session_get(const struct net *net,
 				      struct l2tp_tunnel *tunnel,
-				      u32 session_id, bool do_ref)
+				      u32 session_id)
 {
 	struct hlist_head *session_list;
 	struct l2tp_session *session;
@@ -248,8 +231,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
 		hlist_for_each_entry_rcu(session, session_list, global_hlist) {
 			if (session->session_id == session_id) {
 				l2tp_session_inc_refcount(session);
-				if (do_ref && session->ref)
-					session->ref(session);
 				rcu_read_unlock_bh();
 
 				return session;
@@ -265,8 +246,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
 	hlist_for_each_entry(session, session_list, hlist) {
 		if (session->session_id == session_id) {
 			l2tp_session_inc_refcount(session);
-			if (do_ref && session->ref)
-				session->ref(session);
 			read_unlock_bh(&tunnel->hlist_lock);
 
 			return session;
@@ -278,8 +257,7 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
 }
 EXPORT_SYMBOL_GPL(l2tp_session_get);
 
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
-					  bool do_ref)
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
 {
 	int hash;
 	struct l2tp_session *session;
@@ -290,8 +268,6 @@ struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
 		hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
 			if (++count > nth) {
 				l2tp_session_inc_refcount(session);
-				if (do_ref && session->ref)
-					session->ref(session);
 				read_unlock_bh(&tunnel->hlist_lock);
 				return session;
 			}
@@ -308,8 +284,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
  * This is very inefficient but is only used by management interfaces.
  */
 struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
-						const char *ifname,
-						bool do_ref)
+						const char *ifname)
 {
 	struct l2tp_net *pn = l2tp_pernet(net);
 	int hash;
@@ -320,8 +295,6 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
 		hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
 			if (!strcmp(session->ifname, ifname)) {
 				l2tp_session_inc_refcount(session);
-				if (do_ref && session->ref)
-					session->ref(session);
 				rcu_read_unlock_bh();
 
 				return session;
@@ -335,20 +308,28 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
 }
 EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
 
-static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
-				      struct l2tp_session *session)
+int l2tp_session_register(struct l2tp_session *session,
+			  struct l2tp_tunnel *tunnel)
 {
 	struct l2tp_session *session_walk;
 	struct hlist_head *g_head;
 	struct hlist_head *head;
 	struct l2tp_net *pn;
+	int err;
 
 	head = l2tp_session_id_hash(tunnel, session->session_id);
 
 	write_lock_bh(&tunnel->hlist_lock);
+	if (!tunnel->acpt_newsess) {
+		err = -ENODEV;
+		goto err_tlock;
+	}
+
 	hlist_for_each_entry(session_walk, head, hlist)
-		if (session_walk->session_id == session->session_id)
-			goto exist;
+		if (session_walk->session_id == session->session_id) {
+			err = -EEXIST;
+			goto err_tlock;
+		}
 
 	if (tunnel->version == L2TP_HDR_VER_3) {
 		pn = l2tp_pernet(tunnel->l2tp_net);
@@ -356,12 +337,21 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
 						session->session_id);
 
 		spin_lock_bh(&pn->l2tp_session_hlist_lock);
+
 		hlist_for_each_entry(session_walk, g_head, global_hlist)
-			if (session_walk->session_id == session->session_id)
-				goto exist_glob;
+			if (session_walk->session_id == session->session_id) {
+				err = -EEXIST;
+				goto err_tlock_pnlock;
+			}
 
+		l2tp_tunnel_inc_refcount(tunnel);
+		sock_hold(tunnel->sock);
 		hlist_add_head_rcu(&session->global_hlist, g_head);
+
 		spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+	} else {
+		l2tp_tunnel_inc_refcount(tunnel);
+		sock_hold(tunnel->sock);
 	}
 
 	hlist_add_head(&session->hlist, head);
@@ -369,13 +359,14 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
 
 	return 0;
 
-exist_glob:
+err_tlock_pnlock:
 	spin_unlock_bh(&pn->l2tp_session_hlist_lock);
-exist:
+err_tlock:
 	write_unlock_bh(&tunnel->hlist_lock);
 
-	return -EEXIST;
+	return err;
 }
+EXPORT_SYMBOL_GPL(l2tp_session_register);
 
 /* Lookup a tunnel by id
  */
@@ -480,9 +471,6 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
 		(*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
 	else
 		kfree_skb(skb);
-
-	if (session->deref)
-		(*session->deref)(session);
 }
 
 /* Dequeue skbs from the session's reorder_q, subject to packet order.
@@ -511,8 +499,6 @@ start:
 			session->reorder_skip = 1;
 			__skb_unlink(skb, &session->reorder_q);
 			kfree_skb(skb);
-			if (session->deref)
-				(*session->deref)(session);
 			continue;
 		}
 
@@ -685,9 +671,6 @@ discard:
  * a data (not control) frame before coming here. Fields up to the
  * session-id have already been parsed and ptr points to the data
  * after the session-id.
- *
- * session->ref() must have been called prior to l2tp_recv_common().
- * session->deref() will be called automatically after skb is processed.
  */
 void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 		      unsigned char *ptr, unsigned char *optr, u16 hdrflags,
@@ -854,9 +837,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 discard:
 	atomic_long_inc(&session->stats.rx_errors);
 	kfree_skb(skb);
-
-	if (session->deref)
-		(*session->deref)(session);
 }
 EXPORT_SYMBOL(l2tp_recv_common);
 
@@ -870,8 +850,6 @@ int l2tp_session_queue_purge(struct l2tp_session *session)
 	while ((skb = skb_dequeue(&session->reorder_q))) {
 		atomic_long_inc(&session->stats.rx_errors);
 		kfree_skb(skb);
-		if (session->deref)
-			(*session->deref)(session);
 	}
 	return 0;
 }
@@ -963,13 +941,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 	}
 
 	/* Find the session context */
-	session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true);
+	session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id);
 	if (!session || !session->recv_skb) {
-		if (session) {
-			if (session->deref)
-				session->deref(session);
+		if (session)
 			l2tp_session_dec_refcount(session);
-		}
 
 		/* Not found? Pass to userspace to deal with */
 		l2tp_info(tunnel, L2TP_MSG_DATA,
@@ -1264,16 +1239,14 @@ static void l2tp_tunnel_destruct(struct sock *sk)
 	/* Remove hooks into tunnel socket */
 	sk->sk_destruct = tunnel->old_sk_destruct;
 	sk->sk_user_data = NULL;
-	tunnel->sock = NULL;
 
 	/* Remove the tunnel struct from the tunnel list */
 	pn = l2tp_pernet(tunnel->l2tp_net);
 	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
 	list_del_rcu(&tunnel->list);
 	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-	atomic_dec(&l2tp_tunnel_count);
 
-	l2tp_tunnel_closeall(tunnel);
+	tunnel->sock = NULL;
 	l2tp_tunnel_dec_refcount(tunnel);
 
 	/* Call the original destructor */
@@ -1298,6 +1271,7 @@ void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
 		  tunnel->name);
 
 	write_lock_bh(&tunnel->hlist_lock);
+	tunnel->acpt_newsess = false;
 	for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
 again:
 		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
@@ -1308,8 +1282,8 @@ again:
 
 			hlist_del_init(&session->hlist);
 
-			if (session->ref != NULL)
-				(*session->ref)(session);
+			if (test_and_set_bit(0, &session->dead))
+				goto again;
 
 			write_unlock_bh(&tunnel->hlist_lock);
 
@@ -1319,9 +1293,6 @@ again:
 			if (session->session_close != NULL)
 				(*session->session_close)(session);
 
-			if (session->deref != NULL)
-				(*session->deref)(session);
-
 			l2tp_session_dec_refcount(session);
 
 			write_lock_bh(&tunnel->hlist_lock);
@@ -1348,17 +1319,6 @@ static void l2tp_udp_encap_destroy(struct sock *sk)
 	}
 }
 
-/* Really kill the tunnel.
- * Come here only when all sessions have been cleared from the tunnel.
- */
-static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
-{
-	BUG_ON(refcount_read(&tunnel->ref_count) != 0);
-	BUG_ON(tunnel->sock != NULL);
-	l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name);
-	kfree_rcu(tunnel, rcu);
-}
-
 /* Workqueue tunnel deletion function */
 static void l2tp_tunnel_del_work(struct work_struct *work)
 {
@@ -1605,6 +1565,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	tunnel->magic = L2TP_TUNNEL_MAGIC;
 	sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
 	rwlock_init(&tunnel->hlist_lock);
+	tunnel->acpt_newsess = true;
 
 	/* The net we belong to */
 	tunnel->l2tp_net = net;
@@ -1662,7 +1623,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 
 	/* Add tunnel to our list */
 	INIT_LIST_HEAD(&tunnel->list);
-	atomic_inc(&l2tp_tunnel_count);
 
 	/* Bump the reference count. The tunnel context is deleted
 	 * only when this drops to zero. Must be done before list insertion
@@ -1689,14 +1649,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
 
 /* This function is used by the netlink TUNNEL_DELETE command.
  */
-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
 {
-	l2tp_tunnel_inc_refcount(tunnel);
-	if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
-		l2tp_tunnel_dec_refcount(tunnel);
-		return 1;
+	if (!test_and_set_bit(0, &tunnel->dead)) {
+		l2tp_tunnel_inc_refcount(tunnel);
+		queue_work(l2tp_wq, &tunnel->del_work);
 	}
-	return 0;
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
 
@@ -1710,8 +1668,6 @@ void l2tp_session_free(struct l2tp_session *session)
 
 	if (tunnel) {
 		BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-		if (session->session_id != 0)
-			atomic_dec(&l2tp_session_count);
 		sock_put(tunnel->sock);
 		session->tunnel = NULL;
 		l2tp_tunnel_dec_refcount(tunnel);
@@ -1754,15 +1710,16 @@ EXPORT_SYMBOL_GPL(__l2tp_session_unhash);
  */
 int l2tp_session_delete(struct l2tp_session *session)
 {
-	if (session->ref)
-		(*session->ref)(session);
+	if (test_and_set_bit(0, &session->dead))
+		return 0;
+
 	__l2tp_session_unhash(session);
 	l2tp_session_queue_purge(session);
 	if (session->session_close != NULL)
 		(*session->session_close)(session);
-	if (session->deref)
-		(*session->deref)(session);
+
 	l2tp_session_dec_refcount(session);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(l2tp_session_delete);
@@ -1788,7 +1745,6 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
 struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
 {
 	struct l2tp_session *session;
-	int err;
 
 	session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
 	if (session != NULL) {
@@ -1844,25 +1800,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 
 		l2tp_session_set_header_len(session, tunnel->version);
 
-		err = l2tp_session_add_to_tunnel(tunnel, session);
-		if (err) {
-			kfree(session);
-
-			return ERR_PTR(err);
-		}
-
-		/* Bump the reference count. The session context is deleted
-		 * only when this drops to zero.
-		 */
 		refcount_set(&session->ref_count, 1);
-		l2tp_tunnel_inc_refcount(tunnel);
-
-		/* Ensure tunnel socket isn't deleted */
-		sock_hold(tunnel->sock);
-
-		/* Ignore management session in session count value */
-		if (session->session_id != 0)
-			atomic_inc(&l2tp_session_count);
 
 		return session;
 	}
@@ -1895,15 +1833,19 @@ static __net_exit void l2tp_exit_net(struct net *net)
 {
 	struct l2tp_net *pn = l2tp_pernet(net);
 	struct l2tp_tunnel *tunnel = NULL;
+	int hash;
 
 	rcu_read_lock_bh();
 	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
-		(void)l2tp_tunnel_delete(tunnel);
+		l2tp_tunnel_delete(tunnel);
 	}
 	rcu_read_unlock_bh();
 
 	flush_workqueue(l2tp_wq);
 	rcu_barrier();
+
+	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
+		WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
 }
 
 static struct pernet_operations l2tp_net_ops = {
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index cdb6e3327f74..9534e16965cc 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -76,6 +76,7 @@ struct l2tp_session_cfg {
 struct l2tp_session {
 	int			magic;		/* should be
 						 * L2TP_SESSION_MAGIC */
+	unsigned long		dead;		/* bit 0: being deleted */
 
 	struct l2tp_tunnel	*tunnel;	/* back pointer to tunnel
 						 * context */
@@ -128,8 +129,6 @@ struct l2tp_session {
 	int (*build_header)(struct l2tp_session *session, void *buf);
 	void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
 	void (*session_close)(struct l2tp_session *session);
-	void (*ref)(struct l2tp_session *session);
-	void (*deref)(struct l2tp_session *session);
 #if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
 	void (*show)(struct seq_file *m, void *priv);
 #endif
@@ -160,8 +159,15 @@ struct l2tp_tunnel_cfg {
 
 struct l2tp_tunnel {
 	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
+
+	unsigned long		dead;
+
 	struct rcu_head rcu;
 	rwlock_t		hlist_lock;	/* protect session_hlist */
+	bool			acpt_newsess;	/* Indicates whether this
+						 * tunnel accepts new sessions.
+						 * Protected by hlist_lock.
+						 */
 	struct hlist_head	session_hlist[L2TP_HASH_SIZE];
 						/* hashed list of sessions,
 						 * hashed by id */
@@ -197,7 +203,9 @@ struct l2tp_tunnel {
 };
 
 struct l2tp_nl_cmd_ops {
-	int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+	int (*session_create)(struct net *net, struct l2tp_tunnel *tunnel,
+			      u32 session_id, u32 peer_session_id,
+			      struct l2tp_session_cfg *cfg);
 	int (*session_delete)(struct l2tp_session *session);
 };
 
@@ -231,14 +239,14 @@ out:
 	return tunnel;
 }
 
+struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
+
 struct l2tp_session *l2tp_session_get(const struct net *net,
 				      struct l2tp_tunnel *tunnel,
-				      u32 session_id, bool do_ref);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
-					  bool do_ref);
+				      u32 session_id);
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
 struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
-						const char *ifname,
-						bool do_ref);
+						const char *ifname);
 struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
 struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
 
@@ -246,11 +254,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
 		       u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
 		       struct l2tp_tunnel **tunnelp);
 void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
+void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
 struct l2tp_session *l2tp_session_create(int priv_size,
 					 struct l2tp_tunnel *tunnel,
 					 u32 session_id, u32 peer_session_id,
 					 struct l2tp_session_cfg *cfg);
+int l2tp_session_register(struct l2tp_session *session,
+			  struct l2tp_tunnel *tunnel);
+
 void __l2tp_session_unhash(struct l2tp_session *session);
 int l2tp_session_delete(struct l2tp_session *session);
 void l2tp_session_free(struct l2tp_session *session);
@@ -269,40 +280,31 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type,
 void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
 int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 
+static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
+{
+	refcount_inc(&tunnel->ref_count);
+}
+
+static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
+{
+	if (refcount_dec_and_test(&tunnel->ref_count))
+		kfree_rcu(tunnel, rcu);
+}
+
 /* Session reference counts. Incremented when code obtains a reference
  * to a session.
  */
-static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session)
+static inline void l2tp_session_inc_refcount(struct l2tp_session *session)
 {
 	refcount_inc(&session->ref_count);
 }
 
-static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session)
+static inline void l2tp_session_dec_refcount(struct l2tp_session *session)
 {
 	if (refcount_dec_and_test(&session->ref_count))
 		l2tp_session_free(session);
 }
 
-#ifdef L2TP_REFCNT_DEBUG
-#define l2tp_session_inc_refcount(_s)					\
-do {									\
-	pr_debug("l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n",	\
-		 __func__, __LINE__, (_s)->name,			\
-		 refcount_read(&_s->ref_count));			\
-	l2tp_session_inc_refcount_1(_s);				\
-} while (0)
-#define l2tp_session_dec_refcount(_s)					\
-do {									\
-	pr_debug("l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n",	\
-		 __func__, __LINE__, (_s)->name,			\
-		 refcount_read(&_s->ref_count));			\
-	l2tp_session_dec_refcount_1(_s);				\
-} while (0)
-#else
-#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s)
-#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s)
-#endif
-
 #define l2tp_printk(ptr, type, func, fmt, ...)				\
 do {									\
 	if (((ptr)->debug) & (type))					\
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 53bae54c4d6e..eb69411bcb47 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
 
 static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
 {
-	pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
+	pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
 	pd->session_idx++;
 
 	if (pd->session == NULL) {
@@ -241,8 +241,6 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
 		l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
 	} else {
 		l2tp_dfs_seq_session_show(m, pd->session);
-		if (pd->session->deref)
-			pd->session->deref(pd->session);
 		l2tp_session_dec_refcount(pd->session);
 	}
 
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 4de2ec94b08c..5c366ecfa1cb 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -41,10 +41,7 @@
 
 /* via netdev_priv() */
 struct l2tp_eth {
-	struct net_device	*dev;
-	struct sock		*tunnel_sock;
 	struct l2tp_session	*session;
-	struct list_head	list;
 	atomic_long_t		tx_bytes;
 	atomic_long_t		tx_packets;
 	atomic_long_t		tx_dropped;
@@ -55,26 +52,12 @@ struct l2tp_eth {
 
 /* via l2tp_session_priv() */
 struct l2tp_eth_sess {
-	struct net_device	*dev;
+	struct net_device __rcu *dev;
 };
 
-/* per-net private data for this module */
-static unsigned int l2tp_eth_net_id;
-struct l2tp_eth_net {
-	struct list_head l2tp_eth_dev_list;
-	spinlock_t l2tp_eth_lock;
-};
-
-static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
-{
-	return net_generic(net, l2tp_eth_net_id);
-}
 
 static int l2tp_eth_dev_init(struct net_device *dev)
 {
-	struct l2tp_eth *priv = netdev_priv(dev);
-
-	priv->dev = dev;
 	eth_hw_addr_random(dev);
 	eth_broadcast_addr(dev->broadcast);
 	netdev_lockdep_set_classes(dev);
@@ -85,12 +68,13 @@ static int l2tp_eth_dev_init(struct net_device *dev)
 static void l2tp_eth_dev_uninit(struct net_device *dev)
 {
 	struct l2tp_eth *priv = netdev_priv(dev);
-	struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev));
+	struct l2tp_eth_sess *spriv;
 
-	spin_lock(&pn->l2tp_eth_lock);
-	list_del_init(&priv->list);
-	spin_unlock(&pn->l2tp_eth_lock);
-	dev_put(dev);
+	spriv = l2tp_session_priv(priv->session);
+	RCU_INIT_POINTER(spriv->dev, NULL);
+	/* No need for synchronize_net() here. We're called by
+	 * unregister_netdev*(), which does the synchronisation for us.
+	 */
 }
 
 static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -148,8 +132,8 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
 static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
 {
 	struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
-	struct net_device *dev = spriv->dev;
-	struct l2tp_eth *priv = netdev_priv(dev);
+	struct net_device *dev;
+	struct l2tp_eth *priv;
 
 	if (session->debug & L2TP_MSG_DATA) {
 		unsigned int length;
@@ -173,16 +157,25 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
 	skb_dst_drop(skb);
 	nf_reset(skb);
 
+	rcu_read_lock();
+	dev = rcu_dereference(spriv->dev);
+	if (!dev)
+		goto error_rcu;
+
+	priv = netdev_priv(dev);
 	if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
 		atomic_long_inc(&priv->rx_packets);
 		atomic_long_add(data_len, &priv->rx_bytes);
 	} else {
 		atomic_long_inc(&priv->rx_errors);
 	}
+	rcu_read_unlock();
+
 	return;
 
+error_rcu:
+	rcu_read_unlock();
 error:
-	atomic_long_inc(&priv->rx_errors);
 	kfree_skb(skb);
 }
 
@@ -193,11 +186,15 @@ static void l2tp_eth_delete(struct l2tp_session *session)
 
 	if (session) {
 		spriv = l2tp_session_priv(session);
-		dev = spriv->dev;
+
+		rtnl_lock();
+		dev = rtnl_dereference(spriv->dev);
 		if (dev) {
-			unregister_netdev(dev);
-			spriv->dev = NULL;
+			unregister_netdevice(dev);
+			rtnl_unlock();
 			module_put(THIS_MODULE);
+		} else {
+			rtnl_unlock();
 		}
 	}
 }
@@ -207,9 +204,20 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
 {
 	struct l2tp_session *session = arg;
 	struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
-	struct net_device *dev = spriv->dev;
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(spriv->dev);
+	if (!dev) {
+		rcu_read_unlock();
+		return;
+	}
+	dev_hold(dev);
+	rcu_read_unlock();
 
 	seq_printf(m, "   interface %s\n", dev->name);
+
+	dev_put(dev);
 }
 #endif
 
@@ -262,23 +270,17 @@ static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
 	dev->needed_headroom += session->hdr_len;
 }
 
-static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
+			   u32 session_id, u32 peer_session_id,
+			   struct l2tp_session_cfg *cfg)
 {
 	unsigned char name_assign_type;
 	struct net_device *dev;
 	char name[IFNAMSIZ];
-	struct l2tp_tunnel *tunnel;
 	struct l2tp_session *session;
 	struct l2tp_eth *priv;
 	struct l2tp_eth_sess *spriv;
 	int rc;
-	struct l2tp_eth_net *pn;
-
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (!tunnel) {
-		rc = -ENODEV;
-		goto out;
-	}
 
 	if (cfg->ifname) {
 		strlcpy(name, cfg->ifname, IFNAMSIZ);
@@ -292,14 +294,14 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 				      peer_session_id, cfg);
 	if (IS_ERR(session)) {
 		rc = PTR_ERR(session);
-		goto out;
+		goto err;
 	}
 
 	dev = alloc_netdev(sizeof(*priv), name, name_assign_type,
 			   l2tp_eth_dev_setup);
 	if (!dev) {
 		rc = -ENOMEM;
-		goto out_del_session;
+		goto err_sess;
 	}
 
 	dev_net_set(dev, net);
@@ -308,11 +310,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 	l2tp_eth_adjust_mtu(tunnel, session, dev);
 
 	priv = netdev_priv(dev);
-	priv->dev = dev;
 	priv->session = session;
-	INIT_LIST_HEAD(&priv->list);
 
-	priv->tunnel_sock = tunnel->sock;
 	session->recv_skb = l2tp_eth_dev_recv;
 	session->session_close = l2tp_eth_delete;
 #if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
@@ -320,48 +319,50 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 #endif
 
 	spriv = l2tp_session_priv(session);
-	spriv->dev = dev;
 
-	rc = register_netdev(dev);
-	if (rc < 0)
-		goto out_del_dev;
+	l2tp_session_inc_refcount(session);
 
-	__module_get(THIS_MODULE);
-	/* Must be done after register_netdev() */
-	strlcpy(session->ifname, dev->name, IFNAMSIZ);
+	rtnl_lock();
 
-	dev_hold(dev);
-	pn = l2tp_eth_pernet(dev_net(dev));
-	spin_lock(&pn->l2tp_eth_lock);
-	list_add(&priv->list, &pn->l2tp_eth_dev_list);
-	spin_unlock(&pn->l2tp_eth_lock);
+	/* Register both device and session while holding the rtnl lock. This
+	 * ensures that l2tp_eth_delete() will see that there's a device to
+	 * unregister, even if it happened to run before we assign spriv->dev.
+	 */
+	rc = l2tp_session_register(session, tunnel);
+	if (rc < 0) {
+		rtnl_unlock();
+		goto err_sess_dev;
+	}
 
-	return 0;
+	rc = register_netdevice(dev);
+	if (rc < 0) {
+		rtnl_unlock();
+		l2tp_session_delete(session);
+		l2tp_session_dec_refcount(session);
+		free_netdev(dev);
 
-out_del_dev:
-	free_netdev(dev);
-	spriv->dev = NULL;
-out_del_session:
-	l2tp_session_delete(session);
-out:
-	return rc;
-}
+		return rc;
+	}
 
-static __net_init int l2tp_eth_init_net(struct net *net)
-{
-	struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id);
+	strlcpy(session->ifname, dev->name, IFNAMSIZ);
+	rcu_assign_pointer(spriv->dev, dev);
+
+	rtnl_unlock();
+
+	l2tp_session_dec_refcount(session);
 
-	INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
-	spin_lock_init(&pn->l2tp_eth_lock);
+	__module_get(THIS_MODULE);
 
 	return 0;
-}
 
-static struct pernet_operations l2tp_eth_net_ops = {
-	.init = l2tp_eth_init_net,
-	.id   = &l2tp_eth_net_id,
-	.size = sizeof(struct l2tp_eth_net),
-};
+err_sess_dev:
+	l2tp_session_dec_refcount(session);
+	free_netdev(dev);
+err_sess:
+	kfree(session);
+err:
+	return rc;
+}
 
 
 static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
@@ -376,25 +377,18 @@ static int __init l2tp_eth_init(void)
 
 	err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
 	if (err)
-		goto out;
-
-	err = register_pernet_device(&l2tp_eth_net_ops);
-	if (err)
-		goto out_unreg;
+		goto err;
 
 	pr_info("L2TP ethernet pseudowire support (L2TPv3)\n");
 
 	return 0;
 
-out_unreg:
-	l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
-out:
+err:
 	return err;
 }
 
 static void __exit l2tp_eth_exit(void)
 {
-	unregister_pernet_device(&l2tp_eth_net_ops);
 	l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
 }
 
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 4d322c1b7233..ff61124fdf59 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
 	unsigned char *ptr, *optr;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel = NULL;
+	struct iphdr *iph;
 	int length;
 
 	if (!pskb_may_pull(skb, 4))
@@ -143,7 +144,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
 	}
 
 	/* Ok, this is a data packet. Lookup the session. */
-	session = l2tp_session_get(net, NULL, session_id, true);
+	session = l2tp_session_get(net, NULL, session_id);
 	if (!session)
 		goto discard;
 
@@ -178,24 +179,17 @@ pass_up:
 		goto discard;
 
 	tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (tunnel) {
-		sk = tunnel->sock;
-		sock_hold(sk);
-	} else {
-		struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
-
-		read_lock_bh(&l2tp_ip_lock);
-		sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr,
-					   inet_iif(skb), tunnel_id);
-		if (!sk) {
-			read_unlock_bh(&l2tp_ip_lock);
-			goto discard;
-		}
+	iph = (struct iphdr *)skb_network_header(skb);
 
-		sock_hold(sk);
+	read_lock_bh(&l2tp_ip_lock);
+	sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb),
+				   tunnel_id);
+	if (!sk) {
 		read_unlock_bh(&l2tp_ip_lock);
+		goto discard;
 	}
+	sock_hold(sk);
+	read_unlock_bh(&l2tp_ip_lock);
 
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_put;
@@ -205,8 +199,6 @@ pass_up:
 	return sk_receive_skb(sk, skb, 1);
 
 discard_sess:
-	if (session->deref)
-		session->deref(session);
 	l2tp_session_dec_refcount(session);
 	goto discard;
 
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 88b397c30d86..192344688c06 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
 	unsigned char *ptr, *optr;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel = NULL;
+	struct ipv6hdr *iph;
 	int length;
 
 	if (!pskb_may_pull(skb, 4))
@@ -156,7 +157,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
 	}
 
 	/* Ok, this is a data packet. Lookup the session. */
-	session = l2tp_session_get(net, NULL, session_id, true);
+	session = l2tp_session_get(net, NULL, session_id);
 	if (!session)
 		goto discard;
 
@@ -192,24 +193,17 @@ pass_up:
 		goto discard;
 
 	tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (tunnel) {
-		sk = tunnel->sock;
-		sock_hold(sk);
-	} else {
-		struct ipv6hdr *iph = ipv6_hdr(skb);
-
-		read_lock_bh(&l2tp_ip6_lock);
-		sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
-					    inet6_iif(skb), tunnel_id);
-		if (!sk) {
-			read_unlock_bh(&l2tp_ip6_lock);
-			goto discard;
-		}
+	iph = ipv6_hdr(skb);
 
-		sock_hold(sk);
+	read_lock_bh(&l2tp_ip6_lock);
+	sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
+				    inet6_iif(skb), tunnel_id);
+	if (!sk) {
 		read_unlock_bh(&l2tp_ip6_lock);
+		goto discard;
 	}
+	sock_hold(sk);
+	read_unlock_bh(&l2tp_ip6_lock);
 
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_put;
@@ -219,8 +213,6 @@ pass_up:
 	return sk_receive_skb(sk, skb, 1);
 
 discard_sess:
-	if (session->deref)
-		session->deref(session);
 	l2tp_session_dec_refcount(session);
 	goto discard;
 
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 12cfcd0ca807..a1f24fb2be98 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -48,8 +48,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
 /* Accessed under genl lock */
 static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
 
-static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
-						bool do_ref)
+static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info)
 {
 	u32 tunnel_id;
 	u32 session_id;
@@ -60,15 +59,16 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
 
 	if (info->attrs[L2TP_ATTR_IFNAME]) {
 		ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
-		session = l2tp_session_get_by_ifname(net, ifname, do_ref);
+		session = l2tp_session_get_by_ifname(net, ifname);
 	} else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
 		   (info->attrs[L2TP_ATTR_CONN_ID])) {
 		tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
 		session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
-		tunnel = l2tp_tunnel_find(net, tunnel_id);
-		if (tunnel)
-			session = l2tp_session_get(net, tunnel, session_id,
-						   do_ref);
+		tunnel = l2tp_tunnel_get(net, tunnel_id);
+		if (tunnel) {
+			session = l2tp_session_get(net, tunnel, session_id);
+			l2tp_tunnel_dec_refcount(tunnel);
+		}
 	}
 
 	return session;
@@ -271,8 +271,8 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
 	}
 	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
 
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (tunnel == NULL) {
+	tunnel = l2tp_tunnel_get(net, tunnel_id);
+	if (!tunnel) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -280,7 +280,9 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
 	l2tp_tunnel_notify(&l2tp_nl_family, info,
 			   tunnel, L2TP_CMD_TUNNEL_DELETE);
 
-	(void) l2tp_tunnel_delete(tunnel);
+	l2tp_tunnel_delete(tunnel);
+
+	l2tp_tunnel_dec_refcount(tunnel);
 
 out:
 	return ret;
@@ -299,8 +301,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
 	}
 	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
 
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (tunnel == NULL) {
+	tunnel = l2tp_tunnel_get(net, tunnel_id);
+	if (!tunnel) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -311,6 +313,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
 	ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
 				 tunnel, L2TP_CMD_TUNNEL_MODIFY);
 
+	l2tp_tunnel_dec_refcount(tunnel);
+
 out:
 	return ret;
 }
@@ -400,7 +404,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 		if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
 		    nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
 			goto nla_put_failure;
-		/* NOBREAK */
+		/* fall through */
 	case L2TP_ENCAPTYPE_IP:
 #if IS_ENABLED(CONFIG_IPV6)
 		if (np) {
@@ -438,34 +442,37 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
 
 	if (!info->attrs[L2TP_ATTR_CONN_ID]) {
 		ret = -EINVAL;
-		goto out;
+		goto err;
 	}
 
 	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
 
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (tunnel == NULL) {
-		ret = -ENODEV;
-		goto out;
-	}
-
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg) {
 		ret = -ENOMEM;
-		goto out;
+		goto err;
+	}
+
+	tunnel = l2tp_tunnel_get(net, tunnel_id);
+	if (!tunnel) {
+		ret = -ENODEV;
+		goto err_nlmsg;
 	}
 
 	ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
 				  NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET);
 	if (ret < 0)
-		goto err_out;
+		goto err_nlmsg_tunnel;
+
+	l2tp_tunnel_dec_refcount(tunnel);
 
 	return genlmsg_unicast(net, msg, info->snd_portid);
 
-err_out:
+err_nlmsg_tunnel:
+	l2tp_tunnel_dec_refcount(tunnel);
+err_nlmsg:
 	nlmsg_free(msg);
-
-out:
+err:
 	return ret;
 }
 
@@ -509,8 +516,9 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 		ret = -EINVAL;
 		goto out;
 	}
+
 	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	tunnel = l2tp_tunnel_get(net, tunnel_id);
 	if (!tunnel) {
 		ret = -ENODEV;
 		goto out;
@@ -518,24 +526,24 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 
 	if (!info->attrs[L2TP_ATTR_SESSION_ID]) {
 		ret = -EINVAL;
-		goto out;
+		goto out_tunnel;
 	}
 	session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
 
 	if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
 		ret = -EINVAL;
-		goto out;
+		goto out_tunnel;
 	}
 	peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]);
 
 	if (!info->attrs[L2TP_ATTR_PW_TYPE]) {
 		ret = -EINVAL;
-		goto out;
+		goto out_tunnel;
 	}
 	cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]);
 	if (cfg.pw_type >= __L2TP_PWTYPE_MAX) {
 		ret = -EINVAL;
-		goto out;
+		goto out_tunnel;
 	}
 
 	if (tunnel->version > 2) {
@@ -557,7 +565,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 			u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
 			if (len > 8) {
 				ret = -EINVAL;
-				goto out;
+				goto out_tunnel;
 			}
 			cfg.cookie_len = len;
 			memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len);
@@ -566,7 +574,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 			u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
 			if (len > 8) {
 				ret = -EINVAL;
-				goto out;
+				goto out_tunnel;
 			}
 			cfg.peer_cookie_len = len;
 			memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len);
@@ -609,7 +617,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 	if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
 	    (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
 		ret = -EPROTONOSUPPORT;
-		goto out;
+		goto out_tunnel;
 	}
 
 	/* Check that pseudowire-specific params are present */
@@ -619,7 +627,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 	case L2TP_PWTYPE_ETH_VLAN:
 		if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
 			ret = -EINVAL;
-			goto out;
+			goto out_tunnel;
 		}
 		break;
 	case L2TP_PWTYPE_ETH:
@@ -633,13 +641,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 		break;
 	}
 
-	ret = -EPROTONOSUPPORT;
-	if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create)
-		ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id,
-			session_id, peer_session_id, &cfg);
+	ret = l2tp_nl_cmd_ops[cfg.pw_type]->session_create(net, tunnel,
+							   session_id,
+							   peer_session_id,
+							   &cfg);
 
 	if (ret >= 0) {
-		session = l2tp_session_get(net, tunnel, session_id, false);
+		session = l2tp_session_get(net, tunnel, session_id);
 		if (session) {
 			ret = l2tp_session_notify(&l2tp_nl_family, info, session,
 						  L2TP_CMD_SESSION_CREATE);
@@ -647,6 +655,8 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 		}
 	}
 
+out_tunnel:
+	l2tp_tunnel_dec_refcount(tunnel);
 out:
 	return ret;
 }
@@ -657,7 +667,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
 	struct l2tp_session *session;
 	u16 pw_type;
 
-	session = l2tp_nl_session_get(info, true);
+	session = l2tp_nl_session_get(info);
 	if (session == NULL) {
 		ret = -ENODEV;
 		goto out;
@@ -671,8 +681,6 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
 		if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
 			ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
 
-	if (session->deref)
-		session->deref(session);
 	l2tp_session_dec_refcount(session);
 
 out:
@@ -684,7 +692,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
 	int ret = 0;
 	struct l2tp_session *session;
 
-	session = l2tp_nl_session_get(info, false);
+	session = l2tp_nl_session_get(info);
 	if (session == NULL) {
 		ret = -ENODEV;
 		goto out;
@@ -816,7 +824,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *msg;
 	int ret;
 
-	session = l2tp_nl_session_get(info, false);
+	session = l2tp_nl_session_get(info);
 	if (session == NULL) {
 		ret = -ENODEV;
 		goto err;
@@ -862,7 +870,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
 				goto out;
 		}
 
-		session = l2tp_session_get_nth(tunnel, si, false);
+		session = l2tp_session_get_nth(tunnel, si);
 		if (session == NULL) {
 			ti++;
 			tunnel = NULL;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f0edb7209079..b412fc3351dc 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -122,10 +122,11 @@
 struct pppol2tp_session {
 	int			owner;		/* pid that opened the socket */
 
-	struct sock		*sock;		/* Pointer to the session
+	struct mutex		sk_lock;	/* Protects .sk */
+	struct sock __rcu	*sk;		/* Pointer to the session
 						 * PPPoX socket */
-	struct sock		*tunnel_sock;	/* Pointer to the tunnel UDP
-						 * socket */
+	struct sock		*__sk;		/* Copy of .sk, for cleanup */
+	struct rcu_head		rcu;		/* For asynchronous release */
 	int			flags;		/* accessed by PPPIOCGFLAGS.
 						 * Unused. */
 };
@@ -138,6 +139,24 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = {
 
 static const struct proto_ops pppol2tp_ops;
 
+/* Retrieves the pppol2tp socket associated with a session, or NULL if the
+ * session has none. A reference is held on the returned socket, so a non-NULL
+ * result must be paired with sock_put().
+ */
+static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session)
+{
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	struct sock *sk;
+
+	rcu_read_lock();
+	sk = rcu_dereference(ps->sk);
+	if (sk)
+		sock_hold(sk);
+	rcu_read_unlock();
+
+	return sk;
+}
+
 /* Helpers to obtain tunnel/session contexts from sockets.
  */
 static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
@@ -224,7 +243,8 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
 	/* If the socket is bound, send it in to PPP's input queue. Otherwise
 	 * queue it on the session socket.
 	 */
-	sk = ps->sock;
+	rcu_read_lock();
+	sk = rcu_dereference(ps->sk);
 	if (sk == NULL)
 		goto no_sock;
 
@@ -247,30 +267,16 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
 			kfree_skb(skb);
 		}
 	}
+	rcu_read_unlock();
 
 	return;
 
 no_sock:
+	rcu_read_unlock();
 	l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name);
 	kfree_skb(skb);
 }
 
-static void pppol2tp_session_sock_hold(struct l2tp_session *session)
-{
-	struct pppol2tp_session *ps = l2tp_session_priv(session);
-
-	if (ps->sock)
-		sock_hold(ps->sock);
-}
-
-static void pppol2tp_session_sock_put(struct l2tp_session *session)
-{
-	struct pppol2tp_session *ps = l2tp_session_priv(session);
-
-	if (ps->sock)
-		sock_put(ps->sock);
-}
-
 /************************************************************************
  * Transmit handling
  ***********************************************************************/
@@ -287,7 +293,6 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
 	int error;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
-	struct pppol2tp_session *ps;
 	int uhlen;
 
 	error = -ENOTCONN;
@@ -300,10 +305,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
 	if (session == NULL)
 		goto error;
 
-	ps = l2tp_session_priv(session);
-	tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
-	if (tunnel == NULL)
-		goto error_put_sess;
+	tunnel = session->tunnel;
 
 	uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
 
@@ -314,7 +316,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
 			   2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
 			   0, GFP_KERNEL);
 	if (!skb)
-		goto error_put_sess_tun;
+		goto error_put_sess;
 
 	/* Reserve space for headers. */
 	skb_reserve(skb, NET_SKB_PAD);
@@ -332,20 +334,17 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
 	error = memcpy_from_msg(skb_put(skb, total_len), m, total_len);
 	if (error < 0) {
 		kfree_skb(skb);
-		goto error_put_sess_tun;
+		goto error_put_sess;
 	}
 
 	local_bh_disable();
 	l2tp_xmit_skb(session, skb, session->hdr_len);
 	local_bh_enable();
 
-	sock_put(ps->tunnel_sock);
 	sock_put(sk);
 
 	return total_len;
 
-error_put_sess_tun:
-	sock_put(ps->tunnel_sock);
 error_put_sess:
 	sock_put(sk);
 error:
@@ -369,10 +368,8 @@ error:
 static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 {
 	struct sock *sk = (struct sock *) chan->private;
-	struct sock *sk_tun;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
-	struct pppol2tp_session *ps;
 	int uhlen, headroom;
 
 	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -383,13 +380,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	if (session == NULL)
 		goto abort;
 
-	ps = l2tp_session_priv(session);
-	sk_tun = ps->tunnel_sock;
-	if (sk_tun == NULL)
-		goto abort_put_sess;
-	tunnel = l2tp_sock_to_tunnel(sk_tun);
-	if (tunnel == NULL)
-		goto abort_put_sess;
+	tunnel = session->tunnel;
 
 	uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
 	headroom = NET_SKB_PAD +
@@ -398,7 +389,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 		   session->hdr_len +	/* L2TP header */
 		   2;			/* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
 	if (skb_cow_head(skb, headroom))
-		goto abort_put_sess_tun;
+		goto abort_put_sess;
 
 	/* Setup PPP header */
 	__skb_push(skb, 2);
@@ -409,12 +400,10 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	l2tp_xmit_skb(session, skb, session->hdr_len);
 	local_bh_enable();
 
-	sock_put(sk_tun);
 	sock_put(sk);
+
 	return 1;
 
-abort_put_sess_tun:
-	sock_put(sk_tun);
 abort_put_sess:
 	sock_put(sk);
 abort:
@@ -431,16 +420,15 @@ abort:
  */
 static void pppol2tp_session_close(struct l2tp_session *session)
 {
-	struct pppol2tp_session *ps = l2tp_session_priv(session);
-	struct sock *sk = ps->sock;
-	struct socket *sock = sk->sk_socket;
+	struct sock *sk;
 
 	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
 
-	if (sock) {
-		inet_shutdown(sock, SEND_SHUTDOWN);
-		/* Don't let the session go away before our socket does */
-		l2tp_session_inc_refcount(session);
+	sk = pppol2tp_session_get_sock(session);
+	if (sk) {
+		if (sk->sk_socket)
+			inet_shutdown(sk->sk_socket, SEND_SHUTDOWN);
+		sock_put(sk);
 	}
 }
 
@@ -461,6 +449,14 @@ static void pppol2tp_session_destruct(struct sock *sk)
 	}
 }
 
+static void pppol2tp_put_sk(struct rcu_head *head)
+{
+	struct pppol2tp_session *ps;
+
+	ps = container_of(head, typeof(*ps), rcu);	/* head is ps->rcu */
+	sock_put(ps->__sk);	/* release the socket saved in ->__sk */
+}
+
 /* Called when the PPPoX socket (session) is closed.
  */
 static int pppol2tp_release(struct socket *sock)
@@ -486,11 +482,23 @@ static int pppol2tp_release(struct socket *sock)
 
 	session = pppol2tp_sock_to_session(sk);
 
-	/* Purge any queued data */
 	if (session != NULL) {
-		__l2tp_session_unhash(session);
-		l2tp_session_queue_purge(session);
-		sock_put(sk);
+		struct pppol2tp_session *ps;
+
+		l2tp_session_delete(session);
+
+		ps = l2tp_session_priv(session);
+		mutex_lock(&ps->sk_lock);
+		ps->__sk = rcu_dereference_protected(ps->sk,
+						     lockdep_is_held(&ps->sk_lock));
+		RCU_INIT_POINTER(ps->sk, NULL);
+		mutex_unlock(&ps->sk_lock);
+		call_rcu(&ps->rcu, pppol2tp_put_sk);
+
+		/* Rely on the sock_put() call at the end of the function for
+		 * dropping the reference held by pppol2tp_sock_to_session().
+		 * The last reference will be dropped by pppol2tp_put_sk().
+		 */
 	}
 	release_sock(sk);
 
@@ -557,16 +565,46 @@ out:
 static void pppol2tp_show(struct seq_file *m, void *arg)
 {
 	struct l2tp_session *session = arg;
-	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	struct sock *sk;
+
+	sk = pppol2tp_session_get_sock(session);
+	if (sk) {
+		struct pppox_sock *po = pppox_sk(sk);
 
-	if (ps) {
-		struct pppox_sock *po = pppox_sk(ps->sock);
-		if (po)
-			seq_printf(m, "   interface %s\n", ppp_dev_name(&po->chan));
+		seq_printf(m, "   interface %s\n", ppp_dev_name(&po->chan));
+		sock_put(sk);
 	}
 }
 #endif
 
+static void pppol2tp_session_init(struct l2tp_session *session)
+{
+	struct pppol2tp_session *ps;
+	struct dst_entry *dst;
+
+	session->recv_skb = pppol2tp_recv;	/* install PPP session callbacks */
+	session->session_close = pppol2tp_session_close;
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
+	session->show = pppol2tp_show;
+#endif
+
+	ps = l2tp_session_priv(session);
+	mutex_init(&ps->sk_lock);	/* protects ps->sk */
+	ps->owner = current->pid;
+
+	/* If PMTU discovery was enabled, use the MTU that was discovered */
+	dst = sk_dst_get(session->tunnel->sock);
+	if (dst) {
+		u32 pmtu = dst_mtu(dst);
+
+		if (pmtu) {	/* pmtu == 0: leave mtu/mru unchanged */
+			session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD;
+			session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD;
+		}
+		dst_release(dst);
+	}
+}
+
 /* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
  */
 static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
@@ -578,12 +616,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	struct l2tp_session *session = NULL;
 	struct l2tp_tunnel *tunnel;
 	struct pppol2tp_session *ps;
-	struct dst_entry *dst;
 	struct l2tp_session_cfg cfg = { 0, };
 	int error = 0;
 	u32 tunnel_id, peer_tunnel_id;
 	u32 session_id, peer_session_id;
 	bool drop_refcnt = false;
+	bool drop_tunnel = false;
 	int ver = 2;
 	int fd;
 
@@ -652,7 +690,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	if (tunnel_id == 0)
 		goto end;
 
-	tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
+	tunnel = l2tp_tunnel_get(sock_net(sk), tunnel_id);
+	if (tunnel)
+		drop_tunnel = true;
 
 	/* Special case: create tunnel context if session_id and
 	 * peer_session_id is 0. Otherwise look up tunnel using supplied
@@ -685,7 +725,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	if (tunnel->peer_tunnel_id == 0)
 		tunnel->peer_tunnel_id = peer_tunnel_id;
 
-	session = l2tp_session_get(sock_net(sk), tunnel, session_id, false);
+	session = l2tp_session_get(sock_net(sk), tunnel, session_id);
 	if (session) {
 		drop_refcnt = true;
 		ps = l2tp_session_priv(session);
@@ -693,13 +733,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 		/* Using a pre-existing session is fine as long as it hasn't
 		 * been connected yet.
 		 */
-		if (ps->sock) {
-			error = -EEXIST;
-			goto end;
-		}
-
-		/* consistency checks */
-		if (ps->tunnel_sock != tunnel->sock) {
+		mutex_lock(&ps->sk_lock);
+		if (rcu_dereference_protected(ps->sk,
+					      lockdep_is_held(&ps->sk_lock))) {
+			mutex_unlock(&ps->sk_lock);
 			error = -EEXIST;
 			goto end;
 		}
@@ -715,35 +752,19 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 			error = PTR_ERR(session);
 			goto end;
 		}
-	}
-
-	/* Associate session with its PPPoL2TP socket */
-	ps = l2tp_session_priv(session);
-	ps->owner	     = current->pid;
-	ps->sock	     = sk;
-	ps->tunnel_sock = tunnel->sock;
-
-	session->recv_skb	= pppol2tp_recv;
-	session->session_close	= pppol2tp_session_close;
-#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
-	session->show		= pppol2tp_show;
-#endif
-
-	/* We need to know each time a skb is dropped from the reorder
-	 * queue.
-	 */
-	session->ref = pppol2tp_session_sock_hold;
-	session->deref = pppol2tp_session_sock_put;
 
-	/* If PMTU discovery was enabled, use the MTU that was discovered */
-	dst = sk_dst_get(tunnel->sock);
-	if (dst != NULL) {
-		u32 pmtu = dst_mtu(dst);
+		pppol2tp_session_init(session);
+		ps = l2tp_session_priv(session);
+		l2tp_session_inc_refcount(session);
 
-		if (pmtu != 0)
-			session->mtu = session->mru = pmtu -
-				PPPOL2TP_HEADER_OVERHEAD;
-		dst_release(dst);
+		mutex_lock(&ps->sk_lock);
+		error = l2tp_session_register(session, tunnel);
+		if (error < 0) {
+			mutex_unlock(&ps->sk_lock);
+			kfree(session);
+			goto end;
+		}
+		drop_refcnt = true;
 	}
 
 	/* Special case: if source & dest session_id == 0x0000, this
@@ -768,12 +789,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	po->chan.mtu	 = session->mtu;
 
 	error = ppp_register_net_channel(sock_net(sk), &po->chan);
-	if (error)
+	if (error) {
+		mutex_unlock(&ps->sk_lock);
 		goto end;
+	}
 
 out_no_ppp:
 	/* This is how we get the session context from the socket. */
 	sk->sk_user_data = session;
+	rcu_assign_pointer(ps->sk, sk);
+	mutex_unlock(&ps->sk_lock);
+
+	/* Keep the reference we've grabbed on the session: sk doesn't expect
+	 * the session to disappear. pppol2tp_session_destruct() is responsible
+	 * for dropping it.
+	 */
+	drop_refcnt = false;
+
 	sk->sk_state = PPPOX_CONNECTED;
 	l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
 		  session->name);
@@ -781,6 +813,8 @@ out_no_ppp:
 end:
 	if (drop_refcnt)
 		l2tp_session_dec_refcount(session);
+	if (drop_tunnel)
+		l2tp_tunnel_dec_refcount(tunnel);
 	release_sock(sk);
 
 	return error;
@@ -788,25 +822,19 @@ end:
 
 #ifdef CONFIG_L2TP_V3
 
-/* Called when creating sessions via the netlink interface.
- */
-static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+/* Called when creating sessions via the netlink interface. */
+static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
+				   u32 session_id, u32 peer_session_id,
+				   struct l2tp_session_cfg *cfg)
 {
 	int error;
-	struct l2tp_tunnel *tunnel;
 	struct l2tp_session *session;
-	struct pppol2tp_session *ps;
-
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
-
-	/* Error if we can't find the tunnel */
-	error = -ENOENT;
-	if (tunnel == NULL)
-		goto out;
 
 	/* Error if tunnel socket is not prepped */
-	if (tunnel->sock == NULL)
-		goto out;
+	if (!tunnel->sock) {
+		error = -ENOENT;
+		goto err;
+	}
 
 	/* Default MTU values. */
 	if (cfg->mtu == 0)
@@ -820,18 +848,20 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
 				      peer_session_id, cfg);
 	if (IS_ERR(session)) {
 		error = PTR_ERR(session);
-		goto out;
+		goto err;
 	}
 
-	ps = l2tp_session_priv(session);
-	ps->tunnel_sock = tunnel->sock;
+	pppol2tp_session_init(session);
 
-	l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
-		  session->name);
+	error = l2tp_session_register(session, tunnel);
+	if (error < 0)
+		goto err_sess;
 
-	error = 0;
+	return 0;
 
-out:
+err_sess:
+	kfree(session);
+err:
 	return error;
 }
 
@@ -862,9 +892,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 		goto end;
 
 	pls = l2tp_session_priv(session);
-	tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock);
-	if (tunnel == NULL)
-		goto end_put_sess;
+	tunnel = session->tunnel;
 
 	inet = inet_sk(tunnel->sock);
 	if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) {
@@ -944,8 +972,6 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 	*usockaddr_len = len;
 	error = 0;
 
-	sock_put(pls->tunnel_sock);
-end_put_sess:
 	sock_put(sk);
 end:
 	return error;
@@ -992,8 +1018,9 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
 		 "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
 		 session->name, cmd, arg);
 
-	sk = ps->sock;
-	sock_hold(sk);
+	sk = pppol2tp_session_get_sock(session);
+	if (!sk)
+		return -EBADR;
 
 	switch (cmd) {
 	case SIOCGIFMTU:
@@ -1140,13 +1167,11 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
 			/* resend to session ioctl handler */
 			struct l2tp_session *session =
 				l2tp_session_get(sock_net(sk), tunnel,
-						 stats.session_id, true);
+						 stats.session_id);
 
 			if (session) {
 				err = pppol2tp_session_ioctl(session, cmd,
 							     arg);
-				if (session->deref)
-					session->deref(session);
 				l2tp_session_dec_refcount(session);
 			} else {
 				err = -EBADR;
@@ -1185,7 +1210,6 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
 	struct sock *sk = sock->sk;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
-	struct pppol2tp_session *ps;
 	int err;
 
 	if (!sk)
@@ -1209,16 +1233,10 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
 	/* Special case: if session's session_id is zero, treat ioctl as a
 	 * tunnel ioctl
 	 */
-	ps = l2tp_session_priv(session);
 	if ((session->session_id == 0) &&
 	    (session->peer_session_id == 0)) {
-		err = -EBADF;
-		tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
+		tunnel = session->tunnel;
 		err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
-		sock_put(ps->tunnel_sock);
 		goto end_put_sess;
 	}
 
@@ -1270,7 +1288,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
 				       int optname, int val)
 {
 	int err = 0;
-	struct pppol2tp_session *ps = l2tp_session_priv(session);
 
 	switch (optname) {
 	case PPPOL2TP_SO_RECVSEQ:
@@ -1291,8 +1308,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
 		}
 		session->send_seq = !!val;
 		{
-			struct sock *ssk      = ps->sock;
-			struct pppox_sock *po = pppox_sk(ssk);
+			struct pppox_sock *po = pppox_sk(sk);
+
 			po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
 				PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
 		}
@@ -1345,7 +1362,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
 	struct sock *sk = sock->sk;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
-	struct pppol2tp_session *ps;
 	int val;
 	int err;
 
@@ -1370,20 +1386,14 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
 
 	/* Special case: if session_id == 0x0000, treat as operation on tunnel
 	 */
-	ps = l2tp_session_priv(session);
 	if ((session->session_id == 0) &&
 	    (session->peer_session_id == 0)) {
-		err = -EBADF;
-		tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
+		tunnel = session->tunnel;
 		err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
-		sock_put(ps->tunnel_sock);
-	} else
+	} else {
 		err = pppol2tp_session_setsockopt(sk, session, optname, val);
+	}
 
-end_put_sess:
 	sock_put(sk);
 end:
 	return err;
@@ -1471,7 +1481,6 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
 	struct l2tp_tunnel *tunnel;
 	int val, len;
 	int err;
-	struct pppol2tp_session *ps;
 
 	if (level != SOL_PPPOL2TP)
 		return -EINVAL;
@@ -1495,16 +1504,10 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
 		goto end;
 
 	/* Special case: if session_id == 0x0000, treat as operation on tunnel */
-	ps = l2tp_session_priv(session);
 	if ((session->session_id == 0) &&
 	    (session->peer_session_id == 0)) {
-		err = -EBADF;
-		tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
+		tunnel = session->tunnel;
 		err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
-		sock_put(ps->tunnel_sock);
 		if (err)
 			goto end_put_sess;
 	} else {
@@ -1563,7 +1566,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
 
 static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
 {
-	pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
+	pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
 	pd->session_idx++;
 
 	if (pd->session == NULL) {
@@ -1631,8 +1634,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 {
 	struct l2tp_session *session = v;
 	struct l2tp_tunnel *tunnel = session->tunnel;
-	struct pppol2tp_session *ps = l2tp_session_priv(session);
-	struct pppox_sock *po = pppox_sk(ps->sock);
+	unsigned char state;
+	char user_data_ok;
+	struct sock *sk;
 	u32 ip = 0;
 	u16 port = 0;
 
@@ -1642,6 +1646,15 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 		port = ntohs(inet->inet_sport);
 	}
 
+	sk = pppol2tp_session_get_sock(session);
+	if (sk) {
+		state = sk->sk_state;
+		user_data_ok = (session == sk->sk_user_data) ? 'Y' : 'N';
+	} else {
+		state = 0;
+		user_data_ok = 'N';
+	}
+
 	seq_printf(m, "  SESSION '%s' %08X/%d %04X/%04X -> "
 		   "%04X/%04X %d %c\n",
 		   session->name, ip, port,
@@ -1649,9 +1662,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 		   session->session_id,
 		   tunnel->peer_tunnel_id,
 		   session->peer_session_id,
-		   ps->sock->sk_state,
-		   (session == ps->sock->sk_user_data) ?
-		   'Y' : 'N');
+		   state, user_data_ok);
 	seq_printf(m, "   %d/%d/%c/%c/%s %08x %u\n",
 		   session->mtu, session->mru,
 		   session->recv_seq ? 'R' : '-',
@@ -1668,8 +1679,12 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 		   atomic_long_read(&session->stats.rx_bytes),
 		   atomic_long_read(&session->stats.rx_errors));
 
-	if (po)
+	if (sk) {
+		struct pppox_sock *po = pppox_sk(sk);
+
 		seq_printf(m, "   interface %s\n", ppp_dev_name(&po->chan));
+		sock_put(sk);
+	}
 }
 
 static int pppol2tp_seq_show(struct seq_file *m, void *v)
@@ -1694,8 +1709,6 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
 		pppol2tp_seq_tunnel_show(m, pd->tunnel);
 	} else {
 		pppol2tp_seq_session_show(m, pd->session);
-		if (pd->session->deref)
-			pd->session->deref(pd->session);
 		l2tp_session_dec_refcount(pd->session);
 	}
 
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index e15314e3b464..db6e0afe3a20 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -127,8 +127,8 @@ static struct lapb_cb *lapb_create_cb(void)
 	skb_queue_head_init(&lapb->write_queue);
 	skb_queue_head_init(&lapb->ack_queue);
 
-	init_timer(&lapb->t1timer);
-	init_timer(&lapb->t2timer);
+	timer_setup(&lapb->t1timer, NULL, 0);
+	timer_setup(&lapb->t2timer, NULL, 0);
 
 	lapb->t1      = LAPB_DEFAULT_T1;
 	lapb->t2      = LAPB_DEFAULT_T2;
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 1a5535bc3b8d..8bb469cb3abe 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -35,15 +35,14 @@
 #include <linux/interrupt.h>
 #include <net/lapb.h>
 
-static void lapb_t1timer_expiry(unsigned long);
-static void lapb_t2timer_expiry(unsigned long);
+static void lapb_t1timer_expiry(struct timer_list *);
+static void lapb_t2timer_expiry(struct timer_list *);
 
 void lapb_start_t1timer(struct lapb_cb *lapb)
 {
 	del_timer(&lapb->t1timer);
 
-	lapb->t1timer.data     = (unsigned long)lapb;
-	lapb->t1timer.function = &lapb_t1timer_expiry;
+	lapb->t1timer.function = (TIMER_FUNC_TYPE)lapb_t1timer_expiry;
 	lapb->t1timer.expires  = jiffies + lapb->t1;
 
 	add_timer(&lapb->t1timer);
@@ -53,8 +52,7 @@ void lapb_start_t2timer(struct lapb_cb *lapb)
 {
 	del_timer(&lapb->t2timer);
 
-	lapb->t2timer.data     = (unsigned long)lapb;
-	lapb->t2timer.function = &lapb_t2timer_expiry;
+	lapb->t2timer.function = (TIMER_FUNC_TYPE)lapb_t2timer_expiry;
 	lapb->t2timer.expires  = jiffies + lapb->t2;
 
 	add_timer(&lapb->t2timer);
@@ -75,9 +73,9 @@ int lapb_t1timer_running(struct lapb_cb *lapb)
 	return timer_pending(&lapb->t1timer);
 }
 
-static void lapb_t2timer_expiry(unsigned long param)
+static void lapb_t2timer_expiry(struct timer_list *t)
 {
-	struct lapb_cb *lapb = (struct lapb_cb *)param;
+	struct lapb_cb *lapb = from_timer(lapb, t, t2timer);
 
 	if (lapb->condition & LAPB_ACK_PENDING_CONDITION) {
 		lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
@@ -85,9 +83,9 @@ static void lapb_t2timer_expiry(unsigned long param)
 	}
 }
 
-static void lapb_t1timer_expiry(unsigned long param)
+static void lapb_t1timer_expiry(struct timer_list *t)
 {
-	struct lapb_cb *lapb = (struct lapb_cb *)param;
+	struct lapb_cb *lapb = from_timer(lapb, t, t1timer);
 
 	switch (lapb->state) {
 
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index ea225bd2672c..f59648018060 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1318,9 +1318,8 @@ static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type)
+static void llc_conn_tmr_common_cb(struct sock *sk, u8 type)
 {
-	struct sock *sk = (struct sock *)timeout_data;
 	struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
 
 	bh_lock_sock(sk);
@@ -1334,24 +1333,32 @@ static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type)
 	bh_unlock_sock(sk);
 }
 
-void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data)
+void llc_conn_pf_cycle_tmr_cb(struct timer_list *t)
 {
-	llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_P_TMR);
+	struct llc_sock *llc = from_timer(llc, t, pf_cycle_timer.timer);
+
+	llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_P_TMR);
 }
 
-void llc_conn_busy_tmr_cb(unsigned long timeout_data)
+void llc_conn_busy_tmr_cb(struct timer_list *t)
 {
-	llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_BUSY_TMR);
+	struct llc_sock *llc = from_timer(llc, t, busy_state_timer.timer);
+
+	llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_BUSY_TMR);
 }
 
-void llc_conn_ack_tmr_cb(unsigned long timeout_data)
+void llc_conn_ack_tmr_cb(struct timer_list *t)
 {
-	llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_ACK_TMR);
+	struct llc_sock *llc = from_timer(llc, t, ack_timer.timer);
+
+	llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_ACK_TMR);
 }
 
-void llc_conn_rej_tmr_cb(unsigned long timeout_data)
+void llc_conn_rej_tmr_cb(struct timer_list *t)
 {
-	llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_REJ_TMR);
+	struct llc_sock *llc = from_timer(llc, t, rej_sent_timer.timer);
+
+	llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_REJ_TMR);
 }
 
 int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb)
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 5e91b47f0d2a..9177dbb16dce 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -902,20 +902,16 @@ static void llc_sk_init(struct sock *sk)
 	llc->inc_cntr = llc->dec_cntr = 2;
 	llc->dec_step = llc->connect_step = 1;
 
-	setup_timer(&llc->ack_timer.timer, llc_conn_ack_tmr_cb,
-			(unsigned long)sk);
+	timer_setup(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, 0);
 	llc->ack_timer.expire	      = sysctl_llc2_ack_timeout;
 
-	setup_timer(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb,
-			(unsigned long)sk);
+	timer_setup(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, 0);
 	llc->pf_cycle_timer.expire	   = sysctl_llc2_p_timeout;
 
-	setup_timer(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb,
-			(unsigned long)sk);
+	timer_setup(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, 0);
 	llc->rej_sent_timer.expire	   = sysctl_llc2_rej_timeout;
 
-	setup_timer(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb,
-			(unsigned long)sk);
+	timer_setup(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, 0);
 	llc->busy_state_timer.expire	     = sysctl_llc2_busy_timeout;
 
 	llc->n2 = 2;   /* max retransmit */
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index dd3e83328ad5..82cb93f66b9b 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -193,7 +193,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 	 */
 	rcv = rcu_dereference(sap->rcv_func);
 	dest = llc_pdu_type(skb);
-	sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL;
+	sap_handler = dest ? READ_ONCE(llc_type_handlers[dest - 1]) : NULL;
 	if (unlikely(!sap_handler)) {
 		if (rcv)
 			rcv(skb, dev, pt, orig_dev);
@@ -214,7 +214,7 @@ drop:
 	kfree_skb(skb);
 	goto out;
 handle_station:
-	sta_handler = ACCESS_ONCE(llc_station_handler);
+	sta_handler = READ_ONCE(llc_station_handler);
 	if (!sta_handler)
 		goto drop;
 	sta_handler(skb);
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 799bafc2af39..8443a6d841b0 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sysctl_net_llc.c: sysctl interface to LLC net subsystem.
  *
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 282912245938..e3589ade62e0 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_MAC80211) += mac80211.o
 
 # mac80211 objects
@@ -6,6 +7,7 @@ mac80211-y := \
 	driver-ops.o \
 	sta_info.o \
 	wep.o \
+	aead_api.o \
 	wpa.o \
 	scan.o offchannel.o \
 	ht.o agg-tx.o agg-rx.o \
@@ -15,8 +17,6 @@ mac80211-y := \
 	rate.o \
 	michael.o \
 	tkip.o \
-	aes_ccm.o \
-	aes_gcm.o \
 	aes_cmac.o \
 	aes_gmac.o \
 	fils_aead.o \
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aead_api.c
index a4e0d59a40dd..160f9df30402 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aead_api.c
@@ -1,6 +1,7 @@
 /*
  * Copyright 2003-2004, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
+ * Copyright 2014-2015, Qualcomm Atheros, Inc.
  *
  * Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
@@ -12,30 +13,29 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/err.h>
+#include <linux/scatterlist.h>
 #include <crypto/aead.h>
 
-#include <net/mac80211.h>
-#include "key.h"
-#include "aes_ccm.h"
+#include "aead_api.h"
 
-int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
-			      u8 *data, size_t data_len, u8 *mic,
-			      size_t mic_len)
+int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
+		 u8 *data, size_t data_len, u8 *mic)
 {
+	size_t mic_len = crypto_aead_authsize(tfm);
 	struct scatterlist sg[3];
 	struct aead_request *aead_req;
 	int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
 	u8 *__aad;
 
-	aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+	aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
 	if (!aead_req)
 		return -ENOMEM;
 
 	__aad = (u8 *)aead_req + reqsize;
-	memcpy(__aad, aad, CCM_AAD_LEN);
+	memcpy(__aad, aad, aad_len);
 
 	sg_init_table(sg, 3);
-	sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
+	sg_set_buf(&sg[0], __aad, aad_len);
 	sg_set_buf(&sg[1], data, data_len);
 	sg_set_buf(&sg[2], mic, mic_len);
 
@@ -49,10 +49,10 @@ int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
 	return 0;
 }
 
-int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
-			      u8 *data, size_t data_len, u8 *mic,
-			      size_t mic_len)
+int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
+		 u8 *data, size_t data_len, u8 *mic)
 {
+	size_t mic_len = crypto_aead_authsize(tfm);
 	struct scatterlist sg[3];
 	struct aead_request *aead_req;
 	int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
@@ -62,15 +62,15 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
 	if (data_len == 0)
 		return -EINVAL;
 
-	aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+	aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
 	if (!aead_req)
 		return -ENOMEM;
 
 	__aad = (u8 *)aead_req + reqsize;
-	memcpy(__aad, aad, CCM_AAD_LEN);
+	memcpy(__aad, aad, aad_len);
 
 	sg_init_table(sg, 3);
-	sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
+	sg_set_buf(&sg[0], __aad, aad_len);
 	sg_set_buf(&sg[1], data, data_len);
 	sg_set_buf(&sg[2], mic, mic_len);
 
@@ -84,14 +84,14 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
 	return err;
 }
 
-struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
-						    size_t key_len,
-						    size_t mic_len)
+struct crypto_aead *
+aead_key_setup_encrypt(const char *alg, const u8 key[],
+		       size_t key_len, size_t mic_len)
 {
 	struct crypto_aead *tfm;
 	int err;
 
-	tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
+	tfm = crypto_alloc_aead(alg, 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm))
 		return tfm;
 
@@ -109,7 +109,7 @@ free_aead:
 	return ERR_PTR(err);
 }
 
-void ieee80211_aes_key_free(struct crypto_aead *tfm)
+void aead_key_free(struct crypto_aead *tfm)
 {
 	crypto_free_aead(tfm);
 }
diff --git a/net/mac80211/aead_api.h b/net/mac80211/aead_api.h
new file mode 100644
index 000000000000..5e39ea843bbf
--- /dev/null
+++ b/net/mac80211/aead_api.h
@@ -0,0 +1,27 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _AEAD_API_H
+#define _AEAD_API_H
+
+#include <crypto/aead.h>
+#include <linux/crypto.h>
+
+struct crypto_aead *
+aead_key_setup_encrypt(const char *alg, const u8 key[],
+		       size_t key_len, size_t mic_len);
+
+int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+		 size_t aad_len, u8 *data,
+		 size_t data_len, u8 *mic);
+
+int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+		 size_t aad_len, u8 *data,
+		 size_t data_len, u8 *mic);
+
+void aead_key_free(struct crypto_aead *tfm);
+
+#endif /* _AEAD_API_H */
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index fcd3254c5cf0..e9b7ca0bde5b 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -10,19 +10,39 @@
 #ifndef AES_CCM_H
 #define AES_CCM_H
 
-#include <linux/crypto.h>
+#include "aead_api.h"
 
 #define CCM_AAD_LEN	32
 
-struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
-						    size_t key_len,
-						    size_t mic_len);
-int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
-			      u8 *data, size_t data_len, u8 *mic,
-			      size_t mic_len);
-int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
-			      u8 *data, size_t data_len, u8 *mic,
-			      size_t mic_len);
-void ieee80211_aes_key_free(struct crypto_aead *tfm);
+static inline struct crypto_aead *
+ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len)
+{
+	return aead_key_setup_encrypt("ccm(aes)", key, key_len, mic_len);
+}
+
+static inline int
+ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm,
+			  u8 *b_0, u8 *aad, u8 *data,
+			  size_t data_len, u8 *mic)
+{
+	return aead_encrypt(tfm, b_0, aad + 2,
+			    be16_to_cpup((__be16 *)aad),
+			    data, data_len, mic);
+}
+
+static inline int
+ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm,
+			  u8 *b_0, u8 *aad, u8 *data,
+			  size_t data_len, u8 *mic)
+{
+	return aead_decrypt(tfm, b_0, aad + 2,
+			    be16_to_cpup((__be16 *)aad),
+			    data, data_len, mic);
+}
+
+static inline void ieee80211_aes_key_free(struct crypto_aead *tfm)
+{
+	return aead_key_free(tfm);
+}
 
 #endif /* AES_CCM_H */
diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c
deleted file mode 100644
index 8a4397cc1b08..000000000000
--- a/net/mac80211/aes_gcm.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright 2014-2015, Qualcomm Atheros, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/err.h>
-#include <crypto/aead.h>
-
-#include <net/mac80211.h>
-#include "key.h"
-#include "aes_gcm.h"
-
-int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
-			      u8 *data, size_t data_len, u8 *mic)
-{
-	struct scatterlist sg[3];
-	struct aead_request *aead_req;
-	int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
-	u8 *__aad;
-
-	aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
-	if (!aead_req)
-		return -ENOMEM;
-
-	__aad = (u8 *)aead_req + reqsize;
-	memcpy(__aad, aad, GCM_AAD_LEN);
-
-	sg_init_table(sg, 3);
-	sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
-	sg_set_buf(&sg[1], data, data_len);
-	sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
-
-	aead_request_set_tfm(aead_req, tfm);
-	aead_request_set_crypt(aead_req, sg, sg, data_len, j_0);
-	aead_request_set_ad(aead_req, sg[0].length);
-
-	crypto_aead_encrypt(aead_req);
-	kzfree(aead_req);
-	return 0;
-}
-
-int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
-			      u8 *data, size_t data_len, u8 *mic)
-{
-	struct scatterlist sg[3];
-	struct aead_request *aead_req;
-	int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
-	u8 *__aad;
-	int err;
-
-	if (data_len == 0)
-		return -EINVAL;
-
-	aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
-	if (!aead_req)
-		return -ENOMEM;
-
-	__aad = (u8 *)aead_req + reqsize;
-	memcpy(__aad, aad, GCM_AAD_LEN);
-
-	sg_init_table(sg, 3);
-	sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
-	sg_set_buf(&sg[1], data, data_len);
-	sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
-
-	aead_request_set_tfm(aead_req, tfm);
-	aead_request_set_crypt(aead_req, sg, sg,
-			       data_len + IEEE80211_GCMP_MIC_LEN, j_0);
-	aead_request_set_ad(aead_req, sg[0].length);
-
-	err = crypto_aead_decrypt(aead_req);
-	kzfree(aead_req);
-
-	return err;
-}
-
-struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
-							size_t key_len)
-{
-	struct crypto_aead *tfm;
-	int err;
-
-	tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(tfm))
-		return tfm;
-
-	err = crypto_aead_setkey(tfm, key, key_len);
-	if (err)
-		goto free_aead;
-	err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN);
-	if (err)
-		goto free_aead;
-
-	return tfm;
-
-free_aead:
-	crypto_free_aead(tfm);
-	return ERR_PTR(err);
-}
-
-void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
-{
-	crypto_free_aead(tfm);
-}
diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h
index 55aed5352494..d2b096033009 100644
--- a/net/mac80211/aes_gcm.h
+++ b/net/mac80211/aes_gcm.h
@@ -9,16 +9,38 @@
 #ifndef AES_GCM_H
 #define AES_GCM_H
 
-#include <linux/crypto.h>
+#include "aead_api.h"
 
 #define GCM_AAD_LEN	32
 
-int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
-			      u8 *data, size_t data_len, u8 *mic);
-int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
-			      u8 *data, size_t data_len, u8 *mic);
-struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
-							size_t key_len);
-void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm);
+static inline int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm,
+					    u8 *j_0, u8 *aad,  u8 *data,
+					    size_t data_len, u8 *mic)
+{
+	return aead_encrypt(tfm, j_0, aad + 2,
+			    be16_to_cpup((__be16 *)aad),
+			    data, data_len, mic);
+}
+
+static inline int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm,
+					    u8 *j_0, u8 *aad, u8 *data,
+					    size_t data_len, u8 *mic)
+{
+	return aead_decrypt(tfm, j_0, aad + 2,
+			    be16_to_cpup((__be16 *)aad),
+			    data, data_len, mic);
+}
+
+static inline struct crypto_aead *
+ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], size_t key_len)
+{
+	return aead_key_setup_encrypt("gcm(aes)", key,
+				      key_len, IEEE80211_GCMP_MIC_LEN);
+}
+
+static inline void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
+{
+	return aead_key_free(tfm);
+}
 
 #endif /* AES_GCM_H */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 2b36eff5d97e..88cc1ae935ea 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -245,10 +245,10 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
 	ieee80211_tx_skb(sdata, skb);
 }
 
-void __ieee80211_start_rx_ba_session(struct sta_info *sta,
-				     u8 dialog_token, u16 timeout,
-				     u16 start_seq_num, u16 ba_policy, u16 tid,
-				     u16 buf_size, bool tx, bool auto_seq)
+void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
+				      u8 dialog_token, u16 timeout,
+				      u16 start_seq_num, u16 ba_policy, u16 tid,
+				      u16 buf_size, bool tx, bool auto_seq)
 {
 	struct ieee80211_local *local = sta->sdata->local;
 	struct tid_ampdu_rx *tid_agg_rx;
@@ -267,7 +267,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 		ht_dbg(sta->sdata,
 		       "STA %pM requests BA session on unsupported tid %d\n",
 		       sta->sta.addr, tid);
-		goto end_no_lock;
+		goto end;
 	}
 
 	if (!sta->sta.ht_cap.ht_supported) {
@@ -275,14 +275,14 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 		       "STA %pM erroneously requests BA session on tid %d w/o QoS\n",
 		       sta->sta.addr, tid);
 		/* send a response anyway, it's an error case if we get here */
-		goto end_no_lock;
+		goto end;
 	}
 
 	if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) {
 		ht_dbg(sta->sdata,
 		       "Suspend in progress - Denying ADDBA request (%pM tid %d)\n",
 		       sta->sta.addr, tid);
-		goto end_no_lock;
+		goto end;
 	}
 
 	/* sanity check for incoming parameters:
@@ -296,7 +296,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 		ht_dbg_ratelimited(sta->sdata,
 				   "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n",
 				   sta->sta.addr, tid, ba_policy, buf_size);
-		goto end_no_lock;
+		goto end;
 	}
 	/* determine default buffer size */
 	if (buf_size == 0)
@@ -311,7 +311,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 	       buf_size, sta->sta.addr);
 
 	/* examine state machine */
-	mutex_lock(&sta->ampdu_mlme.mtx);
+	lockdep_assert_held(&sta->ampdu_mlme.mtx);
 
 	if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
 		if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
@@ -415,15 +415,25 @@ end:
 		__clear_bit(tid, sta->ampdu_mlme.unexpected_agg);
 		sta->ampdu_mlme.tid_rx_token[tid] = dialog_token;
 	}
-	mutex_unlock(&sta->ampdu_mlme.mtx);
 
-end_no_lock:
 	if (tx)
 		ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
 					  dialog_token, status, 1, buf_size,
 					  timeout);
 }
 
+void __ieee80211_start_rx_ba_session(struct sta_info *sta,
+				     u8 dialog_token, u16 timeout,
+				     u16 start_seq_num, u16 ba_policy, u16 tid,
+				     u16 buf_size, bool tx, bool auto_seq)
+{
+	mutex_lock(&sta->ampdu_mlme.mtx);
+	___ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
+					 start_seq_num, ba_policy, tid,
+					 buf_size, tx, auto_seq);
+	mutex_unlock(&sta->ampdu_mlme.mtx);
+}
+
 void ieee80211_process_addba_request(struct ieee80211_local *local,
 				     struct sta_info *sta,
 				     struct ieee80211_mgmt *mgmt,
@@ -449,7 +459,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 }
 
 void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
-				 const u8 *addr, unsigned int bit)
+				 const u8 *addr, unsigned int tid)
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	struct ieee80211_local *local = sdata->local;
@@ -460,7 +470,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
 	if (!sta)
 		goto unlock;
 
-	set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl);
+	set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl);
 	ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
  unlock:
 	rcu_read_unlock();
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index cbd48762256c..bef516ec47f9 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -226,7 +226,11 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable)
 		clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags);
 
 	clear_bit(IEEE80211_TXQ_STOP, &txqi->flags);
+	local_bh_disable();
+	rcu_read_lock();
 	drv_wake_tx_queue(sta->sdata->local, txqi);
+	rcu_read_unlock();
+	local_bh_enable();
 }
 
 /*
@@ -436,7 +440,7 @@ static void sta_addba_resp_timer_expired(unsigned long data)
 	    test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) {
 		rcu_read_unlock();
 		ht_dbg(sta->sdata,
-		       "timer expired on %pM tid %d but we are not (or no longer) expecting addBA response there\n",
+		       "timer expired on %pM tid %d not expecting addBA response\n",
 		       sta->sta.addr, tid);
 		return;
 	}
@@ -639,7 +643,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
 	    time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] +
 			HT_AGG_RETRIES_PERIOD)) {
 		ht_dbg(sdata,
-		       "BA request denied - waiting a grace period after %d failed requests on %pM tid %u\n",
+		       "BA request denied - %d failed requests on %pM tid %u\n",
 		       sta->ampdu_mlme.addba_req_num[tid], sta->sta.addr, tid);
 		ret = -EBUSY;
 		goto err_unlock_sta;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a354f1939e49..fb15d3b97cb2 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2727,12 +2727,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
 	if (!ieee80211_sdata_running(sdata))
 		return -ENETDOWN;
 
-	if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
-		ret = drv_set_bitrate_mask(local, sdata, mask);
-		if (ret)
-			return ret;
-	}
-
 	/*
 	 * If active validate the setting and reject it if it doesn't leave
 	 * at least one basic rate usable, since we really have to be able
@@ -2748,6 +2742,12 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
 			return -EINVAL;
 	}
 
+	if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
+		ret = drv_set_bitrate_mask(local, sdata, mask);
+		if (ret)
+			return ret;
+	}
+
 	for (i = 0; i < NUM_NL80211_BANDS; i++) {
 		struct ieee80211_supported_band *sband = wiphy->bands[i];
 		int j;
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 1956b3115dd5..d90a8f9cc3fd 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __MAC80211_DEBUG_H
 #define __MAC80211_DEBUG_H
 #include <net/cfg80211.h>
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 60c35afee29d..d2c424787463 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __MAC80211_DEBUGFS_H
 #define __MAC80211_DEBUGFS_H
 
diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h
index 32adc77e9c77..1cd7b8bff56c 100644
--- a/net/mac80211/debugfs_key.h
+++ b/net/mac80211/debugfs_key.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __MAC80211_DEBUGFS_KEY_H
 #define __MAC80211_DEBUGFS_KEY_H
 
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index 9f5501a9a795..a7e9d8d518f9 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* routines exported for debugfs handling */
 
 #ifndef __IEEE80211_DEBUGFS_NETDEV_H
diff --git a/net/mac80211/debugfs_sta.h b/net/mac80211/debugfs_sta.h
index 8b608903259f..d2e7c27ad6d1 100644
--- a/net/mac80211/debugfs_sta.h
+++ b/net/mac80211/debugfs_sta.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __MAC80211_DEBUGFS_STA_H
 #define __MAC80211_DEBUGFS_STA_H
 
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 09f77e4a8a79..c7f93fd9ca7a 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
 * Portions of this file
 * Copyright(c) 2016 Intel Deutschland GmbH
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index c92df492e898..41f5e48f8021 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -290,16 +290,36 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 {
 	int i;
 
+	mutex_lock(&sta->ampdu_mlme.mtx);
 	for (i = 0; i <  IEEE80211_NUM_TIDS; i++) {
-		__ieee80211_stop_tx_ba_session(sta, i, reason);
-		__ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
-					       WLAN_REASON_QSTA_LEAVE_QBSS,
-					       reason != AGG_STOP_DESTROY_STA &&
-					       reason != AGG_STOP_PEER_REQUEST);
+		___ieee80211_stop_tx_ba_session(sta, i, reason);
+		___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
+						WLAN_REASON_QSTA_LEAVE_QBSS,
+						reason != AGG_STOP_DESTROY_STA &&
+						reason != AGG_STOP_PEER_REQUEST);
 	}
+	mutex_unlock(&sta->ampdu_mlme.mtx);
 
 	/* stopping might queue the work again - so cancel only afterwards */
 	cancel_work_sync(&sta->ampdu_mlme.work);
+
+	/*
+	 * In case the tear down is part of a reconfigure due to HW restart
+	 * request, it is possible that the low level driver requested to stop
+	 * the BA session, so handle it to properly clean tid_tx data.
+	 */
+	mutex_lock(&sta->ampdu_mlme.mtx);
+	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
+		struct tid_ampdu_tx *tid_tx =
+			rcu_dereference_protected_tid_tx(sta, i);
+
+		if (!tid_tx)
+			continue;
+
+		if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state))
+			ieee80211_stop_tx_ba_cb(sta, i, tid_tx);
+	}
+	mutex_unlock(&sta->ampdu_mlme.mtx);
 }
 
 void ieee80211_ba_session_work(struct work_struct *work)
@@ -333,9 +353,9 @@ void ieee80211_ba_session_work(struct work_struct *work)
 
 		if (test_and_clear_bit(tid,
 				       sta->ampdu_mlme.tid_rx_manage_offl))
-			__ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
-							IEEE80211_MAX_AMPDU_BUF,
-							false, true);
+			___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
+							 IEEE80211_MAX_AMPDU_BUF,
+							 false, true);
 
 		if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
 				       sta->ampdu_mlme.tid_rx_manage_offl))
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 2197c62a0a6e..68f874e73561 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1760,6 +1760,10 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 				     u8 dialog_token, u16 timeout,
 				     u16 start_seq_num, u16 ba_policy, u16 tid,
 				     u16 buf_size, bool tx, bool auto_seq);
+void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
+				      u8 dialog_token, u16 timeout,
+				      u16 start_seq_num, u16 ba_policy, u16 tid,
+				      u16 buf_size, bool tx, bool auto_seq);
 void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 					 enum ieee80211_agg_stop_reason reason);
 void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
@@ -2005,6 +2009,8 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
 			struct txq_info *txq, int tid);
 void ieee80211_txq_purge(struct ieee80211_local *local,
 			 struct txq_info *txqi);
+void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
+			       struct ieee80211_sub_if_data *sdata);
 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 u16 transaction, u16 auth_alg, u16 status,
 			 const u8 *extra, size_t extra_len, const u8 *bssid,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 9228ac73c429..13b16f90e1cf 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -731,7 +731,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
 	    local->ops->wake_tx_queue) {
 		/* XXX: for AP_VLAN, actually track AP queues */
-		netif_tx_start_all_queues(dev);
+		if (dev)
+			netif_tx_start_all_queues(dev);
 	} else if (dev) {
 		unsigned long flags;
 		int n_acs = IEEE80211_NUM_ACS;
@@ -793,7 +794,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 			      bool going_down)
 {
 	struct ieee80211_local *local = sdata->local;
-	struct fq *fq = &local->fq;
 	unsigned long flags;
 	struct sk_buff *skb, *tmp;
 	u32 hw_reconf_flags = 0;
@@ -993,8 +993,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		skb_queue_purge(&sdata->skb_queue);
 	}
 
-	sdata->bss = NULL;
-
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 	for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
 		skb_queue_walk_safe(&local->pending[i], skb, tmp) {
@@ -1007,13 +1005,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	}
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 
-	if (sdata->vif.txq) {
-		struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+		ieee80211_txq_remove_vlan(local, sdata);
 
-		spin_lock_bh(&fq->lock);
-		ieee80211_txq_purge(local, txqi);
-		spin_unlock_bh(&fq->lock);
-	}
+	sdata->bss = NULL;
 
 	if (local->open_count == 0)
 		ieee80211_clear_tx_pending(local);
@@ -1758,7 +1753,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 				 sizeof(void *));
 		int txq_size = 0;
 
-		if (local->ops->wake_tx_queue)
+		if (local->ops->wake_tx_queue &&
+		    type != NL80211_IFTYPE_AP_VLAN &&
+		    type != NL80211_IFTYPE_MONITOR)
 			txq_size += sizeof(struct txq_info) +
 				    local->hw.txq_data_size;
 
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index a98fc2b5e0dc..938049395f90 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -4,7 +4,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007-2008	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright 2015	Intel Deutschland GmbH
+ * Copyright 2015-2017	Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <net/mac80211.h>
+#include <crypto/algapi.h>
 #include <asm/unaligned.h>
 #include "ieee80211_i.h"
 #include "driver-ops.h"
@@ -609,6 +610,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key)
 	ieee80211_key_free_common(key);
 }
 
+static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata,
+				    struct ieee80211_key *old,
+				    struct ieee80211_key *new)
+{
+	u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP];
+	u8 *tk_old, *tk_new;
+
+	if (!old || new->conf.keylen != old->conf.keylen)
+		return false;
+
+	tk_old = old->conf.key;
+	tk_new = new->conf.key;
+
+	/*
+	 * In station mode, don't compare the TX MIC key, as it's never used
+	 * and offloaded rekeying may not care to send it to the host. This
+	 * is the case in iwlwifi, for example.
+	 */
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    new->conf.cipher == WLAN_CIPHER_SUITE_TKIP &&
+	    new->conf.keylen == WLAN_KEY_LEN_TKIP &&
+	    !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+		memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP);
+		memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP);
+		memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+		memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+		tk_old = tkip_old;
+		tk_new = tkip_new;
+	}
+
+	return !crypto_memneq(tk_old, tk_new, new->conf.keylen);
+}
+
 int ieee80211_key_link(struct ieee80211_key *key,
 		       struct ieee80211_sub_if_data *sdata,
 		       struct sta_info *sta)
@@ -620,9 +654,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
 
 	pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
 	idx = key->conf.keyidx;
-	key->local = sdata->local;
-	key->sdata = sdata;
-	key->sta = sta;
 
 	mutex_lock(&sdata->local->key_mtx);
 
@@ -633,6 +664,20 @@ int ieee80211_key_link(struct ieee80211_key *key,
 	else
 		old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
 
+	/*
+	 * Silently accept key re-installation without really installing the
+	 * new version of the key to avoid nonce reuse or replay issues.
+	 */
+	if (ieee80211_key_identical(sdata, old_key, key)) {
+		ieee80211_key_free_unused(key);
+		ret = 0;
+		goto out;
+	}
+
+	key->local = sdata->local;
+	key->sdata = sdata;
+	key->sta = sta;
+
 	increment_tailroom_need_count(sdata);
 
 	ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
@@ -648,6 +693,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
 		ret = 0;
 	}
 
+ out:
 	mutex_unlock(&sdata->local->key_mtx);
 
 	return ret;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index a550c707cd8a..7a76c4a6df30 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -675,8 +675,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 	enum nl80211_band band;
 	u8 *pos;
 	struct ieee80211_sub_if_data *sdata;
-	int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) +
-		      sizeof(mgmt->u.beacon);
+	int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon);
 
 	sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
 	rcu_read_lock();
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 7e5f271e3c30..465b7853edc0 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -275,6 +275,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
 			   u8 *hw_addr, struct ieee802_11_elems *ie);
 bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
 u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
+void mesh_plink_timer(struct timer_list *t);
 void mesh_plink_broken(struct sta_info *sta);
 u32 mesh_plink_deactivate(struct sta_info *sta);
 u32 mesh_plink_open(struct sta_info *sta);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d8bbd0d2225a..146ec6c0f12f 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -111,8 +111,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos, ie_len;
-	int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) +
-		      sizeof(mgmt->u.action.u.mesh_action);
+	int hdr_len = offsetofend(struct ieee80211_mgmt,
+				  u.action.u.mesh_action);
 
 	skb = dev_alloc_skb(local->tx_headroom +
 			    hdr_len +
@@ -242,8 +242,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos, ie_len;
-	int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) +
-		      sizeof(mgmt->u.action.u.mesh_action);
+	int hdr_len = offsetofend(struct ieee80211_mgmt,
+				  u.action.u.mesh_action);
 
 	if (time_before(jiffies, ifmsh->next_perr))
 		return -EAGAIN;
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index f69c6c38ca43..e2d00cce3c17 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -220,8 +220,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 	bool include_plid = false;
 	u16 peering_proto = 0;
 	u8 *pos, ie_len = 4;
-	int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) +
-		      sizeof(mgmt->u.action.u.self_prot);
+	int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
 	int err = -ENOMEM;
 
 	skb = dev_alloc_skb(local->tx_headroom +
@@ -604,8 +603,9 @@ out:
 	ieee80211_mbss_info_change_notify(sdata, changed);
 }
 
-static void mesh_plink_timer(unsigned long data)
+void mesh_plink_timer(struct timer_list *t)
 {
+	struct mesh_sta *mesh = from_timer(mesh, t, plink_timer);
 	struct sta_info *sta;
 	u16 reason = 0;
 	struct ieee80211_sub_if_data *sdata;
@@ -617,7 +617,7 @@ static void mesh_plink_timer(unsigned long data)
 	 * del_timer_sync() this timer after having made sure
 	 * it cannot be readded (by deleting the plink.)
 	 */
-	sta = (struct sta_info *) data;
+	sta = mesh->plink_sta;
 
 	if (sta->sdata->local->quiescing)
 		return;
@@ -697,11 +697,8 @@ static void mesh_plink_timer(unsigned long data)
 
 static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout)
 {
-	sta->mesh->plink_timer.expires = jiffies + msecs_to_jiffies(timeout);
-	sta->mesh->plink_timer.data = (unsigned long) sta;
-	sta->mesh->plink_timer.function = mesh_plink_timer;
 	sta->mesh->plink_timeout = timeout;
-	add_timer(&sta->mesh->plink_timer);
+	mod_timer(&sta->mesh->plink_timer, jiffies + msecs_to_jiffies(timeout));
 }
 
 static bool llid_in_use(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b588e593b0ec..e4ededa1909d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -145,7 +145,6 @@ static u32
 ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_supported_band *sband,
 			     struct ieee80211_channel *channel,
-			     const struct ieee80211_ht_cap *ht_cap,
 			     const struct ieee80211_ht_operation *ht_oper,
 			     const struct ieee80211_vht_operation *vht_oper,
 			     struct cfg80211_chan_def *chandef, bool tracking)
@@ -163,20 +162,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 	chandef->center_freq1 = channel->center_freq;
 	chandef->center_freq2 = 0;
 
-	if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) {
+	if (!ht_oper || !sta_ht_cap.ht_supported) {
 		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
 		goto out;
 	}
 
 	chandef->width = NL80211_CHAN_WIDTH_20;
 
-	if (!(ht_cap->cap_info &
-	      cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) {
-		ret = IEEE80211_STA_DISABLE_40MHZ;
-		vht_chandef = *chandef;
-		goto out;
-	}
-
 	ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
 						  channel->band);
 	/* check that channel matches the right operating channel */
@@ -344,7 +336,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 
 	/* calculate new channel (type) based on HT/VHT operation IEs */
 	flags = ieee80211_determine_chantype(sdata, sband, chan,
-					     ht_cap, ht_oper, vht_oper,
+					     ht_oper, vht_oper,
 					     &chandef, true);
 
 	/*
@@ -780,11 +772,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 			WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
 			WLAN_EID_HT_CAPABILITY,
 			WLAN_EID_BSS_COEX_2040,
+			/* luckily this is almost always there */
 			WLAN_EID_EXT_CAPABILITY,
 			WLAN_EID_QOS_TRAFFIC_CAPA,
 			WLAN_EID_TIM_BCAST_REQ,
 			WLAN_EID_INTERWORKING,
-			/* 60GHz doesn't happen right now */
+			/* 60 GHz (Multi-band, DMG, MMS) can't happen */
 			WLAN_EID_VHT_CAPABILITY,
 			WLAN_EID_OPMODE_NOTIF,
 		};
@@ -811,22 +804,16 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	/* if present, add any custom IEs that go before VHT */
 	if (assoc_data->ie_len) {
 		static const u8 before_vht[] = {
-			WLAN_EID_SSID,
-			WLAN_EID_SUPP_RATES,
-			WLAN_EID_EXT_SUPP_RATES,
-			WLAN_EID_PWR_CAPABILITY,
-			WLAN_EID_SUPPORTED_CHANNELS,
-			WLAN_EID_RSN,
-			WLAN_EID_QOS_CAPA,
-			WLAN_EID_RRM_ENABLED_CAPABILITIES,
-			WLAN_EID_MOBILITY_DOMAIN,
-			WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
-			WLAN_EID_HT_CAPABILITY,
+			/*
+			 * no need to list the ones split off before HT
+			 * or generated here
+			 */
 			WLAN_EID_BSS_COEX_2040,
 			WLAN_EID_EXT_CAPABILITY,
 			WLAN_EID_QOS_TRAFFIC_CAPA,
 			WLAN_EID_TIM_BCAST_REQ,
 			WLAN_EID_INTERWORKING,
+			/* 60 GHz (Multi-band, DMG, MMS) can't happen */
 		};
 
 		/* RIC already taken above, so no need to handle here anymore */
@@ -3155,7 +3142,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 	if (len < 24 + 6)
 		return;
 
-	reassoc = ieee80211_is_reassoc_req(mgmt->frame_control);
+	reassoc = ieee80211_is_reassoc_resp(mgmt->frame_control);
 	capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
 	status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
 	aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
@@ -4317,7 +4304,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 
 	ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
 						     cbss->channel,
-						     ht_cap, ht_oper, vht_oper,
+						     ht_oper, vht_oper,
 						     &chandef, false);
 
 	sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index f8e7a8bbc618..faf4f6055000 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -707,6 +707,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
 	if (!cookie)
 		return -ENOENT;
 
+	flush_work(&local->hw_roc_start);
+
 	mutex_lock(&local->mtx);
 	list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
 		if (!mgmt_tx && roc->cookie != cookie)
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index a87d195c4a61..38c45e1dafd8 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <net/mac80211.h>
 #include <net/rtnetlink.h>
 
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 47d2ed570470..ef2becaade50 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -7,7 +7,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2013-2015  Intel Mobile Communications GmbH
- * Copyright 2016  Intel Deutschland GmbH
+ * Copyright 2016-2017  Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -183,6 +183,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 	return bss;
 }
 
+static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
+					u32 scan_flags, const u8 *da)
+{
+	if (!sdata)
+		return false;
+	/* accept broadcast for OCE */
+	if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
+	    is_broadcast_ether_addr(da))
+		return true;
+	if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
+		return true;
+	return ether_addr_equal(da, sdata->vif.addr);
+}
+
 void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
 {
 	struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
@@ -208,19 +222,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
 	if (ieee80211_is_probe_resp(mgmt->frame_control)) {
 		struct cfg80211_scan_request *scan_req;
 		struct cfg80211_sched_scan_request *sched_scan_req;
+		u32 scan_req_flags = 0, sched_scan_req_flags = 0;
 
 		scan_req = rcu_dereference(local->scan_req);
 		sched_scan_req = rcu_dereference(local->sched_scan_req);
 
-		/* ignore ProbeResp to foreign address unless scanning
-		 * with randomised address
+		if (scan_req)
+			scan_req_flags = scan_req->flags;
+
+		if (sched_scan_req)
+			sched_scan_req_flags = sched_scan_req->flags;
+
+		/* ignore ProbeResp to foreign address or non-bcast (OCE)
+		 * unless scanning with randomised address
 		 */
-		if (!(sdata1 &&
-		      (ether_addr_equal(mgmt->da, sdata1->vif.addr) ||
-		       scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) &&
-		    !(sdata2 &&
-		      (ether_addr_equal(mgmt->da, sdata2->vif.addr) ||
-		       sched_scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)))
+		if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
+						 mgmt->da) &&
+		    !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
+						 mgmt->da))
 			return;
 
 		elements = mgmt->u.probe_resp.variable;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 69615016d5bf..a3060e55122c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -329,10 +329,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 		sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
 		if (!sta->mesh)
 			goto free;
+		sta->mesh->plink_sta = sta;
 		spin_lock_init(&sta->mesh->plink_lock);
 		if (ieee80211_vif_is_mesh(&sdata->vif) &&
 		    !sdata->u.mesh.user_mpm)
-			init_timer(&sta->mesh->plink_timer);
+			timer_setup(&sta->mesh->plink_timer, mesh_plink_timer,
+				    0);
 		sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
 	}
 #endif
@@ -515,6 +517,31 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local,
 	return err;
 }
 
+static void
+ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+	bool allow_p2p_go_ps = sdata->vif.p2p;
+	struct sta_info *sta;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sta, &local->sta_list, list) {
+		if (sdata != sta->sdata ||
+		    !test_sta_flag(sta, WLAN_STA_ASSOC))
+			continue;
+		if (!sta->sta.support_p2p_ps) {
+			allow_p2p_go_ps = false;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
+		sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
+	}
+}
+
 /*
  * should be called with sta_mtx locked
  * this function replaces the mutex lock
@@ -561,6 +588,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
 		goto out_remove;
 
 	set_sta_flag(sta, WLAN_STA_INSERTED);
+
+	if (sta->sta_state >= IEEE80211_STA_ASSOC) {
+		ieee80211_recalc_min_chandef(sta->sdata);
+		if (!sta->sta.support_p2p_ps)
+			ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
+	}
+
 	/* accept BA sessions now */
 	clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
 
@@ -1788,31 +1822,6 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
 }
 EXPORT_SYMBOL(ieee80211_sta_set_buffered);
 
-static void
-ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
-{
-	struct ieee80211_local *local = sdata->local;
-	bool allow_p2p_go_ps = sdata->vif.p2p;
-	struct sta_info *sta;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(sta, &local->sta_list, list) {
-		if (sdata != sta->sdata ||
-		    !test_sta_flag(sta, WLAN_STA_ASSOC))
-			continue;
-		if (!sta->sta.support_p2p_ps) {
-			allow_p2p_go_ps = false;
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
-		sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
-		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
-	}
-}
-
 int sta_info_move_state(struct sta_info *sta,
 			enum ieee80211_sta_state new_state)
 {
@@ -2008,7 +2017,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
 
 static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
 {
-	u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
+	u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
 
 	if (rate == STA_STATS_RATE_INVALID)
 		return -EINVAL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 3acbdfa9f649..5c54acd10562 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -344,6 +344,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
  * @plink_state: peer link state
  * @plink_timeout: timeout of peer link
  * @plink_timer: peer link watch timer
+ * @plink_sta: peer link watch timer's sta_info
  * @t_offset: timing offset relative to this host
  * @t_offset_setpoint: reference timing offset of this sta to be used when
  * 	calculating clockdrift
@@ -356,6 +357,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
  */
 struct mesh_sta {
 	struct timer_list plink_timer;
+	struct sta_info *plink_sta;
 
 	s64 t_offset;
 	s64 t_offset_setpoint;
@@ -398,7 +400,7 @@ struct ieee80211_sta_rx_stats {
 	u64 msdu[IEEE80211_NUM_TIDS + 1];
 };
 
-/**
+/*
  * The bandwidth threshold below which the per-station CoDel parameters will be
  * scaled to be more lenient (to prevent starvation of slow stations). This
  * value will be scaled by the number of active stations when it is being
diff --git a/net/mac80211/trace.c b/net/mac80211/trace.c
index edfe0c170a1c..837857261b66 100644
--- a/net/mac80211/trace.c
+++ b/net/mac80211/trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* bug in tracepoint.h, it should include this */
 #include <linux/module.h>
 
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3d9ac17af407..591ad02e1fa4 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
 * Portions of this file
 * Copyright(c) 2016 Intel Deutschland GmbH
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
index 768f7c22a190..366b9e6f043e 100644
--- a/net/mac80211/trace_msg.h
+++ b/net/mac80211/trace_msg.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifdef CONFIG_MAC80211_MESSAGE_TRACING
 
 #if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8858f4f185e9..7b8154474b9e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1276,11 +1276,6 @@ static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
 	IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
 }
 
-static void ieee80211_set_skb_vif(struct sk_buff *skb, struct txq_info *txqi)
-{
-	IEEE80211_SKB_CB(skb)->control.vif = txqi->txq.vif;
-}
-
 static u32 codel_skb_len_func(const struct sk_buff *skb)
 {
 	return skb->len;
@@ -1401,6 +1396,40 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
 		       fq_flow_get_default_func);
 }
 
+static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin,
+				struct fq_flow *flow, struct sk_buff *skb,
+				void *data)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+	return info->control.vif == data;
+}
+
+void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
+			       struct ieee80211_sub_if_data *sdata)
+{
+	struct fq *fq = &local->fq;
+	struct txq_info *txqi;
+	struct fq_tin *tin;
+	struct ieee80211_sub_if_data *ap;
+
+	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
+		return;
+
+	ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap);
+
+	if (!ap->vif.txq)
+		return;
+
+	txqi = to_txq_info(ap->vif.txq);
+	tin = &txqi->tin;
+
+	spin_lock_bh(&fq->lock);
+	fq_tin_filter(fq, tin, fq_vlan_filter_func, &sdata->vif,
+		      fq_skb_free_func);
+	spin_unlock_bh(&fq->lock);
+}
+
 void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
 			struct sta_info *sta,
 			struct txq_info *txqi, int tid)
@@ -3414,6 +3443,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 	struct ieee80211_tx_info *info;
 	struct ieee80211_tx_data tx;
 	ieee80211_tx_result r;
+	struct ieee80211_vif *vif;
 
 	spin_lock_bh(&fq->lock);
 
@@ -3430,8 +3460,6 @@ begin:
 	if (!skb)
 		goto out;
 
-	ieee80211_set_skb_vif(skb, txqi);
-
 	hdr = (struct ieee80211_hdr *)skb->data;
 	info = IEEE80211_SKB_CB(skb);
 
@@ -3488,6 +3516,34 @@ begin:
 		}
 	}
 
+	switch (tx.sdata->vif.type) {
+	case NL80211_IFTYPE_MONITOR:
+		if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+			vif = &tx.sdata->vif;
+			break;
+		}
+		tx.sdata = rcu_dereference(local->monitor_sdata);
+		if (tx.sdata) {
+			vif = &tx.sdata->vif;
+			info->hw_queue =
+				vif->hw_queue[skb_get_queue_mapping(skb)];
+		} else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) {
+			ieee80211_free_txskb(&local->hw, skb);
+			goto begin;
+		} else {
+			vif = NULL;
+		}
+		break;
+	case NL80211_IFTYPE_AP_VLAN:
+		tx.sdata = container_of(tx.sdata->bss,
+					struct ieee80211_sub_if_data, u.ap);
+		/* fall through */
+	default:
+		vif = &tx.sdata->vif;
+		break;
+	}
+
+	IEEE80211_SKB_CB(skb)->control.vif = vif;
 out:
 	spin_unlock_bh(&fq->lock);
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 259698de569f..d57e5f6bd8b6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1392,10 +1392,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 	/* insert custom IEs that go before HT */
 	if (ie && ie_len) {
 		static const u8 before_ht[] = {
-			WLAN_EID_SSID,
-			WLAN_EID_SUPP_RATES,
-			WLAN_EID_REQUEST,
-			WLAN_EID_EXT_SUPP_RATES,
+			/*
+			 * no need to list the ones split off already
+			 * (or generated here)
+			 */
 			WLAN_EID_DS_PARAMS,
 			WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
 		};
@@ -1424,20 +1424,17 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 	/* insert custom IEs that go before VHT */
 	if (ie && ie_len) {
 		static const u8 before_vht[] = {
-			WLAN_EID_SSID,
-			WLAN_EID_SUPP_RATES,
-			WLAN_EID_REQUEST,
-			WLAN_EID_EXT_SUPP_RATES,
-			WLAN_EID_DS_PARAMS,
-			WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
-			WLAN_EID_HT_CAPABILITY,
+			/*
+			 * no need to list the ones split off already
+			 * (or generated here)
+			 */
 			WLAN_EID_BSS_COEX_2040,
 			WLAN_EID_EXT_CAPABILITY,
 			WLAN_EID_SSID_LIST,
 			WLAN_EID_CHANNEL_USAGE,
 			WLAN_EID_INTERWORKING,
-			/* mesh ID can't happen here */
-			/* 60 GHz can't happen here right now */
+			WLAN_EID_MESH_ID,
+			/* 60 GHz (Multi-band, DMG, MMS) can't happen */
 		};
 		noffset = ieee80211_ie_split(ie, ie_len,
 					     before_vht, ARRAY_SIZE(before_vht),
@@ -2980,8 +2977,8 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_mgmt *mgmt;
 	struct ieee80211_local *local = sdata->local;
 	int freq;
-	int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) +
-			       sizeof(mgmt->u.action.u.chan_switch);
+	int hdr_len = offsetofend(struct ieee80211_mgmt,
+				  u.action.u.chan_switch);
 	u8 *pos;
 
 	if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 19ec2189d3ac..b9276ac849fa 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -386,6 +386,16 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
 
 	bw = ieee80211_sta_cap_rx_bw(sta);
 	bw = min(bw, sta->cur_max_bandwidth);
+
+	/* Don't consider AP's bandwidth for TDLS peers: section 11.23.1 of
+	 * the IEEE 802.11-2016 specification makes higher bandwidth operation
+	 * possible on the TDLS link if the peers have wider bandwidth
+	 * capability.
+	 */
+	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+	    test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
+		return bw;
+
 	bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
 
 	return bw;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 0d722ea98a1b..b58722d9de37 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -464,7 +464,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
 	pos += IEEE80211_CCMP_HDR_LEN;
 	ccmp_special_blocks(skb, pn, b_0, aad);
 	return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
-					 skb_put(skb, mic_len), mic_len);
+					 skb_put(skb, mic_len));
 }
 
 
@@ -543,7 +543,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
 				    key->u.ccmp.tfm, b_0, aad,
 				    skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
 				    data_len,
-				    skb->data + skb->len - mic_len, mic_len))
+				    skb->data + skb->len - mic_len))
 				return RX_DROP_UNUSABLE;
 		}
 
diff --git a/net/mac802154/cfg.h b/net/mac802154/cfg.h
index e2718f981e82..3bb089685500 100644
--- a/net/mac802154/cfg.h
+++ b/net/mac802154/cfg.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* mac802154 configuration hooks for cfg802154
  */
 
diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h
index fd9daf2ecec9..d23f0db98015 100644
--- a/net/mac802154/driver-ops.h
+++ b/net/mac802154/driver-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __MAC802154_DRIVER_OPS
 #define __MAC802154_DRIVER_OPS
 
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 1e1c9b20bab7..2fb703d70803 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -623,13 +623,18 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
 	u8 iv[16];
 	struct scatterlist src;
 	SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
-	int err;
+	int err, datalen;
+	unsigned char *data;
 
 	llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
-	sg_init_one(&src, skb->data, skb->len);
+	/* Compute data payload offset and data length */
+	data = skb_mac_header(skb) + skb->mac_len;
+	datalen = skb_tail_pointer(skb) - data;
+	sg_init_one(&src, data, datalen);
+
 	skcipher_request_set_tfm(req, key->tfm0);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, &src, &src, skb->len, iv);
+	skcipher_request_set_crypt(req, &src, &src, datalen, iv);
 	err = crypto_skcipher_encrypt(req);
 	skcipher_request_zero(req);
 	return err;
@@ -713,7 +718,8 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
 	if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA)
 		return -EINVAL;
 
-	if (!hdr.fc.security_enabled || hdr.sec.level == 0) {
+	if (!hdr.fc.security_enabled ||
+	    (hdr.sec.level == IEEE802154_SCF_SECLEVEL_NONE)) {
 		skb_push(skb, hlen);
 		return 0;
 	}
diff --git a/net/mac802154/trace.c b/net/mac802154/trace.c
index 863e5e6b983d..c36e3d541a42 100644
--- a/net/mac802154/trace.c
+++ b/net/mac802154/trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/module.h>
 
 #ifndef __CHECKER__
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index 6f30e0c93a16..2c8a43d3607f 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Based on net/mac80211/trace.h */
 
 #undef TRACE_SYSTEM
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 5c467ef97311..801ea9098387 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -24,6 +24,7 @@ config NET_MPLS_GSO
 
 config MPLS_ROUTING
 	tristate "MPLS: routing support"
+	depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n
 	---help---
 	 Add support for forwarding of mpls packets.
 
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index ea4f481839dd..8ca9915befc8 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -16,6 +16,7 @@
 #include <net/arp.h>
 #include <net/ip_fib.h>
 #include <net/netevent.h>
+#include <net/ip_tunnels.h>
 #include <net/netns/generic.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -39,6 +40,36 @@ static int one = 1;
 static int label_limit = (1 << 20) - 1;
 static int ttl_max = 255;
 
+#if IS_ENABLED(CONFIG_NET_IP_TUNNEL)
+static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
+{
+	return sizeof(struct mpls_shim_hdr);
+}
+
+static const struct ip_tunnel_encap_ops mpls_iptun_ops = {
+	.encap_hlen	= ipgre_mpls_encap_hlen,
+};
+
+static int ipgre_tunnel_encap_add_mpls_ops(void)
+{
+	return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
+}
+
+static void ipgre_tunnel_encap_del_mpls_ops(void)
+{
+	ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
+}
+#else
+static int ipgre_tunnel_encap_add_mpls_ops(void)
+{
+	return 0;
+}
+
+static void ipgre_tunnel_encap_del_mpls_ops(void)
+{
+}
+#endif
+
 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
 		       struct nlmsghdr *nlh, struct net *net, u32 portid,
 		       unsigned int nlm_flags);
@@ -2479,12 +2510,16 @@ static int __init mpls_init(void)
 
 	rtnl_af_register(&mpls_af_ops);
 
-	rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
-	rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0);
+	rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0);
 	rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
-		      NULL);
+		      0);
 	rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
-		      mpls_netconf_dump_devconf, NULL);
+		      mpls_netconf_dump_devconf, 0);
+	err = ipgre_tunnel_encap_add_mpls_ops();
+	if (err)
+		pr_err("Can't add mpls over gre tunnel ops\n");
+
 	err = 0;
 out:
 	return err;
@@ -2502,6 +2537,7 @@ static void __exit mpls_exit(void)
 	dev_remove_pack(&mpls_packet_type);
 	unregister_netdevice_notifier(&mpls_dev_notifier);
 	unregister_pernet_subsys(&mpls_net_ops);
+	ipgre_tunnel_encap_del_mpls_ops();
 }
 module_exit(mpls_exit);
 
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index cf65aec2e551..768a302879b4 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef MPLS_INTERNAL_H
 #define MPLS_INTERNAL_H
 #include <net/mpls.h>
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 1308a56f2591..d30f7bd741d0 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -180,6 +180,7 @@ struct ncsi_channel {
 #define NCSI_CHANNEL_INACTIVE		1
 #define NCSI_CHANNEL_ACTIVE		2
 #define NCSI_CHANNEL_INVISIBLE		3
+	bool                        reconfigure_needed;
 	spinlock_t                  lock;	/* Protect filters etc */
 	struct ncsi_package         *package;
 	struct ncsi_channel_version version;
@@ -235,6 +236,9 @@ enum {
 	ncsi_dev_state_probe_dp,
 	ncsi_dev_state_config_sp	= 0x0301,
 	ncsi_dev_state_config_cis,
+	ncsi_dev_state_config_clear_vids,
+	ncsi_dev_state_config_svf,
+	ncsi_dev_state_config_ev,
 	ncsi_dev_state_config_sma,
 	ncsi_dev_state_config_ebf,
 #if IS_ENABLED(CONFIG_IPV6)
@@ -253,6 +257,12 @@ enum {
 	ncsi_dev_state_suspend_done
 };
 
+struct vlan_vid {
+	struct list_head list;
+	__be16 proto;
+	u16 vid;
+};
+
 struct ncsi_dev_priv {
 	struct ncsi_dev     ndev;            /* Associated NCSI device     */
 	unsigned int        flags;           /* NCSI device flags          */
@@ -276,6 +286,8 @@ struct ncsi_dev_priv {
 	struct work_struct  work;            /* For channel management     */
 	struct packet_type  ptype;           /* NCSI packet Rx handler     */
 	struct list_head    node;            /* Form NCSI device list      */
+#define NCSI_MAX_VLAN_VIDS	15
+	struct list_head    vlan_vids;       /* List of active VLAN IDs */
 };
 
 struct ncsi_cmd_arg {
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 6898e7229285..67e708e98ccf 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -73,6 +73,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
 	ncm->data[2] = data;
 	ncm->data[4] = ntohl(lsc->oem_status);
 
+	netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
+		    nc->id, data & 0x1 ? "up" : "down");
+
 	chained = !list_empty(&nc->link);
 	state = nc->state;
 	spin_unlock_irqrestore(&nc->lock, flags);
@@ -145,6 +148,8 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
 	ncm = &nc->modes[NCSI_MODE_LINK];
 	hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
 	ncm->data[3] = ntohl(hncdsc->status);
+	netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n",
+		    nc->id, ncm->data[3] & 0x3 ? "up" : "down");
 	if (!list_empty(&nc->link) ||
 	    nc->state != NCSI_CHANNEL_ACTIVE) {
 		spin_unlock_irqrestore(&nc->lock, flags);
@@ -187,7 +192,7 @@ static struct ncsi_aen_handler {
 } ncsi_aen_handlers[] = {
 	{ NCSI_PKT_AEN_LSC,    12, ncsi_aen_handler_lsc    },
 	{ NCSI_PKT_AEN_CR,      4, ncsi_aen_handler_cr     },
-	{ NCSI_PKT_AEN_HNCDSC,  4, ncsi_aen_handler_hncdsc }
+	{ NCSI_PKT_AEN_HNCDSC,  8, ncsi_aen_handler_hncdsc }
 };
 
 int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
@@ -212,10 +217,18 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
 	}
 
 	ret = ncsi_validate_aen_pkt(h, nah->payload);
-	if (ret)
+	if (ret) {
+		netdev_warn(ndp->ndev.dev,
+			    "NCSI: 'bad' packet ignored for AEN type 0x%x\n",
+			    h->type);
 		goto out;
+	}
 
 	ret = nah->handler(ndp, h);
+	if (ret)
+		netdev_err(ndp->ndev.dev,
+			   "NCSI: Handler for AEN type 0x%x returned %d\n",
+			   h->type, ret);
 out:
 	consume_skb(skb);
 	return ret;
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 5e03ed190e18..7567ca63aae2 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -139,9 +139,9 @@ static int ncsi_cmd_handler_svf(struct sk_buff *skb,
 	struct ncsi_cmd_svf_pkt *cmd;
 
 	cmd = skb_put_zero(skb, sizeof(*cmd));
-	cmd->vlan = htons(nca->words[0]);
-	cmd->index = nca->bytes[2];
-	cmd->enable = nca->bytes[3];
+	cmd->vlan = htons(nca->words[1]);
+	cmd->index = nca->bytes[6];
+	cmd->enable = nca->bytes[7];
 	ncsi_cmd_build_header(&cmd->cmd.common, nca);
 
 	return 0;
@@ -153,7 +153,7 @@ static int ncsi_cmd_handler_ev(struct sk_buff *skb,
 	struct ncsi_cmd_ev_pkt *cmd;
 
 	cmd = skb_put_zero(skb, sizeof(*cmd));
-	cmd->mode = nca->bytes[0];
+	cmd->mode = nca->bytes[3];
 	ncsi_cmd_build_header(&cmd->cmd.common, nca);
 
 	return 0;
@@ -228,7 +228,7 @@ static struct ncsi_cmd_handler {
 	{ NCSI_PKT_CMD_AE,     8, ncsi_cmd_handler_ae      },
 	{ NCSI_PKT_CMD_SL,     8, ncsi_cmd_handler_sl      },
 	{ NCSI_PKT_CMD_GLS,    0, ncsi_cmd_handler_default },
-	{ NCSI_PKT_CMD_SVF,    4, ncsi_cmd_handler_svf     },
+	{ NCSI_PKT_CMD_SVF,    8, ncsi_cmd_handler_svf     },
 	{ NCSI_PKT_CMD_EV,     4, ncsi_cmd_handler_ev      },
 	{ NCSI_PKT_CMD_DV,     0, ncsi_cmd_handler_default },
 	{ NCSI_PKT_CMD_SMA,    8, ncsi_cmd_handler_sma     },
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index a3bd5fa8ad09..a2b904a718c6 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -38,6 +38,25 @@ static inline int ncsi_filter_size(int table)
 	return sizes[table];
 }
 
+static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+{
+	struct ncsi_channel_filter *ncf;
+	int size;
+
+	ncf = nc->filters[table];
+	if (!ncf)
+		return NULL;
+
+	size = ncsi_filter_size(table);
+	if (size < 0)
+		return NULL;
+
+	return ncf->data + size * index;
+}
+
+/* Find the first active filter in a filter table that matches the given
+ * data parameter. If data is NULL, this returns the first active filter.
+ */
 int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
 {
 	struct ncsi_channel_filter *ncf;
@@ -58,7 +77,7 @@ int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
 	index = -1;
 	while ((index = find_next_bit(bitmap, ncf->total, index + 1))
 	       < ncf->total) {
-		if (!memcmp(ncf->data + size * index, data, size)) {
+		if (!data || !memcmp(ncf->data + size * index, data, size)) {
 			spin_unlock_irqrestore(&nc->lock, flags);
 			return index;
 		}
@@ -170,6 +189,7 @@ static void ncsi_channel_monitor(unsigned long data)
 	struct ncsi_channel *nc = (struct ncsi_channel *)data;
 	struct ncsi_package *np = nc->package;
 	struct ncsi_dev_priv *ndp = np->ndp;
+	struct ncsi_channel_mode *ncm;
 	struct ncsi_cmd_arg nca;
 	bool enabled, chained;
 	unsigned int monitor_state;
@@ -183,11 +203,15 @@ static void ncsi_channel_monitor(unsigned long data)
 	monitor_state = nc->monitor.state;
 	spin_unlock_irqrestore(&nc->lock, flags);
 
-	if (!enabled || chained)
+	if (!enabled || chained) {
+		ncsi_stop_channel_monitor(nc);
 		return;
+	}
 	if (state != NCSI_CHANNEL_INACTIVE &&
-	    state != NCSI_CHANNEL_ACTIVE)
+	    state != NCSI_CHANNEL_ACTIVE) {
+		ncsi_stop_channel_monitor(nc);
 		return;
+	}
 
 	switch (monitor_state) {
 	case NCSI_CHANNEL_MONITOR_START:
@@ -198,28 +222,30 @@ static void ncsi_channel_monitor(unsigned long data)
 		nca.type = NCSI_PKT_CMD_GLS;
 		nca.req_flags = 0;
 		ret = ncsi_xmit_cmd(&nca);
-		if (ret) {
+		if (ret)
 			netdev_err(ndp->ndev.dev, "Error %d sending GLS\n",
 				   ret);
-			return;
-		}
-
 		break;
 	case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX:
 		break;
 	default:
-		if (!(ndp->flags & NCSI_DEV_HWA) &&
-		    state == NCSI_CHANNEL_ACTIVE) {
+		netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n",
+			   nc->id);
+		if (!(ndp->flags & NCSI_DEV_HWA)) {
 			ncsi_report_link(ndp, true);
 			ndp->flags |= NCSI_DEV_RESHUFFLE;
 		}
 
+		ncsi_stop_channel_monitor(nc);
+
+		ncm = &nc->modes[NCSI_MODE_LINK];
 		spin_lock_irqsave(&nc->lock, flags);
 		nc->state = NCSI_CHANNEL_INVISIBLE;
+		ncm->data[2] &= ~0x1;
 		spin_unlock_irqrestore(&nc->lock, flags);
 
 		spin_lock_irqsave(&ndp->lock, flags);
-		nc->state = NCSI_CHANNEL_INACTIVE;
+		nc->state = NCSI_CHANNEL_ACTIVE;
 		list_add_tail_rcu(&nc->link, &ndp->channel_queue);
 		spin_unlock_irqrestore(&ndp->lock, flags);
 		ncsi_process_next_channel(ndp);
@@ -639,6 +665,99 @@ error:
 	nd->state = ncsi_dev_state_functional;
 }
 
+/* Check the VLAN filter bitmap for a set filter, and construct a
+ * "Set VLAN Filter - Disable" packet if found.
+ */
+static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
+			 struct ncsi_cmd_arg *nca)
+{
+	int index;
+	u32 *data;
+	u16 vid;
+
+	index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, NULL);
+	if (index < 0) {
+		/* Filter table empty */
+		return -1;
+	}
+
+	data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index);
+	if (!data) {
+		netdev_err(ndp->ndev.dev,
+			   "NCSI: failed to retrieve filter %d\n", index);
+		/* Set the VLAN id to 0 - this will still disable the entry in
+		 * the filter table, but we won't know what it was.
+		 */
+		vid = 0;
+	} else {
+		vid = *(u16 *)data;
+	}
+
+	netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+		      "NCSI: removed vlan tag %u at index %d\n",
+		      vid, index + 1);
+	ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index);
+
+	nca->type = NCSI_PKT_CMD_SVF;
+	nca->words[1] = vid;
+	/* HW filter index starts at 1 */
+	nca->bytes[6] = index + 1;
+	nca->bytes[7] = 0x00;
+	return 0;
+}
+
+/* Find an outstanding VLAN tag and construct a "Set VLAN Filter - Enable"
+ * packet.
+ */
+static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
+		       struct ncsi_cmd_arg *nca)
+{
+	struct vlan_vid *vlan = NULL;
+	int index = 0;
+
+	list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
+		index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
+		if (index < 0) {
+			/* New tag to add */
+			netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+				      "NCSI: new vlan id to set: %u\n",
+				      vlan->vid);
+			break;
+		}
+		netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+			      "vid %u already at filter pos %d\n",
+			      vlan->vid, index);
+	}
+
+	if (!vlan || index >= 0) {
+		netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+			      "no vlan ids left to set\n");
+		return -1;
+	}
+
+	index = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
+	if (index < 0) {
+		netdev_err(ndp->ndev.dev,
+			   "Failed to add new VLAN tag, error %d\n", index);
+		if (index == -ENOSPC)
+			netdev_err(ndp->ndev.dev,
+				   "Channel %u already has all VLAN filters set\n",
+				   nc->id);
+		return -1;
+	}
+
+	netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+		      "NCSI: set vid %u in packet, index %u\n",
+		      vlan->vid, index + 1);
+	nca->type = NCSI_PKT_CMD_SVF;
+	nca->words[1] = vlan->vid;
+	/* HW filter index starts at 1 */
+	nca->bytes[6] = index + 1;
+	nca->bytes[7] = 0x01;
+
+	return 0;
+}
+
 static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
@@ -667,8 +786,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 		nca.package = np->id;
 		nca.channel = NCSI_RESERVED_CHANNEL;
 		ret = ncsi_xmit_cmd(&nca);
-		if (ret)
+		if (ret) {
+			netdev_err(ndp->ndev.dev,
+				   "NCSI: Failed to transmit CMD_SP\n");
 			goto error;
+		}
 
 		nd->state = ncsi_dev_state_config_cis;
 		break;
@@ -680,11 +802,17 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 		nca.package = np->id;
 		nca.channel = nc->id;
 		ret = ncsi_xmit_cmd(&nca);
-		if (ret)
+		if (ret) {
+			netdev_err(ndp->ndev.dev,
+				   "NCSI: Failed to transmit CMD_CIS\n");
 			goto error;
+		}
 
-		nd->state = ncsi_dev_state_config_sma;
+		nd->state = ncsi_dev_state_config_clear_vids;
 		break;
+	case ncsi_dev_state_config_clear_vids:
+	case ncsi_dev_state_config_svf:
+	case ncsi_dev_state_config_ev:
 	case ncsi_dev_state_config_sma:
 	case ncsi_dev_state_config_ebf:
 #if IS_ENABLED(CONFIG_IPV6)
@@ -699,11 +827,40 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 		nca.package = np->id;
 		nca.channel = nc->id;
 
+		/* Clear any active filters on the channel before setting */
+		if (nd->state == ncsi_dev_state_config_clear_vids) {
+			ret = clear_one_vid(ndp, nc, &nca);
+			if (ret) {
+				nd->state = ncsi_dev_state_config_svf;
+				schedule_work(&ndp->work);
+				break;
+			}
+			/* Repeat */
+			nd->state = ncsi_dev_state_config_clear_vids;
+		/* Add known VLAN tags to the filter */
+		} else if (nd->state == ncsi_dev_state_config_svf) {
+			ret = set_one_vid(ndp, nc, &nca);
+			if (ret) {
+				nd->state = ncsi_dev_state_config_ev;
+				schedule_work(&ndp->work);
+				break;
+			}
+			/* Repeat */
+			nd->state = ncsi_dev_state_config_svf;
+		/* Enable/Disable the VLAN filter */
+		} else if (nd->state == ncsi_dev_state_config_ev) {
+			if (list_empty(&ndp->vlan_vids)) {
+				nca.type = NCSI_PKT_CMD_DV;
+			} else {
+				nca.type = NCSI_PKT_CMD_EV;
+				nca.bytes[3] = NCSI_CAP_VLAN_NO;
+			}
+			nd->state = ncsi_dev_state_config_sma;
+		} else if (nd->state == ncsi_dev_state_config_sma) {
 		/* Use first entry in unicast filter table. Note that
 		 * the MAC filter table starts from entry 1 instead of
 		 * 0.
 		 */
-		if (nd->state == ncsi_dev_state_config_sma) {
 			nca.type = NCSI_PKT_CMD_SMA;
 			for (index = 0; index < 6; index++)
 				nca.bytes[index] = dev->dev_addr[index];
@@ -746,17 +903,45 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 		}
 
 		ret = ncsi_xmit_cmd(&nca);
-		if (ret)
+		if (ret) {
+			netdev_err(ndp->ndev.dev,
+				   "NCSI: Failed to transmit CMD %x\n",
+				   nca.type);
 			goto error;
+		}
 		break;
 	case ncsi_dev_state_config_done:
+		netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+			      "NCSI: channel %u config done\n", nc->id);
 		spin_lock_irqsave(&nc->lock, flags);
+		if (nc->reconfigure_needed) {
+			/* This channel's configuration has been updated
+			 * part-way during the config state - start the
+			 * channel configuration over
+			 */
+			nc->reconfigure_needed = false;
+			nc->state = NCSI_CHANNEL_INACTIVE;
+			spin_unlock_irqrestore(&nc->lock, flags);
+
+			spin_lock_irqsave(&ndp->lock, flags);
+			list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+			spin_unlock_irqrestore(&ndp->lock, flags);
+
+			netdev_printk(KERN_DEBUG, dev,
+				      "Dirty NCSI channel state reset\n");
+			ncsi_process_next_channel(ndp);
+			break;
+		}
+
 		if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
 			hot_nc = nc;
 			nc->state = NCSI_CHANNEL_ACTIVE;
 		} else {
 			hot_nc = NULL;
 			nc->state = NCSI_CHANNEL_INACTIVE;
+			netdev_warn(ndp->ndev.dev,
+				    "NCSI: channel %u link down after config\n",
+				    nc->id);
 		}
 		spin_unlock_irqrestore(&nc->lock, flags);
 
@@ -769,8 +954,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 		ncsi_process_next_channel(ndp);
 		break;
 	default:
-		netdev_warn(dev, "Wrong NCSI state 0x%x in config\n",
-			    nd->state);
+		netdev_alert(dev, "Wrong NCSI state 0x%x in config\n",
+			     nd->state);
 	}
 
 	return;
@@ -822,10 +1007,17 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
 	}
 
 	if (!found) {
+		netdev_warn(ndp->ndev.dev,
+			    "NCSI: No channel found with link\n");
 		ncsi_report_link(ndp, true);
 		return -ENODEV;
 	}
 
+	ncm = &found->modes[NCSI_MODE_LINK];
+	netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+		      "NCSI: Channel %u added to queue (link %s)\n",
+		      found->id, ncm->data[2] & 0x1 ? "up" : "down");
+
 out:
 	spin_lock_irqsave(&ndp->lock, flags);
 	list_add_tail_rcu(&found->link, &ndp->channel_queue);
@@ -839,12 +1031,15 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
 	struct ncsi_package *np;
 	struct ncsi_channel *nc;
 	unsigned int cap;
+	bool has_channel = false;
 
 	/* The hardware arbitration is disabled if any one channel
 	 * doesn't support explicitly.
 	 */
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			has_channel = true;
+
 			cap = nc->caps[NCSI_CAP_GENERIC].cap;
 			if (!(cap & NCSI_CAP_GENERIC_HWA) ||
 			    (cap & NCSI_CAP_GENERIC_HWA_MASK) !=
@@ -855,8 +1050,13 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
 		}
 	}
 
-	ndp->flags |= NCSI_DEV_HWA;
-	return true;
+	if (has_channel) {
+		ndp->flags |= NCSI_DEV_HWA;
+		return true;
+	}
+
+	ndp->flags &= ~NCSI_DEV_HWA;
+	return false;
 }
 
 static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
@@ -879,6 +1079,8 @@ static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
 
 	/* We can have no channels in extremely case */
 	if (list_empty(&ndp->channel_queue)) {
+		netdev_err(ndp->ndev.dev,
+			   "NCSI: No available channels for HWA\n");
 		ncsi_report_link(ndp, false);
 		return -ENOENT;
 	}
@@ -1047,6 +1249,9 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
 
 	return;
 error:
+	netdev_err(ndp->ndev.dev,
+		   "NCSI: Failed to transmit cmd 0x%x during probe\n",
+		   nca.type);
 	ncsi_report_link(ndp, true);
 }
 
@@ -1100,10 +1305,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
 	switch (old_state) {
 	case NCSI_CHANNEL_INACTIVE:
 		ndp->ndev.state = ncsi_dev_state_config;
+		netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n",
+			    nc->id);
 		ncsi_configure_channel(ndp);
 		break;
 	case NCSI_CHANNEL_ACTIVE:
 		ndp->ndev.state = ncsi_dev_state_suspend;
+		netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n",
+			    nc->id);
 		ncsi_suspend_channel(ndp);
 		break;
 	default:
@@ -1123,6 +1332,8 @@ out:
 		return ncsi_choose_active_channel(ndp);
 	}
 
+	netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+		      "NCSI: No more channels to process\n");
 	ncsi_report_link(ndp, false);
 	return -ENODEV;
 }
@@ -1191,6 +1402,147 @@ static struct notifier_block ncsi_inet6addr_notifier = {
 };
 #endif /* CONFIG_IPV6 */
 
+static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
+{
+	struct ncsi_dev *nd = &ndp->ndev;
+	struct ncsi_channel *nc;
+	struct ncsi_package *np;
+	unsigned long flags;
+	unsigned int n = 0;
+
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			spin_lock_irqsave(&nc->lock, flags);
+
+			/* Channels may be busy, mark dirty instead of
+			 * kicking if:
+			 * a) not ACTIVE (configured)
+			 * b) in the channel_queue (to be configured)
+			 * c) its ndev is in the config state
+			 */
+			if (nc->state != NCSI_CHANNEL_ACTIVE) {
+				if ((ndp->ndev.state & 0xff00) ==
+						ncsi_dev_state_config ||
+						!list_empty(&nc->link)) {
+					netdev_printk(KERN_DEBUG, nd->dev,
+						      "NCSI: channel %p marked dirty\n",
+						      nc);
+					nc->reconfigure_needed = true;
+				}
+				spin_unlock_irqrestore(&nc->lock, flags);
+				continue;
+			}
+
+			spin_unlock_irqrestore(&nc->lock, flags);
+
+			ncsi_stop_channel_monitor(nc);
+			spin_lock_irqsave(&nc->lock, flags);
+			nc->state = NCSI_CHANNEL_INACTIVE;
+			spin_unlock_irqrestore(&nc->lock, flags);
+
+			spin_lock_irqsave(&ndp->lock, flags);
+			list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+			spin_unlock_irqrestore(&ndp->lock, flags);
+
+			netdev_printk(KERN_DEBUG, nd->dev,
+				      "NCSI: kicked channel %p\n", nc);
+			n++;
+		}
+	}
+
+	return n;
+}
+
+int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct ncsi_dev_priv *ndp;
+	unsigned int n_vids = 0;
+	struct vlan_vid *vlan;
+	struct ncsi_dev *nd;
+	bool found = false;
+
+	if (vid == 0)
+		return 0;
+
+	nd = ncsi_find_dev(dev);
+	if (!nd) {
+		netdev_warn(dev, "NCSI: No net_device?\n");
+		return 0;
+	}
+
+	ndp = TO_NCSI_DEV_PRIV(nd);
+
+	/* Add the VLAN id to our internal list */
+	list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
+		n_vids++;
+		if (vlan->vid == vid) {
+			netdev_printk(KERN_DEBUG, dev,
+				      "NCSI: vid %u already registered\n", vid);
+			return 0;
+		}
+	}
+	if (n_vids >= NCSI_MAX_VLAN_VIDS) {
+		netdev_warn(dev,
+			    "tried to add vlan id %u but NCSI max already registered (%u)\n",
+			    vid, NCSI_MAX_VLAN_VIDS);
+		return -ENOSPC;
+	}
+
+	vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+	if (!vlan)
+		return -ENOMEM;
+
+	vlan->proto = proto;
+	vlan->vid = vid;
+	list_add_rcu(&vlan->list, &ndp->vlan_vids);
+
+	netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid);
+
+	found = ncsi_kick_channels(ndp) != 0;
+
+	return found ? ncsi_process_next_channel(ndp) : 0;
+}
+EXPORT_SYMBOL_GPL(ncsi_vlan_rx_add_vid);
+
+int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct vlan_vid *vlan, *tmp;
+	struct ncsi_dev_priv *ndp;
+	struct ncsi_dev *nd;
+	bool found = false;
+
+	if (vid == 0)
+		return 0;
+
+	nd = ncsi_find_dev(dev);
+	if (!nd) {
+		netdev_warn(dev, "NCSI: no net_device?\n");
+		return 0;
+	}
+
+	ndp = TO_NCSI_DEV_PRIV(nd);
+
+	/* Remove the VLAN id from our internal list */
+	list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list)
+		if (vlan->vid == vid) {
+			netdev_printk(KERN_DEBUG, dev,
+				      "NCSI: vid %u found, removing\n", vid);
+			list_del_rcu(&vlan->list);
+			found = true;
+			kfree(vlan);
+		}
+
+	if (!found) {
+		netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid);
+		return -EINVAL;
+	}
+
+	found = ncsi_kick_channels(ndp) != 0;
+
+	return found ? ncsi_process_next_channel(ndp) : 0;
+}
+EXPORT_SYMBOL_GPL(ncsi_vlan_rx_kill_vid);
+
 struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 				   void (*handler)(struct ncsi_dev *ndev))
 {
@@ -1215,6 +1567,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 	nd->handler = handler;
 	ndp->pending_req_num = 0;
 	INIT_LIST_HEAD(&ndp->channel_queue);
+	INIT_LIST_HEAD(&ndp->vlan_vids);
 	INIT_WORK(&ndp->work, ncsi_dev_work);
 
 	/* Initialize private NCSI device */
@@ -1263,10 +1616,12 @@ int ncsi_start_dev(struct ncsi_dev *nd)
 		return 0;
 	}
 
-	if (ndp->flags & NCSI_DEV_HWA)
+	if (ndp->flags & NCSI_DEV_HWA) {
+		netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n");
 		ret = ncsi_enable_hwa(ndp);
-	else
+	} else {
 		ret = ncsi_choose_active_channel(ndp);
+	}
 
 	return ret;
 }
@@ -1297,6 +1652,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
 		}
 	}
 
+	netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n");
 	ncsi_report_link(ndp, true);
 }
 EXPORT_SYMBOL_GPL(ncsi_stop_dev);
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 3ea49ed0a935..91b4b66438df 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -104,7 +104,7 @@ struct ncsi_cmd_svf_pkt {
 	unsigned char           index;     /* VLAN table index  */
 	unsigned char           enable;    /* Enable or disable */
 	__be32                  checksum;  /* Checksum          */
-	unsigned char           pad[14];
+	unsigned char           pad[18];
 };
 
 /* Enable VLAN */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 087db775b3dc..efd933ff5570 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -146,7 +146,7 @@ static int ncsi_rsp_handler_ec(struct ncsi_request *nr)
 
 	ncm = &nc->modes[NCSI_MODE_ENABLE];
 	if (ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	ncm->enable = 1;
 	return 0;
@@ -173,7 +173,7 @@ static int ncsi_rsp_handler_dc(struct ncsi_request *nr)
 
 	ncm = &nc->modes[NCSI_MODE_ENABLE];
 	if (!ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	ncm->enable = 0;
 	return 0;
@@ -217,7 +217,7 @@ static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr)
 
 	ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
 	if (ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	ncm->enable = 1;
 	return 0;
@@ -239,7 +239,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr)
 
 	ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
 	if (!ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	ncm->enable = 1;
 	return 0;
@@ -263,7 +263,7 @@ static int ncsi_rsp_handler_ae(struct ncsi_request *nr)
 	/* Check if the AEN has been enabled */
 	ncm = &nc->modes[NCSI_MODE_AEN];
 	if (ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	/* Update to AEN configuration */
 	cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd);
@@ -354,7 +354,8 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr)
 
 	/* Add or remove the VLAN filter */
 	if (!(cmd->enable & 0x1)) {
-		ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index);
+		/* HW indexes from 1 */
+		ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index - 1);
 	} else {
 		vlan = ntohs(cmd->vlan);
 		ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan);
@@ -381,7 +382,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
 	/* Check if VLAN mode has been enabled */
 	ncm = &nc->modes[NCSI_MODE_VLAN];
 	if (ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	/* Update to VLAN mode */
 	cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd);
@@ -408,7 +409,7 @@ static int ncsi_rsp_handler_dv(struct ncsi_request *nr)
 	/* Check if VLAN mode has been enabled */
 	ncm = &nc->modes[NCSI_MODE_VLAN];
 	if (!ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	/* Update to VLAN mode */
 	ncm->enable = 0;
@@ -454,13 +455,10 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr)
 
 	bitmap = &ncf->bitmap;
 	if (cmd->at_e & 0x1) {
-		if (test_and_set_bit(cmd->index, bitmap))
-			return -EBUSY;
+		set_bit(cmd->index, bitmap);
 		memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6);
 	} else {
-		if (!test_and_clear_bit(cmd->index, bitmap))
-			return -EBUSY;
-
+		clear_bit(cmd->index, bitmap);
 		memset(ncf->data + 6 * cmd->index, 0, 6);
 	}
 
@@ -484,7 +482,7 @@ static int ncsi_rsp_handler_ebf(struct ncsi_request *nr)
 	/* Check if broadcast filter has been enabled */
 	ncm = &nc->modes[NCSI_MODE_BC];
 	if (ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	/* Update to broadcast filter mode */
 	cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd);
@@ -510,7 +508,7 @@ static int ncsi_rsp_handler_dbf(struct ncsi_request *nr)
 	/* Check if broadcast filter isn't enabled */
 	ncm = &nc->modes[NCSI_MODE_BC];
 	if (!ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	/* Update to broadcast filter mode */
 	ncm->enable = 0;
@@ -537,7 +535,7 @@ static int ncsi_rsp_handler_egmf(struct ncsi_request *nr)
 	/* Check if multicast filter has been enabled */
 	ncm = &nc->modes[NCSI_MODE_MC];
 	if (ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	/* Update to multicast filter mode */
 	cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd);
@@ -563,7 +561,7 @@ static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr)
 	/* Check if multicast filter has been enabled */
 	ncm = &nc->modes[NCSI_MODE_MC];
 	if (!ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	/* Update to multicast filter mode */
 	ncm->enable = 0;
@@ -590,7 +588,7 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
 	/* Check if flow control has been enabled */
 	ncm = &nc->modes[NCSI_MODE_FC];
 	if (ncm->enable)
-		return -EBUSY;
+		return 0;
 
 	/* Update to flow control mode */
 	cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd);
@@ -693,7 +691,14 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
 
 		ncf->index = i;
 		ncf->total = cnt;
-		ncf->bitmap = 0x0ul;
+		if (i == NCSI_FILTER_VLAN) {
+			/* Set VLAN filters active so they are cleared in
+			 * first configuration state
+			 */
+			ncf->bitmap = U64_MAX;
+		} else {
+			ncf->bitmap = 0x0ul;
+		}
 		nc->filters[i] = ncf;
 	}
 
@@ -951,7 +956,7 @@ static struct ncsi_rsp_handler {
 	{ NCSI_PKT_RSP_EGMF,    4, ncsi_rsp_handler_egmf    },
 	{ NCSI_PKT_RSP_DGMF,    4, ncsi_rsp_handler_dgmf    },
 	{ NCSI_PKT_RSP_SNFC,    4, ncsi_rsp_handler_snfc    },
-	{ NCSI_PKT_RSP_GVI,    36, ncsi_rsp_handler_gvi     },
+	{ NCSI_PKT_RSP_GVI,    40, ncsi_rsp_handler_gvi     },
 	{ NCSI_PKT_RSP_GC,     32, ncsi_rsp_handler_gc      },
 	{ NCSI_PKT_RSP_GP,     -1, ncsi_rsp_handler_gp      },
 	{ NCSI_PKT_RSP_GCPS,  172, ncsi_rsp_handler_gcps    },
@@ -1024,11 +1029,19 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
 	if (payload < 0)
 		payload = ntohs(hdr->length);
 	ret = ncsi_validate_rsp_pkt(nr, payload);
-	if (ret)
+	if (ret) {
+		netdev_warn(ndp->ndev.dev,
+			    "NCSI: 'bad' packet ignored for type 0x%x\n",
+			    hdr->type);
 		goto out;
+	}
 
 	/* Process the packet */
 	ret = nrh->handler(nr);
+	if (ret)
+		netdev_err(ndp->ndev.dev,
+			   "NCSI: Handler for packet type 0x%x returned %d\n",
+			   hdr->type, ret);
 out:
 	ncsi_free_request(nr);
 	return ret;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9b28864cc36a..e4a13cc8a2e7 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -636,6 +636,15 @@ config NFT_FWD_NETDEV
 	help
 	  This option enables packet forwarding for the "netdev" family.
 
+config NFT_FIB_NETDEV
+	depends on NFT_FIB_IPV4
+	depends on NFT_FIB_IPV6
+	tristate "Netfilter nf_tables netdev fib lookups support"
+	help
+	  This option allows using the FIB expression from the netdev table.
+	  The lookup will be delegated to the IPv4 or IPv6 FIB depending
+	  on the protocol of the packet.
+
 endif # NF_TABLES_NETDEV
 
 endif # NF_TABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 913380919301..f78ed2470831 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
 
 nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
@@ -100,6 +101,7 @@ obj-$(CONFIG_NFT_REDIR)		+= nft_redir.o
 obj-$(CONFIG_NFT_HASH)		+= nft_hash.o
 obj-$(CONFIG_NFT_FIB)		+= nft_fib.o
 obj-$(CONFIG_NFT_FIB_INET)	+= nft_fib_inet.o
+obj-$(CONFIG_NFT_FIB_NETDEV)	+= nft_fib_netdev.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)	+= nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 974cf2a3795a..52cd2901a097 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -21,7 +21,7 @@
 #include <linux/inetdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
-#include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/rcupdate.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -62,10 +62,182 @@ EXPORT_SYMBOL(nf_hooks_needed);
 #endif
 
 static DEFINE_MUTEX(nf_hook_mutex);
+
+/* max hooks per family/hooknum */
+#define MAX_HOOK_COUNT		1024
+
 #define nf_entry_dereference(e) \
 	rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
 
-static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
+{
+	struct nf_hook_entries *e;
+	size_t alloc = sizeof(*e) +
+		       sizeof(struct nf_hook_entry) * num +
+		       sizeof(struct nf_hook_ops *) * num;
+
+	if (num == 0)
+		return NULL;
+
+	e = kvzalloc(alloc, GFP_KERNEL);
+	if (e)
+		e->num_hook_entries = num;
+	return e;
+}
+
+static unsigned int accept_all(void *priv,
+			       struct sk_buff *skb,
+			       const struct nf_hook_state *state)
+{
+	return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
+}
+
+static const struct nf_hook_ops dummy_ops = {
+	.hook = accept_all,
+	.priority = INT_MIN,
+};
+
+static struct nf_hook_entries *
+nf_hook_entries_grow(const struct nf_hook_entries *old,
+		     const struct nf_hook_ops *reg)
+{
+	unsigned int i, alloc_entries, nhooks, old_entries;
+	struct nf_hook_ops **orig_ops = NULL;
+	struct nf_hook_ops **new_ops;
+	struct nf_hook_entries *new;
+	bool inserted = false;
+
+	alloc_entries = 1;
+	old_entries = old ? old->num_hook_entries : 0;
+
+	if (old) {
+		orig_ops = nf_hook_entries_get_hook_ops(old);
+
+		for (i = 0; i < old_entries; i++) {
+			if (orig_ops[i] != &dummy_ops)
+				alloc_entries++;
+		}
+	}
+
+	if (alloc_entries > MAX_HOOK_COUNT)
+		return ERR_PTR(-E2BIG);
+
+	new = allocate_hook_entries_size(alloc_entries);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+
+	new_ops = nf_hook_entries_get_hook_ops(new);
+
+	i = 0;
+	nhooks = 0;
+	while (i < old_entries) {
+		if (orig_ops[i] == &dummy_ops) {
+			++i;
+			continue;
+		}
+		if (inserted || reg->priority > orig_ops[i]->priority) {
+			new_ops[nhooks] = (void *)orig_ops[i];
+			new->hooks[nhooks] = old->hooks[i];
+			i++;
+		} else {
+			new_ops[nhooks] = (void *)reg;
+			new->hooks[nhooks].hook = reg->hook;
+			new->hooks[nhooks].priv = reg->priv;
+			inserted = true;
+		}
+		nhooks++;
+	}
+
+	if (!inserted) {
+		new_ops[nhooks] = (void *)reg;
+		new->hooks[nhooks].hook = reg->hook;
+		new->hooks[nhooks].priv = reg->priv;
+	}
+
+	return new;
+}
+
+static void hooks_validate(const struct nf_hook_entries *hooks)
+{
+#ifdef CONFIG_DEBUG_KERNEL
+	struct nf_hook_ops **orig_ops;
+	int prio = INT_MIN;
+	size_t i = 0;
+
+	orig_ops = nf_hook_entries_get_hook_ops(hooks);
+
+	for (i = 0; i < hooks->num_hook_entries; i++) {
+		if (orig_ops[i] == &dummy_ops)
+			continue;
+
+		WARN_ON(orig_ops[i]->priority < prio);
+
+		if (orig_ops[i]->priority > prio)
+			prio = orig_ops[i]->priority;
+	}
+#endif
+}
+
+/*
+ * __nf_hook_entries_try_shrink - try to shrink hook array
+ *
+ * @pp -- location of hook blob
+ *
+ * Hook unregistration must always succeed, so to-be-removed hooks
+ * are replaced by a dummy one that will just move to next hook.
+ *
+ * This counts the current dummy hooks, attempts to allocate new blob,
+ * copies the live hooks, then replaces and discards old one.
+ *
+ * return values:
+ *
+ * Returns address to free, or NULL.
+ */
+static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
+{
+	struct nf_hook_entries *old, *new = NULL;
+	unsigned int i, j, skip = 0, hook_entries;
+	struct nf_hook_ops **orig_ops;
+	struct nf_hook_ops **new_ops;
+
+	old = nf_entry_dereference(*pp);
+	if (WARN_ON_ONCE(!old))
+		return NULL;
+
+	orig_ops = nf_hook_entries_get_hook_ops(old);
+	for (i = 0; i < old->num_hook_entries; i++) {
+		if (orig_ops[i] == &dummy_ops)
+			skip++;
+	}
+
+	/* if skip == hook_entries all hooks have been removed */
+	hook_entries = old->num_hook_entries;
+	if (skip == hook_entries)
+		goto out_assign;
+
+	if (skip == 0)
+		return NULL;
+
+	hook_entries -= skip;
+	new = allocate_hook_entries_size(hook_entries);
+	if (!new)
+		return NULL;
+
+	new_ops = nf_hook_entries_get_hook_ops(new);
+	for (i = 0, j = 0; i < old->num_hook_entries; i++) {
+		if (orig_ops[i] == &dummy_ops)
+			continue;
+		new->hooks[j] = old->hooks[i];
+		new_ops[j] = (void *)orig_ops[i];
+		j++;
+	}
+	hooks_validate(new);
+out_assign:
+	rcu_assign_pointer(*pp, new);
+	return old;
+}
+
+static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
 {
 	if (reg->pf != NFPROTO_NETDEV)
 		return net->nf.hooks[reg->pf]+reg->hooknum;
@@ -76,13 +248,14 @@ static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const st
 			return &reg->dev->nf_hooks_ingress;
 	}
 #endif
+	WARN_ON_ONCE(1);
 	return NULL;
 }
 
 int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
-	struct nf_hook_entry __rcu **pp;
-	struct nf_hook_entry *entry, *p;
+	struct nf_hook_entries *p, *new_hooks;
+	struct nf_hook_entries __rcu **pp;
 
 	if (reg->pf == NFPROTO_NETDEV) {
 #ifndef CONFIG_NETFILTER_INGRESS
@@ -98,23 +271,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 	if (!pp)
 		return -EINVAL;
 
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		return -ENOMEM;
-
-	nf_hook_entry_init(entry, reg);
-
 	mutex_lock(&nf_hook_mutex);
 
-	/* Find the spot in the list */
-	for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
-		if (reg->priority < nf_hook_entry_priority(p))
-			break;
-	}
-	rcu_assign_pointer(entry->next, p);
-	rcu_assign_pointer(*pp, entry);
+	p = nf_entry_dereference(*pp);
+	new_hooks = nf_hook_entries_grow(p, reg);
+
+	if (!IS_ERR(new_hooks))
+		rcu_assign_pointer(*pp, new_hooks);
 
 	mutex_unlock(&nf_hook_mutex);
+	if (IS_ERR(new_hooks))
+		return PTR_ERR(new_hooks);
+
+	hooks_validate(new_hooks);
 #ifdef CONFIG_NETFILTER_INGRESS
 	if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
 		net_inc_ingress_queue();
@@ -122,48 +291,74 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 #ifdef HAVE_JUMP_LABEL
 	static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
+	synchronize_net();
+	BUG_ON(p == new_hooks);
+	kvfree(p);
 	return 0;
 }
 EXPORT_SYMBOL(nf_register_net_hook);
 
-static struct nf_hook_entry *
-__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+/*
+ * __nf_unregister_net_hook - remove a hook from blob
+ *
+ * @old: current address of hook blob
+ * @unreg: hook to unregister
+ *
+ * This cannot fail, hook unregistration must always succeed.
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+static void __nf_unregister_net_hook(struct nf_hook_entries *old,
+				     const struct nf_hook_ops *unreg)
 {
-	struct nf_hook_entry __rcu **pp;
-	struct nf_hook_entry *p;
-
-	pp = nf_hook_entry_head(net, reg);
-	if (WARN_ON_ONCE(!pp))
-		return NULL;
+	struct nf_hook_ops **orig_ops;
+	bool found = false;
+	unsigned int i;
 
-	mutex_lock(&nf_hook_mutex);
-	for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
-		if (nf_hook_entry_ops(p) == reg) {
-			rcu_assign_pointer(*pp, p->next);
-			break;
-		}
-	}
-	mutex_unlock(&nf_hook_mutex);
-	if (!p) {
-		WARN(1, "nf_unregister_net_hook: hook not found!\n");
-		return NULL;
+	orig_ops = nf_hook_entries_get_hook_ops(old);
+	for (i = 0; i < old->num_hook_entries; i++) {
+		if (orig_ops[i] != unreg)
+			continue;
+		WRITE_ONCE(old->hooks[i].hook, accept_all);
+		WRITE_ONCE(orig_ops[i], &dummy_ops);
+		found = true;
+		break;
 	}
+
+	if (found) {
 #ifdef CONFIG_NETFILTER_INGRESS
-	if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
-		net_dec_ingress_queue();
+		if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+			net_dec_ingress_queue();
 #endif
 #ifdef HAVE_JUMP_LABEL
-	static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+		static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
 #endif
-
-	return p;
+	} else {
+		WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+	}
 }
 
 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
-	struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+	struct nf_hook_entries __rcu **pp;
+	struct nf_hook_entries *p;
 	unsigned int nfq;
 
+	pp = nf_hook_entry_head(net, reg);
+	if (!pp)
+		return;
+
+	mutex_lock(&nf_hook_mutex);
+
+	p = nf_entry_dereference(*pp);
+	if (WARN_ON_ONCE(!p)) {
+		mutex_unlock(&nf_hook_mutex);
+		return;
+	}
+
+	__nf_unregister_net_hook(p, reg);
+
+	p = __nf_hook_entries_try_shrink(pp);
+	mutex_unlock(&nf_hook_mutex);
 	if (!p)
 		return;
 
@@ -173,7 +368,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 	nfq = nf_queue_nf_hook_drop(net);
 	if (nfq)
 		synchronize_net();
-	kfree(p);
+	kvfree(p);
 }
 EXPORT_SYMBOL(nf_unregister_net_hook);
 
@@ -200,26 +395,59 @@ EXPORT_SYMBOL(nf_register_net_hooks);
 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 			     unsigned int hookcount)
 {
-	struct nf_hook_entry *to_free[16];
-	unsigned int i, n, nfq;
+	struct nf_hook_entries *to_free[16], *p;
+	struct nf_hook_entries __rcu **pp;
+	unsigned int i, j, n;
+
+	mutex_lock(&nf_hook_mutex);
+	for (i = 0; i < hookcount; i++) {
+		pp = nf_hook_entry_head(net, &reg[i]);
+		if (!pp)
+			continue;
+
+		p = nf_entry_dereference(*pp);
+		if (WARN_ON_ONCE(!p))
+			continue;
+		__nf_unregister_net_hook(p, &reg[i]);
+	}
+	mutex_unlock(&nf_hook_mutex);
 
 	do {
 		n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
 
-		for (i = 0; i < n; i++)
-			to_free[i] = __nf_unregister_net_hook(net, &reg[i]);
+		mutex_lock(&nf_hook_mutex);
 
-		synchronize_net();
+		for (i = 0, j = 0; i < hookcount && j < n; i++) {
+			pp = nf_hook_entry_head(net, &reg[i]);
+			if (!pp)
+				continue;
+
+			p = nf_entry_dereference(*pp);
+			if (!p)
+				continue;
+
+			to_free[j] = __nf_hook_entries_try_shrink(pp);
+			if (to_free[j])
+				++j;
+		}
+
+		mutex_unlock(&nf_hook_mutex);
+
+		if (j) {
+			unsigned int nfq;
 
-		/* need 2nd synchronize_net() if nfqueue is used, skb
-		 * can get reinjected right before nf_queue_hook_drop()
-		 */
-		nfq = nf_queue_nf_hook_drop(net);
-		if (nfq)
 			synchronize_net();
 
-		for (i = 0; i < n; i++)
-			kfree(to_free[i]);
+			/* need 2nd synchronize_net() if nfqueue is used, skb
+			 * can get reinjected right before nf_queue_hook_drop()
+			 */
+			nfq = nf_queue_nf_hook_drop(net);
+			if (nfq)
+				synchronize_net();
+
+			for (i = 0; i < j; i++)
+				kvfree(to_free[i]);
+		}
 
 		reg += n;
 		hookcount -= n;
@@ -230,16 +458,15 @@ EXPORT_SYMBOL(nf_unregister_net_hooks);
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
-		 struct nf_hook_entry *entry)
+		 const struct nf_hook_entries *e, unsigned int s)
 {
 	unsigned int verdict;
 	int ret;
 
-	do {
-		verdict = nf_hook_entry_hookfn(entry, skb, state);
+	for (; s < e->num_hook_entries; s++) {
+		verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
 		switch (verdict & NF_VERDICT_MASK) {
 		case NF_ACCEPT:
-			entry = rcu_dereference(entry->next);
 			break;
 		case NF_DROP:
 			kfree_skb(skb);
@@ -248,8 +475,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 				ret = -EPERM;
 			return ret;
 		case NF_QUEUE:
-			ret = nf_queue(skb, state, &entry, verdict);
-			if (ret == 1 && entry)
+			ret = nf_queue(skb, state, e, s, verdict);
+			if (ret == 1)
 				continue;
 			return ret;
 		default:
@@ -258,7 +485,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 			 */
 			return 0;
 		}
-	} while (entry);
+	}
 
 	return 1;
 }
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 28ec148df02d..a445a6bf4f11 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the ipset modules
 #
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 8ad2b52a0b32..5ca18f07683b 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -37,11 +37,11 @@
 #define get_ext(set, map, id)	((map)->extensions + ((set)->dsize * (id)))
 
 static void
-mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
 {
 	struct mtype *map = set->data;
 
-	setup_timer(&map->gc, gc, (unsigned long)set);
+	timer_setup(&map->gc, gc, 0);
 	mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
 }
 
@@ -272,10 +272,10 @@ out:
 }
 
 static void
-mtype_gc(unsigned long ul_set)
+mtype_gc(struct timer_list *t)
 {
-	struct ip_set *set = (struct ip_set *)ul_set;
-	struct mtype *map = set->data;
+	struct mtype *map = from_timer(map, t, gc);
+	struct ip_set *set = map->set;
 	void *x;
 	u32 id;
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 4783efff0bde..d8975a0b4282 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -48,6 +48,7 @@ struct bitmap_ip {
 	size_t memsize;		/* members size */
 	u8 netmask;		/* subnet netmask */
 	struct timer_list gc;	/* garbage collection */
+	struct ip_set *set;	/* attached to this ip_set */
 	unsigned char extensions[0]	/* data extensions */
 		__aligned(__alignof__(u64));
 };
@@ -232,6 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
 	map->netmask = netmask;
 	set->timeout = IPSET_NO_TIMEOUT;
 
+	map->set = set;
 	set->data = map;
 	set->family = NFPROTO_IPV4;
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 9a065f672d3a..4c279fbd2d5d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -52,6 +52,7 @@ struct bitmap_ipmac {
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	struct timer_list gc;	/* garbage collector */
+	struct ip_set *set;	/* attached to this ip_set */
 	unsigned char extensions[0]	/* MAC + data extensions */
 		__aligned(__alignof__(u64));
 };
@@ -307,6 +308,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
 	map->elements = elements;
 	set->timeout = IPSET_NO_TIMEOUT;
 
+	map->set = set;
 	set->data = map;
 	set->family = NFPROTO_IPV4;
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7f0c733358a4..7f9bbd7c98b5 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -40,6 +40,7 @@ struct bitmap_port {
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	struct timer_list gc;	/* garbage collection */
+	struct ip_set *set;	/* attached to this ip_set */
 	unsigned char extensions[0]	/* data extensions */
 		__aligned(__alignof__(u64));
 };
@@ -214,6 +215,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
 	map->last_port = last_port;
 	set->timeout = IPSET_NO_TIMEOUT;
 
+	map->set = set;
 	set->data = map;
 	set->family = NFPROTO_UNSPEC;
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e495b5e484b1..cf84f7b37cd9 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1191,14 +1191,17 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 	      from->family == to->family))
 		return -IPSET_ERR_TYPE_MISMATCH;
 
-	if (from->ref_netlink || to->ref_netlink)
+	write_lock_bh(&ip_set_ref_lock);
+
+	if (from->ref_netlink || to->ref_netlink) {
+		write_unlock_bh(&ip_set_ref_lock);
 		return -EBUSY;
+	}
 
 	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
 	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
 	strncpy(to->name, from_name, IPSET_MAXNAMELEN);
 
-	write_lock_bh(&ip_set_ref_lock);
 	swap(from->ref, to->ref);
 	ip_set(inst, from_id) = to;
 	ip_set(inst, to_id) = from;
@@ -2072,25 +2075,28 @@ static struct pernet_operations ip_set_net_ops = {
 static int __init
 ip_set_init(void)
 {
-	int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+	int ret = register_pernet_subsys(&ip_set_net_ops);
+
+	if (ret) {
+		pr_err("ip_set: cannot register pernet_subsys.\n");
+		return ret;
+	}
 
+	ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
 	if (ret != 0) {
 		pr_err("ip_set: cannot register with nfnetlink.\n");
+		unregister_pernet_subsys(&ip_set_net_ops);
 		return ret;
 	}
+
 	ret = nf_register_sockopt(&so_set);
 	if (ret != 0) {
 		pr_err("SO_SET registry failed: %d\n", ret);
 		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+		unregister_pernet_subsys(&ip_set_net_ops);
 		return ret;
 	}
-	ret = register_pernet_subsys(&ip_set_net_ops);
-	if (ret) {
-		pr_err("ip_set: cannot register pernet_subsys.\n");
-		nf_unregister_sockopt(&so_set);
-		nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-		return ret;
-	}
+
 	pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
 	return 0;
 }
@@ -2098,9 +2104,10 @@ ip_set_init(void)
 static void __exit
 ip_set_fini(void)
 {
-	unregister_pernet_subsys(&ip_set_net_ops);
 	nf_unregister_sockopt(&so_set);
 	nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+
+	unregister_pernet_subsys(&ip_set_net_ops);
 	pr_debug("these are the famous last words\n");
 }
 
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index f236c0bc7b3f..efffc8eabafe 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -280,6 +280,7 @@ htable_bits(u32 hashsize)
 struct htype {
 	struct htable __rcu *table; /* the hash table */
 	struct timer_list gc;	/* garbage collection when timeout enabled */
+	struct ip_set *set;	/* attached to this ip_set */
 	u32 maxelem;		/* max elements in the hash */
 	u32 initval;		/* random jhash init value */
 #ifdef IP_SET_HASH_WITH_MARKMASK
@@ -429,11 +430,11 @@ mtype_destroy(struct ip_set *set)
 }
 
 static void
-mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
 {
 	struct htype *h = set->data;
 
-	setup_timer(&h->gc, gc, (unsigned long)set);
+	timer_setup(&h->gc, gc, 0);
 	mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
 	pr_debug("gc initialized, run in every %u\n",
 		 IPSET_GC_PERIOD(set->timeout));
@@ -526,10 +527,10 @@ mtype_expire(struct ip_set *set, struct htype *h)
 }
 
 static void
-mtype_gc(unsigned long ul_set)
+mtype_gc(struct timer_list *t)
 {
-	struct ip_set *set = (struct ip_set *)ul_set;
-	struct htype *h = set->data;
+	struct htype *h = from_timer(h, t, gc);
+	struct ip_set *set = h->set;
 
 	pr_debug("called\n");
 	spin_lock_bh(&set->lock);
@@ -1041,12 +1042,24 @@ out:
 static int
 mtype_head(struct ip_set *set, struct sk_buff *skb)
 {
-	const struct htype *h = set->data;
+	struct htype *h = set->data;
 	const struct htable *t;
 	struct nlattr *nested;
 	size_t memsize;
 	u8 htable_bits;
 
+	/* If any members have expired, set->elements will be wrong;
+	 * the mtype_expire function will update it with the right count.
+	 * We do not hold set->lock here, so grab it first.
+	 * set->elements can still be incorrect in the case of a huge set,
+	 * because elements might time out during the listing.
+	 */
+	if (SET_WITH_TIMEOUT(set)) {
+		spin_lock_bh(&set->lock);
+		mtype_expire(set, h);
+		spin_unlock_bh(&set->lock);
+	}
+
 	rcu_read_lock_bh();
 	t = rcu_dereference_bh_nfnl(h->table);
 	memsize = mtype_ahash_memsize(h, t) + set->ext_size;
@@ -1302,6 +1315,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 	t->htable_bits = hbits;
 	RCU_INIT_POINTER(h->table, t);
 
+	h->set = set;
 	set->data = h;
 #ifndef IP_SET_PROTO_UNDEF
 	if (set->family == NFPROTO_IPV4) {
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 20bfbd315f61..613eb212cb48 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -123,13 +123,12 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 		return ret;
 
 	ip &= ip_set_hostmask(h->netmask);
+	e.ip = htonl(ip);
+	if (e.ip == 0)
+		return -IPSET_ERR_HASH_ELEM;
 
-	if (adt == IPSET_TEST) {
-		e.ip = htonl(ip);
-		if (e.ip == 0)
-			return -IPSET_ERR_HASH_ELEM;
+	if (adt == IPSET_TEST)
 		return adtfn(set, &e, &ext, &ext, flags);
-	}
 
 	ip_to = ip;
 	if (tb[IPSET_ATTR_IP_TO]) {
@@ -148,17 +147,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
 
-	if (retried)
+	if (retried) {
 		ip = ntohl(h->next.ip);
-	for (; !before(ip_to, ip); ip += hosts) {
 		e.ip = htonl(ip);
-		if (e.ip == 0)
-			return -IPSET_ERR_HASH_ELEM;
+	}
+	for (; ip <= ip_to;) {
 		ret = adtfn(set, &e, &ext, &ext, flags);
-
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
 
+		ip += hosts;
+		e.ip = htonl(ip);
+		if (e.ip == 0)
+			return 0;
+
 		ret = 0;
 	}
 	return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index b64cf14e8352..f3ba8348cf9d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -149,7 +149,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (retried)
 		ip = ntohl(h->next.ip);
-	for (; !before(ip_to, ip); ip++) {
+	for (; ip <= ip_to; ip++) {
 		e.ip = htonl(ip);
 		ret = adtfn(set, &e, &ext, &ext, flags);
 
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index f438740e6c6a..ddb8039ec1d2 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -178,7 +178,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (retried)
 		ip = ntohl(h->next.ip);
-	for (; !before(ip_to, ip); ip++) {
+	for (; ip <= ip_to; ip++) {
 		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
 						       : port;
 		for (; p <= port_to; p++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 6215fb898c50..a7f4d7a85420 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -185,7 +185,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (retried)
 		ip = ntohl(h->next.ip);
-	for (; !before(ip_to, ip); ip++) {
+	for (; ip <= ip_to; ip++) {
 		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
 						       : port;
 		for (; p <= port_to; p++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 5ab1b99a53c2..0f164e986bf1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -271,7 +271,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (retried)
 		ip = ntohl(h->next.ip);
-	for (; !before(ip_to, ip); ip++) {
+	for (; ip <= ip_to; ip++) {
 		e.ip = htonl(ip);
 		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
 						       : port;
@@ -281,7 +281,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			      ip == ntohl(h->next.ip) &&
 			      p == ntohs(h->next.port)
 				? ntohl(h->next.ip2) : ip2_from;
-			while (!after(ip2, ip2_to)) {
+			while (ip2 <= ip2_to) {
 				e.ip2 = htonl(ip2);
 				ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
 								&cidr);
@@ -434,7 +434,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (unlikely(tb[IPSET_ATTR_IP_TO]))
 		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
 	if (unlikely(tb[IPSET_ATTR_CIDR])) {
-		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
 
 		if (cidr != HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 5d9e895452e7..1c67a1761e45 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -193,7 +193,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 	if (retried)
 		ip = ntohl(h->next.ip);
-	while (!after(ip, ip_to)) {
+	while (ip <= ip_to) {
 		e.ip = htonl(ip);
 		last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
 		ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 44cf11939c91..d417074f1c1a 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -255,7 +255,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (retried)
 		ip = ntohl(h->next.ip);
-	while (!after(ip, ip_to)) {
+	while (ip <= ip_to) {
 		e.ip = htonl(ip);
 		last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
 		ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index db614e13b193..7f9ae2e9645b 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -250,13 +250,13 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (retried)
 		ip = ntohl(h->next.ip[0]);
 
-	while (!after(ip, ip_to)) {
+	while (ip <= ip_to) {
 		e.ip[0] = htonl(ip);
 		last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
 		ip2 = (retried &&
 		       ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
 						   : ip2_from;
-		while (!after(ip2, ip2_to)) {
+		while (ip2 <= ip2_to) {
 			e.ip[1] = htonl(ip2);
 			last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
 			ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 54b64b6cd0cd..e6ef382febe4 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -241,7 +241,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (retried)
 		ip = ntohl(h->next.ip);
-	while (!after(ip, ip_to)) {
+	while (ip <= ip_to) {
 		e.ip = htonl(ip);
 		last = ip_set_range_to_cidr(ip, ip_to, &cidr);
 		e.cidr = cidr - 1;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index aff846960ac4..8602f2595a1a 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -291,7 +291,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (retried)
 		ip = ntohl(h->next.ip[0]);
 
-	while (!after(ip, ip_to)) {
+	while (ip <= ip_to) {
 		e.ip[0] = htonl(ip);
 		ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
 		p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
@@ -301,7 +301,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			ip2 = (retried && ip == ntohl(h->next.ip[0]) &&
 			       p == ntohs(h->next.port)) ? ntohl(h->next.ip[1])
 							 : ip2_from;
-			while (!after(ip2, ip2_to)) {
+			while (ip2 <= ip2_to) {
 				e.ip[1] = htonl(ip2);
 				ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
 								&e.cidr[1]);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 178d4eba013b..e864681b8dc5 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -44,6 +44,7 @@ struct set_adt_elem {
 struct list_set {
 	u32 size;		/* size of set list array */
 	struct timer_list gc;	/* garbage collection */
+	struct ip_set *set;	/* attached to this ip_set */
 	struct net *net;	/* namespace */
 	struct list_head members; /* the set members */
 };
@@ -453,7 +454,6 @@ static size_t
 list_set_memsize(const struct list_set *map, size_t dsize)
 {
 	struct set_elem *e;
-	size_t memsize;
 	u32 n = 0;
 
 	rcu_read_lock();
@@ -461,9 +461,7 @@ list_set_memsize(const struct list_set *map, size_t dsize)
 		n++;
 	rcu_read_unlock();
 
-	memsize = sizeof(*map) + n * dsize;
-
-	return memsize;
+	return (sizeof(*map) + n * dsize);
 }
 
 static int
@@ -571,10 +569,10 @@ static const struct ip_set_type_variant set_variant = {
 };
 
 static void
-list_set_gc(unsigned long ul_set)
+list_set_gc(struct timer_list *t)
 {
-	struct ip_set *set = (struct ip_set *)ul_set;
-	struct list_set *map = set->data;
+	struct list_set *map = from_timer(map, t, gc);
+	struct ip_set *set = map->set;
 
 	spin_lock_bh(&set->lock);
 	set_cleanup_entries(set);
@@ -585,11 +583,11 @@ list_set_gc(unsigned long ul_set)
 }
 
 static void
-list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
 {
 	struct list_set *map = set->data;
 
-	setup_timer(&map->gc, gc, (unsigned long)set);
+	timer_setup(&map->gc, gc, 0);
 	mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
 }
 
@@ -606,6 +604,7 @@ init_list_set(struct net *net, struct ip_set *set, u32 size)
 
 	map->size = size;
 	map->net = net;
+	map->set = set;
 	INIT_LIST_HEAD(&map->members);
 	set->data = map;
 
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 1c8a42c1056c..d5be9c25fad6 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -3,6 +3,141 @@
 
 /* Prefixlen maps for fast conversions, by Jan Engelhardt. */
 
+#ifdef E
+#undef E
+#endif
+
+#define PREFIXES_MAP						\
+	E(0x00000000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0x80000000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),	\
+	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+
 #define E(a, b, c, d) \
 	{.ip6 = { \
 		htonl(a), htonl(b), \
@@ -13,135 +148,7 @@
  * just use prefixlen_netmask_map[prefixlength].ip.
  */
 const union nf_inet_addr ip_set_netmask_map[] = {
-	E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+	PREFIXES_MAP
 };
 EXPORT_SYMBOL_GPL(ip_set_netmask_map);
 
@@ -155,135 +162,7 @@ EXPORT_SYMBOL_GPL(ip_set_netmask_map);
  * just use prefixlen_hostmask_map[prefixlength].ip.
  */
 const union nf_inet_addr ip_set_hostmask_map[] = {
-	E(0x00000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0x80000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xC0000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xE0000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xF0000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xF8000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFC000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFE000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFF000000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFF800000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE),
-	E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
+	PREFIXES_MAP
 };
 EXPORT_SYMBOL_GPL(ip_set_hostmask_map);
 
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index 67f3f4389602..c552993fa4b9 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the IPVS modules on top of IPv4.
 #
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 3d2ac71a83ec..3e053cb30070 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -104,7 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
 	spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
 }
 
-static void ip_vs_conn_expire(unsigned long data);
+static void ip_vs_conn_expire(struct timer_list *t);
 
 /*
  *	Returns hash value for IPVS connection entry
@@ -185,7 +185,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 		hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
 		ret = 1;
 	} else {
-		pr_err("%s(): request for already hashed, called from %pF\n",
+		pr_err("%s(): request for already hashed, called from %pS\n",
 		       __func__, __builtin_return_address(0));
 		ret = 0;
 	}
@@ -457,7 +457,7 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
 static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
 {
 	__ip_vs_conn_put(cp);
-	ip_vs_conn_expire((unsigned long)cp);
+	ip_vs_conn_expire(&cp->timer);
 }
 
 /*
@@ -817,9 +817,9 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
 	kmem_cache_free(ip_vs_conn_cachep, cp);
 }
 
-static void ip_vs_conn_expire(unsigned long data)
+static void ip_vs_conn_expire(struct timer_list *t)
 {
-	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+	struct ip_vs_conn *cp = from_timer(cp, t, timer);
 	struct netns_ipvs *ipvs = cp->ipvs;
 
 	/*
@@ -909,7 +909,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
 	}
 
 	INIT_HLIST_NODE(&cp->c_list);
-	setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+	timer_setup(&cp->timer, ip_vs_conn_expire, 0);
 	cp->ipvs	   = ipvs;
 	cp->af		   = p->af;
 	cp->daf		   = dest_af;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e31956b58aba..5cb7cac9177d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -125,14 +125,12 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		s->cnt.inbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 
-		rcu_read_lock();
 		svc = rcu_dereference(dest->svc);
 		s = this_cpu_ptr(svc->stats.cpustats);
 		u64_stats_update_begin(&s->syncp);
 		s->cnt.inpkts++;
 		s->cnt.inbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
-		rcu_read_unlock();
 
 		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
 		u64_stats_update_begin(&s->syncp);
@@ -159,14 +157,12 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		s->cnt.outbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 
-		rcu_read_lock();
 		svc = rcu_dereference(dest->svc);
 		s = this_cpu_ptr(svc->stats.cpustats);
 		u64_stats_update_begin(&s->syncp);
 		s->cnt.outpkts++;
 		s->cnt.outbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
-		rcu_read_unlock();
 
 		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
 		u64_stats_update_begin(&s->syncp);
@@ -1222,7 +1218,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
 	if (!pptr)
 		return NULL;
 
-	rcu_read_lock();
 	dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
 				       &iph->saddr, pptr[0]);
 	if (dest) {
@@ -1237,7 +1232,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
 						  pptr[0], pptr[1]);
 		}
 	}
-	rcu_read_unlock();
 
 	return cp;
 }
@@ -1689,11 +1683,9 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
 			if (dest) {
 				struct ip_vs_dest_dst *dest_dst;
 
-				rcu_read_lock();
 				dest_dst = rcu_dereference(dest->dest_dst);
 				if (dest_dst)
 					mtu = dst_mtu(dest_dst->dst_cache);
-				rcu_read_unlock();
 			}
 			if (mtu > 68 + sizeof(struct iphdr))
 				mtu -= sizeof(struct iphdr);
@@ -2109,7 +2101,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
 #endif
 
 
-static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+static const struct nf_hook_ops ip_vs_ops[] = {
 	/* After packet filtering, change source only for VS/NAT */
 	{
 		.hook		= ip_vs_reply4,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 1fa3c2307b6e..fff213eacf2a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -300,7 +300,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 	unsigned int hash;
 
 	if (svc->flags & IP_VS_SVC_F_HASHED) {
-		pr_err("%s(): request for already hashed, called from %pF\n",
+		pr_err("%s(): request for already hashed, called from %pS\n",
 		       __func__, __builtin_return_address(0));
 		return 0;
 	}
@@ -334,7 +334,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 {
 	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
-		pr_err("%s(): request for unhash flagged, called from %pF\n",
+		pr_err("%s(): request for unhash flagged, called from %pS\n",
 		       __func__, __builtin_return_address(0));
 		return 0;
 	}
@@ -550,18 +550,15 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
 	/* Check for "full" addressed entries */
 	hash = ip_vs_rs_hashkey(af, daddr, dport);
 
-	rcu_read_lock();
 	hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
 		if (dest->port == dport &&
 		    dest->af == af &&
 		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
 		    (dest->protocol == protocol || dest->vfwmark)) {
 			/* HIT */
-			rcu_read_unlock();
 			return true;
 		}
 	}
-	rcu_read_unlock();
 
 	return false;
 }
@@ -1149,9 +1146,9 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	return 0;
 }
 
-static void ip_vs_dest_trash_expire(unsigned long data)
+static void ip_vs_dest_trash_expire(struct timer_list *t)
 {
-	struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
+	struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
 	struct ip_vs_dest *dest, *next;
 	unsigned long now = jiffies;
 
@@ -2037,12 +2034,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 		seq_puts(seq,
 			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
 	} else {
+		struct net *net = seq_file_net(seq);
+		struct netns_ipvs *ipvs = net_ipvs(net);
 		const struct ip_vs_service *svc = v;
 		const struct ip_vs_iter *iter = seq->private;
 		const struct ip_vs_dest *dest;
 		struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
 		char *sched_name = sched ? sched->name : "none";
 
+		if (svc->ipvs != ipvs)
+			return 0;
 		if (iter->table == ip_vs_svc_table) {
 #ifdef CONFIG_IP_VS_IPV6
 			if (svc->af == AF_INET6)
@@ -4022,8 +4023,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
 
 	INIT_LIST_HEAD(&ipvs->dest_trash);
 	spin_lock_init(&ipvs->dest_trash_lock);
-	setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
-		    (unsigned long) ipvs);
+	timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
 	atomic_set(&ipvs->ftpsvc_counter, 0);
 	atomic_set(&ipvs->nullsvc_counter, 0);
 	atomic_set(&ipvs->conn_out_counter, 0);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 457c6c193e13..489055091a9b 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -97,12 +97,12 @@ static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
 }
 
 
-static void estimation_timer(unsigned long arg)
+static void estimation_timer(struct timer_list *t)
 {
 	struct ip_vs_estimator *e;
 	struct ip_vs_stats *s;
 	u64 rate;
-	struct netns_ipvs *ipvs = (struct netns_ipvs *)arg;
+	struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
 
 	spin_lock(&ipvs->est_lock);
 	list_for_each_entry(e, &ipvs->est_list, list) {
@@ -192,7 +192,7 @@ int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
 {
 	INIT_LIST_HEAD(&ipvs->est_list);
 	spin_lock_init(&ipvs->est_lock);
-	setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs);
+	timer_setup(&ipvs->est_timer, estimation_timer, 0);
 	mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
 	return 0;
 }
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index fb780be76d15..3e17d32b629d 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -269,13 +269,11 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 			 * hopefully it will succeed on the retransmitted
 			 * packet.
 			 */
-			rcu_read_lock();
 			mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
 							   iph->ihl * 4,
 							   start - data,
 							   end - start,
 							   buf, buf_len);
-			rcu_read_unlock();
 			if (mangled) {
 				ip_vs_nfct_expect_related(skb, ct, n_cp,
 							  IPPROTO_TCP, 0, 0);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index b6aa4a970c6e..d625179de485 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -106,6 +106,7 @@ struct ip_vs_lblc_table {
 	struct rcu_head		rcu_head;
 	struct hlist_head	bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */
 	struct timer_list       periodic_timer; /* collect stale entries */
+	struct ip_vs_service	*svc;		/* pointer back to service */
 	atomic_t                entries;        /* number of entries */
 	int                     max_size;       /* maximum size of entries */
 	int                     rover;          /* rover for expire check */
@@ -294,10 +295,10 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
  *             of the table.
  *      The full expiration check is for this purpose now.
  */
-static void ip_vs_lblc_check_expire(unsigned long data)
+static void ip_vs_lblc_check_expire(struct timer_list *t)
 {
-	struct ip_vs_service *svc = (struct ip_vs_service *) data;
-	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	struct ip_vs_lblc_table *tbl = from_timer(tbl, t, periodic_timer);
+	struct ip_vs_service *svc = tbl->svc;
 	unsigned long now = jiffies;
 	int goal;
 	int i, j;
@@ -369,12 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	tbl->rover = 0;
 	tbl->counter = 1;
 	tbl->dead = 0;
+	tbl->svc = svc;
 
 	/*
 	 *    Hook periodic timer for garbage collection
 	 */
-	setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
-			(unsigned long)svc);
+	timer_setup(&tbl->periodic_timer, ip_vs_lblc_check_expire, 0);
 	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
 
 	return 0;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index c13ff575f9f7..84c57b62a588 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -278,6 +278,7 @@ struct ip_vs_lblcr_table {
 	atomic_t                entries;        /* number of entries */
 	int                     max_size;       /* maximum size of entries */
 	struct timer_list       periodic_timer; /* collect stale entries */
+	struct ip_vs_service	*svc;		/* pointer back to service */
 	int                     rover;          /* rover for expire check */
 	int                     counter;        /* counter for no expire */
 	bool			dead;
@@ -458,10 +459,10 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
  *             of the table.
  *      The full expiration check is for this purpose now.
  */
-static void ip_vs_lblcr_check_expire(unsigned long data)
+static void ip_vs_lblcr_check_expire(struct timer_list *t)
 {
-	struct ip_vs_service *svc = (struct ip_vs_service *) data;
-	struct ip_vs_lblcr_table *tbl = svc->sched_data;
+	struct ip_vs_lblcr_table *tbl = from_timer(tbl, t, periodic_timer);
+	struct ip_vs_service *svc = tbl->svc;
 	unsigned long now = jiffies;
 	int goal;
 	int i, j;
@@ -532,12 +533,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
 	tbl->rover = 0;
 	tbl->counter = 1;
 	tbl->dead = 0;
+	tbl->svc = svc;
 
 	/*
 	 *    Hook periodic timer for garbage collection
 	 */
-	setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
-			(unsigned long)svc);
+	timer_setup(&tbl->periodic_timer, ip_vs_lblcr_check_expire, 0);
 	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
 
 	return 0;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3ffad4adaddf..eff7569824e5 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/ip.h>
 #include <linux/sctp.h>
@@ -24,9 +25,13 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		if (sh) {
 			sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
 						 sizeof(_schunkh), &_schunkh);
-			if (sch && (sch->type == SCTP_CID_INIT ||
-				    sysctl_sloppy_sctp(ipvs)))
+			if (sch) {
+				if (sch->type == SCTP_CID_ABORT ||
+				    !(sysctl_sloppy_sctp(ipvs) ||
+				      sch->type == SCTP_CID_INIT))
+					return 1;
 				ports = &sh->source;
+			}
 		}
 	} else {
 		ports = skb_header_pointer(
@@ -38,7 +43,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		return 0;
 	}
 
-	rcu_read_lock();
 	if (likely(!ip_vs_iph_inverse(iph)))
 		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
 					 &iph->daddr, ports[1]);
@@ -53,7 +57,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
 			 */
-			rcu_read_unlock();
 			*verdict = NF_DROP;
 			return 0;
 		}
@@ -67,11 +70,9 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 				*verdict = ip_vs_leave(svc, skb, pd, iph);
 			else
 				*verdict = NF_DROP;
-			rcu_read_unlock();
 			return 0;
 		}
 	}
-	rcu_read_unlock();
 	/* NF_ACCEPT */
 	return 1;
 }
@@ -526,12 +527,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = sctp_app_hashkey(cp->vport);
 
-	rcu_read_lock();
 	list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			rcu_read_unlock();
 
 			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
 					"%s:%u to app %s on port %u\n",
@@ -544,11 +543,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 			cp->app = inc;
 			if (inc->init_conn)
 				result = inc->init_conn(inc, cp);
-			goto out;
+			break;
 		}
 	}
-	rcu_read_unlock();
-out:
+
 	return result;
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 12dc8d5bc37d..121a321b91be 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -63,7 +63,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 	}
 
 	/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
-	rcu_read_lock();
 
 	if (likely(!ip_vs_iph_inverse(iph)))
 		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
@@ -80,7 +79,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
 			 */
-			rcu_read_unlock();
 			*verdict = NF_DROP;
 			return 0;
 		}
@@ -95,11 +93,9 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 				*verdict = ip_vs_leave(svc, skb, pd, iph);
 			else
 				*verdict = NF_DROP;
-			rcu_read_unlock();
 			return 0;
 		}
 	}
-	rcu_read_unlock();
 	/* NF_ACCEPT */
 	return 1;
 }
@@ -661,12 +657,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = tcp_app_hashkey(cp->vport);
 
-	rcu_read_lock();
 	list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			rcu_read_unlock();
 
 			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 				      "%s:%u to app %s on port %u\n",
@@ -680,12 +674,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 			cp->app = inc;
 			if (inc->init_conn)
 				result = inc->init_conn(inc, cp);
-			goto out;
+			break;
 		}
 	}
-	rcu_read_unlock();
 
-  out:
 	return result;
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e494e9a88c7f..30e11cd6aa8a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -53,7 +53,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		return 0;
 	}
 
-	rcu_read_lock();
 	if (likely(!ip_vs_iph_inverse(iph)))
 		svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
 					 &iph->daddr, ports[1]);
@@ -69,7 +68,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 			 * It seems that we are very loaded.
 			 * We have to drop this packet :(
 			 */
-			rcu_read_unlock();
 			*verdict = NF_DROP;
 			return 0;
 		}
@@ -84,11 +82,9 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 				*verdict = ip_vs_leave(svc, skb, pd, iph);
 			else
 				*verdict = NF_DROP;
-			rcu_read_unlock();
 			return 0;
 		}
 	}
-	rcu_read_unlock();
 	/* NF_ACCEPT */
 	return 1;
 }
@@ -410,12 +406,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
 	/* Lookup application incarnations and bind the right one */
 	hash = udp_app_hashkey(cp->vport);
 
-	rcu_read_lock();
 	list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
 		if (inc->port == cp->vport) {
 			if (unlikely(!ip_vs_app_inc_get(inc)))
 				break;
-			rcu_read_unlock();
 
 			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
 				      "%s:%u to app %s on port %u\n",
@@ -429,12 +423,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
 			cp->app = inc;
 			if (inc->init_conn)
 				result = inc->init_conn(inc, cp);
-			goto out;
+			break;
 		}
 	}
-	rcu_read_unlock();
 
-  out:
 	return result;
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 0e5b64a75da0..9ee71cb276d7 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * IPVS         An implementation of the IP virtual server support for the
  *              LINUX operating system.  IPVS is now implemented as a module
@@ -457,7 +458,7 @@ static inline bool in_persistence(struct ip_vs_conn *cp)
 static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,
 				  struct ip_vs_conn *cp, int pkts)
 {
-	unsigned long orig = ACCESS_ONCE(cp->sync_endtime);
+	unsigned long orig = READ_ONCE(cp->sync_endtime);
 	unsigned long now = jiffies;
 	unsigned long n = (now + cp->timeout) & ~3UL;
 	unsigned int sync_refresh_period;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 2eab1e0400f4..4527921b1c3a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -678,7 +678,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	rcu_read_lock();
 	if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
 			       IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
 		goto tx_error;
@@ -689,14 +688,12 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
-	rcu_read_unlock();
 
 	LeaveFunction(10);
 	return NF_STOLEN;
 
  tx_error:
 	kfree_skb(skb);
-	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -710,7 +707,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	rcu_read_lock();
 	if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
 				  &iph->daddr, NULL,
 				  ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
@@ -720,14 +716,12 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
-	rcu_read_unlock();
 
 	LeaveFunction(10);
 	return NF_STOLEN;
 
  tx_error:
 	kfree_skb(skb);
-	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -746,7 +740,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	rcu_read_lock();
 	/* check if it is a connection of no-client-port */
 	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
 		__be16 _pt, *p;
@@ -815,14 +808,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
-	rcu_read_unlock();
 
 	LeaveFunction(10);
 	return rc;
 
   tx_error:
 	kfree_skb(skb);
-	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -837,7 +828,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	rcu_read_lock();
 	/* check if it is a connection of no-client-port */
 	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
 		__be16 _pt, *p;
@@ -906,7 +896,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
-	rcu_read_unlock();
 
 	LeaveFunction(10);
 	return rc;
@@ -914,7 +903,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 tx_error:
 	LeaveFunction(10);
 	kfree_skb(skb);
-	rcu_read_unlock();
 	return NF_STOLEN;
 }
 #endif
@@ -933,6 +921,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
 {
 	struct sk_buff *new_skb = NULL;
 	struct iphdr *old_iph = NULL;
+	__u8 old_dsfield;
 #ifdef CONFIG_IP_VS_IPV6
 	struct ipv6hdr *old_ipv6h = NULL;
 #endif
@@ -957,7 +946,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
 			*payload_len =
 				ntohs(old_ipv6h->payload_len) +
 				sizeof(*old_ipv6h);
-		*dsfield = ipv6_get_dsfield(old_ipv6h);
+		old_dsfield = ipv6_get_dsfield(old_ipv6h);
 		*ttl = old_ipv6h->hop_limit;
 		if (df)
 			*df = 0;
@@ -972,12 +961,15 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
 
 		/* fix old IP header checksum */
 		ip_send_check(old_iph);
-		*dsfield = ipv4_get_dsfield(old_iph);
+		old_dsfield = ipv4_get_dsfield(old_iph);
 		*ttl = old_iph->ttl;
 		if (payload_len)
 			*payload_len = ntohs(old_iph->tot_len);
 	}
 
+	/* Implement full-functionality option for ECN encapsulation */
+	*dsfield = INET_ECN_encapsulate(old_dsfield, old_dsfield);
+
 	return skb;
 error:
 	kfree_skb(skb);
@@ -1035,7 +1027,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	rcu_read_lock();
 	local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
 				   IP_VS_RT_MODE_LOCAL |
 				   IP_VS_RT_MODE_NON_LOCAL |
@@ -1043,10 +1034,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 				   IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
 	if (local < 0)
 		goto tx_error;
-	if (local) {
-		rcu_read_unlock();
+	if (local)
 		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
-	}
 
 	rt = skb_rtable(skb);
 	tdev = rt->dst.dev;
@@ -1095,7 +1084,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		ip_local_out(net, skb->sk, skb);
 	else if (ret == NF_DROP)
 		kfree_skb(skb);
-	rcu_read_unlock();
 
 	LeaveFunction(10);
 
@@ -1104,7 +1092,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
   tx_error:
 	if (!IS_ERR(skb))
 		kfree_skb(skb);
-	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -1127,7 +1114,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	rcu_read_lock();
 	local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
 				      &cp->daddr.in6,
 				      &saddr, ipvsh, 1,
@@ -1136,10 +1122,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 				      IP_VS_RT_MODE_TUNNEL);
 	if (local < 0)
 		goto tx_error;
-	if (local) {
-		rcu_read_unlock();
+	if (local)
 		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
-	}
 
 	rt = (struct rt6_info *) skb_dst(skb);
 	tdev = rt->dst.dev;
@@ -1185,7 +1169,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		ip6_local_out(cp->ipvs->net, skb->sk, skb);
 	else if (ret == NF_DROP)
 		kfree_skb(skb);
-	rcu_read_unlock();
 
 	LeaveFunction(10);
 
@@ -1194,7 +1177,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 tx_error:
 	if (!IS_ERR(skb))
 		kfree_skb(skb);
-	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -1213,17 +1195,14 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	rcu_read_lock();
 	local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
 				   IP_VS_RT_MODE_LOCAL |
 				   IP_VS_RT_MODE_NON_LOCAL |
 				   IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
 	if (local < 0)
 		goto tx_error;
-	if (local) {
-		rcu_read_unlock();
+	if (local)
 		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
-	}
 
 	ip_send_check(ip_hdr(skb));
 
@@ -1231,14 +1210,12 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
-	rcu_read_unlock();
 
 	LeaveFunction(10);
 	return NF_STOLEN;
 
   tx_error:
 	kfree_skb(skb);
-	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -1252,7 +1229,6 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	rcu_read_lock();
 	local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
 				      &cp->daddr.in6,
 				      NULL, ipvsh, 0,
@@ -1261,23 +1237,19 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 				      IP_VS_RT_MODE_KNOWN_NH);
 	if (local < 0)
 		goto tx_error;
-	if (local) {
-		rcu_read_unlock();
+	if (local)
 		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
-	}
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
-	rcu_read_unlock();
 
 	LeaveFunction(10);
 	return NF_STOLEN;
 
 tx_error:
 	kfree_skb(skb);
-	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -1322,7 +1294,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	rt_mode = (hooknum != NF_INET_FORWARD) ?
 		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
 		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
-	rcu_read_lock();
 	local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
 				   NULL, iph);
 	if (local < 0)
@@ -1368,12 +1339,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
-	rcu_read_unlock();
 	goto out;
 
   tx_error:
 	kfree_skb(skb);
-	rcu_read_unlock();
 	rc = NF_STOLEN;
   out:
 	LeaveFunction(10);
@@ -1414,7 +1383,6 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	rt_mode = (hooknum != NF_INET_FORWARD) ?
 		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
 		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
-	rcu_read_lock();
 	local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
 				      &cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
 	if (local < 0)
@@ -1460,12 +1428,10 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
-	rcu_read_unlock();
 	goto out;
 
 tx_error:
 	kfree_skb(skb);
-	rcu_read_unlock();
 	rc = NF_STOLEN;
 out:
 	LeaveFunction(10);
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 4e99cca61612..ecc3ab784633 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -40,7 +40,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
 	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
 		goto out;
 
-	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (in_dev != NULL) {
 		for_primary_ifa(in_dev) {
@@ -50,7 +49,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
 			}
 		} endfor_ifa(in_dev);
 	}
-	rcu_read_unlock();
 
 	if (mask == 0)
 		goto out;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9979f46c81dc..85f643c1e227 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,6 +56,8 @@
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netns/hash.h>
 
+#include "nf_internals.h"
+
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
 int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
@@ -96,19 +98,26 @@ static struct conntrack_gc_work conntrack_gc_work;
 
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
 {
+	/* 1) Acquire the lock */
 	spin_lock(lock);
-	while (unlikely(nf_conntrack_locks_all)) {
-		spin_unlock(lock);
 
-		/*
-		 * Order the 'nf_conntrack_locks_all' load vs. the
-		 * spin_unlock_wait() loads below, to ensure
-		 * that 'nf_conntrack_locks_all_lock' is indeed held:
-		 */
-		smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
-		spin_unlock_wait(&nf_conntrack_locks_all_lock);
-		spin_lock(lock);
-	}
+	/* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
+	 * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
+	 */
+	if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
+		return;
+
+	/* fast path failed, unlock */
+	spin_unlock(lock);
+
+	/* Slow path 1) get global lock */
+	spin_lock(&nf_conntrack_locks_all_lock);
+
+	/* Slow path 2) get the lock we want */
+	spin_lock(lock);
+
+	/* Slow path 3) release the global lock */
+	spin_unlock(&nf_conntrack_locks_all_lock);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
@@ -149,28 +158,27 @@ static void nf_conntrack_all_lock(void)
 	int i;
 
 	spin_lock(&nf_conntrack_locks_all_lock);
-	nf_conntrack_locks_all = true;
 
-	/*
-	 * Order the above store of 'nf_conntrack_locks_all' against
-	 * the spin_unlock_wait() loads below, such that if
-	 * nf_conntrack_lock() observes 'nf_conntrack_locks_all'
-	 * we must observe nf_conntrack_locks[] held:
-	 */
-	smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
+	nf_conntrack_locks_all = true;
 
 	for (i = 0; i < CONNTRACK_LOCKS; i++) {
-		spin_unlock_wait(&nf_conntrack_locks[i]);
+		spin_lock(&nf_conntrack_locks[i]);
+
+		/* This spin_unlock provides the "release" to ensure that
+		 * nf_conntrack_locks_all==true is visible to everyone that
+		 * acquired spin_lock(&nf_conntrack_locks[]).
+		 */
+		spin_unlock(&nf_conntrack_locks[i]);
 	}
 }
 
 static void nf_conntrack_all_unlock(void)
 {
-	/*
-	 * All prior stores must be complete before we clear
+	/* All prior stores must be complete before we clear
 	 * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
 	 * might observe the false value but not the entire
-	 * critical section:
+	 * critical section.
+	 * It pairs with the smp_load_acquire() in nf_conntrack_lock()
 	 */
 	smp_store_release(&nf_conntrack_locks_all, false);
 	spin_unlock(&nf_conntrack_locks_all_lock);
@@ -248,8 +256,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 		       u_int16_t l3num,
 		       struct net *net, struct nf_conntrack_tuple *tuple)
 {
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	unsigned int protoff;
 	u_int8_t protonum;
 	int ret;
@@ -398,22 +406,19 @@ static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct nf_conn *ct = (struct nf_conn *)nfct;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 
 	pr_debug("destroy_conntrack(%p)\n", ct);
-	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
+	WARN_ON(atomic_read(&nfct->use) != 0);
 
 	if (unlikely(nf_ct_is_template(ct))) {
 		nf_ct_tmpl_free(ct);
 		return;
 	}
-	rcu_read_lock();
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	if (l4proto->destroy)
 		l4proto->destroy(ct);
 
-	rcu_read_unlock();
-
 	local_bh_disable();
 	/* Expectations will have been removed in clean_from_lists,
 	 * except TFTP can create an expectation on the first packet,
@@ -695,7 +700,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
 {
 	/* This is the conntrack entry already in hashes that won race. */
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	if (l4proto->allow_clash &&
@@ -757,12 +762,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	 * connections for unconfirmed conns.  But packet copies and
 	 * REJECT will give spurious warnings here.
 	 */
-	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
 
 	/* No external references means no one else could have
 	 * confirmed us.
 	 */
-	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+	WARN_ON(nf_ct_is_confirmed(ct));
 	pr_debug("Confirming conntrack %p\n", ct);
 	/* We have to check the DYING flag after unlink to prevent
 	 * a race against nf_ct_get_next_corpse() possibly called from
@@ -1079,12 +1083,12 @@ static void gc_worker(struct work_struct *work)
 	next_run = gc_work->next_gc_run;
 	gc_work->last_bucket = i;
 	gc_work->early_drop = false;
-	queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
+	queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
 }
 
 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
 {
-	INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+	INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
 	gc_work->next_gc_run = HZ;
 	gc_work->exiting = false;
 }
@@ -1161,7 +1165,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 	/* A freed object has refcnt == 0, that's
 	 * the golden rule for SLAB_TYPESAFE_BY_RCU
 	 */
-	NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+	WARN_ON(atomic_read(&ct->ct_general.use) != 0);
 
 	nf_ct_ext_destroy(ct);
 	nf_ct_ext_free(ct);
@@ -1177,8 +1181,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 static noinline struct nf_conntrack_tuple_hash *
 init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       const struct nf_conntrack_tuple *tuple,
-	       struct nf_conntrack_l3proto *l3proto,
-	       struct nf_conntrack_l4proto *l4proto,
+	       const struct nf_conntrack_l3proto *l3proto,
+	       const struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
 	       unsigned int dataoff, u32 hash)
 {
@@ -1289,8 +1293,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 		  unsigned int dataoff,
 		  u_int16_t l3num,
 		  u_int8_t protonum,
-		  struct nf_conntrack_l3proto *l3proto,
-		  struct nf_conntrack_l4proto *l4proto)
+		  const struct nf_conntrack_l3proto *l3proto,
+		  const struct nf_conntrack_l4proto *l4proto)
 {
 	const struct nf_conntrack_zone *zone;
 	struct nf_conntrack_tuple tuple;
@@ -1345,10 +1349,10 @@ unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		struct sk_buff *skb)
 {
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conn *ct, *tmpl;
 	enum ip_conntrack_info ctinfo;
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
 	unsigned int *timeouts;
 	unsigned int dataoff;
 	u_int8_t protonum;
@@ -1415,7 +1419,7 @@ repeat:
 	/* Decide what timeout policy we want to apply to this flow. */
 	timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
 
-	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
+	ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
 	if (ret <= 0) {
 		/* Invalid: inverse of the return code tells
 		 * the netfilter core what to do */
@@ -1469,7 +1473,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 	struct nf_conn_help *help = nfct_help(ct);
 
 	/* Should be unconfirmed, so not in hash table yet */
-	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+	WARN_ON(nf_ct_is_confirmed(ct));
 
 	pr_debug("Altering reply tuple of %p to ", ct);
 	nf_ct_dump_tuple(newreply);
@@ -1491,7 +1495,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 			  unsigned long extra_jiffies,
 			  int do_acct)
 {
-	NF_CT_ASSERT(skb);
+	WARN_ON(!skb);
 
 	/* Only update if this is not a fixed timeout */
 	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
@@ -1559,9 +1563,14 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
 }
 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
 
-int nf_ct_port_nlattr_tuple_size(void)
+unsigned int nf_ct_port_nlattr_tuple_size(void)
 {
-	return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+	static unsigned int size __read_mostly;
+
+	if (!size)
+		size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+
+	return size;
 }
 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
 #endif
@@ -1689,6 +1698,18 @@ __nf_ct_unconfirmed_destroy(struct net *net)
 	}
 }
 
+void nf_ct_unconfirmed_destroy(struct net *net)
+{
+	might_sleep();
+
+	if (atomic_read(&net->ct.count) > 0) {
+		__nf_ct_unconfirmed_destroy(net);
+		nf_queue_nf_hook_drop(net);
+		synchronize_net();
+	}
+}
+EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
+
 void nf_ct_iterate_cleanup_net(struct net *net,
 			       int (*iter)(struct nf_conn *i, void *data),
 			       void *data, u32 portid, int report)
@@ -1700,14 +1721,10 @@ void nf_ct_iterate_cleanup_net(struct net *net,
 	if (atomic_read(&net->ct.count) == 0)
 		return;
 
-	__nf_ct_unconfirmed_destroy(net);
-
 	d.iter = iter;
 	d.data = data;
 	d.net = net;
 
-	synchronize_net();
-
 	nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
 }
 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
@@ -1733,6 +1750,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
 		if (atomic_read(&net->ct.count) == 0)
 			continue;
 		__nf_ct_unconfirmed_destroy(net);
+		nf_queue_nf_hook_drop(net);
 	}
 	rtnl_unlock();
 
@@ -1927,7 +1945,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
 	return 0;
 }
 
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
 {
 	unsigned int hashsize;
 	int rc;
@@ -2071,7 +2089,7 @@ int nf_conntrack_init_start(void)
 		goto err_proto;
 
 	conntrack_gc_work_init(&conntrack_gc_work);
-	queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
+	queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
 
 	return 0;
 
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 899c2c36da13..64778f9a8548 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -51,8 +51,8 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 	struct nf_conn_help *master_help = nfct_help(exp->master);
 	struct net *net = nf_ct_exp_net(exp);
 
-	NF_CT_ASSERT(master_help);
-	NF_CT_ASSERT(!timer_pending(&exp->timeout));
+	WARN_ON(!master_help);
+	WARN_ON(timer_pending(&exp->timeout));
 
 	hlist_del_rcu(&exp->hnode);
 	net->ct.expect_count--;
@@ -368,12 +368,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	/* two references : one for hash insert, one for the timer */
 	refcount_add(2, &exp->use);
 
-	hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
-	master_help->expecting[exp->class]++;
-
-	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
-	net->ct.expect_count++;
-
 	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
 		    (unsigned long)exp);
 	helper = rcu_dereference_protected(master_help->helper,
@@ -384,6 +378,12 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	}
 	add_timer(&exp->timeout);
 
+	hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
+	master_help->expecting[exp->class]++;
+
+	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
+	net->ct.expect_count++;
+
 	NF_CT_STAT_INC(net, expect_create);
 }
 
@@ -474,6 +474,60 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
 
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
+				  void *data)
+{
+	struct nf_conntrack_expect *exp;
+	const struct hlist_node *next;
+	unsigned int i;
+
+	spin_lock_bh(&nf_conntrack_expect_lock);
+
+	for (i = 0; i < nf_ct_expect_hsize; i++) {
+		hlist_for_each_entry_safe(exp, next,
+					  &nf_ct_expect_hash[i],
+					  hnode) {
+			if (iter(exp, data) && del_timer(&exp->timeout)) {
+				nf_ct_unlink_expect(exp);
+				nf_ct_expect_put(exp);
+			}
+		}
+	}
+
+	spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
+
+void nf_ct_expect_iterate_net(struct net *net,
+			      bool (*iter)(struct nf_conntrack_expect *e, void *data),
+			      void *data,
+			      u32 portid, int report)
+{
+	struct nf_conntrack_expect *exp;
+	const struct hlist_node *next;
+	unsigned int i;
+
+	spin_lock_bh(&nf_conntrack_expect_lock);
+
+	for (i = 0; i < nf_ct_expect_hsize; i++) {
+		hlist_for_each_entry_safe(exp, next,
+					  &nf_ct_expect_hash[i],
+					  hnode) {
+
+			if (!net_eq(nf_ct_exp_net(exp), net))
+				continue;
+
+			if (iter(exp, data) && del_timer(&exp->timeout)) {
+				nf_ct_unlink_expect_report(exp, portid, report);
+				nf_ct_expect_put(exp);
+			}
+		}
+	}
+
+	spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
+
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 struct ct_expect_iter_state {
 	struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 6c605e88ebae..9fe0ddc333fb 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -47,7 +47,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 	struct nf_ct_ext_type *t;
 
 	/* Conntrack must not be confirmed to avoid races on reallocation. */
-	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+	WARN_ON(nf_ct_is_confirmed(ct));
 
 	old = ct->ext;
 
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 89b2e46925c4..cf1bf2605c10 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -91,41 +91,41 @@ typedef struct field_t {
 } field_t;
 
 /* Bit Stream */
-typedef struct {
+struct bitstr {
 	unsigned char *buf;
 	unsigned char *beg;
 	unsigned char *end;
 	unsigned char *cur;
 	unsigned int bit;
-} bitstr_t;
+};
 
 /* Tool Functions */
 #define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;}
 #define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;}
 #define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;}
 #define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND)
-static unsigned int get_len(bitstr_t *bs);
-static unsigned int get_bit(bitstr_t *bs);
-static unsigned int get_bits(bitstr_t *bs, unsigned int b);
-static unsigned int get_bitmap(bitstr_t *bs, unsigned int b);
-static unsigned int get_uint(bitstr_t *bs, int b);
+static unsigned int get_len(struct bitstr *bs);
+static unsigned int get_bit(struct bitstr *bs);
+static unsigned int get_bits(struct bitstr *bs, unsigned int b);
+static unsigned int get_bitmap(struct bitstr *bs, unsigned int b);
+static unsigned int get_uint(struct bitstr *bs, int b);
 
 /* Decoder Functions */
-static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level);
-static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level);
+static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level);
+static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level);
 
 /* Decoder Functions Vector */
-typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int);
+typedef int (*decoder_t)(struct bitstr *, const struct field_t *, char *, int);
 static const decoder_t Decoders[] = {
 	decode_nul,
 	decode_bool,
@@ -150,7 +150,7 @@ static const decoder_t Decoders[] = {
  * Functions
  ****************************************************************************/
 /* Assume bs is aligned && v < 16384 */
-static unsigned int get_len(bitstr_t *bs)
+static unsigned int get_len(struct bitstr *bs)
 {
 	unsigned int v;
 
@@ -166,7 +166,7 @@ static unsigned int get_len(bitstr_t *bs)
 }
 
 /****************************************************************************/
-static unsigned int get_bit(bitstr_t *bs)
+static unsigned int get_bit(struct bitstr *bs)
 {
 	unsigned int b = (*bs->cur) & (0x80 >> bs->bit);
 
@@ -177,7 +177,7 @@ static unsigned int get_bit(bitstr_t *bs)
 
 /****************************************************************************/
 /* Assume b <= 8 */
-static unsigned int get_bits(bitstr_t *bs, unsigned int b)
+static unsigned int get_bits(struct bitstr *bs, unsigned int b)
 {
 	unsigned int v, l;
 
@@ -203,7 +203,7 @@ static unsigned int get_bits(bitstr_t *bs, unsigned int b)
 
 /****************************************************************************/
 /* Assume b <= 32 */
-static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
+static unsigned int get_bitmap(struct bitstr *bs, unsigned int b)
 {
 	unsigned int v, l, shift, bytes;
 
@@ -242,7 +242,7 @@ static unsigned int get_bitmap(bitstr_t *bs, unsigned int b)
 /****************************************************************************
  * Assume bs is aligned and sizeof(unsigned int) == 4
  ****************************************************************************/
-static unsigned int get_uint(bitstr_t *bs, int b)
+static unsigned int get_uint(struct bitstr *bs, int b)
 {
 	unsigned int v = 0;
 
@@ -264,7 +264,7 @@ static unsigned int get_uint(bitstr_t *bs, int b)
 }
 
 /****************************************************************************/
-static int decode_nul(bitstr_t *bs, const struct field_t *f,
+static int decode_nul(struct bitstr *bs, const struct field_t *f,
                       char *base, int level)
 {
 	PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -273,7 +273,7 @@ static int decode_nul(bitstr_t *bs, const struct field_t *f,
 }
 
 /****************************************************************************/
-static int decode_bool(bitstr_t *bs, const struct field_t *f,
+static int decode_bool(struct bitstr *bs, const struct field_t *f,
                        char *base, int level)
 {
 	PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -285,7 +285,7 @@ static int decode_bool(bitstr_t *bs, const struct field_t *f,
 }
 
 /****************************************************************************/
-static int decode_oid(bitstr_t *bs, const struct field_t *f,
+static int decode_oid(struct bitstr *bs, const struct field_t *f,
                       char *base, int level)
 {
 	int len;
@@ -302,7 +302,7 @@ static int decode_oid(bitstr_t *bs, const struct field_t *f,
 }
 
 /****************************************************************************/
-static int decode_int(bitstr_t *bs, const struct field_t *f,
+static int decode_int(struct bitstr *bs, const struct field_t *f,
                       char *base, int level)
 {
 	unsigned int len;
@@ -346,7 +346,7 @@ static int decode_int(bitstr_t *bs, const struct field_t *f,
 }
 
 /****************************************************************************/
-static int decode_enum(bitstr_t *bs, const struct field_t *f,
+static int decode_enum(struct bitstr *bs, const struct field_t *f,
                        char *base, int level)
 {
 	PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name);
@@ -362,7 +362,7 @@ static int decode_enum(bitstr_t *bs, const struct field_t *f,
 }
 
 /****************************************************************************/
-static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
+static int decode_bitstr(struct bitstr *bs, const struct field_t *f,
                          char *base, int level)
 {
 	unsigned int len;
@@ -396,7 +396,7 @@ static int decode_bitstr(bitstr_t *bs, const struct field_t *f,
 }
 
 /****************************************************************************/
-static int decode_numstr(bitstr_t *bs, const struct field_t *f,
+static int decode_numstr(struct bitstr *bs, const struct field_t *f,
                          char *base, int level)
 {
 	unsigned int len;
@@ -414,7 +414,7 @@ static int decode_numstr(bitstr_t *bs, const struct field_t *f,
 }
 
 /****************************************************************************/
-static int decode_octstr(bitstr_t *bs, const struct field_t *f,
+static int decode_octstr(struct bitstr *bs, const struct field_t *f,
                          char *base, int level)
 {
 	unsigned int len;
@@ -463,7 +463,7 @@ static int decode_octstr(bitstr_t *bs, const struct field_t *f,
 }
 
 /****************************************************************************/
-static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
+static int decode_bmpstr(struct bitstr *bs, const struct field_t *f,
                          char *base, int level)
 {
 	unsigned int len;
@@ -489,7 +489,7 @@ static int decode_bmpstr(bitstr_t *bs, const struct field_t *f,
 }
 
 /****************************************************************************/
-static int decode_seq(bitstr_t *bs, const struct field_t *f,
+static int decode_seq(struct bitstr *bs, const struct field_t *f,
                       char *base, int level)
 {
 	unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len;
@@ -606,7 +606,7 @@ static int decode_seq(bitstr_t *bs, const struct field_t *f,
 }
 
 /****************************************************************************/
-static int decode_seqof(bitstr_t *bs, const struct field_t *f,
+static int decode_seqof(struct bitstr *bs, const struct field_t *f,
                         char *base, int level)
 {
 	unsigned int count, effective_count = 0, i, len = 0;
@@ -696,7 +696,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f,
 
 
 /****************************************************************************/
-static int decode_choice(bitstr_t *bs, const struct field_t *f,
+static int decode_choice(struct bitstr *bs, const struct field_t *f,
                          char *base, int level)
 {
 	unsigned int type, ext, len = 0;
@@ -772,7 +772,7 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras)
 		FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT,
 		0, _RasMessage
 	};
-	bitstr_t bs;
+	struct bitstr bs;
 
 	bs.buf = bs.beg = bs.cur = buf;
 	bs.end = buf + sz;
@@ -789,7 +789,7 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg,
 		FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT,
 		0, _H323_UserInformation
 	};
-	bitstr_t bs;
+	struct bitstr bs;
 
 	bs.buf = buf;
 	bs.beg = bs.cur = beg;
@@ -808,7 +808,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz,
 		FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4,
 		DECODE | EXT, 0, _MultimediaSystemControlMessage
 	};
-	bitstr_t bs;
+	struct bitstr bs;
 
 	bs.buf = bs.beg = bs.cur = buf;
 	bs.end = buf + sz;
@@ -877,6 +877,7 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
 		if (sz < 1)
 			break;
 		len = *p++;
+		sz--;
 		if (sz < len)
 			break;
 		p += len;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9129bb3b5153..551a1eddf0fa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -437,12 +437,22 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
 
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
 {
-	struct nf_conntrack_expect *exp;
-	const struct hlist_node *next;
-	unsigned int i;
+	struct nf_conn_help *help = nfct_help(exp->master);
+	const struct nf_conntrack_helper *me = data;
+	const struct nf_conntrack_helper *this;
+
+	if (exp->helper == me)
+		return true;
 
+	this = rcu_dereference_protected(help->helper,
+					 lockdep_is_held(&nf_conntrack_expect_lock));
+	return this == me;
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
 	mutex_lock(&nf_ct_helper_mutex);
 	hlist_del_rcu(&me->hnode);
 	nf_ct_helper_count--;
@@ -453,21 +463,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 	 */
 	synchronize_rcu();
 
-	/* Get rid of expectations */
-	spin_lock_bh(&nf_conntrack_expect_lock);
-	for (i = 0; i < nf_ct_expect_hsize; i++) {
-		hlist_for_each_entry_safe(exp, next,
-					  &nf_ct_expect_hash[i], hnode) {
-			struct nf_conn_help *help = nfct_help(exp->master);
-			if ((rcu_dereference_protected(
-					help->helper,
-					lockdep_is_held(&nf_conntrack_expect_lock)
-					) == me || exp->helper == me))
-				nf_ct_remove_expect(exp);
-		}
-	}
-	spin_unlock_bh(&nf_conntrack_expect_lock);
-
+	nf_ct_expect_iterate_destroy(expect_iter_me, me);
 	nf_ct_iterate_destroy(unhelp, me);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index cf9ace70bece..397e6911214f 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -49,11 +49,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-static void generic_print_tuple(struct seq_file *s,
-				const struct nf_conntrack_tuple *tuple)
-{
-}
-
 static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			       unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -64,10 +59,8 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 
 struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
 	.l3proto	 = PF_UNSPEC,
-	.name		 = "unknown",
 	.pkt_to_tuple	 = generic_pkt_to_tuple,
 	.invert_tuple	 = generic_invert_tuple,
-	.print_tuple	 = generic_print_tuple,
 	.get_l4proto	 = generic_get_l4proto,
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7999e70c3bfb..59c08997bfdf 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -61,8 +61,8 @@ MODULE_LICENSE("GPL");
 static char __initdata version[] = "0.93";
 
 static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
-				       const struct nf_conntrack_tuple *tuple,
-				       struct nf_conntrack_l4proto *l4proto)
+				const struct nf_conntrack_tuple *tuple,
+				const struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 	struct nlattr *nest_parms;
@@ -86,7 +86,7 @@ nla_put_failure:
 
 static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
 				    const struct nf_conntrack_tuple *tuple,
-				    struct nf_conntrack_l3proto *l3proto)
+				    const struct nf_conntrack_l3proto *l3proto)
 {
 	int ret = 0;
 	struct nlattr *nest_parms;
@@ -109,9 +109,9 @@ nla_put_failure:
 static int ctnetlink_dump_tuples(struct sk_buff *skb,
 				 const struct nf_conntrack_tuple *tuple)
 {
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	int ret;
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
 
 	rcu_read_lock();
 	l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
@@ -163,7 +163,7 @@ nla_put_failure:
 
 static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
 {
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nlattr *nest_proto;
 	int ret;
 
@@ -533,22 +533,27 @@ nla_put_failure:
 	return -1;
 }
 
-static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
+#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
+static size_t ctnetlink_proto_size(const struct nf_conn *ct)
 {
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
-	size_t len = 0;
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
+	size_t len, len4 = 0;	/* len4: l4 tuple attrs, 0 if no callback */
 
-	rcu_read_lock();
 	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-	len += l3proto->nla_size;
+	len = l3proto->nla_size;
+	len *= 3u; /* ORIG, REPLY, MASTER */
 
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	len += l4proto->nla_size;
-	rcu_read_unlock();
+	if (l4proto->nlattr_tuple_size) {
+		len4 = l4proto->nlattr_tuple_size();
+		len4 *= 3u; /* ORIG, REPLY, MASTER */
+	}
 
-	return len;
+	return len + len4;
 }
+#endif /* CONFIG_NETFILTER_NETLINK_GLUE_CT || CONFIG_NF_CONNTRACK_EVENTS */
 
 static inline size_t ctnetlink_acct_size(const struct nf_conn *ct)
 {
@@ -664,7 +669,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	nfmsg->version	= NFNETLINK_V0;
 	nfmsg->res_id	= 0;
 
-	rcu_read_lock();
 	zone = nf_ct_zone(ct);
 
 	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -736,8 +740,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	    && ctnetlink_dump_mark(skb, ct) < 0)
 		goto nla_put_failure;
 #endif
-	rcu_read_unlock();
-
 	nlmsg_end(skb, nlh);
 	err = nfnetlink_send(skb, net, item->portid, group, item->report,
 			     GFP_ATOMIC);
@@ -747,7 +749,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	return 0;
 
 nla_put_failure:
-	rcu_read_unlock();
 	nlmsg_cancel(skb, nlh);
 nlmsg_failure:
 	kfree_skb(skb);
@@ -941,8 +942,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
 static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
 				       struct nf_conntrack_tuple *tuple)
 {
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nlattr *tb[CTA_PROTO_MAX+1];
-	struct nf_conntrack_l4proto *l4proto;
 	int ret = 0;
 
 	ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy,
@@ -1585,8 +1586,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
 				      const struct nlattr * const cda[])
 {
 	const struct nlattr *attr = cda[CTA_PROTOINFO];
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nlattr *tb[CTA_PROTOINFO_MAX+1];
-	struct nf_conntrack_l4proto *l4proto;
 	int err = 0;
 
 	err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy,
@@ -2213,7 +2214,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
 	const struct nf_conntrack_zone *zone;
 	struct nlattr *nest_parms;
 
-	rcu_read_lock();
 	zone = nf_ct_zone(ct);
 
 	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -2272,11 +2272,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
 #endif
 	if (ctnetlink_dump_labels(skb, ct) < 0)
 		goto nla_put_failure;
-	rcu_read_unlock();
 	return 0;
 
 nla_put_failure:
-	rcu_read_unlock();
 	return -ENOSPC;
 }
 
@@ -2483,11 +2481,11 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
 				   const struct nf_conntrack_tuple *tuple,
 				   const struct nf_conntrack_tuple_mask *mask)
 {
-	int ret;
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple m;
 	struct nlattr *nest_parms;
+	int ret;
 
 	memset(&m, 0xFF, sizeof(m));
 	memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
@@ -2661,17 +2659,14 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 	nfmsg->version	    = NFNETLINK_V0;
 	nfmsg->res_id	    = 0;
 
-	rcu_read_lock();
 	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
 		goto nla_put_failure;
-	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
 	nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
 	return 0;
 
 nla_put_failure:
-	rcu_read_unlock();
 	nlmsg_cancel(skb, nlh);
 nlmsg_failure:
 	kfree_skb(skb);
@@ -2910,6 +2905,21 @@ out:
 	return err == -EAGAIN ? -ENOBUFS : err;
 }
 
+/* Iterator callback: match expectations whose master uses helper @data. */
+static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
+{
+	const struct nf_conn_help *master_help = nfct_help(exp->master);
+	const char *wanted = data;
+
+	/* strcmp() yields 0 only on an exact helper-name match. */
+	return !strcmp(master_help->helper->name, wanted);
+}
+
+static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
+{
+	return true;	/* no filtering: every expectation matches */
+}
+
 static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 				struct sk_buff *skb, const struct nlmsghdr *nlh,
 				const struct nlattr * const cda[],
@@ -2918,10 +2928,8 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	struct hlist_node *next;
 	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_zone zone;
-	unsigned int i;
 	int err;
 
 	if (cda[CTA_EXPECT_TUPLE]) {
@@ -2961,49 +2969,15 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 		nf_ct_expect_put(exp);
 	} else if (cda[CTA_EXPECT_HELP_NAME]) {
 		char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
-		struct nf_conn_help *m_help;
-
-		/* delete all expectations for this helper */
-		spin_lock_bh(&nf_conntrack_expect_lock);
-		for (i = 0; i < nf_ct_expect_hsize; i++) {
-			hlist_for_each_entry_safe(exp, next,
-						  &nf_ct_expect_hash[i],
-						  hnode) {
-
-				if (!net_eq(nf_ct_exp_net(exp), net))
-					continue;
 
-				m_help = nfct_help(exp->master);
-				if (!strcmp(m_help->helper->name, name) &&
-				    del_timer(&exp->timeout)) {
-					nf_ct_unlink_expect_report(exp,
-							NETLINK_CB(skb).portid,
-							nlmsg_report(nlh));
-					nf_ct_expect_put(exp);
-				}
-			}
-		}
-		spin_unlock_bh(&nf_conntrack_expect_lock);
+		nf_ct_expect_iterate_net(net, expect_iter_name, name,
+					 NETLINK_CB(skb).portid,
+					 nlmsg_report(nlh));
 	} else {
 		/* This basically means we have to flush everything*/
-		spin_lock_bh(&nf_conntrack_expect_lock);
-		for (i = 0; i < nf_ct_expect_hsize; i++) {
-			hlist_for_each_entry_safe(exp, next,
-						  &nf_ct_expect_hash[i],
-						  hnode) {
-
-				if (!net_eq(nf_ct_exp_net(exp), net))
-					continue;
-
-				if (del_timer(&exp->timeout)) {
-					nf_ct_unlink_expect_report(exp,
-							NETLINK_CB(skb).portid,
-							nlmsg_report(nlh));
-					nf_ct_expect_put(exp);
-				}
-			}
-		}
-		spin_unlock_bh(&nf_conntrack_expect_lock);
+		nf_ct_expect_iterate_net(net, expect_iter_all, NULL,
+					 NETLINK_CB(skb).portid,
+					 nlmsg_report(nlh));
 	}
 
 	return 0;
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 6959e93063d4..11562f2a08bb 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -113,7 +113,6 @@ static void pptp_expectfn(struct nf_conn *ct,
 	/* Can you see how rusty this code is, compared with the pre-2.6.11
 	 * one? That's what happened to my shiny newnat of 2002 ;( -HW */
 
-	rcu_read_lock();
 	nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
 	if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK)
 		nf_nat_pptp_expectfn(ct, exp);
@@ -136,7 +135,6 @@ static void pptp_expectfn(struct nf_conn *ct,
 			pr_debug("not found\n");
 		}
 	}
-	rcu_read_unlock();
 }
 
 static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1dcad229c3cc..c8e9c9503a08 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -27,6 +27,7 @@
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_log.h>
 
 static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
 struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
@@ -63,9 +64,55 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
 	*header = NULL;
 	*table = NULL;
 }
+
+/* Log an invalid packet if the log_invalid sysctl selects @protonum. */
+__printf(5, 6)
+void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net,
+			    u16 pf, u8 protonum, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	/* A sysctl value of IPPROTO_RAW enables logging for any protocol. */
+	if (net->ct.sysctl_log_invalid != protonum &&
+	    net->ct.sysctl_log_invalid != IPPROTO_RAW)
+		return;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
+		      "nf_ct_proto_%d: %pV ", protonum, &vaf);
+	va_end(args);
+}
+EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
+
+/* Conntrack-aware wrapper: takes net, l3 and l4 numbers from @ct. */
+__printf(3, 4)
+void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
+			       const struct nf_conn *ct, const char *fmt, ...)
+{
+	struct net *net = nf_ct_net(ct);
+	struct va_format vaf;
+	va_list ap;
+
+	/* Common case: the log_invalid sysctl is 0, nothing to format. */
+	if (likely(!net->ct.sysctl_log_invalid))
+		return;
+
+	va_start(ap, fmt);
+	vaf.va = &ap;
+	vaf.fmt = fmt;
+
+	nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct), nf_ct_protonum(ct),
+			       "%pV", &vaf);
+	va_end(ap);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
 #endif
 
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
 __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
 {
 	if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
@@ -77,7 +124,7 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
 
 /* this is guaranteed to always return a valid protocol helper, since
  * it falls back to generic_protocol */
-struct nf_conntrack_l3proto *
+const struct nf_conntrack_l3proto *
 nf_ct_l3proto_find_get(u_int16_t l3proto)
 {
 	struct nf_conntrack_l3proto *p;
@@ -95,8 +142,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
 int
 nf_ct_l3proto_try_module_get(unsigned short l3proto)
 {
+	const struct nf_conntrack_l3proto *p;
 	int ret;
-	struct nf_conntrack_l3proto *p;
 
 retry:	p = nf_ct_l3proto_find_get(l3proto);
 	if (p == &nf_conntrack_l3proto_generic) {
@@ -125,7 +172,7 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
 
-int nf_ct_netns_get(struct net *net, u8 nfproto)
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
 {
 	const struct nf_conntrack_l3proto *l3proto;
 	int ret;
@@ -150,9 +197,33 @@ int nf_ct_netns_get(struct net *net, u8 nfproto)
 
 	return ret;
 }
+
+/* Wraps nf_ct_netns_do_get(); NFPROTO_INET expands to IPv4 plus IPv6. */
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+	int ret;
+
+	if (nfproto != NFPROTO_INET) {
+		ret = nf_ct_netns_do_get(net, nfproto);
+		return ret < 0 ? ret : 0;
+	}
+
+	ret = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+	if (ret < 0)
+		return ret;
+
+	ret = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+	if (ret < 0) {
+		/* Roll back the IPv4 get that already succeeded. */
+		nf_ct_netns_put(net, NFPROTO_IPV4);
+		return ret;
+	}
+
+	return 0;
+}
 EXPORT_SYMBOL_GPL(nf_ct_netns_get);
 
-void nf_ct_netns_put(struct net *net, u8 nfproto)
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
 {
 	const struct nf_conntrack_l3proto *l3proto;
 
@@ -171,12 +242,21 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
 
 	nf_ct_l3proto_module_put(nfproto);
 }
+
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
+{
+	if (nfproto == NFPROTO_INET) {	/* INET == IPv4 + IPv6 */
+		nf_ct_netns_do_put(net, NFPROTO_IPV4);
+		nfproto = NFPROTO_IPV6;	/* put by the call below */
+	}
+	nf_ct_netns_do_put(net, nfproto);
+}
 EXPORT_SYMBOL_GPL(nf_ct_netns_put);
 
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
 nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
 {
-	struct nf_conntrack_l4proto *p;
+	const struct nf_conntrack_l4proto *p;
 
 	rcu_read_lock();
 	p = __nf_ct_l4proto_find(l3num, l4num);
@@ -188,7 +268,7 @@ nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
 
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
 {
 	module_put(p->me);
 }
@@ -196,28 +276,28 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
 
 static int kill_l3proto(struct nf_conn *i, void *data)
 {
-	return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto;
+	return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
 }
 
 static int kill_l4proto(struct nf_conn *i, void *data)
 {
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	l4proto = data;
 	return nf_ct_protonum(i) == l4proto->l4proto &&
 	       nf_ct_l3num(i) == l4proto->l3proto;
 }
 
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
 	struct nf_conntrack_l3proto *old;
 
 	if (proto->l3proto >= NFPROTO_NUMPROTO)
 		return -EBUSY;
-
-	if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	if (proto->tuple_to_nlattr && proto->nla_size == 0)
 		return -EINVAL;
-
+#endif
 	mutex_lock(&nf_ct_proto_mutex);
 	old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
 					lockdep_is_held(&nf_ct_proto_mutex));
@@ -226,9 +306,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
 		goto out_unlock;
 	}
 
-	if (proto->nlattr_tuple_size)
-		proto->nla_size = 3 * proto->nlattr_tuple_size();
-
 	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
 
 out_unlock:
@@ -238,21 +315,7 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
 
-#ifdef CONFIG_SYSCTL
-extern unsigned int nf_conntrack_default_on;
-
-int nf_ct_l3proto_pernet_register(struct net *net,
-				  struct nf_conntrack_l3proto *proto)
-{
-	if (nf_conntrack_default_on == 0)
-		return 0;
-
-	return proto->net_ns_get ? proto->net_ns_get(net) : 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
-#endif
-
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
 {
 	BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
 
@@ -266,27 +329,12 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 
 	synchronize_rcu();
 	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_destroy(kill_l3proto, proto);
+	nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
 
-void nf_ct_l3proto_pernet_unregister(struct net *net,
-				     struct nf_conntrack_l3proto *proto)
-{
-	/*
-	 * nf_conntrack_default_on *might* have registered hooks.
-	 * ->net_ns_put must cope with more puts() than get(), i.e.
-	 * if nf_conntrack_default_on was 0 at time of
-	 * nf_ct_l3proto_pernet_register invocation this net_ns_put()
-	 * should be a noop.
-	 */
-	if (proto->net_ns_put)
-		proto->net_ns_put(net);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
-
 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
-					      struct nf_conntrack_l4proto *l4proto)
+				const struct nf_conntrack_l4proto *l4proto)
 {
 	if (l4proto->get_net_proto) {
 		/* statically built-in protocols use static per-net */
@@ -301,7 +349,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
 static
 int nf_ct_l4proto_register_sysctl(struct net *net,
 				  struct nf_proto_net *pn,
-				  struct nf_conntrack_l4proto *l4proto)
+				  const struct nf_conntrack_l4proto *l4proto)
 {
 	int err = 0;
 
@@ -324,8 +372,8 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
 
 static
 void nf_ct_l4proto_unregister_sysctl(struct net *net,
-				     struct nf_proto_net *pn,
-				     struct nf_conntrack_l4proto *l4proto)
+				struct nf_proto_net *pn,
+				const struct nf_conntrack_l4proto *l4proto)
 {
 #ifdef CONFIG_SYSCTL
 	if (pn->ctl_table_header != NULL)
@@ -383,8 +431,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
 	l4proto->nla_size = 0;
 	if (l4proto->nlattr_size)
 		l4proto->nla_size += l4proto->nlattr_size();
-	if (l4proto->nlattr_tuple_size)
-		l4proto->nla_size += 3 * l4proto->nlattr_tuple_size();
 
 	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
 			   l4proto);
@@ -395,7 +441,7 @@ out_unlock:
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
 
 int nf_ct_l4proto_pernet_register_one(struct net *net,
-				      struct nf_conntrack_l4proto *l4proto)
+				const struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 	struct nf_proto_net *pn = NULL;
@@ -420,7 +466,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
 
-static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
 
 {
 	BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
@@ -433,7 +479,7 @@ static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
 			   &nf_conntrack_l4proto_generic);
 }
 
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
 {
 	mutex_lock(&nf_ct_proto_mutex);
 	__nf_ct_l4proto_unregister_one(l4proto);
@@ -444,7 +490,7 @@ void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
 
 void nf_ct_l4proto_pernet_unregister_one(struct net *net,
-					 struct nf_conntrack_l4proto *l4proto)
+				const struct nf_conntrack_l4proto *l4proto)
 {
 	struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
 
@@ -469,8 +515,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
 	}
 	if (i != num_proto) {
 		ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
-		pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
-		       ver, l4proto[i]->name, ver);
+		pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
+		       ver, l4proto[i]->l4proto);
 		nf_ct_l4proto_unregister(l4proto, i);
 	}
 	return ret;
@@ -478,7 +524,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
 
 int nf_ct_l4proto_pernet_register(struct net *net,
-				  struct nf_conntrack_l4proto *l4proto[],
+				  struct nf_conntrack_l4proto *const l4proto[],
 				  unsigned int num_proto)
 {
 	int ret = -EINVAL;
@@ -490,8 +536,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
 			break;
 	}
 	if (i != num_proto) {
-		pr_err("nf_conntrack_%s%d: pernet registration failed\n",
-		       l4proto[i]->name,
+		pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
+		       l4proto[i]->l4proto,
 		       l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
 		nf_ct_l4proto_pernet_unregister(net, l4proto, i);
 	}
@@ -514,8 +560,8 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
 
 void nf_ct_l4proto_pernet_unregister(struct net *net,
-				     struct nf_conntrack_l4proto *l4proto[],
-				     unsigned int num_proto)
+				struct nf_conntrack_l4proto *const l4proto[],
+				unsigned int num_proto)
 {
 	while (num_proto-- != 0)
 		nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4707d997558a..2a446f4a554c 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -428,13 +428,13 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	default:
 		dn = dccp_pernet(net);
 		if (dn->dccp_loose == 0) {
-			msg = "nf_ct_dccp: not picking up existing connection ";
+			msg = "not picking up existing connection ";
 			goto out_invalid;
 		}
 	case CT_DCCP_REQUEST:
 		break;
 	case CT_DCCP_INVALID:
-		msg = "nf_ct_dccp: invalid state transition ";
+		msg = "invalid state transition ";
 		goto out_invalid;
 	}
 
@@ -447,9 +447,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 
 out_invalid:
-	if (LOG_INVALID(net, IPPROTO_DCCP))
-		nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
-			      NULL, "%s", msg);
+	nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
 	return false;
 }
 
@@ -469,10 +467,8 @@ static unsigned int *dccp_get_timeouts(struct net *net)
 
 static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 		       unsigned int dataoff, enum ip_conntrack_info ctinfo,
-		       u_int8_t pf, unsigned int hooknum,
 		       unsigned int *timeouts)
 {
-	struct net *net = nf_ct_net(ct);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	struct dccp_hdr _dh, *dh;
 	u_int8_t type, old_state, new_state;
@@ -534,15 +530,11 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 		ct->proto.dccp.last_pkt = type;
 
 		spin_unlock_bh(&ct->lock);
-		if (LOG_INVALID(net, IPPROTO_DCCP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_dccp: invalid packet ignored ");
+		nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
 		return NF_ACCEPT;
 	case CT_DCCP_INVALID:
 		spin_unlock_bh(&ct->lock);
-		if (LOG_INVALID(net, IPPROTO_DCCP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_dccp: invalid state transition ");
+		nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
 		return -NF_ACCEPT;
 	}
 
@@ -604,8 +596,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
 	return NF_ACCEPT;
 
 out_invalid:
-	if (LOG_INVALID(net, IPPROTO_DCCP))
-		nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg);
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
 	return -NF_ACCEPT;
 }
 
@@ -623,18 +614,12 @@ static bool dccp_can_early_drop(const struct nf_conn *ct)
 	return false;
 }
 
-static void dccp_print_tuple(struct seq_file *s,
-			     const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "sport=%hu dport=%hu ",
-		   ntohs(tuple->src.u.dccp.port),
-		   ntohs(tuple->dst.u.dccp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
 }
+#endif
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
@@ -880,7 +865,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
 struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
 	.l3proto		= AF_INET,
 	.l4proto		= IPPROTO_DCCP,
-	.name			= "dccp",
 	.pkt_to_tuple		= dccp_pkt_to_tuple,
 	.invert_tuple		= dccp_invert_tuple,
 	.new			= dccp_new,
@@ -888,8 +872,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
 	.get_timeouts		= dccp_get_timeouts,
 	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
-	.print_tuple		= dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= dccp_print_conntrack,
+#endif
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.to_nlattr		= dccp_to_nlattr,
 	.nlattr_size		= dccp_nlattr_size,
@@ -916,7 +901,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
 struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
 	.l3proto		= AF_INET6,
 	.l4proto		= IPPROTO_DCCP,
-	.name			= "dccp",
 	.pkt_to_tuple		= dccp_pkt_to_tuple,
 	.invert_tuple		= dccp_invert_tuple,
 	.new			= dccp_new,
@@ -924,8 +908,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
 	.get_timeouts		= dccp_get_timeouts,
 	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
-	.print_tuple		= dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= dccp_print_conntrack,
+#endif
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.to_nlattr		= dccp_to_nlattr,
 	.nlattr_size		= dccp_nlattr_size,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index d5868bad33a7..1f86ddf6649a 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,22 +17,10 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
 static bool nf_generic_should_process(u8 proto)
 {
 	switch (proto) {
-#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
-	case IPPROTO_SCTP:
-		return false;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
-	case IPPROTO_DCCP:
-		return false;
-#endif
 #ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
 	case IPPROTO_GRE:
 		return false;
 #endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
-	case IPPROTO_UDPLITE:
-		return false;
-#endif
 	default:
 		return true;
 	}
@@ -62,12 +50,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void generic_print_tuple(struct seq_file *s,
-				const struct nf_conntrack_tuple *tuple)
-{
-}
-
 static unsigned int *generic_get_timeouts(struct net *net)
 {
 	return &(generic_pernet(net)->timeout);
@@ -78,8 +60,6 @@ static int generic_packet(struct nf_conn *ct,
 			  const struct sk_buff *skb,
 			  unsigned int dataoff,
 			  enum ip_conntrack_info ctinfo,
-			  u_int8_t pf,
-			  unsigned int hooknum,
 			  unsigned int *timeout)
 {
 	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
@@ -187,10 +167,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
 {
 	.l3proto		= PF_UNSPEC,
 	.l4proto		= 255,
-	.name			= "unknown",
 	.pkt_to_tuple		= generic_pkt_to_tuple,
 	.invert_tuple		= generic_invert_tuple,
-	.print_tuple		= generic_print_tuple,
 	.packet			= generic_packet,
 	.get_timeouts		= generic_get_timeouts,
 	.new			= generic_new,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 87bb40a3feb5..a2503005d80b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -224,15 +224,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 	return true;
 }
 
-/* print gre part of tuple */
-static void gre_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "srckey=0x%x dstkey=0x%x ",
-		   ntohs(tuple->src.u.gre.key),
-		   ntohs(tuple->dst.u.gre.key));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* print private data for conntrack */
 static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
@@ -240,6 +232,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 		   (ct->proto.gre.timeout / HZ),
 		   (ct->proto.gre.stream_timeout / HZ));
 }
+#endif
 
 static unsigned int *gre_get_timeouts(struct net *net)
 {
@@ -251,8 +244,6 @@ static int gre_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
-		      u_int8_t pf,
-		      unsigned int hooknum,
 		      unsigned int *timeouts)
 {
 	/* If we've seen traffic both ways, this is a GRE connection.
@@ -364,11 +355,11 @@ static int gre_init_net(struct net *net, u_int16_t proto)
 static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
 	.l3proto	 = AF_INET,
 	.l4proto	 = IPPROTO_GRE,
-	.name		 = "gre",
 	.pkt_to_tuple	 = gre_pkt_to_tuple,
 	.invert_tuple	 = gre_invert_tuple,
-	.print_tuple	 = gre_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack = gre_print_conntrack,
+#endif
 	.get_timeouts    = gre_get_timeouts,
 	.packet		 = gre_packet,
 	.new		 = gre_new,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6eef29d2eec4..80faf04ddf15 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -174,20 +174,13 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void sctp_print_tuple(struct seq_file *s,
-			     const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "sport=%hu dport=%hu ",
-		   ntohs(tuple->src.u.sctp.port),
-		   ntohs(tuple->dst.u.sctp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]);
 }
+#endif
 
 #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)	\
 for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0;	\
@@ -313,8 +306,6 @@ static int sctp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
 		       enum ip_conntrack_info ctinfo,
-		       u_int8_t pf,
-		       unsigned int hooknum,
 		       unsigned int *timeouts)
 {
 	enum sctp_conntrack new_state, old_state;
@@ -530,8 +521,7 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
 	}
 	return NF_ACCEPT;
 out_invalid:
-	if (LOG_INVALID(net, IPPROTO_SCTP))
-		nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg);
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
 	return -NF_ACCEPT;
 }
 
@@ -791,11 +781,11 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
 struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_SCTP,
-	.name 			= "sctp",
 	.pkt_to_tuple 		= sctp_pkt_to_tuple,
 	.invert_tuple 		= sctp_invert_tuple,
-	.print_tuple 		= sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= sctp_print_conntrack,
+#endif
 	.packet 		= sctp_packet,
 	.get_timeouts		= sctp_get_timeouts,
 	.new 			= sctp_new,
@@ -828,11 +818,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
 struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	.l3proto		= PF_INET6,
 	.l4proto 		= IPPROTO_SCTP,
-	.name 			= "sctp",
 	.pkt_to_tuple 		= sctp_pkt_to_tuple,
 	.invert_tuple 		= sctp_invert_tuple,
-	.print_tuple 		= sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= sctp_print_conntrack,
+#endif
 	.packet 		= sctp_packet,
 	.get_timeouts		= sctp_get_timeouts,
 	.new 			= sctp_new,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9758a7dfd83e..b12fc07111d0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -301,20 +301,13 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void tcp_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "sport=%hu dport=%hu ",
-		   ntohs(tuple->src.u.tcp.port),
-		   ntohs(tuple->dst.u.tcp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 {
 	seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
 }
+#endif
 
 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
 {
@@ -500,8 +493,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			  unsigned int index,
 			  const struct sk_buff *skb,
 			  unsigned int dataoff,
-			  const struct tcphdr *tcph,
-			  u_int8_t pf)
+			  const struct tcphdr *tcph)
 {
 	struct net *net = nf_ct_net(ct);
 	struct nf_tcp_net *tn = tcp_pernet(net);
@@ -709,9 +701,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
 		    tn->tcp_be_liberal)
 			res = true;
-		if (!res && LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-			"nf_ct_tcp: %s ",
+		if (!res) {
+			nf_ct_l4proto_log_invalid(skb, ct,
+			"%s",
 			before(seq, sender->td_maxend + 1) ?
 			in_recv_win ?
 			before(sack, receiver->td_end + 1) ?
@@ -720,6 +712,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			: "ACK is over the upper bound (ACKed data not seen yet)"
 			: "SEQ is under the lower bound (already ACKed data retransmitted)"
 			: "SEQ is over the upper bound (over the window of the receiver)");
+		}
 	}
 
 	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
@@ -745,6 +738,12 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
 	[TCPHDR_ACK|TCPHDR_URG]			= 1,
 };
 
+static void tcp_error_log(const struct sk_buff *skb, struct net *net,
+			  u8 pf, const char *msg)
+{
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
+}
+
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
 static int tcp_error(struct net *net, struct nf_conn *tmpl,
 		     struct sk_buff *skb,
@@ -760,17 +759,13 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	/* Smaller that minimal TCP header? */
 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 	if (th == NULL) {
-		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				"nf_ct_tcp: short packet ");
+		tcp_error_log(skb, net, pf, "short packet");
 		return -NF_ACCEPT;
 	}
 
 	/* Not whole TCP header or malformed packet */
 	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
-		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				"nf_ct_tcp: truncated/malformed packet ");
+		tcp_error_log(skb, net, pf, "truncated packet");
 		return -NF_ACCEPT;
 	}
 
@@ -781,18 +776,14 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	/* FIXME: Source route IP option packets --RR */
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
-		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				  "nf_ct_tcp: bad TCP checksum ");
+		tcp_error_log(skb, net, pf, "bad checksum");
 		return -NF_ACCEPT;
 	}
 
 	/* Check TCP flags. */
 	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
 	if (!tcp_valid_flags[tcpflags]) {
-		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				  "nf_ct_tcp: invalid TCP flag combination ");
+		tcp_error_log(skb, net, pf, "invalid tcp flag combination");
 		return -NF_ACCEPT;
 	}
 
@@ -809,8 +800,6 @@ static int tcp_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
-		      u_int8_t pf,
-		      unsigned int hooknum,
 		      unsigned int *timeouts)
 {
 	struct net *net = nf_ct_net(ct);
@@ -947,10 +936,8 @@ static int tcp_packet(struct nf_conn *ct,
 					IP_CT_EXP_CHALLENGE_ACK;
 		}
 		spin_unlock_bh(&ct->lock);
-		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				  "nf_ct_tcp: invalid packet ignored in "
-				  "state %s ", tcp_conntrack_names[old_state]);
+		nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
+					  "state %s ", tcp_conntrack_names[old_state]);
 		return NF_ACCEPT;
 	case TCP_CONNTRACK_MAX:
 		/* Special case for SYN proxy: when the SYN to the server or
@@ -972,9 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
 		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
 			 dir, get_conntrack_index(th), old_state);
 		spin_unlock_bh(&ct->lock);
-		if (LOG_INVALID(net, IPPROTO_TCP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				  "nf_ct_tcp: invalid state ");
+		nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
 		return -NF_ACCEPT;
 	case TCP_CONNTRACK_TIME_WAIT:
 		/* RFC5961 compliance cause stack to send "challenge-ACK"
@@ -989,9 +974,7 @@ static int tcp_packet(struct nf_conn *ct,
 			/* Detected RFC5961 challenge ACK */
 			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
 			spin_unlock_bh(&ct->lock);
-			if (LOG_INVALID(net, IPPROTO_TCP))
-				nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_tcp: challenge-ACK ignored ");
+			nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
 			return NF_ACCEPT; /* Don't change state */
 		}
 		break;
@@ -1001,9 +984,7 @@ static int tcp_packet(struct nf_conn *ct,
 		    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
 			/* Invalid RST  */
 			spin_unlock_bh(&ct->lock);
-			if (LOG_INVALID(net, IPPROTO_TCP))
-				nf_log_packet(net, pf, 0, skb, NULL, NULL,
-					      NULL, "nf_ct_tcp: invalid RST ");
+			nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
 			return -NF_ACCEPT;
 		}
 		if (index == TCP_RST_SET
@@ -1030,7 +1011,7 @@ static int tcp_packet(struct nf_conn *ct,
 	}
 
 	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
-			   skb, dataoff, th, pf)) {
+			   skb, dataoff, th)) {
 		spin_unlock_bh(&ct->lock);
 		return -NF_ACCEPT;
 	}
@@ -1296,9 +1277,14 @@ static int tcp_nlattr_size(void)
 		+ nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
 }
 
-static int tcp_nlattr_tuple_size(void)
+static unsigned int tcp_nlattr_tuple_size(void)
 {
-	return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+	static unsigned int size __read_mostly;
+
+	if (!size)
+		size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
+
+	return size;
 }
 #endif
 
@@ -1556,11 +1542,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
 {
 	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_TCP,
-	.name 			= "tcp",
 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
 	.invert_tuple 		= tcp_invert_tuple,
-	.print_tuple 		= tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack 	= tcp_print_conntrack,
+#endif
 	.packet 		= tcp_packet,
 	.get_timeouts		= tcp_get_timeouts,
 	.new 			= tcp_new,
@@ -1594,11 +1580,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
 {
 	.l3proto		= PF_INET6,
 	.l4proto 		= IPPROTO_TCP,
-	.name 			= "tcp",
 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
 	.invert_tuple 		= tcp_invert_tuple,
-	.print_tuple 		= tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack 	= tcp_print_conntrack,
+#endif
 	.packet 		= tcp_packet,
 	.get_timeouts		= tcp_get_timeouts,
 	.new 			= tcp_new,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index f6ebce6178ca..3a5f727103af 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -63,15 +63,6 @@ static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
 	return true;
 }
 
-/* Print out the per-protocol part of the tuple. */
-static void udp_print_tuple(struct seq_file *s,
-			    const struct nf_conntrack_tuple *tuple)
-{
-	seq_printf(s, "sport=%hu dport=%hu ",
-		   ntohs(tuple->src.u.udp.port),
-		   ntohs(tuple->dst.u.udp.port));
-}
-
 static unsigned int *udp_get_timeouts(struct net *net)
 {
 	return udp_pernet(net)->timeouts;
@@ -82,8 +73,6 @@ static int udp_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
-		      u_int8_t pf,
-		      unsigned int hooknum,
 		      unsigned int *timeouts)
 {
 	/* If we've seen traffic both ways, this is some kind of UDP
@@ -109,6 +98,12 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
 }
 
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
+static void udplite_error_log(const struct sk_buff *skb, struct net *net,
+			      u8 pf, const char *msg)
+{
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
+}
+
 static int udplite_error(struct net *net, struct nf_conn *tmpl,
 			 struct sk_buff *skb,
 			 unsigned int dataoff,
@@ -122,9 +117,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
 	/* Header is too small? */
 	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
 	if (!hdr) {
-		if (LOG_INVALID(net, IPPROTO_UDPLITE))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_udplite: short packet ");
+		udplite_error_log(skb, net, pf, "short packet");
 		return -NF_ACCEPT;
 	}
 
@@ -132,17 +125,13 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
 	if (cscov == 0) {
 		cscov = udplen;
 	} else if (cscov < sizeof(*hdr) || cscov > udplen) {
-		if (LOG_INVALID(net, IPPROTO_UDPLITE))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_udplite: invalid checksum coverage ");
+		udplite_error_log(skb, net, pf, "invalid checksum coverage");
 		return -NF_ACCEPT;
 	}
 
 	/* UDPLITE mandates checksums */
 	if (!hdr->check) {
-		if (LOG_INVALID(net, IPPROTO_UDPLITE))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_udplite: checksum missing ");
+		udplite_error_log(skb, net, pf, "checksum missing");
 		return -NF_ACCEPT;
 	}
 
@@ -150,9 +139,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
 				pf)) {
-		if (LOG_INVALID(net, IPPROTO_UDPLITE))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_udplite: bad UDPLite checksum ");
+		udplite_error_log(skb, net, pf, "bad checksum");
 		return -NF_ACCEPT;
 	}
 
@@ -160,6 +147,12 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
 }
 #endif
 
+static void udp_error_log(const struct sk_buff *skb, struct net *net,
+			  u8 pf, const char *msg)
+{
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
+}
+
 static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 		     unsigned int dataoff,
 		     u_int8_t pf,
@@ -172,17 +165,13 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	/* Header is too small? */
 	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
 	if (hdr == NULL) {
-		if (LOG_INVALID(net, IPPROTO_UDP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				      "nf_ct_udp: short packet ");
+		udp_error_log(skb, net, pf, "short packet");
 		return -NF_ACCEPT;
 	}
 
 	/* Truncated/malformed packets */
 	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
-		if (LOG_INVALID(net, IPPROTO_UDP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				"nf_ct_udp: truncated/malformed packet ");
+		udp_error_log(skb, net, pf, "truncated/malformed packet");
 		return -NF_ACCEPT;
 	}
 
@@ -196,9 +185,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	 * FIXME: Source route IP option packets --RR */
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
-		if (LOG_INVALID(net, IPPROTO_UDP))
-			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
-				"nf_ct_udp: bad UDP checksum ");
+		udp_error_log(skb, net, pf, "bad checksum");
 		return -NF_ACCEPT;
 	}
 
@@ -313,11 +300,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
 {
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDP,
-	.name			= "udp",
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
-	.print_tuple		= udp_print_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
@@ -347,11 +332,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
 {
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDPLITE,
-	.name			= "udplite",
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
-	.print_tuple		= udp_print_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
@@ -381,11 +364,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
 {
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_UDP,
-	.name			= "udp",
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
-	.print_tuple		= udp_print_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
@@ -415,11 +396,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
 {
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_UDPLITE,
-	.name			= "udplite",
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
-	.print_tuple		= udp_print_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d38af4274335..4dbb5bad4363 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -884,7 +884,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 	tuple.dst.u3		= *daddr;
 	tuple.dst.u.udp.port	= port;
 
-	rcu_read_lock();
 	do {
 		exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
 
@@ -918,10 +917,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 			goto err1;
 	}
 
-	if (skip_expect) {
-		rcu_read_unlock();
+	if (skip_expect)
 		return NF_ACCEPT;
-	}
 
 	rtp_exp = nf_ct_expect_alloc(ct);
 	if (rtp_exp == NULL)
@@ -952,7 +949,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 err2:
 	nf_ct_expect_put(rtp_exp);
 err1:
-	rcu_read_unlock();
 	return ret;
 }
 
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index ccb5cb9043e0..5a101caa3e12 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -41,8 +41,62 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
             const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *l4proto)
 {
-	l3proto->print_tuple(s, tuple);
-	l4proto->print_tuple(s, tuple);
+	switch (l3proto->l3proto) {
+	case NFPROTO_IPV4:
+		seq_printf(s, "src=%pI4 dst=%pI4 ",
+			   &tuple->src.u3.ip, &tuple->dst.u3.ip);
+		break;
+	case NFPROTO_IPV6:
+		seq_printf(s, "src=%pI6 dst=%pI6 ",
+			   tuple->src.u3.ip6, tuple->dst.u3.ip6);
+		break;
+	default:
+		break;
+	}
+
+	switch (l4proto->l4proto) {
+	case IPPROTO_ICMP:
+		seq_printf(s, "type=%u code=%u id=%u ",
+			   tuple->dst.u.icmp.type,
+			   tuple->dst.u.icmp.code,
+			   ntohs(tuple->src.u.icmp.id));
+		break;
+	case IPPROTO_TCP:
+		seq_printf(s, "sport=%hu dport=%hu ",
+			   ntohs(tuple->src.u.tcp.port),
+			   ntohs(tuple->dst.u.tcp.port));
+		break;
+	case IPPROTO_UDPLITE: /* fallthrough */
+	case IPPROTO_UDP:
+		seq_printf(s, "sport=%hu dport=%hu ",
+			   ntohs(tuple->src.u.udp.port),
+			   ntohs(tuple->dst.u.udp.port));
+
+		break;
+	case IPPROTO_DCCP:
+		seq_printf(s, "sport=%hu dport=%hu ",
+			   ntohs(tuple->src.u.dccp.port),
+			   ntohs(tuple->dst.u.dccp.port));
+		break;
+	case IPPROTO_SCTP:
+		seq_printf(s, "sport=%hu dport=%hu ",
+			   ntohs(tuple->src.u.sctp.port),
+			   ntohs(tuple->dst.u.sctp.port));
+		break;
+	case IPPROTO_ICMPV6:
+		seq_printf(s, "type=%u code=%u id=%u ",
+			   tuple->dst.u.icmp.type,
+			   tuple->dst.u.icmp.code,
+			   ntohs(tuple->src.u.icmp.id));
+		break;
+	case IPPROTO_GRE:
+		seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+			   ntohs(tuple->src.u.gre.key),
+			   ntohs(tuple->dst.u.gre.key));
+		break;
+	default:
+		break;
+	}
 }
 EXPORT_SYMBOL_GPL(print_tuple);
 
@@ -198,6 +252,37 @@ ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
 }
 #endif
 
+/* Map a layer-3 protocol family number to the name shown by ct_seq_show(). */
+static const char *l3proto_name(u16 proto)
+{
+	switch (proto) {
+	case AF_INET: return "ipv4";
+	case AF_INET6: return "ipv6";
+	}
+
+	return "unknown";
+}
+
+/* Map a layer-4 protocol number to the name shown by ct_seq_show().
+ * Every protocol that print_tuple() formats has an entry here, so an
+ * ICMPv6 conntrack is not listed as "unknown".
+ */
+static const char *l4proto_name(u16 proto)
+{
+	switch (proto) {
+	case IPPROTO_ICMP: return "icmp";
+	case IPPROTO_TCP: return "tcp";
+	case IPPROTO_UDP: return "udp";
+	case IPPROTO_DCCP: return "dccp";
+	case IPPROTO_GRE: return "gre";
+	case IPPROTO_SCTP: return "sctp";
+	case IPPROTO_UDPLITE: return "udplite";
+	case IPPROTO_ICMPV6: return "icmpv6";
+	}
+
+	return "unknown";
+}
+
 /* return 0 on success, 1 in case of error */
 static int ct_seq_show(struct seq_file *s, void *v)
 {
@@ -208,7 +287,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	struct net *net = seq_file_net(s);
 	int ret = 0;
 
-	NF_CT_ASSERT(ct);
+	WARN_ON(!ct);
 	if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
 		return 0;
 
@@ -225,14 +304,14 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		goto release;
 
 	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-	NF_CT_ASSERT(l3proto);
+	WARN_ON(!l3proto);
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-	NF_CT_ASSERT(l4proto);
+	WARN_ON(!l4proto);
 
 	ret = -ENOSPC;
 	seq_printf(s, "%-8s %u %-8s %u %ld ",
-		   l3proto->name, nf_ct_l3num(ct),
-		   l4proto->name, nf_ct_protonum(ct),
+		   l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+		   l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
 		   nf_ct_expires(ct)  / HZ);
 
 	if (l4proto->print_conntrack)
@@ -452,9 +531,6 @@ static int log_invalid_proto_max __read_mostly = 255;
 /* size the user *wants to set */
 static unsigned int nf_conntrack_htable_size_user __read_mostly;
 
-extern unsigned int nf_conntrack_default_on;
-unsigned int nf_conntrack_default_on __read_mostly = 1;
-
 static int
 nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -520,13 +596,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "nf_conntrack_default_on",
-		.data		= &nf_conntrack_default_on,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{ }
 };
 
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index bfa742da83af..44284cd2528d 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _NF_INTERNALS_H
 #define _NF_INTERNALS_H
 
@@ -5,17 +6,11 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...)  printk(KERN_DEBUG format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
 /* nf_queue.c */
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
-	     struct nf_hook_entry **entryp, unsigned int verdict);
+	     const struct nf_hook_entries *entries, unsigned int index,
+	     unsigned int verdict);
 unsigned int nf_queue_nf_hook_drop(struct net *net);
-int __init netfilter_queue_init(void);
 
 /* nf_log.c */
 int __init netfilter_log_init(void);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index eb541786ccb7..6c38421e31f9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,19 +30,17 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_nat.h>
 
+static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
+
 static DEFINE_MUTEX(nf_nat_proto_mutex);
 static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
 						__read_mostly;
 static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
 						__read_mostly;
 
-struct nf_nat_conn_key {
-	const struct net *net;
-	const struct nf_conntrack_tuple *tuple;
-	const struct nf_conntrack_zone *zone;
-};
-
-static struct rhltable nf_nat_bysource_table;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
 
 inline const struct nf_nat_l3proto *
 __nf_nat_l3proto_find(u8 family)
@@ -118,17 +116,19 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 EXPORT_SYMBOL(nf_xfrm_me_harder);
 #endif /* CONFIG_XFRM */
 
-static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
+/* We keep an extra hash for each conntrack, for fast searching. */
+static unsigned int
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
 {
-	const struct nf_conntrack_tuple *t;
-	const struct nf_conn *ct = data;
+	unsigned int hash;
+
+	get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
 
-	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 	/* Original src, to ensure we map it consistently if poss. */
+	hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+		      tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
 
-	seed ^= net_hash_mix(nf_ct_net(ct));
-	return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
-		      t->dst.protonum ^ seed);
+	return reciprocal_scale(hash, nf_nat_htable_size);
 }
 
 /* Is this tuple already taken? (not by us) */
@@ -184,28 +184,6 @@ same_src(const struct nf_conn *ct,
 		t->src.u.all == tuple->src.u.all);
 }
 
-static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
-			       const void *obj)
-{
-	const struct nf_nat_conn_key *key = arg->key;
-	const struct nf_conn *ct = obj;
-
-	if (!same_src(ct, key->tuple) ||
-	    !net_eq(nf_ct_net(ct), key->net) ||
-	    !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL))
-		return 1;
-
-	return 0;
-}
-
-static struct rhashtable_params nf_nat_bysource_params = {
-	.head_offset = offsetof(struct nf_conn, nat_bysource),
-	.obj_hashfn = nf_nat_bysource_hash,
-	.obj_cmpfn = nf_nat_bysource_cmp,
-	.nelem_hint = 256,
-	.min_size = 1024,
-};
-
 /* Only called for SRC manip */
 static int
 find_appropriate_src(struct net *net,
@@ -216,26 +194,22 @@ find_appropriate_src(struct net *net,
 		     struct nf_conntrack_tuple *result,
 		     const struct nf_nat_range *range)
 {
+	unsigned int h = hash_by_src(net, tuple);
 	const struct nf_conn *ct;
-	struct nf_nat_conn_key key = {
-		.net = net,
-		.tuple = tuple,
-		.zone = zone
-	};
-	struct rhlist_head *hl, *h;
 
-	hl = rhltable_lookup(&nf_nat_bysource_table, &key,
-			     nf_nat_bysource_params);
-
-	rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) {
-		nf_ct_invert_tuplepr(result,
-				     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-		result->dst = tuple->dst;
-
-		if (in_range(l3proto, l4proto, result, range))
-			return 1;
+	hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
+		if (same_src(ct, tuple) &&
+		    net_eq(net, nf_ct_net(ct)) &&
+		    nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
+			/* Copy source part from reply tuple. */
+			nf_ct_invert_tuplepr(result,
+				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+			result->dst = tuple->dst;
+
+			if (in_range(l3proto, l4proto, result, range))
+				return 1;
+		}
 	}
-
 	return 0;
 }
 
@@ -408,15 +382,18 @@ nf_nat_setup_info(struct nf_conn *ct,
 		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype)
 {
+	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_tuple curr_tuple, new_tuple;
 
 	/* Can't setup nat info for confirmed ct. */
 	if (nf_ct_is_confirmed(ct))
 		return NF_ACCEPT;
 
-	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
-		     maniptype == NF_NAT_MANIP_DST);
-	BUG_ON(nf_nat_initialized(ct, maniptype));
+	WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
+		maniptype != NF_NAT_MANIP_DST);
+
+	if (WARN_ON(nf_nat_initialized(ct, maniptype)))
+		return NF_DROP;
 
 	/* What we've got will look like inverse of reply. Normally
 	 * this is what is in the conntrack, except for prior
@@ -441,25 +418,22 @@ nf_nat_setup_info(struct nf_conn *ct,
 		else
 			ct->status |= IPS_DST_NAT;
 
-		if (nfct_help(ct))
+		if (nfct_help(ct) && !nfct_seqadj(ct))
 			if (!nfct_seqadj_ext_add(ct))
 				return NF_DROP;
 	}
 
 	if (maniptype == NF_NAT_MANIP_SRC) {
-		struct nf_nat_conn_key key = {
-			.net = nf_ct_net(ct),
-			.tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-			.zone = nf_ct_zone(ct),
-		};
-		int err;
-
-		err = rhltable_insert_key(&nf_nat_bysource_table,
-					  &key,
-					  &ct->nat_bysource,
-					  nf_nat_bysource_params);
-		if (err)
-			return NF_DROP;
+		unsigned int srchash;
+		spinlock_t *lock;
+
+		srchash = hash_by_src(net,
+				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+		lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
+		spin_lock_bh(lock);
+		hlist_add_head_rcu(&ct->nat_bysource,
+				   &nf_nat_bysource[srchash]);
+		spin_unlock_bh(lock);
 	}
 
 	/* It's done. */
@@ -553,23 +527,29 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
 	return i->status & IPS_NAT_MASK ? 1 : 0;
 }
 
+static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
+{
+	unsigned int h;
+
+	h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
+	hlist_del_rcu(&ct->nat_bysource);
+	spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
+}
+
 static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
 {
 	if (nf_nat_proto_remove(ct, data))
 		return 1;
 
-	if ((ct->status & IPS_SRC_NAT_DONE) == 0)
-		return 0;
-
-	/* This netns is being destroyed, and conntrack has nat null binding.
+	/* This module is being removed and conntrack has nat null binding.
 	 * Remove it from bysource hash, as the table will be freed soon.
 	 *
 	 * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
 	 * will delete entry from already-freed table.
 	 */
-	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
-	rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
-			nf_nat_bysource_params);
+	if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status))
+		__nf_nat_cleanup_conntrack(ct);
 
 	/* don't delete conntrack.  Although that would make things a lot
 	 * simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -698,8 +678,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
 static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 {
 	if (ct->status & IPS_SRC_NAT_DONE)
-		rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
-				nf_nat_bysource_params);
+		__nf_nat_cleanup_conntrack(ct);
 }
 
 static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -821,19 +800,27 @@ static struct nf_ct_helper_expectfn follow_master_nat = {
 
 static int __init nf_nat_init(void)
 {
-	int ret;
+	int ret, i;
 
-	ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
-	if (ret)
-		return ret;
+	/* Leave them the same for the moment. */
+	nf_nat_htable_size = nf_conntrack_htable_size;
+	if (nf_nat_htable_size < CONNTRACK_LOCKS)
+		nf_nat_htable_size = CONNTRACK_LOCKS;
+
+	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+	if (!nf_nat_bysource)
+		return -ENOMEM;
 
 	ret = nf_ct_extend_register(&nat_extend);
 	if (ret < 0) {
-		rhltable_destroy(&nf_nat_bysource_table);
+		nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
 		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
 		return ret;
 	}
 
+	for (i = 0; i < CONNTRACK_LOCKS; i++)
+		spin_lock_init(&nf_nat_locks[i]);
+
 	nf_ct_helper_expectfn_register(&follow_master_nat);
 
 	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
@@ -863,8 +850,8 @@ static void __exit nf_nat_cleanup(void)
 
 	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		kfree(nf_nat_l4protos[i]);
-
-	rhltable_destroy(&nf_nat_bysource_table);
+	synchronize_net();
+	nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
 }
 
 MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index e84a578dbe35..d76afafdc699 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -134,7 +134,7 @@ static int __init nf_nat_ftp_init(void)
 }
 
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
 {
 	printk(KERN_INFO KBUILD_MODNAME
 	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 0648cb096bd8..dcb5f6375d9d 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -106,7 +106,7 @@ static int __init nf_nat_irc_init(void)
 }
 
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
 {
 	printk(KERN_INFO KBUILD_MODNAME
 	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 86067560a318..25b06b959118 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -38,11 +38,11 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
 	__be32 newdst;
 	struct nf_nat_range newrange;
 
-	NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
-		     hooknum == NF_INET_LOCAL_OUT);
+	WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
+		hooknum != NF_INET_LOCAL_OUT);
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
 
 	/* Local packets: make them go to loopback */
 	if (hooknum == NF_INET_LOCAL_OUT) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 043850c9d154..f7e21953b1de 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -109,9 +109,11 @@ unsigned int nf_queue_nf_hook_drop(struct net *net)
 
 	return count;
 }
+EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
 
 static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
-		      struct nf_hook_entry *hook_entry, unsigned int queuenum)
+		      const struct nf_hook_entries *entries,
+		      unsigned int index, unsigned int queuenum)
 {
 	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
@@ -139,7 +141,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
 		.state	= *state,
-		.hook	= hook_entry,
+		.hook_index = index,
 		.size	= sizeof(*entry) + afinfo->route_key_size,
 	};
 
@@ -162,18 +164,16 @@ err:
 
 /* Packets leaving via this function must come back through nf_reinject(). */
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
-	     struct nf_hook_entry **entryp, unsigned int verdict)
+	     const struct nf_hook_entries *entries, unsigned int index,
+	     unsigned int verdict)
 {
-	struct nf_hook_entry *entry = *entryp;
 	int ret;
 
-	ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
+	ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS);
 	if (ret < 0) {
 		if (ret == -ESRCH &&
-		    (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
-			*entryp = rcu_dereference(entry->next);
+		    (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
 			return 1;
-		}
 		kfree_skb(skb);
 	}
 
@@ -182,33 +182,62 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 
+/* Run the hooks in @hooks starting at *@index until one of them returns
+ * something other than NF_ACCEPT (NF_REPEAT re-runs the same hook).
+ * *@index is written back before every return, so the caller can hand
+ * the position of the hook that issued the verdict on to nf_queue().
+ */
 static unsigned int nf_iterate(struct sk_buff *skb,
 			       struct nf_hook_state *state,
-			       struct nf_hook_entry **entryp)
+			       const struct nf_hook_entries *hooks,
+			       unsigned int *index)
 {
-	unsigned int verdict;
+	const struct nf_hook_entry *hook;
+	unsigned int verdict, i = *index;
 
-	do {
+	while (i < hooks->num_hook_entries) {
+		hook = &hooks->hooks[i];
 repeat:
-		verdict = nf_hook_entry_hookfn((*entryp), skb, state);
+		verdict = nf_hook_entry_hookfn(hook, skb, state);
 		if (verdict != NF_ACCEPT) {
+			*index = i;
 			if (verdict != NF_REPEAT)
 				return verdict;
 			goto repeat;
 		}
-		*entryp = rcu_dereference((*entryp)->next);
-	} while (*entryp);
+		i++;
+	}
 
+	*index = i;
 	return NF_ACCEPT;
 }
 
+/* Caller must hold rcu read-side lock */
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
-	struct nf_hook_entry *hook_entry = entry->hook;
+	const struct nf_hook_entry *hook_entry;
+	const struct nf_hook_entries *hooks;
 	struct sk_buff *skb = entry->skb;
 	const struct nf_afinfo *afinfo;
+	const struct net *net;
+	unsigned int i;
 	int err;
+	u8 pf;
+
+	net = entry->state.net;
+	pf = entry->state.pf;
+
+	hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
 
 	nf_queue_entry_release_refs(entry);
 
+	i = entry->hook_index;
+	if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
+		kfree_skb(skb);
+		kfree(entry);
+		return;
+	}
+
+	hook_entry = &hooks->hooks[i];
+
 	/* Continue traversal iff userspace said ok... */
 	if (verdict == NF_REPEAT)
 		verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
@@ -220,27 +243,22 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	}
 
 	if (verdict == NF_ACCEPT) {
-		hook_entry = rcu_dereference(hook_entry->next);
-		if (hook_entry)
 next_hook:
-			verdict = nf_iterate(skb, &entry->state, &hook_entry);
+		++i;
+		verdict = nf_iterate(skb, &entry->state, hooks, &i);
 	}
 
 	switch (verdict & NF_VERDICT_MASK) {
 	case NF_ACCEPT:
 	case NF_STOP:
-okfn:
 		local_bh_disable();
 		entry->state.okfn(entry->state.net, entry->state.sk, skb);
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
-		err = nf_queue(skb, &entry->state, &hook_entry, verdict);
-		if (err == 1) {
-			if (hook_entry)
-				goto next_hook;
-			goto okfn;
-		}
+		err = nf_queue(skb, &entry->state, hooks, i, verdict);
+		if (err == 1)
+			goto next_hook;
 		break;
 	case NF_STOLEN:
 		break;
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index c68c1e58b362..46cb3786e0ec 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -33,7 +34,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg)
 				reg->set_optmin, reg->set_optmax)
 			|| overlap(ops->get_optmin, ops->get_optmax,
 				   reg->get_optmin, reg->get_optmax))) {
-			NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+			pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
 				ops->set_optmin, ops->set_optmax,
 				ops->get_optmin, ops->get_optmax,
 				reg->set_optmin, reg->set_optmax,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7843efa33c59..d8327b43e4dc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -726,7 +726,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	if (table == NULL)
 		goto err2;
 
-	nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
+	table->name = nla_strdup(name, GFP_KERNEL);
+	if (table->name == NULL)
+		goto err3;
+
 	INIT_LIST_HEAD(&table->chains);
 	INIT_LIST_HEAD(&table->sets);
 	INIT_LIST_HEAD(&table->objects);
@@ -735,10 +738,12 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
 	err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
 	if (err < 0)
-		goto err3;
+		goto err4;
 
 	list_add_tail_rcu(&table->list, &afi->tables);
 	return 0;
+err4:
+	kfree(table->name);
 err3:
 	kfree(table);
 err2:
@@ -855,6 +860,10 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 	if (IS_ERR(table))
 		return PTR_ERR(table);
 
+	if (nlh->nlmsg_flags & NLM_F_NONREC &&
+	    table->use > 0)
+		return -EBUSY;
+
 	ctx.afi = afi;
 	ctx.table = table;
 
@@ -865,6 +874,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
 {
 	BUG_ON(ctx->table->use > 0);
 
+	kfree(ctx->table->name);
 	kfree(ctx->table);
 	module_put(ctx->afi->owner);
 }
@@ -1038,7 +1048,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
 		if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
 			goto nla_put_failure;
 
-		if (nft_dump_stats(skb, nft_base_chain(chain)->stats))
+		if (basechain->stats && nft_dump_stats(skb, basechain->stats))
 			goto nla_put_failure;
 	}
 
@@ -1240,10 +1250,14 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
 
 		module_put(basechain->type->owner);
 		free_percpu(basechain->stats);
+		if (basechain->stats)
+			static_branch_dec(&nft_counters_enabled);
 		if (basechain->ops[0].dev != NULL)
 			dev_put(basechain->ops[0].dev);
+		kfree(chain->name);
 		kfree(basechain);
 	} else {
+		kfree(chain->name);
 		kfree(chain);
 	}
 }
@@ -1325,155 +1339,18 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
 		dev_put(hook->dev);
 }
 
-static int nf_tables_newchain(struct net *net, struct sock *nlsk,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[],
-			      struct netlink_ext_ack *extack)
+static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+			      u8 policy, bool create)
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nlattr * uninitialized_var(name);
-	struct nft_af_info *afi;
-	struct nft_table *table;
+	const struct nlattr * const *nla = ctx->nla;
+	struct nft_table *table = ctx->table;
+	struct nft_af_info *afi = ctx->afi;
+	struct nft_base_chain *basechain;
+	struct nft_stats __percpu *stats;
+	struct net *net = ctx->net;
 	struct nft_chain *chain;
-	struct nft_base_chain *basechain = NULL;
-	u8 genmask = nft_genmask_next(net);
-	int family = nfmsg->nfgen_family;
-	u8 policy = NF_ACCEPT;
-	u64 handle = 0;
 	unsigned int i;
-	struct nft_stats __percpu *stats;
 	int err;
-	bool create;
-	struct nft_ctx ctx;
-
-	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
-
-	afi = nf_tables_afinfo_lookup(net, family, true);
-	if (IS_ERR(afi))
-		return PTR_ERR(afi);
-
-	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
-	if (IS_ERR(table))
-		return PTR_ERR(table);
-
-	chain = NULL;
-	name = nla[NFTA_CHAIN_NAME];
-
-	if (nla[NFTA_CHAIN_HANDLE]) {
-		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
-		chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
-		if (IS_ERR(chain))
-			return PTR_ERR(chain);
-	} else {
-		chain = nf_tables_chain_lookup(table, name, genmask);
-		if (IS_ERR(chain)) {
-			if (PTR_ERR(chain) != -ENOENT)
-				return PTR_ERR(chain);
-			chain = NULL;
-		}
-	}
-
-	if (nla[NFTA_CHAIN_POLICY]) {
-		if (chain != NULL &&
-		    !nft_is_base_chain(chain))
-			return -EOPNOTSUPP;
-
-		if (chain == NULL &&
-		    nla[NFTA_CHAIN_HOOK] == NULL)
-			return -EOPNOTSUPP;
-
-		policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
-		switch (policy) {
-		case NF_DROP:
-		case NF_ACCEPT:
-			break;
-		default:
-			return -EINVAL;
-		}
-	}
-
-	if (chain != NULL) {
-		struct nft_stats *stats = NULL;
-		struct nft_trans *trans;
-
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
-			return -EEXIST;
-		if (nlh->nlmsg_flags & NLM_F_REPLACE)
-			return -EOPNOTSUPP;
-
-		if (nla[NFTA_CHAIN_HOOK]) {
-			struct nft_base_chain *basechain;
-			struct nft_chain_hook hook;
-			struct nf_hook_ops *ops;
-
-			if (!nft_is_base_chain(chain))
-				return -EBUSY;
-
-			err = nft_chain_parse_hook(net, nla, afi, &hook,
-						   create);
-			if (err < 0)
-				return err;
-
-			basechain = nft_base_chain(chain);
-			if (basechain->type != hook.type) {
-				nft_chain_release_hook(&hook);
-				return -EBUSY;
-			}
-
-			for (i = 0; i < afi->nops; i++) {
-				ops = &basechain->ops[i];
-				if (ops->hooknum != hook.num ||
-				    ops->priority != hook.priority ||
-				    ops->dev != hook.dev) {
-					nft_chain_release_hook(&hook);
-					return -EBUSY;
-				}
-			}
-			nft_chain_release_hook(&hook);
-		}
-
-		if (nla[NFTA_CHAIN_HANDLE] && name) {
-			struct nft_chain *chain2;
-
-			chain2 = nf_tables_chain_lookup(table,
-							nla[NFTA_CHAIN_NAME],
-							genmask);
-			if (IS_ERR(chain2))
-				return PTR_ERR(chain2);
-		}
-
-		if (nla[NFTA_CHAIN_COUNTERS]) {
-			if (!nft_is_base_chain(chain))
-				return -EOPNOTSUPP;
-
-			stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
-			if (IS_ERR(stats))
-				return PTR_ERR(stats);
-		}
-
-		nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
-		trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
-					sizeof(struct nft_trans_chain));
-		if (trans == NULL) {
-			free_percpu(stats);
-			return -ENOMEM;
-		}
-
-		nft_trans_chain_stats(trans) = stats;
-		nft_trans_chain_update(trans) = true;
-
-		if (nla[NFTA_CHAIN_POLICY])
-			nft_trans_chain_policy(trans) = policy;
-		else
-			nft_trans_chain_policy(trans) = -1;
-
-		if (nla[NFTA_CHAIN_HANDLE] && name) {
-			nla_strlcpy(nft_trans_chain_name(trans), name,
-				    NFT_CHAIN_MAXNAMELEN);
-		}
-		list_add_tail(&trans->list, &net->nft.commit_list);
-		return 0;
-	}
 
 	if (table->use == UINT_MAX)
 		return -EOVERFLOW;
@@ -1504,14 +1381,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 				return PTR_ERR(stats);
 			}
 			basechain->stats = stats;
-		} else {
-			stats = netdev_alloc_pcpu_stats(struct nft_stats);
-			if (stats == NULL) {
-				nft_chain_release_hook(&hook);
-				kfree(basechain);
-				return -ENOMEM;
-			}
-			rcu_assign_pointer(basechain->stats, stats);
+			static_branch_inc(&nft_counters_enabled);
 		}
 
 		hookfn = hook.type->hooks[hook.num];
@@ -1539,31 +1409,204 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 		if (chain == NULL)
 			return -ENOMEM;
 	}
-
 	INIT_LIST_HEAD(&chain->rules);
 	chain->handle = nf_tables_alloc_handle(table);
 	chain->table = table;
-	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+	chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+	if (!chain->name) {
+		err = -ENOMEM;
+		goto err1;
+	}
 
 	err = nf_tables_register_hooks(net, table, chain, afi->nops);
 	if (err < 0)
 		goto err1;
 
-	nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
-	err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+	ctx->chain = chain;
+	err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
 	if (err < 0)
 		goto err2;
 
 	table->use++;
 	list_add_tail_rcu(&chain->list, &table->chains);
+
 	return 0;
 err2:
 	nf_tables_unregister_hooks(net, table, chain, afi->nops);
 err1:
 	nf_tables_chain_destroy(chain);
+
 	return err;
 }
 
+static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
+			      bool create)
+{
+	const struct nlattr * const *nla = ctx->nla;
+	struct nft_table *table = ctx->table;
+	struct nft_chain *chain = ctx->chain;
+	struct nft_af_info *afi = ctx->afi;
+	struct nft_base_chain *basechain;
+	struct nft_stats *stats = NULL;
+	struct nft_chain_hook hook;
+	const struct nlattr *name;
+	struct nf_hook_ops *ops;
+	struct nft_trans *trans;
+	int err, i;
+
+	if (nla[NFTA_CHAIN_HOOK]) {
+		if (!nft_is_base_chain(chain))
+			return -EBUSY;
+
+		err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook,
+					   create);
+		if (err < 0)
+			return err;
+
+		basechain = nft_base_chain(chain);
+		if (basechain->type != hook.type) {
+			nft_chain_release_hook(&hook);
+			return -EBUSY;
+		}
+
+		for (i = 0; i < afi->nops; i++) {
+			ops = &basechain->ops[i];
+			if (ops->hooknum != hook.num ||
+			    ops->priority != hook.priority ||
+			    ops->dev != hook.dev) {
+				nft_chain_release_hook(&hook);
+				return -EBUSY;
+			}
+		}
+		nft_chain_release_hook(&hook);
+	}
+
+	if (nla[NFTA_CHAIN_HANDLE] &&
+	    nla[NFTA_CHAIN_NAME]) {
+		struct nft_chain *chain2;
+
+		chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME],
+						genmask);
+		if (!IS_ERR(chain2))
+			return -EEXIST;
+	}
+
+	if (nla[NFTA_CHAIN_COUNTERS]) {
+		if (!nft_is_base_chain(chain))
+			return -EOPNOTSUPP;
+
+		stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+		if (IS_ERR(stats))
+			return PTR_ERR(stats);
+	}
+
+	trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
+				sizeof(struct nft_trans_chain));
+	if (trans == NULL) {
+		free_percpu(stats);
+		return -ENOMEM;
+	}
+
+	nft_trans_chain_stats(trans) = stats;
+	nft_trans_chain_update(trans) = true;
+
+	if (nla[NFTA_CHAIN_POLICY])
+		nft_trans_chain_policy(trans) = policy;
+	else
+		nft_trans_chain_policy(trans) = -1;
+
+	name = nla[NFTA_CHAIN_NAME];
+	if (nla[NFTA_CHAIN_HANDLE] && name) {
+		nft_trans_chain_name(trans) =
+			nla_strdup(name, GFP_KERNEL);
+		if (!nft_trans_chain_name(trans)) {
+			kfree(trans);
+			free_percpu(stats);
+			return -ENOMEM;
+		}
+	}
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+	return 0;
+}
+
+static int nf_tables_newchain(struct net *net, struct sock *nlsk,
+			      struct sk_buff *skb, const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[],
+			      struct netlink_ext_ack *extack)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr * uninitialized_var(name);
+	u8 genmask = nft_genmask_next(net);
+	int family = nfmsg->nfgen_family;
+	struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	u8 policy = NF_ACCEPT;
+	struct nft_ctx ctx;
+	u64 handle = 0;
+	bool create;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(net, family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = NULL;
+	name = nla[NFTA_CHAIN_NAME];
+
+	if (nla[NFTA_CHAIN_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+		chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+		if (IS_ERR(chain))
+			return PTR_ERR(chain);
+	} else {
+		chain = nf_tables_chain_lookup(table, name, genmask);
+		if (IS_ERR(chain)) {
+			if (PTR_ERR(chain) != -ENOENT)
+				return PTR_ERR(chain);
+			chain = NULL;
+		}
+	}
+
+	if (nla[NFTA_CHAIN_POLICY]) {
+		if (chain != NULL &&
+		    !nft_is_base_chain(chain))
+			return -EOPNOTSUPP;
+
+		if (chain == NULL &&
+		    nla[NFTA_CHAIN_HOOK] == NULL)
+			return -EOPNOTSUPP;
+
+		policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
+		switch (policy) {
+		case NF_DROP:
+		case NF_ACCEPT:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+
+	if (chain != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+
+		return nf_tables_updchain(&ctx, genmask, policy, create);
+	}
+
+	return nf_tables_addchain(&ctx, family, genmask, policy, create);
+}
+
 static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[],
@@ -1574,8 +1617,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain;
+	struct nft_rule *rule;
 	int family = nfmsg->nfgen_family;
 	struct nft_ctx ctx;
+	u32 use;
+	int err;
 
 	afi = nf_tables_afinfo_lookup(net, family, false);
 	if (IS_ERR(afi))
@@ -1588,11 +1634,30 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
-	if (chain->use > 0)
+
+	if (nlh->nlmsg_flags & NLM_F_NONREC &&
+	    chain->use > 0)
 		return -EBUSY;
 
 	nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
 
+	use = chain->use;
+	list_for_each_entry(rule, &chain->rules, list) {
+		if (!nft_is_active_next(net, rule))
+			continue;
+		use--;
+
+		err = nft_delrule(&ctx, rule);
+		if (err < 0)
+			return err;
+	}
+
+	/* There are rules and elements that are still holding references to us,
+	 * we cannot do a recursive removal in this case.
+	 */
+	if (use > 0)
+		return -EBUSY;
+
 	return nft_delchain(&ctx);
 }
 
@@ -1977,8 +2042,8 @@ err:
 }
 
 struct nft_rule_dump_ctx {
-	char table[NFT_TABLE_MAXNAMELEN];
-	char chain[NFT_CHAIN_MAXNAMELEN];
+	char *table;
+	char *chain;
 };
 
 static int nf_tables_dump_rules(struct sk_buff *skb,
@@ -2002,7 +2067,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 			continue;
 
 		list_for_each_entry_rcu(table, &afi->tables, list) {
-			if (ctx && ctx->table[0] &&
+			if (ctx && ctx->table &&
 			    strcmp(ctx->table, table->name) != 0)
 				continue;
 
@@ -2042,7 +2107,13 @@ done:
 
 static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 {
-	kfree(cb->data);
+	struct nft_rule_dump_ctx *ctx = cb->data;
+
+	if (ctx) {
+		kfree(ctx->table);
+		kfree(ctx->chain);
+		kfree(ctx);
+	}
 	return 0;
 }
 
@@ -2074,12 +2145,23 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 			if (!ctx)
 				return -ENOMEM;
 
-			if (nla[NFTA_RULE_TABLE])
-				nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE],
-					    sizeof(ctx->table));
-			if (nla[NFTA_RULE_CHAIN])
-				nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
-					    sizeof(ctx->chain));
+			if (nla[NFTA_RULE_TABLE]) {
+				ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+							GFP_KERNEL);
+				if (!ctx->table) {
+					kfree(ctx);
+					return -ENOMEM;
+				}
+			}
+			if (nla[NFTA_RULE_CHAIN]) {
+				ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+							GFP_KERNEL);
+				if (!ctx->chain) {
+					kfree(ctx->table);
+					kfree(ctx);
+					return -ENOMEM;
+				}
+			}
 			c.data = ctx;
 		}
 
@@ -2467,14 +2549,9 @@ nft_select_set_ops(const struct nft_ctx *ctx,
 		case NFT_SET_POL_PERFORMANCE:
 			if (est.lookup < best.lookup)
 				break;
-			if (est.lookup == best.lookup) {
-				if (!desc->size) {
-					if (est.space < best.space)
-						break;
-				} else if (est.size < best.size) {
-					break;
-				}
-			}
+			if (est.lookup == best.lookup &&
+			    est.space < best.space)
+				break;
 			continue;
 		case NFT_SET_POL_MEMORY:
 			if (!desc->size) {
@@ -2621,7 +2698,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 	unsigned long *inuse;
 	unsigned int n = 0, min = 0;
 
-	p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
+	p = strchr(name, '%');
 	if (p != NULL) {
 		if (p[1] != 'd' || strchr(p + 2, '%'))
 			return -EINVAL;
@@ -2652,12 +2729,17 @@ cont:
 		free_page((unsigned long)inuse);
 	}
 
-	snprintf(set->name, sizeof(set->name), name, min + n);
+	set->name = kasprintf(GFP_KERNEL, name, min + n);
+	if (!set->name)
+		return -ENOMEM;
+
 	list_for_each_entry(i, &ctx->table->sets, list) {
 		if (!nft_is_active_next(ctx->net, i))
 			continue;
-		if (!strcmp(set->name, i->name))
+		if (!strcmp(set->name, i->name)) {
+			kfree(set->name);
 			return -ENFILE;
+		}
 	}
 	return 0;
 }
@@ -2929,7 +3011,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_ctx ctx;
-	char name[NFT_SET_MAXNAMELEN];
+	char *name;
 	unsigned int size;
 	bool create;
 	u64 timeout;
@@ -3075,8 +3157,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 		goto err1;
 	}
 
-	nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
+	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+	if (!name) {
+		err = -ENOMEM;
+		goto err2;
+	}
+
 	err = nf_tables_set_alloc_name(&ctx, set, name);
+	kfree(name);
 	if (err < 0)
 		goto err2;
 
@@ -3126,6 +3214,7 @@ static void nft_set_destroy(struct nft_set *set)
 {
 	set->ops->destroy(set);
 	module_put(set->ops->type->owner);
+	kfree(set->name);
 	kvfree(set);
 }
 
@@ -3159,7 +3248,9 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
-	if (!list_empty(&set->bindings))
+
+	if (!list_empty(&set->bindings) ||
+	    (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0))
 		return -EBUSY;
 
 	return nft_delset(&ctx, set);
@@ -3497,45 +3588,6 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb)
 	return 0;
 }
 
-static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
-				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const nla[],
-				struct netlink_ext_ack *extack)
-{
-	u8 genmask = nft_genmask_cur(net);
-	const struct nft_set *set;
-	struct nft_ctx ctx;
-	int err;
-
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
-	if (err < 0)
-		return err;
-
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
-				   genmask);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
-
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		struct netlink_dump_control c = {
-			.dump = nf_tables_dump_set,
-			.done = nf_tables_dump_set_done,
-		};
-		struct nft_set_dump_ctx *dump_ctx;
-
-		dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
-		if (!dump_ctx)
-			return -ENOMEM;
-
-		dump_ctx->set = set;
-		dump_ctx->ctx = ctx;
-
-		c.data = dump_ctx;
-		return netlink_dump_start(nlsk, skb, nlh, &c);
-	}
-	return -EOPNOTSUPP;
-}
-
 static int nf_tables_fill_setelem_info(struct sk_buff *skb,
 				       const struct nft_ctx *ctx, u32 seq,
 				       u32 portid, int event, u16 flags,
@@ -3581,6 +3633,135 @@ nla_put_failure:
 	return -1;
 }
 
+static int nft_setelem_parse_flags(const struct nft_set *set,
+				   const struct nlattr *attr, u32 *flags)
+{
+	if (attr == NULL)
+		return 0;
+
+	*flags = ntohl(nla_get_be32(attr));
+	if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
+		return -EINVAL;
+	if (!(set->flags & NFT_SET_INTERVAL) &&
+	    *flags & NFT_SET_ELEM_INTERVAL_END)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+			    const struct nlattr *attr)
+{
+	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+	const struct nft_set_ext *ext;
+	struct nft_data_desc desc;
+	struct nft_set_elem elem;
+	struct sk_buff *skb;
+	uint32_t flags = 0;
+	void *priv;
+	int err;
+
+	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
+			       nft_set_elem_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (!nla[NFTA_SET_ELEM_KEY])
+		return -EINVAL;
+
+	err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+	if (err < 0)
+		return err;
+
+	err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
+			    nla[NFTA_SET_ELEM_KEY]);
+	if (err < 0)
+		return err;
+
+	err = -EINVAL;
+	if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
+		return err;
+
+	priv = set->ops->get(ctx->net, set, &elem, flags);
+	if (IS_ERR(priv))
+		return PTR_ERR(priv);
+
+	elem.priv = priv;
+	ext = nft_set_elem_ext(set, &elem);
+
+	err = -ENOMEM;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err1;
+
+	err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
+					  NFT_MSG_NEWSETELEM, 0, set, &elem);
+	if (err < 0)
+		goto err2;
+
+	err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT);
+	/* This avoids a loop in nfnetlink. */
+	if (err < 0)
+		goto err1;
+
+	return 0;
+err2:
+	kfree_skb(skb);
+err1:
+	/* this avoids a loop in nfnetlink. */
+	return err == -EAGAIN ? -ENOBUFS : err;
+}
+
+static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
+				struct sk_buff *skb, const struct nlmsghdr *nlh,
+				const struct nlattr * const nla[],
+				struct netlink_ext_ack *extack)
+{
+	u8 genmask = nft_genmask_cur(net);
+	struct nft_set *set;
+	struct nlattr *attr;
+	struct nft_ctx ctx;
+	int rem, err = 0;
+
+	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
+	if (err < 0)
+		return err;
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+				   genmask);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_set,
+			.done = nf_tables_dump_set_done,
+		};
+		struct nft_set_dump_ctx *dump_ctx;
+
+		dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
+		if (!dump_ctx)
+			return -ENOMEM;
+
+		dump_ctx->set = set;
+		dump_ctx->ctx = ctx;
+
+		c.data = dump_ctx;
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
+		return -EINVAL;
+
+	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+		err = nft_get_set_elem(&ctx, set, attr);
+		if (err < 0)
+			break;
+	}
+
+	return err;
+}
+
 static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
 				     const struct nft_set *set,
 				     const struct nft_set_elem *elem,
@@ -3681,22 +3862,6 @@ static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
 	kfree(elem);
 }
 
-static int nft_setelem_parse_flags(const struct nft_set *set,
-				   const struct nlattr *attr, u32 *flags)
-{
-	if (attr == NULL)
-		return 0;
-
-	*flags = ntohl(nla_get_be32(attr));
-	if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
-		return -EINVAL;
-	if (!(set->flags & NFT_SET_INTERVAL) &&
-	    *flags & NFT_SET_ELEM_INTERVAL_END)
-		return -EINVAL;
-
-	return 0;
-}
-
 static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			    const struct nlattr *attr, u32 nlmsg_flags)
 {
@@ -4209,7 +4374,7 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
 
 	list_for_each_entry(obj, &table->objects, list) {
 		if (!nla_strcmp(nla, obj->name) &&
-		    objtype == obj->type->type &&
+		    objtype == obj->ops->type->type &&
 		    nft_active_genmask(obj, genmask))
 			return obj;
 	}
@@ -4231,6 +4396,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 				       const struct nlattr *attr)
 {
 	struct nlattr *tb[type->maxattr + 1];
+	const struct nft_object_ops *ops;
 	struct nft_object *obj;
 	int err;
 
@@ -4243,16 +4409,27 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 		memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
 	}
 
+	if (type->select_ops) {
+		ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
+		if (IS_ERR(ops)) {
+			err = PTR_ERR(ops);
+			goto err1;
+		}
+	} else {
+		ops = type->ops;
+	}
+
 	err = -ENOMEM;
-	obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL);
+	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
 	if (obj == NULL)
 		goto err1;
 
-	err = type->init(ctx, (const struct nlattr * const *)tb, obj);
+	err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
 	if (err < 0)
 		goto err2;
 
-	obj->type = type;
+	obj->ops = ops;
+
 	return obj;
 err2:
 	kfree(obj);
@@ -4268,7 +4445,7 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
 	nest = nla_nest_start(skb, attr);
 	if (!nest)
 		goto nla_put_failure;
-	if (obj->type->dump(skb, obj, reset) < 0)
+	if (obj->ops->dump(skb, obj, reset) < 0)
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
 	return 0;
@@ -4363,18 +4540,24 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 		goto err1;
 	}
 	obj->table = table;
-	nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
+	obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+	if (!obj->name) {
+		err = -ENOMEM;
+		goto err2;
+	}
 
 	err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
 	if (err < 0)
-		goto err2;
+		goto err3;
 
 	list_add_tail_rcu(&obj->list, &table->objects);
 	table->use++;
 	return 0;
+err3:
+	kfree(obj->name);
 err2:
-	if (obj->type->destroy)
-		obj->type->destroy(obj);
+	if (obj->ops->destroy)
+		obj->ops->destroy(obj);
 	kfree(obj);
 err1:
 	module_put(type->owner);
@@ -4401,7 +4584,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
 
 	if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
 	    nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
-	    nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) ||
+	    nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
 	    nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
 	    nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
 		goto nla_put_failure;
@@ -4415,7 +4598,7 @@ nla_put_failure:
 }
 
 struct nft_obj_filter {
-	char		table[NFT_OBJ_MAXNAMELEN];
+	char		*table;
 	u32		type;
 };
 
@@ -4455,7 +4638,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 					goto cont;
 				if (filter &&
 				    filter->type != NFT_OBJECT_UNSPEC &&
-				    obj->type->type != filter->type)
+				    obj->ops->type->type != filter->type)
 					goto cont;
 
 				if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
@@ -4480,7 +4663,10 @@ done:
 
 static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 {
-	kfree(cb->data);
+	struct nft_obj_filter *filter = cb->data;
+
+	kfree(filter->table);
+	kfree(filter);
 
 	return 0;
 }
@@ -4494,9 +4680,13 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
 	if (!filter)
 		return ERR_PTR(-ENOMEM);
 
-	if (nla[NFTA_OBJ_TABLE])
-		nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
-			    NFT_TABLE_MAXNAMELEN);
+	if (nla[NFTA_OBJ_TABLE]) {
+		filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL);
+		if (!filter->table) {
+			kfree(filter);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
 	if (nla[NFTA_OBJ_TYPE])
 		filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
 
@@ -4576,10 +4766,11 @@ err:
 
 static void nft_obj_destroy(struct nft_object *obj)
 {
-	if (obj->type->destroy)
-		obj->type->destroy(obj);
+	if (obj->ops->destroy)
+		obj->ops->destroy(obj);
 
-	module_put(obj->type->owner);
+	module_put(obj->ops->type->owner);
+	kfree(obj->name);
 	kfree(obj);
 }
 
@@ -4662,6 +4853,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
+	char buf[TASK_COMM_LEN];
 	int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
 
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
@@ -4673,7 +4865,9 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
 	nfmsg->version		= NFNETLINK_V0;
 	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
 
-	if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+	if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
+	    nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
+	    nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
 		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
@@ -4842,7 +5036,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
 {
 	struct nft_base_chain *basechain;
 
-	if (nft_trans_chain_name(trans)[0])
+	if (nft_trans_chain_name(trans))
 		strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
 
 	if (!nft_is_base_chain(trans->ctx.chain))
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 65dbeadcb118..dfd0bf3810d2 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -29,7 +29,7 @@ static const char *const comments[__NFT_TRACETYPE_MAX] = {
 	[NFT_TRACETYPE_RULE]	= "rule",
 };
 
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
 	.type = NF_LOG_TYPE_LOG,
 	.u = {
 		.log = {
@@ -114,6 +114,22 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
 	return true;
 }
 
+DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
+
+static noinline void nft_update_chain_stats(const struct nft_chain *chain,
+					    const struct nft_pktinfo *pkt)
+{
+	struct nft_stats *stats;
+
+	local_bh_disable();
+	stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
+	u64_stats_update_begin(&stats->syncp);
+	stats->pkts++;
+	stats->bytes += pkt->skb->len;
+	u64_stats_update_end(&stats->syncp);
+	local_bh_enable();
+}
+
 struct nft_jumpstack {
 	const struct nft_chain	*chain;
 	const struct nft_rule	*rule;
@@ -130,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
 	struct nft_regs regs;
 	unsigned int stackptr = 0;
 	struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
-	struct nft_stats *stats;
 	int rulenum;
 	unsigned int gencursor = nft_genmask_cur(net);
 	struct nft_traceinfo info;
@@ -220,13 +235,8 @@ next_rule:
 	nft_trace_packet(&info, basechain, NULL, -1,
 			 NFT_TRACETYPE_POLICY);
 
-	rcu_read_lock_bh();
-	stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
-	u64_stats_update_begin(&stats->syncp);
-	stats->pkts++;
-	stats->bytes += pkt->skb->len;
-	u64_stats_update_end(&stats->syncp);
-	rcu_read_unlock_bh();
+	if (static_branch_unlikely(&nft_counters_enabled))
+		nft_update_chain_stats(basechain, pkt);
 
 	return nft_base_chain(basechain)->policy;
 }
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index e1b15e7a5793..e1dc527a493b 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -162,6 +162,27 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
 			    NFTA_TRACE_PAD);
 }
 
+static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
+{
+	switch (info->type) {
+	case NFT_TRACETYPE_RETURN:
+	case NFT_TRACETYPE_RULE:
+		break;
+	default:
+		return false;
+	}
+
+	switch (info->verdict->code) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 void nft_trace_notify(struct nft_traceinfo *info)
 {
 	const struct nft_pktinfo *pkt = info->pkt;
@@ -175,13 +196,12 @@ void nft_trace_notify(struct nft_traceinfo *info)
 		return;
 
 	size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
-		nla_total_size(NFT_TABLE_MAXNAMELEN) +
-		nla_total_size(NFT_CHAIN_MAXNAMELEN) +
+		nla_total_size(strlen(info->chain->table->name)) +
+		nla_total_size(strlen(info->chain->name)) +
 		nla_total_size_64bit(sizeof(__be64)) +	/* rule handle */
 		nla_total_size(sizeof(__be32)) +	/* trace type */
 		nla_total_size(0) +			/* VERDICT, nested */
 			nla_total_size(sizeof(u32)) +	/* verdict code */
-			nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
 		nla_total_size(sizeof(u32)) +		/* id */
 		nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
 		nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
@@ -194,6 +214,9 @@ void nft_trace_notify(struct nft_traceinfo *info)
 		nla_total_size(sizeof(u32)) +		/* nfproto */
 		nla_total_size(sizeof(u32));		/* policy */
 
+	if (nft_trace_have_verdict_chain(info))
+		size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */
+
 	skb = nlmsg_new(size, GFP_ATOMIC);
 	if (!skb)
 		return;
@@ -217,14 +240,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
 	if (trace_fill_id(skb, pkt->skb))
 		goto nla_put_failure;
 
-	if (info->chain) {
-		if (nla_put_string(skb, NFTA_TRACE_CHAIN,
-				   info->chain->name))
-			goto nla_put_failure;
-		if (nla_put_string(skb, NFTA_TRACE_TABLE,
-				   info->chain->table->name))
-			goto nla_put_failure;
-	}
+	if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
+		goto nla_put_failure;
+
+	if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name))
+		goto nla_put_failure;
 
 	if (nf_trace_fill_rule_info(skb, info))
 		goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 400e9ae97153..32b1c0b44e79 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -47,7 +47,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
 };
 
 static int
-ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
+ctnl_timeout_parse_policy(void *timeouts,
+			  const struct nf_conntrack_l4proto *l4proto,
 			  struct net *net, const struct nlattr *attr)
 {
 	int ret = 0;
@@ -74,7 +75,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 {
 	__u16 l3num;
 	__u8 l4num;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct ctnl_timeout *timeout, *matching = NULL;
 	char *name;
 	int ret;
@@ -158,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
-	struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
+	const struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -363,10 +364,10 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 				 const struct nlattr * const cda[],
 				 struct netlink_ext_ack *extack)
 {
+	const struct nf_conntrack_l4proto *l4proto;
+	unsigned int *timeouts;
 	__u16 l3num;
 	__u8 l4num;
-	struct nf_conntrack_l4proto *l4proto;
-	unsigned int *timeouts;
 	int ret;
 
 	if (!cda[CTA_TIMEOUT_L3PROTO] ||
@@ -401,7 +402,7 @@ err:
 static int
 cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
 			    u32 seq, u32 type, int event,
-			    struct nf_conntrack_l4proto *l4proto)
+			    const struct nf_conntrack_l4proto *l4proto)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
@@ -453,11 +454,11 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 				 const struct nlattr * const cda[],
 				 struct netlink_ext_ack *extack)
 {
-	__u16 l3num;
-	__u8 l4num;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct sk_buff *skb2;
 	int ret, err;
+	__u16 l3num;
+	__u8 l4num;
 
 	if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
 		return -EINVAL;
@@ -505,7 +506,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
 {
 	struct ctnl_timeout *timeout, *matching = NULL;
 
-	rcu_read_lock();
 	list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
 		if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
 			continue;
@@ -521,7 +521,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
 		break;
 	}
 err:
-	rcu_read_unlock();
 	return matching;
 }
 
@@ -572,6 +571,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
 {
 	struct ctnl_timeout *cur, *tmp;
 
+	nf_ct_unconfirmed_destroy(net);
 	ctnl_untimeout(net, NULL);
 
 	list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index c684ba95dbb4..cad6498f10b0 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -606,7 +606,7 @@ nla_put_failure:
 	return -1;
 }
 
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
 	.type =		NF_LOG_TYPE_ULOG,
 	.u = {
 		.ulog = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 16fa04086880..a16356cacec3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -41,6 +41,10 @@
 #include "../bridge/br_private.h"
 #endif
 
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
 #define NFQNL_QMAX_DEFAULT 1024
 
 /* We're using struct nlattr which has 16bit nla_len. Note that nla_len
@@ -397,7 +401,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 
 	outdev = entry->state.out;
 
-	switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
+	switch ((enum nfqnl_config_mode)READ_ONCE(queue->copy_mode)) {
 	case NFQNL_COPY_META:
 	case NFQNL_COPY_NONE:
 		break;
@@ -408,7 +412,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 		    skb_checksum_help(entskb))
 			return NULL;
 
-		data_len = ACCESS_ONCE(queue->copy_range);
+		data_len = READ_ONCE(queue->copy_range);
 		if (data_len > entskb->len)
 			data_len = entskb->len;
 
@@ -612,6 +616,18 @@ nlmsg_failure:
 	return NULL;
 }
 
+static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
+	const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+
+	if (ct && ((ct->status & flags) == IPS_DYING))
+		return true;
+#endif
+	return false;
+}
+
 static int
 __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
 			struct nf_queue_entry *entry)
@@ -628,6 +644,9 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
 	}
 	spin_lock_bh(&queue->lock);
 
+	if (nf_ct_drop_unconfirmed(entry))
+		goto err_out_free_nskb;
+
 	if (queue->queue_total >= queue->queue_maxlen) {
 		if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
 			failopen = 1;
@@ -928,7 +947,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
 	unsigned int instances = 0;
 	int i;
 
-	rcu_read_lock();
 	for (i = 0; i < INSTANCE_BUCKETS; i++) {
 		struct nfqnl_instance *inst;
 		struct hlist_head *head = &q->instance_table[i];
@@ -938,7 +956,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
 			instances++;
 		}
 	}
-	rcu_read_unlock();
 
 	return instances;
 }
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f5a7cb68694e..b89f4f65b2a0 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -305,7 +305,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
 		const struct nf_hook_ops *ops = &basechain->ops[0];
 
 		hook_mask = 1 << ops->hooknum;
-		if (!(hook_mask & target->hooks))
+		if (target->hooks && !(hook_mask & target->hooks))
 			return -EINVAL;
 
 		ret = nft_compat_chain_validate_dependency(target->table,
@@ -484,7 +484,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
 		const struct nf_hook_ops *ops = &basechain->ops[0];
 
 		hook_mask = 1 << ops->hooknum;
-		if (!(hook_mask & match->hooks))
+		if (match->hooks && !(hook_mask & match->hooks))
 			return -EINVAL;
 
 		ret = nft_compat_chain_validate_dependency(match->table,
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 67a710ebde09..eefe3b409925 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -175,15 +175,21 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
 	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
 };
 
-static struct nft_object_type nft_counter_obj __read_mostly = {
-	.type		= NFT_OBJECT_COUNTER,
+static struct nft_object_type nft_counter_obj_type;
+static const struct nft_object_ops nft_counter_obj_ops = {
+	.type		= &nft_counter_obj_type,
 	.size		= sizeof(struct nft_counter_percpu_priv),
-	.maxattr	= NFTA_COUNTER_MAX,
-	.policy		= nft_counter_policy,
 	.eval		= nft_counter_obj_eval,
 	.init		= nft_counter_obj_init,
 	.destroy	= nft_counter_obj_destroy,
 	.dump		= nft_counter_obj_dump,
+};
+
+static struct nft_object_type nft_counter_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_COUNTER,
+	.ops		= &nft_counter_obj_ops,
+	.maxattr	= NFTA_COUNTER_MAX,
+	.policy		= nft_counter_policy,
 	.owner		= THIS_MODULE,
 };
 
@@ -271,7 +277,7 @@ static int __init nft_counter_module_init(void)
 	for_each_possible_cpu(cpu)
 		seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
 
-	err = nft_register_obj(&nft_counter_obj);
+	err = nft_register_obj(&nft_counter_obj_type);
 	if (err < 0)
 		return err;
 
@@ -281,14 +287,14 @@ static int __init nft_counter_module_init(void)
 
 	return 0;
 err1:
-	nft_unregister_obj(&nft_counter_obj);
+	nft_unregister_obj(&nft_counter_obj_type);
 	return err;
 }
 
 static void __exit nft_counter_module_exit(void)
 {
 	nft_unregister_expr(&nft_counter_type);
-	nft_unregister_obj(&nft_counter_obj);
+	nft_unregister_obj(&nft_counter_obj_type);
 }
 
 module_init(nft_counter_module_init);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 1678e9e75e8e..2647b895f4b0 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
 	[NFTA_CT_SREG]		= { .type = NLA_U32 },
 };
 
-static int nft_ct_netns_get(struct net *net, uint8_t family)
-{
-	int err;
-
-	if (family == NFPROTO_INET) {
-		err = nf_ct_netns_get(net, NFPROTO_IPV4);
-		if (err < 0)
-			goto err1;
-		err = nf_ct_netns_get(net, NFPROTO_IPV6);
-		if (err < 0)
-			goto err2;
-	} else {
-		err = nf_ct_netns_get(net, family);
-		if (err < 0)
-			goto err1;
-	}
-	return 0;
-
-err2:
-	nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
-	return err;
-}
-
-static void nft_ct_netns_put(struct net *net, uint8_t family)
-{
-	if (family == NFPROTO_INET) {
-		nf_ct_netns_put(net, NFPROTO_IPV4);
-		nf_ct_netns_put(net, NFPROTO_IPV6);
-	} else
-		nf_ct_netns_put(net, family);
-}
-
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 static void nft_ct_tmpl_put_pcpu(void)
 {
@@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
 	if (err < 0)
 		return err;
 
-	err = nft_ct_netns_get(ctx->net, ctx->afi->family);
+	err = nf_ct_netns_get(ctx->net, ctx->afi->family);
 	if (err < 0)
 		return err;
 
@@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
 	if (err < 0)
 		goto err1;
 
-	err = nft_ct_netns_get(ctx->net, ctx->afi->family);
+	err = nf_ct_netns_get(ctx->net, ctx->afi->family);
 	if (err < 0)
 		goto err1;
 
@@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
 	struct nft_ct *priv = nft_expr_priv(expr);
 
 	__nft_ct_set_destroy(ctx, priv);
-	nft_ct_netns_put(ctx->net, ctx->afi->family);
+	nf_ct_netns_put(ctx->net, ctx->afi->family);
 }
 
 static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -904,15 +871,21 @@ static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
 	[NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
 };
 
-static struct nft_object_type nft_ct_helper_obj __read_mostly = {
-	.type		= NFT_OBJECT_CT_HELPER,
+static struct nft_object_type nft_ct_helper_obj_type;
+static const struct nft_object_ops nft_ct_helper_obj_ops = {
+	.type		= &nft_ct_helper_obj_type,
 	.size		= sizeof(struct nft_ct_helper_obj),
-	.maxattr	= NFTA_CT_HELPER_MAX,
-	.policy		= nft_ct_helper_policy,
 	.eval		= nft_ct_helper_obj_eval,
 	.init		= nft_ct_helper_obj_init,
 	.destroy	= nft_ct_helper_obj_destroy,
 	.dump		= nft_ct_helper_obj_dump,
+};
+
+static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_CT_HELPER,
+	.ops		= &nft_ct_helper_obj_ops,
+	.maxattr	= NFTA_CT_HELPER_MAX,
+	.policy		= nft_ct_helper_policy,
 	.owner		= THIS_MODULE,
 };
 
@@ -930,7 +903,7 @@ static int __init nft_ct_module_init(void)
 	if (err < 0)
 		goto err1;
 
-	err = nft_register_obj(&nft_ct_helper_obj);
+	err = nft_register_obj(&nft_ct_helper_obj_type);
 	if (err < 0)
 		goto err2;
 
@@ -945,7 +918,7 @@ err1:
 
 static void __exit nft_ct_module_exit(void)
 {
-	nft_unregister_obj(&nft_ct_helper_obj);
+	nft_unregister_obj(&nft_ct_helper_obj_type);
 	nft_unregister_expr(&nft_notrack_type);
 	nft_unregister_expr(&nft_ct_type);
 }
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 1ec49fe5845f..a0a93d987a3b 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -8,6 +8,7 @@
  * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
+#include <asm/unaligned.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -23,6 +24,7 @@ struct nft_exthdr {
 	u8			len;
 	u8			op;
 	enum nft_registers	dreg:8;
+	enum nft_registers	sreg:8;
 	u8			flags;
 };
 
@@ -61,6 +63,26 @@ err:
 	regs->verdict.code = NFT_BREAK;
 }
 
+static void *
+nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
+		       unsigned int len, void *buffer, unsigned int *tcphdr_len)
+{
+	struct tcphdr *tcph;
+
+	if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+		return NULL;
+
+	tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buffer);
+	if (!tcph)
+		return NULL;
+
+	*tcphdr_len = __tcp_hdrlen(tcph);
+	if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
+		return NULL;
+
+	return skb_header_pointer(pkt->skb, pkt->xt.thoff, *tcphdr_len, buffer);
+}
+
 static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
 				struct nft_regs *regs,
 				const struct nft_pktinfo *pkt)
@@ -72,18 +94,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
 	struct tcphdr *tcph;
 	u8 *opt;
 
-	if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
-		goto err;
-
-	tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff);
-	if (!tcph)
-		goto err;
-
-	tcphdr_len = __tcp_hdrlen(tcph);
-	if (tcphdr_len < sizeof(*tcph))
-		goto err;
-
-	tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff);
+	tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
 	if (!tcph)
 		goto err;
 
@@ -115,6 +126,88 @@ err:
 		regs->verdict.code = NFT_BREAK;
 }
 
+/* Overwrite a field of the first priv->type TCP option with priv->sreg. */
+static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+				    struct nft_regs *regs,
+				    const struct nft_pktinfo *pkt)
+{
+	u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	unsigned int i, optl, tcphdr_len, offset;
+	struct tcphdr *tcph;
+	u8 *opt;
+	u32 src;
+
+	tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+	if (!tcph)
+		return;
+
+	opt = (u8 *)tcph;
+	for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+		union {
+			u8 octet;
+			__be16 v16;
+			__be32 v32;
+		} old, new;
+
+		optl = optlen(opt, i);
+
+		if (priv->type != opt[i])
+			continue;
+
+		if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+			return;
+
+		/* make the full TCP header writable, not just a prefix of it */
+		if (!skb_make_writable(pkt->skb, pkt->xt.thoff + tcphdr_len))
+			return;
+
+		tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
+					      &tcphdr_len);
+		if (!tcph)
+			return;
+
+		opt = (u8 *)tcph;	/* skb data may have been reallocated */
+		src = regs->data[priv->sreg];
+		offset = i + priv->offset;
+
+		switch (priv->len) {
+		case 2:
+			old.v16 = get_unaligned((u16 *)(opt + offset));
+			new.v16 = src;
+
+			/* increase can cause connection to stall */
+			if (priv->type == TCPOPT_MSS &&
+			    ntohs(old.v16) <= ntohs(new.v16))
+				return;
+
+			if (old.v16 == new.v16)
+				return;
+
+			put_unaligned(new.v16, (u16*)(opt + offset));
+			inet_proto_csum_replace2(&tcph->check, pkt->skb,
+						 old.v16, new.v16, false);
+			break;
+		case 4:
+			new.v32 = src;
+			old.v32 = get_unaligned((u32 *)(opt + offset));
+
+			if (old.v32 == new.v32)
+				return;
+
+			put_unaligned(new.v32, (u32*)(opt + offset));
+			inet_proto_csum_replace4(&tcph->check, pkt->skb,
+						 old.v32, new.v32, false);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			break;
+		}
+
+		return;
+	}
+}
+
 static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
 	[NFTA_EXTHDR_DREG]		= { .type = NLA_U32 },
 	[NFTA_EXTHDR_TYPE]		= { .type = NLA_U8 },
@@ -171,12 +264,57 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
 					   NFT_DATA_VALUE, priv->len);
 }
 
-static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
+				   const struct nft_expr *expr,
+				   const struct nlattr * const tb[])
 {
-	const struct nft_exthdr *priv = nft_expr_priv(expr);
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
+	int err;
 
-	if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
-		goto nla_put_failure;
+	if (!tb[NFTA_EXTHDR_SREG] ||
+	    !tb[NFTA_EXTHDR_TYPE] ||
+	    !tb[NFTA_EXTHDR_OFFSET] ||
+	    !tb[NFTA_EXTHDR_LEN])
+		return -EINVAL;
+
+	if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
+		return -EINVAL;
+
+	err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+	if (err < 0)
+		return err;
+
+	err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+	if (err < 0)
+		return err;
+
+	if (offset < 2)
+		return -EOPNOTSUPP;
+
+	switch (len) {
+	case 2: break;
+	case 4: break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
+	if (err < 0)
+		return err;
+
+	priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+	priv->offset = offset;
+	priv->len    = len;
+	priv->sreg   = nft_parse_register(tb[NFTA_EXTHDR_SREG]);
+	priv->flags  = flags;
+	priv->op     = op;
+
+	return nft_validate_register_load(priv->sreg, priv->len);
+}
+
+static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
+{
 	if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
 		goto nla_put_failure;
 	if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
@@ -193,6 +331,26 @@ nla_put_failure:
 	return -1;
 }
 
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
+		return -1;
+
+	return nft_exthdr_dump_common(skb, priv);
+}
+
+static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg))
+		return -1;
+
+	return nft_exthdr_dump_common(skb, priv);
+}
+
 static struct nft_expr_type nft_exthdr_type;
 static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
 	.type		= &nft_exthdr_type,
@@ -210,6 +368,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = {
 	.dump		= nft_exthdr_dump,
 };
 
+static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
+	.type		= &nft_exthdr_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+	.eval		= nft_exthdr_tcp_set_eval,
+	.init		= nft_exthdr_tcp_set_init,
+	.dump		= nft_exthdr_dump_set,
+};
+
 static const struct nft_expr_ops *
 nft_exthdr_select_ops(const struct nft_ctx *ctx,
 		      const struct nlattr * const tb[])
@@ -219,12 +385,21 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
 	if (!tb[NFTA_EXTHDR_OP])
 		return &nft_exthdr_ipv6_ops;
 
-	op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP]));
+	if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
+		return ERR_PTR(-EOPNOTSUPP);
+
+	op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));
 	switch (op) {
 	case NFT_EXTHDR_OP_TCPOPT:
-		return &nft_exthdr_tcp_ops;
+		if (tb[NFTA_EXTHDR_SREG])
+			return &nft_exthdr_tcp_set_ops;
+		if (tb[NFTA_EXTHDR_DREG])
+			return &nft_exthdr_tcp_ops;
+		break;
 	case NFT_EXTHDR_OP_IPV6:
-		return &nft_exthdr_ipv6_ops;
+		if (tb[NFTA_EXTHDR_DREG])
+			return &nft_exthdr_ipv6_ops;
+		break;
 	}
 
 	return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
new file mode 100644
index 000000000000..3997ee36cfbd
--- /dev/null
+++ b/net/netfilter/nft_fib_netdev.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 Pablo M. Bermudo Garay <pablombg@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This code is based on net/netfilter/nft_fib_inet.c, written by
+ * Florian Westphal <fw@strlen.de>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+static void nft_fib_netdev_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+
+	switch (ntohs(pkt->skb->protocol)) {
+	case ETH_P_IP:
+		switch (priv->result) {
+		case NFT_FIB_RESULT_OIF:
+		case NFT_FIB_RESULT_OIFNAME:
+			return nft_fib4_eval(expr, regs, pkt);
+		case NFT_FIB_RESULT_ADDRTYPE:
+			return nft_fib4_eval_type(expr, regs, pkt);
+		}
+		break;
+	case ETH_P_IPV6:
+		switch (priv->result) {
+		case NFT_FIB_RESULT_OIF:
+		case NFT_FIB_RESULT_OIFNAME:
+			return nft_fib6_eval(expr, regs, pkt);
+		case NFT_FIB_RESULT_ADDRTYPE:
+			return nft_fib6_eval_type(expr, regs, pkt);
+		}
+		break;
+	}
+
+	regs->verdict.code = NFT_BREAK;
+}
+
+static struct nft_expr_type nft_fib_netdev_type;
+static const struct nft_expr_ops nft_fib_netdev_ops = {
+	.type		= &nft_fib_netdev_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+	.eval		= nft_fib_netdev_eval,
+	.init		= nft_fib_init,
+	.dump		= nft_fib_dump,
+	.validate	= nft_fib_validate,
+};
+
+static struct nft_expr_type nft_fib_netdev_type __read_mostly = {
+	.family		= NFPROTO_NETDEV,
+	.name		= "fib",
+	.ops		= &nft_fib_netdev_ops,
+	.policy		= nft_fib_policy,
+	.maxattr	= NFTA_FIB_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_fib_netdev_module_init(void)
+{
+	return nft_register_expr(&nft_fib_netdev_type);
+}
+
+static void __exit nft_fib_netdev_module_exit(void)
+{
+	nft_unregister_expr(&nft_fib_netdev_type);
+}
+
+module_init(nft_fib_netdev_module_init);
+module_exit(nft_fib_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "fib");
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 18dd57a52651..a9fc298ef4c3 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -65,19 +65,23 @@ static int nft_limit_init(struct nft_limit *limit,
 	limit->nsecs = unit * NSEC_PER_SEC;
 	if (limit->rate == 0 || limit->nsecs < unit)
 		return -EOVERFLOW;
-	limit->tokens = limit->tokens_max = limit->nsecs;
-
-	if (tb[NFTA_LIMIT_BURST]) {
-		u64 rate;
 
+	if (tb[NFTA_LIMIT_BURST])
 		limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
+	else
+		limit->burst = 0;
 
-		rate = limit->rate + limit->burst;
-		if (rate < limit->rate)
-			return -EOVERFLOW;
+	if (limit->rate + limit->burst < limit->rate)
+		return -EOVERFLOW;
+
+	/* The token bucket size limits the number of tokens that can be
+	 * accumulated. tokens_max specifies the bucket size.
+	 * tokens_max = unit * (rate + burst) / rate.
+	 */
+	limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst),
+				limit->rate);
+	limit->tokens_max = limit->tokens;
 
-		limit->rate = rate;
-	}
 	if (tb[NFTA_LIMIT_FLAGS]) {
 		u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
 
@@ -95,9 +99,8 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
 {
 	u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0;
 	u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
-	u64 rate = limit->rate - limit->burst;
 
-	if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate),
+	if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(limit->rate),
 			 NFTA_LIMIT_PAD) ||
 	    nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs),
 			 NFTA_LIMIT_PAD) ||
@@ -165,9 +168,9 @@ static const struct nft_expr_ops nft_limit_pkts_ops = {
 	.dump		= nft_limit_pkts_dump,
 };
 
-static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
-				     struct nft_regs *regs,
-				     const struct nft_pktinfo *pkt)
+static void nft_limit_bytes_eval(const struct nft_expr *expr,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
 {
 	struct nft_limit *priv = nft_expr_priv(expr);
 	u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
@@ -176,29 +179,29 @@ static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
 		regs->verdict.code = NFT_BREAK;
 }
 
-static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx,
-				    const struct nft_expr *expr,
-				    const struct nlattr * const tb[])
+static int nft_limit_bytes_init(const struct nft_ctx *ctx,
+				const struct nft_expr *expr,
+				const struct nlattr * const tb[])
 {
 	struct nft_limit *priv = nft_expr_priv(expr);
 
 	return nft_limit_init(priv, tb);
 }
 
-static int nft_limit_pkt_bytes_dump(struct sk_buff *skb,
-				    const struct nft_expr *expr)
+static int nft_limit_bytes_dump(struct sk_buff *skb,
+				const struct nft_expr *expr)
 {
 	const struct nft_limit *priv = nft_expr_priv(expr);
 
 	return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
 }
 
-static const struct nft_expr_ops nft_limit_pkt_bytes_ops = {
+static const struct nft_expr_ops nft_limit_bytes_ops = {
 	.type		= &nft_limit_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit)),
-	.eval		= nft_limit_pkt_bytes_eval,
-	.init		= nft_limit_pkt_bytes_init,
-	.dump		= nft_limit_pkt_bytes_dump,
+	.eval		= nft_limit_bytes_eval,
+	.init		= nft_limit_bytes_init,
+	.dump		= nft_limit_bytes_dump,
 };
 
 static const struct nft_expr_ops *
@@ -212,7 +215,7 @@ nft_limit_select_ops(const struct nft_ctx *ctx,
 	case NFT_LIMIT_PKTS:
 		return &nft_limit_pkts_ops;
 	case NFT_LIMIT_PKT_BYTES:
-		return &nft_limit_pkt_bytes_ops;
+		return &nft_limit_bytes_ops;
 	}
 	return ERR_PTR(-EOPNOTSUPP);
 }
@@ -226,14 +229,133 @@ static struct nft_expr_type nft_limit_type __read_mostly = {
 	.owner		= THIS_MODULE,
 };
 
+static void nft_limit_obj_pkts_eval(struct nft_object *obj,
+				    struct nft_regs *regs,
+				    const struct nft_pktinfo *pkt)
+{
+	struct nft_limit_pkts *priv = nft_obj_data(obj);
+
+	if (nft_limit_eval(&priv->limit, priv->cost))
+		regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
+				   const struct nlattr * const tb[],
+				   struct nft_object *obj)
+{
+	struct nft_limit_pkts *priv = nft_obj_data(obj);
+	int err;
+
+	err = nft_limit_init(&priv->limit, tb);
+	if (err < 0)
+		return err;
+
+	priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate);
+	return 0;
+}
+
+static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
+				   struct nft_object *obj,
+				   bool reset)
+{
+	const struct nft_limit_pkts *priv = nft_obj_data(obj);
+
+	return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
+}
+
+static struct nft_object_type nft_limit_obj_type;
+static const struct nft_object_ops nft_limit_obj_pkts_ops = {
+	.type		= &nft_limit_obj_type,
+	.size		= sizeof(struct nft_limit_pkts), /* object, not expr */
+	.init		= nft_limit_obj_pkts_init,
+	.eval		= nft_limit_obj_pkts_eval,
+	.dump		= nft_limit_obj_pkts_dump,
+};
+
+static void nft_limit_obj_bytes_eval(struct nft_object *obj,
+				     struct nft_regs *regs,
+				     const struct nft_pktinfo *pkt)
+{
+	struct nft_limit *priv = nft_obj_data(obj);
+	u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
+
+	if (nft_limit_eval(priv, cost))
+		regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
+				    const struct nlattr * const tb[],
+				    struct nft_object *obj)
+{
+	struct nft_limit *priv = nft_obj_data(obj);
+
+	return nft_limit_init(priv, tb);
+}
+
+static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
+				    struct nft_object *obj,
+				    bool reset)
+{
+	const struct nft_limit *priv = nft_obj_data(obj);
+
+	return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
+}
+
+static struct nft_object_type nft_limit_obj_type;
+static const struct nft_object_ops nft_limit_obj_bytes_ops = {
+	.type		= &nft_limit_obj_type,
+	.size		= sizeof(struct nft_limit),
+	.init		= nft_limit_obj_bytes_init,
+	.eval		= nft_limit_obj_bytes_eval,
+	.dump		= nft_limit_obj_bytes_dump,
+};
+
+static const struct nft_object_ops *
+nft_limit_obj_select_ops(const struct nft_ctx *ctx,
+			 const struct nlattr * const tb[])
+{
+	if (!tb[NFTA_LIMIT_TYPE])
+		return &nft_limit_obj_pkts_ops;
+
+	switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) {
+	case NFT_LIMIT_PKTS:
+		return &nft_limit_obj_pkts_ops;
+	case NFT_LIMIT_PKT_BYTES:
+		return &nft_limit_obj_bytes_ops;
+	}
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static struct nft_object_type nft_limit_obj_type __read_mostly = {
+	.select_ops	= nft_limit_obj_select_ops,
+	.type		= NFT_OBJECT_LIMIT,
+	.maxattr	= NFTA_LIMIT_MAX,
+	.policy		= nft_limit_policy,
+	.owner		= THIS_MODULE,
+};
+
 static int __init nft_limit_module_init(void)
 {
-	return nft_register_expr(&nft_limit_type);
+	int err;
+
+	err = nft_register_obj(&nft_limit_obj_type);
+	if (err < 0)
+		return err;
+
+	err = nft_register_expr(&nft_limit_type);
+	if (err < 0)
+		goto err1;
+
+	return 0;
+err1:
+	nft_unregister_obj(&nft_limit_obj_type);
+	return err;
 }
 
 static void __exit nft_limit_module_exit(void)
 {
 	nft_unregister_expr(&nft_limit_type);
+	nft_unregister_obj(&nft_limit_obj_type);
 }
 
 module_init(nft_limit_module_init);
@@ -242,3 +364,4 @@ module_exit(nft_limit_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_ALIAS_NFT_EXPR("limit");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT);
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 1dd428fbaaa3..7bcdc48f3d73 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -22,7 +22,7 @@ static void nft_objref_eval(const struct nft_expr *expr,
 {
 	struct nft_object *obj = nft_objref_priv(expr);
 
-	obj->type->eval(obj, regs, pkt);
+	obj->ops->eval(obj, regs, pkt);
 }
 
 static int nft_objref_init(const struct nft_ctx *ctx,
@@ -54,7 +54,8 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	const struct nft_object *obj = nft_objref_priv(expr);
 
 	if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
-	    nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type)))
+	    nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE,
+			 htonl(obj->ops->type->type)))
 		goto nla_put_failure;
 
 	return 0;
@@ -104,7 +105,7 @@ static void nft_objref_map_eval(const struct nft_expr *expr,
 		return;
 	}
 	obj = *nft_set_ext_obj(ext);
-	obj->type->eval(obj, regs, pkt);
+	obj->ops->eval(obj, regs, pkt);
 }
 
 static int nft_objref_map_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 7d699bbd45b0..e110b0ebbf58 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -184,7 +184,7 @@ static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff)
 	if (!uh)
 		return false;
 
-	return uh->check;
+	return (__force bool)uh->check;
 }
 
 static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 25e33159be57..0ed124a93fcf 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -151,14 +151,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
 	return nft_quota_do_dump(skb, priv, reset);
 }
 
-static struct nft_object_type nft_quota_obj __read_mostly = {
-	.type		= NFT_OBJECT_QUOTA,
+static struct nft_object_type nft_quota_obj_type;	/* forward declaration: the ops and the type point at each other */
+static const struct nft_object_ops nft_quota_obj_ops = {
+	.type		= &nft_quota_obj_type,
 	.size		= sizeof(struct nft_quota),
-	.maxattr	= NFTA_QUOTA_MAX,
-	.policy		= nft_quota_policy,
 	.init		= nft_quota_obj_init,
 	.eval		= nft_quota_obj_eval,
 	.dump		= nft_quota_obj_dump,
+};
+
+static struct nft_object_type nft_quota_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_QUOTA,
+	.ops		= &nft_quota_obj_ops,	/* size and the init/eval/dump callbacks */
+	.maxattr	= NFTA_QUOTA_MAX,
+	.policy		= nft_quota_policy,
 	.owner		= THIS_MODULE,
 };
 
@@ -209,7 +215,7 @@ static int __init nft_quota_module_init(void)
 {
 	int err;
 
-	err = nft_register_obj(&nft_quota_obj);
+	err = nft_register_obj(&nft_quota_obj_type);
 	if (err < 0)
 		return err;
 
@@ -219,14 +225,14 @@ static int __init nft_quota_module_init(void)
 
 	return 0;
 err1:
-	nft_unregister_obj(&nft_quota_obj);
+	nft_unregister_obj(&nft_quota_obj_type);
 	return err;
 }
 
 static void __exit nft_quota_module_exit(void)
 {
 	nft_unregister_expr(&nft_quota_type);
-	nft_unregister_obj(&nft_quota_obj);
+	nft_unregister_obj(&nft_quota_obj_type);	/* the same object type that init registered */
 }
 
 module_init(nft_quota_module_init);
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index c7383d8f88d0..a6b7d05aeacf 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -23,6 +23,43 @@ struct nft_rt {
 	enum nft_registers	dreg:8;
 };
 
+static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst)
+{
+	u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);	/* minlen is overwritten for IPv4 and IPv6 below */
+	const struct sk_buff *skb = pkt->skb;
+	const struct nf_afinfo *ai;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+
+	switch (nft_pf(pkt)) {
+	case NFPROTO_IPV4:
+		fl.u.ip4.daddr = ip_hdr(skb)->saddr;	/* route lookup targets the packet's source address */
+		minlen = sizeof(struct iphdr) + sizeof(struct tcphdr);
+		break;
+	case NFPROTO_IPV6:
+		fl.u.ip6.daddr = ipv6_hdr(skb)->saddr;
+		minlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+		break;
+	}
+
+	ai = nf_get_afinfo(nft_pf(pkt));
+	if (ai) {
+		struct dst_entry *dst = NULL;
+
+		ai->route(nft_net(pkt), &dst, &fl, false);	/* return value unused; dst is tested instead */
+		if (dst) {
+			mtu = min(mtu, dst_mtu(dst));	/* keep the smaller of the two MTUs */
+			dst_release(dst);
+		}
+	}
+
+	if (mtu <= minlen || mtu > 0xffff)	/* no room left after the headers, or too large for the u16 result */
+		return TCP_MSS_DEFAULT;
+
+	return mtu - minlen;	/* MTU minus the IP and TCP header sizes */
+}
+
 static void nft_rt_get_eval(const struct nft_expr *expr,
 			    struct nft_regs *regs,
 			    const struct nft_pktinfo *pkt)
@@ -46,8 +83,8 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
 		if (nft_pf(pkt) != NFPROTO_IPV4)
 			goto err;
 
-		*dest = rt_nexthop((const struct rtable *)dst,
-				   ip_hdr(skb)->daddr);
+		*dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+						ip_hdr(skb)->daddr);
 		break;
 	case NFT_RT_NEXTHOP6:
 		if (nft_pf(pkt) != NFPROTO_IPV6)
@@ -57,6 +94,9 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
 					 &ipv6_hdr(skb)->daddr),
 		       sizeof(struct in6_addr));
 		break;
+	case NFT_RT_TCPMSS:
+		nft_reg_store16(dest, get_tcpmss(pkt, dst));
+		break;
 	default:
 		WARN_ON(1);
 		goto err;
@@ -67,7 +107,7 @@ err:
 	regs->verdict.code = NFT_BREAK;
 }
 
-const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
 	[NFTA_RT_DREG]		= { .type = NLA_U32 },
 	[NFTA_RT_KEY]		= { .type = NLA_U32 },
 };
@@ -94,6 +134,9 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
 	case NFT_RT_NEXTHOP6:
 		len = sizeof(struct in6_addr);
 		break;
+	case NFT_RT_TCPMSS:
+		len = sizeof(u16);
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -118,6 +161,29 @@ nla_put_failure:
 	return -1;
 }
 
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			   const struct nft_data **data)
+{
+	const struct nft_rt *priv = nft_expr_priv(expr);
+	unsigned int hooks;	/* bitmask built from the permitted hook numbers */
+
+	switch (priv->key) {
+	case NFT_RT_NEXTHOP4:
+	case NFT_RT_NEXTHOP6:
+	case NFT_RT_CLASSID:
+		return 0;	/* these keys are accepted without a hook check */
+	case NFT_RT_TCPMSS:
+		hooks = (1 << NF_INET_FORWARD) |
+			(1 << NF_INET_LOCAL_OUT) |
+			(1 << NF_INET_POST_ROUTING);
+		break;
+	default:
+		return -EINVAL;	/* key not handled by this switch */
+	}
+
+	return nft_chain_validate_hooks(ctx->chain, hooks);	/* reached only for NFT_RT_TCPMSS */
+}
+
 static struct nft_expr_type nft_rt_type;
 static const struct nft_expr_ops nft_rt_get_ops = {
 	.type		= &nft_rt_type,
@@ -125,6 +191,7 @@ static const struct nft_expr_ops nft_rt_get_ops = {
 	.eval		= nft_rt_get_eval,
 	.init		= nft_rt_get_init,
 	.dump		= nft_rt_get_dump,
+	.validate	= nft_rt_validate,
 };
 
 static struct nft_expr_type nft_rt_type __read_mostly = {
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 734989c40579..45fb2752fb63 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -106,6 +106,23 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
 	return NULL;
 }
 
+static void *nft_bitmap_get(const struct net *net, const struct nft_set *set,
+			    const struct nft_set_elem *elem, unsigned int flags)
+{
+	const struct nft_bitmap *priv = nft_set_priv(set);
+	u8 genmask = nft_genmask_cur(net);
+	struct nft_bitmap_elem *be;
+
+	list_for_each_entry_rcu(be, &priv->list, head) {
+		if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) ||
+		    !nft_set_elem_active(&be->ext, genmask))
+			continue;	/* key differs, or the element is not active for genmask */
+
+		return be;
+	}
+	return ERR_PTR(-ENOENT);	/* no list entry matched; flags is not consulted */
+}
+
 static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
 			     const struct nft_set_elem *elem,
 			     struct nft_set_ext **ext)
@@ -294,6 +311,7 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = {
 	.activate	= nft_bitmap_activate,
 	.lookup		= nft_bitmap_lookup,
 	.walk		= nft_bitmap_walk,
+	.get		= nft_bitmap_get,
 };
 
 static struct nft_set_type nft_bitmap_type __read_mostly = {
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 0fa01d772c5e..f8166c1d5430 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -95,6 +95,24 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
 	return !!he;
 }
 
+static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
+			   const struct nft_set_elem *elem, unsigned int flags)
+{
+	struct nft_rhash *priv = nft_set_priv(set);
+	struct nft_rhash_elem *he;
+	struct nft_rhash_cmp_arg arg = {	/* lookup argument handed to the rhashtable */
+		.genmask = nft_genmask_cur(net),
+		.set	 = set,
+		.key	 = elem->key.val.data,
+	};
+
+	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+	if (he != NULL)
+		return he;
+
+	return ERR_PTR(-ENOENT);	/* a miss is reported as an error pointer, not NULL */
+}
+
 static bool nft_rhash_update(struct nft_set *set, const u32 *key,
 			     void *(*new)(struct nft_set *,
 					  const struct nft_expr *,
@@ -409,6 +427,24 @@ static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
 	return false;
 }
 
+static void *nft_hash_get(const struct net *net, const struct nft_set *set,
+			  const struct nft_set_elem *elem, unsigned int flags)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	u8 genmask = nft_genmask_cur(net);
+	struct nft_hash_elem *he;
+	u32 hash;
+
+	hash = jhash(elem->key.val.data, set->klen, priv->seed);	/* hash klen bytes of the key with priv->seed */
+	hash = reciprocal_scale(hash, priv->buckets);	/* reduce the hash to an index into priv->table */
+	hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+		if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
+		    nft_set_elem_active(&he->ext, genmask))
+			return he;	/* same key and active for genmask */
+	}
+	return ERR_PTR(-ENOENT);	/* nothing in the bucket matched; flags is not consulted */
+}
+
 /* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */
 static inline u32 nft_hash_key(const u32 *key, u32 klen)
 {
@@ -494,7 +530,7 @@ static void *nft_hash_deactivate(const struct net *net,
 	hash = reciprocal_scale(hash, priv->buckets);
 	hlist_for_each_entry(he, &priv->table[hash], node) {
 		if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val,
-			    set->klen) ||
+			    set->klen) &&
 		    nft_set_elem_active(&he->ext, genmask)) {
 			nft_set_elem_change_active(net, set, &he->ext);
 			return he;
@@ -600,6 +636,7 @@ static struct nft_set_ops nft_rhash_ops __read_mostly = {
 	.lookup		= nft_rhash_lookup,
 	.update		= nft_rhash_update,
 	.walk		= nft_rhash_walk,
+	.get		= nft_rhash_get,
 	.features	= NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
 };
 
@@ -617,6 +654,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
 	.remove		= nft_hash_remove,
 	.lookup		= nft_hash_lookup,
 	.walk		= nft_hash_walk,
+	.get		= nft_hash_get,
 	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
 };
 
@@ -634,6 +672,7 @@ static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
 	.remove		= nft_hash_remove,
 	.lookup		= nft_hash_lookup_fast,
 	.walk		= nft_hash_walk,
+	.get		= nft_hash_get,
 	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
 };
 
@@ -643,7 +682,6 @@ nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
 {
 	if (desc->size) {
 		switch (desc->klen) {
-		case 2:
 		case 4:
 			return &nft_hash_fast_ops;
 		default:
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index bce5382f1d49..e6f08bc5f359 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,8 +19,9 @@
 #include <net/netfilter/nf_tables.h>
 
 struct nft_rbtree {
-	rwlock_t		lock;
 	struct rb_root		root;
+	rwlock_t		lock;
+	seqcount_t		count;	/* written around insert and erase so lockless lookups can detect a change */
 };
 
 struct nft_rbtree_elem {
@@ -40,8 +41,9 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
 	return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
 }
 
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
-			      const u32 *key, const struct nft_set_ext **ext)
+static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+				const u32 *key, const struct nft_set_ext **ext,
+				unsigned int seq)
 {
 	struct nft_rbtree *priv = nft_set_priv(set);
 	const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -50,15 +52,17 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
 	const void *this;
 	int d;
 
-	read_lock_bh(&priv->lock);
-	parent = priv->root.rb_node;
+	parent = rcu_dereference_raw(priv->root.rb_node);
 	while (parent != NULL) {
+		if (read_seqcount_retry(&priv->count, seq))
+			return false;
+
 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
 		this = nft_set_ext_key(&rbe->ext);
 		d = memcmp(this, key, set->klen);
 		if (d < 0) {
-			parent = parent->rb_left;
+			parent = rcu_dereference_raw(parent->rb_left);
 			if (interval &&
 			    nft_rbtree_equal(set, this, interval) &&
 			    nft_rbtree_interval_end(this) &&
@@ -66,15 +70,14 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
 				continue;
 			interval = rbe;
 		} else if (d > 0)
-			parent = parent->rb_right;
+			parent = rcu_dereference_raw(parent->rb_right);
 		else {
 			if (!nft_set_elem_active(&rbe->ext, genmask)) {
-				parent = parent->rb_left;
+				parent = rcu_dereference_raw(parent->rb_left);
 				continue;
 			}
 			if (nft_rbtree_interval_end(rbe))
 				goto out;
-			read_unlock_bh(&priv->lock);
 
 			*ext = &rbe->ext;
 			return true;
@@ -84,15 +87,104 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
 	if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
 	    nft_set_elem_active(&interval->ext, genmask) &&
 	    !nft_rbtree_interval_end(interval)) {
-		read_unlock_bh(&priv->lock);
 		*ext = &interval->ext;
 		return true;
 	}
 out:
+	return false;
+}
+
+static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+			      const u32 *key, const struct nft_set_ext **ext)
+{
+	struct nft_rbtree *priv = nft_set_priv(set);
+	unsigned int seq = read_seqcount_begin(&priv->count);
+	bool ret;
+
+	ret = __nft_rbtree_lookup(net, set, key, ext, seq);	/* first pass runs without priv->lock */
+	if (ret || !read_seqcount_retry(&priv->count, seq))
+		return ret;	/* a hit, or a miss during which the sequence count did not change */
+
+	read_lock_bh(&priv->lock);	/* second pass: insert and remove take this lock for writing */
+	seq = read_seqcount_begin(&priv->count);
+	ret = __nft_rbtree_lookup(net, set, key, ext, seq);
 	read_unlock_bh(&priv->lock);
+
+	return ret;
+}
+
+static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
+			     const u32 *key, struct nft_rbtree_elem **elem,
+			     unsigned int seq, unsigned int flags, u8 genmask)
+{	/* Walks the tree for key; returns true with *elem set on a match, false on a miss or a stale seq. */
+	struct nft_rbtree_elem *rbe, *interval = NULL;	/* interval: last node whose key compared lower than key */
+	struct nft_rbtree *priv = nft_set_priv(set);
+	const struct rb_node *parent;
+	const void *this;
+	int d;
+
+	parent = rcu_dereference_raw(priv->root.rb_node);
+	while (parent != NULL) {
+		if (read_seqcount_retry(&priv->count, seq))
+			return false;	/* sequence count moved during the walk; *elem is left untouched */
+
+		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+		this = nft_set_ext_key(&rbe->ext);
+		d = memcmp(this, key, set->klen);
+		if (d < 0) {
+			parent = rcu_dereference_raw(parent->rb_left);
+			interval = rbe;
+		} else if (d > 0) {
+			parent = rcu_dereference_raw(parent->rb_right);
+		} else {
+			if (!nft_set_elem_active(&rbe->ext, genmask)) {
+				parent = rcu_dereference_raw(parent->rb_left);	/* skip the inactive node and keep walking */
+			} else if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
+			    (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
+			    (flags & NFT_SET_ELEM_INTERVAL_END)) {
+				*elem = rbe;
+				return true;
+			} else {
+				return false;	/* active exact match whose interval-end flag differs from the request */
+			}
+		}
+	}
+
+	if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+	    nft_set_elem_active(&interval->ext, genmask) &&
+	    !nft_rbtree_interval_end(interval)) {
+		*elem = interval;	/* no exact match: fall back to the remembered lower node */
+		return true;
+	}
+
 	return false;
 }
 
+static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
+			    const struct nft_set_elem *elem, unsigned int flags)
+{
+	struct nft_rbtree *priv = nft_set_priv(set);
+	unsigned int seq = read_seqcount_begin(&priv->count);
+	struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT);	/* returned unchanged when the first pass misses */
+	const u32 *key = (const u32 *)&elem->key.val;
+	u8 genmask = nft_genmask_cur(net);
+	bool ret;
+
+	ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);	/* first pass runs without priv->lock */
+	if (ret || !read_seqcount_retry(&priv->count, seq))
+		return rbe;
+
+	read_lock_bh(&priv->lock);	/* sequence count changed: repeat the walk under the read lock */
+	seq = read_seqcount_begin(&priv->count);
+	ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask);
+	if (!ret)
+		rbe = ERR_PTR(-ENOENT);
+	read_unlock_bh(&priv->lock);
+
+	return rbe;	/* either an element pointer or ERR_PTR(-ENOENT) */
+}
+
 static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 			       struct nft_rbtree_elem *new,
 			       struct nft_set_ext **ext)
@@ -130,7 +222,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 			}
 		}
 	}
-	rb_link_node(&new->node, parent, p);
+	rb_link_node_rcu(&new->node, parent, p);
 	rb_insert_color(&new->node, &priv->root);
 	return 0;
 }
@@ -144,7 +236,9 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 	int err;
 
 	write_lock_bh(&priv->lock);
+	write_seqcount_begin(&priv->count);
 	err = __nft_rbtree_insert(net, set, rbe, ext);
+	write_seqcount_end(&priv->count);
 	write_unlock_bh(&priv->lock);
 
 	return err;
@@ -158,7 +252,9 @@ static void nft_rbtree_remove(const struct net *net,
 	struct nft_rbtree_elem *rbe = elem->priv;
 
 	write_lock_bh(&priv->lock);
+	write_seqcount_begin(&priv->count);
 	rb_erase(&rbe->node, &priv->root);
+	write_seqcount_end(&priv->count);
 	write_unlock_bh(&priv->lock);
 }
 
@@ -264,6 +360,7 @@ static int nft_rbtree_init(const struct nft_set *set,
 	struct nft_rbtree *priv = nft_set_priv(set);
 
 	rwlock_init(&priv->lock);
+	seqcount_init(&priv->count);
 	priv->root = RB_ROOT;
 	return 0;
 }
@@ -311,6 +408,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
 	.activate	= nft_rbtree_activate,
 	.lookup		= nft_rbtree_lookup,
 	.walk		= nft_rbtree_walk,
+	.get		= nft_rbtree_get,
 	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
 };
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e1648238a9c9..a77dd514297c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 		if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
 			return ERR_PTR(-EFAULT);
 
-		strlcpy(info->name, compat_tmp.name, sizeof(info->name));
+		memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
 		info->num_counters = compat_tmp.num_counters;
 		user += sizeof(compat_tmp);
 	} else
@@ -905,9 +905,9 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 		if (copy_from_user(info, user, sizeof(*info)) != 0)
 			return ERR_PTR(-EFAULT);
 
-		info->name[sizeof(info->name) - 1] = '\0';
 		user += sizeof(*info);
 	}
+	info->name[sizeof(info->name) - 1] = '\0';
 
 	size = sizeof(struct xt_counters);
 	size *= info->num_counters;
@@ -1153,6 +1153,7 @@ xt_replace_table(struct xt_table *table,
 	      int *error)
 {
 	struct xt_table_info *private;
+	unsigned int cpu;
 	int ret;
 
 	ret = xt_jumpstack_alloc(newinfo);
@@ -1182,26 +1183,34 @@ xt_replace_table(struct xt_table *table,
 	smp_wmb();
 	table->private = newinfo;
 
+	/* make sure all cpus see new ->private value */
+	smp_wmb();
+
 	/*
 	 * Even though table entries have now been swapped, other CPU's
-	 * may still be using the old entries. This is okay, because
-	 * resynchronization happens because of the locking done
-	 * during the get_counters() routine.
+	 * may still be using the old entries...
 	 */
 	local_bh_enable();
 
+	/* ... so wait for even xt_recseq on all cpus */
+	for_each_possible_cpu(cpu) {
+		seqcount_t *s = &per_cpu(xt_recseq, cpu);
+		u32 seq = raw_read_seqcount(s);
+
+		if (seq & 1) {
+			do {
+				cond_resched();
+				cpu_relax();
+			} while (seq == raw_read_seqcount(s));
+		}
+	}
+
 #ifdef CONFIG_AUDIT
 	if (audit_enabled) {
-		struct audit_buffer *ab;
-
-		ab = audit_log_start(current->audit_context, GFP_KERNEL,
-				     AUDIT_NETFILTER_CFG);
-		if (ab) {
-			audit_log_format(ab, "table=%s family=%u entries=%u",
-					 table->name, table->af,
-					 private->number);
-			audit_log_end(ab);
-		}
+		audit_log(current->audit_context, GFP_KERNEL,
+			  AUDIT_NETFILTER_CFG,
+			  "table=%s family=%u entries=%u",
+			  table->name, table->af, private->number);
 	}
 #endif
 
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 623ef37de886..5a152e2acfd5 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -121,9 +121,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
 {
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct ctnl_timeout *timeout;
 	struct nf_conn_timeout *timeout_ext;
-	struct nf_conntrack_l4proto *l4proto;
 	int ret = 0;
 	u8 proto;
 
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index e45a01255e70..58aa9dd3c5b7 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -77,10 +77,10 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 	struct nf_nat_range newrange;
 
-	NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING ||
-		     xt_hooknum(par) == NF_INET_POST_ROUTING ||
-		     xt_hooknum(par) == NF_INET_LOCAL_OUT ||
-		     xt_hooknum(par) == NF_INET_LOCAL_IN);
+	WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
+		xt_hooknum(par) != NF_INET_POST_ROUTING &&
+		xt_hooknum(par) != NF_INET_LOCAL_OUT &&
+		xt_hooknum(par) != NF_INET_LOCAL_IN);
 	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index c64aca611ac5..9dae4d665965 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -62,11 +62,9 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
 		memset(fl6, 0, sizeof(*fl6));
 		fl6->daddr = ipv6_hdr(skb)->saddr;
 	}
-	rcu_read_lock();
 	ai = nf_get_afinfo(family);
 	if (ai != NULL)
 		ai->route(net, (struct dst_entry **)&rt, &fl, false);
-	rcu_read_unlock();
 
 	if (rt != NULL) {
 		mtu = dst_mtu(&rt->dst);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d767e35fff6b..17d7705e3bd4 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -70,13 +70,11 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
 		return user_laddr;
 
 	laddr = 0;
-	rcu_read_lock();
 	indev = __in_dev_get_rcu(skb->dev);
 	for_primary_ifa(indev) {
 		laddr = ifa->ifa_local;
 		break;
 	} endfor_ifa(indev);
-	rcu_read_unlock();
 
 	return laddr ? laddr : daddr;
 }
@@ -125,7 +123,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
 						      __tcp_hdrlen(tcph),
 						    saddr, sport,
 						    daddr, dport,
-						    in->ifindex);
+						    in->ifindex, 0);
 
 			if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 				sk = NULL;
@@ -195,7 +193,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
 						   thoff + __tcp_hdrlen(tcph),
 						   saddr, sport,
 						   daddr, ntohs(dport),
-						   in->ifindex);
+						   in->ifindex, 0);
 
 			if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
 				sk = NULL;
@@ -208,7 +206,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
 		case NFT_LOOKUP_ESTABLISHED:
 			sk = __inet6_lookup_established(net, &tcp_hashinfo,
 							saddr, sport, daddr, ntohs(dport),
-							in->ifindex);
+							in->ifindex, 0);
 			break;
 		default:
 			BUG();
@@ -391,7 +389,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
 		return user_laddr;
 	laddr = NULL;
 
-	rcu_read_lock();
 	indev = __in6_dev_get(skb->dev);
 	if (indev) {
 		read_lock_bh(&indev->lock);
@@ -404,7 +401,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
 		}
 		read_unlock_bh(&indev->lock);
 	}
-	rcu_read_unlock();
 
 	return laddr ? laddr : daddr;
 }
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index e329dabde35f..3b2be2ae6987 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -47,8 +47,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
 	if (dev)
 		flow.flowi6_oif = dev->ifindex;
 
-	rcu_read_lock();
-
 	afinfo = nf_get_afinfo(NFPROTO_IPV6);
 	if (afinfo != NULL) {
 		const struct nf_ipv6_ops *v6ops;
@@ -63,7 +61,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
 	} else {
 		route_err = 1;
 	}
-	rcu_read_unlock();
 
 	if (route_err)
 		return XT_ADDRTYPE_UNREACHABLE;
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 38986a95216c..041da0d9c06f 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/syscalls.h>
 #include <linux/skbuff.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
@@ -49,6 +50,22 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
 	return 0;
 }
 
+static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
+{
+	mm_segment_t oldfs = get_fs();	/* saved so the address limit can be restored below */
+	int retval, fd;
+
+	set_fs(KERNEL_DS);	/* NOTE(review): presumably because bpf_obj_get_user() expects a user pointer - confirm */
+	fd = bpf_obj_get_user(path, 0);	/* NOTE(review): path length/termination is not checked here - verify callers */
+	set_fs(oldfs);
+	if (fd < 0)
+		return fd;
+
+	retval = __bpf_mt_check_fd(fd, ret);
+	sys_close(fd);	/* fd is closed whether or not the check succeeded */
+	return retval;
+}
+
 static int bpf_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_bpf_info *info = par->matchinfo;
@@ -66,9 +83,10 @@ static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
 		return __bpf_mt_check_bytecode(info->bpf_program,
 					       info->bpf_program_num_elem,
 					       &info->filter);
-	else if (info->mode == XT_BPF_MODE_FD_PINNED ||
-		 info->mode == XT_BPF_MODE_FD_ELF)
+	else if (info->mode == XT_BPF_MODE_FD_ELF)
 		return __bpf_mt_check_fd(info->fd, &info->filter);
+	else if (info->mode == XT_BPF_MODE_PATH_PINNED)
+		return __bpf_mt_check_path(info->path, &info->filter);
 	else
 		return -EINVAL;
 }
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b8fd4ab762ed..a6214f235333 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -46,7 +46,6 @@
 struct xt_connlimit_conn {
 	struct hlist_node		node;
 	struct nf_conntrack_tuple	tuple;
-	union nf_inet_addr		addr;
 };
 
 struct xt_connlimit_rb {
@@ -58,8 +57,7 @@ struct xt_connlimit_rb {
 static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
 
 struct xt_connlimit_data {
-	struct rb_root climit_root4[CONNLIMIT_SLOTS];
-	struct rb_root climit_root6[CONNLIMIT_SLOTS];
+	struct rb_root climit_root[CONNLIMIT_SLOTS];	/* one tree per hash slot, used for both IPv4 and IPv6 */
 };
 
 static u_int32_t connlimit_rnd __read_mostly;
@@ -73,16 +71,9 @@ static inline unsigned int connlimit_iphash(__be32 addr)
 }
 
 static inline unsigned int
-connlimit_iphash6(const union nf_inet_addr *addr,
-                  const union nf_inet_addr *mask)
+connlimit_iphash6(const union nf_inet_addr *addr)
 {
-	union nf_inet_addr res;
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
-		res.ip6[i] = addr->ip6[i] & mask->ip6[i];
-
-	return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
+	return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6),	/* addr is hashed as given; no mask is applied here */
 		       connlimit_rnd) % CONNLIMIT_SLOTS;
 }
 
@@ -96,24 +87,13 @@ static inline bool already_closed(const struct nf_conn *conn)
 }
 
 static int
-same_source_net(const union nf_inet_addr *addr,
-		const union nf_inet_addr *mask,
-		const union nf_inet_addr *u3, u_int8_t family)
+same_source(const union nf_inet_addr *addr,
+	    const union nf_inet_addr *u3, u_int8_t family)
 {
-	if (family == NFPROTO_IPV4) {
-		return ntohl(addr->ip & mask->ip) -
-		       ntohl(u3->ip & mask->ip);
-	} else {
-		union nf_inet_addr lh, rh;
-		unsigned int i;
-
-		for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
-			lh.ip6[i] = addr->ip6[i] & mask->ip6[i];
-			rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
-		}
+	if (family == NFPROTO_IPV4)	/* returns <0, 0 or >0; used as the rbtree ordering in count_tree() */
+		return memcmp(&addr->ip, &u3->ip, sizeof(addr->ip));	/* bytewise compare of network-order addresses; a u32 difference cast to int flips sign past 2^31 */
 
-		return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6));
-	}
+	return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6));	/* IPv6: bytewise compare of the full address */
 }
 
 static bool add_hlist(struct hlist_head *head,
@@ -126,7 +106,6 @@ static bool add_hlist(struct hlist_head *head,
 	if (conn == NULL)
 		return false;
 	conn->tuple = *tuple;
-	conn->addr = *addr;
 	hlist_add_head(&conn->node, head);
 	return true;
 }
@@ -144,7 +123,6 @@ static unsigned int check_hlist(struct net *net,
 	unsigned int length = 0;
 
 	*addit = true;
-	rcu_read_lock();
 
 	/* check the saved connections */
 	hlist_for_each_entry_safe(conn, n, head, node) {
@@ -179,8 +157,6 @@ static unsigned int check_hlist(struct net *net,
 		length++;
 	}
 
-	rcu_read_unlock();
-
 	return length;
 }
 
@@ -200,7 +176,7 @@ static void tree_nodes_free(struct rb_root *root,
 static unsigned int
 count_tree(struct net *net, struct rb_root *root,
 	   const struct nf_conntrack_tuple *tuple,
-	   const union nf_inet_addr *addr, const union nf_inet_addr *mask,
+	   const union nf_inet_addr *addr,
 	   u8 family, const struct nf_conntrack_zone *zone)
 {
 	struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
@@ -221,7 +197,7 @@ count_tree(struct net *net, struct rb_root *root,
 		rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node);
 
 		parent = *rbnode;
-		diff = same_source_net(addr, mask, &rbconn->addr, family);
+		diff = same_source(addr, &rbconn->addr, family);
 		if (diff < 0) {
 			rbnode = &((*rbnode)->rb_left);
 		} else if (diff > 0) {
@@ -274,7 +250,6 @@ count_tree(struct net *net, struct rb_root *root,
 	}
 
 	conn->tuple = *tuple;
-	conn->addr = *addr;
 	rbconn->addr = *addr;
 
 	INIT_HLIST_HEAD(&rbconn->hhead);
@@ -289,7 +264,6 @@ static int count_them(struct net *net,
 		      struct xt_connlimit_data *data,
 		      const struct nf_conntrack_tuple *tuple,
 		      const union nf_inet_addr *addr,
-		      const union nf_inet_addr *mask,
 		      u_int8_t family,
 		      const struct nf_conntrack_zone *zone)
 {
@@ -297,17 +271,15 @@ static int count_them(struct net *net,
 	int count;
 	u32 hash;
 
-	if (family == NFPROTO_IPV6) {
-		hash = connlimit_iphash6(addr, mask);
-		root = &data->climit_root6[hash];
-	} else {
-		hash = connlimit_iphash(addr->ip & mask->ip);
-		root = &data->climit_root4[hash];
-	}
+	if (family == NFPROTO_IPV6)
+		hash = connlimit_iphash6(addr);
+	else
+		hash = connlimit_iphash(addr->ip);
+	root = &data->climit_root[hash];
 
 	spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
 
-	count = count_tree(net, root, tuple, addr, mask, family, zone);
+	count = count_tree(net, root, tuple, addr, family, zone);
 
 	spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
 
@@ -338,16 +310,23 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	if (xt_family(par) == NFPROTO_IPV6) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
+		unsigned int i;
+
 		memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
 		       &iph->daddr : &iph->saddr, sizeof(addr.ip6));
+
+		for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
+			addr.ip6[i] &= info->mask.ip6[i];
 	} else {
 		const struct iphdr *iph = ip_hdr(skb);
 		addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
 			  iph->daddr : iph->saddr;
+
+		addr.ip &= info->mask.ip;
 	}
 
 	connections = count_them(net, info->data, tuple_ptr, &addr,
-	                         &info->mask, xt_family(par), zone);
+				 xt_family(par), zone);
 	if (connections == 0)
 		/* kmalloc failed, drop it entirely */
 		goto hotdrop;
@@ -382,10 +361,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
-		info->data->climit_root4[i] = RB_ROOT;
-	for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
-		info->data->climit_root6[i] = RB_ROOT;
+	for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+		info->data->climit_root[i] = RB_ROOT;
 
 	return 0;
 }
@@ -416,10 +393,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
 
 	nf_ct_netns_put(par->net, par->family);
 
-	for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
-		destroy_tree(&info->data->climit_root4[i]);
-	for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
-		destroy_tree(&info->data->climit_root6[i]);
+	for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+		destroy_tree(&info->data->climit_root[i]);
 
 	kfree(info->data);
 }
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 762e1874f28b..5da8746f7b88 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -35,6 +35,7 @@
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter/xt_hashlimit.h>
 #include <linux/mutex.h>
+#include <linux/kernel.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
@@ -56,6 +57,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
 }
 
 /* need to declare this at the top */
+static const struct file_operations dl_file_ops_v2;
 static const struct file_operations dl_file_ops_v1;
 static const struct file_operations dl_file_ops;
 
@@ -87,8 +89,19 @@ struct dsthash_ent {
 	unsigned long expires;		/* precalculated expiry time */
 	struct {
 		unsigned long prev;	/* last modification */
-		u_int64_t credit;
-		u_int64_t credit_cap, cost;
+		union {
+			struct {
+				u_int64_t credit;
+				u_int64_t credit_cap;
+				u_int64_t cost;
+			};
+			struct {
+				u_int32_t interval, prev_window;
+				u_int64_t current_rate;
+				u_int64_t rate;
+				int64_t burst;
+			};
+		};
 	} rateinfo;
 	struct rcu_head rcu;
 };
@@ -99,7 +112,7 @@ struct xt_hashlimit_htable {
 	u_int8_t family;
 	bool rnd_initialized;
 
-	struct hashlimit_cfg2 cfg;	/* config */
+	struct hashlimit_cfg3 cfg;	/* config */
 
 	/* used internally */
 	spinlock_t lock;		/* lock for list_head */
@@ -116,10 +129,10 @@ struct xt_hashlimit_htable {
 };
 
 static int
-cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
+cfg_copy(struct hashlimit_cfg3 *to, const void *from, int revision)
 {
 	if (revision == 1) {
-		struct hashlimit_cfg1 *cfg = from;
+		struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
 
 		to->mode = cfg->mode;
 		to->avg = cfg->avg;
@@ -131,7 +144,19 @@ cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
 		to->srcmask = cfg->srcmask;
 		to->dstmask = cfg->dstmask;
 	} else if (revision == 2) {
-		memcpy(to, from, sizeof(struct hashlimit_cfg2));
+		struct hashlimit_cfg2 *cfg = (struct hashlimit_cfg2 *)from;
+
+		to->mode = cfg->mode;
+		to->avg = cfg->avg;
+		to->burst = cfg->burst;
+		to->size = cfg->size;
+		to->max = cfg->max;
+		to->gc_interval = cfg->gc_interval;
+		to->expire = cfg->expire;
+		to->srcmask = cfg->srcmask;
+		to->dstmask = cfg->dstmask;
+	} else if (revision == 3) {
+		memcpy(to, from, sizeof(struct hashlimit_cfg3));
 	} else {
 		return -EINVAL;
 	}
@@ -240,13 +265,14 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
 }
 static void htable_gc(struct work_struct *work);
 
-static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
+static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
 			 const char *name, u_int8_t family,
 			 struct xt_hashlimit_htable **out_hinfo,
 			 int revision)
 {
 	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
 	struct xt_hashlimit_htable *hinfo;
+	const struct file_operations *fops;
 	unsigned int size, i;
 	int ret;
 
@@ -254,7 +280,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
 		size = cfg->size;
 	} else {
 		size = (totalram_pages << PAGE_SHIFT) / 16384 /
-		       sizeof(struct list_head);
+		       sizeof(struct hlist_head);
 		if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
 			size = 8192;
 		if (size < 16)
@@ -262,13 +288,13 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
 	}
 	/* FIXME: don't use vmalloc() here or anywhere else -HW */
 	hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
-	                sizeof(struct list_head) * size);
+	                sizeof(struct hlist_head) * size);
 	if (hinfo == NULL)
 		return -ENOMEM;
 	*out_hinfo = hinfo;
 
 	/* copy match config into hashtable config */
-	ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2);
+	ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
 
 	if (ret)
 		return ret;
@@ -293,11 +319,21 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
 	}
 	spin_lock_init(&hinfo->lock);
 
+	switch (revision) {
+	case 1:
+		fops = &dl_file_ops_v1;
+		break;
+	case 2:
+		fops = &dl_file_ops_v2;
+		break;
+	default:
+		fops = &dl_file_ops;
+	}
+
 	hinfo->pde = proc_create_data(name, 0,
 		(family == NFPROTO_IPV4) ?
 		hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
-		(revision == 1) ? &dl_file_ops_v1 : &dl_file_ops,
-		hinfo);
+		fops, hinfo);
 	if (hinfo->pde == NULL) {
 		kfree(hinfo->name);
 		vfree(hinfo);
@@ -482,6 +518,25 @@ static u32 user2credits_byte(u32 user)
 	return (u32) (us >> 32);
 }
 
+static u64 user2rate(u64 user)
+{
+	if (user != 0) {
+		return div64_u64(XT_HASHLIMIT_SCALE_v2, user);
+	} else {
+		pr_warn("invalid rate from userspace: %llu\n", user);
+		return 0;
+	}
+}
+
+static u64 user2rate_bytes(u32 user)
+{
+	u64 r;
+
+	r = user ? U32_MAX / user : U32_MAX;
+	r = (r - 1) << XT_HASHLIMIT_BYTE_SHIFT;
+	return r;
+}
+
 static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
 			    u32 mode, int revision)
 {
@@ -491,6 +546,21 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
 	if (delta == 0)
 		return;
 
+	if (revision >= 3 && mode & XT_HASHLIMIT_RATE_MATCH) {
+		/* widen first: the u32 interval times HZ could wrap in 32 bits */
+		u64 interval = (u64)dh->rateinfo.interval * HZ;
+
+		if (delta < interval)
+			return;
+		dh->rateinfo.prev = now;
+		dh->rateinfo.prev_window =
+			((dh->rateinfo.current_rate * interval) >
+			 (delta * dh->rateinfo.rate));
+		dh->rateinfo.current_rate = 0;
+
+		return;
+	}
+
 	dh->rateinfo.prev = now;
 
 	if (mode & XT_HASHLIMIT_BYTES) {
@@ -515,7 +585,24 @@ static void rateinfo_init(struct dsthash_ent *dh,
 			  struct xt_hashlimit_htable *hinfo, int revision)
 {
 	dh->rateinfo.prev = jiffies;
-	if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+	if (revision >= 3 && hinfo->cfg.mode & XT_HASHLIMIT_RATE_MATCH) {
+		dh->rateinfo.prev_window = 0;
+		dh->rateinfo.current_rate = 0;
+		if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+			dh->rateinfo.rate =
+				user2rate_bytes((u32)hinfo->cfg.avg);
+			if (hinfo->cfg.burst)
+				dh->rateinfo.burst =
+					hinfo->cfg.burst * dh->rateinfo.rate;
+			else
+				dh->rateinfo.burst = dh->rateinfo.rate;
+		} else {
+			dh->rateinfo.rate = user2rate(hinfo->cfg.avg);
+			dh->rateinfo.burst =
+				hinfo->cfg.burst + dh->rateinfo.rate;
+		}
+		dh->rateinfo.interval = hinfo->cfg.interval;
+	} else if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
 		dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
 		dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg);
 		dh->rateinfo.credit_cap = hinfo->cfg.burst;
@@ -648,7 +735,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
 static bool
 hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
 		    struct xt_hashlimit_htable *hinfo,
-		    const struct hashlimit_cfg2 *cfg, int revision)
+		    const struct hashlimit_cfg3 *cfg, int revision)
 {
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
@@ -659,12 +746,12 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
 	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 		goto hotdrop;
 
-	rcu_read_lock_bh();
+	local_bh_disable();
 	dh = dsthash_find(hinfo, &dst);
 	if (dh == NULL) {
 		dh = dsthash_alloc_init(hinfo, &dst, &race);
 		if (dh == NULL) {
-			rcu_read_unlock_bh();
+			local_bh_enable();
 			goto hotdrop;
 		} else if (race) {
 			/* Already got an entry, update expiration timeout */
@@ -680,6 +767,20 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
 		rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
 	}
 
+	if (cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+		cost = (cfg->mode & XT_HASHLIMIT_BYTES) ? skb->len : 1;
+		dh->rateinfo.current_rate += cost;
+
+		if (!dh->rateinfo.prev_window &&
+		    (dh->rateinfo.current_rate <= dh->rateinfo.burst)) {
+			spin_unlock(&dh->lock);
+			/* pairs with local_bh_disable(), not an RCU-bh section */
+			local_bh_enable();
+			return !(cfg->mode & XT_HASHLIMIT_INVERT);
+		}
+		goto overlimit;
+	}
+
 	if (cfg->mode & XT_HASHLIMIT_BYTES)
 		cost = hashlimit_byte_cost(skb->len, dh);
 	else
@@ -689,12 +790,13 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
 		/* below the limit */
 		dh->rateinfo.credit -= cost;
 		spin_unlock(&dh->lock);
-		rcu_read_unlock_bh();
+		local_bh_enable();
 		return !(cfg->mode & XT_HASHLIMIT_INVERT);
 	}
 
+overlimit:
 	spin_unlock(&dh->lock);
-	rcu_read_unlock_bh();
+	local_bh_enable();
 	/* default match is underlimit - so over the limit, we need to invert */
 	return cfg->mode & XT_HASHLIMIT_INVERT;
 
@@ -708,7 +810,7 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 	struct xt_hashlimit_htable *hinfo = info->hinfo;
-	struct hashlimit_cfg2 cfg = {};
+	struct hashlimit_cfg3 cfg = {};
 	int ret;
 
 	ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
@@ -720,17 +822,33 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 }
 
 static bool
-hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
 	struct xt_hashlimit_htable *hinfo = info->hinfo;
+	struct hashlimit_cfg3 cfg = {};
+	int ret;
+
+	ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+	if (ret)
+		return ret;
+
+	return hashlimit_mt_common(skb, par, hinfo, &cfg, 2);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+	struct xt_hashlimit_htable *hinfo = info->hinfo;
 
-	return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
+	return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
 }
 
 static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
 				     struct xt_hashlimit_htable **hinfo,
-				     struct hashlimit_cfg2 *cfg,
+				     struct hashlimit_cfg3 *cfg,
 				     const char *name, int revision)
 {
 	struct net *net = par->net;
@@ -753,7 +871,17 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
 	}
 
 	/* Check for overflow. */
-	if (cfg->mode & XT_HASHLIMIT_BYTES) {
+	if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+		if (cfg->avg == 0 || cfg->avg > U32_MAX) {
+			pr_info("hashlimit invalid rate\n");
+			return -ERANGE;
+		}
+
+		if (cfg->interval == 0) {
+			pr_info("hashlimit invalid interval\n");
+			return -EINVAL;
+		}
+	} else if (cfg->mode & XT_HASHLIMIT_BYTES) {
 		if (user2credits_byte(cfg->avg) == 0) {
 			pr_info("overflow, rate too high: %llu\n", cfg->avg);
 			return -EINVAL;
@@ -784,7 +912,7 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
 static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
 {
 	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
-	struct hashlimit_cfg2 cfg = {};
+	struct hashlimit_cfg3 cfg = {};
 	int ret;
 
 	if (info->name[sizeof(info->name) - 1] != '\0')
@@ -799,15 +927,40 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
 					 &cfg, info->name, 1);
 }
 
-static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
 {
 	struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+	struct hashlimit_cfg3 cfg = {};
+	int ret;
+
+	if (info->name[sizeof(info->name) - 1] != '\0')
+		return -EINVAL;
+
+	ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+	if (ret)
+		return ret;
+
+	return hashlimit_mt_check_common(par, &info->hinfo,
+					 &cfg, info->name, 2);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
 
 	if (info->name[sizeof(info->name) - 1] != '\0')
 		return -EINVAL;
 
 	return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
-					 info->name, 2);
+					 info->name, 3);
+}
+
+static void hashlimit_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+	const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+	htable_put(info->hinfo);
 }
 
 static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
@@ -819,7 +972,7 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
 
 static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+	const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
 
 	htable_put(info->hinfo);
 }
@@ -840,9 +993,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 		.name           = "hashlimit",
 		.revision       = 2,
 		.family         = NFPROTO_IPV4,
-		.match          = hashlimit_mt,
+		.match          = hashlimit_mt_v2,
 		.matchsize      = sizeof(struct xt_hashlimit_mtinfo2),
 		.usersize	= offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+		.checkentry     = hashlimit_mt_check_v2,
+		.destroy        = hashlimit_mt_destroy_v2,
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "hashlimit",
+		.revision       = 3,
+		.family         = NFPROTO_IPV4,
+		.match          = hashlimit_mt,
+		.matchsize      = sizeof(struct xt_hashlimit_mtinfo3),
+		.usersize	= offsetof(struct xt_hashlimit_mtinfo3, hinfo),
 		.checkentry     = hashlimit_mt_check,
 		.destroy        = hashlimit_mt_destroy,
 		.me             = THIS_MODULE,
@@ -863,9 +1027,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 		.name           = "hashlimit",
 		.revision       = 2,
 		.family         = NFPROTO_IPV6,
-		.match          = hashlimit_mt,
+		.match          = hashlimit_mt_v2,
 		.matchsize      = sizeof(struct xt_hashlimit_mtinfo2),
 		.usersize	= offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+		.checkentry     = hashlimit_mt_check_v2,
+		.destroy        = hashlimit_mt_destroy_v2,
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "hashlimit",
+		.revision       = 3,
+		.family         = NFPROTO_IPV6,
+		.match          = hashlimit_mt,
+		.matchsize      = sizeof(struct xt_hashlimit_mtinfo3),
+		.usersize	= offsetof(struct xt_hashlimit_mtinfo3, hinfo),
 		.checkentry     = hashlimit_mt_check,
 		.destroy        = hashlimit_mt_destroy,
 		.me             = THIS_MODULE,
@@ -947,6 +1122,21 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
 	}
 }
 
+static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
+			       struct seq_file *s)
+{
+	const struct xt_hashlimit_htable *ht = s->private;
+
+	spin_lock(&ent->lock);
+	/* recalculate to show accurate numbers */
+	rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+	dl_seq_print(ent, family, s);
+
+	spin_unlock(&ent->lock);
+	return seq_has_overflowed(s);
+}
+
 static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
 			       struct seq_file *s)
 {
@@ -969,7 +1159,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 
 	spin_lock(&ent->lock);
 	/* recalculate to show accurate numbers */
-	rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+	rateinfo_recalc(ent, jiffies, ht->cfg.mode, 3);
 
 	dl_seq_print(ent, family, s);
 
@@ -977,6 +1167,20 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 	return seq_has_overflowed(s);
 }
 
+static int dl_seq_show_v2(struct seq_file *s, void *v)
+{
+	struct xt_hashlimit_htable *htable = s->private;
+	unsigned int *bucket = (unsigned int *)v;
+	struct dsthash_ent *ent;
+
+	if (!hlist_empty(&htable->hash[*bucket])) {
+		hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+			if (dl_seq_real_show_v2(ent, htable->family, s))
+				return -1;
+	}
+	return 0;
+}
+
 static int dl_seq_show_v1(struct seq_file *s, void *v)
 {
 	struct xt_hashlimit_htable *htable = s->private;
@@ -1012,6 +1216,13 @@ static const struct seq_operations dl_seq_ops_v1 = {
 	.show  = dl_seq_show_v1
 };
 
+static const struct seq_operations dl_seq_ops_v2 = {
+	.start = dl_seq_start,
+	.next  = dl_seq_next,
+	.stop  = dl_seq_stop,
+	.show  = dl_seq_show_v2
+};
+
 static const struct seq_operations dl_seq_ops = {
 	.start = dl_seq_start,
 	.next  = dl_seq_next,
@@ -1019,6 +1230,18 @@ static const struct seq_operations dl_seq_ops = {
 	.show  = dl_seq_show
 };
 
+static int dl_proc_open_v2(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &dl_seq_ops_v2);
+
+	if (!ret) {
+		struct seq_file *sf = file->private_data;
+
+		sf->private = PDE_DATA(inode);
+	}
+	return ret;
+}
+
 static int dl_proc_open_v1(struct inode *inode, struct file *file)
 {
 	int ret = seq_open(file, &dl_seq_ops_v1);
@@ -1042,6 +1265,14 @@ static int dl_proc_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
+static const struct file_operations dl_file_ops_v2 = {
+	.owner   = THIS_MODULE,
+	.open    = dl_proc_open_v2,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
 static const struct file_operations dl_file_ops_v1 = {
 	.owner   = THIS_MODULE,
 	.open    = dl_proc_open_v1,
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index 8107b3eb865f..0fd14d1eb09d 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -58,9 +58,9 @@ xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct != NULL &&
-		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-		      ctinfo == IP_CT_RELATED_REPLY));
+	WARN_ON(!(ct != NULL &&
+		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+		  ctinfo == IP_CT_RELATED_REPLY)));
 
 	xt_nat_convert_range(&range, &mr->range[0]);
 	return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
@@ -75,8 +75,8 @@ xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct != NULL &&
-		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+	WARN_ON(!(ct != NULL &&
+		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
 
 	xt_nat_convert_range(&range, &mr->range[0]);
 	return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
@@ -90,9 +90,9 @@ xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct != NULL &&
-		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-		      ctinfo == IP_CT_RELATED_REPLY));
+	WARN_ON(!(ct != NULL &&
+		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+		  ctinfo == IP_CT_RELATED_REPLY)));
 
 	return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
 }
@@ -105,8 +105,8 @@ xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	NF_CT_ASSERT(ct != NULL &&
-		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+	WARN_ON(!(ct != NULL &&
+		 (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
 
 	return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST);
 }
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 71cfa9551d08..36e14b1f061d 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -226,7 +226,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 				sizeof(struct tcphdr), optsize, opts);
 	}
 
-	rcu_read_lock();
 	list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
 		int foptsize, optnum;
 
@@ -340,7 +339,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 		    info->loglevel == XT_OSF_LOGLEVEL_FIRST)
 			break;
 	}
-	rcu_read_unlock();
 
 	if (!fcount && (info->flags & XT_OSF_LOG))
 		nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 3f6c4fa78bdb..245fa350a7a8 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -106,7 +106,7 @@ static DEFINE_SPINLOCK(recent_lock);
 static DEFINE_MUTEX(recent_mutex);
 
 #ifdef CONFIG_PROC_FS
-static const struct file_operations recent_old_fops, recent_mt_fops;
+static const struct file_operations recent_mt_fops;
 #endif
 
 static u_int32_t hash_rnd __read_mostly;
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
index 8fd324116e6f..68ccbe50bb1e 100644
--- a/net/netfilter/xt_repldata.h
+++ b/net/netfilter/xt_repldata.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Today's hack: quantum tunneling in structs
  *
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index e75ef39669c5..575d2153e3b8 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -76,7 +76,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 			transparent = nf_sk_is_transparent(sk);
 
 		if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
-		    transparent)
+		    transparent && sk_fullsock(sk))
 			pskb->mark = sk->sk_mark;
 
 		if (sk != skb->sk)
@@ -133,7 +133,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
 			transparent = nf_sk_is_transparent(sk);
 
 		if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
-		    transparent)
+		    transparent && sk_fullsock(sk))
 			pskb->mark = sk->sk_mark;
 
 		if (sk != skb->sk)
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index d341ede0dca5..5a46381a64e7 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the NetLabel subsystem.
 #
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index d0f38bc9af6d..ac709f0f197b 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -87,7 +87,7 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s,
 	struct list_head *i = s;
 	struct netlbl_af4list *n = __af4list_entry(s);
 	while (i != h && !n->valid) {
-		i = rcu_dereference(i->next);
+		i = rcu_dereference(list_next_rcu(i));
 		n = __af4list_entry(i);
 	}
 	return n;
@@ -154,7 +154,7 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s,
 	struct list_head *i = s;
 	struct netlbl_af6list *n = __af6list_entry(s);
 	while (i != h && !n->valid) {
-		i = rcu_dereference(i->next);
+		i = rcu_dereference(list_next_rcu(i));
 		n = __af6list_entry(i);
 	}
 	return n;
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index d177dd066504..4d748975117d 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -393,7 +393,7 @@ EXPORT_SYMBOL(netlbl_calipso_ops_register);
 
 static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void)
 {
-	return ACCESS_ONCE(calipso_ops);
+	return READ_ONCE(calipso_ops);
 }
 
 /**
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5acee49db90b..b9e0ee4e22f5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -128,7 +128,6 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
 };
 
 static int netlink_dump(struct sock *sk);
-static void netlink_skb_destructor(struct sk_buff *skb);
 
 /* nl_table locking explained:
  * Lookup and traversal are protected with an RCU read-side lock. Insertion
@@ -691,6 +690,9 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
 	struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
 	struct sock *sk = &nlk->sk;
 
+	kfree(nlk->groups);
+	nlk->groups = NULL;
+
 	if (!refcount_dec_and_test(&sk->sk_refcnt))
 		return;
 
@@ -769,9 +771,6 @@ static int netlink_release(struct socket *sock)
 		netlink_table_ungrab();
 	}
 
-	kfree(nlk->groups);
-	nlk->groups = NULL;
-
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
 	local_bh_enable();
@@ -955,7 +954,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 	struct net *net = sock_net(sk);
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
-	int err;
+	int err = 0;
 	long unsigned int groups = nladdr->nl_groups;
 	bool bound;
 
@@ -983,6 +982,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			return -EINVAL;
 	}
 
+	netlink_lock_table();
 	if (nlk->netlink_bind && groups) {
 		int group;
 
@@ -993,7 +993,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			if (!err)
 				continue;
 			netlink_undo_bind(group, groups, sk);
-			return err;
+			goto unlock;
 		}
 	}
 
@@ -1006,12 +1006,13 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			netlink_autobind(sock);
 		if (err) {
 			netlink_undo_bind(nlk->ngroups, groups, sk);
-			return err;
+			goto unlock;
 		}
 	}
 
 	if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
-		return 0;
+		goto unlock;
+	netlink_unlock_table();
 
 	netlink_table_grab();
 	netlink_update_subscriptions(sk, nlk->subscriptions +
@@ -1022,6 +1023,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 	netlink_table_ungrab();
 
 	return 0;
+
+unlock:
+	netlink_unlock_table();
+	return err;
 }
 
 static int netlink_connect(struct socket *sock, struct sockaddr *addr,
@@ -1079,7 +1084,9 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
 	} else {
 		nladdr->nl_pid = nlk->portid;
+		netlink_lock_table();
 		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
+		netlink_unlock_table();
 	}
 	return 0;
 }
@@ -2128,7 +2135,7 @@ static int netlink_dump(struct sock *sk)
 	struct sk_buff *skb = NULL;
 	struct nlmsghdr *nlh;
 	struct module *module;
-	int len, err = -ENOBUFS;
+	int err = -ENOBUFS;
 	int alloc_min_size;
 	int alloc_size;
 
@@ -2175,9 +2182,11 @@ static int netlink_dump(struct sock *sk)
 	skb_reserve(skb, skb_tailroom(skb) - alloc_size);
 	netlink_skb_set_owner_r(skb, sk);
 
-	len = cb->dump(skb, cb);
+	if (nlk->dump_done_errno > 0)
+		nlk->dump_done_errno = cb->dump(skb, cb);
 
-	if (len > 0) {
+	if (nlk->dump_done_errno > 0 ||
+	    skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
 		mutex_unlock(nlk->cb_mutex);
 
 		if (sk_filter(sk, skb))
@@ -2187,13 +2196,15 @@ static int netlink_dump(struct sock *sk)
 		return 0;
 	}
 
-	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
-	if (!nlh)
+	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
+			       sizeof(nlk->dump_done_errno), NLM_F_MULTI);
+	if (WARN_ON(!nlh))
 		goto errout_skb;
 
 	nl_dump_check_consistent(cb, nlh);
 
-	memcpy(nlmsg_data(nlh), &len, sizeof(len));
+	memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
+	       sizeof(nlk->dump_done_errno));
 
 	if (sk_filter(sk, skb))
 		kfree_skb(skb);
@@ -2258,14 +2269,19 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	cb->min_dump_alloc = control->min_dump_alloc;
 	cb->skb = skb;
 
+	if (cb->start) {
+		ret = cb->start(cb);
+		if (ret)
+			goto error_unlock;
+	}
+
 	nlk->cb_running = true;
+	nlk->dump_done_errno = INT_MAX;
 
 	mutex_unlock(nlk->cb_mutex);
 
-	if (cb->start)
-		cb->start(cb);
-
 	ret = netlink_dump(sk);
+
 	sock_put(sk);
 
 	if (ret)
@@ -2295,27 +2311,26 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
 	size_t tlvlen = 0;
 	struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
 	unsigned int flags = 0;
+	bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK;
 
 	/* Error messages get the original request appened, unless the user
 	 * requests to cap the error message, and get extra error data if
 	 * requested.
 	 */
+	if (nlk_has_extack && extack && extack->_msg)
+		tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+
 	if (err) {
 		if (!(nlk->flags & NETLINK_F_CAP_ACK))
 			payload += nlmsg_len(nlh);
 		else
 			flags |= NLM_F_CAPPED;
-		if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
-			if (extack->_msg)
-				tlvlen += nla_total_size(strlen(extack->_msg) + 1);
-			if (extack->bad_attr)
-				tlvlen += nla_total_size(sizeof(u32));
-		}
+		if (nlk_has_extack && extack && extack->bad_attr)
+			tlvlen += nla_total_size(sizeof(u32));
 	} else {
 		flags |= NLM_F_CAPPED;
 
-		if (nlk->flags & NETLINK_F_EXT_ACK &&
-		    extack && extack->cookie_len)
+		if (nlk_has_extack && extack && extack->cookie_len)
 			tlvlen += nla_total_size(extack->cookie_len);
 	}
 
@@ -2324,16 +2339,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
 
 	skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
 	if (!skb) {
-		struct sock *sk;
-
-		sk = netlink_lookup(sock_net(in_skb->sk),
-				    in_skb->sk->sk_protocol,
-				    NETLINK_CB(in_skb).portid);
-		if (sk) {
-			sk->sk_err = ENOBUFS;
-			sk->sk_error_report(sk);
-			sock_put(sk);
-		}
+		NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
+		NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk);
 		return;
 	}
 
@@ -2343,11 +2350,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
 	errmsg->error = err;
 	memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
 
-	if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
+	if (nlk_has_extack && extack) {
+		if (extack->_msg) {
+			WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
+					       extack->_msg));
+		}
 		if (err) {
-			if (extack->_msg)
-				WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
-						       extack->_msg));
 			if (extack->bad_attr &&
 			    !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
 				     (u8 *)extack->bad_attr >= in_skb->data +
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 3490f2430532..962de7b3c023 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _AF_NETLINK_H
 #define _AF_NETLINK_H
 
@@ -33,6 +34,7 @@ struct netlink_sock {
 	wait_queue_head_t	wait;
 	bool			bound;
 	bool			cb_running;
+	int			dump_done_errno;
 	struct netlink_callback	cb;
 	struct mutex		*cb_mutex;
 	struct mutex		cb_def_mutex;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 10f8b4cff40a..d444daf1ac04 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * NETLINK      Generic Netlink Family
  *
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ebf16f7f9089..2dec3583c97d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -241,9 +241,9 @@ void nr_destroy_socket(struct sock *);
 /*
  *	Handler for deferred kills.
  */
-static void nr_destroy_timer(unsigned long data)
+static void nr_destroy_timer(struct timer_list *t)
 {
-	struct sock *sk=(struct sock *)data;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 	bh_lock_sock(sk);
 	sock_hold(sk);
 	nr_destroy_socket(sk);
@@ -284,7 +284,7 @@ void nr_destroy_socket(struct sock *sk)
 
 	if (sk_has_allocations(sk)) {
 		/* Defer: outstanding buffers */
-		sk->sk_timer.function = nr_destroy_timer;
+		sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_destroy_timer;
 		sk->sk_timer.expires  = jiffies + 2 * HZ;
 		add_timer(&sk->sk_timer);
 	} else
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 80dbd0beb516..fbfdae452ff9 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -125,7 +125,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
 
 	case NR_DISCREQ:
 		nr_write_internal(sk, NR_DISCACK);
-
+		/* fall through */
 	case NR_DISCACK:
 		nr_disconnect(sk, 0);
 		break;
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index 94d4e922af53..989ae647825e 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -18,7 +18,7 @@
 static void nr_loopback_timer(unsigned long);
 
 static struct sk_buff_head loopback_queue;
-static DEFINE_TIMER(loopback_timer, nr_loopback_timer, 0, 0);
+static DEFINE_TIMER(loopback_timer, nr_loopback_timer);
 
 void __init nr_loopback_init(void)
 {
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 0c59354e280e..75e6ba970fde 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -80,6 +80,19 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign,
 
 static void nr_remove_neigh(struct nr_neigh *);
 
+/*      re-sort the routes in quality order.    */
+static void re_sort_routes(struct nr_node *nr_node, int x, int y)
+{
+	if (nr_node->routes[y].quality > nr_node->routes[x].quality) {
+		if (nr_node->which == x)
+			nr_node->which = y;
+		else if (nr_node->which == y)
+			nr_node->which = x;
+
+		swap(nr_node->routes[x], nr_node->routes[y]);
+	}
+}
+
 /*
  *	Add a new route to a node, and in the process add the node and the
  *	neighbour if it is new.
@@ -90,7 +103,6 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
 {
 	struct nr_node  *nr_node;
 	struct nr_neigh *nr_neigh;
-	struct nr_route nr_route;
 	int i, found;
 	struct net_device *odev;
 
@@ -251,49 +263,11 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
 	/* Now re-sort the routes in quality order */
 	switch (nr_node->count) {
 	case 3:
-		if (nr_node->routes[1].quality > nr_node->routes[0].quality) {
-			switch (nr_node->which) {
-			case 0:
-				nr_node->which = 1;
-				break;
-			case 1:
-				nr_node->which = 0;
-				break;
-			}
-			nr_route           = nr_node->routes[0];
-			nr_node->routes[0] = nr_node->routes[1];
-			nr_node->routes[1] = nr_route;
-		}
-		if (nr_node->routes[2].quality > nr_node->routes[1].quality) {
-			switch (nr_node->which) {
-			case 1:  nr_node->which = 2;
-				break;
-
-			case 2:  nr_node->which = 1;
-				break;
-
-			default:
-				break;
-			}
-			nr_route           = nr_node->routes[1];
-			nr_node->routes[1] = nr_node->routes[2];
-			nr_node->routes[2] = nr_route;
-		}
+		re_sort_routes(nr_node, 0, 1);
+		re_sort_routes(nr_node, 1, 2);
+		/* fall through */
 	case 2:
-		if (nr_node->routes[1].quality > nr_node->routes[0].quality) {
-			switch (nr_node->which) {
-			case 0:  nr_node->which = 1;
-				break;
-
-			case 1:  nr_node->which = 0;
-				break;
-
-			default: break;
-			}
-			nr_route           = nr_node->routes[0];
-			nr_node->routes[0] = nr_node->routes[1];
-			nr_node->routes[1] = nr_route;
-			}
+		re_sort_routes(nr_node, 0, 1);
 	case 1:
 		break;
 	}
@@ -384,6 +358,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
 				switch (i) {
 				case 0:
 					nr_node->routes[0] = nr_node->routes[1];
+					/* fall through */
 				case 1:
 					nr_node->routes[1] = nr_node->routes[2];
 				case 2:
@@ -553,6 +528,7 @@ void nr_rt_device_down(struct net_device *dev)
 						switch (i) {
 						case 0:
 							t->routes[0] = t->routes[1];
+							/* fall through */
 						case 1:
 							t->routes[1] = t->routes[2];
 						case 2:
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 94d05806a9a2..43569aea0f5e 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -29,24 +29,23 @@
 #include <linux/interrupt.h>
 #include <net/netrom.h>
 
-static void nr_heartbeat_expiry(unsigned long);
-static void nr_t1timer_expiry(unsigned long);
-static void nr_t2timer_expiry(unsigned long);
-static void nr_t4timer_expiry(unsigned long);
-static void nr_idletimer_expiry(unsigned long);
+static void nr_heartbeat_expiry(struct timer_list *);
+static void nr_t1timer_expiry(struct timer_list *);
+static void nr_t2timer_expiry(struct timer_list *);
+static void nr_t4timer_expiry(struct timer_list *);
+static void nr_idletimer_expiry(struct timer_list *);
 
 void nr_init_timers(struct sock *sk)
 {
 	struct nr_sock *nr = nr_sk(sk);
 
-	setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk);
-	setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk);
-	setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk);
-	setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk);
+	timer_setup(&nr->t1timer, nr_t1timer_expiry, 0);
+	timer_setup(&nr->t2timer, nr_t2timer_expiry, 0);
+	timer_setup(&nr->t4timer, nr_t4timer_expiry, 0);
+	timer_setup(&nr->idletimer, nr_idletimer_expiry, 0);
 
 	/* initialized by sock_init_data */
-	sk->sk_timer.data     = (unsigned long)sk;
-	sk->sk_timer.function = &nr_heartbeat_expiry;
+	sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_heartbeat_expiry;
 }
 
 void nr_start_t1timer(struct sock *sk)
@@ -113,9 +112,9 @@ int nr_t1timer_running(struct sock *sk)
 	return timer_pending(&nr_sk(sk)->t1timer);
 }
 
-static void nr_heartbeat_expiry(unsigned long param)
+static void nr_heartbeat_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 	struct nr_sock *nr = nr_sk(sk);
 
 	bh_lock_sock(sk);
@@ -152,10 +151,10 @@ static void nr_heartbeat_expiry(unsigned long param)
 	bh_unlock_sock(sk);
 }
 
-static void nr_t2timer_expiry(unsigned long param)
+static void nr_t2timer_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
-	struct nr_sock *nr = nr_sk(sk);
+	struct nr_sock *nr = from_timer(nr, t, t2timer);
+	struct sock *sk = &nr->sock;
 
 	bh_lock_sock(sk);
 	if (nr->condition & NR_COND_ACK_PENDING) {
@@ -165,19 +164,20 @@ static void nr_t2timer_expiry(unsigned long param)
 	bh_unlock_sock(sk);
 }
 
-static void nr_t4timer_expiry(unsigned long param)
+static void nr_t4timer_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
+	struct nr_sock *nr = from_timer(nr, t, t4timer);
+	struct sock *sk = &nr->sock;
 
 	bh_lock_sock(sk);
 	nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
 	bh_unlock_sock(sk);
 }
 
-static void nr_idletimer_expiry(unsigned long param)
+static void nr_idletimer_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
-	struct nr_sock *nr = nr_sk(sk);
+	struct nr_sock *nr = from_timer(nr, t, idletimer);
+	struct sock *sk = &nr->sock;
 
 	bh_lock_sock(sk);
 
@@ -202,10 +202,10 @@ static void nr_idletimer_expiry(unsigned long param)
 	bh_unlock_sock(sk);
 }
 
-static void nr_t1timer_expiry(unsigned long param)
+static void nr_t1timer_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
-	struct nr_sock *nr = nr_sk(sk);
+	struct nr_sock *nr = from_timer(nr, t, t1timer);
+	struct sock *sk = &nr->sock;
 
 	bh_lock_sock(sk);
 	switch (nr->state) {
diff --git a/net/nfc/Makefile b/net/nfc/Makefile
index 2555ff8e7219..2ffc69b473fc 100644
--- a/net/nfc/Makefile
+++ b/net/nfc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux NFC subsystem.
 #
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 5cf33df888c3..947a470f929d 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1015,9 +1015,9 @@ exit:
 	device_unlock(&dev->dev);
 }
 
-static void nfc_check_pres_timeout(unsigned long data)
+static void nfc_check_pres_timeout(struct timer_list *t)
 {
-	struct nfc_dev *dev = (struct nfc_dev *)data;
+	struct nfc_dev *dev = from_timer(dev, t, check_pres_timer);
 
 	schedule_work(&dev->check_pres_work);
 }
@@ -1094,10 +1094,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
 	dev->targets_generation = 1;
 
 	if (ops->check_presence) {
-		init_timer(&dev->check_pres_timer);
-		dev->check_pres_timer.data = (unsigned long)dev;
-		dev->check_pres_timer.function = nfc_check_pres_timeout;
-
+		timer_setup(&dev->check_pres_timer, nfc_check_pres_timeout, 0);
 		INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
 	}
 
@@ -1106,7 +1103,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
 err_free_dev:
 	kfree(dev);
 
-	return ERR_PTR(rc);
+	return NULL;
 }
 EXPORT_SYMBOL(nfc_allocate_device);
 
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index de6dd37d04c7..ec0a8998e52d 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -650,6 +650,7 @@ static void digital_deactivate_target(struct nfc_dev *nfc_dev,
 		return;
 	}
 
+	digital_abort_cmd(ddev);
 	ddev->curr_protocol = 0;
 }
 
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index b740fef0acc5..ac8030c4bcf8 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -428,9 +428,9 @@ exit_noskb:
 		nfc_hci_driver_failure(hdev, r);
 }
 
-static void nfc_hci_cmd_timeout(unsigned long data)
+static void nfc_hci_cmd_timeout(struct timer_list *t)
 {
-	struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data;
+	struct nfc_hci_dev *hdev = from_timer(hdev, t, cmd_timer);
 
 	schedule_work(&hdev->msg_tx_work);
 }
@@ -1004,9 +1004,7 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
 
 	INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
 
-	init_timer(&hdev->cmd_timer);
-	hdev->cmd_timer.data = (unsigned long)hdev;
-	hdev->cmd_timer.function = nfc_hci_cmd_timeout;
+	timer_setup(&hdev->cmd_timer, nfc_hci_cmd_timeout, 0);
 
 	skb_queue_head_init(&hdev->rx_hcp_frags);
 
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 17e59a009ce6..fe988936ad92 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -580,27 +580,27 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc)
 	}
 }
 
-static void llc_shdlc_connect_timeout(unsigned long data)
+static void llc_shdlc_connect_timeout(struct timer_list *t)
 {
-	struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+	struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer);
 
 	pr_debug("\n");
 
 	schedule_work(&shdlc->sm_work);
 }
 
-static void llc_shdlc_t1_timeout(unsigned long data)
+static void llc_shdlc_t1_timeout(struct timer_list *t)
 {
-	struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+	struct llc_shdlc *shdlc = from_timer(shdlc, t, t1_timer);
 
 	pr_debug("SoftIRQ: need to send ack\n");
 
 	schedule_work(&shdlc->sm_work);
 }
 
-static void llc_shdlc_t2_timeout(unsigned long data)
+static void llc_shdlc_t2_timeout(struct timer_list *t)
 {
-	struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+	struct llc_shdlc *shdlc = from_timer(shdlc, t, t2_timer);
 
 	pr_debug("SoftIRQ: need to retransmit\n");
 
@@ -763,17 +763,9 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
 	mutex_init(&shdlc->state_mutex);
 	shdlc->state = SHDLC_DISCONNECTED;
 
-	init_timer(&shdlc->connect_timer);
-	shdlc->connect_timer.data = (unsigned long)shdlc;
-	shdlc->connect_timer.function = llc_shdlc_connect_timeout;
-
-	init_timer(&shdlc->t1_timer);
-	shdlc->t1_timer.data = (unsigned long)shdlc;
-	shdlc->t1_timer.function = llc_shdlc_t1_timeout;
-
-	init_timer(&shdlc->t2_timer);
-	shdlc->t2_timer.data = (unsigned long)shdlc;
-	shdlc->t2_timer.function = llc_shdlc_t2_timeout;
+	timer_setup(&shdlc->connect_timer, llc_shdlc_connect_timeout, 0);
+	timer_setup(&shdlc->t1_timer, llc_shdlc_t1_timeout, 0);
+	timer_setup(&shdlc->t2_timer, llc_shdlc_t2_timeout, 0);
 
 	shdlc->w = SHDLC_MAX_WINDOW;
 	shdlc->srej_support = SHDLC_SREJ_SUPPORT;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 02eef5cf3cce..ef4026a23e80 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -242,9 +242,9 @@ static void nfc_llcp_timeout_work(struct work_struct *work)
 	nfc_dep_link_down(local->dev);
 }
 
-static void nfc_llcp_symm_timer(unsigned long data)
+static void nfc_llcp_symm_timer(struct timer_list *t)
 {
-	struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+	struct nfc_llcp_local *local = from_timer(local, t, link_timer);
 
 	pr_err("SYMM timeout\n");
 
@@ -285,9 +285,9 @@ static void nfc_llcp_sdreq_timeout_work(struct work_struct *work)
 		nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
 }
 
-static void nfc_llcp_sdreq_timer(unsigned long data)
+static void nfc_llcp_sdreq_timer(struct timer_list *t)
 {
-	struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+	struct nfc_llcp_local *local = from_timer(local, t, sdreq_timer);
 
 	schedule_work(&local->sdreq_timeout_work);
 }
@@ -1573,9 +1573,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 	INIT_LIST_HEAD(&local->list);
 	kref_init(&local->ref);
 	mutex_init(&local->sdp_lock);
-	init_timer(&local->link_timer);
-	local->link_timer.data = (unsigned long) local;
-	local->link_timer.function = nfc_llcp_symm_timer;
+	timer_setup(&local->link_timer, nfc_llcp_symm_timer, 0);
 
 	skb_queue_head_init(&local->tx_queue);
 	INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
@@ -1601,9 +1599,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 
 	mutex_init(&local->sdreq_lock);
 	INIT_HLIST_HEAD(&local->pending_sdreqs);
-	init_timer(&local->sdreq_timer);
-	local->sdreq_timer.data = (unsigned long) local;
-	local->sdreq_timer.function = nfc_llcp_sdreq_timer;
+	timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0);
 	INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
 
 	list_add(&local->list, &llcp_devices);
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile
index 0ca31d9bf741..c3362c499281 100644
--- a/net/nfc/nci/Makefile
+++ b/net/nfc/nci/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux NFC NCI layer.
 #
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index b251fb936a27..c0b83dc9d993 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -75,7 +75,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
 	if (!hdr)
 		return -EMSGSIZE;
 
-	genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+	genl_dump_check_consistent(cb, hdr);
 
 	if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) ||
 	    nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) ||
@@ -603,7 +603,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
 		return -EMSGSIZE;
 
 	if (cb)
-		genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+		genl_dump_check_consistent(cb, hdr);
 
 	if (nfc_genl_setup_device_added(dev, msg))
 		goto nla_put_failure;
@@ -928,6 +928,30 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
 	return rc;
 }
 
+static int nfc_genl_deactivate_target(struct sk_buff *skb,
+				      struct genl_info *info)
+{
+	struct nfc_dev *dev;
+	u32 device_idx, target_idx;
+	int rc;
+
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+		return -EINVAL;
+
+	device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+
+	dev = nfc_get_device(device_idx);
+	if (!dev)
+		return -ENODEV;
+
+	target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]);
+
+	rc = nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP);
+
+	nfc_put_device(dev);
+	return rc;
+}
+
 static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nfc_dev *dev;
@@ -1332,7 +1356,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev,
 			goto nla_put_failure;
 
 		if (cb)
-			genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+			genl_dump_check_consistent(cb, hdr);
 
 		if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
 		    nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) ||
@@ -1751,6 +1775,11 @@ static const struct genl_ops nfc_genl_ops[] = {
 		.doit = nfc_genl_vendor_cmd,
 		.policy = nfc_genl_policy,
 	},
+	{
+		.cmd = NFC_CMD_DEACTIVATE_TARGET,
+		.doit = nfc_genl_deactivate_target,
+		.policy = nfc_genl_policy,
+	},
 };
 
 static struct genl_family nfc_genl_family __ro_after_init = {
diff --git a/net/nsh/Kconfig b/net/nsh/Kconfig
new file mode 100644
index 000000000000..bafc3dd60c2c
--- /dev/null
+++ b/net/nsh/Kconfig
@@ -0,0 +1,9 @@
+menuconfig NET_NSH
+	tristate "Network Service Header (NSH) protocol"
+	default n
+	---help---
+	  Network Service Header is an implementation of Service Function
+	  Chaining (RFC 7665). The current implementation in Linux supports
+	  only MD type 1 and only with the openvswitch module.
+
+	  If unsure, say N.
diff --git a/net/nsh/Makefile b/net/nsh/Makefile
new file mode 100644
index 000000000000..c93c787385ca
--- /dev/null
+++ b/net/nsh/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NET_NSH) += nsh.o
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
new file mode 100644
index 000000000000..d7da99a0b0b8
--- /dev/null
+++ b/net/nsh/nsh.c
@@ -0,0 +1,151 @@
+/*
+ * Network Service Header
+ *
+ * Copyright (c) 2017 Red Hat, Inc. -- Jiri Benc <jbenc@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/nsh.h>
+#include <net/tun_proto.h>
+
+int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh)
+{
+	struct nshhdr *nh;
+	size_t length = nsh_hdr_len(pushed_nh);
+	u8 next_proto;
+
+	if (skb->mac_len) {
+		next_proto = TUN_P_ETHERNET;
+	} else {
+		next_proto = tun_p_from_eth_p(skb->protocol);
+		if (!next_proto)
+			return -EAFNOSUPPORT;
+	}
+
+	/* Add the NSH header */
+	if (skb_cow_head(skb, length) < 0)
+		return -ENOMEM;
+
+	skb_push(skb, length);
+	nh = (struct nshhdr *)(skb->data);
+	memcpy(nh, pushed_nh, length);
+	nh->np = next_proto;
+	skb_postpush_rcsum(skb, nh, length);
+
+	skb->protocol = htons(ETH_P_NSH);
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_mac_len(skb);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nsh_push);
+
+int nsh_pop(struct sk_buff *skb)
+{
+	struct nshhdr *nh;
+	size_t length;
+	__be16 inner_proto;
+
+	if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN))
+		return -ENOMEM;
+	nh = (struct nshhdr *)(skb->data);
+	length = nsh_hdr_len(nh);
+	inner_proto = tun_p_to_eth_p(nh->np);
+	if (!pskb_may_pull(skb, length))
+		return -ENOMEM;
+
+	if (!inner_proto)
+		return -EAFNOSUPPORT;
+
+	skb_pull_rcsum(skb, length);
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_mac_len(skb);
+	skb->protocol = inner_proto;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nsh_pop);
+
+static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
+				       netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	unsigned int nsh_len, mac_len;
+	__be16 proto;
+	int nhoff;
+
+	skb_reset_network_header(skb);
+
+	nhoff = skb->network_header - skb->mac_header;
+	mac_len = skb->mac_len;
+
+	if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
+		goto out;
+	nsh_len = nsh_hdr_len(nsh_hdr(skb));
+	if (unlikely(!pskb_may_pull(skb, nsh_len)))
+		goto out;
+
+	proto = tun_p_to_eth_p(nsh_hdr(skb)->np);
+	if (!proto)
+		goto out;
+
+	__skb_pull(skb, nsh_len);
+
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+	skb->protocol = proto;
+
+	features &= NETIF_F_SG;
+	segs = skb_mac_gso_segment(skb, features);
+	if (IS_ERR_OR_NULL(segs)) {
+		skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len,
+				     skb->network_header - nhoff,
+				     mac_len);
+		goto out;
+	}
+
+	for (skb = segs; skb; skb = skb->next) {
+		skb->protocol = htons(ETH_P_NSH);
+		__skb_push(skb, nsh_len);
+		skb_set_mac_header(skb, -nhoff);
+		skb->network_header = skb->mac_header + mac_len;
+		skb->mac_len = mac_len;
+	}
+
+out:
+	return segs;
+}
+
+static struct packet_offload nsh_packet_offload __read_mostly = {
+	.type = htons(ETH_P_NSH),
+	.priority = 15,
+	.callbacks = {
+		.gso_segment = nsh_gso_segment,
+	},
+};
+
+static int __init nsh_init_module(void)
+{
+	dev_add_offload(&nsh_packet_offload);
+	return 0;
+}
+
+static void __exit nsh_cleanup_module(void)
+{
+	dev_remove_offload(&nsh_packet_offload);
+}
+
+module_init(nsh_init_module);
+module_exit(nsh_cleanup_module);
+
+MODULE_AUTHOR("Jiri Benc <jbenc@redhat.com>");
+MODULE_DESCRIPTION("NSH protocol");
+MODULE_LICENSE("GPL v2");
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ce947292ae77..2650205cdaf9 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -14,6 +14,7 @@ config OPENVSWITCH
 	select MPLS
 	select NET_MPLS_GSO
 	select DST_CACHE
+	select NET_NSH
 	---help---
 	  Open vSwitch is a multilayer Ethernet switch targeted at virtualized
 	  environments.  In addition to supporting a variety of features
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 60f809085b92..41109c326f3a 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for Open vSwitch.
 #
@@ -11,6 +12,7 @@ openvswitch-y := \
 	flow.o \
 	flow_netlink.o \
 	flow_table.o \
+	meter.o \
 	vport.o \
 	vport-internal_dev.o \
 	vport-netdev.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e4610676299b..30a5df27116e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -43,6 +43,7 @@
 #include "flow.h"
 #include "conntrack.h"
 #include "vport.h"
+#include "flow_netlink.h"
 
 struct deferred_action {
 	struct sk_buff *skb;
@@ -380,6 +381,38 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
 	return 0;
 }
 
+static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
+		    const struct nshhdr *nh)
+{
+	int err;
+
+	err = nsh_push(skb, nh);
+	if (err)
+		return err;
+
+	/* safe right before invalidate_flow_key */
+	key->mac_proto = MAC_PROTO_NONE;
+	invalidate_flow_key(key);
+	return 0;
+}
+
+static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	int err;
+
+	err = nsh_pop(skb);
+	if (err)
+		return err;
+
+	/* safe right before invalidate_flow_key */
+	if (skb->protocol == htons(ETH_P_TEB))
+		key->mac_proto = MAC_PROTO_ETHERNET;
+	else
+		key->mac_proto = MAC_PROTO_NONE;
+	invalidate_flow_key(key);
+	return 0;
+}
+
 static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
 				  __be32 addr, __be32 new_addr)
 {
@@ -602,6 +635,69 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
 	return 0;
 }
 
+static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		   const struct nlattr *a)
+{
+	struct nshhdr *nh;
+	size_t length;
+	int err;
+	u8 flags;
+	u8 ttl;
+	int i;
+
+	struct ovs_key_nsh key;
+	struct ovs_key_nsh mask;
+
+	err = nsh_key_from_nlattr(a, &key, &mask);
+	if (err)
+		return err;
+
+	/* Make sure the NSH base header is there */
+	if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
+		return -ENOMEM;
+
+	nh = nsh_hdr(skb);
+	length = nsh_hdr_len(nh);
+
+	/* Make sure the whole NSH header is there */
+	err = skb_ensure_writable(skb, skb_network_offset(skb) +
+				       length);
+	if (unlikely(err))
+		return err;
+
+	nh = nsh_hdr(skb);
+	skb_postpull_rcsum(skb, nh, length);
+	flags = nsh_get_flags(nh);
+	flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
+	flow_key->nsh.base.flags = flags;
+	ttl = nsh_get_ttl(nh);
+	ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
+	flow_key->nsh.base.ttl = ttl;
+	nsh_set_flags_and_ttl(nh, flags, ttl);
+	nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
+				  mask.base.path_hdr);
+	flow_key->nsh.base.path_hdr = nh->path_hdr;
+	switch (nh->mdtype) {
+	case NSH_M_TYPE1:
+		for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
+			nh->md1.context[i] =
+			    OVS_MASKED(nh->md1.context[i], key.context[i],
+				       mask.context[i]);
+		}
+		memcpy(flow_key->nsh.context, nh->md1.context,
+		       sizeof(nh->md1.context));
+		break;
+	case NSH_M_TYPE2:
+		memset(flow_key->nsh.context, 0,
+		       sizeof(flow_key->nsh.context));
+		break;
+	default:
+		return -EINVAL;
+	}
+	skb_postpush_rcsum(skb, nh, length);
+	return 0;
+}
+
 /* Must follow skb_ensure_writable() since that can move the skb data. */
 static void set_tp_port(struct sk_buff *skb, __be16 *port,
 			__be16 new_port, __sum16 *check)
@@ -1024,6 +1120,10 @@ static int execute_masked_set_action(struct sk_buff *skb,
 				   get_mask(a, struct ovs_key_ethernet *));
 		break;
 
+	case OVS_KEY_ATTR_NSH:
+		err = set_nsh(skb, flow_key, a);
+		break;
+
 	case OVS_KEY_ATTR_IPV4:
 		err = set_ipv4(skb, flow_key, nla_data(a),
 			       get_mask(a, struct ovs_key_ipv4 *));
@@ -1203,6 +1303,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 				return err == -EINPROGRESS ? 0 : err;
 			break;
 
+		case OVS_ACTION_ATTR_CT_CLEAR:
+			err = ovs_ct_clear(skb, key);
+			break;
+
 		case OVS_ACTION_ATTR_PUSH_ETH:
 			err = push_eth(skb, key, nla_data(a));
 			break;
@@ -1210,6 +1314,28 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 		case OVS_ACTION_ATTR_POP_ETH:
 			err = pop_eth(skb, key);
 			break;
+
+		case OVS_ACTION_ATTR_PUSH_NSH: {
+			u8 buffer[NSH_HDR_MAX_LEN];
+			struct nshhdr *nh = (struct nshhdr *)buffer;
+
+			err = nsh_hdr_from_nlattr(nla_data(a), nh,
+						  NSH_HDR_MAX_LEN);
+			if (unlikely(err))
+				break;
+			err = push_nsh(skb, key, nh);
+			break;
+		}
+
+		case OVS_ACTION_ATTR_POP_NSH:
+			err = pop_nsh(skb, key);
+			break;
+
+		case OVS_ACTION_ATTR_METER:
+			if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
+				consume_skb(skb);
+				return 0;
+			}
 		}
 
 		if (unlikely(err)) {
@@ -1337,6 +1463,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 		goto out;
 	}
 
+	OVS_CB(skb)->acts_origlen = acts->orig_len;
 	err = do_execute_actions(dp, skb, key,
 				 acts->actions, acts->actions_len);
 
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 03859e386b47..b27c5c6d9cab 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -579,8 +579,8 @@ static struct nf_conn *
 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 		     u8 l3num, struct sk_buff *skb, bool natted)
 {
-	struct nf_conntrack_l3proto *l3proto;
-	struct nf_conntrack_l4proto *l4proto;
+	const struct nf_conntrack_l3proto *l3proto;
+	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
@@ -752,6 +752,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
 			}
 		}
 		/* Non-ICMP, fall thru to initialize if needed. */
+		/* fall through */
 	case IP_CT_NEW:
 		/* Seen it before?  This can happen for loopback, retrans,
 		 * or local packets.
@@ -1129,6 +1130,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 	return err;
 }
 
+int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	if (skb_nfct(skb)) {
+		nf_conntrack_put(skb_nfct(skb));
+		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+		ovs_ct_fill_key(skb, key);
+	}
+
+	return 0;
+}
+
 static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
 			     const struct sw_flow_key *key, bool log)
 {
@@ -1180,15 +1192,13 @@ static int parse_nat(const struct nlattr *attr,
 		int type = nla_type(a);
 
 		if (type > OVS_NAT_ATTR_MAX) {
-			OVS_NLERR(log,
-				  "Unknown NAT attribute (type=%d, max=%d).\n",
+			OVS_NLERR(log, "Unknown NAT attribute (type=%d, max=%d)",
 				  type, OVS_NAT_ATTR_MAX);
 			return -EINVAL;
 		}
 
 		if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) {
-			OVS_NLERR(log,
-				  "NAT attribute type %d has unexpected length (%d != %d).\n",
+			OVS_NLERR(log, "NAT attribute type %d has unexpected length (%d != %d)",
 				  type, nla_len(a),
 				  ovs_nat_attr_lens[type][ip_vers]);
 			return -EINVAL;
@@ -1198,9 +1208,7 @@ static int parse_nat(const struct nlattr *attr,
 		case OVS_NAT_ATTR_SRC:
 		case OVS_NAT_ATTR_DST:
 			if (info->nat) {
-				OVS_NLERR(log,
-					  "Only one type of NAT may be specified.\n"
-					  );
+				OVS_NLERR(log, "Only one type of NAT may be specified");
 				return -ERANGE;
 			}
 			info->nat |= OVS_CT_NAT;
@@ -1245,13 +1253,13 @@ static int parse_nat(const struct nlattr *attr,
 			break;
 
 		default:
-			OVS_NLERR(log, "Unknown nat attribute (%d).\n", type);
+			OVS_NLERR(log, "Unknown nat attribute (%d)", type);
 			return -EINVAL;
 		}
 	}
 
 	if (rem > 0) {
-		OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem);
+		OVS_NLERR(log, "NAT attribute has %d unknown bytes", rem);
 		return -EINVAL;
 	}
 	if (!info->nat) {
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index bc7efd1867ab..399dfdd2c4f9 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -30,6 +30,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
 
 int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
 		   const struct ovs_conntrack_info *);
+int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key);
 
 void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
 int ovs_ct_put_key(const struct sw_flow_key *swkey,
@@ -73,6 +74,12 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 	return -ENOTSUPP;
 }
 
+static inline int ovs_ct_clear(struct sk_buff *skb,
+			       struct sw_flow_key *key)
+{
+	return -ENOTSUPP;
+}
+
 static inline void ovs_ct_fill_key(const struct sk_buff *skb,
 				   struct sw_flow_key *key)
 {
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 45fe8c8a884d..0dab33fb9844 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -55,6 +55,7 @@
 #include "flow.h"
 #include "flow_table.h"
 #include "flow_netlink.h"
+#include "meter.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
 
@@ -142,35 +143,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
 				  const struct dp_upcall_info *,
 				  uint32_t cutlen);
 
-/* Must be called with rcu_read_lock. */
-static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
-{
-	struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
-
-	if (dev) {
-		struct vport *vport = ovs_internal_dev_get_vport(dev);
-		if (vport)
-			return vport->dp;
-	}
-
-	return NULL;
-}
-
-/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
- * returned dp pointer valid.
- */
-static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
-{
-	struct datapath *dp;
-
-	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
-	rcu_read_lock();
-	dp = get_dp_rcu(net, dp_ifindex);
-	rcu_read_unlock();
-
-	return dp;
-}
-
 /* Must be called with rcu_read_lock or ovs_mutex. */
 const char *ovs_dp_name(const struct datapath *dp)
 {
@@ -203,6 +175,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
 	ovs_flow_tbl_destroy(&dp->table);
 	free_percpu(dp->stats_percpu);
 	kfree(dp->ports);
+	ovs_meters_exit(dp);
 	kfree(dp);
 }
 
@@ -335,8 +308,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
 			     const struct dp_upcall_info *upcall_info,
 				 uint32_t cutlen)
 {
-	unsigned short gso_type = skb_shinfo(skb)->gso_type;
-	struct sw_flow_key later_key;
 	struct sk_buff *segs, *nskb;
 	int err;
 
@@ -347,21 +318,9 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
 	if (segs == NULL)
 		return -EINVAL;
 
-	if (gso_type & SKB_GSO_UDP) {
-		/* The initial flow key extracted by ovs_flow_key_extract()
-		 * in this case is for a first fragment, so we need to
-		 * properly mark later fragments.
-		 */
-		later_key = *key;
-		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
-	}
-
 	/* Queue all of the segments. */
 	skb = segs;
 	do {
-		if (gso_type & SKB_GSO_UDP && skb != segs)
-			key = &later_key;
-
 		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
 		if (err)
 			break;
@@ -381,7 +340,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
 }
 
 static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
-			      unsigned int hdrlen)
+			      unsigned int hdrlen, int actions_attrlen)
 {
 	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
 		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
@@ -398,7 +357,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
 
 	/* OVS_PACKET_ATTR_ACTIONS */
 	if (upcall_info->actions_len)
-		size += nla_total_size(upcall_info->actions_len);
+		size += nla_total_size(actions_attrlen);
 
 	/* OVS_PACKET_ATTR_MRU */
 	if (upcall_info->mru)
@@ -465,7 +424,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 	else
 		hlen = skb->len;
 
-	len = upcall_msg_size(upcall_info, hlen - cutlen);
+	len = upcall_msg_size(upcall_info, hlen - cutlen,
+			      OVS_CB(skb)->acts_origlen);
 	user_skb = genlmsg_new(len, GFP_ATOMIC);
 	if (!user_skb) {
 		err = -ENOMEM;
@@ -1125,7 +1085,8 @@ static int ovs_nla_init_match_and_action(struct net *net,
 		if (!a[OVS_FLOW_ATTR_KEY]) {
 			OVS_NLERR(log,
 				  "Flow key attribute not present in set flow.");
-			return -EINVAL;
+			error = -EINVAL;
+			goto error;
 		}
 
 		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
@@ -1613,6 +1574,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
 		INIT_HLIST_HEAD(&dp->ports[i]);
 
+	err = ovs_meters_init(dp);
+	if (err)
+		goto err_destroy_ports_array;
+
 	/* Set up our datapath device. */
 	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
 	parms.type = OVS_VPORT_TYPE_INTERNAL;
@@ -1641,7 +1606,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 				ovs_dp_reset_user_features(skb, info);
 		}
 
-		goto err_destroy_ports_array;
+		goto err_destroy_meters;
 	}
 
 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1656,8 +1621,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	ovs_notify(&dp_datapath_genl_family, reply, info);
 	return 0;
 
-err_destroy_ports_array:
+err_destroy_meters:
 	ovs_unlock();
+	ovs_meters_exit(dp);
+err_destroy_ports_array:
 	kfree(dp->ports);
 err_destroy_percpu:
 	free_percpu(dp->stats_percpu);
@@ -1860,7 +1827,8 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
 
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
-				   u32 portid, u32 seq, u32 flags, u8 cmd)
+				   struct net *net, u32 portid, u32 seq,
+				   u32 flags, u8 cmd)
 {
 	struct ovs_header *ovs_header;
 	struct ovs_vport_stats vport_stats;
@@ -1876,9 +1844,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
 	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
 	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
-			   ovs_vport_name(vport)))
+			   ovs_vport_name(vport)) ||
+	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
 		goto nla_put_failure;
 
+	if (!net_eq(net, dev_net(vport->dev))) {
+		int id = peernet2id_alloc(net, dev_net(vport->dev));
+
+		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
+			goto nla_put_failure;
+	}
+
 	ovs_vport_get_stats(vport, &vport_stats);
 	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
 			  sizeof(struct ovs_vport_stats), &vport_stats,
@@ -1908,8 +1884,8 @@ static struct sk_buff *ovs_vport_cmd_alloc_info(void)
 }
 
 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
-					 u32 seq, u8 cmd)
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
+					 u32 portid, u32 seq, u8 cmd)
 {
 	struct sk_buff *skb;
 	int retval;
@@ -1918,7 +1894,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
+	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
 	BUG_ON(retval < 0);
 
 	return skb;
@@ -1932,6 +1908,8 @@ static struct vport *lookup_vport(struct net *net,
 	struct datapath *dp;
 	struct vport *vport;
 
+	if (a[OVS_VPORT_ATTR_IFINDEX])
+		return ERR_PTR(-EOPNOTSUPP);
 	if (a[OVS_VPORT_ATTR_NAME]) {
 		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
 		if (!vport)
@@ -1956,6 +1934,7 @@ static struct vport *lookup_vport(struct net *net,
 		return vport;
 	} else
 		return ERR_PTR(-EINVAL);
+
 }
 
 /* Called with ovs_mutex */
@@ -1995,6 +1974,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
 	    !a[OVS_VPORT_ATTR_UPCALL_PID])
 		return -EINVAL;
+	if (a[OVS_VPORT_ATTR_IFINDEX])
+		return -EOPNOTSUPP;
 
 	port_no = a[OVS_VPORT_ATTR_PORT_NO]
 		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
@@ -2044,8 +2025,9 @@ restart:
 		goto exit_unlock_free;
 	}
 
-	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
-				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+				      info->snd_portid, info->snd_seq, 0,
+				      OVS_VPORT_CMD_NEW);
 
 	if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
 		update_headroom(dp);
@@ -2102,8 +2084,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 			goto exit_unlock_free;
 	}
 
-	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
-				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+				      info->snd_portid, info->snd_seq, 0,
+				      OVS_VPORT_CMD_NEW);
 	BUG_ON(err < 0);
 
 	ovs_unlock();
@@ -2140,8 +2123,9 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock_free;
 	}
 
-	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
-				      info->snd_seq, 0, OVS_VPORT_CMD_DEL);
+	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+				      info->snd_portid, info->snd_seq, 0,
+				      OVS_VPORT_CMD_DEL);
 	BUG_ON(err < 0);
 
 	/* the vport deletion may trigger dp headroom update */
@@ -2181,8 +2165,9 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
 		goto exit_unlock_free;
-	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
-				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+				      info->snd_portid, info->snd_seq, 0,
+				      OVS_VPORT_CMD_NEW);
 	BUG_ON(err < 0);
 	rcu_read_unlock();
 
@@ -2214,6 +2199,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
 			if (j >= skip &&
 			    ovs_vport_cmd_fill_info(vport, skb,
+						    sock_net(skb->sk),
 						    NETLINK_CB(cb->skb).portid,
 						    cb->nlh->nlmsg_seq,
 						    NLM_F_MULTI,
@@ -2240,6 +2226,8 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
 	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
 	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
 	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+	[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
 };
 
 static const struct genl_ops dp_vport_genl_ops[] = {
@@ -2285,6 +2273,7 @@ static struct genl_family * const dp_genl_families[] = {
 	&dp_vport_genl_family,
 	&dp_flow_genl_family,
 	&dp_packet_genl_family,
+	&dp_meter_genl_family,
 };
 
 static void dp_unregister_genl(int n_families)
@@ -2465,3 +2454,4 @@ MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
 MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
 MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
 MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 5d8dcd88815f..523d65526766 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -30,6 +30,8 @@
 #include "conntrack.h"
 #include "flow.h"
 #include "flow_table.h"
+#include "meter.h"
+#include "vport-internal_dev.h"
 
 #define DP_MAX_PORTS           USHRT_MAX
 #define DP_VPORT_HASH_BUCKETS  1024
@@ -91,6 +93,9 @@ struct datapath {
 	u32 user_features;
 
 	u32 max_headroom;
+
+	/* Switch meters. */
+	struct hlist_head *meters;
 };
 
 /**
@@ -99,11 +104,13 @@ struct datapath {
  * when a packet is received by OVS.
  * @mru: The maximum received fragement size; 0 if the packet is not
  * fragmented.
+ * @acts_origlen: The netlink size of the flow actions applied to this skb.
  * @cutlen: The number of bytes from the packet end to be removed.
  */
 struct ovs_skb_cb {
 	struct vport		*input_vport;
 	u16			mru;
+	u16			acts_origlen;
 	u32			cutlen;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -188,6 +195,36 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n
 	return ovs_lookup_vport(dp, port_no);
 }
 
+/* Must be called with rcu_read_lock. */
+static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
+{
+	struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
+
+	if (dev) {
+		struct vport *vport = ovs_internal_dev_get_vport(dev);
+
+		if (vport)
+			return vport->dp;
+	}
+
+	return NULL;
+}
+
+/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
+ * returned dp pointer valid.
+ */
+static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
+{
+	struct datapath *dp;
+
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+	rcu_read_lock();
+	dp = get_dp_rcu(net, dp_ifindex);
+	rcu_read_unlock();
+
+	return dp;
+}
+
 extern struct notifier_block ovs_dp_device_notifier;
 extern struct genl_family dp_vport_genl_family;
 
@@ -198,8 +235,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *,
 		  uint32_t cutlen);
 
 const char *ovs_dp_name(const struct datapath *dp);
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
-					 u8 cmd);
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
+					 u32 portid, u32 seq, u8 cmd);
 
 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			const struct sw_flow_actions *, struct sw_flow_key *);
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 653d073bae45..f3ee2f2825c0 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -30,8 +30,8 @@ static void dp_detach_port_notify(struct vport *vport)
 	struct datapath *dp;
 
 	dp = vport->dp;
-	notify = ovs_vport_cmd_build_info(vport, 0, 0,
-					  OVS_VPORT_CMD_DEL);
+	notify = ovs_vport_cmd_build_info(vport, ovs_dp_get_net(dp),
+					  0, 0, OVS_VPORT_CMD_DEL);
 	ovs_dp_detach_port(vport);
 	if (IS_ERR(notify)) {
 		genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 3f76cb765e5b..864ddb1e3642 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -46,6 +46,7 @@
 #include <net/ipv6.h>
 #include <net/mpls.h>
 #include <net/ndisc.h>
+#include <net/nsh.h>
 
 #include "conntrack.h"
 #include "datapath.h"
@@ -72,8 +73,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 			   const struct sk_buff *skb)
 {
 	struct flow_stats *stats;
-	int node = numa_node_id();
-	int cpu = smp_processor_id();
+	unsigned int cpu = smp_processor_id();
 	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
 	stats = rcu_dereference(flow->stats[cpu]);
@@ -108,7 +108,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 							      __GFP_THISNODE |
 							      __GFP_NOWARN |
 							      __GFP_NOMEMALLOC,
-							      node);
+							      numa_node_id());
 				if (likely(new_stats)) {
 					new_stats->used = jiffies;
 					new_stats->packet_count = 1;
@@ -118,6 +118,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 
 					rcu_assign_pointer(flow->stats[cpu],
 							   new_stats);
+					cpumask_set_cpu(cpu, &flow->cpu_used_mask);
 					goto unlock;
 				}
 			}
@@ -145,7 +146,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
 	memset(ovs_stats, 0, sizeof(*ovs_stats));
 
 	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
 		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
 
 		if (stats) {
@@ -169,7 +170,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
 	int cpu;
 
 	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
 		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
 
 		if (stats) {
@@ -490,6 +491,52 @@ invalid:
 	return 0;
 }
 
+static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	struct nshhdr *nh;
+	unsigned int nh_ofs = skb_network_offset(skb);
+	u8 version, length;
+	int err;
+
+	err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
+	if (unlikely(err))
+		return err;
+
+	nh = nsh_hdr(skb);
+	version = nsh_get_ver(nh);
+	length = nsh_hdr_len(nh);
+
+	if (version != 0)
+		return -EINVAL;
+
+	err = check_header(skb, nh_ofs + length);
+	if (unlikely(err))
+		return err;
+
+	nh = nsh_hdr(skb);
+	key->nsh.base.flags = nsh_get_flags(nh);
+	key->nsh.base.ttl = nsh_get_ttl(nh);
+	key->nsh.base.mdtype = nh->mdtype;
+	key->nsh.base.np = nh->np;
+	key->nsh.base.path_hdr = nh->path_hdr;
+	switch (key->nsh.base.mdtype) {
+	case NSH_M_TYPE1:
+		if (length != NSH_M_TYPE1_LEN)
+			return -EINVAL;
+		memcpy(key->nsh.context, nh->md1.context,
+		       sizeof(nh->md1));
+		break;
+	case NSH_M_TYPE2:
+		memset(key->nsh.context, 0,
+		       sizeof(nh->md1));
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * key_extract - extracts a flow key from an Ethernet frame.
  * @skb: sk_buff that contains the frame, with skb->data pointing to the
@@ -584,8 +631,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
 			return 0;
 		}
-		if (nh->frag_off & htons(IP_MF) ||
-			skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+		if (nh->frag_off & htons(IP_MF))
 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
 		else
 			key->ip.frag = OVS_FRAG_TYPE_NONE;
@@ -701,9 +747,6 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 
 		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
 			return 0;
-		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
-			key->ip.frag = OVS_FRAG_TYPE_FIRST;
-
 		/* Transport layer. */
 		if (key->ip.proto == NEXTHDR_TCP) {
 			if (tcphdr_ok(skb)) {
@@ -739,6 +782,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		}
+	} else if (key->eth.type == htons(ETH_P_NSH)) {
+		error = parse_nsh(skb, key);
+		if (error)
+			return error;
 	}
 	return 0;
 }
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a9bc1c875965..c670dd24b8b7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -31,9 +31,11 @@
 #include <linux/jiffies.h>
 #include <linux/time.h>
 #include <linux/flex_array.h>
+#include <linux/cpumask.h>
 #include <net/inet_ecn.h>
 #include <net/ip_tunnels.h>
 #include <net/dst_metadata.h>
+#include <net/nsh.h>
 
 struct sk_buff;
 
@@ -65,6 +67,11 @@ struct vlan_head {
 	(offsetof(struct sw_flow_key, recirc_id) +	\
 	FIELD_SIZEOF(struct sw_flow_key, recirc_id))
 
+struct ovs_key_nsh {
+	struct ovs_nsh_key_base base;
+	__be32 context[NSH_MD1_CONTEXT_SIZE];
+};
+
 struct sw_flow_key {
 	u8 tun_opts[IP_TUNNEL_OPTS_MAX];
 	u8 tun_opts_len;
@@ -142,6 +149,7 @@ struct sw_flow_key {
 				} nd;
 			};
 		} ipv6;
+		struct ovs_key_nsh nsh;         /* network service header */
 	};
 	struct {
 		/* Connection tracking fields not packed above. */
@@ -219,6 +227,7 @@ struct sw_flow {
 					 */
 	struct sw_flow_key key;
 	struct sw_flow_id id;
+	struct cpumask cpu_used_mask;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
 	struct flow_stats __rcu *stats[]; /* One for each CPU.  First one
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index f07d10ac35d8..dc424798ba6f 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,8 @@
 #include <net/ndisc.h>
 #include <net/mpls.h>
 #include <net/vxlan.h>
+#include <net/tun_proto.h>
+#include <net/erspan.h>
 
 #include "flow_netlink.h"
 
@@ -75,16 +77,20 @@ static bool actions_may_change_flow(const struct nlattr *actions)
 			break;
 
 		case OVS_ACTION_ATTR_CT:
+		case OVS_ACTION_ATTR_CT_CLEAR:
 		case OVS_ACTION_ATTR_HASH:
 		case OVS_ACTION_ATTR_POP_ETH:
 		case OVS_ACTION_ATTR_POP_MPLS:
+		case OVS_ACTION_ATTR_POP_NSH:
 		case OVS_ACTION_ATTR_POP_VLAN:
 		case OVS_ACTION_ATTR_PUSH_ETH:
 		case OVS_ACTION_ATTR_PUSH_MPLS:
+		case OVS_ACTION_ATTR_PUSH_NSH:
 		case OVS_ACTION_ATTR_PUSH_VLAN:
 		case OVS_ACTION_ATTR_SAMPLE:
 		case OVS_ACTION_ATTR_SET:
 		case OVS_ACTION_ATTR_SET_MASKED:
+		case OVS_ACTION_ATTR_METER:
 		default:
 			return true;
 		}
@@ -173,7 +179,8 @@ static bool match_validate(const struct sw_flow_match *match,
 			| (1 << OVS_KEY_ATTR_ICMPV6)
 			| (1 << OVS_KEY_ATTR_ARP)
 			| (1 << OVS_KEY_ATTR_ND)
-			| (1 << OVS_KEY_ATTR_MPLS));
+			| (1 << OVS_KEY_ATTR_MPLS)
+			| (1 << OVS_KEY_ATTR_NSH));
 
 	/* Always allowed mask fields. */
 	mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@@ -282,6 +289,14 @@ static bool match_validate(const struct sw_flow_match *match,
 		}
 	}
 
+	if (match->key->eth.type == htons(ETH_P_NSH)) {
+		key_expected |= 1 << OVS_KEY_ATTR_NSH;
+		if (match->mask &&
+		    match->mask->key.eth.type == htons(0xffff)) {
+			mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
+		}
+	}
+
 	if ((key_attrs & key_expected) != key_expected) {
 		/* Key attributes check failed. */
 		OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
@@ -319,7 +334,21 @@ size_t ovs_tun_key_attr_size(void)
 		 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
 		 */
 		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
-		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+		+ nla_total_size(4);   /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
+}
+
+static size_t ovs_nsh_key_attr_size(void)
+{
+	/* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
+	 * updating this function.
+	 */
+	return  nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
+		/* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
+		 * mutually exclusive, so the bigger one can cover
+		 * the small one.
+		 */
+		+ nla_total_size(NSH_CTX_HDRS_MAX_LEN);
 }
 
 size_t ovs_key_attr_size(void)
@@ -327,7 +356,7 @@ size_t ovs_key_attr_size(void)
 	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
 	 * updating this function.
 	 */
-	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
+	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
 
 	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
 		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
@@ -341,6 +370,8 @@ size_t ovs_key_attr_size(void)
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
 		+ nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
 		+ nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
+		+ nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
+		  + ovs_nsh_key_attr_size()
 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
 		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
@@ -371,6 +402,14 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
 						.next = ovs_vxlan_ext_key_lens },
 	[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
 	[OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
+	[OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = sizeof(u32) },
+};
+
+static const struct ovs_len_tbl
+ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
+	[OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
+	[OVS_NSH_KEY_ATTR_MD1]  = { .len = sizeof(struct ovs_nsh_key_md1) },
+	[OVS_NSH_KEY_ATTR_MD2]  = { .len = OVS_ATTR_VARIABLE },
 };
 
 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
@@ -405,6 +444,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 		.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
 	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
 		.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
+	[OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
+				     .next = ovs_nsh_key_attr_lens, },
 };
 
 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -593,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
 	return 0;
 }
 
+static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
+				      struct sw_flow_match *match, bool is_mask,
+				      bool log)
+{
+	unsigned long opt_key_offset;
+	struct erspan_metadata opts;
+
+	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
+
+	memset(&opts, 0, sizeof(opts));
+	opts.index = nla_get_be32(attr);
+
+	/* Index has only 20 bits */
+	if (ntohl(opts.index) & ~INDEX_MASK) {
+		OVS_NLERR(log, "ERSPAN index number %x too large.",
+			  ntohl(opts.index));
+		return -EINVAL;
+	}
+
+	SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
+	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
+	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
+				  is_mask);
+
+	return 0;
+}
+
 static int ip_tun_from_nlattr(const struct nlattr *attr,
 			      struct sw_flow_match *match, bool is_mask,
 			      bool log)
@@ -700,6 +768,19 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
 			break;
 		case OVS_TUNNEL_KEY_ATTR_PAD:
 			break;
+		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+			if (opts_type) {
+				OVS_NLERR(log, "Multiple metadata blocks provided");
+				return -EINVAL;
+			}
+
+			err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
+			if (err)
+				return err;
+
+			tun_flags |= TUNNEL_ERSPAN_OPT;
+			opts_type = type;
+			break;
 		default:
 			OVS_NLERR(log, "Unknown IP tunnel attribute %d",
 				  type);
@@ -824,6 +905,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
 		else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
 			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
 			return -EMSGSIZE;
+		else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+			 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+				      ((struct erspan_metadata *)tun_opts)->index))
+			return -EMSGSIZE;
 	}
 
 	return 0;
@@ -1179,6 +1264,221 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
 	return 0;
 }
 
+int nsh_hdr_from_nlattr(const struct nlattr *attr,
+			struct nshhdr *nh, size_t size)
+{
+	struct nlattr *a;
+	int rem;
+	u8 flags = 0;
+	u8 ttl = 0;
+	int mdlen = 0;
+
+	/* validate_nsh has checked this, so we needn't duplicate the check here
+	 */
+	if (size < NSH_BASE_HDR_LEN)
+		return -ENOBUFS;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+
+		switch (type) {
+		case OVS_NSH_KEY_ATTR_BASE: {
+			const struct ovs_nsh_key_base *base = nla_data(a);
+
+			flags = base->flags;
+			ttl = base->ttl;
+			nh->np = base->np;
+			nh->mdtype = base->mdtype;
+			nh->path_hdr = base->path_hdr;
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD1:
+			mdlen = nla_len(a);
+			if (mdlen > size - NSH_BASE_HDR_LEN)
+				return -ENOBUFS;
+			memcpy(&nh->md1, nla_data(a), mdlen);
+			break;
+
+		case OVS_NSH_KEY_ATTR_MD2:
+			mdlen = nla_len(a);
+			if (mdlen > size - NSH_BASE_HDR_LEN)
+				return -ENOBUFS;
+			memcpy(&nh->md2, nla_data(a), mdlen);
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	/* nsh header length  = NSH_BASE_HDR_LEN + mdlen */
+	nh->ver_flags_ttl_len = 0;
+	nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
+
+	return 0;
+}
+
+int nsh_key_from_nlattr(const struct nlattr *attr,
+			struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
+{
+	struct nlattr *a;
+	int rem;
+
+	/* validate_nsh has checked this, so we needn't duplicate the check here
+	 */
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+
+		switch (type) {
+		case OVS_NSH_KEY_ATTR_BASE: {
+			const struct ovs_nsh_key_base *base = nla_data(a);
+			const struct ovs_nsh_key_base *base_mask = base + 1;
+
+			nsh->base = *base;
+			nsh_mask->base = *base_mask;
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD1: {
+			const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+			const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
+
+			memcpy(nsh->context, md1->context, sizeof(*md1));
+			memcpy(nsh_mask->context, md1_mask->context,
+			       sizeof(*md1_mask));
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD2:
+			/* Not supported yet */
+			return -ENOTSUPP;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int nsh_key_put_from_nlattr(const struct nlattr *attr,
+				   struct sw_flow_match *match, bool is_mask,
+				   bool is_push_nsh, bool log)
+{
+	struct nlattr *a;
+	int rem;
+	bool has_base = false;
+	bool has_md1 = false;
+	bool has_md2 = false;
+	u8 mdtype = 0;
+	int mdlen = 0;
+
+	if (WARN_ON(is_push_nsh && is_mask))
+		return -EINVAL;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		int i;
+
+		if (type > OVS_NSH_KEY_ATTR_MAX) {
+			OVS_NLERR(log, "nsh attr %d is out of range max %d",
+				  type, OVS_NSH_KEY_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (!check_attr_len(nla_len(a),
+				    ovs_nsh_key_attr_lens[type].len)) {
+			OVS_NLERR(
+			    log,
+			    "nsh attr %d has unexpected len %d expected %d",
+			    type,
+			    nla_len(a),
+			    ovs_nsh_key_attr_lens[type].len
+			);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_NSH_KEY_ATTR_BASE: {
+			const struct ovs_nsh_key_base *base = nla_data(a);
+
+			has_base = true;
+			mdtype = base->mdtype;
+			SW_FLOW_KEY_PUT(match, nsh.base.flags,
+					base->flags, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.base.ttl,
+					base->ttl, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
+					base->mdtype, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.base.np,
+					base->np, is_mask);
+			SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
+					base->path_hdr, is_mask);
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD1: {
+			const struct ovs_nsh_key_md1 *md1 = nla_data(a);
+
+			has_md1 = true;
+			for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
+				SW_FLOW_KEY_PUT(match, nsh.context[i],
+						md1->context[i], is_mask);
+			break;
+		}
+		case OVS_NSH_KEY_ATTR_MD2:
+			if (!is_push_nsh) /* Not supported MD type 2 yet */
+				return -ENOTSUPP;
+
+			has_md2 = true;
+			mdlen = nla_len(a);
+			if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
+				OVS_NLERR(
+				    log,
+				    "Invalid MD length %d for MD type %d",
+				    mdlen,
+				    mdtype
+				);
+				return -EINVAL;
+			}
+			break;
+		default:
+			OVS_NLERR(log, "Unknown nsh attribute %d",
+				  type);
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0) {
+		OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
+		return -EINVAL;
+	}
+
+	if (has_md1 && has_md2) {
+		OVS_NLERR(
+		    1,
+		    "invalid nsh attribute: md1 and md2 are exclusive."
+		);
+		return -EINVAL;
+	}
+
+	if (!is_mask) {
+		if ((has_md1 && mdtype != NSH_M_TYPE1) ||
+		    (has_md2 && mdtype != NSH_M_TYPE2)) {
+			OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
+				  mdtype);
+			return -EINVAL;
+		}
+
+		if (is_push_nsh &&
+		    (!has_base || (!has_md1 && !has_md2))) {
+			OVS_NLERR(
+			    1,
+			    "push_nsh: missing base or metadata attributes"
+			);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
 				u64 attrs, const struct nlattr **a,
 				bool is_mask, bool log)
@@ -1255,7 +1555,7 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
 		}
 
 		if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
-			OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n",
+			OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
 				  ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
 			return -EINVAL;
 		}
@@ -1306,6 +1606,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
 		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
 	}
 
+	if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
+		if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
+					    is_mask, false, log) < 0)
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_NSH);
+	}
+
 	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
 		const struct ovs_key_mpls *mpls_key;
 
@@ -1622,6 +1929,34 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
 	return 0;
 }
 
+static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
+			     struct sk_buff *skb)
+{
+	struct nlattr *start;
+
+	start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
+	if (!start)
+		return -EMSGSIZE;
+
+	if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
+		goto nla_put_failure;
+
+	if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
+		if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
+			    sizeof(nsh->context), nsh->context))
+			goto nla_put_failure;
+	}
+
+	/* Don't support MD type 2 yet */
+
+	nla_nest_end(skb, start);
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 			     const struct sw_flow_key *output, bool is_mask,
 			     struct sk_buff *skb)
@@ -1750,6 +2085,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 		ipv6_key->ipv6_tclass = output->ip.tos;
 		ipv6_key->ipv6_hlimit = output->ip.ttl;
 		ipv6_key->ipv6_frag = output->ip.frag;
+	} else if (swkey->eth.type == htons(ETH_P_NSH)) {
+		if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
+			goto nla_put_failure;
 	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
 		   swkey->eth.type == htons(ETH_P_RARP)) {
 		struct ovs_key_arp *arp_key;
@@ -2195,6 +2533,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 			break;
 		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
 			break;
+		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+			break;
 		}
 	};
 
@@ -2242,6 +2582,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	return err;
 }
 
+static bool validate_nsh(const struct nlattr *attr, bool is_mask,
+			 bool is_push_nsh, bool log)
+{
+	struct sw_flow_match match;
+	struct sw_flow_key key;
+	int ret = 0;
+
+	ovs_match_init(&match, &key, true, NULL);
+	ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
+				      is_push_nsh, log);
+	return !ret;
+}
+
 /* Return false if there are any non-masked bits set.
  * Mask follows data immediately, before any netlink padding.
  */
@@ -2384,6 +2737,13 @@ static int validate_set(const struct nlattr *a,
 
 		break;
 
+	case OVS_KEY_ATTR_NSH:
+		if (eth_type != htons(ETH_P_NSH))
+			return -EINVAL;
+		if (!validate_nsh(nla_data(a), masked, false, log))
+			return -EINVAL;
+		break;
+
 	default:
 		return -EINVAL;
 	}
@@ -2479,9 +2839,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
 			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
 			[OVS_ACTION_ATTR_CT] = (u32)-1,
+			[OVS_ACTION_ATTR_CT_CLEAR] = 0,
 			[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
 			[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
 			[OVS_ACTION_ATTR_POP_ETH] = 0,
+			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
+			[OVS_ACTION_ATTR_POP_NSH] = 0,
+			[OVS_ACTION_ATTR_METER] = sizeof(u32),
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -2620,6 +2984,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			skip_copy = true;
 			break;
 
+		case OVS_ACTION_ATTR_CT_CLEAR:
+			break;
+
 		case OVS_ACTION_ATTR_PUSH_ETH:
 			/* Disallow pushing an Ethernet header if one
 			 * is already present */
@@ -2636,6 +3003,38 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			mac_proto = MAC_PROTO_ETHERNET;
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_NSH:
+			if (mac_proto != MAC_PROTO_ETHERNET) {
+				u8 next_proto;
+
+				next_proto = tun_p_from_eth_p(eth_type);
+				if (!next_proto)
+					return -EINVAL;
+			}
+			mac_proto = MAC_PROTO_NONE;
+			if (!validate_nsh(nla_data(a), false, true, true))
+				return -EINVAL;
+			break;
+
+		case OVS_ACTION_ATTR_POP_NSH: {
+			__be16 inner_proto;
+
+			if (eth_type != htons(ETH_P_NSH))
+				return -EINVAL;
+			inner_proto = tun_p_to_eth_p(key->nsh.base.np);
+			if (!inner_proto)
+				return -EINVAL;
+			if (key->nsh.base.np == TUN_P_ETHERNET)
+				mac_proto = MAC_PROTO_ETHERNET;
+			else
+				mac_proto = MAC_PROTO_NONE;
+			break;
+		}
+
+		case OVS_ACTION_ATTR_METER:
+			/* Non-existent meters are simply ignored.  */
+			break;
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 929c665ac3aa..6657606b2b47 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -79,4 +79,9 @@ int ovs_nla_put_actions(const struct nlattr *attr,
 void ovs_nla_free_flow_actions(struct sw_flow_actions *);
 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
 
+int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh,
+			struct ovs_key_nsh *nsh_mask);
+int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh,
+			size_t size);
+
 #endif /* flow_netlink.h */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index ea7a8073fa02..80ea2a71852e 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -98,6 +98,8 @@ struct sw_flow *ovs_flow_alloc(void)
 
 	RCU_INIT_POINTER(flow->stats[0], stats);
 
+	cpumask_set_cpu(0, &flow->cpu_used_mask);
+
 	return flow;
 err:
 	kmem_cache_free(flow_cache, flow);
@@ -141,7 +143,7 @@ static void flow_free(struct sw_flow *flow)
 	if (flow->sf_acts)
 		ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
 	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask))
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
 		if (flow->stats[cpu])
 			kmem_cache_free(flow_stats_cache,
 					(struct flow_stats __force *)flow->stats[cpu]);
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
new file mode 100644
index 000000000000..3fbfc78991ac
--- /dev/null
+++ b/net/openvswitch/meter.c
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/openvswitch.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "meter.h"
+
+#define METER_HASH_BUCKETS 1024
+
+static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
+	[OVS_METER_ATTR_ID] = { .type = NLA_U32, },
+	[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
+	[OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
+	[OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
+	[OVS_METER_ATTR_USED] = { .type = NLA_U64 },
+	[OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
+	[OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
+	[OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
+	[OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
+	[OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
+	[OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
+	[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
+};
+
+static void ovs_meter_free(struct dp_meter *meter)
+{
+	if (!meter)
+		return;
+
+	kfree_rcu(meter, rcu);
+}
+
+static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
+					    u32 meter_id)
+{
+	return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
+}
+
+/* Call with ovs_mutex or RCU read lock. */
+static struct dp_meter *lookup_meter(const struct datapath *dp,
+				     u32 meter_id)
+{
+	struct dp_meter *meter;
+	struct hlist_head *head;
+
+	head = meter_hash_bucket(dp, meter_id);
+	hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
+		if (meter->id == meter_id)
+			return meter;
+	}
+	return NULL;
+}
+
+static void attach_meter(struct datapath *dp, struct dp_meter *meter)
+{
+	struct hlist_head *head = meter_hash_bucket(dp, meter->id);
+
+	hlist_add_head_rcu(&meter->dp_hash_node, head);
+}
+
+static void detach_meter(struct dp_meter *meter)
+{
+	ASSERT_OVSL();
+	if (meter)
+		hlist_del_rcu(&meter->dp_hash_node);
+}
+
+static struct sk_buff *
+ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
+			  struct ovs_header **ovs_reply_header)
+{
+	struct sk_buff *skb;
+	struct ovs_header *ovs_header = info->userhdr;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	*ovs_reply_header = genlmsg_put(skb, info->snd_portid,
+					info->snd_seq,
+					&dp_meter_genl_family, 0, cmd);
+	if (!*ovs_reply_header) {
+		nlmsg_free(skb);
+		return ERR_PTR(-EMSGSIZE);
+	}
+	(*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
+
+	return skb;
+}
+
+static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
+				     struct dp_meter *meter)
+{
+	struct nlattr *nla;
+	struct dp_meter_band *band;
+	u16 i;
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
+		goto error;
+
+	if (!meter)
+		return 0;
+
+	if (nla_put(reply, OVS_METER_ATTR_STATS,
+		    sizeof(struct ovs_flow_stats), &meter->stats) ||
+	    nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+			      OVS_METER_ATTR_PAD))
+		goto error;
+
+	nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+	if (!nla)
+		goto error;
+
+	band = meter->bands;
+
+	for (i = 0; i < meter->n_bands; ++i, ++band) {
+		struct nlattr *band_nla;
+
+		band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+		if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
+					 sizeof(struct ovs_flow_stats),
+					 &band->stats))
+			goto error;
+		nla_nest_end(reply, band_nla);
+	}
+	nla_nest_end(reply, nla);
+
+	return 0;
+error:
+	return -EMSGSIZE;
+}
+
+static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *reply;
+	struct ovs_header *ovs_reply_header;
+	struct nlattr *nla, *band_nla;
+	int err;
+
+	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
+					  &ovs_reply_header);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
+	    nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
+		goto nla_put_failure;
+
+	nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
+	if (!nla)
+		goto nla_put_failure;
+
+	band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
+	if (!band_nla)
+		goto nla_put_failure;
+	/* Currently only DROP band type is supported. */
+	if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
+		goto nla_put_failure;
+	nla_nest_end(reply, band_nla);
+	nla_nest_end(reply, nla);
+
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+nla_put_failure:
+	nlmsg_free(reply);
+	err = -EMSGSIZE;
+	return err;
+}
+
+static struct dp_meter *dp_meter_create(struct nlattr **a)
+{
+	struct nlattr *nla;
+	int rem;
+	u16 n_bands = 0;
+	struct dp_meter *meter;
+	struct dp_meter_band *band;
+	int err;
+
+	/* Validate attributes, count the bands. */
+	if (!a[OVS_METER_ATTR_BANDS])
+		return ERR_PTR(-EINVAL);
+
+	nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
+		if (++n_bands > DP_MAX_BANDS)
+			return ERR_PTR(-EINVAL);
+
+	/* Allocate and set up the meter before locking anything. */
+	meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
+			sizeof(*meter), GFP_KERNEL);
+	if (!meter)
+		return ERR_PTR(-ENOMEM);
+
+	meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
+	meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
+	meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
+	spin_lock_init(&meter->lock);
+	if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
+		meter->stats = *(struct ovs_flow_stats *)
+			nla_data(a[OVS_METER_ATTR_STATS]);
+	}
+	meter->n_bands = n_bands;
+
+	/* Set up meter bands. */
+	band = meter->bands;
+	nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
+		struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
+		u32 band_max_delta_t;
+
+		err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
+				nla_data(nla), nla_len(nla), band_policy,
+				NULL);
+		if (err)
+			goto exit_free_meter;
+
+		if (!attr[OVS_BAND_ATTR_TYPE] ||
+		    !attr[OVS_BAND_ATTR_RATE] ||
+		    !attr[OVS_BAND_ATTR_BURST]) {
+			err = -EINVAL;
+			goto exit_free_meter;
+		}
+
+		band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
+		band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
+		band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
+		/* Figure out max delta_t that is enough to fill any bucket.
+		 * Keep max_delta_t size to the bucket units:
+		 * pkts => 1/1000 packets, kilobits => bits.
+		 */
+		band_max_delta_t = (band->burst_size + band->rate) * 1000;
+		/* Start with a full bucket. */
+		band->bucket = band_max_delta_t;
+		if (band_max_delta_t > meter->max_delta_t)
+			meter->max_delta_t = band_max_delta_t;
+		band++;
+	}
+
+	return meter;
+
+exit_free_meter:
+	kfree(meter);
+	return ERR_PTR(err);
+}
+
+/* OVS_METER_CMD_SET: install a new meter, replacing any with the same ID. */
+static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct dp_meter *meter, *old_meter;
+	struct sk_buff *reply;
+	struct ovs_header *ovs_reply_header;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct datapath *dp;
+	int err;
+	u32 meter_id;
+
+	meter = dp_meter_create(a);
+	if (IS_ERR(meter))
+		return PTR_ERR(meter);
+
+	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
+					  &ovs_reply_header);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		goto exit_free_meter;
+	}
+
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	if (!a[OVS_METER_ATTR_ID]) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+	/* attach_meter() and lookup_meter() both key on meter->id. */
+	meter->id = meter_id;
+
+	/* Cannot fail after this. */
+	old_meter = lookup_meter(dp, meter_id);
+	detach_meter(old_meter);
+	attach_meter(dp, meter);
+	ovs_unlock();
+
+	/* Reply with the meter_id and the old meter's stats, if any. */
+	err = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
+	WARN_ON(err);
+	if (old_meter) {
+		spin_lock_bh(&old_meter->lock);
+		if (old_meter->keep_stats) {
+			err = ovs_meter_cmd_reply_stats(reply, meter_id,
+							old_meter);
+			WARN_ON(err);
+		}
+		spin_unlock_bh(&old_meter->lock);
+		ovs_meter_free(old_meter);
+	}
+
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+exit_unlock:
+	ovs_unlock();
+	nlmsg_free(reply);
+exit_free_meter:
+	kfree(meter);
+	return err;
+}
+
+static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	u32 meter_id;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct ovs_header *ovs_reply_header;
+	struct datapath *dp;
+	int err;
+	struct sk_buff *reply;
+	struct dp_meter *meter;
+
+	if (!a[OVS_METER_ATTR_ID])
+		return -EINVAL;
+
+	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
+					  &ovs_reply_header);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	ovs_lock();
+
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	/* Locate meter, copy stats. */
+	meter = lookup_meter(dp, meter_id);
+	if (!meter) {
+		err = -ENOENT;
+		goto exit_unlock;
+	}
+
+	spin_lock_bh(&meter->lock);
+	err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
+	spin_unlock_bh(&meter->lock);
+	if (err)
+		goto exit_unlock;
+
+	ovs_unlock();
+
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+exit_unlock:
+	ovs_unlock();
+	nlmsg_free(reply);
+	return err;
+}
+
+static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	u32 meter_id;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct ovs_header *ovs_reply_header;
+	struct datapath *dp;
+	int err;
+	struct sk_buff *reply;
+	struct dp_meter *old_meter;
+
+	if (!a[OVS_METER_ATTR_ID])
+		return -EINVAL;
+	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+
+	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
+					  &ovs_reply_header);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	ovs_lock();
+
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	old_meter = lookup_meter(dp, meter_id);
+	if (old_meter) {
+		spin_lock_bh(&old_meter->lock);
+		err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
+		WARN_ON(err);
+		spin_unlock_bh(&old_meter->lock);
+		detach_meter(old_meter);
+	}
+	ovs_unlock();
+	ovs_meter_free(old_meter);
+	genlmsg_end(reply, ovs_reply_header);
+	return genlmsg_reply(reply, info);
+
+exit_unlock:
+	ovs_unlock();
+	nlmsg_free(reply);
+	return err;
+}
+
+/* Meter action execution.
+ *
+ * Return true if a drop band of meter 'meter_id' is triggered, in which
+ * case the caller should drop 'skb'.  A missing meter never drops.
+ */
+bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
+		       struct sw_flow_key *key, u32 meter_id)
+{
+	struct dp_meter *meter;
+	struct dp_meter_band *band;
+	long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
+	long long int long_delta_ms;
+	u32 delta_ms;
+	u32 cost;
+	int i, band_exceeded_max = -1;
+	u32 band_exceeded_rate = 0;
+
+	meter = lookup_meter(dp, meter_id);
+	/* Do not drop the packet when there is no meter. */
+	if (!meter)
+		return false;
+
+	/* Lock the meter while using it. */
+	spin_lock(&meter->lock);
+
+	long_delta_ms = (now_ms - meter->used); /* ms */
+	/* now_ms predates the lock; a racing CPU may have advanced 'used'. */
+	if (long_delta_ms < 0)
+		long_delta_ms = 0;
+
+	/* Cap delta_ms so that the bucket will not wrap around below. */
+	delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
+		   ? meter->max_delta_t : (u32)long_delta_ms;
+
+	/* Update meter statistics. */
+	meter->used = now_ms;
+	meter->stats.n_packets += 1;
+	meter->stats.n_bytes += skb->len;
+
+	/* Bucket rate is either in kilobits per second, or in packets per
+	 * second.  We maintain the bucket in the units of either bits or
+	 * 1/1000th of a packet, correspondingly.
+	 * Then, when rate is multiplied with milliseconds, we get the
+	 * bucket units:
+	 * msec * kbps = bits, and
+	 * msec * packets/sec = 1/1000 packets.
+	 *
+	 * 'cost' is the number of bucket units in this packet.
+	 */
+	cost = (meter->kbps) ? skb->len * 8 : 1000;
+
+	/* Update all bands and find the one hit with the highest rate. */
+	for (i = 0; i < meter->n_bands; ++i) {
+		long long int max_bucket_size;
+
+		band = &meter->bands[i];
+		max_bucket_size = (band->burst_size + band->rate) * 1000;
+
+		band->bucket += delta_ms * band->rate;
+		if (band->bucket > max_bucket_size)
+			band->bucket = max_bucket_size;
+
+		if (band->bucket >= cost) {
+			band->bucket -= cost;
+		} else if (band->rate > band_exceeded_rate) {
+			band_exceeded_rate = band->rate;
+			band_exceeded_max = i;
+		}
+	}
+
+	if (band_exceeded_max >= 0) {
+		/* Update band statistics. */
+		band = &meter->bands[band_exceeded_max];
+		band->stats.n_packets += 1;
+		band->stats.n_bytes += skb->len;
+
+		/* Drop band triggered, let the caller drop the 'skb'.  */
+		if (band->type == OVS_METER_BAND_TYPE_DROP) {
+			spin_unlock(&meter->lock);
+			return true;
+		}
+	}
+
+	spin_unlock(&meter->lock);
+	return false;
+}
+
+static struct genl_ops dp_meter_genl_ops[] = {
+	{ .cmd = OVS_METER_CMD_FEATURES,
+		.flags = 0,		  /* OK for unprivileged users. */
+		.policy = meter_policy,
+		.doit = ovs_meter_cmd_features
+	},
+	{ .cmd = OVS_METER_CMD_SET,
+		.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+					   *  privilege.
+					   */
+		.policy = meter_policy,
+		.doit = ovs_meter_cmd_set,
+	},
+	{ .cmd = OVS_METER_CMD_GET,
+		.flags = 0,		  /* OK for unprivileged users. */
+		.policy = meter_policy,
+		.doit = ovs_meter_cmd_get,
+	},
+	{ .cmd = OVS_METER_CMD_DEL,
+		.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
+					   *  privilege.
+					   */
+		.policy = meter_policy,
+		.doit = ovs_meter_cmd_del
+	},
+};
+
+static const struct genl_multicast_group ovs_meter_multicast_group = {
+	.name = OVS_METER_MCGROUP,
+};
+
+struct genl_family dp_meter_genl_family __ro_after_init = {
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_METER_FAMILY,
+	.version = OVS_METER_VERSION,
+	.maxattr = OVS_METER_ATTR_MAX,
+	.netnsok = true,
+	.parallel_ops = true,
+	.ops = dp_meter_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_meter_genl_ops),
+	.mcgrps = &ovs_meter_multicast_group,
+	.n_mcgrps = 1,
+	.module = THIS_MODULE,
+};
+
+int ovs_meters_init(struct datapath *dp)
+{
+	int i;
+
+	dp->meters = kmalloc_array(METER_HASH_BUCKETS,
+				   sizeof(struct hlist_head), GFP_KERNEL);
+
+	if (!dp->meters)
+		return -ENOMEM;
+
+	for (i = 0; i < METER_HASH_BUCKETS; i++)
+		INIT_HLIST_HEAD(&dp->meters[i]);
+
+	return 0;
+}
+
+void ovs_meters_exit(struct datapath *dp)
+{
+	int i;
+
+	for (i = 0; i < METER_HASH_BUCKETS; i++) {
+		struct hlist_head *head = &dp->meters[i];
+		struct dp_meter *meter;
+		struct hlist_node *n;
+
+		hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
+			kfree(meter);
+	}
+
+	kfree(dp->meters);
+}
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
new file mode 100644
index 000000000000..964ace2650f8
--- /dev/null
+++ b/net/openvswitch/meter.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#ifndef METER_H
+#define METER_H 1
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/genetlink.h>
+#include <linux/skbuff.h>
+
+#include "flow.h"
+struct datapath;
+
+#define DP_MAX_BANDS		1
+
+struct dp_meter_band {
+	u32 type; /* OVS_METER_BAND_TYPE_*; only DROP is acted upon */
+	u32 rate; /* kbit/s when the meter is in kbps mode, else packets/s */
+	u32 burst_size; /* max bucket is (burst_size + rate) * 1000 */
+	u32 bucket; /* 1/1000 packets, or in bits */
+	struct ovs_flow_stats stats; /* Packets/bytes that exceeded this band */
+};
+
+struct dp_meter {
+	spinlock_t lock;    /* Per meter lock */
+	struct rcu_head rcu; /* For kfree_rcu() in ovs_meter_free() */
+	struct hlist_node dp_hash_node; /* Element in datapath->meters
+					 * hash table.
+					 */
+	u32 id; /* Hash key; matched by lookup_meter() */
+	u16 kbps:1, keep_stats:1; /* kbps: kbit/s, not pkt/s; keep_stats: !CLEAR */
+	u16 n_bands; /* Number of entries in bands[] */
+	u32 max_delta_t; /* Max over bands of the full bucket size; caps delta_ms */
+	u64 used; /* Time of last use, in ms */
+	struct ovs_flow_stats stats; /* Packets/bytes seen by this meter */
+	struct dp_meter_band bands[];
+};
+
+extern struct genl_family dp_meter_genl_family;
+int ovs_meters_init(struct datapath *dp);
+void ovs_meters_exit(struct datapath *dp);
+bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
+		       struct sw_flow_key *key, u32 meter_id);
+
+#endif /* meter.h */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 0389398fa4ab..2e5e7a41d8ef 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -108,7 +108,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
 
 	rtnl_lock();
 	err = netdev_master_upper_dev_link(vport->dev,
-					   get_dpdev(vport->dp), NULL, NULL);
+					   get_dpdev(vport->dp),
+					   NULL, NULL, NULL);
 	if (err)
 		goto error_unlock;
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 008a45ca3112..737092ca9b4e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -177,8 +177,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 #define BLK_PLUS_PRIV(sz_of_priv) \
 	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
 
-#define PGV_FROM_VMALLOC 1
-
 #define BLOCK_STATUS(x)	((x)->hdr.bh1.block_status)
 #define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)
 #define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)
@@ -203,11 +201,8 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *,
 static int prb_queue_frozen(struct tpacket_kbdq_core *);
 static void prb_open_block(struct tpacket_kbdq_core *,
 		struct tpacket_block_desc *);
-static void prb_retire_rx_blk_timer_expired(unsigned long);
+static void prb_retire_rx_blk_timer_expired(struct timer_list *);
 static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
-static void prb_init_blk_timer(struct packet_sock *,
-		struct tpacket_kbdq_core *,
-		void (*func) (unsigned long));
 static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
 static void prb_clear_rxhash(struct tpacket_kbdq_core *,
 		struct tpacket3_hdr *);
@@ -542,22 +537,14 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
 	prb_del_retire_blk_timer(pkc);
 }
 
-static void prb_init_blk_timer(struct packet_sock *po,
-		struct tpacket_kbdq_core *pkc,
-		void (*func) (unsigned long))
-{
-	init_timer(&pkc->retire_blk_timer);
-	pkc->retire_blk_timer.data = (long)po;
-	pkc->retire_blk_timer.function = func;
-	pkc->retire_blk_timer.expires = jiffies;
-}
-
 static void prb_setup_retire_blk_timer(struct packet_sock *po)
 {
 	struct tpacket_kbdq_core *pkc;
 
 	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
-	prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
+	timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
+		    0);
+	pkc->retire_blk_timer.expires = jiffies;
 }
 
 static int prb_calc_retire_blk_tmo(struct packet_sock *po,
@@ -675,9 +662,10 @@ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
  * prb_calc_retire_blk_tmo() calculates the tmo.
  *
  */
-static void prb_retire_rx_blk_timer_expired(unsigned long data)
+static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
 {
-	struct packet_sock *po = (struct packet_sock *)data;
+	struct packet_sock *po =
+		from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
 	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
 	unsigned int frozen;
 	struct tpacket_block_desc *pbd;
@@ -1686,10 +1674,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 
 	mutex_lock(&fanout_mutex);
 
-	err = -EINVAL;
-	if (!po->running)
-		goto out;
-
 	err = -EALREADY;
 	if (po->fanout)
 		goto out;
@@ -1751,7 +1735,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		list_add(&match->list, &fanout_list);
 	}
 	err = -EINVAL;
-	if (match->type == type &&
+
+	spin_lock(&po->bind_lock);
+	if (po->running &&
+	    match->type == type &&
 	    match->prot_hook.type == po->prot_hook.type &&
 	    match->prot_hook.dev == po->prot_hook.dev) {
 		err = -ENOSPC;
@@ -1763,9 +1750,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 			err = 0;
 		}
 	}
+	spin_unlock(&po->bind_lock);
+
+	if (err && !refcount_read(&match->sk_ref)) {
+		list_del(&match->list);
+		kfree(match);
+	}
+
 out:
 	if (err && rollover) {
-		kfree(rollover);
+		kfree_rcu(rollover, rcu);
 		po->rollover = NULL;
 	}
 	mutex_unlock(&fanout_mutex);
@@ -1792,8 +1786,10 @@ static struct packet_fanout *fanout_release(struct sock *sk)
 		else
 			f = NULL;
 
-		if (po->rollover)
+		if (po->rollover) {
 			kfree_rcu(po->rollover, rcu);
+			po->rollover = NULL;
+		}
 	}
 	mutex_unlock(&fanout_mutex);
 
@@ -2191,6 +2187,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct timespec ts;
 	__u32 ts_status;
 	bool is_drop_n_account = false;
+	bool do_vnet = false;
 
 	/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
 	 * We may add members to them until current aligned size without forcing
@@ -2241,8 +2238,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		netoff = TPACKET_ALIGN(po->tp_hdrlen +
 				       (maclen < 16 ? 16 : maclen)) +
 				       po->tp_reserve;
-		if (po->has_vnet_hdr)
+		if (po->has_vnet_hdr) {
 			netoff += sizeof(struct virtio_net_hdr);
+			do_vnet = true;
+		}
 		macoff = netoff - maclen;
 	}
 	if (po->tp_version <= TPACKET_V2) {
@@ -2259,8 +2258,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 					skb_set_owner_r(copy_skb, sk);
 			}
 			snaplen = po->rx_ring.frame_size - macoff;
-			if ((int)snaplen < 0)
+			if ((int)snaplen < 0) {
 				snaplen = 0;
+				do_vnet = false;
+			}
 		}
 	} else if (unlikely(macoff + snaplen >
 			    GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
@@ -2273,6 +2274,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		if (unlikely((int)snaplen < 0)) {
 			snaplen = 0;
 			macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+			do_vnet = false;
 		}
 	}
 	spin_lock(&sk->sk_receive_queue.lock);
@@ -2298,7 +2300,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	}
 	spin_unlock(&sk->sk_receive_queue.lock);
 
-	if (po->has_vnet_hdr) {
+	if (do_vnet) {
 		if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
 					    sizeof(struct virtio_net_hdr),
 					    vio_le(), true)) {
@@ -2830,6 +2832,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	struct virtio_net_hdr vnet_hdr = { 0 };
 	int offset = 0;
 	struct packet_sock *po = pkt_sk(sk);
+	bool has_vnet_hdr = false;
 	int hlen, tlen, linear;
 	int extra_len = 0;
 
@@ -2873,6 +2876,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
 		if (err)
 			goto out_unlock;
+		has_vnet_hdr = true;
 	}
 
 	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2931,7 +2935,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	skb->priority = sk->sk_priority;
 	skb->mark = sockc.mark;
 
-	if (po->has_vnet_hdr) {
+	if (has_vnet_hdr) {
 		err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
 		if (err)
 			goto out_free;
@@ -3059,13 +3063,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
 	int ret = 0;
 	bool unlisted = false;
 
-	if (po->fanout)
-		return -EINVAL;
-
 	lock_sock(sk);
 	spin_lock(&po->bind_lock);
 	rcu_read_lock();
 
+	if (po->fanout) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	if (name) {
 		dev = dev_get_by_name_rcu(sock_net(sk), name);
 		if (!dev) {
@@ -3837,6 +3843,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 	void *data = &val;
 	union tpacket_stats_u st;
 	struct tpacket_rollover_stats rstats;
+	struct packet_rollover *rollover;
 
 	if (level != SOL_PACKET)
 		return -ENOPROTOOPT;
@@ -3915,13 +3922,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		       0);
 		break;
 	case PACKET_ROLLOVER_STATS:
-		if (!po->rollover)
+		rcu_read_lock();
+		rollover = rcu_dereference(po->rollover);
+		if (rollover) {
+			rstats.tp_all = atomic_long_read(&rollover->num);
+			rstats.tp_huge = atomic_long_read(&rollover->num_huge);
+			rstats.tp_failed = atomic_long_read(&rollover->num_failed);
+			data = &rstats;
+			lv = sizeof(rstats);
+		}
+		rcu_read_unlock();
+		if (!rollover)
 			return -EINVAL;
-		rstats.tp_all = atomic_long_read(&po->rollover->num);
-		rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
-		rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
-		data = &rstats;
-		lv = sizeof(rstats);
 		break;
 	case PACKET_TX_HAS_OFF:
 		val = po->tp_tx_has_off;
@@ -4548,6 +4560,7 @@ static int __net_init packet_net_init(struct net *net)
 static void __net_exit packet_net_exit(struct net *net)
 {
 	remove_proc_entry("packet", net->proc_net);
+	WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
 }
 
 static struct pernet_operations packet_net_ops = {
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 94d1d405a116..562fbc155006 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __PACKET_INTERNAL_H__
 #define __PACKET_INTERNAL_H__
 
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index e10b1b182ce3..444f875932b9 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_PHONET) += phonet.o pn_pep.o
 
 phonet-y := \
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index f925753668a7..3b0ef691f5b1 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -35,11 +35,11 @@
 #include <net/phonet/pn_dev.h>
 
 /* Transport protocol registration */
-static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
+static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
 
-static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
+static const struct phonet_protocol *phonet_proto_get(unsigned int protocol)
 {
-	struct phonet_protocol *pp;
+	const struct phonet_protocol *pp;
 
 	if (protocol >= PHONET_NPROTO)
 		return NULL;
@@ -53,7 +53,7 @@ static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
 	return pp;
 }
 
-static inline void phonet_proto_put(struct phonet_protocol *pp)
+static inline void phonet_proto_put(const struct phonet_protocol *pp)
 {
 	module_put(pp->prot->owner);
 }
@@ -65,7 +65,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
 {
 	struct sock *sk;
 	struct pn_sock *pn;
-	struct phonet_protocol *pnp;
+	const struct phonet_protocol *pnp;
 	int err;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -149,7 +149,7 @@ static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr)
 	return 1;
 }
 
-struct header_ops phonet_header_ops = {
+const struct header_ops phonet_header_ops = {
 	.create = pn_header_create,
 	.parse = pn_header_parse,
 };
@@ -470,7 +470,7 @@ static struct packet_type phonet_packet_type __read_mostly = {
 static DEFINE_MUTEX(proto_tab_lock);
 
 int __init_or_module phonet_proto_register(unsigned int protocol,
-						struct phonet_protocol *pp)
+				const struct phonet_protocol *pp)
 {
 	int err = 0;
 
@@ -492,7 +492,8 @@ int __init_or_module phonet_proto_register(unsigned int protocol,
 }
 EXPORT_SYMBOL(phonet_proto_register);
 
-void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp)
+void phonet_proto_unregister(unsigned int protocol,
+			const struct phonet_protocol *pp)
 {
 	mutex_lock(&proto_tab_lock);
 	BUG_ON(proto_tab[protocol] != pp);
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 5e710435ffa9..b44fb9018fb8 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -195,7 +195,7 @@ static struct proto pn_proto = {
 	.name		= "PHONET",
 };
 
-static struct phonet_protocol pn_dgram_proto = {
+static const struct phonet_protocol pn_dgram_proto = {
 	.ops		= &phonet_dgram_ops,
 	.prot		= &pn_proto,
 	.sock_type	= SOCK_DGRAM,
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e81537991ddf..9fc76b19cd3c 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1351,7 +1351,7 @@ static struct proto pep_proto = {
 	.name		= "PNPIPE",
 };
 
-static struct phonet_protocol pep_pn_proto = {
+static const struct phonet_protocol pep_pn_proto = {
 	.ops		= &phonet_stream_ops,
 	.prot		= &pep_proto,
 	.sock_type	= SOCK_SEQPACKET,
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 2cb4c5dfad6f..77787512fc32 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -331,7 +331,10 @@ static int __net_init phonet_init_net(struct net *net)
 
 static void __net_exit phonet_exit_net(struct net *net)
 {
+	struct phonet_net *pnn = phonet_pernet(net);
+
 	remove_proc_entry("phonet", net->proc_net);
+	WARN_ON_ONCE(!list_empty(&pnn->pndevs.list));
 }
 
 static struct pernet_operations phonet_net_ops = {
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 45b3af3080d8..da754fc926e7 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -300,15 +300,15 @@ out:
 int __init phonet_netlink_register(void)
 {
 	int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit,
-				  NULL, NULL);
+				  NULL, 0);
 	if (err)
 		return err;
 
 	/* Further __rtnl_register() cannot fail */
-	__rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, NULL);
-	__rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, NULL);
-	__rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, NULL);
-	__rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, NULL);
-	__rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, NULL);
+	__rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0);
+	__rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0);
+	__rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0);
+	__rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0);
+	__rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0);
 	return 0;
 }
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 3a6ad0f438dc..64f95624f219 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -296,6 +296,6 @@ static void __exit psample_module_exit(void)
 module_init(psample_module_init);
 module_exit(psample_module_exit);
 
-MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
 MODULE_DESCRIPTION("netlink channel for packet sampling");
 MODULE_LICENSE("GPL v2");
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 5586609afa27..77ab05e23001 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -20,26 +20,15 @@
 
 #include "qrtr.h"
 
-#define QRTR_PROTO_VER 1
+#define QRTR_PROTO_VER_1 1
+#define QRTR_PROTO_VER_2 3
 
 /* auto-bind range */
 #define QRTR_MIN_EPH_SOCKET 0x4000
 #define QRTR_MAX_EPH_SOCKET 0x7fff
 
-enum qrtr_pkt_type {
-	QRTR_TYPE_DATA		= 1,
-	QRTR_TYPE_HELLO		= 2,
-	QRTR_TYPE_BYE		= 3,
-	QRTR_TYPE_NEW_SERVER	= 4,
-	QRTR_TYPE_DEL_SERVER	= 5,
-	QRTR_TYPE_DEL_CLIENT	= 6,
-	QRTR_TYPE_RESUME_TX	= 7,
-	QRTR_TYPE_EXIT		= 8,
-	QRTR_TYPE_PING		= 9,
-};
-
 /**
- * struct qrtr_hdr - (I|R)PCrouter packet header
+ * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
  * @version: protocol version
  * @type: packet type; one of QRTR_TYPE_*
  * @src_node_id: source node
@@ -49,7 +38,7 @@ enum qrtr_pkt_type {
  * @dst_node_id: destination node
  * @dst_port_id: destination port
  */
-struct qrtr_hdr {
+struct qrtr_hdr_v1 {
 	__le32 version;
 	__le32 type;
 	__le32 src_node_id;
@@ -60,9 +49,44 @@ struct qrtr_hdr {
 	__le32 dst_port_id;
 } __packed;
 
-#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr)
-#define QRTR_NODE_BCAST ((unsigned int)-1)
-#define QRTR_PORT_CTRL ((unsigned int)-2)
+/**
+ * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions
+ * @version: protocol version
+ * @type: packet type; one of QRTR_TYPE_*
+ * @flags: bitmask of QRTR_FLAGS_*
+ * @optlen: length of optional header data
+ * @size: length of packet, excluding this header and optlen
+ * @src_node_id: source node
+ * @src_port_id: source port
+ * @dst_node_id: destination node
+ * @dst_port_id: destination port
+ */
+struct qrtr_hdr_v2 {
+	u8 version;
+	u8 type;
+	u8 flags;
+	u8 optlen;
+	__le32 size;
+	__le16 src_node_id;
+	__le16 src_port_id;
+	__le16 dst_node_id;
+	__le16 dst_port_id;
+};
+
+#define QRTR_FLAGS_CONFIRM_RX	BIT(0)
+
+struct qrtr_cb {
+	u32 src_node;
+	u32 src_port;
+	u32 dst_node;
+	u32 dst_port;
+
+	u8 type;
+	u8 confirm_rx;
+};
+
+#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \
+					sizeof(struct qrtr_hdr_v2))
 
 struct qrtr_sock {
 	/* WARNING: sk must be the first member */
@@ -111,8 +135,12 @@ struct qrtr_node {
 	struct list_head item;
 };
 
-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb);
-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb);
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+			      int type, struct sockaddr_qrtr *from,
+			      struct sockaddr_qrtr *to);
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+			      int type, struct sockaddr_qrtr *from,
+			      struct sockaddr_qrtr *to);
 
 /* Release node resources and free the node.
  *
@@ -150,10 +178,31 @@ static void qrtr_node_release(struct qrtr_node *node)
 }
 
 /* Pass an outgoing packet socket buffer to the endpoint driver. */
-static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+			     int type, struct sockaddr_qrtr *from,
+			     struct sockaddr_qrtr *to)
 {
+	struct qrtr_hdr_v1 *hdr;
+	size_t len = skb->len;
 	int rc = -ENODEV;
 
+	hdr = skb_push(skb, sizeof(*hdr));
+	hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
+	hdr->type = cpu_to_le32(type);
+	hdr->src_node_id = cpu_to_le32(from->sq_node);
+	hdr->src_port_id = cpu_to_le32(from->sq_port);
+	hdr->dst_node_id = cpu_to_le32(to->sq_node);
+	hdr->dst_port_id = cpu_to_le32(to->sq_port);
+
+	hdr->size = cpu_to_le32(len);
+	hdr->confirm_rx = 0;
+
+	/* skb->len already includes the header, so the pad target must too.
+	 * skb_put_padto() frees the skb on failure; do not touch it again.
+	 */
+	if (skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)))
+		return -ENOMEM;
+
 	mutex_lock(&node->ep_lock);
 	if (node->ep)
 		rc = node->ep->xmit(node->ep, skb);
@@ -207,125 +252,110 @@ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
 int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
 {
 	struct qrtr_node *node = ep->node;
-	const struct qrtr_hdr *phdr = data;
+	const struct qrtr_hdr_v1 *v1;
+	const struct qrtr_hdr_v2 *v2;
 	struct sk_buff *skb;
-	unsigned int psize;
+	struct qrtr_cb *cb;
 	unsigned int size;
-	unsigned int type;
 	unsigned int ver;
-	unsigned int dst;
-
-	if (len < QRTR_HDR_SIZE || len & 3)
-		return -EINVAL;
-
-	ver = le32_to_cpu(phdr->version);
-	size = le32_to_cpu(phdr->size);
-	type = le32_to_cpu(phdr->type);
-	dst = le32_to_cpu(phdr->dst_port_id);
-
-	psize = (size + 3) & ~3;
+	size_t hdrlen;
 
-	if (ver != QRTR_PROTO_VER)
-		return -EINVAL;
-
-	if (len != psize + QRTR_HDR_SIZE)
-		return -EINVAL;
-
-	if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA)
+	if (len == 0 || len & 3)
 		return -EINVAL;
 
 	skb = netdev_alloc_skb(NULL, len);
 	if (!skb)
 		return -ENOMEM;
 
-	skb_reset_transport_header(skb);
-	skb_put_data(skb, data, len);
-
-	skb_queue_tail(&node->rx_queue, skb);
-	schedule_work(&node->work);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
+	cb = (struct qrtr_cb *)skb->cb;
 
-static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len,
-					      u32 src_node, u32 dst_node)
-{
-	struct qrtr_hdr *hdr;
-	struct sk_buff *skb;
-
-	skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
-	if (!skb)
-		return NULL;
-	skb_reset_transport_header(skb);
+	/* v1 stores version as __le32, v2 as u8: byte 0 holds it in both */
+	ver = *(const u8 *)data;
 
-	hdr = skb_put(skb, QRTR_HDR_SIZE);
-	hdr->version = cpu_to_le32(QRTR_PROTO_VER);
-	hdr->type = cpu_to_le32(type);
-	hdr->src_node_id = cpu_to_le32(src_node);
-	hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL);
-	hdr->confirm_rx = cpu_to_le32(0);
-	hdr->size = cpu_to_le32(pkt_len);
-	hdr->dst_node_id = cpu_to_le32(dst_node);
-	hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
+	switch (ver) {
+	case QRTR_PROTO_VER_1:
+		/* Every v1 field read below must lie inside the buffer */
+		if (len < sizeof(*v1))
+			goto err;
+		v1 = data;
+		hdrlen = sizeof(*v1);
 
-	return skb;
-}
+		cb->type = le32_to_cpu(v1->type);
+		cb->src_node = le32_to_cpu(v1->src_node_id);
+		cb->src_port = le32_to_cpu(v1->src_port_id);
+		cb->confirm_rx = !!v1->confirm_rx;
+		cb->dst_node = le32_to_cpu(v1->dst_node_id);
+		cb->dst_port = le32_to_cpu(v1->dst_port_id);
 
-/* Allocate and construct a resume-tx packet. */
-static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
-					    u32 dst_node, u32 port)
-{
-	const int pkt_len = 20;
-	struct sk_buff *skb;
-	__le32 *buf;
+		size = le32_to_cpu(v1->size);
+		break;
+	case QRTR_PROTO_VER_2:
+		/* The fixed v2 header, including optlen, must be readable */
+		if (len < sizeof(*v2))
+			goto err;
+		v2 = data;
+		hdrlen = sizeof(*v2) + v2->optlen;
+
+		cb->type = v2->type;
+		cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX);
+		cb->src_node = le16_to_cpu(v2->src_node_id);
+		cb->src_port = le16_to_cpu(v2->src_port_id);
+		cb->dst_node = le16_to_cpu(v2->dst_node_id);
+		cb->dst_port = le16_to_cpu(v2->dst_port_id);
+
+		if (cb->src_port == (u16)QRTR_PORT_CTRL)
+			cb->src_port = QRTR_PORT_CTRL;
+		if (cb->dst_port == (u16)QRTR_PORT_CTRL)
+			cb->dst_port = QRTR_PORT_CTRL;
+
+		size = le32_to_cpu(v2->size);
+		break;
+	default:
+		pr_err("qrtr: Invalid version %d\n", ver);
+		goto err;
+	}
 
-	skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len,
-				     src_node, dst_node);
-	if (!skb)
-		return NULL;
+	/* size > len also rejects values whose 4-byte round-up would wrap */
+	if (size > len || len != ALIGN(size, 4) + hdrlen)
+		goto err;
 
-	buf = skb_put_zero(skb, pkt_len);
-	buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
-	buf[1] = cpu_to_le32(src_node);
-	buf[2] = cpu_to_le32(port);
+	if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA)
+		goto err;
 
-	return skb;
-}
+	skb_put_data(skb, data + hdrlen, size);
 
-/* Allocate and construct a BYE message to signal remote termination */
-static struct sk_buff *qrtr_alloc_local_bye(u32 src_node)
-{
-	const int pkt_len = 20;
-	struct sk_buff *skb;
-	__le32 *buf;
+	skb_queue_tail(&node->rx_queue, skb);
+	schedule_work(&node->work);
 
-	skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len,
-				     src_node, qrtr_local_nid);
-	if (!skb)
-		return NULL;
+	return 0;
 
-	buf = skb_put_zero(skb, pkt_len);
-	buf[0] = cpu_to_le32(QRTR_TYPE_BYE);
+err:
+	kfree_skb(skb);
+	return -EINVAL;
 
-	return skb;
 }
+EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
 
-static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq)
+/**
+ * qrtr_alloc_ctrl_packet() - allocate control packet skb
+ * @pkt: reference to qrtr_ctrl_pkt pointer
+ *
+ * Returns newly allocated sk_buff, or NULL on failure
+ *
+ * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and
+ * on success returns a reference to the control packet in @pkt.
+ */
+static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt)
 {
-	const int pkt_len = 20;
+	const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
 	struct sk_buff *skb;
-	__le32 *buf;
 
-	skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len,
-				     sq->sq_node, QRTR_NODE_BCAST);
+	skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, GFP_KERNEL);
 	if (!skb)
 		return NULL;
 
-	buf = skb_put_zero(skb, pkt_len);
-	buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
-	buf[1] = cpu_to_le32(sq->sq_node);
-	buf[2] = cpu_to_le32(sq->sq_port);
+	skb_reserve(skb, QRTR_HDR_MAX_SIZE);
+	*pkt = skb_put_zero(skb, pkt_len);
 
 	return skb;
 }
@@ -340,24 +363,26 @@ static void qrtr_port_put(struct qrtr_sock *ipc);
 static void qrtr_node_rx_work(struct work_struct *work)
 {
 	struct qrtr_node *node = container_of(work, struct qrtr_node, work);
+	struct qrtr_ctrl_pkt *pkt;
+	struct sockaddr_qrtr dst;
+	struct sockaddr_qrtr src;
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&node->rx_queue)) != NULL) {
-		const struct qrtr_hdr *phdr;
-		u32 dst_node, dst_port;
 		struct qrtr_sock *ipc;
-		u32 src_node;
+		struct qrtr_cb *cb;
 		int confirm;
 
-		phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
-		src_node = le32_to_cpu(phdr->src_node_id);
-		dst_node = le32_to_cpu(phdr->dst_node_id);
-		dst_port = le32_to_cpu(phdr->dst_port_id);
-		confirm = !!phdr->confirm_rx;
+		cb = (struct qrtr_cb *)skb->cb;
+		src.sq_node = cb->src_node;
+		src.sq_port = cb->src_port;
+		dst.sq_node = cb->dst_node;
+		dst.sq_port = cb->dst_port;
+		confirm = !!cb->confirm_rx;
 
-		qrtr_node_assign(node, src_node);
+		qrtr_node_assign(node, cb->src_node);
 
-		ipc = qrtr_port_lookup(dst_port);
+		ipc = qrtr_port_lookup(cb->dst_port);
 		if (!ipc) {
 			kfree_skb(skb);
 		} else {
@@ -368,10 +393,16 @@ static void qrtr_node_rx_work(struct work_struct *work)
 		}
 
 		if (confirm) {
-			skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port);
+			skb = qrtr_alloc_ctrl_packet(&pkt);
 			if (!skb)
 				break;
-			if (qrtr_node_enqueue(node, skb))
+
+			pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
+			pkt->client.node = cpu_to_le32(dst.sq_node);
+			pkt->client.port = cpu_to_le32(dst.sq_port);
+
+			if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX,
+					      &dst, &src))
 				break;
 		}
 	}
@@ -421,6 +452,9 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
 void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
 {
 	struct qrtr_node *node = ep->node;
+	struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
+	struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
+	struct qrtr_ctrl_pkt *pkt;
 	struct sk_buff *skb;
 
 	mutex_lock(&node->ep_lock);
@@ -428,9 +462,11 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
 	mutex_unlock(&node->ep_lock);
 
 	/* Notify the local controller about the event */
-	skb = qrtr_alloc_local_bye(node->nid);
-	if (skb)
-		qrtr_local_enqueue(NULL, skb);
+	skb = qrtr_alloc_ctrl_packet(&pkt);
+	if (skb) {
+		pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE);
+		qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
+	}
 
 	qrtr_node_release(node);
 	ep->node = NULL;
@@ -466,13 +502,24 @@ static void qrtr_port_put(struct qrtr_sock *ipc)
 /* Remove port assignment. */
 static void qrtr_port_remove(struct qrtr_sock *ipc)
 {
+	struct qrtr_ctrl_pkt *pkt;
 	struct sk_buff *skb;
 	int port = ipc->us.sq_port;
+	struct sockaddr_qrtr to;
 
-	skb = qrtr_alloc_del_client(&ipc->us);
+	to.sq_family = AF_QIPCRTR;
+	to.sq_node = QRTR_NODE_BCAST;
+	to.sq_port = QRTR_PORT_CTRL;
+
+	skb = qrtr_alloc_ctrl_packet(&pkt);
 	if (skb) {
+		pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
+		pkt->client.node = cpu_to_le32(ipc->us.sq_node);
+		pkt->client.port = cpu_to_le32(ipc->us.sq_port);
+
 		skb_set_owner_w(skb, &ipc->sk);
-		qrtr_bcast_enqueue(NULL, skb);
+		qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
+				   &to);
 	}
 
 	if (port == QRTR_PORT_CTRL)
@@ -541,7 +588,7 @@ static void qrtr_reset_ports(void)
 
 		sock_hold(&ipc->sk);
 		ipc->sk.sk_err = ENETRESET;
-		wake_up_interruptible(sk_sleep(&ipc->sk));
+		ipc->sk.sk_error_report(&ipc->sk);
 		sock_put(&ipc->sk);
 	}
 	mutex_unlock(&qrtr_port_lock);
@@ -620,19 +667,23 @@ static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
 }
 
 /* Queue packet to local peer socket. */
-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+			      int type, struct sockaddr_qrtr *from,
+			      struct sockaddr_qrtr *to)
 {
-	const struct qrtr_hdr *phdr;
 	struct qrtr_sock *ipc;
+	struct qrtr_cb *cb;
 
-	phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
-
-	ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id));
+	ipc = qrtr_port_lookup(to->sq_port);
 	if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
 		kfree_skb(skb);
 		return -ENODEV;
 	}
 
+	cb = (struct qrtr_cb *)skb->cb;
+	cb->src_node = from->sq_node;
+	cb->src_port = from->sq_port;
+
 	if (sock_queue_rcv_skb(&ipc->sk, skb)) {
 		qrtr_port_put(ipc);
 		kfree_skb(skb);
@@ -645,7 +696,9 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
 }
 
 /* Queue packet for broadcast. */
-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+			      int type, struct sockaddr_qrtr *from,
+			      struct sockaddr_qrtr *to)
 {
 	struct sk_buff *skbn;
 
@@ -655,11 +708,11 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
 		if (!skbn)
 			break;
 		skb_set_owner_w(skbn, skb->sk);
-		qrtr_node_enqueue(node, skbn);
+		qrtr_node_enqueue(node, skbn, type, from, to);
 	}
 	mutex_unlock(&qrtr_node_lock);
 
-	qrtr_local_enqueue(node, skb);
+	qrtr_local_enqueue(node, skb, type, from, to);
 
 	return 0;
 }
@@ -667,13 +720,14 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
 static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
-	int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *);
+	int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
+			  struct sockaddr_qrtr *, struct sockaddr_qrtr *);
 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
 	struct sock *sk = sock->sk;
 	struct qrtr_node *node;
-	struct qrtr_hdr *hdr;
 	struct sk_buff *skb;
 	size_t plen;
+	u32 type = QRTR_TYPE_DATA;
 	int rc;
 
 	if (msg->msg_flags & ~(MSG_DONTWAIT))
@@ -722,37 +776,19 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	}
 
 	plen = (len + 3) & ~3;
-	skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE,
+	skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
 				  msg->msg_flags & MSG_DONTWAIT, &rc);
 	if (!skb)
 		goto out_node;
 
-	skb_reset_transport_header(skb);
-	skb_put(skb, len + QRTR_HDR_SIZE);
-
-	hdr = (struct qrtr_hdr *)skb_transport_header(skb);
-	hdr->version = cpu_to_le32(QRTR_PROTO_VER);
-	hdr->src_node_id = cpu_to_le32(ipc->us.sq_node);
-	hdr->src_port_id = cpu_to_le32(ipc->us.sq_port);
-	hdr->confirm_rx = cpu_to_le32(0);
-	hdr->size = cpu_to_le32(len);
-	hdr->dst_node_id = cpu_to_le32(addr->sq_node);
-	hdr->dst_port_id = cpu_to_le32(addr->sq_port);
+	skb_reserve(skb, QRTR_HDR_MAX_SIZE);
 
-	rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE,
-					 &msg->msg_iter, len);
+	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
 	if (rc) {
 		kfree_skb(skb);
 		goto out_node;
 	}
 
-	if (plen != len) {
-		rc = skb_pad(skb, plen - len);
-		if (rc)
-			goto out_node;
-		skb_put(skb, plen - len);
-	}
-
 	if (ipc->us.sq_port == QRTR_PORT_CTRL) {
 		if (len < 4) {
 			rc = -EINVAL;
@@ -761,12 +797,11 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 		}
 
 		/* control messages already require the type as 'command' */
-		skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4);
-	} else {
-		hdr->type = cpu_to_le32(QRTR_TYPE_DATA);
+		skb_copy_bits(skb, 0, &type, 4);
+		type = le32_to_cpu(type);
 	}
 
-	rc = enqueue_fn(node, skb);
+	rc = enqueue_fn(node, skb, type, &ipc->us, addr);
 	if (rc >= 0)
 		rc = len;
 
@@ -781,9 +816,9 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
 			size_t size, int flags)
 {
 	DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
-	const struct qrtr_hdr *phdr;
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
+	struct qrtr_cb *cb;
 	int copied, rc;
 
 	lock_sock(sk);
@@ -800,22 +835,22 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
 		return rc;
 	}
 
-	phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
-	copied = le32_to_cpu(phdr->size);
+	copied = skb->len;
 	if (copied > size) {
 		copied = size;
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied);
+	rc = skb_copy_datagram_msg(skb, 0, msg, copied);
 	if (rc < 0)
 		goto out;
 	rc = copied;
 
 	if (addr) {
+		cb = (struct qrtr_cb *)skb->cb;
 		addr->sq_family = AF_QIPCRTR;
-		addr->sq_node = le32_to_cpu(phdr->src_node_id);
-		addr->sq_port = le32_to_cpu(phdr->src_port_id);
+		addr->sq_node = cb->src_node;
+		addr->sq_port = cb->src_port;
 		msg->msg_namelen = sizeof(*addr);
 	}
 
@@ -908,7 +943,7 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case TIOCINQ:
 		skb = skb_peek(&sk->sk_receive_queue);
 		if (skb)
-			len = skb->len - QRTR_HDR_SIZE;
+			len = skb->len;
 		rc = put_user(len, (int __user *)argp);
 		break;
 	case SIOCGIFADDR:
@@ -1081,11 +1116,11 @@ static int __init qrtr_proto_init(void)
 		return rc;
 	}
 
-	rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL);
+	rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0);
 
 	return 0;
 }
-module_init(qrtr_proto_init);
+postcore_initcall(qrtr_proto_init);
 
 static void __exit qrtr_proto_fini(void)
 {
diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h
index 2b848718f8fe..b81e6953c04b 100644
--- a/net/qrtr/qrtr.h
+++ b/net/qrtr/qrtr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __QRTR_H_
 #define __QRTR_H_
 
diff --git a/net/rds/Makefile b/net/rds/Makefile
index 56c7d27eefee..b5d568bd479c 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_RDS) += rds.o
 rds-y :=	af_rds.o bind.o cong.o connection.o info.o message.o   \
 			recv.o send.o stats.o sysctl.o threads.o transport.o \
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 3a915bedb76c..75d43dc8e96b 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -40,7 +40,7 @@
 
 static struct rhashtable bind_hash_table;
 
-static struct rhashtable_params ht_parms = {
+static const struct rhashtable_params ht_parms = {
 	.nelem_hint = 768,
 	.key_len = sizeof(u64),
 	.key_offset = offsetof(struct rds_sock, rs_bound_key),
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 50a3789ac23e..7ee2d5d68b78 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -151,6 +151,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 	struct rds_transport *loop_trans;
 	unsigned long flags;
 	int ret, i;
+	int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
 
 	rcu_read_lock();
 	conn = rds_conn_lookup(net, head, laddr, faddr, trans);
@@ -172,6 +173,12 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 		conn = ERR_PTR(-ENOMEM);
 		goto out;
 	}
+	conn->c_path = kcalloc(npaths, sizeof(struct rds_conn_path), gfp);
+	if (!conn->c_path) {
+		kmem_cache_free(rds_conn_slab, conn);
+		conn = ERR_PTR(-ENOMEM);
+		goto out;
+	}
 
 	INIT_HLIST_NODE(&conn->c_hash_node);
 	conn->c_laddr = laddr;
@@ -181,6 +188,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 
 	ret = rds_cong_get_maps(conn);
 	if (ret) {
+		kfree(conn->c_path);
 		kmem_cache_free(rds_conn_slab, conn);
 		conn = ERR_PTR(ret);
 		goto out;
@@ -207,13 +215,14 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 	conn->c_trans = trans;
 
 	init_waitqueue_head(&conn->c_hs_waitq);
-	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+	for (i = 0; i < npaths; i++) {
 		__rds_conn_path_init(conn, &conn->c_path[i],
 				     is_outgoing);
 		conn->c_path[i].cp_index = i;
 	}
 	ret = trans->conn_alloc(conn, gfp);
 	if (ret) {
+		kfree(conn->c_path);
 		kmem_cache_free(rds_conn_slab, conn);
 		conn = ERR_PTR(ret);
 		goto out;
@@ -236,6 +245,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 		/* Creating passive conn */
 		if (parent->c_passive) {
 			trans->conn_free(conn->c_path[0].cp_transport_data);
+			kfree(conn->c_path);
 			kmem_cache_free(rds_conn_slab, conn);
 			conn = parent->c_passive;
 		} else {
@@ -252,7 +262,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 			struct rds_conn_path *cp;
 			int i;
 
-			for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+			for (i = 0; i < npaths; i++) {
 				cp = &conn->c_path[i];
 				/* The ->conn_alloc invocation may have
 				 * allocated resource for all paths, so all
@@ -261,6 +271,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 				if (cp->cp_transport_data)
 					trans->conn_free(cp->cp_transport_data);
 			}
+			kfree(conn->c_path);
 			kmem_cache_free(rds_conn_slab, conn);
 			conn = found;
 		} else {
@@ -374,13 +385,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
 	if (!cp->cp_transport_data)
 		return;
 
-	rds_conn_path_drop(cp);
-	flush_work(&cp->cp_down_w);
-
 	/* make sure lingering queued work won't try to ref the conn */
 	cancel_delayed_work_sync(&cp->cp_send_w);
 	cancel_delayed_work_sync(&cp->cp_recv_w);
 
+	rds_conn_path_drop(cp, true);
+	flush_work(&cp->cp_down_w);
+
 	/* tear down queued messages */
 	list_for_each_entry_safe(rm, rtmp,
 				 &cp->cp_send_queue,
@@ -407,6 +418,7 @@ void rds_conn_destroy(struct rds_connection *conn)
 	unsigned long flags;
 	int i;
 	struct rds_conn_path *cp;
+	int npaths = (conn->c_trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
 
 	rdsdebug("freeing conn %p for %pI4 -> "
 		 "%pI4\n", conn, &conn->c_laddr,
@@ -420,7 +432,7 @@ void rds_conn_destroy(struct rds_connection *conn)
 	synchronize_rcu();
 
 	/* shut the connection down */
-	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+	for (i = 0; i < npaths; i++) {
 		cp = &conn->c_path[i];
 		rds_conn_path_destroy(cp);
 		BUG_ON(!list_empty(&cp->cp_retrans));
@@ -434,6 +446,7 @@ void rds_conn_destroy(struct rds_connection *conn)
 	rds_cong_remove_conn(conn);
 
 	put_net(conn->c_net);
+	kfree(conn->c_path);
 	kmem_cache_free(rds_conn_slab, conn);
 
 	spin_lock_irqsave(&rds_conn_lock, flags);
@@ -464,8 +477,12 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
 	     i++, head++) {
 		hlist_for_each_entry_rcu(conn, head, c_hash_node) {
 			struct rds_conn_path *cp;
+			int npaths;
+
+			npaths = (conn->c_trans->t_mp_capable ?
+				 RDS_MPATH_WORKERS : 1);
 
-			for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+			for (j = 0; j < npaths; j++) {
 				cp = &conn->c_path[j];
 				if (want_send)
 					list = &cp->cp_send_queue;
@@ -486,8 +503,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
 				}
 
 				spin_unlock_irqrestore(&cp->cp_lock, flags);
-				if (!conn->c_trans->t_mp_capable)
-					break;
 			}
 		}
 	}
@@ -571,15 +586,16 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
 	     i++, head++) {
 		hlist_for_each_entry_rcu(conn, head, c_hash_node) {
 			struct rds_conn_path *cp;
+			int npaths;
 
-			for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+			npaths = (conn->c_trans->t_mp_capable ?
+				 RDS_MPATH_WORKERS : 1);
+			for (j = 0; j < npaths; j++) {
 				cp = &conn->c_path[j];
 
 				/* XXX no cp_lock usage.. */
 				if (!visitor(cp, buffer))
 					continue;
-				if (!conn->c_trans->t_mp_capable)
-					break;
 			}
 
 			/* We copy as much as we can fit in the buffer,
@@ -664,9 +680,13 @@ void rds_conn_exit(void)
 /*
  * Force a disconnect
  */
-void rds_conn_path_drop(struct rds_conn_path *cp)
+void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
 {
 	atomic_set(&cp->cp_state, RDS_CONN_ERROR);
+
+	if (!destroy && cp->cp_conn->c_destroy_in_prog)
+		return;
+
 	queue_work(rds_wq, &cp->cp_down_w);
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_drop);
@@ -674,7 +694,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop);
 void rds_conn_drop(struct rds_connection *conn)
 {
 	WARN_ON(conn->c_trans->t_mp_capable);
-	rds_conn_path_drop(&conn->c_path[0]);
+	rds_conn_path_drop(&conn->c_path[0], false);
 }
 EXPORT_SYMBOL_GPL(rds_conn_drop);
 
@@ -706,5 +726,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
 	vprintk(fmt, ap);
 	va_end(ap);
 
-	rds_conn_path_drop(cp);
+	rds_conn_path_drop(cp, false);
 }
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a0954ace3774..36dd2099048a 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -126,6 +126,7 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
 static void rds_ib_add_one(struct ib_device *device)
 {
 	struct rds_ib_device *rds_ibdev;
+	bool has_fr, has_fmr;
 
 	/* Only handle IB (no iWARP) devices */
 	if (device->node_type != RDMA_NODE_IB_CA)
@@ -143,11 +144,11 @@ static void rds_ib_add_one(struct ib_device *device)
 	rds_ibdev->max_wrs = device->attrs.max_qp_wr;
 	rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
 
-	rds_ibdev->has_fr = (device->attrs.device_cap_flags &
-				  IB_DEVICE_MEM_MGT_EXTENSIONS);
-	rds_ibdev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
-			    device->map_phys_fmr && device->unmap_fmr);
-	rds_ibdev->use_fastreg = (rds_ibdev->has_fr && !rds_ibdev->has_fmr);
+	has_fr = (device->attrs.device_cap_flags &
+		  IB_DEVICE_MEM_MGT_EXTENSIONS);
+	has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+		   device->map_phys_fmr && device->unmap_fmr);
+	rds_ibdev->use_fastreg = (has_fr && !has_fmr);
 
 	rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
 	rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
diff --git a/net/rds/ib.h b/net/rds/ib.h
index bf4822407567..a6f4d7d68e95 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _RDS_IB_H
 #define _RDS_IB_H
 
@@ -215,8 +216,6 @@ struct rds_ib_device {
 	struct list_head	conn_list;
 	struct ib_device	*dev;
 	struct ib_pd		*pd;
-	bool                    has_fmr;
-	bool                    has_fr;
 	bool                    use_fastreg;
 
 	unsigned int		max_mrs;
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index 86ef907067bb..e0f70c4051b6 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -139,8 +139,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
 		return -EINVAL;
 	}
 
-	dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
-				 rdsibdev_to_node(rds_ibdev));
+	dma_pages = kmalloc_array_node(page_cnt, sizeof(u64), GFP_ATOMIC,
+				       rdsibdev_to_node(rds_ibdev));
 	if (!dma_pages) {
 		ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
 		return -ENOMEM;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 9a3c54e659e9..e678699268a2 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -601,11 +601,11 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
 	if (pool_type == RDS_IB_MR_1M_POOL) {
 		/* +1 allows for unaligned MRs */
 		pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
-		pool->max_items = RDS_MR_1M_POOL_SIZE;
+		pool->max_items = rds_ibdev->max_1m_mrs;
 	} else {
 		/* pool_type == RDS_IB_MR_8K_POOL */
 		pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
-		pool->max_items = RDS_MR_8K_POOL_SIZE;
+		pool->max_items = rds_ibdev->max_8k_mrs;
 	}
 
 	pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 9722bf839d9d..b4e421aa9727 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -410,14 +410,14 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 			break;
 		}
 
-		/* XXX when can this fail? */
-		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
-		rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
+		rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
 			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
 			 (long) ib_sg_dma_address(
 				ic->i_cm_id->device,
-				&recv->r_frag->f_sg),
-			ret);
+				&recv->r_frag->f_sg));
+
+		/* XXX when can this fail? */
+		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
 		if (ret) {
 			rds_ib_conn_error(conn, "recv post on "
 			       "%pI4 returned %d, disconnecting and "
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 6ab39dbcca01..8557a1cae041 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -661,13 +661,15 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 			}
 		}
 
-		rds_ib_set_wr_signal_state(ic, send, 0);
+		rds_ib_set_wr_signal_state(ic, send, false);
 
 		/*
 		 * Always signal the last one if we're stopping due to flow control.
 		 */
-		if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
-			send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+		if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) {
+			rds_ib_set_wr_signal_state(ic, send, true);
+			send->s_wr.send_flags |= IB_SEND_SOLICITED;
+		}
 
 		if (send->s_wr.send_flags & IB_SEND_SIGNALED)
 			nr_sig++;
@@ -705,11 +707,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 	if (scat == &rm->data.op_sg[rm->data.op_count]) {
 		prev->s_op = ic->i_data_op;
 		prev->s_wr.send_flags |= IB_SEND_SOLICITED;
-		if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) {
-			ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
-			prev->s_wr.send_flags |= IB_SEND_SIGNALED;
-			nr_sig++;
-		}
+		if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED))
+			nr_sig += rds_ib_set_wr_signal_state(ic, prev, true);
 		ic->i_data_op = NULL;
 	}
 
@@ -792,6 +791,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
 		send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
 		send->s_atomic_wr.swap_mask = 0;
 	}
+	send->s_wr.send_flags = 0;
 	nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
 	send->s_atomic_wr.wr.num_sge = 1;
 	send->s_atomic_wr.wr.next = NULL;
diff --git a/net/rds/info.h b/net/rds/info.h
index b6c052ca7d22..a069b51c4679 100644
--- a/net/rds/info.h
+++ b/net/rds/info.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _RDS_INFO_H
 #define _RDS_INFO_H
 
diff --git a/net/rds/loop.h b/net/rds/loop.h
index f32b0939a04d..469fa4b2da4f 100644
--- a/net/rds/loop.h
+++ b/net/rds/loop.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _RDS_LOOP_H
 #define _RDS_LOOP_H
 
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index ff2010e9d20c..d309c4430124 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _RDMA_TRANSPORT_H
 #define _RDMA_TRANSPORT_H
 
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 516bcc89b46f..c349c71babff 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _RDS_RDS_H
 #define _RDS_RDS_H
 
@@ -154,7 +155,7 @@ struct rds_connection {
 	struct list_head	c_map_item;
 	unsigned long		c_map_queued;
 
-	struct rds_conn_path	c_path[RDS_MPATH_WORKERS];
+	struct rds_conn_path	*c_path;
 	wait_queue_head_t	c_hs_waitq; /* handshake waitq */
 
 	u32			c_my_gen_num;
@@ -700,7 +701,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
 void rds_conn_shutdown(struct rds_conn_path *cpath);
 void rds_conn_destroy(struct rds_connection *conn);
 void rds_conn_drop(struct rds_connection *conn);
-void rds_conn_path_drop(struct rds_conn_path *cpath);
+void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
 void rds_conn_connect_if_down(struct rds_connection *conn);
 void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
 void rds_for_each_conn_info(struct socket *sock, unsigned int len,
diff --git a/net/rds/rds_single_path.h b/net/rds/rds_single_path.h
index e1241af7c1ad..9521f6e99bef 100644
--- a/net/rds/rds_single_path.h
+++ b/net/rds/rds_single_path.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _RDS_RDS_SINGLE_H
 #define _RDS_RDS_SINGLE_H
 
diff --git a/net/rds/send.c b/net/rds/send.c
index 41b9f0f5bb9c..b52cdc8ae428 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -273,7 +273,7 @@ restart:
 			len = ntohl(rm->m_inc.i_hdr.h_len);
 			if (cp->cp_unacked_packets == 0 ||
 			    cp->cp_unacked_bytes < len) {
-				__set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
+				set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
 
 				cp->cp_unacked_packets =
 					rds_sysctl_max_unacked_packets;
@@ -428,14 +428,18 @@ over_batch:
 	 * some work and we will skip our goto
 	 */
 	if (ret == 0) {
+		bool raced;
+
 		smp_mb();
+		raced = send_gen != READ_ONCE(cp->cp_send_gen);
+
 		if ((test_bit(0, &conn->c_map_queued) ||
-		     !list_empty(&cp->cp_send_queue)) &&
-			send_gen == READ_ONCE(cp->cp_send_gen)) {
-			rds_stats_inc(s_send_lock_queue_raced);
+		    !list_empty(&cp->cp_send_queue)) && !raced) {
 			if (batch_count < send_batch_count)
 				goto restart;
 			queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+		} else if (raced) {
+			rds_stats_inc(s_send_lock_queue_raced);
 		}
 	}
 out:
@@ -829,7 +833,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
 		 * throughput hits a certain threshold.
 		 */
 		if (rs->rs_snd_bytes >= rds_sk_sndbuf(rs) / 2)
-			__set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
+			set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
 
 		list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
 		set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 431404dbdad1..6b7ee71f40c6 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 			continue;
 
 		/* reconnect with new parameters */
-		rds_conn_path_drop(tc->t_cpath);
+		rds_conn_path_drop(tc->t_cpath, false);
 	}
 	spin_unlock_irq(&rds_tcp_conn_lock);
 }
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index f8800b7ce79c..1aafbf7c3011 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _RDS_TCP_H
 #define _RDS_TCP_H
 
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index cbe08a1fa4c7..46f74dad0e16 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk)
 		if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
 		    rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
 					     RDS_CONN_ERROR)) {
-			rds_conn_path_drop(cp);
+			rds_conn_path_drop(cp, false);
 		} else {
 			rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
 		}
 		break;
 	case TCP_CLOSE_WAIT:
 	case TCP_CLOSE:
-		rds_conn_path_drop(cp);
+		rds_conn_path_drop(cp, false);
 	default:
 		break;
 	}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 0d8616aa5bad..dc860d1bb608 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -157,7 +157,7 @@ out:
 					"returned %d, "
 					"disconnecting and reconnecting\n",
 					&conn->c_faddr, cp->cp_index, ret);
-				rds_conn_path_drop(cp);
+				rds_conn_path_drop(cp, false);
 			}
 		}
 	}
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 2852bc1d37d4..f121daa402c8 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
 				"current state is %d\n",
 				__func__,
 				atomic_read(&cp->cp_state));
-		rds_conn_path_drop(cp);
+		rds_conn_path_drop(cp, false);
 		return;
 	}
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 4a9729257023..6a5c4992cf61 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -318,9 +318,11 @@ void rose_destroy_socket(struct sock *);
 /*
  *	Handler for deferred kills.
  */
-static void rose_destroy_timer(unsigned long data)
+static void rose_destroy_timer(struct timer_list *t)
 {
-	rose_destroy_socket((struct sock *)data);
+	struct sock *sk = from_timer(sk, t, sk_timer);
+
+	rose_destroy_socket(sk);
 }
 
 /*
@@ -353,8 +355,7 @@ void rose_destroy_socket(struct sock *sk)
 
 	if (sk_has_allocations(sk)) {
 		/* Defer: outstanding buffers */
-		setup_timer(&sk->sk_timer, rose_destroy_timer,
-				(unsigned long)sk);
+		timer_setup(&sk->sk_timer, rose_destroy_timer, 0);
 		sk->sk_timer.expires  = jiffies + 10 * HZ;
 		add_timer(&sk->sk_timer);
 	} else
@@ -538,8 +539,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol,
 	sock->ops    = &rose_proto_ops;
 	sk->sk_protocol = protocol;
 
-	init_timer(&rose->timer);
-	init_timer(&rose->idletimer);
+	timer_setup(&rose->timer, NULL, 0);
+	timer_setup(&rose->idletimer, NULL, 0);
 
 	rose->t1   = msecs_to_jiffies(sysctl_rose_call_request_timeout);
 	rose->t2   = msecs_to_jiffies(sysctl_rose_reset_request_timeout);
@@ -582,8 +583,8 @@ static struct sock *rose_make_new(struct sock *osk)
 	sk->sk_state    = TCP_ESTABLISHED;
 	sock_copy_flags(sk, osk);
 
-	init_timer(&rose->timer);
-	init_timer(&rose->idletimer);
+	timer_setup(&rose->timer, NULL, 0);
+	timer_setup(&rose->idletimer, NULL, 0);
 
 	orose		= rose_sk(osk);
 	rose->t1	= orose->t1;
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 0a6394754e81..9bbbfe325c5a 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -219,6 +219,7 @@ static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int framety
 	switch (frametype) {
 	case ROSE_RESET_REQUEST:
 		rose_write_internal(sk, ROSE_RESET_CONFIRMATION);
+		/* fall through */
 	case ROSE_RESET_CONFIRMATION:
 		rose_stop_timer(sk);
 		rose_start_idletimer(sk);
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index c76638cc2cd5..cda4c6678ef1 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -27,8 +27,8 @@
 #include <linux/interrupt.h>
 #include <net/rose.h>
 
-static void rose_ftimer_expiry(unsigned long);
-static void rose_t0timer_expiry(unsigned long);
+static void rose_ftimer_expiry(struct timer_list *);
+static void rose_t0timer_expiry(struct timer_list *);
 
 static void rose_transmit_restart_confirmation(struct rose_neigh *neigh);
 static void rose_transmit_restart_request(struct rose_neigh *neigh);
@@ -37,8 +37,7 @@ void rose_start_ftimer(struct rose_neigh *neigh)
 {
 	del_timer(&neigh->ftimer);
 
-	neigh->ftimer.data     = (unsigned long)neigh;
-	neigh->ftimer.function = &rose_ftimer_expiry;
+	neigh->ftimer.function = (TIMER_FUNC_TYPE)rose_ftimer_expiry;
 	neigh->ftimer.expires  =
 		jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout);
 
@@ -49,8 +48,7 @@ static void rose_start_t0timer(struct rose_neigh *neigh)
 {
 	del_timer(&neigh->t0timer);
 
-	neigh->t0timer.data     = (unsigned long)neigh;
-	neigh->t0timer.function = &rose_t0timer_expiry;
+	neigh->t0timer.function = (TIMER_FUNC_TYPE)rose_t0timer_expiry;
 	neigh->t0timer.expires  =
 		jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout);
 
@@ -77,13 +75,13 @@ static int rose_t0timer_running(struct rose_neigh *neigh)
 	return timer_pending(&neigh->t0timer);
 }
 
-static void rose_ftimer_expiry(unsigned long param)
+static void rose_ftimer_expiry(struct timer_list *t)
 {
 }
 
-static void rose_t0timer_expiry(unsigned long param)
+static void rose_t0timer_expiry(struct timer_list *t)
 {
-	struct rose_neigh *neigh = (struct rose_neigh *)param;
+	struct rose_neigh *neigh = from_timer(neigh, t, t0timer);
 
 	rose_transmit_restart_request(neigh);
 
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 344456206b70..7af4f99c4a93 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -19,12 +19,13 @@ static struct sk_buff_head loopback_queue;
 static struct timer_list loopback_timer;
 
 static void rose_set_loopback_timer(void);
+static void rose_loopback_timer(struct timer_list *unused);
 
 void rose_loopback_init(void)
 {
 	skb_queue_head_init(&loopback_queue);
 
-	init_timer(&loopback_timer);
+	timer_setup(&loopback_timer, rose_loopback_timer, 0);
 }
 
 static int rose_loopback_running(void)
@@ -50,20 +51,16 @@ int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
 	return 1;
 }
 
-static void rose_loopback_timer(unsigned long);
 
 static void rose_set_loopback_timer(void)
 {
 	del_timer(&loopback_timer);
 
-	loopback_timer.data     = 0;
-	loopback_timer.function = &rose_loopback_timer;
 	loopback_timer.expires  = jiffies + 10;
-
 	add_timer(&loopback_timer);
 }
 
-static void rose_loopback_timer(unsigned long param)
+static void rose_loopback_timer(struct timer_list *unused)
 {
 	struct sk_buff *skb;
 	struct net_device *dev;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 452bbb38d943..8ca3124df83f 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -104,8 +104,8 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route,
 
 		skb_queue_head_init(&rose_neigh->queue);
 
-		init_timer(&rose_neigh->ftimer);
-		init_timer(&rose_neigh->t0timer);
+		timer_setup(&rose_neigh->ftimer, NULL, 0);
+		timer_setup(&rose_neigh->t0timer, NULL, 0);
 
 		if (rose_route->ndigis != 0) {
 			rose_neigh->digipeat =
@@ -346,6 +346,7 @@ static int rose_del_node(struct rose_route_struct *rose_route,
 				case 0:
 					rose_node->neighbour[0] =
 						rose_node->neighbour[1];
+					/* fall through */
 				case 1:
 					rose_node->neighbour[1] =
 						rose_node->neighbour[2];
@@ -390,8 +391,8 @@ void rose_add_loopback_neigh(void)
 
 	skb_queue_head_init(&sn->queue);
 
-	init_timer(&sn->ftimer);
-	init_timer(&sn->t0timer);
+	timer_setup(&sn->ftimer, NULL, 0);
+	timer_setup(&sn->t0timer, NULL, 0);
 
 	spin_lock_bh(&rose_neigh_list_lock);
 	sn->next = rose_neigh_list;
@@ -507,6 +508,7 @@ void rose_rt_device_down(struct net_device *dev)
 				switch (i) {
 				case 0:
 					t->neighbour[0] = t->neighbour[1];
+					/* fall through */
 				case 1:
 					t->neighbour[1] = t->neighbour[2];
 				case 2:
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index bc5469d6d9cb..ea613b2a9735 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -28,16 +28,15 @@
 #include <linux/interrupt.h>
 #include <net/rose.h>
 
-static void rose_heartbeat_expiry(unsigned long);
-static void rose_timer_expiry(unsigned long);
-static void rose_idletimer_expiry(unsigned long);
+static void rose_heartbeat_expiry(struct timer_list *t);
+static void rose_timer_expiry(struct timer_list *);
+static void rose_idletimer_expiry(struct timer_list *);
 
 void rose_start_heartbeat(struct sock *sk)
 {
 	del_timer(&sk->sk_timer);
 
-	sk->sk_timer.data     = (unsigned long)sk;
-	sk->sk_timer.function = &rose_heartbeat_expiry;
+	sk->sk_timer.function = (TIMER_FUNC_TYPE)rose_heartbeat_expiry;
 	sk->sk_timer.expires  = jiffies + 5 * HZ;
 
 	add_timer(&sk->sk_timer);
@@ -49,8 +48,7 @@ void rose_start_t1timer(struct sock *sk)
 
 	del_timer(&rose->timer);
 
-	rose->timer.data     = (unsigned long)sk;
-	rose->timer.function = &rose_timer_expiry;
+	rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
 	rose->timer.expires  = jiffies + rose->t1;
 
 	add_timer(&rose->timer);
@@ -62,8 +60,7 @@ void rose_start_t2timer(struct sock *sk)
 
 	del_timer(&rose->timer);
 
-	rose->timer.data     = (unsigned long)sk;
-	rose->timer.function = &rose_timer_expiry;
+	rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
 	rose->timer.expires  = jiffies + rose->t2;
 
 	add_timer(&rose->timer);
@@ -75,8 +72,7 @@ void rose_start_t3timer(struct sock *sk)
 
 	del_timer(&rose->timer);
 
-	rose->timer.data     = (unsigned long)sk;
-	rose->timer.function = &rose_timer_expiry;
+	rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
 	rose->timer.expires  = jiffies + rose->t3;
 
 	add_timer(&rose->timer);
@@ -88,8 +84,7 @@ void rose_start_hbtimer(struct sock *sk)
 
 	del_timer(&rose->timer);
 
-	rose->timer.data     = (unsigned long)sk;
-	rose->timer.function = &rose_timer_expiry;
+	rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
 	rose->timer.expires  = jiffies + rose->hb;
 
 	add_timer(&rose->timer);
@@ -102,8 +97,7 @@ void rose_start_idletimer(struct sock *sk)
 	del_timer(&rose->idletimer);
 
 	if (rose->idle > 0) {
-		rose->idletimer.data     = (unsigned long)sk;
-		rose->idletimer.function = &rose_idletimer_expiry;
+		rose->idletimer.function = (TIMER_FUNC_TYPE)rose_idletimer_expiry;
 		rose->idletimer.expires  = jiffies + rose->idle;
 
 		add_timer(&rose->idletimer);
@@ -125,9 +119,9 @@ void rose_stop_idletimer(struct sock *sk)
 	del_timer(&rose_sk(sk)->idletimer);
 }
 
-static void rose_heartbeat_expiry(unsigned long param)
+static void rose_heartbeat_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 	struct rose_sock *rose = rose_sk(sk);
 
 	bh_lock_sock(sk);
@@ -163,10 +157,10 @@ static void rose_heartbeat_expiry(unsigned long param)
 	bh_unlock_sock(sk);
 }
 
-static void rose_timer_expiry(unsigned long param)
+static void rose_timer_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
-	struct rose_sock *rose = rose_sk(sk);
+	struct rose_sock *rose = from_timer(rose, t, timer);
+	struct sock *sk = &rose->sock;
 
 	bh_lock_sock(sk);
 	switch (rose->state) {
@@ -192,9 +186,10 @@ static void rose_timer_expiry(unsigned long param)
 	bh_unlock_sock(sk);
 }
 
-static void rose_idletimer_expiry(unsigned long param)
+static void rose_idletimer_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
+	struct rose_sock *rose = from_timer(rose, t, idletimer);
+	struct sock *sk = &rose->sock;
 
 	bh_lock_sock(sk);
 	rose_clear_queues(sk);
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 9c68d2f8ba39..6ffb7e9887ce 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for Linux kernel RxRPC
 #
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index a2ad4482376f..9b5c46b052fd 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -246,6 +246,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
 			ret = 0;
 			break;
 		}
+		/* Fall through */
 	default:
 		ret = -EBUSY;
 		break;
@@ -265,6 +266,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
  * @tx_total_len: Total length of data to transmit during the call (or -1)
  * @gfp: The allocation constraints
  * @notify_rx: Where to send notifications instead of socket queue
+ * @upgrade: Request service upgrade for call
  *
  * Allow a kernel service to begin a call on the nominated socket.  This just
  * sets up all the internal tracking structures and allocates connection and
@@ -279,7 +281,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 					   unsigned long user_call_ID,
 					   s64 tx_total_len,
 					   gfp_t gfp,
-					   rxrpc_notify_rx_t notify_rx)
+					   rxrpc_notify_rx_t notify_rx,
+					   bool upgrade)
 {
 	struct rxrpc_conn_parameters cp;
 	struct rxrpc_call *call;
@@ -304,19 +307,29 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 	cp.key			= key;
 	cp.security_level	= 0;
 	cp.exclusive		= false;
+	cp.upgrade		= upgrade;
 	cp.service_id		= srx->srx_service;
 	call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
 				     gfp);
 	/* The socket has been unlocked. */
-	if (!IS_ERR(call))
+	if (!IS_ERR(call)) {
 		call->notify_rx = notify_rx;
+		mutex_unlock(&call->user_mutex);
+	}
 
-	mutex_unlock(&call->user_mutex);
 	_leave(" = %p", call);
 	return call;
 }
 EXPORT_SYMBOL(rxrpc_kernel_begin_call);
 
+/* Dummy (empty) notify_rx handler used to stop the notifier talking to
+ * recvmsg(); rxrpc_kernel_end_call() swaps it in for the caller's handler.
+ */
+static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+				  unsigned long call_user_ID)
+{
+}
+
 /**
  * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
  * @sock: The socket the call is on
@@ -331,12 +344,108 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
 
 	mutex_lock(&call->user_mutex);
 	rxrpc_release_call(rxrpc_sk(sock->sk), call);
+
+	/* Make sure we're not going to call back into a kernel service */
+	if (call->notify_rx) {
+		spin_lock_bh(&call->notify_lock);
+		call->notify_rx = rxrpc_dummy_notify_rx;
+		spin_unlock_bh(&call->notify_lock);
+	}
+
 	mutex_unlock(&call->user_mutex);
 	rxrpc_put_call(call, rxrpc_call_put_kernel);
 }
 EXPORT_SYMBOL(rxrpc_kernel_end_call);
 
 /**
+ * rxrpc_kernel_check_life - Check to see whether a call is still alive
+ * @sock: The socket the call is on (not examined here)
+ * @call: The call to check
+ *
+ * Allow a kernel service to find out whether a call is still alive - ie. we're
+ * getting ACKs from the server.  Returns call->acks_latest, a number that is
+ * meant to be compared with the value returned by a previous invocation.
+ *
+ * If this is a client call, ping ACKs will be sent to the server to find out
+ * whether it's still responsive and whether the call is still alive on the
+ * server.  NOTE(review): no ping is issued by this function - confirm where.
+ */
+u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+{
+	return call->acks_latest;
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_life);
+
+/**
+ * rxrpc_kernel_check_call - Check a call's state
+ * @sock: The socket the call is on
+ * @call: The call to check
+ * @_compl: Where to store the completion state
+ * @_abort_code: Where to store any abort code
+ *
+ * Allow a kernel service to query the state of a call and find out the manner
+ * of its termination if it has completed.  Returns -EINPROGRESS if the call is
+ * still going (the outputs are then left unwritten), 0 if it finished
+ * successfully, -ECONNABORTED if aborted, else another error (call->error).
+ */
+int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call,
+			    enum rxrpc_call_completion *_compl, u32 *_abort_code)
+{
+	if (call->state != RXRPC_CALL_COMPLETE)
+		return -EINPROGRESS;
+	smp_rmb(); /* order the state check before reading the results below */
+	*_compl = call->completion;
+	*_abort_code = call->abort_code;
+	return call->error;
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_call);
+
+/**
+ * rxrpc_kernel_retry_call - Allow a kernel service to retry a call
+ * @sock: The socket the call is on
+ * @call: The call to retry
+ * @srx: The address of the peer to contact
+ * @key: The security context to use (defaults to socket setting)
+ *
+ * Let a kernel service resend a client call that failed due to a network
+ * error, this time to a new address.  The Tx queue is kept intact, so request
+ * data that has already been buffered need not be re-encrypted.  The work is
+ * done under call->user_mutex; returns 0 or a negative error code.
+ */
+int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call,
+			    struct sockaddr_rxrpc *srx, struct key *key)
+{
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	struct rxrpc_conn_parameters cp;
+	int ret;
+
+	_enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+
+	if (!key)
+		key = rx->key;
+	if (key && !key->payload.data[0])
+		key = NULL; /* a no-security key */
+
+	/* Fields not assigned below (e.g. upgrade) stay zeroed by the memset */
+	memset(&cp, 0, sizeof(cp));
+	cp.local		= rx->local;
+	cp.key			= key;
+	cp.security_level	= 0;
+	cp.exclusive		= false;
+	cp.service_id		= srx->srx_service;
+
+	mutex_lock(&call->user_mutex);
+	ret = rxrpc_prepare_call_for_retry(rx, call);
+	if (!ret)
+		ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL);
+	mutex_unlock(&call->user_mutex);
+
+	_leave(" = %d", ret);
+	return ret;
+}
+EXPORT_SYMBOL(rxrpc_kernel_retry_call);
+
+/**
  * rxrpc_kernel_new_call_notification - Get notifications of new calls
  * @sock: The socket to intercept received messages on
  * @notify_new_call: Function to be called when new calls appear
@@ -468,6 +577,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 			m->msg_name = &rx->connect_srx;
 			m->msg_namelen = sizeof(rx->connect_srx);
 		}
+		/* Fall through */
 	case RXRPC_SERVER_BOUND:
 	case RXRPC_SERVER_LISTENING:
 		ret = rxrpc_do_sendmsg(rx, m, len);
@@ -591,13 +701,13 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
 			    char __user *optval, int __user *_optlen)
 {
 	int optlen;
-	
+
 	if (level != SOL_RXRPC)
 		return -EOPNOTSUPP;
 
 	if (get_user(optlen, _optlen))
 		return -EFAULT;
-	
+
 	switch (optname) {
 	case RXRPC_SUPPORTED_CMSG:
 		if (optlen < sizeof(int))
@@ -606,7 +716,7 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
 		    put_user(sizeof(int), _optlen))
 			return -EFAULT;
 		return 0;
-		
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 69b97339ff9d..b2151993d384 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -15,7 +15,7 @@
 #include <net/netns/generic.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
-#include <rxrpc/packet.h>
+#include "protocol.h"
 
 #if 0
 #define CHECK_SLAB_OKAY(X)				     \
@@ -445,6 +445,7 @@ enum rxrpc_call_flag {
 	RXRPC_CALL_EXPOSED,		/* The call was exposed to the world */
 	RXRPC_CALL_RX_LAST,		/* Received the last packet (at rxtx_top) */
 	RXRPC_CALL_TX_LAST,		/* Last packet in Tx buffer (at rxtx_top) */
+	RXRPC_CALL_TX_LASTQ,		/* Last packet has been queued */
 	RXRPC_CALL_SEND_PING,		/* A ping will need to be sent */
 	RXRPC_CALL_PINGING,		/* Ping in process */
 	RXRPC_CALL_RETRANS_TIMEOUT,	/* Retransmission due to timeout occurred */
@@ -482,18 +483,6 @@ enum rxrpc_call_state {
 };
 
 /*
- * Call completion condition (state == RXRPC_CALL_COMPLETE).
- */
-enum rxrpc_call_completion {
-	RXRPC_CALL_SUCCEEDED,		/* - Normal termination */
-	RXRPC_CALL_REMOTELY_ABORTED,	/* - call aborted by peer */
-	RXRPC_CALL_LOCALLY_ABORTED,	/* - call aborted locally on error or close */
-	RXRPC_CALL_LOCAL_ERROR,		/* - call failed due to local error */
-	RXRPC_CALL_NETWORK_ERROR,	/* - call terminated by network error */
-	NR__RXRPC_CALL_COMPLETIONS
-};
-
-/*
  * Call Tx congestion management modes.
  */
 enum rxrpc_congest_mode {
@@ -536,6 +525,7 @@ struct rxrpc_call {
 	unsigned long		flags;
 	unsigned long		events;
 	spinlock_t		lock;
+	spinlock_t		notify_lock;	/* Kernel notification lock */
 	rwlock_t		state_lock;	/* lock for state transition */
 	u32			abort_code;	/* Local/remote abort code */
 	int			error;		/* Local error incurred */
@@ -687,9 +677,15 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
 					 struct rxrpc_conn_parameters *,
 					 struct sockaddr_rxrpc *,
 					 unsigned long, s64, gfp_t);
+int rxrpc_retry_client_call(struct rxrpc_sock *,
+			    struct rxrpc_call *,
+			    struct rxrpc_conn_parameters *,
+			    struct sockaddr_rxrpc *,
+			    gfp_t);
 void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
 			 struct sk_buff *);
 void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
+int rxrpc_prepare_call_for_retry(struct rxrpc_sock *, struct rxrpc_call *);
 void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
 bool __rxrpc_queue_call(struct rxrpc_call *);
 bool rxrpc_queue_call(struct rxrpc_call *);
@@ -830,7 +826,6 @@ void rxrpc_process_connection(struct work_struct *);
  */
 extern unsigned int rxrpc_connection_expiry;
 
-int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
 struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
 struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
 						   struct sk_buff *);
@@ -894,7 +889,7 @@ extern struct key_type key_type_rxrpc_s;
 
 int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
 int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
-int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
+int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t,
 			      u32);
 
 /*
@@ -1060,7 +1055,8 @@ static inline void rxrpc_sysctl_exit(void) {}
 /*
  * utils.c
  */
-int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
+int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *,
+				struct sk_buff *);
 
 static inline bool before(u32 seq1, u32 seq2)
 {
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index dd30d74824b0..cbd1701e813a 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -223,6 +223,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
 	tail = b->call_backlog_tail;
 	while (CIRC_CNT(head, tail, size) > 0) {
 		struct rxrpc_call *call = b->call_backlog[tail];
+		call->socket = rx;
 		if (rx->discard_new_call) {
 			_debug("discard %lx", call->user_call_ID);
 			rx->discard_new_call(call, call->user_call_ID);
@@ -276,7 +277,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
 		 * anticipation - and to save on stack space.
 		 */
 		xpeer = b->peer_backlog[peer_tail];
-		if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0)
+		if (rxrpc_extract_addr_from_skb(local, &xpeer->srx, skb) < 0)
 			return NULL;
 
 		peer = rxrpc_lookup_incoming_peer(local, xpeer);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 7a77844aab16..3574508baf9a 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -386,7 +386,7 @@ recheck_state:
 
 	now = ktime_get_real();
 	if (ktime_before(call->expire_at, now)) {
-		rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
+		rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
 		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
 		goto recheck_state;
 	}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index d7809a0620b4..4c7fbc6dcce7 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -124,6 +124,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 	INIT_LIST_HEAD(&call->sock_link);
 	init_waitqueue_head(&call->waitq);
 	spin_lock_init(&call->lock);
+	spin_lock_init(&call->notify_lock);
 	rwlock_init(&call->state_lock);
 	atomic_set(&call->usage, 1);
 	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
@@ -269,11 +270,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 	trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
 			 here, NULL);
 
-	spin_lock_bh(&call->conn->params.peer->lock);
-	hlist_add_head(&call->error_link,
-		       &call->conn->params.peer->error_targets);
-	spin_unlock_bh(&call->conn->params.peer->lock);
-
 	rxrpc_start_call_timer(call);
 
 	_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
@@ -304,6 +300,48 @@ error:
 }
 
 /*
+ * Retry a call to a new address.  It is expected that the Tx queue of the call
+ * will contain data previously packaged for an old call.  Returns 0, or else
+ * marks the call completed with a local error and returns the connect error.
+ */
+int rxrpc_retry_client_call(struct rxrpc_sock *rx,
+			    struct rxrpc_call *call,
+			    struct rxrpc_conn_parameters *cp,
+			    struct sockaddr_rxrpc *srx,
+			    gfp_t gfp)
+{
+	const void *here = __builtin_return_address(0);
+	int ret;
+
+	/* Set up or get a connection record and set the protocol parameters,
+	 * including channel number and call ID.
+	 */
+	ret = rxrpc_connect_call(call, cp, srx, gfp);
+	if (ret < 0) {
+		rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+					  RX_CALL_DEAD, ret);
+		trace_rxrpc_call(call, rxrpc_call_error,
+				 atomic_read(&call->usage), here, ERR_PTR(ret));
+		_leave(" = %d", ret);
+		return ret;
+	}
+
+	trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
+			 here, NULL);
+
+	rxrpc_start_call_timer(call);
+
+	_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+	/* Raise a resend event; queue the call unless one was already set */
+	if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+		rxrpc_queue_call(call);
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
  * Set up an incoming call.  call->conn points to the connection.
  * This is called in BH context and isn't allowed to fail.
  */
@@ -471,6 +509,61 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 }
 
 /*
+ * Prepare a kernel service call for retry.  The call must have completed
+ * without an abort.  It is disconnected and, if it is a client call still in
+ * Tx phase with nothing hard-ACK'd or received, reset for retransmission.
+ * Returns 0, or -EINVAL if ineligible (it has been disconnected regardless).
+ */
+int rxrpc_prepare_call_for_retry(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+	const void *here = __builtin_return_address(0);
+	int i;
+
+	_enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
+
+	trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
+			 here, (const void *)call->flags);
+
+	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+	ASSERTCMP(call->completion, !=, RXRPC_CALL_REMOTELY_ABORTED);
+	ASSERTCMP(call->completion, !=, RXRPC_CALL_LOCALLY_ABORTED);
+	ASSERT(list_empty(&call->recvmsg_link));
+
+	del_timer_sync(&call->timer);
+	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, call->conn);
+	if (call->conn)
+		rxrpc_disconnect_call(call);
+
+	if (rxrpc_is_service_call(call) ||
+	    !call->tx_phase ||
+	    call->tx_hard_ack != 0 ||
+	    call->rx_hard_ack != 0 ||
+	    call->rx_top != 0)
+		return -EINVAL;
+
+	call->state = RXRPC_CALL_UNINITIALISED;
+	call->completion = RXRPC_CALL_SUCCEEDED;
+	call->call_id = 0;
+	call->cid = 0;
+	call->cong_cwnd = 0;
+	call->cong_extra = 0;
+	call->cong_ssthresh = 0;
+	call->cong_mode = 0;
+	call->cong_dup_acks = 0;
+	call->cong_cumul_acks = 0;
+	call->acks_lowest_nak = 0;
+
+	/* Mark every slot for retransmission, keeping only its LAST flag */
+	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
+		call->rxtx_annotations[i] &= RXRPC_TX_ANNO_LAST;
+		call->rxtx_annotations[i] |= RXRPC_TX_ANNO_RETRANS;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
  * release all the calls associated with a socket
  */
 void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index eb2157680399..5f9624bd311c 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -555,7 +555,10 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
 
 	write_lock_bh(&call->state_lock);
-	call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+	if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags))
+		call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+	else
+		call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
 	write_unlock_bh(&call->state_lock);
 
 	rxrpc_see_call(call);
@@ -688,15 +691,23 @@ int rxrpc_connect_call(struct rxrpc_call *call,
 
 	ret = rxrpc_get_client_conn(call, cp, srx, gfp);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	rxrpc_animate_client_conn(rxnet, call->conn);
 	rxrpc_activate_channels(call->conn);
 
 	ret = rxrpc_wait_for_channel(call, gfp);
-	if (ret < 0)
+	if (ret < 0) {
 		rxrpc_disconnect_client_call(call);
+		goto out;
+	}
+
+	spin_lock_bh(&call->conn->params.peer->lock);
+	hlist_add_head(&call->error_link,
+		       &call->conn->params.peer->error_targets);
+	spin_unlock_bh(&call->conn->params.peer->lock);
 
+out:
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 929b50d5afe8..fe575798592f 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -72,7 +72,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
 
 	_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
 
-	if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+	if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
 		goto not_found;
 
 	k.epoch	= sp->hdr.epoch;
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index e60fcd2a4a02..f6fcdb3130a1 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -50,12 +50,11 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
 			else if (conn->proto.index_key > k.index_key)
 				p = rcu_dereference_raw(p->rb_right);
 			else
-				goto done;
+				break;
 			conn = NULL;
 		}
 	} while (need_seqretry(&peer->service_conn_lock, seq));
 
-done:
 	done_seqretry(&peer->service_conn_lock, seq);
 	_leave(" = %d", conn ? conn->debug_id : -1);
 	return conn;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index e56e23ed2229..1b592073ec96 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -298,8 +298,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
 
 	write_unlock(&call->state_lock);
 	if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
-		rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
-				  rxrpc_propose_ack_client_tx_end);
 		trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
 	} else {
 		trace_rxrpc_transmit(call, rxrpc_transmit_end);
@@ -1125,6 +1123,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
 	case RXRPC_PACKET_TYPE_BUSY:
 		if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
 			goto discard;
+		/* Fall through */
 
 	case RXRPC_PACKET_TYPE_DATA:
 		if (sp->hdr.callNumber == 0)
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 54369225766e..e7f6b8823eb6 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -92,6 +92,7 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep,
 				    const __be32 *xdr, unsigned int toklen)
 {
 	struct rxrpc_key_token *token, **pptoken;
+	time64_t expiry;
 	size_t plen;
 	u32 tktlen;
 
@@ -158,8 +159,9 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep,
 	     pptoken = &(*pptoken)->next)
 		continue;
 	*pptoken = token;
-	if (token->kad->expiry < prep->expiry)
-		prep->expiry = token->kad->expiry;
+	expiry = rxrpc_u32_to_time64(token->kad->expiry);
+	if (expiry < prep->expiry)
+		prep->expiry = expiry;
 
 	_leave(" = 0");
 	return 0;
@@ -433,6 +435,7 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep,
 	struct rxrpc_key_token *token, **pptoken;
 	struct rxk5_key *rxk5;
 	const __be32 *end_xdr = xdr + (toklen >> 2);
+	time64_t expiry;
 	int ret;
 
 	_enter(",{%x,%x,%x,%x},%u",
@@ -533,8 +536,9 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep,
 	     pptoken = &(*pptoken)->next)
 		continue;
 	*pptoken = token;
-	if (token->kad->expiry < prep->expiry)
-		prep->expiry = token->kad->expiry;
+	expiry = rxrpc_u32_to_time64(token->k5->endtime);
+	if (expiry < prep->expiry)
+		prep->expiry = expiry;
 
 	_leave(" = 0");
 	return 0;
@@ -691,6 +695,7 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep)
 {
 	const struct rxrpc_key_data_v1 *v1;
 	struct rxrpc_key_token *token, **pp;
+	time64_t expiry;
 	size_t plen;
 	u32 kver;
 	int ret;
@@ -777,8 +782,9 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep)
 	while (*pp)
 		pp = &(*pp)->next;
 	*pp = token;
-	if (token->kad->expiry < prep->expiry)
-		prep->expiry = token->kad->expiry;
+	expiry = rxrpc_u32_to_time64(token->kad->expiry);
+	if (expiry < prep->expiry)
+		prep->expiry = expiry;
 	token = NULL;
 	ret = 0;
 
@@ -955,7 +961,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
  */
 int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
 			      const void *session_key,
-			      time_t expiry,
+			      time64_t expiry,
 			      u32 kvno)
 {
 	const struct cred *cred = current_cred();
@@ -982,7 +988,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
 	data.kver = 1;
 	data.v1.security_index = RXRPC_SECURITY_RXKAD;
 	data.v1.ticket_length = 0;
-	data.v1.expiry = expiry;
+	data.v1.expiry = rxrpc_time64_to_u32(expiry);
 	data.v1.kvno = 0;
 
 	memcpy(&data.v1.session_key, session_key, sizeof(data.v1.session_key));
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 540d3955c1bc..93b5d910b4a1 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -39,7 +39,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
 
 	_enter("");
 
-	if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+	if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
 		return;
 
 	msg.msg_name	= &srx.transport;
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5bd2d0fa4a03..f47659c7b224 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,7 +35,8 @@ struct rxrpc_abort_buffer {
 /*
  * Fill out an ACK packet.
  */
-static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
+static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
+				 struct rxrpc_call *call,
 				 struct rxrpc_ack_buffer *pkt,
 				 rxrpc_seq_t *_hard_ack,
 				 rxrpc_seq_t *_top,
@@ -77,8 +78,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
 		} while (before_eq(seq, top));
 	}
 
-	mtu = call->conn->params.peer->if_mtu;
-	mtu -= call->conn->params.peer->hdrsize;
+	mtu = conn->params.peer->if_mtu;
+	mtu -= conn->params.peer->hdrsize;
 	jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
 	pkt->ackinfo.rxMTU	= htonl(rxrpc_rx_mtu);
 	pkt->ackinfo.maxMTU	= htonl(mtu);
@@ -148,7 +149,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
 		}
 		call->ackr_reason = 0;
 	}
-	n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason);
+	n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
 
 	spin_unlock_bh(&call->lock);
 
@@ -221,6 +222,16 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
 	rxrpc_serial_t serial;
 	int ret;
 
+	/* Don't bother sending aborts for a client call once the server has
+	 * hard-ACK'd all of its request data.  After that point, we're not
+	 * going to stop the operation proceeding, and whilst we might limit
+	 * the reply, it's not worth it if we can send a new call on the same
+	 * channel instead, thereby closing off this call.
+	 */
+	if (rxrpc_is_client_call(call) &&
+	    test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+		return 0;
+
 	spin_lock_bh(&call->lock);
 	if (call->conn)
 		conn = rxrpc_get_connection_maybe(call->conn);
@@ -444,7 +455,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
 		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
 		sp = rxrpc_skb(skb);
 
-		if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
+		if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) {
 			msg.msg_namelen = srx.transport_len;
 
 			code = htonl(skb->priority);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 1ed9c0c2e94f..7f749505e699 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -37,6 +37,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
 
 	memset(&srx, 0, sizeof(srx));
 	srx.transport_type = local->srx.transport_type;
+	srx.transport_len = local->srx.transport_len;
 	srx.transport.family = local->srx.transport.family;
 
 	/* Can we see an ICMP4 packet on an ICMP6 listening socket?  and vice
@@ -45,7 +46,6 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
 	switch (srx.transport.family) {
 	case AF_INET:
 		srx.transport.sin.sin_port = serr->port;
-		srx.transport_len = sizeof(struct sockaddr_in);
 		switch (serr->ee.ee_origin) {
 		case SO_EE_ORIGIN_ICMP:
 			_net("Rx ICMP");
@@ -69,7 +69,6 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
 #ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
 		srx.transport.sin6.sin6_port = serr->port;
-		srx.transport_len = sizeof(struct sockaddr_in6);
 		switch (serr->ee.ee_origin) {
 		case SO_EE_ORIGIN_ICMP6:
 			_net("Rx ICMP6");
@@ -79,6 +78,9 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
 			break;
 		case SO_EE_ORIGIN_ICMP:
 			_net("Rx ICMP on v6 sock");
+			srx.transport.sin6.sin6_addr.s6_addr32[0] = 0;
+			srx.transport.sin6.sin6_addr.s6_addr32[1] = 0;
+			srx.transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
 			memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12,
 			       skb_network_header(skb) + serr->addr_offset,
 			       sizeof(struct in_addr));
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 5787f97f5330..d02a99f37f5f 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -411,3 +411,16 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
 	*_srx = call->peer->srx;
 }
 EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+
+/**
+ * rxrpc_kernel_get_rtt - Get a call's peer RTT
+ * @sock: The socket on which the call is in progress (not examined here).
+ * @call: The call to query
+ *
+ * Return the call's peer RTT (call->peer->rtt), which is in nanoseconds.
+ */
+u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call)
+{
+	return call->peer->rtt;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_rtt);
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
new file mode 100644
index 000000000000..4bddcf3face3
--- /dev/null
+++ b/net/rxrpc/protocol.h
@@ -0,0 +1,190 @@
+/* protocol.h: Rx packet layout and definitions
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_RXRPC_PACKET_H
+#define _LINUX_RXRPC_PACKET_H
+
+typedef u32	rxrpc_seq_t;	/* Rx message sequence number */
+typedef u32	rxrpc_serial_t;	/* Rx message serial number */
+typedef __be32	rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */
+typedef __be32	rxrpc_serial_net_t; /* on-the-wire Rx message serial number */
+
+/*****************************************************************************/
+/*
+ * on-the-wire Rx packet header
+ * - all multibyte fields should be in network byte order
+ */
+struct rxrpc_wire_header {
+	__be32		epoch;		/* client boot timestamp */
+#define RXRPC_RANDOM_EPOCH	0x80000000	/* Random if set, date-based if not */
+
+	__be32		cid;		/* connection and channel ID */
+#define RXRPC_MAXCALLS		4			/* max active calls per conn */
+#define RXRPC_CHANNELMASK	(RXRPC_MAXCALLS-1)	/* mask for channel ID */
+#define RXRPC_CIDMASK		(~RXRPC_CHANNELMASK)	/* mask for connection ID */
+#define RXRPC_CIDSHIFT		ilog2(RXRPC_MAXCALLS)	/* shift for connection ID */
+#define RXRPC_CID_INC		(1 << RXRPC_CIDSHIFT)	/* connection ID increment */
+
+	__be32		callNumber;	/* call ID (0 for connection-level packets) */
+	__be32		seq;		/* sequence number of pkt in call stream */
+	__be32		serial;		/* serial number of pkt sent to network */
+
+	uint8_t		type;		/* packet type */
+#define RXRPC_PACKET_TYPE_DATA		1	/* data */
+#define RXRPC_PACKET_TYPE_ACK		2	/* ACK */
+#define RXRPC_PACKET_TYPE_BUSY		3	/* call reject */
+#define RXRPC_PACKET_TYPE_ABORT		4	/* call/connection abort */
+#define RXRPC_PACKET_TYPE_ACKALL	5	/* ACK all outstanding packets on call */
+#define RXRPC_PACKET_TYPE_CHALLENGE	6	/* connection security challenge (SRVR->CLNT) */
+#define RXRPC_PACKET_TYPE_RESPONSE	7	/* connection security response (CLNT->SRVR) */
+#define RXRPC_PACKET_TYPE_DEBUG		8	/* debug info request */
+#define RXRPC_PACKET_TYPE_VERSION	13	/* version string request */
+#define RXRPC_N_PACKET_TYPES		14	/* number of packet types (incl type 0) */
+
+	uint8_t		flags;		/* packet flags */
+#define RXRPC_CLIENT_INITIATED	0x01		/* signifies a packet generated by a client */
+#define RXRPC_REQUEST_ACK	0x02		/* request an unconditional ACK of this packet */
+#define RXRPC_LAST_PACKET	0x04		/* the last packet from this side for this call */
+#define RXRPC_MORE_PACKETS	0x08		/* more packets to come */
+#define RXRPC_JUMBO_PACKET	0x20		/* [DATA] this is a jumbo packet */
+#define RXRPC_SLOW_START_OK	0x20		/* [ACK] slow start supported */
+
+	uint8_t		userStatus;	/* app-layer defined status */
+#define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01	/* AuriStor service upgrade request */
+	
+	uint8_t		securityIndex;	/* security protocol ID */
+	union {
+		__be16	_rsvd;		/* reserved */
+		__be16	cksum;		/* kerberos security checksum */
+	};
+	__be16		serviceId;	/* service ID */
+
+} __packed;
+
+#define RXRPC_SUPPORTED_PACKET_TYPES (			\
+		(1 << RXRPC_PACKET_TYPE_DATA) |		\
+		(1 << RXRPC_PACKET_TYPE_ACK) |		\
+		(1 << RXRPC_PACKET_TYPE_BUSY) |		\
+		(1 << RXRPC_PACKET_TYPE_ABORT) |	\
+		(1 << RXRPC_PACKET_TYPE_ACKALL) |	\
+		(1 << RXRPC_PACKET_TYPE_CHALLENGE) |	\
+		(1 << RXRPC_PACKET_TYPE_RESPONSE) |	\
+		/*(1 << RXRPC_PACKET_TYPE_DEBUG) | */	\
+		(1 << RXRPC_PACKET_TYPE_VERSION))
+
+/*****************************************************************************/
+/*
+ * jumbo packet secondary header
+ * - can be mapped to a real (full) wire header by:
+ *   - new_serial = serial + 1
+ *   - new_seq = seq + 1
+ *   - new_flags = j_flags
+ *   - new__rsvd = j__rsvd
+ *   - duplicating all other fields
+ */
+struct rxrpc_jumbo_header {
+	uint8_t		flags;		/* packet flags (as per rxrpc_wire_header) */
+	uint8_t		pad;
+	union {
+		__be16	_rsvd;		/* reserved */
+		__be16	cksum;		/* kerberos security checksum */
+	};
+};
+
+#define RXRPC_JUMBO_DATALEN	1412	/* non-terminal jumbo packet data length */
+#define RXRPC_JUMBO_SUBPKTLEN	(RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
+
+/*****************************************************************************/
+/*
+ * on-the-wire Rx ACK packet data payload
+ * - all multibyte fields should be in network byte order
+ */
+struct rxrpc_ackpacket {
+	__be16		bufferSpace;	/* number of packet buffers available */
+	__be16		maxSkew;	/* diff between serno being ACK'd and highest serial no
+					 * received */
+	__be32		firstPacket;	/* sequence no of first ACK'd packet in attached list */
+	__be32		previousPacket;	/* sequence no of previous packet received */
+	__be32		serial;		/* serial no of packet that prompted this ACK */
+
+	uint8_t		reason;		/* reason for ACK */
+#define RXRPC_ACK_REQUESTED		1	/* ACK was requested on packet */
+#define RXRPC_ACK_DUPLICATE		2	/* duplicate packet received */
+#define RXRPC_ACK_OUT_OF_SEQUENCE	3	/* out of sequence packet received */
+#define RXRPC_ACK_EXCEEDS_WINDOW	4	/* packet received beyond end of ACK window */
+#define RXRPC_ACK_NOSPACE		5	/* packet discarded due to lack of buffer space */
+#define RXRPC_ACK_PING			6	/* keep alive ACK */
+#define RXRPC_ACK_PING_RESPONSE		7	/* response to RXRPC_ACK_PING */
+#define RXRPC_ACK_DELAY			8	/* nothing happened since received packet */
+#define RXRPC_ACK_IDLE			9	/* ACK due to fully received ACK window */
+#define RXRPC_ACK__INVALID		10	/* Representation of invalid ACK reason */
+
+	uint8_t		nAcks;		/* number of ACKs */
+#define RXRPC_MAXACKS	255
+
+	uint8_t		acks[0];	/* list of ACK/NAKs */
+#define RXRPC_ACK_TYPE_NACK		0
+#define RXRPC_ACK_TYPE_ACK		1
+
+} __packed;
+
+/* Some ACKs refer to specific packets and some are general and can be updated. */
+#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED)	|	\
+			      (1 << RXRPC_ACK_PING_RESPONSE)	|	\
+			      (1 << RXRPC_ACK_DELAY)		|	\
+			      (1 << RXRPC_ACK_IDLE))
+
+
+/*
+ * ACK packets can have a further piece of information tagged on the end
+ */
+struct rxrpc_ackinfo {
+	__be32		rxMTU;		/* maximum Rx MTU size (bytes) [AFS 3.3] */
+	__be32		maxMTU;		/* maximum interface MTU size (bytes) [AFS 3.3] */
+	__be32		rwind;		/* Rx window size (packets) [AFS 3.4] */
+	__be32		jumbo_max;	/* max packets to stick into a jumbo packet [AFS 3.5] */
+};
+
+/*****************************************************************************/
+/*
+ * Kerberos security type-2 challenge packet
+ */
+struct rxkad_challenge {
+	__be32		version;	/* version of this challenge type */
+	__be32		nonce;		/* encrypted random number */
+	__be32		min_level;	/* minimum security level */
+	__be32		__padding;	/* padding to 8-byte boundary */
+} __packed;
+
+/*****************************************************************************/
+/*
+ * Kerberos security type-2 response packet
+ */
+struct rxkad_response {
+	__be32		version;	/* version of this response type */
+	__be32		__pad;
+
+	/* encrypted bit of the response */
+	struct {
+		__be32		epoch;		/* current epoch */
+		__be32		cid;		/* parent connection ID */
+		__be32		checksum;	/* checksum */
+		__be32		securityIndex;	/* security type */
+		__be32		call_id[4];	/* encrypted call IDs */
+		__be32		inc_nonce;	/* challenge nonce + 1 */
+		__be32		level;		/* desired level */
+	} encrypted;
+
+	__be32		kvno;		/* Kerberos key version number */
+	__be32		ticket_len;	/* Kerberos ticket length  */
+} __packed;
+
+#endif /* _LINUX_RXRPC_PACKET_H */
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index bdece21f313d..8510a98b87e1 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -40,7 +40,9 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
 	sk = &rx->sk;
 	if (rx && sk->sk_state < RXRPC_CLOSE) {
 		if (call->notify_rx) {
+			spin_lock_bh(&call->notify_lock);
 			call->notify_rx(sk, call, call->user_call_ID);
+			spin_unlock_bh(&call->notify_lock);
 		} else {
 			write_lock_bh(&rx->recvmsg_lock);
 			if (list_empty(&call->recvmsg_link)) {
@@ -607,6 +609,7 @@ wait_error:
  * @_offset: The running offset into the buffer.
  * @want_more: True if more data is expected to be read
  * @_abort: Where the abort code is stored if -ECONNABORTED is returned
+ * @_service: Where to store the actual service ID (may be upgraded)
  *
  * Allow a kernel service to receive data and pick up information about the
  * state of a call.  Returns 0 if got what was asked for and there's more
@@ -624,7 +627,7 @@ wait_error:
  */
 int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
 			   void *buf, size_t size, size_t *_offset,
-			   bool want_more, u32 *_abort)
+			   bool want_more, u32 *_abort, u16 *_service)
 {
 	struct iov_iter iter;
 	struct kvec iov;
@@ -680,6 +683,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
 read_phase_complete:
 	ret = 1;
 out:
+	if (_service)
+		*_service = call->service_id;
 	mutex_unlock(&call->user_mutex);
 	_leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
 	return ret;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 46d1a1f0b55b..c38b3a1de56c 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -634,8 +634,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
 	challenge.min_level	= htonl(0);
 	challenge.__padding	= 0;
 
-	msg.msg_name	= &conn->params.peer->srx.transport.sin;
-	msg.msg_namelen	= sizeof(conn->params.peer->srx.transport.sin);
+	msg.msg_name	= &conn->params.peer->srx.transport;
+	msg.msg_namelen	= conn->params.peer->srx.transport_len;
 	msg.msg_control	= NULL;
 	msg.msg_controllen = 0;
 	msg.msg_flags	= 0;
@@ -689,8 +689,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
 
 	_enter("");
 
-	msg.msg_name	= &conn->params.peer->srx.transport.sin;
-	msg.msg_namelen	= sizeof(conn->params.peer->srx.transport.sin);
+	msg.msg_name	= &conn->params.peer->srx.transport;
+	msg.msg_namelen	= conn->params.peer->srx.transport_len;
 	msg.msg_control	= NULL;
 	msg.msg_controllen = 0;
 	msg.msg_flags	= 0;
@@ -854,7 +854,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
 				struct sk_buff *skb,
 				void *ticket, size_t ticket_len,
 				struct rxrpc_crypt *_session_key,
-				time_t *_expiry,
+				time64_t *_expiry,
 				u32 *_abort_code)
 {
 	struct skcipher_request *req;
@@ -864,7 +864,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
 	struct in_addr addr;
 	unsigned int life;
 	const char *eproto;
-	time_t issue, now;
+	time64_t issue, now;
 	bool little_endian;
 	int ret;
 	u32 abort_code;
@@ -960,15 +960,15 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
 	if (little_endian) {
 		__le32 stamp;
 		memcpy(&stamp, p, 4);
-		issue = le32_to_cpu(stamp);
+		issue = rxrpc_u32_to_time64(le32_to_cpu(stamp));
 	} else {
 		__be32 stamp;
 		memcpy(&stamp, p, 4);
-		issue = be32_to_cpu(stamp);
+		issue = rxrpc_u32_to_time64(be32_to_cpu(stamp));
 	}
 	p += 4;
-	now = get_seconds();
-	_debug("KIV ISSUE: %lx [%lx]", issue, now);
+	now = ktime_get_real_seconds();
+	_debug("KIV ISSUE: %llx [%llx]", issue, now);
 
 	/* check the ticket is in date */
 	if (issue > now) {
@@ -1053,7 +1053,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_crypt session_key;
 	const char *eproto;
-	time_t expiry;
+	time64_t expiry;
 	void *ticket;
 	u32 abort_code, version, kvno, ticket_len, level;
 	__be32 csum;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index b0d2cda6ec0a..7d2595582c09 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -38,12 +38,86 @@ struct rxrpc_send_params {
 };
 
 /*
+ * Wait for space to appear in the Tx queue or a signal to occur.
+ */
+static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
+					 struct rxrpc_call *call,
+					 long *timeo)
+{
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (call->tx_top - call->tx_hard_ack <
+		    min_t(unsigned int, call->tx_winsize,
+			  call->cong_cwnd + call->cong_extra))
+			return 0;
+
+		if (call->state >= RXRPC_CALL_COMPLETE)
+			return call->error;
+
+		if (signal_pending(current))
+			return sock_intr_errno(*timeo);
+
+		trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+		mutex_unlock(&call->user_mutex);
+		*timeo = schedule_timeout(*timeo);
+		if (mutex_lock_interruptible(&call->user_mutex) < 0)
+			return sock_intr_errno(*timeo);
+	}
+}
+
+/*
+ * Wait for space to appear in the Tx queue uninterruptibly, but with
+ * a timeout of 2*RTT if no progress was made and a signal occurred.
+ */
+static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+					    struct rxrpc_call *call)
+{
+	rxrpc_seq_t tx_start, tx_win;
+	signed long rtt2, timeout;
+	u64 rtt;
+
+	rtt = READ_ONCE(call->peer->rtt);
+	rtt2 = nsecs_to_jiffies64(rtt) * 2;
+	if (rtt2 < 1)
+		rtt2 = 1;
+
+	timeout = rtt2;
+	tx_start = READ_ONCE(call->tx_hard_ack);
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+
+		tx_win = READ_ONCE(call->tx_hard_ack);
+		if (call->tx_top - tx_win <
+		    min_t(unsigned int, call->tx_winsize,
+			  call->cong_cwnd + call->cong_extra))
+			return 0;
+
+		if (call->state >= RXRPC_CALL_COMPLETE)
+			return call->error;
+
+		if (timeout == 0 &&
+		    tx_win == tx_start && signal_pending(current))
+			return -EINTR;
+
+		if (tx_win != tx_start) {
+			timeout = rtt2;
+			tx_start = tx_win;
+		}
+
+		trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+		timeout = schedule_timeout(timeout);
+	}
+}
+
+/*
  * wait for space to appear in the transmit/ACK window
  * - caller holds the socket locked
  */
 static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
 				    struct rxrpc_call *call,
-				    long *timeo)
+				    long *timeo,
+				    bool waitall)
 {
 	DECLARE_WAITQUEUE(myself, current);
 	int ret;
@@ -53,30 +127,10 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
 
 	add_wait_queue(&call->waitq, &myself);
 
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		ret = 0;
-		if (call->tx_top - call->tx_hard_ack <
-		    min_t(unsigned int, call->tx_winsize,
-			  call->cong_cwnd + call->cong_extra))
-			break;
-		if (call->state >= RXRPC_CALL_COMPLETE) {
-			ret = -call->error;
-			break;
-		}
-		if (signal_pending(current)) {
-			ret = sock_intr_errno(*timeo);
-			break;
-		}
-
-		trace_rxrpc_transmit(call, rxrpc_transmit_wait);
-		mutex_unlock(&call->user_mutex);
-		*timeo = schedule_timeout(*timeo);
-		if (mutex_lock_interruptible(&call->user_mutex) < 0) {
-			ret = sock_intr_errno(*timeo);
-			break;
-		}
-	}
+	if (waitall)
+		ret = rxrpc_wait_for_tx_window_nonintr(rx, call);
+	else
+		ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
 
 	remove_wait_queue(&call->waitq, &myself);
 	set_current_state(TASK_RUNNING);
@@ -101,11 +155,23 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
 }
 
 /*
+ * Notify the owner of the call that the transmit phase is ended and the last
+ * packet has been queued.
+ */
+static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call,
+				rxrpc_notify_end_tx_t notify_end_tx)
+{
+	if (notify_end_tx)
+		notify_end_tx(&rx->sk, call, call->user_call_ID);
+}
+
+/*
  * Queue a DATA packet for transmission, set the resend timeout and send the
  * packet immediately
  */
-static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
-			       bool last)
+static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
+			       struct sk_buff *skb, bool last,
+			       rxrpc_notify_end_tx_t notify_end_tx)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	rxrpc_seq_t seq = sp->hdr.seq;
@@ -116,8 +182,10 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
 
 	ASSERTCMP(seq, ==, call->tx_top + 1);
 
-	if (last)
+	if (last) {
 		annotation |= RXRPC_TX_ANNO_LAST;
+		set_bit(RXRPC_CALL_TX_LASTQ, &call->flags);
+	}
 
 	/* We have to set the timestamp before queueing as the retransmit
 	 * algorithm can see the packet as soon as we queue it.
@@ -141,6 +209,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
 		switch (call->state) {
 		case RXRPC_CALL_CLIENT_SEND_REQUEST:
 			call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+			rxrpc_notify_end_tx(rx, call, notify_end_tx);
 			break;
 		case RXRPC_CALL_SERVER_ACK_REQUEST:
 			call->state = RXRPC_CALL_SERVER_SEND_REPLY;
@@ -151,8 +220,10 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
 					  ktime_get_real());
 			if (!last)
 				break;
+			/* Fall through */
 		case RXRPC_CALL_SERVER_SEND_REPLY:
 			call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+			rxrpc_notify_end_tx(rx, call, notify_end_tx);
 			break;
 		default:
 			break;
@@ -189,7 +260,8 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
  */
 static int rxrpc_send_data(struct rxrpc_sock *rx,
 			   struct rxrpc_call *call,
-			   struct msghdr *msg, size_t len)
+			   struct msghdr *msg, size_t len,
+			   rxrpc_notify_end_tx_t notify_end_tx)
 {
 	struct rxrpc_skb_priv *sp;
 	struct sk_buff *skb;
@@ -237,7 +309,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 				if (msg->msg_flags & MSG_DONTWAIT)
 					goto maybe_error;
 				ret = rxrpc_wait_for_tx_window(rx, call,
-							       &timeo);
+							       &timeo,
+							       msg->msg_flags & MSG_WAITALL);
 				if (ret < 0)
 					goto maybe_error;
 			}
@@ -311,11 +384,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 				call->tx_total_len -= copy;
 		}
 
-		/* check for the far side aborting the call or a network error
-		 * occurring */
-		if (call->state == RXRPC_CALL_COMPLETE)
-			goto call_terminated;
-
 		/* add the packet to the send queue if it's now full */
 		if (sp->remain <= 0 ||
 		    (msg_data_left(msg) == 0 && !more)) {
@@ -350,9 +418,21 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 			if (ret < 0)
 				goto out;
 
-			rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
+			rxrpc_queue_packet(rx, call, skb,
+					   !msg_data_left(msg) && !more,
+					   notify_end_tx);
 			skb = NULL;
 		}
+
+		/* Check for the far side aborting the call or a network error
+		 * occurring.  If this happens, save any packet that was under
+		 * construction so that in the case of a network error, the
+		 * call can be retried or redirected.
+		 */
+		if (call->state == RXRPC_CALL_COMPLETE) {
+			ret = call->error;
+			goto out;
+		}
 	} while (msg_data_left(msg) > 0);
 
 success:
@@ -362,11 +442,6 @@ out:
 	_leave(" = %d", ret);
 	return ret;
 
-call_terminated:
-	rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
-	_leave(" = %d", -call->error);
-	return -call->error;
-
 maybe_error:
 	if (copied)
 		goto success;
@@ -611,7 +686,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 		/* Reply phase not begun or not complete for service call. */
 		ret = -EPROTO;
 	} else {
-		ret = rxrpc_send_data(rx, call, msg, len);
+		ret = rxrpc_send_data(rx, call, msg, len, NULL);
 	}
 
 	mutex_unlock(&call->user_mutex);
@@ -631,6 +706,7 @@ error_release_sock:
  * @call: The call to send data through
  * @msg: The data to send
  * @len: The amount of data to send
+ * @notify_end_tx: Notification that the last packet is queued.
  *
  * Allow a kernel service to send data on a call.  The call must be in an state
  * appropriate to sending data.  No control data should be supplied in @msg,
@@ -638,7 +714,8 @@ error_release_sock:
  * more data to come, otherwise this data will end the transmission phase.
  */
 int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
-			   struct msghdr *msg, size_t len)
+			   struct msghdr *msg, size_t len,
+			   rxrpc_notify_end_tx_t notify_end_tx)
 {
 	int ret;
 
@@ -656,11 +733,12 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
 	case RXRPC_CALL_CLIENT_SEND_REQUEST:
 	case RXRPC_CALL_SERVER_ACK_REQUEST:
 	case RXRPC_CALL_SERVER_SEND_REPLY:
-		ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+		ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
+				      notify_end_tx);
 		break;
 	case RXRPC_CALL_COMPLETE:
 		read_lock_bh(&call->state_lock);
-		ret = -call->error;
+		ret = call->error;
 		read_unlock_bh(&call->state_lock);
 		break;
 	default:
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index ff7af71c4b49..e801171fa351 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -17,17 +17,28 @@
 /*
  * Fill out a peer address from a socket buffer containing a packet.
  */
-int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
+int rxrpc_extract_addr_from_skb(struct rxrpc_local *local,
+				struct sockaddr_rxrpc *srx,
+				struct sk_buff *skb)
 {
 	memset(srx, 0, sizeof(*srx));
 
 	switch (ntohs(skb->protocol)) {
 	case ETH_P_IP:
-		srx->transport_type = SOCK_DGRAM;
-		srx->transport_len = sizeof(srx->transport.sin);
-		srx->transport.sin.sin_family = AF_INET;
-		srx->transport.sin.sin_port = udp_hdr(skb)->source;
-		srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+		if (local->srx.transport.family == AF_INET6) {
+			srx->transport_type = SOCK_DGRAM;
+			srx->transport_len = sizeof(srx->transport.sin6);
+			srx->transport.sin6.sin6_family = AF_INET6;
+			srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
+			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+			srx->transport.sin6.sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
+		} else {
+			srx->transport_type = SOCK_DGRAM;
+			srx->transport_len = sizeof(srx->transport.sin);
+			srx->transport.sin.sin_family = AF_INET;
+			srx->transport.sin.sin_port = udp_hdr(skb)->source;
+			srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+		}
 		return 0;
 
 #ifdef CONFIG_AF_RXRPC_IPV6
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e70ed26485a2..c03d86a7775e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -172,6 +172,17 @@ config NET_SCH_TBF
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_tbf.
 
+config NET_SCH_CBS
+	tristate "Credit Based Shaper (CBS)"
+	---help---
+	  Say Y here if you want to use the Credit Based Shaper (CBS) packet
+	  scheduling algorithm.
+
+	  See the top of <file:net/sched/sch_cbs.c> for more details.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_cbs.
+
 config NET_SCH_GRED
 	tristate "Generic Random Early Detection (GRED)"
 	---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7b915d226de7..5b635447e3f8 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux Traffic Control Unit.
 #
@@ -52,6 +53,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL)	+= sch_fq_codel.o
 obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
+obj-$(CONFIG_NET_SCH_CBS)	+= sch_cbs.o
 
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f2e9ed34a963..4d33a50a8a6d 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -21,6 +21,8 @@
 #include <linux/kmod.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/rhashtable.h>
+#include <linux/list.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/sch_generic.h>
@@ -53,10 +55,13 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
 	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
 }
 
-static void free_tcf(struct rcu_head *head)
+/* XXX: For standalone actions, we don't need an RCU grace period either,
+ * because actions are always connected to filters and filters are already
+ * destroyed in RCU callbacks, so after an RCU grace period actions are already
+ * disconnected from filters. Later readers cannot find us.
+ */
+static void free_tcf(struct tc_action *p)
 {
-	struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu);
-
 	free_percpu(p->cpu_bstats);
 	free_percpu(p->cpu_qstats);
 
@@ -70,23 +75,21 @@ static void free_tcf(struct rcu_head *head)
 	kfree(p);
 }
 
-static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p)
+static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
 {
-	spin_lock_bh(&hinfo->lock);
-	hlist_del(&p->tcfa_head);
-	spin_unlock_bh(&hinfo->lock);
+	spin_lock_bh(&idrinfo->lock);
+	idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
+	spin_unlock_bh(&idrinfo->lock);
 	gen_kill_estimator(&p->tcfa_rate_est);
-	/*
-	 * gen_estimator est_timer() might access p->tcfa_lock
-	 * or bstats, wait a RCU grace period before freeing p
-	 */
-	call_rcu(&p->tcfa_rcu, free_tcf);
+	free_tcf(p);
 }
 
-int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
+int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 {
 	int ret = 0;
 
+	ASSERT_RTNL();
+
 	if (p) {
 		if (bind)
 			p->tcfa_bindcnt--;
@@ -97,55 +100,64 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
 		if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
 			if (p->ops->cleanup)
 				p->ops->cleanup(p, bind);
-			tcf_hash_destroy(p->hinfo, p);
+			tcf_idr_remove(p->idrinfo, p);
 			ret = ACT_P_DELETED;
 		}
 	}
 
 	return ret;
 }
-EXPORT_SYMBOL(__tcf_hash_release);
+EXPORT_SYMBOL(__tcf_idr_release);
 
-static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
+static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 			   struct netlink_callback *cb)
 {
-	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+	int err = 0, index = -1, s_i = 0, n_i = 0;
+	u32 act_flags = cb->args[2];
+	unsigned long jiffy_since = cb->args[3];
 	struct nlattr *nest;
+	struct idr *idr = &idrinfo->action_idr;
+	struct tc_action *p;
+	unsigned long id = 1;
 
-	spin_lock_bh(&hinfo->lock);
+	spin_lock_bh(&idrinfo->lock);
 
 	s_i = cb->args[0];
 
-	for (i = 0; i < (hinfo->hmask + 1); i++) {
-		struct hlist_head *head;
-		struct tc_action *p;
-
-		head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
-
-		hlist_for_each_entry_rcu(p, head, tcfa_head) {
-			index++;
-			if (index < s_i)
-				continue;
-
-			nest = nla_nest_start(skb, n_i);
-			if (nest == NULL)
-				goto nla_put_failure;
-			err = tcf_action_dump_1(skb, p, 0, 0);
-			if (err < 0) {
-				index--;
-				nlmsg_trim(skb, nest);
-				goto done;
-			}
-			nla_nest_end(skb, nest);
-			n_i++;
-			if (n_i >= TCA_ACT_MAX_PRIO)
-				goto done;
+	idr_for_each_entry_ext(idr, p, id) {
+		index++;
+		if (index < s_i)
+			continue;
+
+		if (jiffy_since &&
+		    time_after(jiffy_since,
+			       (unsigned long)p->tcfa_tm.lastuse))
+			continue;
+
+		nest = nla_nest_start(skb, n_i);
+		if (!nest)
+			goto nla_put_failure;
+		err = tcf_action_dump_1(skb, p, 0, 0);
+		if (err < 0) {
+			index--;
+			nlmsg_trim(skb, nest);
+			goto done;
 		}
+		nla_nest_end(skb, nest);
+		n_i++;
+		if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+		    n_i >= TCA_ACT_MAX_PRIO)
+			goto done;
 	}
 done:
-	spin_unlock_bh(&hinfo->lock);
-	if (n_i)
-		cb->args[0] += n_i;
+	if (index >= 0)
+		cb->args[0] = index + 1;
+
+	spin_unlock_bh(&idrinfo->lock);
+	if (n_i) {
+		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
+			cb->args[1] = n_i;
+	}
 	return n_i;
 
 nla_put_failure:
@@ -153,31 +165,29 @@ nla_put_failure:
 	goto done;
 }
 
-static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
+static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 			  const struct tc_action_ops *ops)
 {
 	struct nlattr *nest;
-	int i = 0, n_i = 0;
+	int n_i = 0;
 	int ret = -EINVAL;
+	struct idr *idr = &idrinfo->action_idr;
+	struct tc_action *p;
+	unsigned long id = 1;
 
 	nest = nla_nest_start(skb, 0);
 	if (nest == NULL)
 		goto nla_put_failure;
 	if (nla_put_string(skb, TCA_KIND, ops->kind))
 		goto nla_put_failure;
-	for (i = 0; i < (hinfo->hmask + 1); i++) {
-		struct hlist_head *head;
-		struct hlist_node *n;
-		struct tc_action *p;
-
-		head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
-		hlist_for_each_entry_safe(p, n, head, tcfa_head) {
-			ret = __tcf_hash_release(p, false, true);
-			if (ret == ACT_P_DELETED) {
-				module_put(p->ops->owner);
-				n_i++;
-			} else if (ret < 0)
-				goto nla_put_failure;
+
+	idr_for_each_entry_ext(idr, p, id) {
+		ret = __tcf_idr_release(p, false, true);
+		if (ret == ACT_P_DELETED) {
+			module_put(ops->owner);
+			n_i++;
+		} else if (ret < 0) {
+			goto nla_put_failure;
 		}
 	}
 	if (nla_put_u32(skb, TCA_FCNT, n_i))
@@ -194,12 +204,12 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 		       struct netlink_callback *cb, int type,
 		       const struct tc_action_ops *ops)
 {
-	struct tcf_hashinfo *hinfo = tn->hinfo;
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
 	if (type == RTM_DELACTION) {
-		return tcf_del_walker(hinfo, skb, ops);
+		return tcf_del_walker(idrinfo, skb, ops);
 	} else if (type == RTM_GETACTION) {
-		return tcf_dump_walker(hinfo, skb, cb);
+		return tcf_dump_walker(idrinfo, skb, cb);
 	} else {
 		WARN(1, "tcf_generic_walker: unknown action %d\n", type);
 		return -EINVAL;
@@ -207,40 +217,21 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(tcf_generic_walker);
 
-static struct tc_action *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
+static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
 {
 	struct tc_action *p = NULL;
-	struct hlist_head *head;
 
-	spin_lock_bh(&hinfo->lock);
-	head = &hinfo->htab[tcf_hash(index, hinfo->hmask)];
-	hlist_for_each_entry_rcu(p, head, tcfa_head)
-		if (p->tcfa_index == index)
-			break;
-	spin_unlock_bh(&hinfo->lock);
+	spin_lock_bh(&idrinfo->lock);
+	p = idr_find_ext(&idrinfo->action_idr, index);
+	spin_unlock_bh(&idrinfo->lock);
 
 	return p;
 }
 
-u32 tcf_hash_new_index(struct tc_action_net *tn)
-{
-	struct tcf_hashinfo *hinfo = tn->hinfo;
-	u32 val = hinfo->index;
-
-	do {
-		if (++val == 0)
-			val = 1;
-	} while (tcf_hash_lookup(val, hinfo));
-
-	hinfo->index = val;
-	return val;
-}
-EXPORT_SYMBOL(tcf_hash_new_index);
-
-int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
+int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
 {
-	struct tcf_hashinfo *hinfo = tn->hinfo;
-	struct tc_action *p = tcf_hash_lookup(index, hinfo);
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p = tcf_idr_lookup(index, idrinfo);
 
 	if (p) {
 		*a = p;
@@ -248,15 +239,15 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(tcf_hash_search);
+EXPORT_SYMBOL(tcf_idr_search);
 
-bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
-		    int bind)
+bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
+		   int bind)
 {
-	struct tcf_hashinfo *hinfo = tn->hinfo;
-	struct tc_action *p = NULL;
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p = tcf_idr_lookup(index, idrinfo);
 
-	if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
+	if (index && p) {
 		if (bind)
 			p->tcfa_bindcnt++;
 		p->tcfa_refcnt++;
@@ -265,23 +256,25 @@ bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 	}
 	return false;
 }
-EXPORT_SYMBOL(tcf_hash_check);
+EXPORT_SYMBOL(tcf_idr_check);
 
-void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
+void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
 {
 	if (est)
 		gen_kill_estimator(&a->tcfa_rate_est);
-	call_rcu(&a->tcfa_rcu, free_tcf);
+	free_tcf(a);
 }
-EXPORT_SYMBOL(tcf_hash_cleanup);
+EXPORT_SYMBOL(tcf_idr_cleanup);
 
-int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
-		    struct tc_action **a, const struct tc_action_ops *ops,
-		    int bind, bool cpustats)
+int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
+		   struct tc_action **a, const struct tc_action_ops *ops,
+		   int bind, bool cpustats)
 {
 	struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
-	struct tcf_hashinfo *hinfo = tn->hinfo;
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct idr *idr = &idrinfo->action_idr;
 	int err = -ENOMEM;
+	unsigned long idr_index;
 
 	if (unlikely(!p))
 		return -ENOMEM;
@@ -304,8 +297,32 @@ err2:
 		}
 	}
 	spin_lock_init(&p->tcfa_lock);
-	INIT_HLIST_NODE(&p->tcfa_head);
-	p->tcfa_index = index ? index : tcf_hash_new_index(tn);
+	/* user doesn't specify an index */
+	if (!index) {
+		idr_preload(GFP_KERNEL);
+		spin_lock_bh(&idrinfo->lock);
+		err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0,
+				    GFP_ATOMIC);
+		spin_unlock_bh(&idrinfo->lock);
+		idr_preload_end();
+		if (err) {
+err3:
+			free_percpu(p->cpu_qstats);
+			goto err2;
+		}
+		p->tcfa_index = idr_index;
+	} else {
+		idr_preload(GFP_KERNEL);
+		spin_lock_bh(&idrinfo->lock);
+		err = idr_alloc_ext(idr, NULL, NULL, index, index + 1,
+				    GFP_ATOMIC);
+		spin_unlock_bh(&idrinfo->lock);
+		idr_preload_end();
+		if (err)
+			goto err3;
+		p->tcfa_index = index;
+	}
+
 	p->tcfa_tm.install = jiffies;
 	p->tcfa_tm.lastuse = jiffies;
 	p->tcfa_tm.firstuse = 0;
@@ -314,52 +331,46 @@ err2:
 					&p->tcfa_rate_est,
 					&p->tcfa_lock, NULL, est);
 		if (err) {
-			free_percpu(p->cpu_qstats);
-			goto err2;
+			goto err3;
 		}
 	}
 
-	p->hinfo = hinfo;
+	p->idrinfo = idrinfo;
 	p->ops = ops;
 	INIT_LIST_HEAD(&p->list);
 	*a = p;
 	return 0;
 }
-EXPORT_SYMBOL(tcf_hash_create);
+EXPORT_SYMBOL(tcf_idr_create);
 
-void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a)
+void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
 {
-	struct tcf_hashinfo *hinfo = tn->hinfo;
-	unsigned int h = tcf_hash(a->tcfa_index, hinfo->hmask);
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
-	spin_lock_bh(&hinfo->lock);
-	hlist_add_head(&a->tcfa_head, &hinfo->htab[h]);
-	spin_unlock_bh(&hinfo->lock);
+	spin_lock_bh(&idrinfo->lock);
+	idr_replace_ext(&idrinfo->action_idr, a, a->tcfa_index);
+	spin_unlock_bh(&idrinfo->lock);
 }
-EXPORT_SYMBOL(tcf_hash_insert);
+EXPORT_SYMBOL(tcf_idr_insert);
 
-void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
-			  struct tcf_hashinfo *hinfo)
+void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
+			 struct tcf_idrinfo *idrinfo)
 {
-	int i;
-
-	for (i = 0; i < hinfo->hmask + 1; i++) {
-		struct tc_action *p;
-		struct hlist_node *n;
-
-		hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfa_head) {
-			int ret;
+	struct idr *idr = &idrinfo->action_idr;
+	struct tc_action *p;
+	int ret;
+	unsigned long id = 1;
 
-			ret = __tcf_hash_release(p, false, true);
-			if (ret == ACT_P_DELETED)
-				module_put(ops->owner);
-			else if (ret < 0)
-				return;
-		}
+	idr_for_each_entry_ext(idr, p, id) {
+		ret = __tcf_idr_release(p, false, true);
+		if (ret == ACT_P_DELETED)
+			module_put(ops->owner);
+		else if (ret < 0)
+			return;
 	}
-	kfree(hinfo->htab);
+	idr_destroy(&idrinfo->action_idr);
 }
-EXPORT_SYMBOL(tcf_hashinfo_destroy);
+EXPORT_SYMBOL(tcf_idrinfo_destroy);
 
 static LIST_HEAD(act_base);
 static DEFINE_RWLOCK(act_mod_lock);
@@ -460,9 +471,10 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
 int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 		    int nr_actions, struct tcf_result *res)
 {
-	int ret = -1, i;
 	u32 jmp_prgcnt = 0;
 	u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */
+	int i;
+	int ret = TC_ACT_OK;
 
 	if (skb_skip_tc_classify(skb))
 		return TC_ACT_OK;
@@ -506,13 +518,15 @@ EXPORT_SYMBOL(tcf_action_exec);
 
 int tcf_action_destroy(struct list_head *actions, int bind)
 {
+	const struct tc_action_ops *ops;
 	struct tc_action *a, *tmp;
 	int ret = 0;
 
 	list_for_each_entry_safe(a, tmp, actions, list) {
-		ret = __tcf_hash_release(a, bind, true);
+		ops = a->ops;
+		ret = __tcf_idr_release(a, bind, true);
 		if (ret == ACT_P_DELETED)
-			module_put(a->ops->owner);
+			module_put(ops->owner);
 		else if (ret < 0)
 			return ret;
 	}
@@ -1068,11 +1082,18 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 	return tcf_add_notify(net, n, &actions, portid);
 }
 
+static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
+	[TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32,
+			     .validation_data = &tcaa_root_flags_allowed },
+	[TCA_ROOT_TIME_DELTA]      = { .type = NLA_U32 },
+};
+
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 			 struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
-	struct nlattr *tca[TCA_ACT_MAX + 1];
+	struct nlattr *tca[TCA_ROOT_MAX + 1];
 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = 0, ovr = 0;
 
@@ -1080,7 +1101,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
 			  extack);
 	if (ret < 0)
 		return ret;
@@ -1121,16 +1142,12 @@ replay:
 	return ret;
 }
 
-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(struct nlattr **nla)
 {
 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
-	struct nlattr *nla[TCAA_MAX + 1];
 	struct nlattr *kind;
 
-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
-			NULL, NULL) < 0)
-		return NULL;
 	tb1 = nla[TCA_ACT_TAB];
 	if (tb1 == NULL)
 		return NULL;
@@ -1157,8 +1174,20 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tc_action_ops *a_o;
 	int ret = 0;
 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
-	struct nlattr *kind = find_dump_kind(cb->nlh);
+	struct nlattr *tb[TCA_ROOT_MAX + 1];
+	struct nlattr *count_attr = NULL;
+	unsigned long jiffy_since = 0;
+	struct nlattr *kind = NULL;
+	struct nla_bitfield32 bf;
+	u32 msecs_since = 0;
+	u32 act_count = 0;
+
+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
+			  tcaa_policy, NULL);
+	if (ret < 0)
+		return ret;
 
+	kind = find_dump_kind(tb);
 	if (kind == NULL) {
 		pr_info("tc_dump_action: action bad kind\n");
 		return 0;
@@ -1168,14 +1197,32 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (a_o == NULL)
 		return 0;
 
+	cb->args[2] = 0;
+	if (tb[TCA_ROOT_FLAGS]) {
+		bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]);
+		cb->args[2] = bf.value;
+	}
+
+	if (tb[TCA_ROOT_TIME_DELTA]) {
+		msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
+	}
+
 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
+
+	if (msecs_since)
+		jiffy_since = jiffies - msecs_to_jiffies(msecs_since);
+
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
+	cb->args[3] = jiffy_since;
+	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
+	if (!count_attr)
+		goto out_module_put;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
 	if (nest == NULL)
@@ -1188,6 +1235,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (ret > 0) {
 		nla_nest_end(skb, nest);
 		ret = skb->len;
+		act_count = cb->args[1];
+		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
+		cb->args[1] = 0;
 	} else
 		nlmsg_trim(skb, b);
 
@@ -1203,12 +1253,231 @@ out_module_put:
 	return skb->len;
 }
 
+struct tcf_action_net {
+	struct rhashtable egdev_ht;
+};
+
+static unsigned int tcf_action_net_id;
+
+struct tcf_action_egdev_cb {
+	struct list_head list;
+	tc_setup_cb_t *cb;
+	void *cb_priv;
+};
+
+struct tcf_action_egdev {
+	struct rhash_head ht_node;
+	const struct net_device *dev;
+	unsigned int refcnt;
+	struct list_head cb_list;
+};
+
+static const struct rhashtable_params tcf_action_egdev_ht_params = {
+	.key_offset = offsetof(struct tcf_action_egdev, dev),
+	.head_offset = offsetof(struct tcf_action_egdev, ht_node),
+	.key_len = sizeof(const struct net_device *),
+};
+
+static struct tcf_action_egdev *
+tcf_action_egdev_lookup(const struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+	return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
+				      tcf_action_egdev_ht_params);
+}
+
+static struct tcf_action_egdev *
+tcf_action_egdev_get(const struct net_device *dev)
+{
+	struct tcf_action_egdev *egdev;
+	struct tcf_action_net *tan;
+
+	egdev = tcf_action_egdev_lookup(dev);
+	if (egdev)
+		goto inc_ref;
+
+	egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
+	if (!egdev)
+		return NULL;
+	INIT_LIST_HEAD(&egdev->cb_list);
+	egdev->dev = dev;
+	tan = net_generic(dev_net(dev), tcf_action_net_id);
+	rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
+			       tcf_action_egdev_ht_params);
+
+inc_ref:
+	egdev->refcnt++;
+	return egdev;
+}
+
+static void tcf_action_egdev_put(struct tcf_action_egdev *egdev)
+{
+	struct tcf_action_net *tan;
+
+	if (--egdev->refcnt)
+		return;
+	tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
+	rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
+			       tcf_action_egdev_ht_params);
+	kfree(egdev);
+}
+
+static struct tcf_action_egdev_cb *
+tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
+			   tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+
+	list_for_each_entry(egdev_cb, &egdev->cb_list, list)
+		if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
+			return egdev_cb;
+	return NULL;
+}
+
+static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
+				    enum tc_setup_type type,
+				    void *type_data, bool err_stop)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+	int ok_count = 0;
+	int err;
+
+	list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
+		err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
+		if (err) {
+			if (err_stop)
+				return err;
+		} else {
+			ok_count++;
+		}
+	}
+	return ok_count;
+}
+
+static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
+				   tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+
+	egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+	if (WARN_ON(egdev_cb))
+		return -EEXIST;
+	egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
+	if (!egdev_cb)
+		return -ENOMEM;
+	egdev_cb->cb = cb;
+	egdev_cb->cb_priv = cb_priv;
+	list_add(&egdev_cb->list, &egdev->cb_list);
+	return 0;
+}
+
+static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
+				    tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev_cb *egdev_cb;
+
+	egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+	if (WARN_ON(!egdev_cb))
+		return;
+	list_del(&egdev_cb->list);
+	kfree(egdev_cb);
+}
+
+static int __tc_setup_cb_egdev_register(const struct net_device *dev,
+					tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
+	int err;
+
+	if (!egdev)
+		return -ENOMEM;
+	err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
+	if (err)
+		goto err_cb_add;
+	return 0;
+
+err_cb_add:
+	tcf_action_egdev_put(egdev);
+	return err;
+}
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+			       tc_setup_cb_t *cb, void *cb_priv)
+{
+	int err;
+
+	rtnl_lock();
+	err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
+
+static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
+					   tc_setup_cb_t *cb, void *cb_priv)
+{
+	struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+	if (WARN_ON(!egdev))
+		return;
+	tcf_action_egdev_cb_del(egdev, cb, cb_priv);
+	tcf_action_egdev_put(egdev);
+}
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+				  tc_setup_cb_t *cb, void *cb_priv)
+{
+	rtnl_lock();
+	__tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
+
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+			   enum tc_setup_type type, void *type_data,
+			   bool err_stop)
+{
+	struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+	if (!egdev)
+		return 0;
+	return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop);
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
+
+static __net_init int tcf_action_net_init(struct net *net)
+{
+	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+	return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
+}
+
+static void __net_exit tcf_action_net_exit(struct net *net)
+{
+	struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+	rhashtable_destroy(&tan->egdev_ht);
+}
+
+static struct pernet_operations tcf_action_net_ops = {
+	.init = tcf_action_net_init,
+	.exit = tcf_action_net_exit,
+	.id = &tcf_action_net_id,
+	.size = sizeof(struct tcf_action_net),
+};
+
 static int __init tc_action_init(void)
 {
-	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL);
+	int err;
+
+	err = register_pernet_subsys(&tcf_action_net_ops);
+	if (err)
+		return err;
+
+	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
-		      NULL);
+		      0);
 
 	return 0;
 }
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 9afe1337cfd1..5ef8ce8c83d4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -21,7 +21,6 @@
 #include <linux/tc_act/tc_bpf.h>
 #include <net/tc_act/tc_bpf.h>
 
-#define BPF_TAB_MASK		15
 #define ACT_BPF_NAME_LEN	256
 
 struct tcf_bpf_cfg {
@@ -50,11 +49,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 	filter = rcu_dereference(prog->filter);
 	if (at_ingress) {
 		__skb_push(skb, skb->mac_len);
-		bpf_compute_data_end(skb);
+		bpf_compute_data_pointers(skb);
 		filter_res = BPF_PROG_RUN(filter, skb);
 		__skb_pull(skb, skb->mac_len);
 	} else {
-		bpf_compute_data_end(skb);
+		bpf_compute_data_pointers(skb);
 		filter_res = BPF_PROG_RUN(filter, skb);
 	}
 	rcu_read_unlock();
@@ -295,9 +294,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
 
-	if (!tcf_hash_check(tn, parm->index, act, bind)) {
-		ret = tcf_hash_create(tn, parm->index, est, act,
-				      &act_bpf_ops, bind, true);
+	if (!tcf_idr_check(tn, parm->index, act, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, act,
+				     &act_bpf_ops, bind, true);
 		if (ret < 0)
 			return ret;
 
@@ -307,7 +306,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 		if (bind)
 			return 0;
 
-		tcf_hash_release(*act, bind);
+		tcf_idr_release(*act, bind);
 		if (!replace)
 			return -EEXIST;
 	}
@@ -343,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 	rcu_assign_pointer(prog->filter, cfg.filter);
 
 	if (res == ACT_P_CREATED) {
-		tcf_hash_insert(tn, *act);
+		tcf_idr_insert(tn, *act);
 	} else {
 		/* make sure the program being replaced is no longer executing */
 		synchronize_rcu();
@@ -353,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 	return res;
 out:
 	if (res == ACT_P_CREATED)
-		tcf_hash_cleanup(*act, est);
+		tcf_idr_cleanup(*act, est);
 
 	return ret;
 }
@@ -379,7 +378,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_bpf_ops __read_mostly = {
@@ -399,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-	return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK);
+	return tc_action_net_init(tn, &act_bpf_ops);
 }
 
 static void __net_exit bpf_exit_net(struct net *net)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2155bc6c6a1e..10b7a8855a6c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -28,8 +28,6 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 
-#define CONNMARK_TAB_MASK     3
-
 static unsigned int connmark_net_id;
 static struct tc_action_ops act_connmark_ops;
 
@@ -119,9 +117,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_CONNMARK_PARMS]);
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_connmark_ops, bind, false);
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_connmark_ops, bind, false);
 		if (ret)
 			return ret;
 
@@ -130,13 +128,13 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 		ci->net = net;
 		ci->zone = parm->zone;
 
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 		ret = ACT_P_CREATED;
 	} else {
 		ci = to_connmark(*a);
 		if (bind)
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 		/* replacing action and zone */
@@ -189,7 +187,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_connmark_ops = {
@@ -208,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-	return tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK);
+	return tc_action_net_init(tn, &act_connmark_ops);
 }
 
 static void __net_exit connmark_exit_net(struct net *net)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3317a2f579da..1c40caadcff9 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -37,8 +37,6 @@
 #include <linux/tc_act/tc_csum.h>
 #include <net/tc_act/tc_csum.h>
 
-#define CSUM_TAB_MASK 15
-
 static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
 	[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
 };
@@ -67,16 +65,16 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_CSUM_PARMS]);
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_csum_ops, bind, false);
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_csum_ops, bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -88,7 +86,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 	spin_unlock_bh(&p->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 
 	return ret;
 }
@@ -231,9 +229,6 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
 	const struct iphdr *iph;
 	u16 ul;
 
-	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
-		return 1;
-
 	/*
 	 * Support both UDP and UDPLITE checksum algorithms, Don't use
 	 * udph->len to get the real length without any protocol check,
@@ -287,9 +282,6 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
 	const struct ipv6hdr *ip6h;
 	u16 ul;
 
-	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
-		return 1;
-
 	/*
 	 * Support both UDP and UDPLITE checksum algorithms, Don't use
 	 * udph->len to get the real length without any protocol check,
@@ -615,7 +607,7 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_csum_ops = {
@@ -634,7 +626,7 @@ static __net_init int csum_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-	return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK);
+	return tc_action_net_init(tn, &act_csum_ops);
 }
 
 static void __net_exit csum_exit_net(struct net *net)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 99afe8b1f1fb..e29a48ef7fc3 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -23,8 +23,6 @@
 #include <linux/tc_act/tc_gact.h>
 #include <net/tc_act/tc_gact.h>
 
-#define GACT_TAB_MASK	15
-
 static unsigned int gact_net_id;
 static struct tc_action_ops act_gact_ops;
 
@@ -92,16 +90,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	}
 #endif
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_gact_ops, bind, true);
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_gact_ops, bind, true);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -122,7 +120,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	}
 #endif
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -214,7 +212,7 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_gact_ops = {
@@ -234,7 +232,7 @@ static __net_init int gact_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-	return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK);
+	return tc_action_net_init(tn, &act_gact_ops);
 }
 
 static void __net_exit gact_exit_net(struct net *net)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index c5dec308b8b1..3007cb1310ea 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -34,8 +34,6 @@
 #include <linux/etherdevice.h>
 #include <net/ife.h>
 
-#define IFE_TAB_MASK 15
-
 static unsigned int ife_net_id;
 static int max_metacnt = IFE_META_MAX + 1;
 static struct tc_action_ops act_ife_ops;
@@ -250,6 +248,22 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len)
 	return ret;
 }
 
+#ifdef CONFIG_MODULES
+static const char *ife_meta_id2name(u32 metaid)
+{
+	switch (metaid) {
+	case IFE_META_SKBMARK:
+		return "skbmark";
+	case IFE_META_PRIO:
+		return "skbprio";
+	case IFE_META_TCINDEX:
+		return "tcindex";
+	default:
+		return "unknown";
+	}
+}
+#endif
+
 /* called when adding new meta information
  * under ife->tcf_lock for existing action
 */
@@ -265,7 +279,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
 		if (exists)
 			spin_unlock_bh(&ife->tcf_lock);
 		rtnl_unlock();
-		request_module("ifemeta%u", metaid);
+		request_module("ife-meta-%s", ife_meta_id2name(metaid));
 		rtnl_lock();
 		if (exists)
 			spin_lock_bh(&ife->tcf_lock);
@@ -394,10 +408,14 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
 static void tcf_ife_cleanup(struct tc_action *a, int bind)
 {
 	struct tcf_ife_info *ife = to_ife(a);
+	struct tcf_ife_params *p;
 
 	spin_lock_bh(&ife->tcf_lock);
 	_tcf_ife_cleanup(a, bind);
 	spin_unlock_bh(&ife->tcf_lock);
+
+	p = rcu_dereference_protected(ife->params, 1);
+	kfree_rcu(p, rcu);
 }
 
 /* under ife->tcf_lock for existing action */
@@ -434,9 +452,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 	struct nlattr *tb[TCA_IFE_MAX + 1];
 	struct nlattr *tb2[IFE_META_MAX + 1];
+	struct tcf_ife_params *p, *p_old;
 	struct tcf_ife_info *ife;
+	u16 ife_type = ETH_P_IFE;
 	struct tc_ife *parm;
-	u16 ife_type = 0;
 	u8 *daddr = NULL;
 	u8 *saddr = NULL;
 	bool exists = false;
@@ -452,63 +471,70 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_IFE_PARMS]);
 
-	exists = tcf_hash_check(tn, parm->index, a, bind);
-	if (exists && bind)
-		return 0;
+	/* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because
+	 * they cannot run at the same time. Check on all other values which
+	 * are not supported right now.
+	 */
+	if (parm->flags & ~IFE_ENCODE)
+		return -EINVAL;
 
-	if (parm->flags & IFE_ENCODE) {
-		/* Until we get issued the ethertype, we cant have
-		 * a default..
-		**/
-		if (!tb[TCA_IFE_TYPE]) {
-			if (exists)
-				tcf_hash_release(*a, bind);
-			pr_info("You MUST pass etherype for encoding\n");
-			return -EINVAL;
-		}
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	exists = tcf_idr_check(tn, parm->index, a, bind);
+	if (exists && bind) {
+		kfree(p);
+		return 0;
 	}
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a, &act_ife_ops,
-				      bind, false);
-		if (ret)
+		ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
+				     bind, true);
+		if (ret) {
+			kfree(p);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
-		if (!ovr)
+		tcf_idr_release(*a, bind);
+		if (!ovr) {
+			kfree(p);
 			return -EEXIST;
+		}
 	}
 
 	ife = to_ife(*a);
-	ife->flags = parm->flags;
+	p->flags = parm->flags;
 
 	if (parm->flags & IFE_ENCODE) {
-		ife_type = nla_get_u16(tb[TCA_IFE_TYPE]);
+		if (tb[TCA_IFE_TYPE])
+			ife_type = nla_get_u16(tb[TCA_IFE_TYPE]);
 		if (tb[TCA_IFE_DMAC])
 			daddr = nla_data(tb[TCA_IFE_DMAC]);
 		if (tb[TCA_IFE_SMAC])
 			saddr = nla_data(tb[TCA_IFE_SMAC]);
 	}
 
-	if (exists)
-		spin_lock_bh(&ife->tcf_lock);
 	ife->tcf_action = parm->action;
 
 	if (parm->flags & IFE_ENCODE) {
 		if (daddr)
-			ether_addr_copy(ife->eth_dst, daddr);
+			ether_addr_copy(p->eth_dst, daddr);
 		else
-			eth_zero_addr(ife->eth_dst);
+			eth_zero_addr(p->eth_dst);
 
 		if (saddr)
-			ether_addr_copy(ife->eth_src, saddr);
+			ether_addr_copy(p->eth_src, saddr);
 		else
-			eth_zero_addr(ife->eth_src);
+			eth_zero_addr(p->eth_src);
 
-		ife->eth_type = ife_type;
+		p->eth_type = ife_type;
 	}
 
+	if (exists)
+		spin_lock_bh(&ife->tcf_lock);
+
 	if (ret == ACT_P_CREATED)
 		INIT_LIST_HEAD(&ife->metalist);
 
@@ -518,12 +544,13 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 		if (err) {
 metadata_parse_err:
 			if (exists)
-				tcf_hash_release(*a, bind);
+				tcf_idr_release(*a, bind);
 			if (ret == ACT_P_CREATED)
 				_tcf_ife_cleanup(*a, bind);
 
 			if (exists)
 				spin_unlock_bh(&ife->tcf_lock);
+			kfree(p);
 			return err;
 		}
 
@@ -544,6 +571,7 @@ metadata_parse_err:
 
 			if (exists)
 				spin_unlock_bh(&ife->tcf_lock);
+			kfree(p);
 			return err;
 		}
 	}
@@ -551,8 +579,13 @@ metadata_parse_err:
 	if (exists)
 		spin_unlock_bh(&ife->tcf_lock);
 
+	p_old = rtnl_dereference(ife->params);
+	rcu_assign_pointer(ife->params, p);
+	if (p_old)
+		kfree_rcu(p_old, rcu);
+
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 
 	return ret;
 }
@@ -562,12 +595,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_ife_info *ife = to_ife(a);
+	struct tcf_ife_params *p = rtnl_dereference(ife->params);
 	struct tc_ife opt = {
 		.index = ife->tcf_index,
 		.refcnt = ife->tcf_refcnt - ref,
 		.bindcnt = ife->tcf_bindcnt - bind,
 		.action = ife->tcf_action,
-		.flags = ife->flags,
+		.flags = p->flags,
 	};
 	struct tcf_t t;
 
@@ -578,17 +612,17 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD))
 		goto nla_put_failure;
 
-	if (!is_zero_ether_addr(ife->eth_dst)) {
-		if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst))
+	if (!is_zero_ether_addr(p->eth_dst)) {
+		if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, p->eth_dst))
 			goto nla_put_failure;
 	}
 
-	if (!is_zero_ether_addr(ife->eth_src)) {
-		if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src))
+	if (!is_zero_ether_addr(p->eth_src)) {
+		if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, p->eth_src))
 			goto nla_put_failure;
 	}
 
-	if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type))
+	if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type))
 		goto nla_put_failure;
 
 	if (dump_metalist(skb, ife)) {
@@ -630,19 +664,15 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
 	u8 *tlv_data;
 	u16 metalen;
 
-	spin_lock(&ife->tcf_lock);
-	bstats_update(&ife->tcf_bstats, skb);
+	bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
 	tcf_lastuse_update(&ife->tcf_tm);
-	spin_unlock(&ife->tcf_lock);
 
 	if (skb_at_tc_ingress(skb))
 		skb_push(skb, skb->dev->hard_header_len);
 
 	tlv_data = ife_decode(skb, &metalen);
 	if (unlikely(!tlv_data)) {
-		spin_lock(&ife->tcf_lock);
-		ife->tcf_qstats.drops++;
-		spin_unlock(&ife->tcf_lock);
+		qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
 		return TC_ACT_SHOT;
 	}
 
@@ -660,14 +690,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
 			 */
 			pr_info_ratelimited("Unknown metaid %d dlen %d\n",
 					    mtype, dlen);
-			ife->tcf_qstats.overlimits++;
+			qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
 		}
 	}
 
 	if (WARN_ON(tlv_data != ifehdr_end)) {
-		spin_lock(&ife->tcf_lock);
-		ife->tcf_qstats.drops++;
-		spin_unlock(&ife->tcf_lock);
+		qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
 		return TC_ACT_SHOT;
 	}
 
@@ -696,7 +724,7 @@ static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
 }
 
 static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
-			  struct tcf_result *res)
+			  struct tcf_result *res, struct tcf_ife_params *p)
 {
 	struct tcf_ife_info *ife = to_ife(a);
 	int action = ife->tcf_action;
@@ -719,23 +747,20 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
 			exceed_mtu = true;
 	}
 
-	spin_lock(&ife->tcf_lock);
-	bstats_update(&ife->tcf_bstats, skb);
+	bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
 	tcf_lastuse_update(&ife->tcf_tm);
 
 	if (!metalen) {		/* no metadata to send */
 		/* abuse overlimits to count when we allow packet
 		 * with no metadata
 		 */
-		ife->tcf_qstats.overlimits++;
-		spin_unlock(&ife->tcf_lock);
+		qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
 		return action;
 	}
 	/* could be stupid policy setup or mtu config
 	 * so lets be conservative.. */
 	if ((action == TC_ACT_SHOT) || exceed_mtu) {
-		ife->tcf_qstats.drops++;
-		spin_unlock(&ife->tcf_lock);
+		qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
 		return TC_ACT_SHOT;
 	}
 
@@ -744,6 +769,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
 
 	ife_meta = ife_encode(skb, metalen);
 
+	spin_lock(&ife->tcf_lock);
+
 	/* XXX: we dont have a clever way of telling encode to
 	 * not repeat some of the computations that are done by
 	 * ops->presence_check...
@@ -755,25 +782,24 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
 		}
 		if (err < 0) {
 			/* too corrupt to keep around if overwritten */
-			ife->tcf_qstats.drops++;
 			spin_unlock(&ife->tcf_lock);
+			qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
 			return TC_ACT_SHOT;
 		}
 		skboff += err;
 	}
+	spin_unlock(&ife->tcf_lock);
 	oethh = (struct ethhdr *)skb->data;
 
-	if (!is_zero_ether_addr(ife->eth_src))
-		ether_addr_copy(oethh->h_source, ife->eth_src);
-	if (!is_zero_ether_addr(ife->eth_dst))
-		ether_addr_copy(oethh->h_dest, ife->eth_dst);
-	oethh->h_proto = htons(ife->eth_type);
+	if (!is_zero_ether_addr(p->eth_src))
+		ether_addr_copy(oethh->h_source, p->eth_src);
+	if (!is_zero_ether_addr(p->eth_dst))
+		ether_addr_copy(oethh->h_dest, p->eth_dst);
+	oethh->h_proto = htons(p->eth_type);
 
 	if (skb_at_tc_ingress(skb))
 		skb_pull(skb, skb->dev->hard_header_len);
 
-	spin_unlock(&ife->tcf_lock);
-
 	return action;
 }
 
@@ -781,21 +807,19 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
 		       struct tcf_result *res)
 {
 	struct tcf_ife_info *ife = to_ife(a);
+	struct tcf_ife_params *p;
+	int ret;
+
+	rcu_read_lock();
+	p = rcu_dereference(ife->params);
+	if (p->flags & IFE_ENCODE) {
+		ret = tcf_ife_encode(skb, a, res, p);
+		rcu_read_unlock();
+		return ret;
+	}
+	rcu_read_unlock();
 
-	if (ife->flags & IFE_ENCODE)
-		return tcf_ife_encode(skb, a, res);
-
-	if (!(ife->flags & IFE_ENCODE))
-		return tcf_ife_decode(skb, a, res);
-
-	pr_info_ratelimited("unknown failure(policy neither de/encode\n");
-	spin_lock(&ife->tcf_lock);
-	bstats_update(&ife->tcf_bstats, skb);
-	tcf_lastuse_update(&ife->tcf_tm);
-	ife->tcf_qstats.drops++;
-	spin_unlock(&ife->tcf_lock);
-
-	return TC_ACT_SHOT;
+	return tcf_ife_decode(skb, a, res);
 }
 
 static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
@@ -811,7 +835,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_ife_ops = {
@@ -831,7 +855,7 @@ static __net_init int ife_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-	return tc_action_net_init(tn, &act_ife_ops, IFE_TAB_MASK);
+	return tc_action_net_init(tn, &act_ife_ops);
 }
 
 static void __net_exit ife_exit_net(struct net *net)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d516ba8178b8..d9e399a7e3d5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -28,8 +28,6 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 
 
-#define IPT_TAB_MASK     15
-
 static unsigned int ipt_net_id;
 static struct tc_action_ops act_ipt_ops;
 
@@ -41,6 +39,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t,
 {
 	struct xt_tgchk_param par;
 	struct xt_target *target;
+	struct ipt_entry e = {};
 	int ret = 0;
 
 	target = xt_request_find_target(AF_INET, t->u.user.name,
@@ -52,6 +51,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t,
 	memset(&par, 0, sizeof(par));
 	par.net       = net;
 	par.table     = table;
+	par.entryinfo = &e;
 	par.target    = target;
 	par.targinfo  = t->data;
 	par.hook_mask = hook;
@@ -116,33 +116,33 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	if (tb[TCA_IPT_INDEX] != NULL)
 		index = nla_get_u32(tb[TCA_IPT_INDEX]);
 
-	exists = tcf_hash_check(tn, index, a, bind);
+	exists = tcf_idr_check(tn, index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
 		if (exists)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 
 	td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
 	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
 		if (exists)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, index, est, a, ops, bind,
-				      false);
+		ret = tcf_idr_create(tn, index, est, a, ops, bind,
+				     false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 
 		if (!ovr)
 			return -EEXIST;
@@ -178,7 +178,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	ipt->tcfi_hook  = hook;
 	spin_unlock_bh(&ipt->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 
 err3:
@@ -187,7 +187,7 @@ err2:
 	kfree(tname);
 err1:
 	if (ret == ACT_P_CREATED)
-		tcf_hash_cleanup(*a, est);
+		tcf_idr_cleanup(*a, est);
 	return err;
 }
 
@@ -314,7 +314,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_ipt_ops = {
@@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-	return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK);
+	return tc_action_net_init(tn, &act_ipt_ops);
 }
 
 static void __net_exit ipt_exit_net(struct net *net)
@@ -364,7 +364,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_xt_ops = {
@@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-	return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK);
+	return tc_action_net_init(tn, &act_xt_ops);
 }
 
 static void __net_exit xt_exit_net(struct net *net)
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 82892170ce4f..1e3f10e5da99 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -76,4 +76,4 @@ module_exit(ifemark_cleanup_module);
 MODULE_AUTHOR("Jamal Hadi Salim(2015)");
 MODULE_DESCRIPTION("Inter-FE skb mark metadata module");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_SKBMARK);
+MODULE_ALIAS_IFE_META("skbmark");
diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c
index 26bf4d86030b..4033f9fc4d4a 100644
--- a/net/sched/act_meta_skbprio.c
+++ b/net/sched/act_meta_skbprio.c
@@ -73,4 +73,4 @@ module_exit(ifeprio_cleanup_module);
 MODULE_AUTHOR("Jamal Hadi Salim(2015)");
 MODULE_DESCRIPTION("Inter-FE skb prio metadata action");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_PRIO);
+MODULE_ALIAS_IFE_META("skbprio");
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 3b35774ce890..2ea1f26c9e96 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -76,4 +76,4 @@ module_exit(ifetc_index_cleanup_module);
 MODULE_AUTHOR("Jamal Hadi Salim(2016)");
 MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX);
+MODULE_ALIAS_IFE_META("tcindex");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1b5549ababd4..8b3e59388480 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -28,7 +28,6 @@
 #include <linux/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_mirred.h>
 
-#define MIRRED_TAB_MASK     7
 static LIST_HEAD(mirred_list);
 static DEFINE_SPINLOCK(mirred_list_lock);
 
@@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
@@ -106,14 +105,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		break;
 	default:
 		if (exists)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 	if (parm->ifindex) {
 		dev = __dev_get_by_index(net, parm->ifindex);
 		if (dev == NULL) {
 			if (exists)
-				tcf_hash_release(*a, bind);
+				tcf_idr_release(*a, bind);
 			return -ENODEV;
 		}
 		mac_header_xmit = dev_is_mac_header_xmit(dev);
@@ -124,13 +123,13 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		if (dev == NULL)
 			return -EINVAL;
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_mirred_ops, bind, true);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_mirred_ops, bind, true);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -141,6 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	m->tcfm_eaction = parm->eaction;
 	if (dev != NULL) {
 		m->tcfm_ifindex = parm->ifindex;
+		m->net = net;
 		if (ret != ACT_P_CREATED)
 			dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
 		dev_hold(dev);
@@ -152,7 +152,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		spin_lock_bh(&mirred_list_lock);
 		list_add(&m->tcfm_list, &mirred_list);
 		spin_unlock_bh(&mirred_list_lock);
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	}
 
 	return ret;
@@ -283,7 +283,7 @@ static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static int mirred_device_event(struct notifier_block *unused,
@@ -314,15 +314,11 @@ static struct notifier_block mirred_device_notifier = {
 	.notifier_call = mirred_device_event,
 };
 
-static int tcf_mirred_device(const struct tc_action *a, struct net *net,
-			     struct net_device **mirred_dev)
+static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
 {
-	int ifindex = tcf_mirred_ifindex(a);
+	struct tcf_mirred *m = to_mirred(a);
 
-	*mirred_dev = __dev_get_by_index(net, ifindex);
-	if (!*mirred_dev)
-		return -EINVAL;
-	return 0;
+	return __dev_get_by_index(m->net, m->tcfm_ifindex);
 }
 
 static struct tc_action_ops act_mirred_ops = {
@@ -337,14 +333,14 @@ static struct tc_action_ops act_mirred_ops = {
 	.walk		=	tcf_mirred_walker,
 	.lookup		=	tcf_mirred_search,
 	.size		=	sizeof(struct tcf_mirred),
-	.get_dev	=	tcf_mirred_device,
+	.get_dev	=	tcf_mirred_get_dev,
 };
 
 static __net_init int mirred_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-	return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK);
+	return tc_action_net_init(tn, &act_mirred_ops);
 }
 
 static void __net_exit mirred_exit_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 9016ab8a0649..c365d01b99c8 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -29,8 +29,6 @@
 #include <net/udp.h>
 
 
-#define NAT_TAB_MASK	15
-
 static unsigned int nat_net_id;
 static struct tc_action_ops act_nat_ops;
 
@@ -58,16 +56,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_NAT_PARMS]);
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_nat_ops, bind, false);
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_nat_ops, bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -83,7 +81,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 	spin_unlock_bh(&p->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 
 	return ret;
 }
@@ -290,7 +288,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_nat_ops = {
@@ -309,7 +307,7 @@ static __net_init int nat_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-	return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK);
+	return tc_action_net_init(tn, &act_nat_ops);
 }
 
 static void __net_exit nat_exit_net(struct net *net)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 7dc5892671c8..491fe5deb09e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -24,8 +24,6 @@
 #include <net/tc_act/tc_pedit.h>
 #include <uapi/linux/tc_act/tc_pedit.h>
 
-#define PEDIT_TAB_MASK	15
-
 static unsigned int pedit_net_id;
 static struct tc_action_ops act_pedit_ops;
 
@@ -168,17 +166,17 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	if (IS_ERR(keys_ex))
 		return PTR_ERR(keys_ex);
 
-	if (!tcf_hash_check(tn, parm->index, a, bind)) {
+	if (!tcf_idr_check(tn, parm->index, a, bind)) {
 		if (!parm->nkeys)
 			return -EINVAL;
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_pedit_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_pedit_ops, bind, false);
 		if (ret)
 			return ret;
 		p = to_pedit(*a);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (keys == NULL) {
-			tcf_hash_cleanup(*a, est);
+			tcf_idr_cleanup(*a, est);
 			kfree(keys_ex);
 			return -ENOMEM;
 		}
@@ -186,7 +184,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	} else {
 		if (bind)
 			return 0;
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 		p = to_pedit(*a);
@@ -214,7 +212,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 
 	spin_unlock_bh(&p->tcf_lock);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -432,7 +430,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_pedit_ops = {
@@ -452,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-	return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK);
+	return tc_action_net_init(tn, &act_pedit_ops);
 }
 
 static void __net_exit pedit_exit_net(struct net *net)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index b062bc80c7cb..3bb2ebf9e9ae 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -40,8 +40,6 @@ struct tcf_police {
 
 #define to_police(pc) ((struct tcf_police *)pc)
 
-#define POL_TAB_MASK     15
-
 /* old policer structure from before tc actions */
 struct tc_police_compat {
 	u32			index;
@@ -101,18 +99,18 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_POLICE_TBF]);
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, NULL, a,
-				      &act_police_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, NULL, a,
+				     &act_police_ops, bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -188,7 +186,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		return ret;
 
 	police->tcfp_t_c = ktime_get_ns();
-	tcf_hash_insert(tn, *a);
+	tcf_idr_insert(tn, *a);
 
 	return ret;
 
@@ -196,7 +194,7 @@ failure:
 	qdisc_put_rtab(P_tab);
 	qdisc_put_rtab(R_tab);
 	if (ret == ACT_P_CREATED)
-		tcf_hash_cleanup(*a, est);
+		tcf_idr_cleanup(*a, est);
 	return err;
 }
 
@@ -310,7 +308,7 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 MODULE_AUTHOR("Alexey Kuznetsov");
@@ -333,7 +331,7 @@ static __net_init int police_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
-	return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK);
+	return tc_action_net_init(tn, &act_police_ops);
 }
 
 static void __net_exit police_exit_net(struct net *net)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 59d6645a4007..8b5abcd2f32f 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -25,7 +25,6 @@
 
 #include <linux/if_arp.h>
 
-#define SAMPLE_TAB_MASK     7
 static unsigned int sample_net_id;
 static struct tc_action_ops act_sample_ops;
 
@@ -59,18 +58,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SAMPLE_PARMS]);
 
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_sample_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_sample_ops, bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -82,7 +81,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	psample_group = psample_group_get(net, s->psample_group_num);
 	if (!psample_group) {
 		if (ret == ACT_P_CREATED)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 	RCU_INIT_POINTER(s->psample_group, psample_group);
@@ -93,7 +92,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -221,7 +220,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_sample_ops = {
@@ -241,7 +240,7 @@ static __net_init int sample_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-	return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK);
+	return tc_action_net_init(tn, &act_sample_ops);
 }
 
 static void __net_exit sample_exit_net(struct net *net)
@@ -265,12 +264,13 @@ static int __init sample_init_module(void)
 
 static void __exit sample_cleanup_module(void)
 {
+	rcu_barrier();
 	tcf_unregister_action(&act_sample_ops, &sample_net_ops);
 }
 
 module_init(sample_init_module);
 module_exit(sample_cleanup_module);
 
-MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
 MODULE_DESCRIPTION("Packet sampling action");
 MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 43605e7ce051..e7b57e5071a3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -24,8 +24,6 @@
 #include <linux/tc_act/tc_defact.h>
 #include <net/tc_act/tc_defact.h>
 
-#define SIMP_TAB_MASK     7
-
 static unsigned int simp_net_id;
 static struct tc_action_ops act_simp_ops;
 
@@ -102,28 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_DEF_PARMS]);
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (tb[TCA_DEF_DATA] == NULL) {
 		if (exists)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 
 	defdata = nla_data(tb[TCA_DEF_DATA]);
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_simp_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_simp_ops, bind, false);
 		if (ret)
 			return ret;
 
 		d = to_defact(*a);
 		ret = alloc_defdata(d, defdata);
 		if (ret < 0) {
-			tcf_hash_cleanup(*a, est);
+			tcf_idr_cleanup(*a, est);
 			return ret;
 		}
 		d->tcf_action = parm->action;
@@ -131,7 +129,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	} else {
 		d = to_defact(*a);
 
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 
@@ -139,7 +137,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -183,7 +181,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_simp_ops = {
@@ -203,7 +201,7 @@ static __net_init int simp_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-	return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK);
+	return tc_action_net_init(tn, &act_simp_ops);
 }
 
 static void __net_exit simp_exit_net(struct net *net)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6b3e65d7de0c..59949d61f20d 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -27,8 +27,6 @@
 #include <linux/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_skbedit.h>
 
-#define SKBEDIT_TAB_MASK     15
-
 static unsigned int skbedit_net_id;
 static struct tc_action_ops act_skbedit_ops;
 
@@ -118,18 +116,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
 	if (!flags) {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_skbedit_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_skbedit_ops, bind, false);
 		if (ret)
 			return ret;
 
@@ -137,7 +135,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		ret = ACT_P_CREATED;
 	} else {
 		d = to_skbedit(*a);
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -163,7 +161,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	spin_unlock_bh(&d->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -221,7 +219,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_skbedit_ops = {
@@ -240,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-	return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK);
+	return tc_action_net_init(tn, &act_skbedit_ops);
 }
 
 static void __net_exit skbedit_exit_net(struct net *net)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index a73c4bbcada2..b642ad3d39dd 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -20,8 +20,6 @@
 #include <linux/tc_act/tc_skbmod.h>
 #include <net/tc_act/tc_skbmod.h>
 
-#define SKBMOD_TAB_MASK     15
-
 static unsigned int skbmod_net_id;
 static struct tc_action_ops act_skbmod_ops;
 
@@ -129,7 +127,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	if (parm->flags & SKBMOD_F_SWAPMAC)
 		lflags = SKBMOD_F_SWAPMAC;
 
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
@@ -137,14 +135,14 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_skbmod_ops, bind, true);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_skbmod_ops, bind, true);
 		if (ret)
 			return ret;
 
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -155,7 +153,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
 	if (unlikely(!p)) {
 		if (ovr)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
@@ -182,7 +180,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 		kfree_rcu(p_old, rcu);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
@@ -245,7 +243,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_skbmod_ops = {
@@ -265,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-	return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK);
+	return tc_action_net_init(tn, &act_skbmod_ops);
 }
 
 static void __net_exit skbmod_exit_net(struct net *net)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index fd7e75679c69..30c96274c638 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -20,8 +20,6 @@
 #include <linux/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_tunnel_key.h>
 
-#define TUNNEL_KEY_TAB_MASK     15
-
 static unsigned int tunnel_key_net_id;
 static struct tc_action_ops act_tunnel_key_ops;
 
@@ -100,7 +98,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
@@ -159,14 +157,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_tunnel_key_ops, bind, true);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_tunnel_key_ops, bind, true);
 		if (ret)
 			return ret;
 
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
@@ -177,7 +175,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
 		if (ret == ACT_P_CREATED)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
@@ -193,13 +191,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		kfree_rcu(params_old, rcu);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 
 	return ret;
 
 err_out:
 	if (exists)
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 	return ret;
 }
 
@@ -304,7 +302,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_tunnel_key_ops = {
@@ -324,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-	return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK);
+	return tc_action_net_init(tn, &act_tunnel_key_ops);
 }
 
 static void __net_exit tunnel_key_exit_net(struct net *net)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 13ba3a89f675..97f717a13ad5 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -19,8 +19,6 @@
 #include <linux/tc_act/tc_vlan.h>
 #include <net/tc_act/tc_vlan.h>
 
-#define VLAN_TAB_MASK     15
-
 static unsigned int vlan_net_id;
 static struct tc_action_ops act_vlan_ops;
 
@@ -28,14 +26,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
 		    struct tcf_result *res)
 {
 	struct tcf_vlan *v = to_vlan(a);
+	struct tcf_vlan_params *p;
 	int action;
 	int err;
 	u16 tci;
 
-	spin_lock(&v->tcf_lock);
 	tcf_lastuse_update(&v->tcf_tm);
-	bstats_update(&v->tcf_bstats, skb);
-	action = v->tcf_action;
+	bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
 
 	/* Ensure 'data' points at mac_header prior calling vlan manipulating
 	 * functions.
@@ -43,15 +40,21 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
 	if (skb_at_tc_ingress(skb))
 		skb_push_rcsum(skb, skb->mac_len);
 
-	switch (v->tcfv_action) {
+	rcu_read_lock();
+
+	action = READ_ONCE(v->tcf_action);
+
+	p = rcu_dereference(v->vlan_p);
+
+	switch (p->tcfv_action) {
 	case TCA_VLAN_ACT_POP:
 		err = skb_vlan_pop(skb);
 		if (err)
 			goto drop;
 		break;
 	case TCA_VLAN_ACT_PUSH:
-		err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid |
-				    (v->tcfv_push_prio << VLAN_PRIO_SHIFT));
+		err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid |
+				    (p->tcfv_push_prio << VLAN_PRIO_SHIFT));
 		if (err)
 			goto drop;
 		break;
@@ -70,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
 				goto drop;
 		}
 		/* replace the vid */
-		tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid;
+		tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid;
 		/* replace prio bits, if tcfv_push_prio specified */
-		if (v->tcfv_push_prio) {
+		if (p->tcfv_push_prio) {
 			tci &= ~VLAN_PRIO_MASK;
-			tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT;
+			tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT;
 		}
 		/* put updated tci as hwaccel tag */
-		__vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci);
+		__vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci);
 		break;
 	default:
 		BUG();
@@ -87,12 +90,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
 
 drop:
 	action = TC_ACT_SHOT;
-	v->tcf_qstats.drops++;
+	qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+
 unlock:
+	rcu_read_unlock();
 	if (skb_at_tc_ingress(skb))
 		skb_pull_rcsum(skb, skb->mac_len);
 
-	spin_unlock(&v->tcf_lock);
 	return action;
 }
 
@@ -109,6 +113,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 	struct nlattr *tb[TCA_VLAN_MAX + 1];
+	struct tcf_vlan_params *p, *p_old;
 	struct tc_vlan *parm;
 	struct tcf_vlan *v;
 	int action;
@@ -128,7 +133,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!tb[TCA_VLAN_PARMS])
 		return -EINVAL;
 	parm = nla_data(tb[TCA_VLAN_PARMS]);
-	exists = tcf_hash_check(tn, parm->index, a, bind);
+	exists = tcf_idr_check(tn, parm->index, a, bind);
 	if (exists && bind)
 		return 0;
 
@@ -139,13 +144,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	case TCA_VLAN_ACT_MODIFY:
 		if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
 			if (exists)
-				tcf_hash_release(*a, bind);
+				tcf_idr_release(*a, bind);
 			return -EINVAL;
 		}
 		push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
 		if (push_vid >= VLAN_VID_MASK) {
 			if (exists)
-				tcf_hash_release(*a, bind);
+				tcf_idr_release(*a, bind);
 			return -ERANGE;
 		}
 
@@ -167,66 +172,87 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 		break;
 	default:
 		if (exists)
-			tcf_hash_release(*a, bind);
+			tcf_idr_release(*a, bind);
 		return -EINVAL;
 	}
 	action = parm->v_action;
 
 	if (!exists) {
-		ret = tcf_hash_create(tn, parm->index, est, a,
-				      &act_vlan_ops, bind, false);
+		ret = tcf_idr_create(tn, parm->index, est, a,
+				     &act_vlan_ops, bind, true);
 		if (ret)
 			return ret;
 
 		ret = ACT_P_CREATED;
 	} else {
-		tcf_hash_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		if (!ovr)
 			return -EEXIST;
 	}
 
 	v = to_vlan(*a);
 
-	spin_lock_bh(&v->tcf_lock);
-
-	v->tcfv_action = action;
-	v->tcfv_push_vid = push_vid;
-	v->tcfv_push_prio = push_prio;
-	v->tcfv_push_proto = push_proto;
+	ASSERT_RTNL();
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		if (ovr)
+			tcf_idr_release(*a, bind);
+		return -ENOMEM;
+	}
 
 	v->tcf_action = parm->action;
 
-	spin_unlock_bh(&v->tcf_lock);
+	p_old = rtnl_dereference(v->vlan_p);
+
+	p->tcfv_action = action;
+	p->tcfv_push_vid = push_vid;
+	p->tcfv_push_prio = push_prio;
+	p->tcfv_push_proto = push_proto;
+
+	rcu_assign_pointer(v->vlan_p, p);
+
+	if (p_old)
+		kfree_rcu(p_old, rcu);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(tn, *a);
+		tcf_idr_insert(tn, *a);
 	return ret;
 }
 
+static void tcf_vlan_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_vlan *v = to_vlan(a);
+	struct tcf_vlan_params *p;
+
+	p = rcu_dereference_protected(v->vlan_p, 1);
+	kfree_rcu(p, rcu);
+}
+
 static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
 			 int bind, int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_vlan *v = to_vlan(a);
+	struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
 	struct tc_vlan opt = {
 		.index    = v->tcf_index,
 		.refcnt   = v->tcf_refcnt - ref,
 		.bindcnt  = v->tcf_bindcnt - bind,
 		.action   = v->tcf_action,
-		.v_action = v->tcfv_action,
+		.v_action = p->tcfv_action,
 	};
 	struct tcf_t t;
 
 	if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
-	if ((v->tcfv_action == TCA_VLAN_ACT_PUSH ||
-	     v->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
-	    (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
+	if ((p->tcfv_action == TCA_VLAN_ACT_PUSH ||
+	     p->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
+	    (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) ||
 	     nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
-			  v->tcfv_push_proto) ||
+			  p->tcfv_push_proto) ||
 	     (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
-					      v->tcfv_push_prio))))
+					      p->tcfv_push_prio))))
 		goto nla_put_failure;
 
 	tcf_tm_dump(&t, &v->tcf_tm);
@@ -252,7 +278,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-	return tcf_hash_search(tn, a, index);
+	return tcf_idr_search(tn, a, index);
 }
 
 static struct tc_action_ops act_vlan_ops = {
@@ -262,6 +288,7 @@ static struct tc_action_ops act_vlan_ops = {
 	.act		=	tcf_vlan,
 	.dump		=	tcf_vlan_dump,
 	.init		=	tcf_vlan_init,
+	.cleanup	=	tcf_vlan_cleanup,
 	.walk		=	tcf_vlan_walker,
 	.lookup		=	tcf_vlan_search,
 	.size		=	sizeof(struct tcf_vlan),
@@ -271,7 +298,7 @@ static __net_init int vlan_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-	return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK);
+	return tc_action_net_init(tn, &act_vlan_ops);
 }
 
 static void __net_exit vlan_exit_net(struct net *net)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 39da0c5801c9..ab255b421781 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -77,6 +77,8 @@ out:
 }
 EXPORT_SYMBOL(register_tcf_proto_ops);
 
+static struct workqueue_struct *tc_filter_wq;
+
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 {
 	struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 	 * tcf_proto_ops's destroy() handler.
 	 */
 	rcu_barrier();
+	flush_workqueue(tc_filter_wq);
 
 	write_lock(&cls_mod_lock);
 	list_for_each_entry(t, &tcf_proto_base, head) {
@@ -100,20 +103,11 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 }
 EXPORT_SYMBOL(unregister_tcf_proto_ops);
 
-static int tfilter_notify(struct net *net, struct sk_buff *oskb,
-			  struct nlmsghdr *n, struct tcf_proto *tp,
-			  unsigned long fh, int event, bool unicast);
-
-static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
-				 struct nlmsghdr *n,
-				 struct tcf_chain *chain, int event)
+bool tcf_queue_work(struct work_struct *work)
 {
-	struct tcf_proto *tp;
-
-	for (tp = rtnl_dereference(chain->filter_chain);
-	     tp; tp = rtnl_dereference(tp->next))
-		tfilter_notify(net, oskb, n, tp, 0, event, false);
+	return queue_work(tc_filter_wq, work);
 }
+EXPORT_SYMBOL(tcf_queue_work);
 
 /* Select new prio value from the range, managed by kernel. */
 
@@ -201,14 +195,22 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
 	return chain;
 }
 
+static void tcf_chain_head_change(struct tcf_chain *chain,
+				  struct tcf_proto *tp_head)
+{
+	if (chain->chain_head_change)
+		chain->chain_head_change(tp_head,
+					 chain->chain_head_change_priv);
+}
+
 static void tcf_chain_flush(struct tcf_chain *chain)
 {
 	struct tcf_proto *tp;
 
-	if (*chain->p_filter_chain)
-		RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
+	tcf_chain_head_change(chain, NULL);
 	while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
 		RCU_INIT_POINTER(chain->filter_chain, tp->next);
+		tcf_chain_put(chain);
 		tcf_proto_destroy(tp);
 	}
 }
@@ -216,10 +218,14 @@ static void tcf_chain_flush(struct tcf_chain *chain)
 static void tcf_chain_destroy(struct tcf_chain *chain)
 {
 	list_del(&chain->list);
-	tcf_chain_flush(chain);
 	kfree(chain);
 }
 
+static void tcf_chain_hold(struct tcf_chain *chain)
+{
+	++chain->refcnt;
+}
+
 struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 				bool create)
 {
@@ -227,36 +233,51 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 
 	list_for_each_entry(chain, &block->chain_list, list) {
 		if (chain->index == chain_index) {
-			chain->refcnt++;
+			tcf_chain_hold(chain);
 			return chain;
 		}
 	}
-	if (create)
-		return tcf_chain_create(block, chain_index);
-	else
-		return NULL;
+
+	return create ? tcf_chain_create(block, chain_index) : NULL;
 }
 EXPORT_SYMBOL(tcf_chain_get);
 
 void tcf_chain_put(struct tcf_chain *chain)
 {
-	/* Destroy unused chain, with exception of chain 0, which is the
-	 * default one and has to be always present.
-	 */
-	if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0)
+	if (--chain->refcnt == 0)
 		tcf_chain_destroy(chain);
 }
 EXPORT_SYMBOL(tcf_chain_put);
 
-static void
-tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
-			       struct tcf_proto __rcu **p_filter_chain)
+static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
+				  struct tcf_block_ext_info *ei,
+				  enum tc_block_command command)
 {
-	chain->p_filter_chain = p_filter_chain;
+	struct net_device *dev = q->dev_queue->dev;
+	struct tc_block_offload bo = {};
+
+	if (!dev->netdev_ops->ndo_setup_tc)
+		return;
+	bo.command = command;
+	bo.binder_type = ei->binder_type;
+	bo.block = block;
+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
 }
 
-int tcf_block_get(struct tcf_block **p_block,
-		  struct tcf_proto __rcu **p_filter_chain)
+static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
+				   struct tcf_block_ext_info *ei)
+{
+	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
+}
+
+static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
+				     struct tcf_block_ext_info *ei)
+{
+	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
+}
+
+int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
+		      struct tcf_block_ext_info *ei)
 {
 	struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
 	struct tcf_chain *chain;
@@ -265,13 +286,20 @@ int tcf_block_get(struct tcf_block **p_block,
 	if (!block)
 		return -ENOMEM;
 	INIT_LIST_HEAD(&block->chain_list);
+	INIT_LIST_HEAD(&block->cb_list);
+
 	/* Create chain 0 by default, it has to be always present. */
 	chain = tcf_chain_create(block, 0);
 	if (!chain) {
 		err = -ENOMEM;
 		goto err_chain_create;
 	}
-	tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
+	WARN_ON(!ei->chain_head_change);
+	chain->chain_head_change = ei->chain_head_change;
+	chain->chain_head_change_priv = ei->chain_head_change_priv;
+	block->net = qdisc_net(q);
+	block->q = q;
+	tcf_block_offload_bind(block, q, ei);
 	*p_block = block;
 	return 0;
 
@@ -279,21 +307,179 @@ err_chain_create:
 	kfree(block);
 	return err;
 }
+EXPORT_SYMBOL(tcf_block_get_ext);
+
+static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
+{
+	struct tcf_proto __rcu **p_filter_chain = priv;
+
+	rcu_assign_pointer(*p_filter_chain, tp_head);
+}
+
+int tcf_block_get(struct tcf_block **p_block,
+		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
+{
+	struct tcf_block_ext_info ei = {
+		.chain_head_change = tcf_chain_head_change_dflt,
+		.chain_head_change_priv = p_filter_chain,
+	};
+
+	WARN_ON(!p_filter_chain);
+	return tcf_block_get_ext(p_block, q, &ei);
+}
 EXPORT_SYMBOL(tcf_block_get);
 
-void tcf_block_put(struct tcf_block *block)
+static void tcf_block_put_final(struct work_struct *work)
 {
+	struct tcf_block *block = container_of(work, struct tcf_block, work);
 	struct tcf_chain *chain, *tmp;
 
-	if (!block)
-		return;
-
+	rtnl_lock();
+	/* Only chain 0 should be still here. */
 	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
-		tcf_chain_destroy(chain);
+		tcf_chain_put(chain);
+	rtnl_unlock();
 	kfree(block);
 }
+
+/* XXX: Standalone actions are not allowed to jump to any chain, and bound
+ * actions should be all removed after flushing. However, filters are now
+ * destroyed in tc filter workqueue with RTNL lock, they can not race here.
+ */
+void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
+		       struct tcf_block_ext_info *ei)
+{
+	struct tcf_chain *chain, *tmp;
+
+	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+		tcf_chain_flush(chain);
+
+	tcf_block_offload_unbind(block, q, ei);
+
+	INIT_WORK(&block->work, tcf_block_put_final);
+	/* Wait for existing RCU callbacks to cool down, make sure their works
+	 * have been queued before this. We can not flush pending works here
+	 * because we are holding the RTNL lock.
+	 */
+	rcu_barrier();
+	tcf_queue_work(&block->work);
+}
+EXPORT_SYMBOL(tcf_block_put_ext);
+
+void tcf_block_put(struct tcf_block *block)
+{
+	struct tcf_block_ext_info ei = {0, };
+
+	if (!block)
+		return;
+	tcf_block_put_ext(block, block->q, &ei);
+}
+
 EXPORT_SYMBOL(tcf_block_put);
 
+struct tcf_block_cb {
+	struct list_head list;
+	tc_setup_cb_t *cb;
+	void *cb_ident;
+	void *cb_priv;
+	unsigned int refcnt;
+};
+
+void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
+{
+	return block_cb->cb_priv;
+}
+EXPORT_SYMBOL(tcf_block_cb_priv);
+
+struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
+					 tc_setup_cb_t *cb, void *cb_ident)
+{	struct tcf_block_cb *block_cb;
+
+	list_for_each_entry(block_cb, &block->cb_list, list)
+		if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
+			return block_cb;
+	return NULL;
+}
+EXPORT_SYMBOL(tcf_block_cb_lookup);
+
+void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
+{
+	block_cb->refcnt++;
+}
+EXPORT_SYMBOL(tcf_block_cb_incref);
+
+unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
+{
+	return --block_cb->refcnt;
+}
+EXPORT_SYMBOL(tcf_block_cb_decref);
+
+struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
+					     tc_setup_cb_t *cb, void *cb_ident,
+					     void *cb_priv)
+{
+	struct tcf_block_cb *block_cb;
+
+	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
+	if (!block_cb)
+		return NULL;
+	block_cb->cb = cb;
+	block_cb->cb_ident = cb_ident;
+	block_cb->cb_priv = cb_priv;
+	list_add(&block_cb->list, &block->cb_list);
+	return block_cb;
+}
+EXPORT_SYMBOL(__tcf_block_cb_register);
+
+int tcf_block_cb_register(struct tcf_block *block,
+			  tc_setup_cb_t *cb, void *cb_ident,
+			  void *cb_priv)
+{
+	struct tcf_block_cb *block_cb;
+
+	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
+	return block_cb ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(tcf_block_cb_register);
+
+void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+{
+	list_del(&block_cb->list);
+	kfree(block_cb);
+}
+EXPORT_SYMBOL(__tcf_block_cb_unregister);
+
+void tcf_block_cb_unregister(struct tcf_block *block,
+			     tc_setup_cb_t *cb, void *cb_ident)
+{
+	struct tcf_block_cb *block_cb;
+
+	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
+	if (!block_cb)
+		return;
+	__tcf_block_cb_unregister(block_cb);
+}
+EXPORT_SYMBOL(tcf_block_cb_unregister);
+
+static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
+			     void *type_data, bool err_stop)
+{
+	struct tcf_block_cb *block_cb;
+	int ok_count = 0;
+	int err;
+
+	list_for_each_entry(block_cb, &block->cb_list, list) {
+		err = block_cb->cb(type, type_data, block_cb->cb_priv);
+		if (err) {
+			if (err_stop)
+				return err;
+		} else {
+			ok_count++;
+		}
+	}
+	return ok_count;
+}
+
 /* Main classifier routine: scans classifier chain attached
  * to this qdisc, (optionally) tests for protocol and asks
  * specific classifiers.
@@ -362,11 +548,11 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
 				struct tcf_chain_info *chain_info,
 				struct tcf_proto *tp)
 {
-	if (chain->p_filter_chain &&
-	    *chain_info->pprev == chain->filter_chain)
-		rcu_assign_pointer(*chain->p_filter_chain, tp);
+	if (*chain_info->pprev == chain->filter_chain)
+		tcf_chain_head_change(chain, tp);
 	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
 	rcu_assign_pointer(*chain_info->pprev, tp);
+	tcf_chain_hold(chain);
 }
 
 static void tcf_chain_tp_remove(struct tcf_chain *chain,
@@ -375,9 +561,10 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
 {
 	struct tcf_proto *next = rtnl_dereference(chain_info->next);
 
-	if (chain->p_filter_chain && tp == chain->filter_chain)
-		RCU_INIT_POINTER(*chain->p_filter_chain, next);
+	if (tp == chain->filter_chain)
+		tcf_chain_head_change(chain, next);
 	RCU_INIT_POINTER(*chain_info->pprev, next);
+	tcf_chain_put(chain);
 }
 
 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
@@ -407,6 +594,112 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
 	return tp;
 }
 
+static int tcf_fill_node(struct net *net, struct sk_buff *skb,
+			 struct tcf_proto *tp, struct Qdisc *q, u32 parent,
+			 void *fh, u32 portid, u32 seq, u16 flags, int event)
+{
+	struct tcmsg *tcm;
+	struct nlmsghdr  *nlh;
+	unsigned char *b = skb_tail_pointer(skb);
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+	if (!nlh)
+		goto out_nlmsg_trim;
+	tcm = nlmsg_data(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm__pad1 = 0;
+	tcm->tcm__pad2 = 0;
+	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+	tcm->tcm_parent = parent;
+	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
+	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
+		goto nla_put_failure;
+	if (!fh) {
+		tcm->tcm_handle = 0;
+	} else {
+		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+			goto nla_put_failure;
+	}
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+	return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tfilter_notify(struct net *net, struct sk_buff *oskb,
+			  struct nlmsghdr *n, struct tcf_proto *tp,
+			  struct Qdisc *q, u32 parent,
+			  void *fh, int event, bool unicast)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
+			  n->nlmsg_flags, event) <= 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	if (unicast)
+		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+			      n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
+			      struct nlmsghdr *n, struct tcf_proto *tp,
+			      struct Qdisc *q, u32 parent,
+			      void *fh, bool unicast, bool *last)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	int err;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
+			  n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	err = tp->ops->delete(tp, fh, last);
+	if (err) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	if (unicast)
+		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+			      n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
+				 struct Qdisc *q, u32 parent,
+				 struct nlmsghdr *n,
+				 struct tcf_chain *chain, int event)
+{
+	struct tcf_proto *tp;
+
+	for (tp = rtnl_dereference(chain->filter_chain);
+	     tp; tp = rtnl_dereference(tp->next))
+		tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false);
+}
+
 /* Add/change/delete/get a filter node */
 
 static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -428,7 +721,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	struct tcf_proto *tp;
 	const struct Qdisc_class_ops *cops;
 	unsigned long cl;
-	unsigned long fh;
+	void *fh;
 	int err;
 	int tp_created;
 
@@ -498,7 +791,7 @@ replay:
 
 	/* Do we search for filter, attached to class? */
 	if (TC_H_MIN(parent)) {
-		cl = cops->get(q, parent);
+		cl = cops->find(q, parent);
 		if (cl == 0)
 			return -ENOENT;
 	}
@@ -523,7 +816,8 @@ replay:
 	}
 
 	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
-		tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
+		tfilter_notify_chain(net, skb, q, parent, n,
+				     chain, RTM_DELTFILTER);
 		tcf_chain_flush(chain);
 		err = 0;
 		goto errout;
@@ -567,10 +861,10 @@ replay:
 
 	fh = tp->ops->get(tp, t->tcm_handle);
 
-	if (fh == 0) {
+	if (!fh) {
 		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
 			tcf_chain_tp_remove(chain, &chain_info, tp);
-			tfilter_notify(net, skb, n, tp, fh,
+			tfilter_notify(net, skb, n, tp, q, parent, fh,
 				       RTM_DELTFILTER, false);
 			tcf_proto_destroy(tp);
 			err = 0;
@@ -595,18 +889,17 @@ replay:
 			}
 			break;
 		case RTM_DELTFILTER:
-			err = tp->ops->delete(tp, fh, &last);
+			err = tfilter_del_notify(net, skb, n, tp, q, parent,
+						 fh, false, &last);
 			if (err)
 				goto errout;
-			tfilter_notify(net, skb, n, tp, t->tcm_handle,
-				       RTM_DELTFILTER, false);
 			if (last) {
 				tcf_chain_tp_remove(chain, &chain_info, tp);
 				tcf_proto_destroy(tp);
 			}
 			goto errout;
 		case RTM_GETTFILTER:
-			err = tfilter_notify(net, skb, n, tp, fh,
+			err = tfilter_notify(net, skb, n, tp, q, parent, fh,
 					     RTM_NEWTFILTER, true);
 			goto errout;
 		default:
@@ -620,7 +913,8 @@ replay:
 	if (err == 0) {
 		if (tp_created)
 			tcf_chain_tp_insert(chain, &chain_info, tp);
-		tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
+		tfilter_notify(net, skb, n, tp, q, parent, fh,
+			       RTM_NEWTFILTER, false);
 	} else {
 		if (tp_created)
 			tcf_proto_destroy(tp);
@@ -629,94 +923,33 @@ replay:
 errout:
 	if (chain)
 		tcf_chain_put(chain);
-	if (cl)
-		cops->put(q, cl);
 	if (err == -EAGAIN)
 		/* Replay the request. */
 		goto replay;
 	return err;
 }
 
-static int tcf_fill_node(struct net *net, struct sk_buff *skb,
-			 struct tcf_proto *tp, unsigned long fh, u32 portid,
-			 u32 seq, u16 flags, int event)
-{
-	struct tcmsg *tcm;
-	struct nlmsghdr  *nlh;
-	unsigned char *b = skb_tail_pointer(skb);
-
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
-	if (!nlh)
-		goto out_nlmsg_trim;
-	tcm = nlmsg_data(nlh);
-	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm__pad1 = 0;
-	tcm->tcm__pad2 = 0;
-	tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
-	tcm->tcm_parent = tp->classid;
-	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
-	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
-		goto nla_put_failure;
-	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
-		goto nla_put_failure;
-	tcm->tcm_handle = fh;
-	if (RTM_DELTFILTER != event) {
-		tcm->tcm_handle = 0;
-		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
-			goto nla_put_failure;
-	}
-	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
-	return skb->len;
-
-out_nlmsg_trim:
-nla_put_failure:
-	nlmsg_trim(skb, b);
-	return -1;
-}
-
-static int tfilter_notify(struct net *net, struct sk_buff *oskb,
-			  struct nlmsghdr *n, struct tcf_proto *tp,
-			  unsigned long fh, int event, bool unicast)
-{
-	struct sk_buff *skb;
-	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb)
-		return -ENOBUFS;
-
-	if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
-			  n->nlmsg_flags, event) <= 0) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
-
-	if (unicast)
-		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
-
-	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-			      n->nlmsg_flags & NLM_F_ECHO);
-}
-
 struct tcf_dump_args {
 	struct tcf_walker w;
 	struct sk_buff *skb;
 	struct netlink_callback *cb;
+	struct Qdisc *q;
+	u32 parent;
 };
 
-static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
-			 struct tcf_walker *arg)
+static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
 {
 	struct tcf_dump_args *a = (void *)arg;
 	struct net *net = sock_net(a->skb->sk);
 
-	return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
+	return tcf_fill_node(net, a->skb, tp, a->q, a->parent,
+			     n, NETLINK_CB(a->cb->skb).portid,
 			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
 			     RTM_NEWTFILTER);
 }
 
-static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
-			   struct netlink_callback *cb,
+static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
+			   struct sk_buff *skb, struct netlink_callback *cb,
 			   long index_start, long *p_index)
 {
 	struct net *net = sock_net(skb->sk);
@@ -738,7 +971,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
 			memset(&cb->args[1], 0,
 			       sizeof(cb->args) - sizeof(cb->args[0]));
 		if (cb->args[1] == 0) {
-			if (tcf_fill_node(net, skb, tp, 0,
+			if (tcf_fill_node(net, skb, tp, q, parent, 0,
 					  NETLINK_CB(cb->skb).portid,
 					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					  RTM_NEWTFILTER) <= 0)
@@ -751,6 +984,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
 		arg.w.fn = tcf_node_dump;
 		arg.skb = skb;
 		arg.cb = cb;
+		arg.q = q;
+		arg.parent = parent;
 		arg.w.stop = 0;
 		arg.w.skip = cb->args[1] - 1;
 		arg.w.count = 0;
@@ -776,6 +1011,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	const struct Qdisc_class_ops *cops;
 	long index_start;
 	long index;
+	u32 parent;
 	int err;
 
 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
@@ -789,25 +1025,28 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	if (!dev)
 		return skb->len;
 
-	if (!tcm->tcm_parent)
+	parent = tcm->tcm_parent;
+	if (!parent) {
 		q = dev->qdisc;
-	else
+		parent = q->handle;
+	} else {
 		q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+	}
 	if (!q)
 		goto out;
 	cops = q->ops->cl_ops;
 	if (!cops)
-		goto errout;
+		goto out;
 	if (!cops->tcf_block)
-		goto errout;
+		goto out;
 	if (TC_H_MIN(tcm->tcm_parent)) {
-		cl = cops->get(q, tcm->tcm_parent);
+		cl = cops->find(q, tcm->tcm_parent);
 		if (cl == 0)
-			goto errout;
+			goto out;
 	}
 	block = cops->tcf_block(q, cl);
 	if (!block)
-		goto errout;
+		goto out;
 
 	index_start = cb->args[0];
 	index = 0;
@@ -816,15 +1055,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 		if (tca[TCA_CHAIN] &&
 		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
 			continue;
-		if (!tcf_chain_dump(chain, skb, cb, index_start, &index))
+		if (!tcf_chain_dump(chain, q, parent, skb, cb,
+				    index_start, &index))
 			break;
 	}
 
 	cb->args[0] = index;
 
-errout:
-	if (cl)
-		cops->put(q, cl);
 out:
 	return skb->len;
 }
@@ -834,6 +1071,7 @@ void tcf_exts_destroy(struct tcf_exts *exts)
 #ifdef CONFIG_NET_CLS_ACT
 	LIST_HEAD(actions);
 
+	ASSERT_RTNL();
 	tcf_exts_to_list(exts, &actions);
 	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
 	kfree(exts->actions);
@@ -872,6 +1110,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 				exts->actions[i++] = act;
 			exts->nr_actions = i;
 		}
+		exts->net = net;
 	}
 #else
 	if ((exts->action && tb[exts->action]) ||
@@ -883,18 +1122,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 }
 EXPORT_SYMBOL(tcf_exts_validate);
 
-void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
-		     struct tcf_exts *src)
+void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct tcf_exts old = *dst;
 
-	tcf_tree_lock(tp);
-	dst->nr_actions = src->nr_actions;
-	dst->actions = src->actions;
-	dst->type = src->type;
-	tcf_tree_unlock(tp);
-
+	*dst = *src;
 	tcf_exts_destroy(&old);
 #endif
 }
@@ -915,7 +1148,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 #ifdef CONFIG_NET_CLS_ACT
 	struct nlattr *nest;
 
-	if (exts->action && exts->nr_actions) {
+	if (exts->action && tcf_exts_has_actions(exts)) {
 		/*
 		 * again for backward compatible mode - we want
 		 * to work with both old and new modes of entering
@@ -965,36 +1198,67 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
 }
 EXPORT_SYMBOL(tcf_exts_dump_stats);
 
-int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
-		     struct net_device **hw_dev)
+static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
+				       enum tc_setup_type type,
+				       void *type_data, bool err_stop)
 {
+	int ok_count = 0;
 #ifdef CONFIG_NET_CLS_ACT
 	const struct tc_action *a;
-	LIST_HEAD(actions);
+	struct net_device *dev;
+	int i, ret;
 
-	if (tc_no_actions(exts))
-		return -EINVAL;
+	if (!tcf_exts_has_actions(exts))
+		return 0;
 
-	tcf_exts_to_list(exts, &actions);
-	list_for_each_entry(a, &actions, list) {
-		if (a->ops->get_dev) {
-			a->ops->get_dev(a, dev_net(dev), hw_dev);
-			break;
-		}
+	for (i = 0; i < exts->nr_actions; i++) {
+		a = exts->actions[i];
+		if (!a->ops->get_dev)
+			continue;
+		dev = a->ops->get_dev(a);
+		if (!dev)
+			continue;
+		ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
+		if (ret < 0)
+			return ret;
+		ok_count += ret;
 	}
-	if (*hw_dev)
-		return 0;
 #endif
-	return -EOPNOTSUPP;
+	return ok_count;
+}
+
+int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
+		     enum tc_setup_type type, void *type_data, bool err_stop)
+{
+	int ok_count;
+	int ret;
+
+	ret = tcf_block_cb_call(block, type, type_data, err_stop);
+	if (ret < 0)
+		return ret;
+	ok_count = ret;
+
+	if (!exts)
+		return ok_count;
+	ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
+	if (ret < 0)
+		return ret;
+	ok_count += ret;
+
+	return ok_count;
 }
-EXPORT_SYMBOL(tcf_exts_get_dev);
+EXPORT_SYMBOL(tc_setup_cb_call);
 
 static int __init tc_filter_init(void)
 {
-	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
+	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
+	if (!tc_filter_wq)
+		return -ENOMEM;
+
+	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
-		      tc_dump_tfilter, NULL);
+		      tc_dump_tfilter, 0);
 
 	return 0;
 }
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index c4fd63a068f9..5f169ded347e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,13 +17,14 @@
 #include <linux/errno.h>
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
+#include <linux/idr.h>
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
 struct basic_head {
-	u32			hgenerator;
 	struct list_head	flist;
+	struct idr		handle_idr;
 	struct rcu_head		rcu;
 };
 
@@ -34,7 +35,10 @@ struct basic_filter {
 	struct tcf_result	res;
 	struct tcf_proto	*tp;
 	struct list_head	link;
-	struct rcu_head		rcu;
+	union {
+		struct work_struct	work;
+		struct rcu_head		rcu;
+	};
 };
 
 static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -56,20 +60,18 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	return -1;
 }
 
-static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
+static void *basic_get(struct tcf_proto *tp, u32 handle)
 {
-	unsigned long l = 0UL;
 	struct basic_head *head = rtnl_dereference(tp->root);
 	struct basic_filter *f;
 
 	list_for_each_entry(f, &head->flist, link) {
 		if (f->handle == handle) {
-			l = (unsigned long) f;
-			break;
+			return f;
 		}
 	}
 
-	return l;
+	return NULL;
 }
 
 static int basic_init(struct tcf_proto *tp)
@@ -80,19 +82,36 @@ static int basic_init(struct tcf_proto *tp)
 	if (head == NULL)
 		return -ENOBUFS;
 	INIT_LIST_HEAD(&head->flist);
+	idr_init(&head->handle_idr);
 	rcu_assign_pointer(tp->root, head);
 	return 0;
 }
 
-static void basic_delete_filter(struct rcu_head *head)
+static void __basic_delete_filter(struct basic_filter *f)
 {
-	struct basic_filter *f = container_of(head, struct basic_filter, rcu);
-
 	tcf_exts_destroy(&f->exts);
 	tcf_em_tree_destroy(&f->ematches);
+	tcf_exts_put_net(&f->exts);
 	kfree(f);
 }
 
+static void basic_delete_filter_work(struct work_struct *work)
+{
+	struct basic_filter *f = container_of(work, struct basic_filter, work);
+
+	rtnl_lock();
+	__basic_delete_filter(f);
+	rtnl_unlock();
+}
+
+static void basic_delete_filter(struct rcu_head *head)
+{
+	struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+
+	INIT_WORK(&f->work, basic_delete_filter_work);
+	tcf_queue_work(&f->work);
+}
+
 static void basic_destroy(struct tcf_proto *tp)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
@@ -101,18 +120,25 @@ static void basic_destroy(struct tcf_proto *tp)
 	list_for_each_entry_safe(f, n, &head->flist, link) {
 		list_del_rcu(&f->link);
 		tcf_unbind_filter(tp, &f->res);
-		call_rcu(&f->rcu, basic_delete_filter);
+		idr_remove_ext(&head->handle_idr, f->handle);
+		if (tcf_exts_get_net(&f->exts))
+			call_rcu(&f->rcu, basic_delete_filter);
+		else
+			__basic_delete_filter(f);
 	}
+	idr_destroy(&head->handle_idr);
 	kfree_rcu(head, rcu);
 }
 
-static int basic_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
-	struct basic_filter *f = (struct basic_filter *) arg;
+	struct basic_filter *f = arg;
 
 	list_del_rcu(&f->link);
 	tcf_unbind_filter(tp, &f->res);
+	idr_remove_ext(&head->handle_idr, f->handle);
+	tcf_exts_get_net(&f->exts);
 	call_rcu(&f->rcu, basic_delete_filter);
 	*last = list_empty(&head->flist);
 	return 0;
@@ -129,44 +155,34 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 			   struct nlattr *est, bool ovr)
 {
 	int err;
-	struct tcf_exts e;
-	struct tcf_ematch_tree t;
 
-	err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
-	if (err < 0)
-		goto errout;
 
-	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
+	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &f->ematches);
 	if (err < 0)
-		goto errout;
+		return err;
 
 	if (tb[TCA_BASIC_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
-	tcf_exts_change(tp, &f->exts, &e);
-	tcf_em_tree_change(tp, &f->ematches, &t);
 	f->tp = tp;
-
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static int basic_change(struct net *net, struct sk_buff *in_skb,
 			struct tcf_proto *tp, unsigned long base, u32 handle,
-			struct nlattr **tca, unsigned long *arg, bool ovr)
+			struct nlattr **tca, void **arg, bool ovr)
 {
 	int err;
 	struct basic_head *head = rtnl_dereference(tp->root);
 	struct nlattr *tb[TCA_BASIC_MAX + 1];
 	struct basic_filter *fold = (struct basic_filter *) *arg;
 	struct basic_filter *fnew;
+	unsigned long idr_index;
 
 	if (tca[TCA_OPTIONS] == NULL)
 		return -EINVAL;
@@ -189,35 +205,36 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto errout;
 
-	err = -EINVAL;
 	if (handle) {
 		fnew->handle = handle;
-	} else if (fold) {
-		fnew->handle = fold->handle;
+		if (!fold) {
+			err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+					    handle, handle + 1, GFP_KERNEL);
+			if (err)
+				goto errout;
+		}
 	} else {
-		unsigned int i = 0x80000000;
-		do {
-			if (++head->hgenerator == 0x7FFFFFFF)
-				head->hgenerator = 1;
-		} while (--i > 0 && basic_get(tp, head->hgenerator));
-
-		if (i <= 0) {
-			pr_err("Insufficient number of handles\n");
+		err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+				    1, 0x7FFFFFFF, GFP_KERNEL);
+		if (err)
 			goto errout;
-		}
-
-		fnew->handle = head->hgenerator;
+		fnew->handle = idr_index;
 	}
 
 	err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
-	if (err < 0)
+	if (err < 0) {
+		if (!fold)
+			idr_remove_ext(&head->handle_idr, fnew->handle);
 		goto errout;
+	}
 
-	*arg = (unsigned long)fnew;
+	*arg = fnew;
 
 	if (fold) {
+		idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
 		list_replace_rcu(&fold->link, &fnew->link);
 		tcf_unbind_filter(tp, &fold->res);
+		tcf_exts_get_net(&fold->exts);
 		call_rcu(&fold->rcu, basic_delete_filter);
 	} else {
 		list_add_rcu(&fnew->link, &head->flist);
@@ -239,7 +256,7 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 		if (arg->count < arg->skip)
 			goto skip;
 
-		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+		if (arg->fn(tp, f, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
@@ -248,10 +265,18 @@ skip:
 	}
 }
 
-static int basic_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct basic_filter *f = fh;
+
+	if (f && f->res.classid == classid)
+		f->res.class = cl;
+}
+
+static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		      struct sk_buff *skb, struct tcmsg *t)
 {
-	struct basic_filter *f = (struct basic_filter *) fh;
+	struct basic_filter *f = fh;
 	struct nlattr *nest;
 
 	if (f == NULL)
@@ -293,6 +318,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = {
 	.delete		=	basic_delete,
 	.walk		=	basic_walk,
 	.dump		=	basic_dump,
+	.bind_class	=	basic_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index f57bd531ba98..fb680dafac5a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -17,6 +17,7 @@
 #include <linux/skbuff.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
+#include <linux/idr.h>
 
 #include <net/rtnetlink.h>
 #include <net/pkt_cls.h>
@@ -32,7 +33,7 @@ MODULE_DESCRIPTION("TC BPF based classifier");
 
 struct cls_bpf_head {
 	struct list_head plist;
-	u32 hgen;
+	struct idr handle_idr;
 	struct rcu_head rcu;
 };
 
@@ -49,7 +50,10 @@ struct cls_bpf_prog {
 	struct sock_filter *bpf_ops;
 	const char *bpf_name;
 	struct tcf_proto *tp;
-	struct rcu_head rcu;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 };
 
 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -99,11 +103,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		} else if (at_ingress) {
 			/* It is safe to push/pull even if skb_shared() */
 			__skb_push(skb, skb->mac_len);
-			bpf_compute_data_end(skb);
+			bpf_compute_data_pointers(skb);
 			filter_res = BPF_PROG_RUN(prog->filter, skb);
 			__skb_pull(skb, skb->mac_len);
 		} else {
-			bpf_compute_data_end(skb);
+			bpf_compute_data_pointers(skb);
 			filter_res = BPF_PROG_RUN(prog->filter, skb);
 		}
 
@@ -146,35 +150,39 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
 static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			       enum tc_clsbpf_command cmd)
 {
-	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_bpf_offload bpf_offload = {};
-	struct tc_to_netdev offload;
+	bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
+	struct tcf_block *block = tp->chain->block;
+	bool skip_sw = tc_skip_sw(prog->gen_flags);
+	struct tc_cls_bpf_offload cls_bpf = {};
 	int err;
 
-	offload.type = TC_SETUP_CLSBPF;
-	offload.cls_bpf = &bpf_offload;
-
-	bpf_offload.command = cmd;
-	bpf_offload.exts = &prog->exts;
-	bpf_offload.prog = prog->filter;
-	bpf_offload.name = prog->bpf_name;
-	bpf_offload.exts_integrated = prog->exts_integrated;
-	bpf_offload.gen_flags = prog->gen_flags;
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->chain->index,
-					    tp->protocol, &offload);
+	tc_cls_common_offload_init(&cls_bpf.common, tp);
+	cls_bpf.command = cmd;
+	cls_bpf.exts = &prog->exts;
+	cls_bpf.prog = prog->filter;
+	cls_bpf.name = prog->bpf_name;
+	cls_bpf.exts_integrated = prog->exts_integrated;
+	cls_bpf.gen_flags = prog->gen_flags;
+
+	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
+	if (addorrep) {
+		if (err < 0) {
+			cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+			return err;
+		} else if (err > 0) {
+			prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+		}
+	}
 
-	if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
-		prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+	if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+		return -EINVAL;
 
-	return err;
+	return 0;
 }
 
 static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			   struct cls_bpf_prog *oldprog)
 {
-	struct net_device *dev = tp->q->dev_queue->dev;
 	struct cls_bpf_prog *obj = prog;
 	enum tc_clsbpf_command cmd;
 	bool skip_sw;
@@ -184,7 +192,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 		(oldprog && tc_skip_sw(oldprog->gen_flags));
 
 	if (oldprog && oldprog->offloaded) {
-		if (tc_should_offload(dev, tp, prog->gen_flags)) {
+		if (!tc_skip_hw(prog->gen_flags)) {
 			cmd = TC_CLSBPF_REPLACE;
 		} else if (!tc_skip_sw(prog->gen_flags)) {
 			obj = oldprog;
@@ -193,14 +201,14 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			return -EINVAL;
 		}
 	} else {
-		if (!tc_should_offload(dev, tp, prog->gen_flags))
+		if (tc_skip_hw(prog->gen_flags))
 			return skip_sw ? -EINVAL : 0;
 		cmd = TC_CLSBPF_ADD;
 	}
 
 	ret = cls_bpf_offload_cmd(tp, obj, cmd);
 	if (ret)
-		return skip_sw ? ret : 0;
+		return ret;
 
 	obj->offloaded = true;
 	if (oldprog)
@@ -244,6 +252,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
 		return -ENOBUFS;
 
 	INIT_LIST_HEAD_RCU(&head->plist);
+	idr_init(&head->handle_idr);
 	rcu_assign_pointer(tp->root, head);
 
 	return 0;
@@ -252,6 +261,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
 static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
 {
 	tcf_exts_destroy(&prog->exts);
+	tcf_exts_put_net(&prog->exts);
 
 	if (cls_bpf_is_ebpf(prog))
 		bpf_prog_put(prog->filter);
@@ -263,24 +273,42 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
 	kfree(prog);
 }
 
+static void cls_bpf_delete_prog_work(struct work_struct *work)
+{
+	struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work);
+
+	rtnl_lock();
+	__cls_bpf_delete_prog(prog);
+	rtnl_unlock();
+}
+
 static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
 {
-	__cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
+	struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
+
+	INIT_WORK(&prog->work, cls_bpf_delete_prog_work);
+	tcf_queue_work(&prog->work);
 }
 
 static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
 {
+	struct cls_bpf_head *head = rtnl_dereference(tp->root);
+
+	idr_remove_ext(&head->handle_idr, prog->handle);
 	cls_bpf_stop_offload(tp, prog);
 	list_del_rcu(&prog->link);
 	tcf_unbind_filter(tp, &prog->res);
-	call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+	if (tcf_exts_get_net(&prog->exts))
+		call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+	else
+		__cls_bpf_delete_prog(prog);
 }
 
-static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 
-	__cls_bpf_delete(tp, (struct cls_bpf_prog *) arg);
+	__cls_bpf_delete(tp, arg);
 	*last = list_empty(&head->plist);
 	return 0;
 }
@@ -293,23 +321,21 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
 	list_for_each_entry_safe(prog, tmp, &head->plist, link)
 		__cls_bpf_delete(tp, prog);
 
+	idr_destroy(&head->handle_idr);
 	kfree_rcu(head, rcu);
 }
 
-static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
+static void *cls_bpf_get(struct tcf_proto *tp, u32 handle)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 	struct cls_bpf_prog *prog;
-	unsigned long ret = 0UL;
 
 	list_for_each_entry(prog, &head->plist, link) {
-		if (prog->handle == handle) {
-			ret = (unsigned long) prog;
-			break;
-		}
+		if (prog->handle == handle)
+			return prog;
 	}
 
-	return ret;
+	return NULL;
 }
 
 static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
@@ -352,7 +378,7 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
 }
 
 static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
-				 const struct tcf_proto *tp)
+				 u32 gen_flags, const struct tcf_proto *tp)
 {
 	struct bpf_prog *fp;
 	char *name = NULL;
@@ -360,7 +386,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
 
 	bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
 
-	fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
+	if (gen_flags & TCA_CLS_FLAGS_SKIP_SW)
+		fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS,
+					   qdisc_dev(tp->q));
+	else
+		fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
 	if (IS_ERR(fp))
 		return PTR_ERR(fp);
 
@@ -382,13 +412,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
 	return 0;
 }
 
-static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
-				   struct cls_bpf_prog *prog,
-				   unsigned long base, struct nlattr **tb,
-				   struct nlattr *est, bool ovr)
+static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
+			     struct cls_bpf_prog *prog, unsigned long base,
+			     struct nlattr **tb, struct nlattr *est, bool ovr)
 {
 	bool is_bpf, is_ebpf, have_exts = false;
-	struct tcf_exts exts;
 	u32 gen_flags = 0;
 	int ret;
 
@@ -397,83 +425,51 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
 		return -EINVAL;
 
-	ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+	ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr);
 	if (ret < 0)
 		return ret;
-	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
-	if (ret < 0)
-		goto errout;
 
 	if (tb[TCA_BPF_FLAGS]) {
 		u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
 
-		if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
-			ret = -EINVAL;
-			goto errout;
-		}
+		if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
+			return -EINVAL;
 
 		have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
 	}
 	if (tb[TCA_BPF_FLAGS_GEN]) {
 		gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
 		if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
-		    !tc_flags_valid(gen_flags)) {
-			ret = -EINVAL;
-			goto errout;
-		}
+		    !tc_flags_valid(gen_flags))
+			return -EINVAL;
 	}
 
 	prog->exts_integrated = have_exts;
 	prog->gen_flags = gen_flags;
 
 	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
-		       cls_bpf_prog_from_efd(tb, prog, tp);
+		       cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
 	if (ret < 0)
-		goto errout;
+		return ret;
 
 	if (tb[TCA_BPF_CLASSID]) {
 		prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
 		tcf_bind_filter(tp, &prog->res, base);
 	}
 
-	tcf_exts_change(tp, &prog->exts, &exts);
 	return 0;
-
-errout:
-	tcf_exts_destroy(&exts);
-	return ret;
-}
-
-static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
-				   struct cls_bpf_head *head)
-{
-	unsigned int i = 0x80000000;
-	u32 handle;
-
-	do {
-		if (++head->hgen == 0x7FFFFFFF)
-			head->hgen = 1;
-	} while (--i > 0 && cls_bpf_get(tp, head->hgen));
-
-	if (unlikely(i == 0)) {
-		pr_err("Insufficient number of handles\n");
-		handle = 0;
-	} else {
-		handle = head->hgen;
-	}
-
-	return handle;
 }
 
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 			  struct tcf_proto *tp, unsigned long base,
 			  u32 handle, struct nlattr **tca,
-			  unsigned long *arg, bool ovr)
+			  void **arg, bool ovr)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
-	struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
+	struct cls_bpf_prog *oldprog = *arg;
 	struct nlattr *tb[TCA_BPF_MAX + 1];
 	struct cls_bpf_prog *prog;
+	unsigned long idr_index;
 	int ret;
 
 	if (tca[TCA_OPTIONS] == NULL)
@@ -499,22 +495,30 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 		}
 	}
 
-	if (handle == 0)
-		prog->handle = cls_bpf_grab_new_handle(tp, head);
-	else
+	if (handle == 0) {
+		ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
+				    1, 0x7FFFFFFF, GFP_KERNEL);
+		if (ret)
+			goto errout;
+		prog->handle = idr_index;
+	} else {
+		if (!oldprog) {
+			ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
+					    handle, handle + 1, GFP_KERNEL);
+			if (ret)
+				goto errout;
+		}
 		prog->handle = handle;
-	if (prog->handle == 0) {
-		ret = -EINVAL;
-		goto errout;
 	}
 
-	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE],
-				      ovr);
+	ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
 	if (ret < 0)
-		goto errout;
+		goto errout_idr;
 
 	ret = cls_bpf_offload(tp, prog, oldprog);
 	if (ret) {
+		if (!oldprog)
+			idr_remove_ext(&head->handle_idr, prog->handle);
 		__cls_bpf_delete_prog(prog);
 		return ret;
 	}
@@ -523,16 +527,21 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 		prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
 	if (oldprog) {
+		idr_replace_ext(&head->handle_idr, prog, handle);
 		list_replace_rcu(&oldprog->link, &prog->link);
 		tcf_unbind_filter(tp, &oldprog->res);
+		tcf_exts_get_net(&oldprog->exts);
 		call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
 	} else {
 		list_add_rcu(&prog->link, &head->plist);
 	}
 
-	*arg = (unsigned long) prog;
+	*arg = prog;
 	return 0;
 
+errout_idr:
+	if (!oldprog)
+		idr_remove_ext(&head->handle_idr, prog->handle);
 errout:
 	tcf_exts_destroy(&prog->exts);
 	kfree(prog);
@@ -578,10 +587,10 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
 	return 0;
 }
 
-static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh,
 			struct sk_buff *skb, struct tcmsg *tm)
 {
-	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
+	struct cls_bpf_prog *prog = fh;
 	struct nlattr *nest;
 	u32 bpf_flags = 0;
 	int ret;
@@ -631,6 +640,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct cls_bpf_prog *prog = fh;
+
+	if (prog && prog->res.classid == classid)
+		prog->res.class = cl;
+}
+
 static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -639,7 +656,7 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	list_for_each_entry(prog, &head->plist, link) {
 		if (arg->count < arg->skip)
 			goto skip;
-		if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
+		if (arg->fn(tp, prog, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
@@ -659,6 +676,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
 	.delete		=	cls_bpf_delete,
 	.walk		=	cls_bpf_walk,
 	.dump		=	cls_bpf_dump,
+	.bind_class	=	cls_bpf_bind_class,
 };
 
 static int __init cls_bpf_init_mod(void)
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 12ce547eea04..309d5899265f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,7 +23,10 @@ struct cls_cgroup_head {
 	struct tcf_exts		exts;
 	struct tcf_ematch_tree	ematches;
 	struct tcf_proto	*tp;
-	struct rcu_head		rcu;
+	union {
+		struct work_struct	work;
+		struct rcu_head		rcu;
+	};
 };
 
 static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -43,9 +46,9 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	return tcf_exts_exec(skb, &head->exts, res);
 }
 
-static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
+static void *cls_cgroup_get(struct tcf_proto *tp, u32 handle)
 {
-	return 0UL;
+	return NULL;
 }
 
 static int cls_cgroup_init(struct tcf_proto *tp)
@@ -57,27 +60,42 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
 	[TCA_CGROUP_EMATCHES]	= { .type = NLA_NESTED },
 };
 
+static void __cls_cgroup_destroy(struct cls_cgroup_head *head)
+{
+	tcf_exts_destroy(&head->exts);
+	tcf_em_tree_destroy(&head->ematches);
+	tcf_exts_put_net(&head->exts);
+	kfree(head);
+}
+
+static void cls_cgroup_destroy_work(struct work_struct *work)
+{
+	struct cls_cgroup_head *head = container_of(work,
+						    struct cls_cgroup_head,
+						    work);
+	rtnl_lock();
+	__cls_cgroup_destroy(head);
+	rtnl_unlock();
+}
+
 static void cls_cgroup_destroy_rcu(struct rcu_head *root)
 {
 	struct cls_cgroup_head *head = container_of(root,
 						    struct cls_cgroup_head,
 						    rcu);
 
-	tcf_exts_destroy(&head->exts);
-	tcf_em_tree_destroy(&head->ematches);
-	kfree(head);
+	INIT_WORK(&head->work, cls_cgroup_destroy_work);
+	tcf_queue_work(&head->work);
 }
 
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 			     struct tcf_proto *tp, unsigned long base,
 			     u32 handle, struct nlattr **tca,
-			     unsigned long *arg, bool ovr)
+			     void **arg, bool ovr)
 {
 	struct nlattr *tb[TCA_CGROUP_MAX + 1];
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 	struct cls_cgroup_head *new;
-	struct tcf_ematch_tree t;
-	struct tcf_exts e;
 	int err;
 
 	if (!tca[TCA_OPTIONS])
@@ -103,27 +121,19 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto errout;
 
-	err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr);
 	if (err < 0)
 		goto errout;
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
-	if (err < 0) {
-		tcf_exts_destroy(&e);
-		goto errout;
-	}
 
-	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
-	if (err < 0) {
-		tcf_exts_destroy(&e);
+	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches);
+	if (err < 0)
 		goto errout;
-	}
-
-	tcf_exts_change(tp, &new->exts, &e);
-	tcf_em_tree_change(tp, &new->ematches, &t);
 
 	rcu_assign_pointer(tp->root, new);
-	if (head)
+	if (head) {
+		tcf_exts_get_net(&head->exts);
 		call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+	}
 	return 0;
 errout:
 	tcf_exts_destroy(&new->exts);
@@ -136,11 +146,15 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 
 	/* Head can still be NULL due to cls_cgroup_init(). */
-	if (head)
-		call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+	if (head) {
+		if (tcf_exts_get_net(&head->exts))
+			call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+		else
+			__cls_cgroup_destroy(head);
+	}
 }
 
-static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	return -EOPNOTSUPP;
 }
@@ -152,7 +166,7 @@ static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	if (arg->count < arg->skip)
 		goto skip;
 
-	if (arg->fn(tp, (unsigned long) head, arg) < 0) {
+	if (arg->fn(tp, head, arg) < 0) {
 		arg->stop = 1;
 		return;
 	}
@@ -160,7 +174,7 @@ skip:
 	arg->count++;
 }
 
-static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh,
 			   struct sk_buff *skb, struct tcmsg *t)
 {
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 3065752b9cda..25c2a888e1f0 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -57,7 +57,10 @@ struct flow_filter {
 	u32			divisor;
 	u32			baseclass;
 	u32			hashrnd;
-	struct rcu_head		rcu;
+	union {
+		struct work_struct	work;
+		struct rcu_head		rcu;
+	};
 };
 
 static inline u32 addr_fold(void *addr)
@@ -345,9 +348,9 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	return -1;
 }
 
-static void flow_perturbation(unsigned long arg)
+static void flow_perturbation(struct timer_list *t)
 {
-	struct flow_filter *f = (struct flow_filter *)arg;
+	struct flow_filter *f = from_timer(f, t, perturb_timer);
 
 	get_random_bytes(&f->hashrnd, 4);
 	if (f->perturb_period)
@@ -369,27 +372,41 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
 	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
 };
 
-static void flow_destroy_filter(struct rcu_head *head)
+static void __flow_destroy_filter(struct flow_filter *f)
 {
-	struct flow_filter *f = container_of(head, struct flow_filter, rcu);
-
 	del_timer_sync(&f->perturb_timer);
 	tcf_exts_destroy(&f->exts);
 	tcf_em_tree_destroy(&f->ematches);
+	tcf_exts_put_net(&f->exts);
 	kfree(f);
 }
 
+static void flow_destroy_filter_work(struct work_struct *work)
+{
+	struct flow_filter *f = container_of(work, struct flow_filter, work);
+
+	rtnl_lock();
+	__flow_destroy_filter(f);
+	rtnl_unlock();
+}
+
+static void flow_destroy_filter(struct rcu_head *head)
+{
+	struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+
+	INIT_WORK(&f->work, flow_destroy_filter_work);
+	tcf_queue_work(&f->work);
+}
+
 static int flow_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       unsigned long *arg, bool ovr)
+		       void **arg, bool ovr)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *fold, *fnew;
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_FLOW_MAX + 1];
-	struct tcf_exts e;
-	struct tcf_ematch_tree t;
 	unsigned int nkeys = 0;
 	unsigned int perturb_period = 0;
 	u32 baseclass = 0;
@@ -425,31 +442,27 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 			return -EOPNOTSUPP;
 	}
 
-	err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
-	if (err < 0)
-		goto err1;
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
-	if (err < 0)
-		goto err1;
+	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+	if (!fnew)
+		return -ENOBUFS;
 
-	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
+	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches);
 	if (err < 0)
 		goto err1;
 
-	err = -ENOBUFS;
-	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
-	if (!fnew)
+	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	if (err < 0)
 		goto err2;
 
-	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr);
 	if (err < 0)
-		goto err3;
+		goto err2;
 
-	fold = (struct flow_filter *)*arg;
+	fold = *arg;
 	if (fold) {
 		err = -EINVAL;
 		if (fold->handle != handle && handle)
-			goto err3;
+			goto err2;
 
 		/* Copy fold into fnew */
 		fnew->tp = fold->tp;
@@ -469,36 +482,39 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 		if (tb[TCA_FLOW_MODE])
 			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
 		if (mode != FLOW_MODE_HASH && nkeys > 1)
-			goto err3;
+			goto err2;
 
 		if (mode == FLOW_MODE_HASH)
 			perturb_period = fold->perturb_period;
 		if (tb[TCA_FLOW_PERTURB]) {
 			if (mode != FLOW_MODE_HASH)
-				goto err3;
+				goto err2;
 			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
 		}
 	} else {
 		err = -EINVAL;
 		if (!handle)
-			goto err3;
+			goto err2;
 		if (!tb[TCA_FLOW_KEYS])
-			goto err3;
+			goto err2;
 
 		mode = FLOW_MODE_MAP;
 		if (tb[TCA_FLOW_MODE])
 			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
 		if (mode != FLOW_MODE_HASH && nkeys > 1)
-			goto err3;
+			goto err2;
 
 		if (tb[TCA_FLOW_PERTURB]) {
 			if (mode != FLOW_MODE_HASH)
-				goto err3;
+				goto err2;
 			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
 		}
 
-		if (TC_H_MAJ(baseclass) == 0)
-			baseclass = TC_H_MAKE(tp->q->handle, baseclass);
+		if (TC_H_MAJ(baseclass) == 0) {
+			struct Qdisc *q = tcf_block_q(tp->chain->block);
+
+			baseclass = TC_H_MAKE(q->handle, baseclass);
+		}
 		if (TC_H_MIN(baseclass) == 0)
 			baseclass = TC_H_MAKE(baseclass, 1);
 
@@ -508,11 +524,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 		get_random_bytes(&fnew->hashrnd, 4);
 	}
 
-	setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation,
-			       (unsigned long)fnew);
-
-	tcf_exts_change(tp, &fnew->exts, &e);
-	tcf_em_tree_change(tp, &fnew->ematches, &t);
+	timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
 
 	netif_keep_dst(qdisc_dev(tp->q));
 
@@ -541,33 +553,34 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	if (perturb_period)
 		mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
 
-	if (*arg == 0)
+	if (!*arg)
 		list_add_tail_rcu(&fnew->list, &head->filters);
 	else
 		list_replace_rcu(&fold->list, &fnew->list);
 
-	*arg = (unsigned long)fnew;
+	*arg = fnew;
 
-	if (fold)
+	if (fold) {
+		tcf_exts_get_net(&fold->exts);
 		call_rcu(&fold->rcu, flow_destroy_filter);
+	}
 	return 0;
 
-err3:
-	tcf_exts_destroy(&fnew->exts);
 err2:
-	tcf_em_tree_destroy(&t);
-	kfree(fnew);
+	tcf_exts_destroy(&fnew->exts);
+	tcf_em_tree_destroy(&fnew->ematches);
 err1:
-	tcf_exts_destroy(&e);
+	kfree(fnew);
 	return err;
 }
 
-static int flow_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
-	struct flow_filter *f = (struct flow_filter *)arg;
+	struct flow_filter *f = arg;
 
 	list_del_rcu(&f->list);
+	tcf_exts_get_net(&f->exts);
 	call_rcu(&f->rcu, flow_destroy_filter);
 	*last = list_empty(&head->filters);
 	return 0;
@@ -592,26 +605,29 @@ static void flow_destroy(struct tcf_proto *tp)
 
 	list_for_each_entry_safe(f, next, &head->filters, list) {
 		list_del_rcu(&f->list);
-		call_rcu(&f->rcu, flow_destroy_filter);
+		if (tcf_exts_get_net(&f->exts))
+			call_rcu(&f->rcu, flow_destroy_filter);
+		else
+			__flow_destroy_filter(f);
 	}
 	kfree_rcu(head, rcu);
 }
 
-static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
+static void *flow_get(struct tcf_proto *tp, u32 handle)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *f;
 
 	list_for_each_entry(f, &head->filters, list)
 		if (f->handle == handle)
-			return (unsigned long)f;
-	return 0;
+			return f;
+	return NULL;
 }
 
-static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct flow_filter *f = (struct flow_filter *)fh;
+	struct flow_filter *f = fh;
 	struct nlattr *nest;
 
 	if (f == NULL)
@@ -677,7 +693,7 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	list_for_each_entry(f, &head->filters, list) {
 		if (arg->count < arg->skip)
 			goto skip;
-		if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+		if (arg->fn(tp, f, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7832eb93379b..543a3e875d05 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -68,7 +68,6 @@ struct cls_fl_head {
 	struct rhashtable ht;
 	struct fl_flow_mask mask;
 	struct flow_dissector dissector;
-	u32 hgen;
 	bool mask_assigned;
 	struct list_head filters;
 	struct rhashtable_params ht_params;
@@ -76,6 +75,7 @@ struct cls_fl_head {
 		struct work_struct work;
 		struct rcu_head	rcu;
 	};
+	struct idr handle_idr;
 };
 
 struct cls_fl_filter {
@@ -87,8 +87,10 @@ struct cls_fl_filter {
 	struct list_head list;
 	u32 handle;
 	u32 flags;
-	struct rcu_head	rcu;
-	struct tc_to_netdev tc;
+	union {
+		struct work_struct work;
+		struct rcu_head	rcu;
+	};
 	struct net_device *hw_dev;
 };
 
@@ -153,37 +155,12 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	struct cls_fl_filter *f;
 	struct fl_flow_key skb_key;
 	struct fl_flow_key skb_mkey;
-	struct ip_tunnel_info *info;
 
 	if (!atomic_read(&head->ht.nelems))
 		return -1;
 
 	fl_clear_masked_range(&skb_key, &head->mask);
 
-	info = skb_tunnel_info(skb);
-	if (info) {
-		struct ip_tunnel_key *key = &info->key;
-
-		switch (ip_tunnel_info_af(info)) {
-		case AF_INET:
-			skb_key.enc_control.addr_type =
-				FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-			skb_key.enc_ipv4.src = key->u.ipv4.src;
-			skb_key.enc_ipv4.dst = key->u.ipv4.dst;
-			break;
-		case AF_INET6:
-			skb_key.enc_control.addr_type =
-				FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-			skb_key.enc_ipv6.src = key->u.ipv6.src;
-			skb_key.enc_ipv6.dst = key->u.ipv6.dst;
-			break;
-		}
-
-		skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
-		skb_key.enc_tp.src = key->tp_src;
-		skb_key.enc_tp.dst = key->tp_dst;
-	}
-
 	skb_key.indev_ifindex = skb->skb_iif;
 	/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
 	 * so do it rather here.
@@ -211,36 +188,46 @@ static int fl_init(struct tcf_proto *tp)
 
 	INIT_LIST_HEAD_RCU(&head->filters);
 	rcu_assign_pointer(tp->root, head);
+	idr_init(&head->handle_idr);
 
 	return 0;
 }
 
-static void fl_destroy_filter(struct rcu_head *head)
+static void __fl_destroy_filter(struct cls_fl_filter *f)
 {
-	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
-
 	tcf_exts_destroy(&f->exts);
+	tcf_exts_put_net(&f->exts);
 	kfree(f);
 }
 
-static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void fl_destroy_filter_work(struct work_struct *work)
 {
-	struct tc_cls_flower_offload offload = {0};
-	struct net_device *dev = f->hw_dev;
-	struct tc_to_netdev *tc = &f->tc;
+	struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work);
 
-	if (!tc_can_offload(dev, tp))
-		return;
+	rtnl_lock();
+	__fl_destroy_filter(f);
+	rtnl_unlock();
+}
 
-	offload.command = TC_CLSFLOWER_DESTROY;
-	offload.prio = tp->prio;
-	offload.cookie = (unsigned long)f;
+static void fl_destroy_filter(struct rcu_head *head)
+{
+	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+	INIT_WORK(&f->work, fl_destroy_filter_work);
+	tcf_queue_work(&f->work);
+}
+
+static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
+{
+	struct tc_cls_flower_offload cls_flower = {};
+	struct tcf_block *block = tp->chain->block;
 
-	tc->type = TC_SETUP_CLSFLOWER;
-	tc->cls_flower = &offload;
+	tc_cls_common_offload_init(&cls_flower.common, tp);
+	cls_flower.command = TC_CLSFLOWER_DESTROY;
+	cls_flower.cookie = (unsigned long) f;
 
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
-				      tp->protocol, tc);
+	tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+			 &cls_flower, false);
 }
 
 static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -248,72 +235,63 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 				struct fl_flow_key *mask,
 				struct cls_fl_filter *f)
 {
-	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_flower_offload offload = {0};
-	struct tc_to_netdev *tc = &f->tc;
+	struct tc_cls_flower_offload cls_flower = {};
+	struct tcf_block *block = tp->chain->block;
+	bool skip_sw = tc_skip_sw(f->flags);
 	int err;
 
-	if (!tc_can_offload(dev, tp)) {
-		if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
-		    (f->hw_dev && !tc_can_offload(f->hw_dev, tp))) {
-			f->hw_dev = dev;
-			return tc_skip_sw(f->flags) ? -EINVAL : 0;
-		}
-		dev = f->hw_dev;
-		tc->egress_dev = true;
-	} else {
-		f->hw_dev = dev;
+	tc_cls_common_offload_init(&cls_flower.common, tp);
+	cls_flower.command = TC_CLSFLOWER_REPLACE;
+	cls_flower.cookie = (unsigned long) f;
+	cls_flower.dissector = dissector;
+	cls_flower.mask = mask;
+	cls_flower.key = &f->mkey;
+	cls_flower.exts = &f->exts;
+	cls_flower.classid = f->res.classid;
+
+	err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+			       &cls_flower, skip_sw);
+	if (err < 0) {
+		fl_hw_destroy_filter(tp, f);
+		return err;
+	} else if (err > 0) {
+		f->flags |= TCA_CLS_FLAGS_IN_HW;
 	}
 
-	offload.command = TC_CLSFLOWER_REPLACE;
-	offload.prio = tp->prio;
-	offload.cookie = (unsigned long)f;
-	offload.dissector = dissector;
-	offload.mask = mask;
-	offload.key = &f->mkey;
-	offload.exts = &f->exts;
-
-	tc->type = TC_SETUP_CLSFLOWER;
-	tc->cls_flower = &offload;
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->chain->index, tp->protocol, tc);
-	if (!err)
-		f->flags |= TCA_CLS_FLAGS_IN_HW;
+	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
+		return -EINVAL;
 
-	if (tc_skip_sw(f->flags))
-		return err;
 	return 0;
 }
 
 static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
-	struct tc_cls_flower_offload offload = {0};
-	struct net_device *dev = f->hw_dev;
-	struct tc_to_netdev *tc = &f->tc;
-
-	if (!tc_can_offload(dev, tp))
-		return;
-
-	offload.command = TC_CLSFLOWER_STATS;
-	offload.prio = tp->prio;
-	offload.cookie = (unsigned long)f;
-	offload.exts = &f->exts;
+	struct tc_cls_flower_offload cls_flower = {};
+	struct tcf_block *block = tp->chain->block;
 
-	tc->type = TC_SETUP_CLSFLOWER;
-	tc->cls_flower = &offload;
+	tc_cls_common_offload_init(&cls_flower.common, tp);
+	cls_flower.command = TC_CLSFLOWER_STATS;
+	cls_flower.cookie = (unsigned long) f;
+	cls_flower.exts = &f->exts;
+	cls_flower.classid = f->res.classid;
 
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-				      tp->chain->index, tp->protocol, tc);
+	tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+			 &cls_flower, false);
 }
 
 static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+
+	idr_remove_ext(&head->handle_idr, f->handle);
 	list_del_rcu(&f->list);
 	if (!tc_skip_hw(f->flags))
 		fl_hw_destroy_filter(tp, f);
 	tcf_unbind_filter(tp, &f->res);
-	call_rcu(&f->rcu, fl_destroy_filter);
+	if (tcf_exts_get_net(&f->exts))
+		call_rcu(&f->rcu, fl_destroy_filter);
+	else
+		__fl_destroy_filter(f);
 }
 
 static void fl_destroy_sleepable(struct work_struct *work)
@@ -341,20 +319,17 @@ static void fl_destroy(struct tcf_proto *tp)
 
 	list_for_each_entry_safe(f, next, &head->filters, list)
 		__fl_delete(tp, f);
+	idr_destroy(&head->handle_idr);
 
 	__module_get(THIS_MODULE);
 	call_rcu(&head->rcu, fl_destroy_rcu);
 }
 
-static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+static void *fl_get(struct tcf_proto *tp, u32 handle)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
-	struct cls_fl_filter *f;
 
-	list_for_each_entry(f, &head->filters, list)
-		if (f->handle == handle)
-			return (unsigned long) f;
-	return 0;
+	return idr_find_ext(&head->handle_idr, handle);
 }
 
 static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
@@ -852,15 +827,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 			unsigned long base, struct nlattr **tb,
 			struct nlattr *est, bool ovr)
 {
-	struct tcf_exts e;
 	int err;
 
-	err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
-	if (err < 0)
-		goto errout;
 
 	if (tb[TCA_FLOWER_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
@@ -869,50 +840,25 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 
 	err = fl_set_key(net, tb, &f->key, &mask->key);
 	if (err)
-		goto errout;
+		return err;
 
 	fl_mask_update_range(mask);
 	fl_set_masked_key(&f->mkey, &f->key, mask);
 
-	tcf_exts_change(tp, &f->exts, &e);
-
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
-}
-
-static u32 fl_grab_new_handle(struct tcf_proto *tp,
-			      struct cls_fl_head *head)
-{
-	unsigned int i = 0x80000000;
-	u32 handle;
-
-	do {
-		if (++head->hgen == 0x7FFFFFFF)
-			head->hgen = 1;
-	} while (--i > 0 && fl_get(tp, head->hgen));
-
-	if (unlikely(i == 0)) {
-		pr_err("Insufficient number of handles\n");
-		handle = 0;
-	} else {
-		handle = head->hgen;
-	}
-
-	return handle;
 }
 
 static int fl_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle, struct nlattr **tca,
-		     unsigned long *arg, bool ovr)
+		     void **arg, bool ovr)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
-	struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+	struct cls_fl_filter *fold = *arg;
 	struct cls_fl_filter *fnew;
 	struct nlattr **tb;
 	struct fl_flow_mask mask = {};
+	unsigned long idr_index;
 	int err;
 
 	if (!tca[TCA_OPTIONS])
@@ -943,41 +889,49 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		goto errout;
 
 	if (!handle) {
-		handle = fl_grab_new_handle(tp, head);
-		if (!handle) {
-			err = -EINVAL;
+		err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+				    1, 0x80000000, GFP_KERNEL);
+		if (err)
 			goto errout;
-		}
+		fnew->handle = idr_index;
+	}
+
+	/* user specifies a handle and it doesn't exist */
+	if (handle && !fold) {
+		err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+				    handle, handle + 1, GFP_KERNEL);
+		if (err)
+			goto errout;
+		fnew->handle = idr_index;
 	}
-	fnew->handle = handle;
 
 	if (tb[TCA_FLOWER_FLAGS]) {
 		fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
 
 		if (!tc_flags_valid(fnew->flags)) {
 			err = -EINVAL;
-			goto errout;
+			goto errout_idr;
 		}
 	}
 
 	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
 	if (err)
-		goto errout;
+		goto errout_idr;
 
 	err = fl_check_assign_mask(head, &mask);
 	if (err)
-		goto errout;
+		goto errout_idr;
 
 	if (!tc_skip_sw(fnew->flags)) {
 		if (!fold && fl_lookup(head, &fnew->mkey)) {
 			err = -EEXIST;
-			goto errout;
+			goto errout_idr;
 		}
 
 		err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
 					     head->ht_params);
 		if (err)
-			goto errout;
+			goto errout_idr;
 	}
 
 	if (!tc_skip_hw(fnew->flags)) {
@@ -986,7 +940,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 					   &mask.key,
 					   fnew);
 		if (err)
-			goto errout;
+			goto errout_idr;
 	}
 
 	if (!tc_in_hw(fnew->flags))
@@ -1000,11 +954,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 			fl_hw_destroy_filter(tp, fold);
 	}
 
-	*arg = (unsigned long) fnew;
+	*arg = fnew;
 
 	if (fold) {
+		fnew->handle = handle;
+		idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
 		list_replace_rcu(&fold->list, &fnew->list);
 		tcf_unbind_filter(tp, &fold->res);
+		tcf_exts_get_net(&fold->exts);
 		call_rcu(&fold->rcu, fl_destroy_filter);
 	} else {
 		list_add_tail_rcu(&fnew->list, &head->filters);
@@ -1013,6 +970,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	kfree(tb);
 	return 0;
 
+errout_idr:
+	if (fnew->handle)
+		idr_remove_ext(&head->handle_idr, fnew->handle);
 errout:
 	tcf_exts_destroy(&fnew->exts);
 	kfree(fnew);
@@ -1021,10 +981,10 @@ errout_tb:
 	return err;
 }
 
-static int fl_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int fl_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
-	struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+	struct cls_fl_filter *f = arg;
 
 	if (!tc_skip_sw(f->flags))
 		rhashtable_remove_fast(&head->ht, &f->ht_node,
@@ -1042,7 +1002,7 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	list_for_each_entry_rcu(f, &head->filters, list) {
 		if (arg->count < arg->skip)
 			goto skip;
-		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+		if (arg->fn(tp, f, arg) < 0) {
 			arg->stop = 1;
 			break;
 		}
@@ -1177,11 +1137,11 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
 	return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
 }
 
-static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		   struct sk_buff *skb, struct tcmsg *t)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
-	struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+	struct cls_fl_filter *f = fh;
 	struct nlattr *nest;
 	struct fl_flow_key *key, *mask;
 
@@ -1383,6 +1343,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct cls_fl_filter *f = fh;
+
+	if (f && f->res.classid == classid)
+		f->res.class = cl;
+}
+
 static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.kind		= "flower",
 	.classify	= fl_classify,
@@ -1393,6 +1361,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.delete		= fl_delete,
 	.walk		= fl_walk,
 	.dump		= fl_dump,
+	.bind_class	= fl_bind_class,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index d3885362e017..20f0de1a960a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -28,6 +28,7 @@
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
+#include <net/sch_generic.h>
 
 #define HTSIZE 256
 
@@ -46,7 +47,10 @@ struct fw_filter {
 #endif /* CONFIG_NET_CLS_IND */
 	struct tcf_exts		exts;
 	struct tcf_proto	*tp;
-	struct rcu_head		rcu;
+	union {
+		struct work_struct	work;
+		struct rcu_head		rcu;
+	};
 };
 
 static u32 fw_hash(u32 handle)
@@ -83,9 +87,11 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			}
 		}
 	} else {
+		struct Qdisc *q = tcf_block_q(tp->chain->block);
+
 		/* Old method: classify the packet using its skb mark. */
 		if (id && (TC_H_MAJ(id) == 0 ||
-			   !(TC_H_MAJ(id ^ tp->q->handle)))) {
+			   !(TC_H_MAJ(id ^ q->handle)))) {
 			res->classid = id;
 			res->class = 0;
 			return 0;
@@ -95,20 +101,20 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	return -1;
 }
 
-static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
+static void *fw_get(struct tcf_proto *tp, u32 handle)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f;
 
 	if (head == NULL)
-		return 0;
+		return NULL;
 
 	f = rtnl_dereference(head->ht[fw_hash(handle)]);
 	for (; f; f = rtnl_dereference(f->next)) {
 		if (f->id == handle)
-			return (unsigned long)f;
+			return f;
 	}
-	return 0;
+	return NULL;
 }
 
 static int fw_init(struct tcf_proto *tp)
@@ -119,12 +125,28 @@ static int fw_init(struct tcf_proto *tp)
 	return 0;
 }
 
+static void __fw_delete_filter(struct fw_filter *f)
+{
+	tcf_exts_destroy(&f->exts);
+	tcf_exts_put_net(&f->exts);
+	kfree(f);
+}
+
+static void fw_delete_filter_work(struct work_struct *work)
+{
+	struct fw_filter *f = container_of(work, struct fw_filter, work);
+
+	rtnl_lock();
+	__fw_delete_filter(f);
+	rtnl_unlock();
+}
+
 static void fw_delete_filter(struct rcu_head *head)
 {
 	struct fw_filter *f = container_of(head, struct fw_filter, rcu);
 
-	tcf_exts_destroy(&f->exts);
-	kfree(f);
+	INIT_WORK(&f->work, fw_delete_filter_work);
+	tcf_queue_work(&f->work);
 }
 
 static void fw_destroy(struct tcf_proto *tp)
@@ -141,16 +163,19 @@ static void fw_destroy(struct tcf_proto *tp)
 			RCU_INIT_POINTER(head->ht[h],
 					 rtnl_dereference(f->next));
 			tcf_unbind_filter(tp, &f->res);
-			call_rcu(&f->rcu, fw_delete_filter);
+			if (tcf_exts_get_net(&f->exts))
+				call_rcu(&f->rcu, fw_delete_filter);
+			else
+				__fw_delete_filter(f);
 		}
 	}
 	kfree_rcu(head, rcu);
 }
 
-static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int fw_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
-	struct fw_filter *f = (struct fw_filter *)arg;
+	struct fw_filter *f = arg;
 	struct fw_filter __rcu **fp;
 	struct fw_filter *pfp;
 	int ret = -EINVAL;
@@ -166,6 +191,7 @@ static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
 		if (pfp == f) {
 			RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
 			tcf_unbind_filter(tp, &f->res);
+			tcf_exts_get_net(&f->exts);
 			call_rcu(&f->rcu, fw_delete_filter);
 			ret = 0;
 			break;
@@ -190,22 +216,17 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
 	[TCA_FW_MASK]		= { .type = NLA_U32 },
 };
 
-static int
-fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
-		struct nlattr **tb, struct nlattr **tca, unsigned long base,
-		bool ovr)
+static int fw_set_parms(struct net *net, struct tcf_proto *tp,
+			struct fw_filter *f, struct nlattr **tb,
+			struct nlattr **tca, unsigned long base, bool ovr)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
-	struct tcf_exts e;
 	u32 mask;
 	int err;
 
-	err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
-	if (err < 0)
-		goto errout;
 
 	if (tb[TCA_FW_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
@@ -216,10 +237,8 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
 	if (tb[TCA_FW_INDEV]) {
 		int ret;
 		ret = tcf_change_indev(net, tb[TCA_FW_INDEV]);
-		if (ret < 0) {
-			err = ret;
-			goto errout;
-		}
+		if (ret < 0)
+			return ret;
 		f->ifindex = ret;
 	}
 #endif /* CONFIG_NET_CLS_IND */
@@ -228,25 +247,20 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
 	if (tb[TCA_FW_MASK]) {
 		mask = nla_get_u32(tb[TCA_FW_MASK]);
 		if (mask != head->mask)
-			goto errout;
+			return err;
 	} else if (head->mask != 0xFFFFFFFF)
-		goto errout;
-
-	tcf_exts_change(tp, &f->exts, &e);
+		return err;
 
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static int fw_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
-		     u32 handle, struct nlattr **tca, unsigned long *arg,
+		     u32 handle, struct nlattr **tca, void **arg,
 		     bool ovr)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
-	struct fw_filter *f = (struct fw_filter *) *arg;
+	struct fw_filter *f = *arg;
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_FW_MAX + 1];
 	int err;
@@ -282,7 +296,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 			return err;
 		}
 
-		err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr);
+		err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr);
 		if (err < 0) {
 			tcf_exts_destroy(&fnew->exts);
 			kfree(fnew);
@@ -298,9 +312,10 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 		RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next));
 		rcu_assign_pointer(*fp, fnew);
 		tcf_unbind_filter(tp, &f->res);
+		tcf_exts_get_net(&f->exts);
 		call_rcu(&f->rcu, fw_delete_filter);
 
-		*arg = (unsigned long)fnew;
+		*arg = fnew;
 		return err;
 	}
 
@@ -330,14 +345,14 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 	f->id = handle;
 	f->tp = tp;
 
-	err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
+	err = fw_set_parms(net, tp, f, tb, tca, base, ovr);
 	if (err < 0)
 		goto errout;
 
 	RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]);
 	rcu_assign_pointer(head->ht[fw_hash(handle)], f);
 
-	*arg = (unsigned long)f;
+	*arg = f;
 	return 0;
 
 errout:
@@ -366,7 +381,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 				arg->count++;
 				continue;
 			}
-			if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+			if (arg->fn(tp, f, arg) < 0) {
 				arg->stop = 1;
 				return;
 			}
@@ -375,11 +390,11 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
-static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		   struct sk_buff *skb, struct tcmsg *t)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
-	struct fw_filter *f = (struct fw_filter *)fh;
+	struct fw_filter *f = fh;
 	struct nlattr *nest;
 
 	if (f == NULL)
@@ -387,7 +402,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 
 	t->tcm_handle = f->id;
 
-	if (!f->res.classid && !tcf_exts_is_available(&f->exts))
+	if (!f->res.classid && !tcf_exts_has_actions(&f->exts))
 		return skb->len;
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
@@ -424,6 +439,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void fw_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct fw_filter *f = fh;
+
+	if (f && f->res.classid == classid)
+		f->res.class = cl;
+}
+
 static struct tcf_proto_ops cls_fw_ops __read_mostly = {
 	.kind		=	"fw",
 	.classify	=	fw_classify,
@@ -434,6 +457,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = {
 	.delete		=	fw_delete,
 	.walk		=	fw_walk,
 	.dump		=	fw_dump,
+	.bind_class	=	fw_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 9dc26c32cf32..66d4e0099158 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -21,7 +21,10 @@ struct cls_mall_head {
 	struct tcf_result res;
 	u32 handle;
 	u32 flags;
-	struct rcu_head	rcu;
+	union {
+		struct work_struct work;
+		struct rcu_head	rcu;
+	};
 };
 
 static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -32,6 +35,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	if (tc_skip_sw(head->flags))
 		return -1;
 
+	*res = head->res;
 	return tcf_exts_exec(skb, &head->exts, res);
 }
 
@@ -40,74 +44,93 @@ static int mall_init(struct tcf_proto *tp)
 	return 0;
 }
 
+static void __mall_destroy(struct cls_mall_head *head)
+{
+	tcf_exts_destroy(&head->exts);
+	tcf_exts_put_net(&head->exts);
+	kfree(head);
+}
+
+static void mall_destroy_work(struct work_struct *work)
+{
+	struct cls_mall_head *head = container_of(work, struct cls_mall_head,
+						  work);
+	rtnl_lock();
+	__mall_destroy(head);
+	rtnl_unlock();
+}
+
 static void mall_destroy_rcu(struct rcu_head *rcu)
 {
 	struct cls_mall_head *head = container_of(rcu, struct cls_mall_head,
 						  rcu);
 
-	tcf_exts_destroy(&head->exts);
-	kfree(head);
+	INIT_WORK(&head->work, mall_destroy_work);
+	tcf_queue_work(&head->work);
+}
+
+static void mall_destroy_hw_filter(struct tcf_proto *tp,
+				   struct cls_mall_head *head,
+				   unsigned long cookie)
+{
+	struct tc_cls_matchall_offload cls_mall = {};
+	struct tcf_block *block = tp->chain->block;
+
+	tc_cls_common_offload_init(&cls_mall.common, tp);
+	cls_mall.command = TC_CLSMATCHALL_DESTROY;
+	cls_mall.cookie = cookie;
+
+	tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
 }
 
 static int mall_replace_hw_filter(struct tcf_proto *tp,
 				  struct cls_mall_head *head,
 				  unsigned long cookie)
 {
-	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_to_netdev offload;
-	struct tc_cls_matchall_offload mall_offload = {0};
+	struct tc_cls_matchall_offload cls_mall = {};
+	struct tcf_block *block = tp->chain->block;
+	bool skip_sw = tc_skip_sw(head->flags);
 	int err;
 
-	offload.type = TC_SETUP_MATCHALL;
-	offload.cls_mall = &mall_offload;
-	offload.cls_mall->command = TC_CLSMATCHALL_REPLACE;
-	offload.cls_mall->exts = &head->exts;
-	offload.cls_mall->cookie = cookie;
+	tc_cls_common_offload_init(&cls_mall.common, tp);
+	cls_mall.command = TC_CLSMATCHALL_REPLACE;
+	cls_mall.exts = &head->exts;
+	cls_mall.cookie = cookie;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->chain->index,
-					    tp->protocol, &offload);
-	if (!err)
+	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
+			       &cls_mall, skip_sw);
+	if (err < 0) {
+		mall_destroy_hw_filter(tp, head, cookie);
+		return err;
+	} else if (err > 0) {
 		head->flags |= TCA_CLS_FLAGS_IN_HW;
+	}
 
-	return err;
-}
+	if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
+		return -EINVAL;
 
-static void mall_destroy_hw_filter(struct tcf_proto *tp,
-				   struct cls_mall_head *head,
-				   unsigned long cookie)
-{
-	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_to_netdev offload;
-	struct tc_cls_matchall_offload mall_offload = {0};
-
-	offload.type = TC_SETUP_MATCHALL;
-	offload.cls_mall = &mall_offload;
-	offload.cls_mall->command = TC_CLSMATCHALL_DESTROY;
-	offload.cls_mall->exts = NULL;
-	offload.cls_mall->cookie = cookie;
-
-	dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
-				      tp->protocol, &offload);
+	return 0;
 }
 
 static void mall_destroy(struct tcf_proto *tp)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
-	struct net_device *dev = tp->q->dev_queue->dev;
 
 	if (!head)
 		return;
 
-	if (tc_should_offload(dev, tp, head->flags))
+	if (!tc_skip_hw(head->flags))
 		mall_destroy_hw_filter(tp, head, (unsigned long) head);
 
-	call_rcu(&head->rcu, mall_destroy_rcu);
+	if (tcf_exts_get_net(&head->exts))
+		call_rcu(&head->rcu, mall_destroy_rcu);
+	else
+		__mall_destroy(head);
 }
 
-static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
+static void *mall_get(struct tcf_proto *tp, u32 handle)
 {
-	return 0UL;
+	return NULL;
 }
 
 static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
@@ -120,36 +143,25 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 			  unsigned long base, struct nlattr **tb,
 			  struct nlattr *est, bool ovr)
 {
-	struct tcf_exts e;
 	int err;
 
-	err = tcf_exts_init(&e, TCA_MATCHALL_ACT, 0);
-	if (err)
-		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr);
 	if (err < 0)
-		goto errout;
+		return err;
 
 	if (tb[TCA_MATCHALL_CLASSID]) {
 		head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
 		tcf_bind_filter(tp, &head->res, base);
 	}
-
-	tcf_exts_change(tp, &head->exts, &e);
-
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static int mall_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       unsigned long *arg, bool ovr)
+		       void **arg, bool ovr)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
-	struct net_device *dev = tp->q->dev_queue->dev;
 	struct nlattr *tb[TCA_MATCHALL_MAX + 1];
 	struct cls_mall_head *new;
 	u32 flags = 0;
@@ -189,20 +201,16 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 	if (err)
 		goto err_set_parms;
 
-	if (tc_should_offload(dev, tp, flags)) {
+	if (!tc_skip_hw(new->flags)) {
 		err = mall_replace_hw_filter(tp, new, (unsigned long) new);
-		if (err) {
-			if (tc_skip_sw(flags))
-				goto err_replace_hw_filter;
-			else
-				err = 0;
-		}
+		if (err)
+			goto err_replace_hw_filter;
 	}
 
 	if (!tc_in_hw(new->flags))
 		new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
 
-	*arg = (unsigned long) head;
+	*arg = head;
 	rcu_assign_pointer(tp->root, new);
 	return 0;
 
@@ -214,7 +222,7 @@ err_exts_init:
 	return err;
 }
 
-static int mall_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int mall_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	return -EOPNOTSUPP;
 }
@@ -225,16 +233,16 @@ static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 
 	if (arg->count < arg->skip)
 		goto skip;
-	if (arg->fn(tp, (unsigned long) head, arg) < 0)
+	if (arg->fn(tp, head, arg) < 0)
 		arg->stop = 1;
 skip:
 	arg->count++;
 }
 
-static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct cls_mall_head *head = (struct cls_mall_head *) fh;
+	struct cls_mall_head *head = fh;
 	struct nlattr *nest;
 
 	if (!head)
@@ -268,6 +276,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void mall_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct cls_mall_head *head = fh;
+
+	if (head && head->res.classid == classid)
+		head->res.class = cl;
+}
+
 static struct tcf_proto_ops cls_mall_ops __read_mostly = {
 	.kind		= "matchall",
 	.classify	= mall_classify,
@@ -278,6 +294,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = {
 	.delete		= mall_delete,
 	.walk		= mall_walk,
 	.dump		= mall_dump,
+	.bind_class	= mall_bind_class,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index d63d5502ee02..ac9a5b8825b9 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -57,7 +57,10 @@ struct route4_filter {
 	u32			handle;
 	struct route4_bucket	*bkt;
 	struct tcf_proto	*tp;
-	struct rcu_head		rcu;
+	union {
+		struct work_struct	work;
+		struct rcu_head		rcu;
+	};
 };
 
 #define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
@@ -113,7 +116,7 @@ static inline int route4_hash_wild(void)
 #define ROUTE4_APPLY_RESULT()					\
 {								\
 	*res = f->res;						\
-	if (tcf_exts_is_available(&f->exts)) {			\
+	if (tcf_exts_has_actions(&f->exts)) {			\
 		int r = tcf_exts_exec(skb, &f->exts, res);	\
 		if (r < 0) {					\
 			dont_cache = 1;				\
@@ -216,7 +219,7 @@ static inline u32 from_hash(u32 id)
 	return 16 + (id & 0xF);
 }
 
-static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
+static void *route4_get(struct tcf_proto *tp, u32 handle)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	struct route4_bucket *b;
@@ -225,11 +228,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
 
 	h1 = to_hash(handle);
 	if (h1 > 256)
-		return 0;
+		return NULL;
 
 	h2 = from_hash(handle >> 16);
 	if (h2 > 32)
-		return 0;
+		return NULL;
 
 	b = rtnl_dereference(head->table[h1]);
 	if (b) {
@@ -237,9 +240,9 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
 		     f;
 		     f = rtnl_dereference(f->next))
 			if (f->handle == handle)
-				return (unsigned long)f;
+				return f;
 	}
-	return 0;
+	return NULL;
 }
 
 static int route4_init(struct tcf_proto *tp)
@@ -254,12 +257,28 @@ static int route4_init(struct tcf_proto *tp)
 	return 0;
 }
 
+static void __route4_delete_filter(struct route4_filter *f)
+{
+	tcf_exts_destroy(&f->exts);
+	tcf_exts_put_net(&f->exts);
+	kfree(f);
+}
+
+static void route4_delete_filter_work(struct work_struct *work)
+{
+	struct route4_filter *f = container_of(work, struct route4_filter, work);
+
+	rtnl_lock();
+	__route4_delete_filter(f);
+	rtnl_unlock();
+}
+
 static void route4_delete_filter(struct rcu_head *head)
 {
 	struct route4_filter *f = container_of(head, struct route4_filter, rcu);
 
-	tcf_exts_destroy(&f->exts);
-	kfree(f);
+	INIT_WORK(&f->work, route4_delete_filter_work);
+	tcf_queue_work(&f->work);
 }
 
 static void route4_destroy(struct tcf_proto *tp)
@@ -284,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp)
 					next = rtnl_dereference(f->next);
 					RCU_INIT_POINTER(b->ht[h2], next);
 					tcf_unbind_filter(tp, &f->res);
-					call_rcu(&f->rcu, route4_delete_filter);
+					if (tcf_exts_get_net(&f->exts))
+						call_rcu(&f->rcu, route4_delete_filter);
+					else
+						__route4_delete_filter(f);
 				}
 			}
 			RCU_INIT_POINTER(head->table[h1], NULL);
@@ -294,10 +316,10 @@ static void route4_destroy(struct tcf_proto *tp)
 	kfree_rcu(head, rcu);
 }
 
-static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int route4_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
-	struct route4_filter *f = (struct route4_filter *)arg;
+	struct route4_filter *f = arg;
 	struct route4_filter __rcu **fp;
 	struct route4_filter *nf;
 	struct route4_bucket *b;
@@ -325,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
 
 			/* Delete it */
 			tcf_unbind_filter(tp, &f->res);
+			tcf_exts_get_net(&f->exts);
 			call_rcu(&f->rcu, route4_delete_filter);
 
 			/* Strip RTNL protected tree */
@@ -372,37 +395,32 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 	struct route4_filter *fp;
 	unsigned int h1;
 	struct route4_bucket *b;
-	struct tcf_exts e;
 	int err;
 
-	err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
-	if (err < 0)
-		goto errout;
 
-	err = -EINVAL;
 	if (tb[TCA_ROUTE4_TO]) {
 		if (new && handle & 0x8000)
-			goto errout;
+			return -EINVAL;
 		to = nla_get_u32(tb[TCA_ROUTE4_TO]);
 		if (to > 0xFF)
-			goto errout;
+			return -EINVAL;
 		nhandle = to;
 	}
 
 	if (tb[TCA_ROUTE4_FROM]) {
 		if (tb[TCA_ROUTE4_IIF])
-			goto errout;
+			return -EINVAL;
 		id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
 		if (id > 0xFF)
-			goto errout;
+			return -EINVAL;
 		nhandle |= id << 16;
 	} else if (tb[TCA_ROUTE4_IIF]) {
 		id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
 		if (id > 0x7FFF)
-			goto errout;
+			return -EINVAL;
 		nhandle |= (id | 0x8000) << 16;
 	} else
 		nhandle |= 0xFFFF << 16;
@@ -410,27 +428,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 	if (handle && new) {
 		nhandle |= handle & 0x7F00;
 		if (nhandle != handle)
-			goto errout;
+			return -EINVAL;
 	}
 
 	h1 = to_hash(nhandle);
 	b = rtnl_dereference(head->table[h1]);
 	if (!b) {
-		err = -ENOBUFS;
 		b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
 		if (b == NULL)
-			goto errout;
+			return -ENOBUFS;
 
 		rcu_assign_pointer(head->table[h1], b);
 	} else {
 		unsigned int h2 = from_hash(nhandle >> 16);
 
-		err = -EEXIST;
 		for (fp = rtnl_dereference(b->ht[h2]);
 		     fp;
 		     fp = rtnl_dereference(fp->next))
 			if (fp->handle == f->handle)
-				goto errout;
+				return -EEXIST;
 	}
 
 	if (tb[TCA_ROUTE4_TO])
@@ -450,17 +466,12 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
-	tcf_exts_change(tp, &f->exts, &e);
-
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static int route4_change(struct net *net, struct sk_buff *in_skb,
 			 struct tcf_proto *tp, unsigned long base, u32 handle,
-			 struct nlattr **tca, unsigned long *arg, bool ovr)
+			 struct nlattr **tca, void **arg, bool ovr)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	struct route4_filter __rcu **fp;
@@ -479,7 +490,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		return err;
 
-	fold = (struct route4_filter *)*arg;
+	fold = *arg;
 	if (fold && handle && fold->handle != handle)
 			return -EINVAL;
 
@@ -537,9 +548,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 	}
 
 	route4_reset_fastmap(head);
-	*arg = (unsigned long)f;
+	*arg = f;
 	if (fold) {
 		tcf_unbind_filter(tp, &fold->res);
+		tcf_exts_get_net(&fold->exts);
 		call_rcu(&fold->rcu, route4_delete_filter);
 	}
 	return 0;
@@ -576,7 +588,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 						arg->count++;
 						continue;
 					}
-					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+					if (arg->fn(tp, f, arg) < 0) {
 						arg->stop = 1;
 						return;
 					}
@@ -587,10 +599,10 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
-static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		       struct sk_buff *skb, struct tcmsg *t)
 {
-	struct route4_filter *f = (struct route4_filter *)fh;
+	struct route4_filter *f = fh;
 	struct nlattr *nest;
 	u32 id;
 
@@ -636,6 +648,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void route4_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct route4_filter *f = fh;
+
+	if (f && f->res.classid == classid)
+		f->res.class = cl;
+}
+
 static struct tcf_proto_ops cls_route4_ops __read_mostly = {
 	.kind		=	"route",
 	.classify	=	route4_classify,
@@ -646,6 +666,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = {
 	.delete		=	route4_delete,
 	.walk		=	route4_walk,
 	.dump		=	route4_dump,
+	.bind_class	=	route4_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 0d9d07798699..cf325625c99d 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -97,7 +97,10 @@ struct rsvp_filter {
 
 	u32				handle;
 	struct rsvp_session		*sess;
-	struct rcu_head			rcu;
+	union {
+		struct work_struct		work;
+		struct rcu_head			rcu;
+	};
 };
 
 static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
@@ -248,7 +251,7 @@ static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
 	BUG_ON(1);
 }
 
-static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
+static void *rsvp_get(struct tcf_proto *tp, u32 handle)
 {
 	struct rsvp_head *head = rtnl_dereference(tp->root);
 	struct rsvp_session *s;
@@ -257,17 +260,17 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
 	unsigned int h2 = (handle >> 8) & 0xFF;
 
 	if (h2 > 16)
-		return 0;
+		return NULL;
 
 	for (s = rtnl_dereference(head->ht[h1]); s;
 	     s = rtnl_dereference(s->next)) {
 		for (f = rtnl_dereference(s->ht[h2]); f;
 		     f = rtnl_dereference(f->next)) {
 			if (f->handle == handle)
-				return (unsigned long)f;
+				return f;
 		}
 	}
-	return 0;
+	return NULL;
 }
 
 static int rsvp_init(struct tcf_proto *tp)
@@ -282,12 +285,28 @@ static int rsvp_init(struct tcf_proto *tp)
 	return -ENOBUFS;
 }
 
+static void __rsvp_delete_filter(struct rsvp_filter *f)
+{
+	tcf_exts_destroy(&f->exts);
+	tcf_exts_put_net(&f->exts);
+	kfree(f);
+}
+
+static void rsvp_delete_filter_work(struct work_struct *work)
+{
+	struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
+
+	rtnl_lock();
+	__rsvp_delete_filter(f);
+	rtnl_unlock();
+}
+
 static void rsvp_delete_filter_rcu(struct rcu_head *head)
 {
 	struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
 
-	tcf_exts_destroy(&f->exts);
-	kfree(f);
+	INIT_WORK(&f->work, rsvp_delete_filter_work);
+	tcf_queue_work(&f->work);
 }
 
 static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
@@ -297,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 	 * grace period, since converted-to-rcu actions are relying on that
 	 * in cleanup() callback
 	 */
-	call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+	if (tcf_exts_get_net(&f->exts))
+		call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+	else
+		__rsvp_delete_filter(f);
 }
 
 static void rsvp_destroy(struct tcf_proto *tp)
@@ -328,10 +350,10 @@ static void rsvp_destroy(struct tcf_proto *tp)
 	kfree_rcu(data, rcu);
 }
 
-static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct rsvp_head *head = rtnl_dereference(tp->root);
-	struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
+	struct rsvp_filter *nfp, *f = arg;
 	struct rsvp_filter __rcu **fp;
 	unsigned int h = f->handle;
 	struct rsvp_session __rcu **sp;
@@ -389,7 +411,7 @@ static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
 		if ((data->hgenerator += 0x10000) == 0)
 			data->hgenerator = 0x10000;
 		h = data->hgenerator|salt;
-		if (rsvp_get(tp, h) == 0)
+		if (!rsvp_get(tp, h))
 			return h;
 	}
 	return 0;
@@ -464,7 +486,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
-		       unsigned long *arg, bool ovr)
+		       void **arg, bool ovr)
 {
 	struct rsvp_head *data = rtnl_dereference(tp->root);
 	struct rsvp_filter *f, *nfp;
@@ -493,7 +515,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto errout2;
 
-	f = (struct rsvp_filter *)*arg;
+	f = *arg;
 	if (f) {
 		/* Node exists: adjust only classid */
 		struct rsvp_filter *n;
@@ -518,7 +540,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 			tcf_bind_filter(tp, &n->res, base);
 		}
 
-		tcf_exts_change(tp, &n->exts, &e);
+		tcf_exts_change(&n->exts, &e);
 		rsvp_replace(tp, n, handle);
 		return 0;
 	}
@@ -591,7 +613,7 @@ insert:
 			if (f->tunnelhdr == 0)
 				tcf_bind_filter(tp, &f->res, base);
 
-			tcf_exts_change(tp, &f->exts, &e);
+			tcf_exts_change(&f->exts, &e);
 
 			fp = &s->ht[h2];
 			for (nfp = rtnl_dereference(*fp); nfp;
@@ -604,7 +626,7 @@ insert:
 			RCU_INIT_POINTER(f->next, nfp);
 			rcu_assign_pointer(*fp, f);
 
-			*arg = (unsigned long)f;
+			*arg = f;
 			return 0;
 		}
 	}
@@ -663,7 +685,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 						arg->count++;
 						continue;
 					}
-					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+					if (arg->fn(tp, f, arg) < 0) {
 						arg->stop = 1;
 						return;
 					}
@@ -674,10 +696,10 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
-static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct rsvp_filter *f = (struct rsvp_filter *)fh;
+	struct rsvp_filter *f = fh;
 	struct rsvp_session *s;
 	struct nlattr *nest;
 	struct tc_rsvp_pinfo pinfo;
@@ -723,6 +745,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct rsvp_filter *f = fh;
+
+	if (f && f->res.classid == classid)
+		f->res.class = cl;
+}
+
 static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 	.kind		=	RSVP_ID,
 	.classify	=	rsvp_classify,
@@ -733,6 +763,7 @@ static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 	.delete		=	rsvp_delete,
 	.walk		=	rsvp_walk,
 	.dump		=	rsvp_dump,
+	.bind_class	=	rsvp_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 8a8a58357c39..67467ae24c97 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,6 +13,7 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 #include <net/pkt_cls.h>
+#include <net/sch_generic.h>
 
 /*
  * Passing parameters to the root seems to be done more awkwardly than really
@@ -27,14 +28,20 @@
 struct tcindex_filter_result {
 	struct tcf_exts		exts;
 	struct tcf_result	res;
-	struct rcu_head		rcu;
+	union {
+		struct work_struct	work;
+		struct rcu_head		rcu;
+	};
 };
 
 struct tcindex_filter {
 	u16 key;
 	struct tcindex_filter_result result;
 	struct tcindex_filter __rcu *next;
-	struct rcu_head rcu;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 };
 
 
@@ -52,7 +59,7 @@ struct tcindex_data {
 
 static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
 {
-	return tcf_exts_is_predicative(&r->exts) || r->res.classid;
+	return tcf_exts_has_actions(&r->exts) || r->res.classid;
 }
 
 static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
@@ -90,9 +97,11 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 
 	f = tcindex_lookup(p, key);
 	if (!f) {
+		struct Qdisc *q = tcf_block_q(tp->chain->block);
+
 		if (!p->fall_through)
 			return -1;
-		res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
+		res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
 		res->class = 0;
 		pr_debug("alg 0x%x\n", res->classid);
 		return 0;
@@ -104,16 +113,16 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 }
 
 
-static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
+static void *tcindex_get(struct tcf_proto *tp, u32 handle)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r;
 
 	pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
 	if (p->perfect && handle >= p->alloc_hash)
-		return 0;
+		return NULL;
 	r = tcindex_lookup(p, handle);
-	return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL;
+	return r && tcindex_filter_is_set(r) ? r : NULL;
 }
 
 static int tcindex_init(struct tcf_proto *tp)
@@ -133,12 +142,46 @@ static int tcindex_init(struct tcf_proto *tp)
 	return 0;
 }
 
+static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
+{
+	tcf_exts_destroy(&r->exts);
+	tcf_exts_put_net(&r->exts);
+}
+
+static void tcindex_destroy_rexts_work(struct work_struct *work)
+{
+	struct tcindex_filter_result *r;
+
+	r = container_of(work, struct tcindex_filter_result, work);
+	rtnl_lock();
+	__tcindex_destroy_rexts(r);
+	rtnl_unlock();
+}
+
 static void tcindex_destroy_rexts(struct rcu_head *head)
 {
 	struct tcindex_filter_result *r;
 
 	r = container_of(head, struct tcindex_filter_result, rcu);
-	tcf_exts_destroy(&r->exts);
+	INIT_WORK(&r->work, tcindex_destroy_rexts_work);
+	tcf_queue_work(&r->work);
+}
+
+static void __tcindex_destroy_fexts(struct tcindex_filter *f)
+{
+	tcf_exts_destroy(&f->result.exts);
+	tcf_exts_put_net(&f->result.exts);
+	kfree(f);
+}
+
+static void tcindex_destroy_fexts_work(struct work_struct *work)
+{
+	struct tcindex_filter *f = container_of(work, struct tcindex_filter,
+						work);
+
+	rtnl_lock();
+	__tcindex_destroy_fexts(f);
+	rtnl_unlock();
 }
 
 static void tcindex_destroy_fexts(struct rcu_head *head)
@@ -146,18 +189,18 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
 	struct tcindex_filter *f = container_of(head, struct tcindex_filter,
 						rcu);
 
-	tcf_exts_destroy(&f->result.exts);
-	kfree(f);
+	INIT_WORK(&f->work, tcindex_destroy_fexts_work);
+	tcf_queue_work(&f->work);
 }
 
-static int tcindex_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+	struct tcindex_filter_result *r = arg;
 	struct tcindex_filter __rcu **walk;
 	struct tcindex_filter *f = NULL;
 
-	pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p);
+	pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
 	if (p->perfect) {
 		if (!r->res.class)
 			return -ENOENT;
@@ -182,18 +225,24 @@ found:
 	 * grace period, since converted-to-rcu actions are relying on that
 	 * in cleanup() callback
 	 */
-	if (f)
-		call_rcu(&f->rcu, tcindex_destroy_fexts);
-	else
-		call_rcu(&r->rcu, tcindex_destroy_rexts);
+	if (f) {
+		if (tcf_exts_get_net(&f->result.exts))
+			call_rcu(&f->rcu, tcindex_destroy_fexts);
+		else
+			__tcindex_destroy_fexts(f);
+	} else {
+		if (tcf_exts_get_net(&r->exts))
+			call_rcu(&r->rcu, tcindex_destroy_rexts);
+		else
+			__tcindex_destroy_rexts(r);
+	}
 
 	*last = false;
 	return 0;
 }
 
 static int tcindex_destroy_element(struct tcf_proto *tp,
-				   unsigned long arg,
-				   struct tcf_walker *walker)
+				   void *arg, struct tcf_walker *walker)
 {
 	bool last;
 
@@ -419,9 +468,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	}
 
 	if (old_r)
-		tcf_exts_change(tp, &r->exts, &e);
+		tcf_exts_change(&r->exts, &e);
 	else
-		tcf_exts_change(tp, &cr.exts, &e);
+		tcf_exts_change(&cr.exts, &e);
 
 	if (old_r && old_r != r) {
 		err = tcindex_filter_result_init(old_r);
@@ -439,7 +488,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		struct tcindex_filter *nfp;
 		struct tcindex_filter __rcu **fp;
 
-		tcf_exts_change(tp, &f->result.exts, &r->exts);
+		tcf_exts_change(&f->result.exts, &r->exts);
 
 		fp = cp->h + (handle % cp->hash);
 		for (nfp = rtnl_dereference(*fp);
@@ -471,17 +520,17 @@ errout:
 static int
 tcindex_change(struct net *net, struct sk_buff *in_skb,
 	       struct tcf_proto *tp, unsigned long base, u32 handle,
-	       struct nlattr **tca, unsigned long *arg, bool ovr)
+	       struct nlattr **tca, void **arg, bool ovr)
 {
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_TCINDEX_MAX + 1];
 	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
+	struct tcindex_filter_result *r = *arg;
 	int err;
 
 	pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
-	    "p %p,r %p,*arg 0x%lx\n",
-	    tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
+	    "p %p,r %p,*arg %p\n",
+	    tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL);
 
 	if (!opt)
 		return 0;
@@ -506,9 +555,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 			if (!p->perfect[i].res.class)
 				continue;
 			if (walker->count >= walker->skip) {
-				if (walker->fn(tp,
-				    (unsigned long) (p->perfect+i), walker)
-				     < 0) {
+				if (walker->fn(tp, p->perfect + i, walker) < 0) {
 					walker->stop = 1;
 					return;
 				}
@@ -522,8 +569,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 		for (f = rtnl_dereference(p->h[i]); f; f = next) {
 			next = rtnl_dereference(f->next);
 			if (walker->count >= walker->skip) {
-				if (walker->fn(tp, (unsigned long) &f->result,
-				    walker) < 0) {
+				if (walker->fn(tp, &f->result, walker) < 0) {
 					walker->stop = 1;
 					return;
 				}
@@ -548,14 +594,14 @@ static void tcindex_destroy(struct tcf_proto *tp)
 }
 
 
-static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
 			struct sk_buff *skb, struct tcmsg *t)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
+	struct tcindex_filter_result *r = fh;
 	struct nlattr *nest;
 
-	pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p\n",
+	pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
 		 tp, fh, skb, t, p, r);
 	pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
 
@@ -610,6 +656,14 @@ nla_put_failure:
 	return -1;
 }
 
+static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct tcindex_filter_result *r = fh;
+
+	if (r && r->res.classid == classid)
+		r->res.class = cl;
+}
+
 static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
 	.kind		=	"tcindex",
 	.classify	=	tcindex_classify,
@@ -620,6 +674,7 @@ static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
 	.delete		=	tcindex_delete,
 	.walk		=	tcindex_walk,
 	.dump		=	tcindex_dump,
+	.bind_class	=	tcindex_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 2d01195153e6..ac152b4f4247 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -40,10 +40,13 @@
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <linux/bitmap.h>
+#include <linux/netdevice.h>
+#include <linux/hash.h>
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 #include <linux/netdevice.h>
+#include <linux/idr.h>
 
 struct tc_u_knode {
 	struct tc_u_knode __rcu	*next;
@@ -66,7 +69,10 @@ struct tc_u_knode {
 	u32 __percpu		*pcpu_success;
 #endif
 	struct tcf_proto	*tp;
-	struct rcu_head		rcu;
+	union {
+		struct work_struct	work;
+		struct rcu_head		rcu;
+	};
 	/* The 'sel' field MUST be the last field in structure to allow for
 	 * tc_u32_keys allocated at end of structure.
 	 */
@@ -80,6 +86,7 @@ struct tc_u_hnode {
 	struct tc_u_common	*tp_c;
 	int			refcnt;
 	unsigned int		divisor;
+	struct idr		handle_idr;
 	struct rcu_head		rcu;
 	/* The 'ht' field MUST be the last field in structure to allow for
 	 * more entries allocated at end of structure.
@@ -89,9 +96,10 @@ struct tc_u_hnode {
 
 struct tc_u_common {
 	struct tc_u_hnode __rcu	*hlist;
-	struct Qdisc		*q;
+	struct tcf_block	*block;
 	int			refcnt;
-	u32			hgenerator;
+	struct idr		handle_idr;
+	struct hlist_node	hnode;
 	struct rcu_head		rcu;
 };
 
@@ -289,7 +297,7 @@ out:
 }
 
 
-static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
+static void *u32_get(struct tcf_proto *tp, u32 handle)
 {
 	struct tc_u_hnode *ht;
 	struct tc_u_common *tp_c = tp->data;
@@ -300,43 +308,68 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
 		ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
 
 	if (!ht)
-		return 0;
+		return NULL;
 
 	if (TC_U32_KEY(handle) == 0)
-		return (unsigned long)ht;
+		return ht;
 
-	return (unsigned long)u32_lookup_key(ht, handle);
+	return u32_lookup_key(ht, handle);
 }
 
-static u32 gen_new_htid(struct tc_u_common *tp_c)
+static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
 {
-	int i = 0x800;
+	unsigned long idr_index;
+	int err;
 
-	/* hgenerator only used inside rtnl lock it is safe to increment
+	/* This is only used inside rtnl lock, so it is safe to allocate an id
 	 * without read _copy_ update semantics
 	 */
-	do {
-		if (++tp_c->hgenerator == 0x7FF)
-			tp_c->hgenerator = 1;
-	} while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
+	err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
+			    1, 0x7FF, GFP_KERNEL);
+	if (err)
+		return 0;
+	return (u32)(idr_index | 0x800) << 20;
+}
+
+static struct hlist_head *tc_u_common_hash;
+
+#define U32_HASH_SHIFT 10
+#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
 
-	return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
+static unsigned int tc_u_hash(const struct tcf_proto *tp)
+{
+	return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
+}
+
+static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
+{
+	struct tc_u_common *tc;
+	unsigned int h;
+
+	h = tc_u_hash(tp);
+	hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
+		if (tc->block == tp->chain->block)
+			return tc;
+	}
+	return NULL;
 }
 
 static int u32_init(struct tcf_proto *tp)
 {
 	struct tc_u_hnode *root_ht;
 	struct tc_u_common *tp_c;
+	unsigned int h;
 
-	tp_c = tp->q->u32_node;
+	tp_c = tc_u_common_find(tp);
 
 	root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
 	if (root_ht == NULL)
 		return -ENOBUFS;
 
 	root_ht->refcnt++;
-	root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
+	root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
 	root_ht->prio = tp->prio;
+	idr_init(&root_ht->handle_idr);
 
 	if (tp_c == NULL) {
 		tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
@@ -344,8 +377,12 @@ static int u32_init(struct tcf_proto *tp)
 			kfree(root_ht);
 			return -ENOBUFS;
 		}
-		tp_c->q = tp->q;
-		tp->q->u32_node = tp_c;
+		tp_c->block = tp->chain->block;
+		INIT_HLIST_NODE(&tp_c->hnode);
+		idr_init(&tp_c->handle_idr);
+
+		h = tc_u_hash(tp);
+		hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
 	}
 
 	tp_c->refcnt++;
@@ -362,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
 			   bool free_pf)
 {
 	tcf_exts_destroy(&n->exts);
+	tcf_exts_put_net(&n->exts);
 	if (n->ht_down)
 		n->ht_down->refcnt--;
 #ifdef CONFIG_CLS_U32_PERF
@@ -384,11 +422,21 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
  * this the u32_delete_key_rcu variant does not free the percpu
  * statistics.
  */
+static void u32_delete_key_work(struct work_struct *work)
+{
+	struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
+
+	rtnl_lock();
+	u32_destroy_key(key->tp, key, false);
+	rtnl_unlock();
+}
+
 static void u32_delete_key_rcu(struct rcu_head *rcu)
 {
 	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
 
-	u32_destroy_key(key->tp, key, false);
+	INIT_WORK(&key->work, u32_delete_key_work);
+	tcf_queue_work(&key->work);
 }
 
 /* u32_delete_key_freepf_rcu is the rcu callback variant
@@ -398,11 +446,21 @@ static void u32_delete_key_rcu(struct rcu_head *rcu)
  * for the variant that should be used with keys return from
  * u32_init_knode()
  */
+static void u32_delete_key_freepf_work(struct work_struct *work)
+{
+	struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
+
+	rtnl_lock();
+	u32_destroy_key(key->tp, key, true);
+	rtnl_unlock();
+}
+
 static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
 {
 	struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
 
-	u32_destroy_key(key->tp, key, true);
+	INIT_WORK(&key->work, u32_delete_key_freepf_work);
+	tcf_queue_work(&key->work);
 }
 
 static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
@@ -419,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
 				RCU_INIT_POINTER(*kp, key->next);
 
 				tcf_unbind_filter(tp, &key->res);
+				tcf_exts_get_net(&key->exts);
 				call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
 				return 0;
 			}
@@ -428,111 +487,95 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
 	return 0;
 }
 
-static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
+static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
 {
-	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_u32_offload u32_offload = {0};
-	struct tc_to_netdev offload;
-
-	offload.type = TC_SETUP_CLSU32;
-	offload.cls_u32 = &u32_offload;
-
-	if (tc_should_offload(dev, tp, 0)) {
-		offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
-		offload.cls_u32->knode.handle = handle;
-		dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					      tp->chain->index, tp->protocol,
-					      &offload);
-	}
+	struct tcf_block *block = tp->chain->block;
+	struct tc_cls_u32_offload cls_u32 = {};
+
+	tc_cls_common_offload_init(&cls_u32.common, tp);
+	cls_u32.command = TC_CLSU32_DELETE_HNODE;
+	cls_u32.hnode.divisor = h->divisor;
+	cls_u32.hnode.handle = h->handle;
+	cls_u32.hnode.prio = h->prio;
+
+	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
 }
 
 static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
 				u32 flags)
 {
-	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_u32_offload u32_offload = {0};
-	struct tc_to_netdev offload;
+	struct tcf_block *block = tp->chain->block;
+	struct tc_cls_u32_offload cls_u32 = {};
+	bool skip_sw = tc_skip_sw(flags);
+	bool offloaded = false;
 	int err;
 
-	if (!tc_should_offload(dev, tp, flags))
-		return tc_skip_sw(flags) ? -EINVAL : 0;
-
-	offload.type = TC_SETUP_CLSU32;
-	offload.cls_u32 = &u32_offload;
-
-	offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
-	offload.cls_u32->hnode.divisor = h->divisor;
-	offload.cls_u32->hnode.handle = h->handle;
-	offload.cls_u32->hnode.prio = h->prio;
+	tc_cls_common_offload_init(&cls_u32.common, tp);
+	cls_u32.command = TC_CLSU32_NEW_HNODE;
+	cls_u32.hnode.divisor = h->divisor;
+	cls_u32.hnode.handle = h->handle;
+	cls_u32.hnode.prio = h->prio;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->chain->index, tp->protocol,
-					    &offload);
-	if (tc_skip_sw(flags))
+	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+	if (err < 0) {
+		u32_clear_hw_hnode(tp, h);
 		return err;
+	} else if (err > 0) {
+		offloaded = true;
+	}
+
+	if (skip_sw && !offloaded)
+		return -EINVAL;
 
 	return 0;
 }
 
-static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
+static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
 {
-	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_u32_offload u32_offload = {0};
-	struct tc_to_netdev offload;
-
-	offload.type = TC_SETUP_CLSU32;
-	offload.cls_u32 = &u32_offload;
-
-	if (tc_should_offload(dev, tp, 0)) {
-		offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
-		offload.cls_u32->hnode.divisor = h->divisor;
-		offload.cls_u32->hnode.handle = h->handle;
-		offload.cls_u32->hnode.prio = h->prio;
-
-		dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					      tp->chain->index, tp->protocol,
-					      &offload);
-	}
+	struct tcf_block *block = tp->chain->block;
+	struct tc_cls_u32_offload cls_u32 = {};
+
+	tc_cls_common_offload_init(&cls_u32.common, tp);
+	cls_u32.command = TC_CLSU32_DELETE_KNODE;
+	cls_u32.knode.handle = handle;
+
+	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
 }
 
 static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 				u32 flags)
 {
-	struct net_device *dev = tp->q->dev_queue->dev;
-	struct tc_cls_u32_offload u32_offload = {0};
-	struct tc_to_netdev offload;
+	struct tcf_block *block = tp->chain->block;
+	struct tc_cls_u32_offload cls_u32 = {};
+	bool skip_sw = tc_skip_sw(flags);
 	int err;
 
-	offload.type = TC_SETUP_CLSU32;
-	offload.cls_u32 = &u32_offload;
-
-	if (!tc_should_offload(dev, tp, flags))
-		return tc_skip_sw(flags) ? -EINVAL : 0;
-
-	offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
-	offload.cls_u32->knode.handle = n->handle;
-	offload.cls_u32->knode.fshift = n->fshift;
+	tc_cls_common_offload_init(&cls_u32.common, tp);
+	cls_u32.command = TC_CLSU32_REPLACE_KNODE;
+	cls_u32.knode.handle = n->handle;
+	cls_u32.knode.fshift = n->fshift;
 #ifdef CONFIG_CLS_U32_MARK
-	offload.cls_u32->knode.val = n->val;
-	offload.cls_u32->knode.mask = n->mask;
+	cls_u32.knode.val = n->val;
+	cls_u32.knode.mask = n->mask;
 #else
-	offload.cls_u32->knode.val = 0;
-	offload.cls_u32->knode.mask = 0;
+	cls_u32.knode.val = 0;
+	cls_u32.knode.mask = 0;
 #endif
-	offload.cls_u32->knode.sel = &n->sel;
-	offload.cls_u32->knode.exts = &n->exts;
+	cls_u32.knode.sel = &n->sel;
+	cls_u32.knode.exts = &n->exts;
 	if (n->ht_down)
-		offload.cls_u32->knode.link_handle = n->ht_down->handle;
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
-					    tp->chain->index, tp->protocol,
-					    &offload);
+		cls_u32.knode.link_handle = n->ht_down->handle;
 
-	if (!err)
+	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+	if (err < 0) {
+		u32_remove_hw_knode(tp, n->handle);
+		return err;
+	} else if (err > 0) {
 		n->flags |= TCA_CLS_FLAGS_IN_HW;
+	}
 
-	if (tc_skip_sw(flags))
-		return err;
+	if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
+		return -EINVAL;
 
 	return 0;
 }
@@ -548,7 +591,11 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 					 rtnl_dereference(n->next));
 			tcf_unbind_filter(tp, &n->res);
 			u32_remove_hw_knode(tp, n->handle);
-			call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+			idr_remove_ext(&ht->handle_idr, n->handle);
+			if (tcf_exts_get_net(&n->exts))
+				call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+			else
+				u32_destroy_key(n->tp, n, true);
 		}
 	}
 }
@@ -569,6 +616,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 	     hn = &phn->next, phn = rtnl_dereference(*hn)) {
 		if (phn == ht) {
 			u32_clear_hw_hnode(tp, ht);
+			idr_destroy(&ht->handle_idr);
+			idr_remove_ext(&tp_c->handle_idr, ht->handle);
 			RCU_INIT_POINTER(*hn, ht->next);
 			kfree_rcu(ht, rcu);
 			return 0;
@@ -602,7 +651,7 @@ static void u32_destroy(struct tcf_proto *tp)
 	if (--tp_c->refcnt == 0) {
 		struct tc_u_hnode *ht;
 
-		tp->q->u32_node = NULL;
+		hlist_del(&tp_c->hnode);
 
 		for (ht = rtnl_dereference(tp_c->hlist);
 		     ht;
@@ -616,15 +665,16 @@ static void u32_destroy(struct tcf_proto *tp)
 			kfree_rcu(ht, rcu);
 		}
 
+		idr_destroy(&tp_c->handle_idr);
 		kfree(tp_c);
 	}
 
 	tp->data = NULL;
 }
 
-static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
 {
-	struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
+	struct tc_u_hnode *ht = arg;
 	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
 	struct tc_u_common *tp_c = tp->data;
 	int ret = 0;
@@ -684,27 +734,21 @@ ret:
 	return ret;
 }
 
-#define NR_U32_NODE (1<<12)
-static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
+static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
 {
-	struct tc_u_knode *n;
-	unsigned long i;
-	unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
-					GFP_KERNEL);
-	if (!bitmap)
-		return handle | 0xFFF;
-
-	for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
-	     n;
-	     n = rtnl_dereference(n->next))
-		set_bit(TC_U32_NODE(n->handle), bitmap);
-
-	i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
-	if (i >= NR_U32_NODE)
-		i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
+	unsigned long idr_index;
+	u32 start = htid | 0x800;
+	u32 max = htid | 0xFFF;
+	u32 min = htid;
+
+	if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
+			  start, max + 1, GFP_KERNEL)) {
+		if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
+				  min + 1, max + 1, GFP_KERNEL))
+			return max;
+	}
 
-	kfree(bitmap);
-	return handle | (i >= NR_U32_NODE ? 0xFFF : i);
+	return (u32)idr_index;
 }
 
 static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -723,29 +767,24 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 			 struct tc_u_knode *n, struct nlattr **tb,
 			 struct nlattr *est, bool ovr)
 {
-	struct tcf_exts e;
 	int err;
 
-	err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
+	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
-	if (err < 0)
-		goto errout;
 
-	err = -EINVAL;
 	if (tb[TCA_U32_LINK]) {
 		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
 		struct tc_u_hnode *ht_down = NULL, *ht_old;
 
 		if (TC_U32_KEY(handle))
-			goto errout;
+			return -EINVAL;
 
 		if (handle) {
 			ht_down = u32_lookup_ht(ht->tp_c, handle);
 
 			if (ht_down == NULL)
-				goto errout;
+				return -EINVAL;
 			ht_down->refcnt++;
 		}
 
@@ -765,16 +804,11 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 		int ret;
 		ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
 		if (ret < 0)
-			goto errout;
+			return -EINVAL;
 		n->ifindex = ret;
 	}
 #endif
-	tcf_exts_change(tp, &n->exts, &e);
-
 	return 0;
-errout:
-	tcf_exts_destroy(&e);
-	return err;
 }
 
 static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
@@ -799,6 +833,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
 		if (pins->handle == n->handle)
 			break;
 
+	idr_replace_ext(&ht->handle_idr, n, n->handle);
 	RCU_INIT_POINTER(n->next, pins->next);
 	rcu_assign_pointer(*ins, n);
 }
@@ -858,7 +893,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
 
 static int u32_change(struct net *net, struct sk_buff *in_skb,
 		      struct tcf_proto *tp, unsigned long base, u32 handle,
-		      struct nlattr **tca, unsigned long *arg, bool ovr)
+		      struct nlattr **tca, void **arg, bool ovr)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
@@ -885,7 +920,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 			return -EINVAL;
 	}
 
-	n = (struct tc_u_knode *)*arg;
+	n = *arg;
 	if (n) {
 		struct tc_u_knode *new;
 
@@ -919,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 
 		u32_replace_knode(tp, tp_c, new);
 		tcf_unbind_filter(tp, &n->res);
+		tcf_exts_get_net(&n->exts);
 		call_rcu(&n->rcu, u32_delete_key_rcu);
 		return 0;
 	}
@@ -930,29 +966,40 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 			return -EINVAL;
 		if (TC_U32_KEY(handle))
 			return -EINVAL;
-		if (handle == 0) {
-			handle = gen_new_htid(tp->data);
-			if (handle == 0)
-				return -ENOMEM;
-		}
 		ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
 		if (ht == NULL)
 			return -ENOBUFS;
+		if (handle == 0) {
+			handle = gen_new_htid(tp->data, ht);
+			if (handle == 0) {
+				kfree(ht);
+				return -ENOMEM;
+			}
+		} else {
+			err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
+					    handle, handle + 1, GFP_KERNEL);
+			if (err) {
+				kfree(ht);
+				return err;
+			}
+		}
 		ht->tp_c = tp_c;
 		ht->refcnt = 1;
 		ht->divisor = divisor;
 		ht->handle = handle;
 		ht->prio = tp->prio;
+		idr_init(&ht->handle_idr);
 
 		err = u32_replace_hw_hnode(tp, ht, flags);
 		if (err) {
+			idr_remove_ext(&tp_c->handle_idr, handle);
 			kfree(ht);
 			return err;
 		}
 
 		RCU_INIT_POINTER(ht->next, tp_c->hlist);
 		rcu_assign_pointer(tp_c->hlist, ht);
-		*arg = (unsigned long)ht;
+		*arg = ht;
 
 		return 0;
 	}
@@ -979,24 +1026,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 		if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
 			return -EINVAL;
 		handle = htid | TC_U32_NODE(handle);
+		err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
+				    handle, handle + 1,
+				    GFP_KERNEL);
+		if (err)
+			return err;
 	} else
 		handle = gen_new_kid(ht, htid);
 
-	if (tb[TCA_U32_SEL] == NULL)
-		return -EINVAL;
+	if (tb[TCA_U32_SEL] == NULL) {
+		err = -EINVAL;
+		goto erridr;
+	}
 
 	s = nla_data(tb[TCA_U32_SEL]);
 
 	n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
-	if (n == NULL)
-		return -ENOBUFS;
+	if (n == NULL) {
+		err = -ENOBUFS;
+		goto erridr;
+	}
 
 #ifdef CONFIG_CLS_U32_PERF
 	size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
 	n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
 	if (!n->pf) {
-		kfree(n);
-		return -ENOBUFS;
+		err = -ENOBUFS;
+		goto errfree;
 	}
 #endif
 
@@ -1047,7 +1103,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 
 		RCU_INIT_POINTER(n->next, pins);
 		rcu_assign_pointer(*ins, n);
-		*arg = (unsigned long)n;
+		*arg = n;
 		return 0;
 	}
 
@@ -1059,9 +1115,12 @@ errhw:
 errout:
 	tcf_exts_destroy(&n->exts);
 #ifdef CONFIG_CLS_U32_PERF
+errfree:
 	free_percpu(n->pf);
 #endif
 	kfree(n);
+erridr:
+	idr_remove_ext(&ht->handle_idr, handle);
 	return err;
 }
 
@@ -1081,7 +1140,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 		if (ht->prio != tp->prio)
 			continue;
 		if (arg->count >= arg->skip) {
-			if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
+			if (arg->fn(tp, ht, arg) < 0) {
 				arg->stop = 1;
 				return;
 			}
@@ -1095,7 +1154,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 					arg->count++;
 					continue;
 				}
-				if (arg->fn(tp, (unsigned long)n, arg) < 0) {
+				if (arg->fn(tp, n, arg) < 0) {
 					arg->stop = 1;
 					return;
 				}
@@ -1105,10 +1164,18 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
-static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+	struct tc_u_knode *n = fh;
+
+	if (n && n->res.classid == classid)
+		n->res.class = cl;
+}
+
+static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		    struct sk_buff *skb, struct tcmsg *t)
 {
-	struct tc_u_knode *n = (struct tc_u_knode *)fh;
+	struct tc_u_knode *n = fh;
 	struct tc_u_hnode *ht_up, *ht_down;
 	struct nlattr *nest;
 
@@ -1122,7 +1189,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 		goto nla_put_failure;
 
 	if (TC_U32_KEY(n->handle) == 0) {
-		struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
+		struct tc_u_hnode *ht = fh;
 		u32 divisor = ht->divisor + 1;
 
 		if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
@@ -1235,11 +1302,14 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
 	.delete		=	u32_delete,
 	.walk		=	u32_walk,
 	.dump		=	u32_dump,
+	.bind_class	=	u32_bind_class,
 	.owner		=	THIS_MODULE,
 };
 
 static int __init init_u32(void)
 {
+	int i, ret;
+
 	pr_info("u32 classifier\n");
 #ifdef CONFIG_CLS_U32_PERF
 	pr_info("    Performance counters on\n");
@@ -1250,12 +1320,25 @@ static int __init init_u32(void)
 #ifdef CONFIG_NET_CLS_ACT
 	pr_info("    Actions configured\n");
 #endif
-	return register_tcf_proto_ops(&cls_u32_ops);
+	tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
+					  sizeof(struct hlist_head),
+					  GFP_KERNEL);
+	if (!tc_u_common_hash)
+		return -ENOMEM;
+
+	for (i = 0; i < U32_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&tc_u_common_hash[i]);
+
+	ret = register_tcf_proto_ops(&cls_u32_ops);
+	if (ret)
+		kvfree(tc_u_common_hash);
+	return ret;
 }
 
 static void __exit exit_u32(void)
 {
 	unregister_tcf_proto_ops(&cls_u32_ops);
+	kvfree(tc_u_common_hash);
 }
 
 module_init(init_u32)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 03b677bc0700..1331a4c2d8ff 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -178,7 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
 	struct tcf_ematch_hdr *em_hdr = nla_data(nla);
 	int data_len = nla_len(nla) - sizeof(*em_hdr);
 	void *data = (void *) em_hdr + sizeof(*em_hdr);
-	struct net *net = dev_net(qdisc_dev(tp->q));
+	struct net *net = tp->chain->block->net;
 
 	if (!TCF_EM_REL_VALID(em_hdr->flags))
 		goto errout;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a3fa144b8648..b6c4f536876b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -35,13 +35,7 @@
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
-
-static int qdisc_notify(struct net *net, struct sk_buff *oskb,
-			struct nlmsghdr *n, u32 clid,
-			struct Qdisc *old, struct Qdisc *new);
-static int tclass_notify(struct net *net, struct sk_buff *oskb,
-			 struct nlmsghdr *n, struct Qdisc *q,
-			 unsigned long cl, int event);
+#include <net/pkt_cls.h>
 
 /*
 
@@ -160,7 +154,7 @@ int register_qdisc(struct Qdisc_ops *qops)
 	if (qops->cl_ops) {
 		const struct Qdisc_class_ops *cops = qops->cl_ops;
 
-		if (!(cops->get && cops->put && cops->walk && cops->leaf))
+		if (!(cops->find && cops->walk && cops->leaf))
 			goto out_einval;
 
 		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
@@ -307,6 +301,8 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
 	struct Qdisc *q;
 
+	if (!handle)
+		return NULL;
 	q = qdisc_match_from_root(dev->qdisc, handle);
 	if (q)
 		goto out;
@@ -327,12 +323,11 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 
 	if (cops == NULL)
 		return NULL;
-	cl = cops->get(p, classid);
+	cl = cops->find(p, classid);
 
 	if (cl == 0)
 		return NULL;
 	leaf = cops->leaf(p, cl);
-	cops->put(p, cl);
 	return leaf;
 }
 
@@ -621,14 +616,10 @@ EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
 {
-	unsigned int size = n * sizeof(struct hlist_head), i;
 	struct hlist_head *h;
+	unsigned int i;
 
-	if (size <= PAGE_SIZE)
-		h = kmalloc(size, GFP_KERNEL);
-	else
-		h = (struct hlist_head *)
-			__get_free_pages(GFP_KERNEL, get_order(size));
+	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
 
 	if (h != NULL) {
 		for (i = 0; i < n; i++)
@@ -637,16 +628,6 @@ static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
 	return h;
 }
 
-static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
-{
-	unsigned int size = n * sizeof(struct hlist_head);
-
-	if (size <= PAGE_SIZE)
-		kfree(h);
-	else
-		free_pages((unsigned long)h, get_order(size));
-}
-
 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
 {
 	struct Qdisc_class_common *cl;
@@ -679,7 +660,7 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
 	clhash->hashmask = nmask;
 	sch_tree_unlock(sch);
 
-	qdisc_class_hash_free(ohash, osize);
+	kvfree(ohash);
 }
 EXPORT_SYMBOL(qdisc_class_hash_grow);
 
@@ -699,7 +680,7 @@ EXPORT_SYMBOL(qdisc_class_hash_init);
 
 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
 {
-	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
+	kvfree(clhash->hash);
 }
 EXPORT_SYMBOL(qdisc_class_hash_destroy);
 
@@ -749,6 +730,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
 	const struct Qdisc_class_ops *cops;
 	unsigned long cl;
 	u32 parentid;
+	bool notify;
 	int drops;
 
 	if (n == 0 && len == 0)
@@ -761,6 +743,13 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
 
 		if (sch->flags & TCQ_F_NOPARENT)
 			break;
+		/* Notify parent qdisc only if child qdisc becomes empty.
+		 *
+		 * If child was empty even before update then backlog
+		 * counter is screwed and we skip notification because
+		 * parent class is already passive.
+		 */
+		notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
 		/* TODO: perform the search on a per txq basis */
 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
 		if (sch == NULL) {
@@ -768,10 +757,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
 			break;
 		}
 		cops = sch->ops->cl_ops;
-		if (cops->qlen_notify) {
-			cl = cops->get(sch, parentid);
+		if (notify && cops->qlen_notify) {
+			cl = cops->find(sch, parentid);
 			cops->qlen_notify(sch, cl);
-			cops->put(sch, cl);
 		}
 		sch->q.qlen -= n;
 		sch->qstats.backlog -= len;
@@ -781,6 +769,111 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
 }
 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
 
+static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
+			 u32 portid, u32 seq, u16 flags, int event)
+{
+	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
+	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
+	struct tcmsg *tcm;
+	struct nlmsghdr  *nlh;
+	unsigned char *b = skb_tail_pointer(skb);
+	struct gnet_dump d;
+	struct qdisc_size_table *stab;
+	__u32 qlen;
+
+	cond_resched();
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+	if (!nlh)
+		goto out_nlmsg_trim;
+	tcm = nlmsg_data(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm__pad1 = 0;
+	tcm->tcm__pad2 = 0;
+	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+	tcm->tcm_parent = clid;
+	tcm->tcm_handle = q->handle;
+	tcm->tcm_info = refcount_read(&q->refcnt);
+	if (nla_put_string(skb, TCA_KIND, q->ops->id))
+		goto nla_put_failure;
+	if (q->ops->dump && q->ops->dump(q, skb) < 0)
+		goto nla_put_failure;
+	qlen = q->q.qlen;
+
+	stab = rtnl_dereference(q->stab);
+	if (stab && qdisc_dump_stab(skb, stab) < 0)
+		goto nla_put_failure;
+
+	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+					 NULL, &d, TCA_PAD) < 0)
+		goto nla_put_failure;
+
+	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
+		goto nla_put_failure;
+
+	if (qdisc_is_percpu_stats(q)) {
+		cpu_bstats = q->cpu_bstats;
+		cpu_qstats = q->cpu_qstats;
+	}
+
+	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
+				  &d, cpu_bstats, &q->bstats) < 0 ||
+	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
+		goto nla_put_failure;
+
+	if (gnet_stats_finish_copy(&d) < 0)
+		goto nla_put_failure;
+
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+	return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
+{
+	if (q->flags & TCQ_F_BUILTIN)
+		return true;
+	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
+		return true;
+
+	return false;
+}
+
+static int qdisc_notify(struct net *net, struct sk_buff *oskb,
+			struct nlmsghdr *n, u32 clid,
+			struct Qdisc *old, struct Qdisc *new)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (old && !tc_qdisc_dump_ignore(old, false)) {
+		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
+				  0, RTM_DELQDISC) < 0)
+			goto err_out;
+	}
+	if (new && !tc_qdisc_dump_ignore(new, false)) {
+		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
+				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+			goto err_out;
+	}
+
+	if (skb->len)
+		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+				      n->nlmsg_flags & NLM_F_ECHO);
+
+err_out:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
 			       struct nlmsghdr *n, u32 clid,
 			       struct Qdisc *old, struct Qdisc *new)
@@ -836,7 +929,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 
 			old = dev_graft_qdisc(dev_queue, new);
 			if (new && i > 0)
-				refcount_inc(&new->refcnt);
+				qdisc_refcount_inc(new);
 
 			if (!ingress)
 				qdisc_destroy(old);
@@ -847,7 +940,7 @@ skip:
 			notify_and_destroy(net, skb, n, classid,
 					   dev->qdisc, new);
 			if (new && !new->ops->attach)
-				refcount_inc(&new->refcnt);
+				qdisc_refcount_inc(new);
 			dev->qdisc = new ? : &noop_qdisc;
 
 			if (new && new->ops->attach)
@@ -863,11 +956,11 @@ skip:
 
 		err = -EOPNOTSUPP;
 		if (cops && cops->graft) {
-			unsigned long cl = cops->get(parent, classid);
-			if (cl) {
+			unsigned long cl = cops->find(parent, classid);
+
+			if (cl)
 				err = cops->graft(parent, cl, new, &old);
-				cops->put(parent, cl);
-			} else
+			else
 				err = -ENOENT;
 		}
 		if (!err)
@@ -1256,7 +1349,7 @@ replay:
 				if (q == p ||
 				    (p && check_loop(q, p, 0)))
 					return -ELOOP;
-				refcount_inc(&q->refcnt);
+				qdisc_refcount_inc(q);
 				goto graft;
 			} else {
 				if (!q)
@@ -1348,111 +1441,6 @@ graft:
 	return 0;
 }
 
-static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
-			 u32 portid, u32 seq, u16 flags, int event)
-{
-	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
-	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
-	struct tcmsg *tcm;
-	struct nlmsghdr  *nlh;
-	unsigned char *b = skb_tail_pointer(skb);
-	struct gnet_dump d;
-	struct qdisc_size_table *stab;
-	__u32 qlen;
-
-	cond_resched();
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
-	if (!nlh)
-		goto out_nlmsg_trim;
-	tcm = nlmsg_data(nlh);
-	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm__pad1 = 0;
-	tcm->tcm__pad2 = 0;
-	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
-	tcm->tcm_parent = clid;
-	tcm->tcm_handle = q->handle;
-	tcm->tcm_info = refcount_read(&q->refcnt);
-	if (nla_put_string(skb, TCA_KIND, q->ops->id))
-		goto nla_put_failure;
-	if (q->ops->dump && q->ops->dump(q, skb) < 0)
-		goto nla_put_failure;
-	qlen = q->q.qlen;
-
-	stab = rtnl_dereference(q->stab);
-	if (stab && qdisc_dump_stab(skb, stab) < 0)
-		goto nla_put_failure;
-
-	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
-					 NULL, &d, TCA_PAD) < 0)
-		goto nla_put_failure;
-
-	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
-		goto nla_put_failure;
-
-	if (qdisc_is_percpu_stats(q)) {
-		cpu_bstats = q->cpu_bstats;
-		cpu_qstats = q->cpu_qstats;
-	}
-
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
-				  &d, cpu_bstats, &q->bstats) < 0 ||
-	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
-	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
-		goto nla_put_failure;
-
-	if (gnet_stats_finish_copy(&d) < 0)
-		goto nla_put_failure;
-
-	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
-	return skb->len;
-
-out_nlmsg_trim:
-nla_put_failure:
-	nlmsg_trim(skb, b);
-	return -1;
-}
-
-static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
-{
-	if (q->flags & TCQ_F_BUILTIN)
-		return true;
-	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
-		return true;
-
-	return false;
-}
-
-static int qdisc_notify(struct net *net, struct sk_buff *oskb,
-			struct nlmsghdr *n, u32 clid,
-			struct Qdisc *old, struct Qdisc *new)
-{
-	struct sk_buff *skb;
-	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb)
-		return -ENOBUFS;
-
-	if (old && !tc_qdisc_dump_ignore(old, false)) {
-		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
-				  0, RTM_DELQDISC) < 0)
-			goto err_out;
-	}
-	if (new && !tc_qdisc_dump_ignore(new, false)) {
-		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
-				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
-			goto err_out;
-	}
-
-	if (skb->len)
-		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-				      n->nlmsg_flags & NLM_F_ECHO);
-
-err_out:
-	kfree_skb(skb);
-	return -EINVAL;
-}
-
 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 			      struct netlink_callback *cb,
 			      int *q_idx_p, int s_q_idx, bool recur,
@@ -1514,7 +1502,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 	int s_idx, s_q_idx;
 	struct net_device *dev;
 	const struct nlmsghdr *nlh = cb->nlh;
-	struct tcmsg *tcm = nlmsg_data(nlh);
 	struct nlattr *tca[TCA_MAX + 1];
 	int err;
 
@@ -1524,7 +1511,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 	idx = 0;
 	ASSERT_RTNL();
 
-	err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+	err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
 	if (err < 0)
 		return err;
 
@@ -1565,7 +1552,163 @@ done:
  *	Traffic classes manipulation.		*
  ************************************************/
 
+static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
+			  unsigned long cl,
+			  u32 portid, u32 seq, u16 flags, int event)
+{
+	struct tcmsg *tcm;
+	struct nlmsghdr  *nlh;
+	unsigned char *b = skb_tail_pointer(skb);
+	struct gnet_dump d;
+	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
+
+	cond_resched();
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+	if (!nlh)
+		goto out_nlmsg_trim;
+	tcm = nlmsg_data(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm__pad1 = 0;
+	tcm->tcm__pad2 = 0;
+	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+	tcm->tcm_parent = q->handle;
+	tcm->tcm_handle = q->handle;
+	tcm->tcm_info = 0;
+	if (nla_put_string(skb, TCA_KIND, q->ops->id))
+		goto nla_put_failure;
+	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
+		goto nla_put_failure;
+
+	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+					 NULL, &d, TCA_PAD) < 0)
+		goto nla_put_failure;
+
+	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
+		goto nla_put_failure;
+
+	if (gnet_stats_finish_copy(&d) < 0)
+		goto nla_put_failure;
+
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+	return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tclass_notify(struct net *net, struct sk_buff *oskb,
+			 struct nlmsghdr *n, struct Qdisc *q,
+			 unsigned long cl, int event)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+			      n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tclass_del_notify(struct net *net,
+			     const struct Qdisc_class_ops *cops,
+			     struct sk_buff *oskb, struct nlmsghdr *n,
+			     struct Qdisc *q, unsigned long cl)
+{
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct sk_buff *skb;
+	int err = 0;
+
+	if (!cops->delete)
+		return -EOPNOTSUPP;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
 
+	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
+			   RTM_DELTCLASS) < 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	err = cops->delete(q, cl);
+	if (err) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+			      n->nlmsg_flags & NLM_F_ECHO);
+}
+
+#ifdef CONFIG_NET_CLS
+
+struct tcf_bind_args {
+	struct tcf_walker w;
+	u32 classid;
+	unsigned long cl;
+};
+
+static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
+{
+	struct tcf_bind_args *a = (void *)arg;
+
+	if (tp->ops->bind_class) {
+		struct Qdisc *q = tcf_block_q(tp->chain->block);
+
+		sch_tree_lock(q);
+		tp->ops->bind_class(n, a->classid, a->cl);
+		sch_tree_unlock(q);
+	}
+	return 0;
+}
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+			   unsigned long new_cl)
+{
+	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+	struct tcf_block *block;
+	struct tcf_chain *chain;
+	unsigned long cl;
+
+	cl = cops->find(q, portid);
+	if (!cl)
+		return;
+	block = cops->tcf_block(q, cl);
+	if (!block)
+		return;
+	list_for_each_entry(chain, &block->chain_list, list) {
+		struct tcf_proto *tp;
+
+		for (tp = rtnl_dereference(chain->filter_chain);
+		     tp; tp = rtnl_dereference(tp->next)) {
+			struct tcf_bind_args arg = {};
+
+			arg.w.fn = tcf_node_bind;
+			arg.classid = clid;
+			arg.cl = new_cl;
+			tp->ops->walk(tp, &arg.w);
+		}
+	}
+}
+
+#else
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+			   unsigned long new_cl)
+{
+}
+
+#endif
 
 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 			 struct netlink_ext_ack *extack)
@@ -1656,7 +1799,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 		clid = TC_H_MAKE(qid, clid);
 
 	if (clid)
-		cl = cops->get(q, clid);
+		cl = cops->find(q, clid);
 
 	if (cl == 0) {
 		err = -ENOENT;
@@ -1671,12 +1814,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 				goto out;
 			break;
 		case RTM_DELTCLASS:
-			err = -EOPNOTSUPP;
-			if (cops->delete)
-				err = cops->delete(q, cl);
-			if (err == 0)
-				tclass_notify(net, skb, n, q, cl,
-					      RTM_DELTCLASS);
+			err = tclass_del_notify(net, cops, skb, n, q, cl);
+			/* Unbind the class from filters by binding them to 0 */
+			tc_bind_tclass(q, portid, clid, 0);
 			goto out;
 		case RTM_GETTCLASS:
 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
@@ -1691,83 +1831,16 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
 	err = -EOPNOTSUPP;
 	if (cops->change)
 		err = cops->change(q, clid, portid, tca, &new_cl);
-	if (err == 0)
+	if (err == 0) {
 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
-
+		/* A new class was just created: do the reverse binding. */
+		if (cl != new_cl)
+			tc_bind_tclass(q, portid, clid, new_cl);
+	}
 out:
-	if (cl)
-		cops->put(q, cl);
-
 	return err;
 }
 
-
-static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
-			  unsigned long cl,
-			  u32 portid, u32 seq, u16 flags, int event)
-{
-	struct tcmsg *tcm;
-	struct nlmsghdr  *nlh;
-	unsigned char *b = skb_tail_pointer(skb);
-	struct gnet_dump d;
-	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
-
-	cond_resched();
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
-	if (!nlh)
-		goto out_nlmsg_trim;
-	tcm = nlmsg_data(nlh);
-	tcm->tcm_family = AF_UNSPEC;
-	tcm->tcm__pad1 = 0;
-	tcm->tcm__pad2 = 0;
-	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
-	tcm->tcm_parent = q->handle;
-	tcm->tcm_handle = q->handle;
-	tcm->tcm_info = 0;
-	if (nla_put_string(skb, TCA_KIND, q->ops->id))
-		goto nla_put_failure;
-	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
-		goto nla_put_failure;
-
-	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
-					 NULL, &d, TCA_PAD) < 0)
-		goto nla_put_failure;
-
-	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
-		goto nla_put_failure;
-
-	if (gnet_stats_finish_copy(&d) < 0)
-		goto nla_put_failure;
-
-	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
-	return skb->len;
-
-out_nlmsg_trim:
-nla_put_failure:
-	nlmsg_trim(skb, b);
-	return -1;
-}
-
-static int tclass_notify(struct net *net, struct sk_buff *oskb,
-			 struct nlmsghdr *n, struct Qdisc *q,
-			 unsigned long cl, int event)
-{
-	struct sk_buff *skb;
-	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb)
-		return -ENOBUFS;
-
-	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
-
-	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-			      n->nlmsg_flags & NLM_F_ECHO);
-}
-
 struct qdisc_dump_args {
 	struct qdisc_walker	w;
 	struct sk_buff		*skb;
@@ -1949,14 +2022,14 @@ static int __init pktsched_init(void)
 	register_qdisc(&mq_qdisc_ops);
 	register_qdisc(&noqueue_qdisc_ops);
 
-	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
-		      NULL);
-	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
+		      0);
+	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
-		      NULL);
+		      0);
 
 	return 0;
 }
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index c403c87aff7a..2dbd249c0b2f 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -41,6 +41,7 @@
 #define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
 
 struct atm_flow_data {
+	struct Qdisc_class_common common;
 	struct Qdisc		*q;	/* FIFO, TBF, etc. */
 	struct tcf_proto __rcu	*filter_list;
 	struct tcf_block	*block;
@@ -49,7 +50,6 @@ struct atm_flow_data {
 					   struct sk_buff *skb); /* chaining */
 	struct atm_qdisc_data	*parent;	/* parent qdisc */
 	struct socket		*sock;		/* for closing */
-	u32			classid;	/* x:y type ID */
 	int			ref;		/* reference count */
 	struct gnet_stats_basic_packed	bstats;
 	struct gnet_stats_queue	qstats;
@@ -75,7 +75,7 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
 	struct atm_flow_data *flow;
 
 	list_for_each_entry(flow, &p->flows, list) {
-		if (flow->classid == classid)
+		if (flow->common.classid == classid)
 			return flow;
 	}
 	return NULL;
@@ -108,23 +108,29 @@ static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
 	return flow ? flow->q : NULL;
 }
 
-static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid)
+static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid)
 {
 	struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 
-	pr_debug("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
+	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
 	flow = lookup_flow(sch, classid);
-	if (flow)
-		flow->ref++;
-	pr_debug("atm_tc_get: flow %p\n", flow);
+	pr_debug("%s: flow %p\n", __func__, flow);
 	return (unsigned long)flow;
 }
 
 static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
 					unsigned long parent, u32 classid)
 {
-	return atm_tc_get(sch, classid);
+	struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
+	struct atm_flow_data *flow;
+
+	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
+	flow = lookup_flow(sch, classid);
+	if (flow)
+		flow->ref++;
+	pr_debug("%s: flow %p\n", __func__, flow);
+	return (unsigned long)flow;
 }
 
 /*
@@ -234,7 +240,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		excess = NULL;
 	else {
 		excess = (struct atm_flow_data *)
-			atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
+			atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
 		if (!excess)
 			return -ENOENT;
 	}
@@ -262,10 +268,9 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 
 		for (i = 1; i < 0x8000; i++) {
 			classid = TC_H_MAKE(sch->handle, 0x8000 | i);
-			cl = atm_tc_get(sch, classid);
+			cl = atm_tc_find(sch, classid);
 			if (!cl)
 				break;
-			atm_tc_put(sch, cl);
 		}
 	}
 	pr_debug("atm_tc_change: new id %x\n", classid);
@@ -276,7 +281,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 		goto err_out;
 	}
 
-	error = tcf_block_get(&flow->block, &flow->filter_list);
+	error = tcf_block_get(&flow->block, &flow->filter_list, sch);
 	if (error) {
 		kfree(flow);
 		goto err_out;
@@ -293,7 +298,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	flow->old_pop = flow->vcc->pop;
 	flow->parent = p;
 	flow->vcc->pop = sch_atm_pop;
-	flow->classid = classid;
+	flow->common.classid = classid;
 	flow->ref = 1;
 	flow->excess = excess;
 	list_add(&flow->list, &p->link.list);
@@ -305,8 +310,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	*arg = (unsigned long)flow;
 	return 0;
 err_out:
-	if (excess)
-		atm_tc_put(sch, (unsigned long)excess);
 	sockfd_put(sock);
 	return error;
 }
@@ -377,7 +380,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	result = TC_ACT_OK;	/* be nice to gcc */
 	flow = NULL;
 	if (TC_H_MAJ(skb->priority) != sch->handle ||
-	    !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
+	    !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) {
 		struct tcf_proto *fl;
 
 		list_for_each_entry(flow, &p->flows, list) {
@@ -543,13 +546,13 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 		p->link.q = &noop_qdisc;
 	pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
 
-	err = tcf_block_get(&p->link.block, &p->link.filter_list);
+	err = tcf_block_get(&p->link.block, &p->link.filter_list, sch);
 	if (err)
 		return err;
 
 	p->link.vcc = NULL;
 	p->link.sock = NULL;
-	p->link.classid = sch->handle;
+	p->link.common.classid = sch->handle;
 	p->link.ref = 1;
 	tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
 	return 0;
@@ -596,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 		sch, p, flow, skb, tcm);
 	if (list_empty(&flow->list))
 		return -EINVAL;
-	tcm->tcm_handle = flow->classid;
+	tcm->tcm_handle = flow->common.classid;
 	tcm->tcm_info = flow->q->handle;
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
@@ -621,7 +624,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 			goto nla_put_failure;
 	}
 	if (flow->excess) {
-		if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->classid))
+		if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid))
 			goto nla_put_failure;
 	} else {
 		if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
@@ -655,8 +658,7 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
 static const struct Qdisc_class_ops atm_class_ops = {
 	.graft		= atm_tc_graft,
 	.leaf		= atm_tc_leaf,
-	.get		= atm_tc_get,
-	.put		= atm_tc_put,
+	.find		= atm_tc_find,
 	.change		= atm_tc_change,
 	.delete		= atm_tc_delete,
 	.walk		= atm_tc_walk,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 780db43300b1..6361be7881f1 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -129,7 +129,6 @@ struct cbq_class {
 	struct tcf_proto __rcu	*filter_list;
 	struct tcf_block	*block;
 
-	int			refcnt;
 	int			filters;
 
 	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
@@ -256,6 +255,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		case TC_ACT_STOLEN:
 		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
 		case TC_ACT_SHOT:
 			return NULL;
 		case TC_ACT_RECLASSIFY:
@@ -1139,6 +1139,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_ratespec *r;
 	int err;
 
+	qdisc_watchdog_init(&q->watchdog, sch);
+	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	q->delay_timer.function = cbq_undelay;
+
+	if (!opt)
+		return -EINVAL;
+
 	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
 	if (err < 0)
 		return err;
@@ -1155,7 +1162,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	if (err < 0)
 		goto put_rtab;
 
-	q->link.refcnt = 1;
 	q->link.sibling = &q->link;
 	q->link.common.classid = sch->handle;
 	q->link.qdisc = sch;
@@ -1177,9 +1183,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->link.avpkt = q->link.allot/2;
 	q->link.minidle = -0x7FFFFFFF;
 
-	qdisc_watchdog_init(&q->watchdog, sch);
-	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
-	q->delay_timer.function = cbq_undelay;
 	q->toplevel = TC_CBQ_MAXLEVEL;
 	q->now = psched_get_time();
 
@@ -1385,20 +1388,14 @@ static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
 {
 	struct cbq_class *cl = (struct cbq_class *)arg;
 
-	if (cl->q->q.qlen == 0)
-		cbq_deactivate_class(cl);
+	cbq_deactivate_class(cl);
 }
 
-static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
+static unsigned long cbq_find(struct Qdisc *sch, u32 classid)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = cbq_class_lookup(q, classid);
 
-	if (cl) {
-		cl->refcnt++;
-		return (unsigned long)cl;
-	}
-	return 0;
+	return (unsigned long)cbq_class_lookup(q, classid);
 }
 
 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
@@ -1444,25 +1441,6 @@ static void cbq_destroy(struct Qdisc *sch)
 	qdisc_class_hash_destroy(&q->clhash);
 }
 
-static void cbq_put(struct Qdisc *sch, unsigned long arg)
-{
-	struct cbq_class *cl = (struct cbq_class *)arg;
-
-	if (--cl->refcnt == 0) {
-#ifdef CONFIG_NET_CLS_ACT
-		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
-		struct cbq_sched_data *q = qdisc_priv(sch);
-
-		spin_lock_bh(root_lock);
-		if (q->rx_class == cl)
-			q->rx_class = NULL;
-		spin_unlock_bh(root_lock);
-#endif
-
-		cbq_destroy_class(sch, cl);
-	}
-}
-
 static int
 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
 		 unsigned long *arg)
@@ -1589,7 +1567,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	if (cl == NULL)
 		goto failure;
 
-	err = tcf_block_get(&cl->block, &cl->filter_list);
+	err = tcf_block_get(&cl->block, &cl->filter_list, sch);
 	if (err) {
 		kfree(cl);
 		return err;
@@ -1609,7 +1587,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	cl->R_tab = rtab;
 	rtab = NULL;
-	cl->refcnt = 1;
 	cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
 	if (!cl->q)
 		cl->q = &noop_qdisc;
@@ -1690,12 +1667,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
 	cbq_rmprio(q, cl);
 	sch_tree_unlock(sch);
 
-	BUG_ON(--cl->refcnt == 0);
-	/*
-	 * This shouldn't happen: we "hold" one cops->get() when called
-	 * from tc_ctl_tclass; the destroy method is done from cops->put().
-	 */
-
+	cbq_destroy_class(sch, cl);
 	return 0;
 }
 
@@ -1761,8 +1733,7 @@ static const struct Qdisc_class_ops cbq_class_ops = {
 	.graft		=	cbq_graft,
 	.leaf		=	cbq_leaf,
 	.qlen_notify	=	cbq_qlen_notify,
-	.get		=	cbq_get,
-	.put		=	cbq_put,
+	.find		=	cbq_find,
 	.change		=	cbq_change_class,
 	.delete		=	cbq_delete,
 	.walk		=	cbq_walk,
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
new file mode 100644
index 000000000000..7a72980c1509
--- /dev/null
+++ b/net/sched/sch_cbs.c
@@ -0,0 +1,373 @@
+/*
+ * net/sched/sch_cbs.c	Credit Based Shaper
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ *
+ */
+
+/* Credit Based Shaper (CBS)
+ * =========================
+ *
+ * This is a simple rate-limiting shaper aimed at TSN applications on
+ * systems with known traffic workloads.
+ *
+ * Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
+ * Section 8.6.8.2, and explained in more detail in the Annex L of the
+ * same specification.
+ *
+ * There are four tunables to be considered:
+ *
+ *	'idleslope': Idleslope is the rate of credits that is
+ *	accumulated (in kilobits per second) when there is at least
+ *	one packet waiting for transmission. Packets are transmitted
+ *	when the current value of credits is equal or greater than
+ *	zero. When there is no packet to be transmitted the amount of
+ *	credits is set to zero. This is the main tunable of the CBS
+ *	algorithm.
+ *
+ *	'sendslope':
+ *	Sendslope is the rate of credits that is depleted (it should be a
+ *	negative number of kilobits per second) when a transmission is
+ *	occurring. It can be calculated as follows (IEEE 802.1Q-2014 Section
+ *	8.6.8.2 item g):
+ *
+ *	sendslope = idleslope - port_transmit_rate
+ *
+ *	'hicredit': Hicredit defines the maximum amount of credits (in
+ *	bytes) that can be accumulated. Hicredit depends on the
+ *	characteristics of interfering traffic,
+ *	'max_interference_size' is the maximum size of any burst of
+ *	traffic that can delay the transmission of a frame that is
+ *	available for transmission for this traffic class, (IEEE
+ *	802.1Q-2014 Annex L, Equation L-3):
+ *
+ *	hicredit = max_interference_size * (idleslope / port_transmit_rate)
+ *
+ *	'locredit': Locredit is the minimum amount of credits that can
+ *	be reached. It is a function of the traffic flowing through
+ *	this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
+ *
+ *	locredit = max_frame_size * (sendslope / port_transmit_rate)
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+
+#define BYTES_PER_KBIT (1000LL / 8)	/* 125; LL keeps slope math 64-bit */
+
+struct cbs_sched_data {
+	bool offload; /* qopt->offload as last accepted by cbs_change() */
+	int queue; /* index of sch->dev_queue among the device TX queues */
+	s64 port_rate; /* in bytes/s */
+	s64 last; /* timestamp in ns */
+	s64 credits; /* in bytes */
+	s32 locredit; /* in bytes */
+	s32 hicredit; /* in bytes */
+	s64 sendslope; /* in bytes/s */
+	s64 idleslope; /* in bytes/s */
+	struct qdisc_watchdog watchdog; /* armed while credits < 0 */
+	int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch); /* _soft or _offload */
+	struct sk_buff *(*dequeue)(struct Qdisc *sch); /* _soft or _offload */
+};
+
+static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch)
+{
+	return qdisc_enqueue_tail(skb, sch);	/* no credit bookkeeping */
+}
+
+static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+
+	if (sch->q.qlen == 0 && q->credits > 0) {
+		/* First packet into an empty queue: positive credits must
+		 * not keep accumulating, so zero them and restart the clock.
+		 */
+		q->credits = 0;
+		q->last = ktime_get_ns();
+	}
+
+	return qdisc_enqueue_tail(skb, sch);
+}
+
+static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+		       struct sk_buff **to_free)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+
+	return q->enqueue(skb, sch);	/* mode-specific; @to_free is unused */
+}
+
+/* @timediff is in ns, @slope in bytes/s; returns bytes (product unchecked) */
+static s64 timediff_to_credits(s64 timediff, s64 slope)
+{
+	return div64_s64(timediff * slope, NSEC_PER_SEC);
+}
+
+static s64 delay_from_credits(s64 credits, s64 slope)
+{
+	if (unlikely(slope == 0))	/* avoid dividing by zero */
+		return S64_MAX;
+
+	return div64_s64(-credits * NSEC_PER_SEC, slope);	/* in ns */
+}
+
+static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
+{
+	if (unlikely(port_rate == 0))	/* avoid dividing by zero */
+		return S64_MAX;
+
+	return div64_s64(len * slope, port_rate);	/* in bytes */
+}
+
+static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+	s64 now = ktime_get_ns();
+	struct sk_buff *skb;
+	s64 credits;
+	int len;
+
+	if (q->credits < 0) {	/* in deficit: replenish at idleslope first */
+		credits = timediff_to_credits(now - q->last, q->idleslope);
+
+		credits = q->credits + credits;
+		q->credits = min_t(s64, credits, q->hicredit);
+
+		if (q->credits < 0) {	/* still in deficit: retry later */
+			s64 delay;
+
+			delay = delay_from_credits(q->credits, q->idleslope);
+			qdisc_watchdog_schedule_ns(&q->watchdog, now + delay);
+
+			q->last = now;
+
+			return NULL;
+		}
+	}
+
+	skb = qdisc_dequeue_head(sch);
+	if (!skb)
+		return NULL;
+
+	len = qdisc_pkt_len(skb);
+
+	/* Charge the packet: sendslope is expected to be negative, so this
+	 * lowers q->credits (clamped at locredit below).
+	 */
+	credits = credits_from_len(len, q->sendslope, q->port_rate);
+	credits += q->credits;
+
+	q->credits = max_t(s64, credits, q->locredit);
+	q->last = now;
+
+	return skb;
+}
+
+static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
+{
+	return qdisc_dequeue_head(sch);	/* no credit check in this mode */
+}
+
+static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+
+	return q->dequeue(sch);	/* cbs_dequeue_soft() or cbs_dequeue_offload() */
+}
+
+static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
+	[TCA_CBS_PARMS]	= { .len = sizeof(struct tc_cbs_qopt) },
+};	/* only TCA_CBS_PARMS is constrained; other slots stay zeroed */
+
+static void cbs_disable_offload(struct net_device *dev,
+				struct cbs_sched_data *q)
+{
+	struct tc_cbs_qopt_offload cbs = { };
+	const struct net_device_ops *ops;
+	int err;
+
+	if (!q->offload)	/* nothing was offloaded: nothing to undo */
+		return;
+
+	q->enqueue = cbs_enqueue_soft;	/* back to software shaping */
+	q->dequeue = cbs_dequeue_soft;
+
+	ops = dev->netdev_ops;
+	if (!ops->ndo_setup_tc)
+		return;
+
+	cbs.queue = q->queue;
+	cbs.enable = 0;	/* remaining fields stay zero */
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
+	if (err < 0)	/* failure is only logged, not propagated */
+		pr_warn("Couldn't disable CBS offload for queue %d\n",
+			cbs.queue);
+}
+
+static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
+			      const struct tc_cbs_qopt *opt)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct tc_cbs_qopt_offload cbs = { };
+	int err;
+
+	if (!ops->ndo_setup_tc)	/* driver has no TC setup hook */
+		return -EOPNOTSUPP;
+
+	cbs.queue = q->queue;
+
+	cbs.enable = 1;
+	cbs.hicredit = opt->hicredit;	/* user values passed unconverted */
+	cbs.locredit = opt->locredit;
+	cbs.idleslope = opt->idleslope;
+	cbs.sendslope = opt->sendslope;
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
+	if (err < 0)	/* q->enqueue/q->dequeue are left untouched */
+		return err;
+
+	q->enqueue = cbs_enqueue_offload;
+	q->dequeue = cbs_dequeue_offload;
+
+	return 0;
+}
+
+/* Parse TCA_CBS_PARMS and apply it; also used as the tail of cbs_init() */
+static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct nlattr *tb[TCA_CBS_MAX + 1];
+	struct tc_cbs_qopt *qopt;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_CBS_PARMS])
+		return -EINVAL;
+
+	qopt = nla_data(tb[TCA_CBS_PARMS]);
+	if (!qopt->offload) {
+		struct ethtool_link_ksettings ecmd;
+		s64 link_speed = SPEED_1000;	/* fallback, in Mbit/s */
+
+		/* 0 and SPEED_UNKNOWN are not usable as a port rate */
+		if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
+		    ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
+			link_speed = ecmd.base.speed;
+
+		q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
+
+		cbs_disable_offload(dev, q);
+	} else {
+		err = cbs_enable_offload(dev, q, qopt);
+		if (err < 0)
+			return err;
+	}
+
+	/* Everything went OK, save the parameters used. */
+	q->hicredit = qopt->hicredit;
+	q->locredit = qopt->locredit;
+	q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
+	q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
+	q->offload = qopt->offload;
+
+	return 0;
+}
+
+/* ->init: start in software mode, then let cbs_change() apply @opt */
+static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+
+	/* Must precede any return: cbs_destroy() cancels the watchdog */
+	qdisc_watchdog_init(&q->watchdog, sch);
+	if (!opt)
+		return -EINVAL;
+
+	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
+	q->enqueue = cbs_enqueue_soft;
+	q->dequeue = cbs_dequeue_soft;
+
+	return cbs_change(sch, opt);
+}
+
+static void cbs_destroy(struct Qdisc *sch)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+
+	qdisc_watchdog_cancel(&q->watchdog);	/* done unconditionally */
+
+	cbs_disable_offload(dev, q);	/* returns early unless q->offload */
+}
+
+static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct cbs_sched_data *q = qdisc_priv(sch);
+	struct tc_cbs_qopt opt = { };	/* mirrors what cbs_change() parsed */
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	opt.hicredit = q->hicredit;
+	opt.locredit = q->locredit;
+	opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT); /* undo scaling */
+	opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
+	opt.offload = q->offload;
+
+	if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, nest);
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);	/* NOTE(review): nest may be NULL here */
+	return -1;
+}
+
+static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
+	.id		=	"cbs",
+	.priv_size	=	sizeof(struct cbs_sched_data),
+	.enqueue	=	cbs_enqueue,	/* dispatches via q->enqueue */
+	.dequeue	=	cbs_dequeue,	/* dispatches via q->dequeue */
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	cbs_init,	/* ends by calling cbs_change() */
+	.reset		=	qdisc_reset_queue,
+	.destroy	=	cbs_destroy,
+	.change		=	cbs_change,
+	.dump		=	cbs_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init cbs_module_init(void)
+{
+	return register_qdisc(&cbs_qdisc_ops);	/* result is returned as-is */
+}
+
+static void __exit cbs_module_exit(void)
+{
+	unregister_qdisc(&cbs_qdisc_ops);	/* undo cbs_module_init() */
+}
+module_init(cbs_module_init)
+module_exit(cbs_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index a413dc1c2098..5bbcef3dcd8c 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -20,7 +20,6 @@
 
 struct drr_class {
 	struct Qdisc_class_common	common;
-	unsigned int			refcnt;
 	unsigned int			filter_cnt;
 
 	struct gnet_stats_basic_packed		bstats;
@@ -111,7 +110,6 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl == NULL)
 		return -ENOBUFS;
 
-	cl->refcnt	   = 1;
 	cl->common.classid = classid;
 	cl->quantum	   = quantum;
 	cl->qdisc	   = qdisc_create_dflt(sch->dev_queue,
@@ -163,32 +161,15 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
 	drr_purge_queue(cl);
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
 
-	BUG_ON(--cl->refcnt == 0);
-	/*
-	 * This shouldn't happen: we "hold" one cops->get() when called
-	 * from tc_ctl_tclass; the destroy method is done from cops->put().
-	 */
-
 	sch_tree_unlock(sch);
-	return 0;
-}
-
-static unsigned long drr_get_class(struct Qdisc *sch, u32 classid)
-{
-	struct drr_class *cl = drr_find_class(sch, classid);
 
-	if (cl != NULL)
-		cl->refcnt++;
-
-	return (unsigned long)cl;
+	drr_destroy_class(sch, cl);
+	return 0;
 }
 
-static void drr_put_class(struct Qdisc *sch, unsigned long arg)
+static unsigned long drr_search_class(struct Qdisc *sch, u32 classid)
 {
-	struct drr_class *cl = (struct drr_class *)arg;
-
-	if (--cl->refcnt == 0)
-		drr_destroy_class(sch, cl);
+	return (unsigned long)drr_find_class(sch, classid);
 }
 
 static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -246,8 +227,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
 {
 	struct drr_class *cl = (struct drr_class *)arg;
 
-	if (cl->qdisc->q.qlen == 0)
-		list_del(&cl->alist);
+	list_del(&cl->alist);
 }
 
 static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
@@ -341,6 +321,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
 		case TC_ACT_STOLEN:
 		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -432,7 +413,7 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	struct drr_sched *q = qdisc_priv(sch);
 	int err;
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	err = tcf_block_get(&q->block, &q->filter_list, sch);
 	if (err)
 		return err;
 	err = qdisc_class_hash_init(&q->clhash);
@@ -479,8 +460,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch)
 static const struct Qdisc_class_ops drr_class_ops = {
 	.change		= drr_change_class,
 	.delete		= drr_delete_class,
-	.get		= drr_get_class,
-	.put		= drr_put_class,
+	.find		= drr_search_class,
 	.tcf_block	= drr_tcf_block,
 	.bind_tcf	= drr_bind_tcf,
 	.unbind_tcf	= drr_unbind_tcf,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 6d94fcc3592a..fb4fb71c68cf 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -85,21 +85,21 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
 	return p->q;
 }
 
-static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
+static unsigned long dsmark_find(struct Qdisc *sch, u32 classid)
 {
-	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
-		 __func__, sch, qdisc_priv(sch), classid);
-
 	return TC_H_MIN(classid) + 1;
 }
 
 static unsigned long dsmark_bind_filter(struct Qdisc *sch,
 					unsigned long parent, u32 classid)
 {
-	return dsmark_get(sch, classid);
+	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
+		 __func__, sch, qdisc_priv(sch), classid);
+
+	return dsmark_find(sch, classid);
 }
 
-static void dsmark_put(struct Qdisc *sch, unsigned long cl)
+static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl)
 {
 }
 
@@ -344,7 +344,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!opt)
 		goto errout;
 
-	err = tcf_block_get(&p->block, &p->filter_list);
+	err = tcf_block_get(&p->block, &p->filter_list, sch);
 	if (err)
 		return err;
 
@@ -469,14 +469,13 @@ nla_put_failure:
 static const struct Qdisc_class_ops dsmark_class_ops = {
 	.graft		=	dsmark_graft,
 	.leaf		=	dsmark_leaf,
-	.get		=	dsmark_get,
-	.put		=	dsmark_put,
+	.find		=	dsmark_find,
 	.change		=	dsmark_change,
 	.delete		=	dsmark_delete,
 	.walk		=	dsmark_walk,
 	.tcf_block	=	dsmark_tcf_block,
 	.bind_tcf	=	dsmark_bind_filter,
-	.unbind_tcf	=	dsmark_put,
+	.unbind_tcf	=	dsmark_unbind_filter,
 	.dump		=	dsmark_dump_class,
 };
 
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 337f2d6d81e4..0305d791ea94 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -105,6 +105,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
 		case TC_ACT_QUEUED:
 		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
 		case TC_ACT_SHOT:
 			return 0;
 		}
@@ -481,7 +482,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	err = tcf_block_get(&q->block, &q->filter_list, sch);
 	if (err)
 		return err;
 
@@ -491,10 +492,8 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
 		if (!q->flows)
 			return -ENOMEM;
 		q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL);
-		if (!q->backlogs) {
-			kvfree(q->flows);
+		if (!q->backlogs)
 			return -ENOMEM;
-		}
 		for (i = 0; i < q->flows_cnt; i++) {
 			struct fq_codel_flow *flow = q->flows + i;
 
@@ -579,7 +578,7 @@ static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg)
 	return NULL;
 }
 
-static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid)
+static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid)
 {
 	return 0;
 }
@@ -592,7 +591,7 @@ static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
 	return 0;
 }
 
-static void fq_codel_put(struct Qdisc *q, unsigned long cl)
+static void fq_codel_unbind(struct Qdisc *q, unsigned long cl)
 {
 }
 
@@ -683,11 +682,10 @@ static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 
 static const struct Qdisc_class_ops fq_codel_class_ops = {
 	.leaf		=	fq_codel_leaf,
-	.get		=	fq_codel_get,
-	.put		=	fq_codel_put,
+	.find		=	fq_codel_find,
 	.tcf_block	=	fq_codel_tcf_block,
 	.bind_tcf	=	fq_codel_bind,
-	.unbind_tcf	=	fq_codel_put,
+	.unbind_tcf	=	fq_codel_unbind,
 	.dump		=	fq_codel_dump_class,
 	.dump_stats	=	fq_codel_dump_class_stats,
 	.walk		=	fq_codel_walk,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 57ba406f1437..3839cbbdc32b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -29,6 +29,7 @@
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
+#include <trace/events/qdisc.h>
 
 /* Qdisc to use by default */
 const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
@@ -126,7 +127,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 			q->q.qlen--;
 		} else
 			skb = NULL;
-		return skb;
+		goto trace;
 	}
 	*validate = true;
 	skb = q->skb_bad_txq;
@@ -139,7 +140,8 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 			q->q.qlen--;
 			goto bulk;
 		}
-		return NULL;
+		skb = NULL;
+		goto trace;
 	}
 	if (!(q->flags & TCQ_F_ONETXQUEUE) ||
 	    !netif_xmit_frozen_or_stopped(txq))
@@ -151,6 +153,8 @@ bulk:
 		else
 			try_bulk_dequeue_skb_slow(q, skb, packets);
 	}
+trace:
+	trace_qdisc_dequeue(q, txq, *packets, skb);
 	return skb;
 }
 
@@ -284,9 +288,9 @@ unsigned long dev_trans_start(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_trans_start);
 
-static void dev_watchdog(unsigned long arg)
+static void dev_watchdog(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)arg;
+	struct net_device *dev = from_timer(dev, t, watchdog_timer);
 
 	netif_tx_lock(dev);
 	if (!qdisc_tx_is_noop(dev)) {
@@ -599,8 +603,14 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	struct Qdisc *sch;
 	unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
 	int err = -ENOBUFS;
-	struct net_device *dev = dev_queue->dev;
+	struct net_device *dev;
 
+	if (!dev_queue) {
+		err = -EINVAL;
+		goto errout;
+	}
+
+	dev = dev_queue->dev;
 	p = kzalloc_node(size, GFP_KERNEL,
 			 netdev_queue_numa_node_read(dev_queue));
 
@@ -681,13 +691,12 @@ void qdisc_reset(struct Qdisc *qdisc)
 		qdisc->gso_skb = NULL;
 	}
 	qdisc->q.qlen = 0;
+	qdisc->qstats.backlog = 0;
 }
 EXPORT_SYMBOL(qdisc_reset);
 
-static void qdisc_rcu_free(struct rcu_head *head)
+static void qdisc_free(struct Qdisc *qdisc)
 {
-	struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
-
 	if (qdisc_is_percpu_stats(qdisc)) {
 		free_percpu(qdisc->cpu_bstats);
 		free_percpu(qdisc->cpu_qstats);
@@ -720,11 +729,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 
 	kfree_skb_list(qdisc->gso_skb);
 	kfree_skb(qdisc->skb_bad_txq);
-	/*
-	 * gen_estimator est_timer() might access qdisc->q.lock,
-	 * wait a RCU grace period before freeing qdisc.
-	 */
-	call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
+	qdisc_free(qdisc);
 }
 EXPORT_SYMBOL(qdisc_destroy);
 
@@ -785,7 +790,7 @@ static void attach_default_qdiscs(struct net_device *dev)
 	    dev->priv_flags & IFF_NO_QUEUE) {
 		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
 		dev->qdisc = txq->qdisc_sleeping;
-		refcount_inc(&dev->qdisc->refcnt);
+		qdisc_refcount_inc(dev->qdisc);
 	} else {
 		qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
 		if (qdisc) {
@@ -955,7 +960,7 @@ void dev_init_scheduler(struct net_device *dev)
 	if (dev_ingress_queue(dev))
 		dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
 
-	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
+	timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
 }
 
 static void shutdown_scheduler_queue(struct net_device *dev,
@@ -1019,3 +1024,49 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
 	}
 }
 EXPORT_SYMBOL(psched_ratecfg_precompute);
+
+static void mini_qdisc_rcu_func(struct rcu_head *head)
+{
+}
+
+void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
+			  struct tcf_proto *tp_head)
+{
+	struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
+	struct mini_Qdisc *miniq;
+
+	if (!tp_head) {
+		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+		return;
+	}
+
+	miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
+		&miniqp->miniq1 : &miniqp->miniq2;
+
+	/* We need to make sure that readers won't see the miniq
+	 * we are about to modify. So wait until the previous
+	 * call_rcu_bh callback is done.
+	 */
+	rcu_barrier_bh();
+	miniq->filter_list = tp_head;
+	rcu_assign_pointer(*miniqp->p_miniq, miniq);
+
+	if (miniq_old)
+		/* This is the counterpart of the rcu barrier above. We need
+		 * to block any potential new user of miniq_old until no
+		 * reader can still see it.
+		 */
+		call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func);
+}
+EXPORT_SYMBOL(mini_qdisc_pair_swap);
+
+void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
+			  struct mini_Qdisc __rcu **p_miniq)
+{
+	miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
+	miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
+	miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
+	miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
+	miniqp->p_miniq = p_miniq;
+}
+EXPORT_SYMBOL(mini_qdisc_pair_init);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index fd15200f8627..d04068a97d81 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -110,7 +110,6 @@ enum hfsc_class_flags {
 
 struct hfsc_class {
 	struct Qdisc_class_common cl_common;
-	unsigned int	refcnt;		/* usage count */
 
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue qstats;
@@ -829,28 +828,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
 	}
 }
 
-static void
-set_active(struct hfsc_class *cl, unsigned int len)
-{
-	if (cl->cl_flags & HFSC_RSC)
-		init_ed(cl, len);
-	if (cl->cl_flags & HFSC_FSC)
-		init_vf(cl, len);
-
-}
-
-static void
-set_passive(struct hfsc_class *cl)
-{
-	if (cl->cl_flags & HFSC_RSC)
-		eltree_remove(cl);
-
-	/*
-	 * vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
-	 * needs to be called explicitly to remove a class from vttree.
-	 */
-}
-
 static unsigned int
 qdisc_peek_len(struct Qdisc *sch)
 {
@@ -981,6 +958,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	}
 
 	if (cl != NULL) {
+		int old_flags;
+
 		if (parentid) {
 			if (cl->cl_parent &&
 			    cl->cl_parent->cl_common.classid != parentid)
@@ -1001,6 +980,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		}
 
 		sch_tree_lock(sch);
+		old_flags = cl->cl_flags;
+
 		if (rsc != NULL)
 			hfsc_change_rsc(cl, rsc, cur_time);
 		if (fsc != NULL)
@@ -1009,10 +990,21 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			hfsc_change_usc(cl, usc, cur_time);
 
 		if (cl->qdisc->q.qlen != 0) {
-			if (cl->cl_flags & HFSC_RSC)
-				update_ed(cl, qdisc_peek_len(cl->qdisc));
-			if (cl->cl_flags & HFSC_FSC)
-				update_vf(cl, 0, cur_time);
+			int len = qdisc_peek_len(cl->qdisc);
+
+			if (cl->cl_flags & HFSC_RSC) {
+				if (old_flags & HFSC_RSC)
+					update_ed(cl, len);
+				else
+					init_ed(cl, len);
+			}
+
+			if (cl->cl_flags & HFSC_FSC) {
+				if (old_flags & HFSC_FSC)
+					update_vf(cl, 0, cur_time);
+				else
+					init_vf(cl, len);
+			}
 		}
 		sch_tree_unlock(sch);
 
@@ -1041,7 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl == NULL)
 		return -ENOBUFS;
 
-	err = tcf_block_get(&cl->block, &cl->filter_list);
+	err = tcf_block_get(&cl->block, &cl->filter_list, sch);
 	if (err) {
 		kfree(cl);
 		return err;
@@ -1067,7 +1059,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		hfsc_change_usc(cl, usc, 0);
 
 	cl->cl_common.classid = classid;
-	cl->refcnt    = 1;
 	cl->sched     = q;
 	cl->cl_parent = parent;
 	cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@ -1123,13 +1114,9 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
 	hfsc_purge_queue(sch, cl);
 	qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
 
-	BUG_ON(--cl->refcnt == 0);
-	/*
-	 * This shouldn't happen: we "hold" one cops->get() when called
-	 * from tc_ctl_tclass; the destroy method is done from cops->put().
-	 */
-
 	sch_tree_unlock(sch);
+
+	hfsc_destroy_class(sch, cl);
 	return 0;
 }
 
@@ -1157,6 +1144,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		case TC_ACT_STOLEN:
 		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -1221,30 +1209,18 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg)
 {
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
 
-	if (cl->qdisc->q.qlen == 0) {
-		update_vf(cl, 0, 0);
-		set_passive(cl);
-	}
+	/* vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
+	 * needs to be called explicitly to remove a class from vttree.
+	 */
+	update_vf(cl, 0, 0);
+	if (cl->cl_flags & HFSC_RSC)
+		eltree_remove(cl);
 }
 
 static unsigned long
-hfsc_get_class(struct Qdisc *sch, u32 classid)
-{
-	struct hfsc_class *cl = hfsc_find_class(classid, sch);
-
-	if (cl != NULL)
-		cl->refcnt++;
-
-	return (unsigned long)cl;
-}
-
-static void
-hfsc_put_class(struct Qdisc *sch, unsigned long arg)
+hfsc_search_class(struct Qdisc *sch, u32 classid)
 {
-	struct hfsc_class *cl = (struct hfsc_class *)arg;
-
-	if (--cl->refcnt == 0)
-		hfsc_destroy_class(sch, cl);
+	return (unsigned long)hfsc_find_class(classid, sch);
 }
 
 static unsigned long
@@ -1418,6 +1394,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_hfsc_qopt *qopt;
 	int err;
 
+	qdisc_watchdog_init(&q->watchdog, sch);
+
 	if (opt == NULL || nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
 	qopt = nla_data(opt);
@@ -1428,12 +1406,11 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 		return err;
 	q->eligible = RB_ROOT;
 
-	err = tcf_block_get(&q->root.block, &q->root.filter_list);
+	err = tcf_block_get(&q->root.block, &q->root.filter_list, sch);
 	if (err)
-		goto err_tcf;
+		return err;
 
 	q->root.cl_common.classid = sch->handle;
-	q->root.refcnt  = 1;
 	q->root.sched   = q;
 	q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
 					  sch->handle);
@@ -1448,13 +1425,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	qdisc_class_hash_insert(&q->clhash, &q->root.cl_common);
 	qdisc_class_hash_grow(sch, &q->clhash);
 
-	qdisc_watchdog_init(&q->watchdog, sch);
-
 	return 0;
-
-err_tcf:
-	qdisc_class_hash_destroy(&q->clhash);
-	return err;
 }
 
 static int
@@ -1585,7 +1556,12 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 	}
 
 	if (cl->qdisc->q.qlen == 1) {
-		set_active(cl, qdisc_pkt_len(skb));
+		unsigned int len = qdisc_pkt_len(skb);
+
+		if (cl->cl_flags & HFSC_RSC)
+			init_ed(cl, len);
+		if (cl->cl_flags & HFSC_FSC)
+			init_vf(cl, len);
 		/*
 		 * If this is the first packet, isolate the head so an eventual
 		 * head drop before the first dequeue operation has no chance
@@ -1649,18 +1625,18 @@ hfsc_dequeue(struct Qdisc *sch)
 	if (realtime)
 		cl->cl_cumul += qdisc_pkt_len(skb);
 
-	if (cl->qdisc->q.qlen != 0) {
-		if (cl->cl_flags & HFSC_RSC) {
+	if (cl->cl_flags & HFSC_RSC) {
+		if (cl->qdisc->q.qlen != 0) {
 			/* update ed */
 			next_len = qdisc_peek_len(cl->qdisc);
 			if (realtime)
 				update_ed(cl, next_len);
 			else
 				update_d(cl, next_len);
+		} else {
+			/* the class becomes passive */
+			eltree_remove(cl);
 		}
-	} else {
-		/* the class becomes passive */
-		set_passive(cl);
 	}
 
 	qdisc_bstats_update(sch, skb);
@@ -1676,8 +1652,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = {
 	.graft		= hfsc_graft_class,
 	.leaf		= hfsc_class_leaf,
 	.qlen_notify	= hfsc_qlen_notify,
-	.get		= hfsc_get_class,
-	.put		= hfsc_put_class,
+	.find		= hfsc_search_class,
 	.bind_tcf	= hfsc_bind_tcf,
 	.unbind_tcf	= hfsc_unbind_tcf,
 	.tcf_block	= hfsc_tcf_block,
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 51d3ba682af9..73a53c08091b 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -477,6 +477,9 @@ static void hhf_destroy(struct Qdisc *sch)
 		kvfree(q->hhf_valid_bits[i]);
 	}
 
+	if (!q->hh_flows)
+		return;
+
 	for (i = 0; i < HH_FLOWS_CNT; i++) {
 		struct hh_flow_state *flow, *next;
 		struct list_head *head = &q->hh_flows[i];
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5d65ec5207e9..fa0380730ff0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -107,7 +107,6 @@ struct htb_class {
 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
 	struct tcf_block	*block;
 	int			filter_cnt;
-	int			refcnt;		/* usage count of this class */
 
 	int			level;		/* our level (see above) */
 	unsigned int		children;
@@ -143,6 +142,7 @@ struct htb_class {
 	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
 
 	unsigned int drops ____cacheline_aligned_in_smp;
+	unsigned int		overlimits;
 };
 
 struct htb_level {
@@ -193,6 +193,10 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 	return container_of(clc, struct htb_class, common);
 }
 
+static unsigned long htb_search(struct Qdisc *sch, u32 handle)
+{
+	return (unsigned long)htb_find(handle, sch);
+}
 /**
  * htb_classify - classify a packet into class
  *
@@ -240,6 +244,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 		case TC_ACT_STOLEN:
 		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -530,6 +535,9 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
 	if (new_mode == cl->cmode)
 		return;
 
+	if (new_mode == HTB_CANT_SEND)
+		cl->overlimits++;
+
 	if (cl->prio_activity) {	/* not necessary: speed optimization */
 		if (cl->cmode != HTB_CANT_SEND)
 			htb_deactivate_prios(q, cl);
@@ -1017,10 +1025,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	int err;
 	int i;
 
+	qdisc_watchdog_init(&q->watchdog, sch);
+	INIT_WORK(&q->work, htb_work_func);
+
 	if (!opt)
 		return -EINVAL;
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	err = tcf_block_get(&q->block, &q->filter_list, sch);
 	if (err)
 		return err;
 
@@ -1041,8 +1052,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
 		INIT_LIST_HEAD(q->drops + i);
 
-	qdisc_watchdog_init(&q->watchdog, sch);
-	INIT_WORK(&q->work, htb_work_func);
 	qdisc_skb_head_init(&q->direct_queue);
 
 	if (tb[TCA_HTB_DIRECT_QLEN])
@@ -1139,6 +1148,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 	struct htb_class *cl = (struct htb_class *)arg;
 	struct gnet_stats_queue qs = {
 		.drops = cl->drops,
+		.overlimits = cl->overlimits,
 	};
 	__u32 qlen = 0;
 
@@ -1186,16 +1196,7 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
 
-	if (cl->un.leaf.q->q.qlen == 0)
-		htb_deactivate(qdisc_priv(sch), cl);
-}
-
-static unsigned long htb_get(struct Qdisc *sch, u32 classid)
-{
-	struct htb_class *cl = htb_find(classid, sch);
-	if (cl)
-		cl->refcnt++;
-	return (unsigned long)cl;
+	htb_deactivate(qdisc_priv(sch), cl);
 }
 
 static inline int htb_parent_last_child(struct htb_class *cl)
@@ -1317,22 +1318,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 	if (last_child)
 		htb_parent_to_leaf(q, cl, new_q);
 
-	BUG_ON(--cl->refcnt == 0);
-	/*
-	 * This shouldn't happen: we "hold" one cops->get() when called
-	 * from tc_ctl_tclass; the destroy method is done from cops->put().
-	 */
-
 	sch_tree_unlock(sch);
-	return 0;
-}
 
-static void htb_put(struct Qdisc *sch, unsigned long arg)
-{
-	struct htb_class *cl = (struct htb_class *)arg;
-
-	if (--cl->refcnt == 0)
-		htb_destroy_class(sch, cl);
+	htb_destroy_class(sch, cl);
+	return 0;
 }
 
 static int htb_change_class(struct Qdisc *sch, u32 classid,
@@ -1405,7 +1394,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if (!cl)
 			goto failure;
 
-		err = tcf_block_get(&cl->block, &cl->filter_list);
+		err = tcf_block_get(&cl->block, &cl->filter_list, sch);
 		if (err) {
 			kfree(cl);
 			goto failure;
@@ -1423,7 +1412,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			}
 		}
 
-		cl->refcnt = 1;
 		cl->children = 0;
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 		RB_CLEAR_NODE(&cl->pq_node);
@@ -1599,8 +1587,7 @@ static const struct Qdisc_class_ops htb_class_ops = {
 	.graft		=	htb_graft,
 	.leaf		=	htb_leaf,
 	.qlen_notify	=	htb_qlen_notify,
-	.get		=	htb_get,
-	.put		=	htb_put,
+	.find		=	htb_search,
 	.change		=	htb_change_class,
 	.delete		=	htb_delete,
 	.walk		=	htb_walk,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index d8a9bebcab90..5ecc38f35d47 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -20,6 +20,8 @@
 
 struct ingress_sched_data {
 	struct tcf_block *block;
+	struct tcf_block_ext_info block_info;
+	struct mini_Qdisc_pair miniqp;
 };
 
 static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
@@ -27,23 +29,18 @@ static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
 	return NULL;
 }
 
-static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
+static unsigned long ingress_find(struct Qdisc *sch, u32 classid)
 {
 	return TC_H_MIN(classid) + 1;
 }
 
-static bool ingress_cl_offload(u32 classid)
-{
-	return true;
-}
-
 static unsigned long ingress_bind_filter(struct Qdisc *sch,
 					 unsigned long parent, u32 classid)
 {
-	return ingress_get(sch, classid);
+	return ingress_find(sch, classid);
 }
 
-static void ingress_put(struct Qdisc *sch, unsigned long cl)
+static void ingress_unbind_filter(struct Qdisc *sch, unsigned long cl)
 {
 }
 
@@ -58,13 +55,26 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
 	return q->block;
 }
 
+static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv)
+{
+	struct mini_Qdisc_pair *miniqp = priv;
+
+	mini_qdisc_pair_swap(miniqp, tp_head);
+}
+
 static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct ingress_sched_data *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
 	int err;
 
-	err = tcf_block_get(&q->block, &dev->ingress_cl_list);
+	mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
+
+	q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	q->block_info.chain_head_change = clsact_chain_head_change;
+	q->block_info.chain_head_change_priv = &q->miniqp;
+
+	err = tcf_block_get_ext(&q->block, sch, &q->block_info);
 	if (err)
 		return err;
 
@@ -78,7 +88,7 @@ static void ingress_destroy(struct Qdisc *sch)
 {
 	struct ingress_sched_data *q = qdisc_priv(sch);
 
-	tcf_block_put(q->block);
+	tcf_block_put_ext(q->block, sch, &q->block_info);
 	net_dec_ingress_queue();
 }
 
@@ -99,13 +109,11 @@ nla_put_failure:
 
 static const struct Qdisc_class_ops ingress_class_ops = {
 	.leaf		=	ingress_leaf,
-	.get		=	ingress_get,
-	.put		=	ingress_put,
+	.find		=	ingress_find,
 	.walk		=	ingress_walk,
 	.tcf_block	=	ingress_tcf_block,
-	.tcf_cl_offload	=	ingress_cl_offload,
 	.bind_tcf	=	ingress_bind_filter,
-	.unbind_tcf	=	ingress_put,
+	.unbind_tcf	=	ingress_unbind_filter,
 };
 
 static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
@@ -121,9 +129,13 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
 struct clsact_sched_data {
 	struct tcf_block *ingress_block;
 	struct tcf_block *egress_block;
+	struct tcf_block_ext_info ingress_block_info;
+	struct tcf_block_ext_info egress_block_info;
+	struct mini_Qdisc_pair miniqp_ingress;
+	struct mini_Qdisc_pair miniqp_egress;
 };
 
-static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
+static unsigned long clsact_find(struct Qdisc *sch, u32 classid)
 {
 	switch (TC_H_MIN(classid)) {
 	case TC_H_MIN(TC_H_MIN_INGRESS):
@@ -134,15 +146,10 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
 	}
 }
 
-static bool clsact_cl_offload(u32 classid)
-{
-	return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS);
-}
-
 static unsigned long clsact_bind_filter(struct Qdisc *sch,
 					unsigned long parent, u32 classid)
 {
-	return clsact_get(sch, classid);
+	return clsact_find(sch, classid);
 }
 
 static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -165,13 +172,25 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 	struct net_device *dev = qdisc_dev(sch);
 	int err;
 
-	err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list);
+	mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
+
+	q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	q->ingress_block_info.chain_head_change = clsact_chain_head_change;
+	q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress;
+
+	err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info);
 	if (err)
 		return err;
 
-	err = tcf_block_get(&q->egress_block, &dev->egress_cl_list);
+	mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
+
+	q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
+	q->egress_block_info.chain_head_change = clsact_chain_head_change;
+	q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
+
+	err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
 	if (err)
-		return err;
+		goto err_egress_block_get;
 
 	net_inc_ingress_queue();
 	net_inc_egress_queue();
@@ -179,14 +198,18 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 	sch->flags |= TCQ_F_CPUSTATS;
 
 	return 0;
+
+err_egress_block_get:
+	tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
+	return err;
 }
 
 static void clsact_destroy(struct Qdisc *sch)
 {
 	struct clsact_sched_data *q = qdisc_priv(sch);
 
-	tcf_block_put(q->egress_block);
-	tcf_block_put(q->ingress_block);
+	tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
+	tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
 
 	net_dec_ingress_queue();
 	net_dec_egress_queue();
@@ -194,13 +217,11 @@ static void clsact_destroy(struct Qdisc *sch)
 
 static const struct Qdisc_class_ops clsact_class_ops = {
 	.leaf		=	ingress_leaf,
-	.get		=	clsact_get,
-	.put		=	ingress_put,
+	.find		=	clsact_find,
 	.walk		=	ingress_walk,
 	.tcf_block	=	clsact_tcf_block,
-	.tcf_cl_offload	=	clsact_cl_offload,
 	.bind_tcf	=	clsact_bind_filter,
-	.unbind_tcf	=	ingress_put,
+	.unbind_tcf	=	ingress_unbind_filter,
 };
 
 static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index cadfdd4f1e52..213b586a06a0 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -130,15 +130,7 @@ static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
 static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
 					    struct tcmsg *tcm)
 {
-	unsigned int ntx = TC_H_MIN(tcm->tcm_parent);
-	struct netdev_queue *dev_queue = mq_queue_get(sch, ntx);
-
-	if (!dev_queue) {
-		struct net_device *dev = qdisc_dev(sch);
-
-		return netdev_get_tx_queue(dev, 0);
-	}
-	return dev_queue;
+	return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
 }
 
 static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
@@ -165,7 +157,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
 	return dev_queue->qdisc_sleeping;
 }
 
-static unsigned long mq_get(struct Qdisc *sch, u32 classid)
+static unsigned long mq_find(struct Qdisc *sch, u32 classid)
 {
 	unsigned int ntx = TC_H_MIN(classid);
 
@@ -174,10 +166,6 @@ static unsigned long mq_get(struct Qdisc *sch, u32 classid)
 	return ntx;
 }
 
-static void mq_put(struct Qdisc *sch, unsigned long cl)
-{
-}
-
 static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
 			 struct sk_buff *skb, struct tcmsg *tcm)
 {
@@ -223,8 +211,7 @@ static const struct Qdisc_class_ops mq_class_ops = {
 	.select_queue	= mq_select_queue,
 	.graft		= mq_graft,
 	.leaf		= mq_leaf,
-	.get		= mq_get,
-	.put		= mq_put,
+	.find		= mq_find,
 	.walk		= mq_walk,
 	.dump		= mq_dump_class,
 	.dump_stats	= mq_dump_class_stats,
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index e0c02725cd48..b85885a9d8a1 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -18,10 +18,16 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/sch_generic.h>
+#include <net/pkt_cls.h>
 
 struct mqprio_sched {
 	struct Qdisc		**qdiscs;
+	u16 mode;
+	u16 shaper;
 	int hw_offload;
+	u32 flags;
+	u64 min_rate[TC_QOPT_MAX_QUEUE];
+	u64 max_rate[TC_QOPT_MAX_QUEUE];
 };
 
 static void mqprio_destroy(struct Qdisc *sch)
@@ -39,11 +45,18 @@ static void mqprio_destroy(struct Qdisc *sch)
 	}
 
 	if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
-		struct tc_mqprio_qopt offload = { 0 };
-		struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
-					   { .mqprio = &offload } };
-
-		dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc);
+		struct tc_mqprio_qopt_offload mqprio = { { 0 } };
+
+		switch (priv->mode) {
+		case TC_MQPRIO_MODE_DCB:
+		case TC_MQPRIO_MODE_CHANNEL:
+			dev->netdev_ops->ndo_setup_tc(dev,
+						      TC_SETUP_QDISC_MQPRIO,
+						      &mqprio);
+			break;
+		default:
+			return;
+		}
 	} else {
 		netdev_set_num_tc(dev, 0);
 	}
@@ -99,6 +112,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
 	return 0;
 }
 
+static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = {
+	[TCA_MQPRIO_MODE]	= { .len = sizeof(u16) },
+	[TCA_MQPRIO_SHAPER]	= { .len = sizeof(u16) },
+	[TCA_MQPRIO_MIN_RATE64]	= { .type = NLA_NESTED },
+	[TCA_MQPRIO_MAX_RATE64]	= { .type = NLA_NESTED },
+};
+
+static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+		      const struct nla_policy *policy, int len)
+{
+	int nested_len = nla_len(nla) - NLA_ALIGN(len);
+
+	if (nested_len >= nla_attr_size(0))
+		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+				 nested_len, policy, NULL);
+
+	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+	return 0;
+}
+
 static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct net_device *dev = qdisc_dev(sch);
@@ -107,6 +140,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 	struct Qdisc *qdisc;
 	int i, err = -EOPNOTSUPP;
 	struct tc_mqprio_qopt *qopt = NULL;
+	struct nlattr *tb[TCA_MQPRIO_MAX + 1];
+	struct nlattr *attr;
+	int rem;
+	int len;
 
 	BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
 	BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
@@ -117,6 +154,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!netif_is_multiqueue(dev))
 		return -EOPNOTSUPP;
 
+	/* make certain we can allocate enough classids to handle queues */
+	if (dev->num_tx_queues >= TC_H_MIN_PRIORITY)
+		return -ENOMEM;
+
 	if (!opt || nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
 
@@ -124,6 +165,59 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 	if (mqprio_parse_opt(dev, qopt))
 		return -EINVAL;
 
+	len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
+	if (len > 0) {
+		err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
+				 sizeof(*qopt));
+		if (err < 0)
+			return err;
+
+		if (!qopt->hw)
+			return -EINVAL;
+
+		if (tb[TCA_MQPRIO_MODE]) {
+			priv->flags |= TC_MQPRIO_F_MODE;
+			priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
+		}
+
+		if (tb[TCA_MQPRIO_SHAPER]) {
+			priv->flags |= TC_MQPRIO_F_SHAPER;
+			priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
+		}
+
+		if (tb[TCA_MQPRIO_MIN_RATE64]) {
+			if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+				return -EINVAL;
+			i = 0;
+			nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
+					    rem) {
+				if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
+					return -EINVAL;
+				if (i >= qopt->num_tc)
+					break;
+				priv->min_rate[i] = *(u64 *)nla_data(attr);
+				i++;
+			}
+			priv->flags |= TC_MQPRIO_F_MIN_RATE;
+		}
+
+		if (tb[TCA_MQPRIO_MAX_RATE64]) {
+			if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+				return -EINVAL;
+			i = 0;
+			nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
+					    rem) {
+				if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
+					return -EINVAL;
+				if (i >= qopt->num_tc)
+					break;
+				priv->max_rate[i] = *(u64 *)nla_data(attr);
+				i++;
+			}
+			priv->flags |= TC_MQPRIO_F_MAX_RATE;
+		}
+	}
+
 	/* pre-allocate qdisc, attachment can't fail */
 	priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
 			       GFP_KERNEL);
@@ -148,16 +242,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 	 * supplied and verified mapping
 	 */
 	if (qopt->hw) {
-		struct tc_mqprio_qopt offload = *qopt;
-		struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
-					   { .mqprio = &offload } };
+		struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
 
-		err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle,
-						    0, 0, &tc);
+		switch (priv->mode) {
+		case TC_MQPRIO_MODE_DCB:
+			if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
+				return -EINVAL;
+			break;
+		case TC_MQPRIO_MODE_CHANNEL:
+			mqprio.flags = priv->flags;
+			if (priv->flags & TC_MQPRIO_F_MODE)
+				mqprio.mode = priv->mode;
+			if (priv->flags & TC_MQPRIO_F_SHAPER)
+				mqprio.shaper = priv->shaper;
+			if (priv->flags & TC_MQPRIO_F_MIN_RATE)
+				for (i = 0; i < mqprio.qopt.num_tc; i++)
+					mqprio.min_rate[i] = priv->min_rate[i];
+			if (priv->flags & TC_MQPRIO_F_MAX_RATE)
+				for (i = 0; i < mqprio.qopt.num_tc; i++)
+					mqprio.max_rate[i] = priv->max_rate[i];
+			break;
+		default:
+			return -EINVAL;
+		}
+		err = dev->netdev_ops->ndo_setup_tc(dev,
+						    TC_SETUP_QDISC_MQPRIO,
+						    &mqprio);
 		if (err)
 			return err;
 
-		priv->hw_offload = offload.hw;
+		priv->hw_offload = mqprio.qopt.hw;
 	} else {
 		netdev_set_num_tc(dev, qopt->num_tc);
 		for (i = 0; i < qopt->num_tc; i++)
@@ -197,7 +311,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
 					     unsigned long cl)
 {
 	struct net_device *dev = qdisc_dev(sch);
-	unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
+	unsigned long ntx = cl - 1;
 
 	if (ntx >= dev->num_tx_queues)
 		return NULL;
@@ -227,11 +341,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
 	return 0;
 }
 
+static int dump_rates(struct mqprio_sched *priv,
+		      struct tc_mqprio_qopt *opt, struct sk_buff *skb)
+{
+	struct nlattr *nest;
+	int i;
+
+	if (priv->flags & TC_MQPRIO_F_MIN_RATE) {
+		nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64);
+		if (!nest)
+			goto nla_put_failure;
+
+		for (i = 0; i < opt->num_tc; i++) {
+			if (nla_put(skb, TCA_MQPRIO_MIN_RATE64,
+				    sizeof(priv->min_rate[i]),
+				    &priv->min_rate[i]))
+				goto nla_put_failure;
+		}
+		nla_nest_end(skb, nest);
+	}
+
+	if (priv->flags & TC_MQPRIO_F_MAX_RATE) {
+		nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64);
+		if (!nest)
+			goto nla_put_failure;
+
+		for (i = 0; i < opt->num_tc; i++) {
+			if (nla_put(skb, TCA_MQPRIO_MAX_RATE64,
+				    sizeof(priv->max_rate[i]),
+				    &priv->max_rate[i]))
+				goto nla_put_failure;
+		}
+		nla_nest_end(skb, nest);
+	}
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
 static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct net_device *dev = qdisc_dev(sch);
 	struct mqprio_sched *priv = qdisc_priv(sch);
-	unsigned char *b = skb_tail_pointer(skb);
+	struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
 	struct tc_mqprio_qopt opt = { 0 };
 	struct Qdisc *qdisc;
 	unsigned int i;
@@ -262,12 +416,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 		opt.offset[i] = dev->tc_to_txq[i].offset;
 	}
 
-	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+	if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
+		goto nla_put_failure;
+
+	if ((priv->flags & TC_MQPRIO_F_MODE) &&
+	    nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode))
 		goto nla_put_failure;
 
-	return skb->len;
+	if ((priv->flags & TC_MQPRIO_F_SHAPER) &&
+	    nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper))
+		goto nla_put_failure;
+
+	if ((priv->flags & TC_MQPRIO_F_MIN_RATE ||
+	     priv->flags & TC_MQPRIO_F_MAX_RATE) &&
+	    (dump_rates(priv, &opt, skb) != 0))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, nla);
 nla_put_failure:
-	nlmsg_trim(skb, b);
+	nlmsg_trim(skb, nla);
 	return -1;
 }
 
@@ -281,47 +448,40 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
 	return dev_queue->qdisc_sleeping;
 }
 
-static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
 {
 	struct net_device *dev = qdisc_dev(sch);
 	unsigned int ntx = TC_H_MIN(classid);
 
-	if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
-		return 0;
-	return ntx;
-}
+	/* There are essentially two regions here that have valid classid
+	 * values. The first region will have a classid value of 1 through
+	 * num_tx_queues. All of these are backed by actual Qdiscs.
+	 */
+	if (ntx < TC_H_MIN_PRIORITY)
+		return (ntx <= dev->num_tx_queues) ? ntx : 0;
 
-static void mqprio_put(struct Qdisc *sch, unsigned long cl)
-{
+	/* The second region represents the hardware traffic classes. These
+	 * are represented by classid values of TC_H_MIN_PRIORITY through
+	 * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1
+	 */
+	return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0;
 }
 
 static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
 			 struct sk_buff *skb, struct tcmsg *tcm)
 {
-	struct net_device *dev = qdisc_dev(sch);
+	if (cl < TC_H_MIN_PRIORITY) {
+		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+		struct net_device *dev = qdisc_dev(sch);
+		int tc = netdev_txq_to_tc(dev, cl - 1);
 
-	if (cl <= netdev_get_num_tc(dev)) {
+		tcm->tcm_parent = (tc < 0) ? 0 :
+			TC_H_MAKE(TC_H_MAJ(sch->handle),
+				  TC_H_MIN(tc + TC_H_MIN_PRIORITY));
+		tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+	} else {
 		tcm->tcm_parent = TC_H_ROOT;
 		tcm->tcm_info = 0;
-	} else {
-		int i;
-		struct netdev_queue *dev_queue;
-
-		dev_queue = mqprio_queue_get(sch, cl);
-		tcm->tcm_parent = 0;
-		for (i = 0; i < netdev_get_num_tc(dev); i++) {
-			struct netdev_tc_txq tc = dev->tc_to_txq[i];
-			int q_idx = cl - netdev_get_num_tc(dev);
-
-			if (q_idx > tc.offset &&
-			    q_idx <= tc.offset + tc.count) {
-				tcm->tcm_parent =
-					TC_H_MAKE(TC_H_MAJ(sch->handle),
-						  TC_H_MIN(i + 1));
-				break;
-			}
-		}
-		tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
 	}
 	tcm->tcm_handle |= TC_H_MIN(cl);
 	return 0;
@@ -332,15 +492,14 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	__releases(d->lock)
 	__acquires(d->lock)
 {
-	struct net_device *dev = qdisc_dev(sch);
-
-	if (cl <= netdev_get_num_tc(dev)) {
+	if (cl >= TC_H_MIN_PRIORITY) {
 		int i;
 		__u32 qlen = 0;
 		struct Qdisc *qdisc;
 		struct gnet_stats_queue qstats = {0};
 		struct gnet_stats_basic_packed bstats = {0};
-		struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+		struct net_device *dev = qdisc_dev(sch);
+		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
 
 		/* Drop lock here it will be reclaimed before touching
 		 * statistics this is required because the d->lock we
@@ -393,25 +552,44 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 
 	/* Walk hierarchy with a virtual class per tc */
 	arg->count = arg->skip;
-	for (ntx = arg->skip;
-	     ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
-	     ntx++) {
+	for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
+		if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) {
+			arg->stop = 1;
+			return;
+		}
+		arg->count++;
+	}
+
+	/* Pad the values and skip over unused traffic classes */
+	if (ntx < TC_MAX_QUEUE) {
+		arg->count = TC_MAX_QUEUE;
+		ntx = TC_MAX_QUEUE;
+	}
+
+	/* Reset offset, sort out remaining per-queue qdiscs */
+	for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) {
 		if (arg->fn(sch, ntx + 1, arg) < 0) {
 			arg->stop = 1;
-			break;
+			return;
 		}
 		arg->count++;
 	}
 }
 
+static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch,
+						struct tcmsg *tcm)
+{
+	return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
+}
+
 static const struct Qdisc_class_ops mqprio_class_ops = {
 	.graft		= mqprio_graft,
 	.leaf		= mqprio_leaf,
-	.get		= mqprio_get,
-	.put		= mqprio_put,
+	.find		= mqprio_find,
 	.walk		= mqprio_walk,
 	.dump		= mqprio_dump_class,
 	.dump_stats	= mqprio_dump_class_stats,
+	.select_queue	= mqprio_select_queue,
 };
 
 static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index f143b7bbaa0d..012216386c0b 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -54,6 +54,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	case TC_ACT_QUEUED:
 	case TC_ACT_TRAP:
 		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		/* fall through */
 	case TC_ACT_SHOT:
 		return NULL;
 	}
@@ -245,7 +246,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	err = tcf_block_get(&q->block, &q->filter_list, sch);
 	if (err)
 		return err;
 
@@ -257,12 +258,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
 	for (i = 0; i < q->max_bands; i++)
 		q->queues[i] = &noop_qdisc;
 
-	err = multiq_tune(sch, opt);
-
-	if (err)
-		kfree(q->queues);
-
-	return err;
+	return multiq_tune(sch, opt);
 }
 
 static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -306,7 +302,7 @@ multiq_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->queues[band];
 }
 
-static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
+static unsigned long multiq_find(struct Qdisc *sch, u32 classid)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
 	unsigned long band = TC_H_MIN(classid);
@@ -319,11 +315,11 @@ static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
 static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
 				 u32 classid)
 {
-	return multiq_get(sch, classid);
+	return multiq_find(sch, classid);
 }
 
 
-static void multiq_put(struct Qdisc *q, unsigned long cl)
+static void multiq_unbind(struct Qdisc *q, unsigned long cl)
 {
 }
 
@@ -385,12 +381,11 @@ static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl)
 static const struct Qdisc_class_ops multiq_class_ops = {
 	.graft		=	multiq_graft,
 	.leaf		=	multiq_leaf,
-	.get		=	multiq_get,
-	.put		=	multiq_put,
+	.find		=	multiq_find,
 	.walk		=	multiq_walk,
 	.tcf_block	=	multiq_tcf_block,
 	.bind_tcf	=	multiq_bind,
-	.unbind_tcf	=	multiq_put,
+	.unbind_tcf	=	multiq_unbind,
 	.dump		=	multiq_dump_class,
 	.dump_stats	=	multiq_dump_class_stats,
 };
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1b3dd6190e93..dd70924cbcdf 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,8 +77,8 @@ struct netem_sched_data {
 
 	struct qdisc_watchdog watchdog;
 
-	psched_tdiff_t latency;
-	psched_tdiff_t jitter;
+	s64 latency;
+	s64 jitter;
 
 	u32 loss;
 	u32 ecn;
@@ -135,6 +135,13 @@ struct netem_sched_data {
 		u32 a5; /* p23 used only in 4-states */
 	} clg;
 
+	struct tc_netem_slot slot_config;
+	struct slotstate {
+		u64 slot_next;
+		s32 packets_left;
+		s32 bytes_left;
+	} slot;
+
 };
 
 /* Time stamp put into socket buffer control block
@@ -145,16 +152,9 @@ struct netem_sched_data {
  * we save skb->tstamp value in skb->cb[] before destroying it.
  */
 struct netem_skb_cb {
-	psched_time_t	time_to_send;
-	ktime_t		tstamp_save;
+	u64	        time_to_send;
 };
 
-
-static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
-{
-	return rb_entry(rb, struct sk_buff, rbnode);
-}
-
 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
 {
 	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
@@ -312,11 +312,11 @@ static bool loss_event(struct netem_sched_data *q)
  * std deviation sigma.  Uses table lookup to approximate the desired
  * distribution, and a uniformly-distributed pseudo-random source.
  */
-static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
-				struct crndstate *state,
-				const struct disttable *dist)
+static s64 tabledist(s64 mu, s32 sigma,
+		     struct crndstate *state,
+		     const struct disttable *dist)
 {
-	psched_tdiff_t x;
+	s64 x;
 	long t;
 	u32 rnd;
 
@@ -327,7 +327,7 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
 
 	/* default uniform distribution */
 	if (dist == NULL)
-		return (rnd % (2*sigma)) - sigma + mu;
+		return (rnd % (2 * sigma)) - sigma + mu;
 
 	t = dist->table[rnd % dist->size];
 	x = (sigma % NETEM_DIST_SCALE) * t;
@@ -339,10 +339,8 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 }
 
-static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
+static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
 {
-	u64 ticks;
-
 	len += q->packet_overhead;
 
 	if (q->cell_size) {
@@ -353,21 +351,19 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
 		len = cells * (q->cell_size + q->cell_overhead);
 	}
 
-	ticks = (u64)len * NSEC_PER_SEC;
-
-	do_div(ticks, q->rate);
-	return PSCHED_NS2TICKS(ticks);
+	return div64_u64(len * NSEC_PER_SEC, q->rate);
 }
 
 static void tfifo_reset(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	struct rb_node *p;
+	struct rb_node *p = rb_first(&q->t_root);
 
-	while ((p = rb_first(&q->t_root))) {
-		struct sk_buff *skb = netem_rb_to_skb(p);
+	while (p) {
+		struct sk_buff *skb = rb_to_skb(p);
 
-		rb_erase(p, &q->t_root);
+		p = rb_next(p);
+		rb_erase(&skb->rbnode, &q->t_root);
 		rtnl_kfree_skbs(skb, skb);
 	}
 }
@@ -375,14 +371,14 @@ static void tfifo_reset(struct Qdisc *sch)
 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
+	u64 tnext = netem_skb_cb(nskb)->time_to_send;
 	struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
 
 	while (*p) {
 		struct sk_buff *skb;
 
 		parent = *p;
-		skb = netem_rb_to_skb(parent);
+		skb = rb_to_skb(parent);
 		if (tnext >= netem_skb_cb(skb)->time_to_send)
 			p = &parent->rb_right;
 		else
@@ -521,13 +517,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	if (q->gap == 0 ||		/* not doing reordering */
 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
 	    q->reorder < get_crandom(&q->reorder_cor)) {
-		psched_time_t now;
-		psched_tdiff_t delay;
+		u64 now;
+		s64 delay;
 
 		delay = tabledist(q->latency, q->jitter,
 				  &q->delay_cor, q->delay_dist);
 
-		now = psched_get_time();
+		now = ktime_get_ns();
 
 		if (q->rate) {
 			struct netem_skb_cb *last = NULL;
@@ -538,7 +534,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 				struct sk_buff *t_skb;
 				struct netem_skb_cb *t_last;
 
-				t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+				t_skb = skb_rb_last(&q->t_root);
 				t_last = netem_skb_cb(t_skb);
 				if (!last ||
 				    t_last->time_to_send > last->time_to_send) {
@@ -553,15 +549,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 				 * from delay.
 				 */
 				delay -= last->time_to_send - now;
-				delay = max_t(psched_tdiff_t, 0, delay);
+				delay = max_t(s64, 0, delay);
 				now = last->time_to_send;
 			}
 
-			delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
+			delay += packet_time_ns(qdisc_pkt_len(skb), q);
 		}
 
 		cb->time_to_send = now + delay;
-		cb->tstamp_save = skb->tstamp;
 		++q->counter;
 		tfifo_enqueue(skb, sch);
 	} else {
@@ -569,7 +564,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		 * Do re-ordering by putting one out of N packets at the front
 		 * of the queue.
 		 */
-		cb->time_to_send = psched_get_time();
+		cb->time_to_send = ktime_get_ns();
 		q->counter = 0;
 
 		netem_enqueue_skb_head(&sch->q, skb);
@@ -600,6 +595,20 @@ finish_segs:
 	return NET_XMIT_SUCCESS;
 }
 
+/* Delay the next round with a new future slot with a
+ * correct number of bytes and packets.
+ */
+
+static void get_slot_next(struct netem_sched_data *q, u64 now)
+{
+	q->slot.slot_next = now + q->slot_config.min_delay +
+		(prandom_u32() *
+			(q->slot_config.max_delay -
+				q->slot_config.min_delay) >> 32);
+	q->slot.packets_left = q->slot_config.max_packets;
+	q->slot.bytes_left = q->slot_config.max_bytes;
+}
+
 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -616,20 +625,26 @@ deliver:
 	}
 	p = rb_first(&q->t_root);
 	if (p) {
-		psched_time_t time_to_send;
+		u64 time_to_send;
+		u64 now = ktime_get_ns();
 
-		skb = netem_rb_to_skb(p);
+		skb = rb_to_skb(p);
 
 		/* if more time remaining? */
 		time_to_send = netem_skb_cb(skb)->time_to_send;
-		if (time_to_send <= psched_get_time()) {
-			rb_erase(p, &q->t_root);
+		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
+			get_slot_next(q, now);
 
+		if (time_to_send <= now &&  q->slot.slot_next <= now) {
+			rb_erase(p, &q->t_root);
 			sch->q.qlen--;
 			qdisc_qstats_backlog_dec(sch, skb);
 			skb->next = NULL;
 			skb->prev = NULL;
-			skb->tstamp = netem_skb_cb(skb)->tstamp_save;
+			/* skb->dev shares skb->rbnode area,
+			 * we need to restore its value.
+			 */
+			skb->dev = qdisc_dev(sch);
 
 #ifdef CONFIG_NET_CLS_ACT
 			/*
@@ -640,6 +655,14 @@ deliver:
 				skb->tstamp = 0;
 #endif
 
+			if (q->slot.slot_next) {
+				q->slot.packets_left--;
+				q->slot.bytes_left -= qdisc_pkt_len(skb);
+				if (q->slot.packets_left <= 0 ||
+				    q->slot.bytes_left <= 0)
+					get_slot_next(q, now);
+			}
+
 			if (q->qdisc) {
 				unsigned int pkt_len = qdisc_pkt_len(skb);
 				struct sk_buff *to_free = NULL;
@@ -663,7 +686,10 @@ deliver:
 			if (skb)
 				goto deliver;
 		}
-		qdisc_watchdog_schedule(&q->watchdog, time_to_send);
+
+		qdisc_watchdog_schedule_ns(&q->watchdog,
+					   max(time_to_send,
+					       q->slot.slot_next));
 	}
 
 	if (q->qdisc) {
@@ -694,6 +720,7 @@ static void dist_free(struct disttable *d)
  * Distribution data is a variable size payload containing
  * signed 16 bit values.
  */
+
 static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -724,6 +751,23 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	return 0;
 }
 
+static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
+{
+	const struct tc_netem_slot *c = nla_data(attr);
+
+	q->slot_config = *c;
+	if (q->slot_config.max_packets == 0)
+		q->slot_config.max_packets = INT_MAX;
+	if (q->slot_config.max_bytes == 0)
+		q->slot_config.max_bytes = INT_MAX;
+	q->slot.packets_left = q->slot_config.max_packets;
+	q->slot.bytes_left = q->slot_config.max_bytes;
+	if (q->slot_config.min_delay | q->slot_config.max_delay)
+		q->slot.slot_next = ktime_get_ns();
+	else
+		q->slot.slot_next = 0;
+}
+
 static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
 {
 	const struct tc_netem_corr *c = nla_data(attr);
@@ -825,6 +869,9 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
 	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
 	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
+	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
+	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
+	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
 };
 
 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -892,8 +939,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 
 	sch->limit = qopt->limit;
 
-	q->latency = qopt->latency;
-	q->jitter = qopt->jitter;
+	q->latency = PSCHED_TICKS2NS(qopt->latency);
+	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
 	q->limit = qopt->limit;
 	q->gap = qopt->gap;
 	q->counter = 0;
@@ -922,9 +969,18 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 		q->rate = max_t(u64, q->rate,
 				nla_get_u64(tb[TCA_NETEM_RATE64]));
 
+	if (tb[TCA_NETEM_LATENCY64])
+		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
+
+	if (tb[TCA_NETEM_JITTER64])
+		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
+
 	if (tb[TCA_NETEM_ECN])
 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
 
+	if (tb[TCA_NETEM_SLOT])
+		get_slot(q, tb[TCA_NETEM_SLOT]);
+
 	return ret;
 }
 
@@ -933,11 +989,11 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 	struct netem_sched_data *q = qdisc_priv(sch);
 	int ret;
 
+	qdisc_watchdog_init(&q->watchdog, sch);
+
 	if (!opt)
 		return -EINVAL;
 
-	qdisc_watchdog_init(&q->watchdog, sch);
-
 	q->loss_model = CLG_RANDOM;
 	ret = netem_change(sch, opt);
 	if (ret)
@@ -1014,9 +1070,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_netem_reorder reorder;
 	struct tc_netem_corrupt corrupt;
 	struct tc_netem_rate rate;
+	struct tc_netem_slot slot;
 
-	qopt.latency = q->latency;
-	qopt.jitter = q->jitter;
+	qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+			     UINT_MAX);
+	qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+			    UINT_MAX);
 	qopt.limit = q->limit;
 	qopt.loss = q->loss;
 	qopt.gap = q->gap;
@@ -1024,6 +1083,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
 		goto nla_put_failure;
 
+	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
+		goto nla_put_failure;
+
+	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
+		goto nla_put_failure;
+
 	cor.delay_corr = q->delay_cor.rho;
 	cor.loss_corr = q->loss_cor.rho;
 	cor.dup_corr = q->dup_cor.rho;
@@ -1060,6 +1125,16 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (dump_loss_model(q, skb) != 0)
 		goto nla_put_failure;
 
+	if (q->slot_config.min_delay | q->slot_config.max_delay) {
+		slot = q->slot_config;
+		if (slot.max_packets == INT_MAX)
+			slot.max_packets = 0;
+		if (slot.max_bytes == INT_MAX)
+			slot.max_bytes = 0;
+		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
+			goto nla_put_failure;
+	}
+
 	return nla_nest_end(skb, nla);
 
 nla_put_failure:
@@ -1096,15 +1171,11 @@ static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long netem_get(struct Qdisc *sch, u32 classid)
+static unsigned long netem_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void netem_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 	if (!walker->stop) {
@@ -1120,8 +1191,7 @@ static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 static const struct Qdisc_class_ops netem_class_ops = {
 	.graft		=	netem_graft,
 	.leaf		=	netem_leaf,
-	.get		=	netem_get,
-	.put		=	netem_put,
+	.find		=	netem_find,
 	.walk		=	netem_walk,
 	.dump		=	netem_dump_class,
 };
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 6c2791d6102d..776c694c77c7 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -74,6 +74,7 @@ struct pie_sched_data {
 	struct pie_vars vars;
 	struct pie_stats stats;
 	struct timer_list adapt_timer;
+	struct Qdisc *sch;
 };
 
 static void pie_params_init(struct pie_params *params)
@@ -422,10 +423,10 @@ static void calculate_probability(struct Qdisc *sch)
 		pie_vars_init(&q->vars);
 }
 
-static void pie_timer(unsigned long arg)
+static void pie_timer(struct timer_list *t)
 {
-	struct Qdisc *sch = (struct Qdisc *)arg;
-	struct pie_sched_data *q = qdisc_priv(sch);
+	struct pie_sched_data *q = from_timer(q, t, adapt_timer);
+	struct Qdisc *sch = q->sch;
 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 
 	spin_lock(root_lock);
@@ -446,7 +447,8 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt)
 	pie_vars_init(&q->vars);
 	sch->limit = q->params.limit;
 
-	setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch);
+	q->sch = sch;
+	timer_setup(&q->adapt_timer, pie_timer, 0);
 
 	if (opt) {
 		int err = pie_change(sch, opt);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index e3e364cc9a70..2c79559a0d31 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -50,6 +50,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		case TC_ACT_QUEUED:
 		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -80,7 +81,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 
 		if (ret & __NET_XMIT_BYPASS)
 			qdisc_qstats_drop(sch);
-		kfree_skb(skb);
+		__qdisc_drop(skb, to_free);
 		return ret;
 	}
 #endif
@@ -212,7 +213,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!opt)
 		return -EINVAL;
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	err = tcf_block_get(&q->block, &q->filter_list, sch);
 	if (err)
 		return err;
 
@@ -260,7 +261,7 @@ prio_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->queues[band];
 }
 
-static unsigned long prio_get(struct Qdisc *sch, u32 classid)
+static unsigned long prio_find(struct Qdisc *sch, u32 classid)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	unsigned long band = TC_H_MIN(classid);
@@ -272,11 +273,11 @@ static unsigned long prio_get(struct Qdisc *sch, u32 classid)
 
 static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
 {
-	return prio_get(sch, classid);
+	return prio_find(sch, classid);
 }
 
 
-static void prio_put(struct Qdisc *q, unsigned long cl)
+static void prio_unbind(struct Qdisc *q, unsigned long cl)
 {
 }
 
@@ -338,12 +339,11 @@ static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl)
 static const struct Qdisc_class_ops prio_class_ops = {
 	.graft		=	prio_graft,
 	.leaf		=	prio_leaf,
-	.get		=	prio_get,
-	.put		=	prio_put,
+	.find		=	prio_find,
 	.walk		=	prio_walk,
 	.tcf_block	=	prio_tcf_block,
 	.bind_tcf	=	prio_bind,
-	.unbind_tcf	=	prio_put,
+	.unbind_tcf	=	prio_unbind,
 	.dump		=	prio_dump_class,
 	.dump_stats	=	prio_dump_class_stats,
 };
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 0e16dfda0bd7..6962b37a3ad3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -132,7 +132,6 @@ struct qfq_aggregate;
 struct qfq_class {
 	struct Qdisc_class_common common;
 
-	unsigned int refcnt;
 	unsigned int filter_cnt;
 
 	struct gnet_stats_basic_packed bstats;
@@ -477,7 +476,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl == NULL)
 		return -ENOBUFS;
 
-	cl->refcnt = 1;
 	cl->common.classid = classid;
 	cl->deficit = lmax;
 
@@ -555,32 +553,15 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
 	qfq_purge_queue(cl);
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
 
-	BUG_ON(--cl->refcnt == 0);
-	/*
-	 * This shouldn't happen: we "hold" one cops->get() when called
-	 * from tc_ctl_tclass; the destroy method is done from cops->put().
-	 */
-
 	sch_tree_unlock(sch);
-	return 0;
-}
-
-static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
-{
-	struct qfq_class *cl = qfq_find_class(sch, classid);
 
-	if (cl != NULL)
-		cl->refcnt++;
-
-	return (unsigned long)cl;
+	qfq_destroy_class(sch, cl);
+	return 0;
 }
 
-static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
+static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
 {
-	struct qfq_class *cl = (struct qfq_class *)arg;
-
-	if (--cl->refcnt == 0)
-		qfq_destroy_class(sch, cl);
+	return (unsigned long)qfq_find_class(sch, classid);
 }
 
 static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -728,6 +709,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 		case TC_ACT_STOLEN:
 		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
 		case TC_ACT_SHOT:
 			return NULL;
 		}
@@ -1234,7 +1216,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	if (cl == NULL) {
 		if (err & __NET_XMIT_BYPASS)
 			qdisc_qstats_drop(sch);
-		kfree_skb(skb);
+		__qdisc_drop(skb, to_free);
 		return err;
 	}
 	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
@@ -1428,8 +1410,7 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
 	struct qfq_sched *q = qdisc_priv(sch);
 	struct qfq_class *cl = (struct qfq_class *)arg;
 
-	if (cl->qdisc->q.qlen == 0)
-		qfq_deactivate_class(q, cl);
+	qfq_deactivate_class(q, cl);
 }
 
 static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
@@ -1439,7 +1420,7 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	int i, j, err;
 	u32 max_cl_shift, maxbudg_shift, max_classes;
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	err = tcf_block_get(&q->block, &q->filter_list, sch);
 	if (err)
 		return err;
 
@@ -1511,8 +1492,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
 static const struct Qdisc_class_ops qfq_class_ops = {
 	.change		= qfq_change_class,
 	.delete		= qfq_delete_class,
-	.get		= qfq_get_class,
-	.put		= qfq_put_class,
+	.find		= qfq_search_class,
 	.tcf_block	= qfq_tcf_block,
 	.bind_tcf	= qfq_bind_tcf,
 	.unbind_tcf	= qfq_unbind_tcf,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 11292adce412..7f8ea9e297c3 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -19,6 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
 #include <net/inet_ecn.h>
 #include <net/red.h>
 
@@ -40,6 +41,7 @@ struct red_sched_data {
 	u32			limit;		/* HARD maximal queue length */
 	unsigned char		flags;
 	struct timer_list	adapt_timer;
+	struct Qdisc		*sch;
 	struct red_parms	parms;
 	struct red_vars		vars;
 	struct red_stats	stats;
@@ -147,11 +149,37 @@ static void red_reset(struct Qdisc *sch)
 	red_restart(&q->vars);
 }
 
+static int red_offload(struct Qdisc *sch, bool enable)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_red_qopt_offload opt = {
+		.handle = sch->handle,
+		.parent = sch->parent,
+	};
+
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	if (enable) {
+		opt.command = TC_RED_REPLACE;
+		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
+		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
+		opt.set.probability = q->parms.max_P;
+		opt.set.is_ecn = red_use_ecn(q);
+	} else {
+		opt.command = TC_RED_DESTROY;
+	}
+
+	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+}
+
 static void red_destroy(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
 	del_timer_sync(&q->adapt_timer);
+	red_offload(sch, false);
 	qdisc_destroy(q->qdisc);
 }
 
@@ -218,13 +246,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 		red_start_of_idle_period(&q->vars);
 
 	sch_tree_unlock(sch);
+	red_offload(sch, true);
 	return 0;
 }
 
-static inline void red_adaptative_timer(unsigned long arg)
+static inline void red_adaptative_timer(struct timer_list *t)
 {
-	struct Qdisc *sch = (struct Qdisc *)arg;
-	struct red_sched_data *q = qdisc_priv(sch);
+	struct red_sched_data *q = from_timer(q, t, adapt_timer);
+	struct Qdisc *sch = q->sch;
 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 
 	spin_lock(root_lock);
@@ -238,10 +267,40 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
 	struct red_sched_data *q = qdisc_priv(sch);
 
 	q->qdisc = &noop_qdisc;
-	setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
+	q->sch = sch;
+	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
 	return red_change(sch, opt);
 }
 
+static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_red_qopt_offload hw_stats = {
+		.command = TC_RED_STATS,
+		.handle = sch->handle,
+		.parent = sch->parent,
+		{
+			.stats.bstats = &sch->bstats,
+			.stats.qstats = &sch->qstats,
+		},
+	};
+	int err;
+
+	opt->flags &= ~TC_RED_OFFLOADED;
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return 0;
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+					    &hw_stats);
+	if (err == -EOPNOTSUPP)
+		return 0;
+
+	if (!err)
+		opt->flags |= TC_RED_OFFLOADED;
+
+	return err;
+}
+
 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
@@ -255,8 +314,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 		.Plog		= q->parms.Plog,
 		.Scell_log	= q->parms.Scell_log,
 	};
+	int err;
 
 	sch->qstats.backlog = q->qdisc->qstats.backlog;
+	err = red_dump_offload(sch, &opt);
+	if (err)
+		goto nla_put_failure;
+
 	opts = nla_nest_start(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
@@ -273,6 +337,7 @@ nla_put_failure:
 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
 	struct tc_red_xstats st = {
 		.early	= q->stats.prob_drop + q->stats.forced_drop,
 		.pdrop	= q->stats.pdrop,
@@ -280,6 +345,26 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 		.marked	= q->stats.prob_mark + q->stats.forced_mark,
 	};
 
+	if (tc_can_offload(dev) &&  dev->netdev_ops->ndo_setup_tc) {
+		struct red_stats hw_stats = {0};
+		struct tc_red_qopt_offload hw_stats_request = {
+			.command = TC_RED_XSTATS,
+			.handle = sch->handle,
+			.parent = sch->parent,
+			{
+				.xstats = &hw_stats,
+			},
+		};
+		if (!dev->netdev_ops->ndo_setup_tc(dev,
+						   TC_SETUP_QDISC_RED,
+						   &hw_stats_request)) {
+			st.early += hw_stats.prob_drop + hw_stats.forced_drop;
+			st.pdrop += hw_stats.pdrop;
+			st.other += hw_stats.other;
+			st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
+		}
+	}
+
 	return gnet_stats_copy_app(d, &st, sizeof(st));
 }
 
@@ -311,15 +396,11 @@ static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long red_get(struct Qdisc *sch, u32 classid)
+static unsigned long red_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void red_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 	if (!walker->stop) {
@@ -335,8 +416,7 @@ static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 static const struct Qdisc_class_ops red_class_ops = {
 	.graft		=	red_graft,
 	.leaf		=	red_leaf,
-	.get		=	red_get,
-	.put		=	red_put,
+	.find		=	red_find,
 	.walk		=	red_walk,
 	.dump		=	red_dump_class,
 };
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 11fb6ec878d6..0678debdd856 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -268,6 +268,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
 		case TC_ACT_QUEUED:
 		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
 		case TC_ACT_SHOT:
 			return false;
 		}
@@ -553,7 +554,7 @@ static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
 	struct sfb_sched_data *q = qdisc_priv(sch);
 	int err;
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	err = tcf_block_get(&q->block, &q->filter_list, sch);
 	if (err)
 		return err;
 
@@ -632,12 +633,12 @@ static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long sfb_get(struct Qdisc *sch, u32 classid)
+static unsigned long sfb_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void sfb_put(struct Qdisc *sch, unsigned long arg)
+static void sfb_unbind(struct Qdisc *sch, unsigned long arg)
 {
 }
 
@@ -683,14 +684,13 @@ static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
 static const struct Qdisc_class_ops sfb_class_ops = {
 	.graft		=	sfb_graft,
 	.leaf		=	sfb_leaf,
-	.get		=	sfb_get,
-	.put		=	sfb_put,
+	.find		=	sfb_find,
 	.change		=	sfb_change_class,
 	.delete		=	sfb_delete,
 	.walk		=	sfb_walk,
 	.tcf_block	=	sfb_tcf_block,
 	.bind_tcf	=	sfb_bind,
-	.unbind_tcf	=	sfb_put,
+	.unbind_tcf	=	sfb_unbind,
 	.dump		=	sfb_dump_class,
 };
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 82469ef9655e..890f4a4564e7 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -145,6 +145,7 @@ struct sfq_sched_data {
 	int		perturb_period;
 	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
 	struct timer_list perturb_timer;
+	struct Qdisc	*sch;
 };
 
 /*
@@ -189,6 +190,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 		case TC_ACT_QUEUED:
 		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
 		case TC_ACT_SHOT:
 			return 0;
 		}
@@ -292,7 +294,7 @@ static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
 	slot->skblist_prev = skb;
 }
 
-static unsigned int sfq_drop(struct Qdisc *sch)
+static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	sfq_index x, d = q->cur_depth;
@@ -310,9 +312,8 @@ drop:
 		slot->backlog -= len;
 		sfq_dec(q, x);
 		sch->q.qlen--;
-		qdisc_qstats_drop(sch);
 		qdisc_qstats_backlog_dec(sch, skb);
-		kfree_skb(skb);
+		qdisc_drop(skb, sch, to_free);
 		return len;
 	}
 
@@ -360,7 +361,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 	if (hash == 0) {
 		if (ret & __NET_XMIT_BYPASS)
 			qdisc_qstats_drop(sch);
-		kfree_skb(skb);
+		__qdisc_drop(skb, to_free);
 		return ret;
 	}
 	hash--;
@@ -465,7 +466,7 @@ enqueue:
 		return NET_XMIT_SUCCESS;
 
 	qlen = slot->qlen;
-	dropped = sfq_drop(sch);
+	dropped = sfq_drop(sch, to_free);
 	/* Return Congestion Notification only if we dropped a packet
 	 * from this flow.
 	 */
@@ -605,10 +606,10 @@ drop:
 	qdisc_tree_reduce_backlog(sch, dropped, drop_len);
 }
 
-static void sfq_perturbation(unsigned long arg)
+static void sfq_perturbation(struct timer_list *t)
 {
-	struct Qdisc *sch = (struct Qdisc *)arg;
-	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
+	struct Qdisc *sch = q->sch;
 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 
 	spin_lock(root_lock);
@@ -628,6 +629,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
 	unsigned int qlen, dropped = 0;
 	struct red_parms *p = NULL;
+	struct sk_buff *to_free = NULL;
+	struct sk_buff *tail = NULL;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
@@ -674,8 +677,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	qlen = sch->q.qlen;
-	while (sch->q.qlen > q->limit)
-		dropped += sfq_drop(sch);
+	while (sch->q.qlen > q->limit) {
+		dropped += sfq_drop(sch, &to_free);
+		if (!tail)
+			tail = to_free;
+	}
+
+	rtnl_kfree_skbs(to_free, tail);
 	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
 
 	del_timer(&q->perturb_timer);
@@ -716,13 +724,12 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	int i;
 	int err;
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
+
+	err = tcf_block_get(&q->block, &q->filter_list, sch);
 	if (err)
 		return err;
 
-	setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
-			       (unsigned long)sch);
-
 	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
 		q->dep[i].next = i + SFQ_MAX_FLOWS;
 		q->dep[i].prev = i + SFQ_MAX_FLOWS;
@@ -808,7 +815,7 @@ static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
 	return NULL;
 }
 
-static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
+static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
 {
 	return 0;
 }
@@ -821,7 +828,7 @@ static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
 	return 0;
 }
 
-static void sfq_put(struct Qdisc *q, unsigned long cl)
+static void sfq_unbind(struct Qdisc *q, unsigned long cl)
 {
 }
 
@@ -885,11 +892,10 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 
 static const struct Qdisc_class_ops sfq_class_ops = {
 	.leaf		=	sfq_leaf,
-	.get		=	sfq_get,
-	.put		=	sfq_put,
+	.find		=	sfq_find,
 	.tcf_block	=	sfq_tcf_block,
 	.bind_tcf	=	sfq_bind,
-	.unbind_tcf	=	sfq_put,
+	.unbind_tcf	=	sfq_unbind,
 	.dump		=	sfq_dump_class,
 	.dump_stats	=	sfq_dump_class_stats,
 	.walk		=	sfq_walk,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b2e4b6ad241a..120f4f365967 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -425,12 +425,13 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
+	qdisc_watchdog_init(&q->watchdog, sch);
+	q->qdisc = &noop_qdisc;
+
 	if (opt == NULL)
 		return -EINVAL;
 
 	q->t_c = ktime_get_ns();
-	qdisc_watchdog_init(&q->watchdog, sch);
-	q->qdisc = &noop_qdisc;
 
 	return tbf_change(sch, opt);
 }
@@ -510,15 +511,11 @@ static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
 	return q->qdisc;
 }
 
-static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
+static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
 {
 	return 1;
 }
 
-static void tbf_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
 static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
 	if (!walker->stop) {
@@ -534,8 +531,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 static const struct Qdisc_class_ops tbf_class_ops = {
 	.graft		=	tbf_graft,
 	.leaf		=	tbf_leaf,
-	.get		=	tbf_get,
-	.put		=	tbf_put,
+	.find		=	tbf_find,
 	.walk		=	tbf_walk,
 	.dump		=	tbf_dump_class,
 };
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 70f1b570bab9..1ca84a288443 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for SCTP support code.
 #
@@ -12,7 +13,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
 	  inqueue.o outqueue.o ulpqueue.o \
 	  tsnmap.o bind_addr.o socket.o primitive.o \
 	  output.o input.o debug.o stream.o auth.o \
-	  offload.o
+	  offload.o stream_sched.o stream_sched_prio.o \
+	  stream_sched_rr.o
 
 sctp_probe-y := probe.o
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 40ec83679d6e..69394f4d6091 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -63,11 +63,11 @@ static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc);
 /* 1st Level Abstractions. */
 
 /* Initialize a new association from provided memory. */
-static struct sctp_association *sctp_association_init(struct sctp_association *asoc,
-					  const struct sctp_endpoint *ep,
-					  const struct sock *sk,
-					  sctp_scope_t scope,
-					  gfp_t gfp)
+static struct sctp_association *sctp_association_init(
+					struct sctp_association *asoc,
+					const struct sctp_endpoint *ep,
+					const struct sock *sk,
+					enum sctp_scope scope, gfp_t gfp)
 {
 	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
@@ -149,8 +149,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 
 	/* Initializes the timers */
 	for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
-		setup_timer(&asoc->timers[i], sctp_timer_events[i],
-				(unsigned long)asoc);
+		timer_setup(&asoc->timers[i], sctp_timer_events[i], 0);
 
 	/* Pull default initialization values from the sock options.
 	 * Note: This assumes that the values have already been
@@ -301,9 +300,8 @@ fail_init:
 
 /* Allocate and initialize a new association */
 struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
-					 const struct sock *sk,
-					 sctp_scope_t scope,
-					 gfp_t gfp)
+					      const struct sock *sk,
+					      enum sctp_scope scope, gfp_t gfp)
 {
 	struct sctp_association *asoc;
 
@@ -797,7 +795,7 @@ void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
  */
 void sctp_assoc_control_transport(struct sctp_association *asoc,
 				  struct sctp_transport *transport,
-				  sctp_transport_cmd_t command,
+				  enum sctp_transport_cmd command,
 				  sctp_sn_error_t error)
 {
 	struct sctp_ulpevent *event;
@@ -1022,11 +1020,11 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 		container_of(work, struct sctp_association,
 			     base.inqueue.immediate);
 	struct net *net = sock_net(asoc->base.sk);
+	union sctp_subtype subtype;
 	struct sctp_endpoint *ep;
 	struct sctp_chunk *chunk;
 	struct sctp_inq *inqueue;
 	int state;
-	sctp_subtype_t subtype;
 	int error = 0;
 
 	/* The association should be held so we should be safe. */
@@ -1564,7 +1562,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
  * local endpoint and the remote peer.
  */
 int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
-				     sctp_scope_t scope, gfp_t gfp)
+				     enum sctp_scope scope, gfp_t gfp)
 {
 	int flags;
 
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index e001b01b0e68..00667c50efa7 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -185,9 +185,9 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1,
  *    are called the two key vectors.
  */
 static struct sctp_auth_bytes *sctp_auth_make_key_vector(
-			sctp_random_param_t *random,
-			sctp_chunks_param_t *chunks,
-			sctp_hmac_algo_param_t *hmacs,
+			struct sctp_random_param *random,
+			struct sctp_chunks_param *chunks,
+			struct sctp_hmac_algo_param *hmacs,
 			gfp_t gfp)
 {
 	struct sctp_auth_bytes *new;
@@ -226,10 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector(
 				    gfp_t gfp)
 {
 	return sctp_auth_make_key_vector(
-				    (sctp_random_param_t *)asoc->c.auth_random,
-				    (sctp_chunks_param_t *)asoc->c.auth_chunks,
-				    (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs,
-				    gfp);
+			(struct sctp_random_param *)asoc->c.auth_random,
+			(struct sctp_chunks_param *)asoc->c.auth_chunks,
+			(struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp);
 }
 
 /* Make a key vector based on peer's parameters */
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 1ebc184a0e23..7df3704982f5 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -45,9 +45,9 @@
 #include <net/sctp/sm.h>
 
 /* Forward declarations for internal helpers. */
-static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *,
-			      union sctp_addr *, sctp_scope_t scope, gfp_t gfp,
-			      int flags);
+static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
+			      union sctp_addr *addr, enum sctp_scope scope,
+			      gfp_t gfp, int flags);
 static void sctp_bind_addr_clean(struct sctp_bind_addr *);
 
 /* First Level Abstractions. */
@@ -57,7 +57,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *);
  */
 int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
 			const struct sctp_bind_addr *src,
-			sctp_scope_t scope, gfp_t gfp,
+			enum sctp_scope scope, gfp_t gfp,
 			int flags)
 {
 	struct sctp_sockaddr_entry *addr;
@@ -440,9 +440,8 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr	*bp,
 
 /* Copy out addresses from the global local address list. */
 static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
-			      union sctp_addr *addr,
-			      sctp_scope_t scope, gfp_t gfp,
-			      int flags)
+			      union sctp_addr *addr, enum sctp_scope scope,
+			      gfp_t gfp, int flags)
 {
 	int error = 0;
 
@@ -485,9 +484,10 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr)
 }
 
 /* Is 'addr' valid for 'scope'?  */
-int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope)
+int sctp_in_scope(struct net *net, const union sctp_addr *addr,
+		  enum sctp_scope scope)
 {
-	sctp_scope_t addr_scope = sctp_scope(addr);
+	enum sctp_scope addr_scope = sctp_scope(addr);
 
 	/* The unusable SCTP addresses will not be considered with
 	 * any defined scopes.
@@ -545,7 +545,7 @@ int sctp_is_ep_boundall(struct sock *sk)
  ********************************************************************/
 
 /* What is the scope of 'addr'?  */
-sctp_scope_t sctp_scope(const union sctp_addr *addr)
+enum sctp_scope sctp_scope(const union sctp_addr *addr)
 {
 	struct sctp_af *af;
 
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 1323d41e68b8..7b261afc47b9 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -201,7 +201,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 		struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
 
 		if (hmac_desc)
-			max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) +
+			max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
 					      hmac_desc->hmac_len);
 	}
 
@@ -221,7 +221,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	    asoc->outqueue.out_qlen == 0 &&
 	    list_empty(&asoc->outqueue.retransmit) &&
 	    msg_len > max_data)
-		first_len -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
+		first_len -= SCTP_PAD4(sizeof(struct sctp_sack_chunk));
 
 	/* Encourage Cookie-ECHO bundling. */
 	if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
@@ -311,10 +311,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 
 		if (chunk->sent_count) {
 			chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
-			streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+			streamout->ext->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
 		} else {
 			chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
-			streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+			streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
 		}
 		return 1;
 	} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
@@ -323,7 +323,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 			&chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
 
 		chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
-		streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+		streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
 		return 1;
 	} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
 		   chunk->msg->expires_at &&
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 2e47eb2f05cb..3f619fdcbf0a 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -60,7 +60,7 @@ static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = {
 };
 
 /* Lookup "chunk type" debug name. */
-const char *sctp_cname(const sctp_subtype_t cid)
+const char *sctp_cname(const union sctp_subtype cid)
 {
 	if (cid.chunk <= SCTP_CID_BASE_MAX)
 		return sctp_cid_tbl[cid.chunk];
@@ -130,7 +130,7 @@ static const char *const sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = {
 };
 
 /* Lookup primitive debug name. */
-const char *sctp_pname(const sctp_subtype_t id)
+const char *sctp_pname(const union sctp_subtype id)
 {
 	if (id.primitive <= SCTP_EVENT_PRIMITIVE_MAX)
 		return sctp_primitive_tbl[id.primitive];
@@ -143,7 +143,7 @@ static const char *const sctp_other_tbl[] = {
 };
 
 /* Lookup "other" debug name. */
-const char *sctp_oname(const sctp_subtype_t id)
+const char *sctp_oname(const union sctp_subtype id)
 {
 	if (id.other <= SCTP_EVENT_OTHER_MAX)
 		return sctp_other_tbl[id.other];
@@ -165,7 +165,7 @@ static const char *const sctp_timer_tbl[] = {
 };
 
 /* Lookup timer debug name. */
-const char *sctp_tname(const sctp_subtype_t id)
+const char *sctp_tname(const union sctp_subtype id)
 {
 	BUILD_BUG_ON(SCTP_EVENT_TIMEOUT_MAX + 1 != ARRAY_SIZE(sctp_timer_tbl));
 
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 0e86f988f836..ee1e601a0b11 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -73,13 +73,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 		 * variables.  There are arrays that we encode directly
 		 * into parameters to make the rest of the operations easier.
 		 */
-		auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) +
-				sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
+		auth_hmacs = kzalloc(sizeof(*auth_hmacs) +
+				     sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
 		if (!auth_hmacs)
 			goto nomem;
 
-		auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) +
-					SCTP_NUM_CHUNK_TYPES, gfp);
+		auth_chunks = kzalloc(sizeof(*auth_chunks) +
+				      SCTP_NUM_CHUNK_TYPES, gfp);
 		if (!auth_chunks)
 			goto nomem;
 
@@ -382,8 +382,8 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
 	struct sctp_transport *transport;
 	struct sctp_chunk *chunk;
 	struct sctp_inq *inqueue;
-	sctp_subtype_t subtype;
-	sctp_state_t state;
+	union sctp_subtype subtype;
+	enum sctp_state state;
 	int error = 0;
 	int first_time = 1;	/* is this the first time through the loop */
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 41eb2ec10460..621b5ca3fd1c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
 {
 	struct dst_entry *dst;
 
-	if (!t)
+	if (sock_owned_by_user(sk) || !t)
 		return;
 	dst = sctp_transport_dst_check(t);
 	if (dst)
@@ -794,7 +794,7 @@ hit:
 struct sctp_hash_cmp_arg {
 	const union sctp_addr	*paddr;
 	const struct net	*net;
-	u16			lport;
+	__be16			lport;
 };
 
 static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -820,37 +820,37 @@ out:
 	return err;
 }
 
-static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
+static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
 {
 	const struct sctp_transport *t = data;
 	const union sctp_addr *paddr = &t->ipaddr;
 	const struct net *net = sock_net(t->asoc->base.sk);
-	u16 lport = htons(t->asoc->base.bind_addr.port);
-	u32 addr;
+	__be16 lport = htons(t->asoc->base.bind_addr.port);
+	__u32 addr;
 
 	if (paddr->sa.sa_family == AF_INET6)
 		addr = jhash(&paddr->v6.sin6_addr, 16, seed);
 	else
-		addr = paddr->v4.sin_addr.s_addr;
+		addr = (__force __u32)paddr->v4.sin_addr.s_addr;
 
-	return  jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
+	return  jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
 			     (__force __u32)lport, net_hash_mix(net), seed);
 }
 
-static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
+static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed)
 {
 	const struct sctp_hash_cmp_arg *x = data;
 	const union sctp_addr *paddr = x->paddr;
 	const struct net *net = x->net;
-	u16 lport = x->lport;
-	u32 addr;
+	__be16 lport = x->lport;
+	__u32 addr;
 
 	if (paddr->sa.sa_family == AF_INET6)
 		addr = jhash(&paddr->v6.sin6_addr, 16, seed);
 	else
-		addr = paddr->v4.sin_addr.s_addr;
+		addr = (__force __u32)paddr->v4.sin_addr.s_addr;
 
-	return  jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
+	return  jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
 			     (__force __u32)lport, net_hash_mix(net), seed);
 }
 
@@ -1111,7 +1111,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
 					__be16 peer_port,
 					struct sctp_transport **transportp)
 {
-	sctp_addip_chunk_t *asconf = (struct sctp_addip_chunk *)ch;
+	struct sctp_addip_chunk *asconf = (struct sctp_addip_chunk *)ch;
 	struct sctp_af *af;
 	union sctp_addr_param *param;
 	union sctp_addr paddr;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2a186b201ad2..3b18085e3b10 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -243,8 +243,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	union sctp_addr *daddr = &t->ipaddr;
 	union sctp_addr dst_saddr;
 	struct in6_addr *final_p, final;
+	enum sctp_scope scope;
 	__u8 matchlen = 0;
-	sctp_scope_t scope;
 
 	memset(fl6, 0, sizeof(struct flowi6));
 	fl6->daddr = daddr->v6.sin6_addr;
@@ -497,7 +497,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr,
 static int sctp_v6_to_addr_param(const union sctp_addr *addr,
 				 union sctp_addr_param *param)
 {
-	int length = sizeof(sctp_ipv6addr_param_t);
+	int length = sizeof(struct sctp_ipv6addr_param);
 
 	param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
 	param->v6.param_hdr.length = htons(length);
@@ -512,7 +512,9 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
 {
 	addr->sa.sa_family = AF_INET6;
 	addr->v6.sin6_port = port;
+	addr->v6.sin6_flowinfo = 0;
 	addr->v6.sin6_addr = *saddr;
+	addr->v6.sin6_scope_id = 0;
 }
 
 /* Compare addresses exactly.
@@ -624,10 +626,10 @@ static int sctp_v6_addr_valid(union sctp_addr *addr,
 }
 
 /* What is the scope of 'addr'?  */
-static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
+static enum sctp_scope sctp_v6_scope(union sctp_addr *addr)
 {
+	enum sctp_scope retval;
 	int v6scope;
-	sctp_scope_t retval;
 
 	/* The IPv6 scope is really a set of bit fields.
 	 * See IFA_* in <net/if_inet6.h>.  Map to a generic SCTP scope.
@@ -736,7 +738,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb)
 /* Was this packet marked by Explicit Congestion Notification? */
 static int sctp_v6_is_ce(const struct sk_buff *skb)
 {
-	return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20);
+	return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20);
 }
 
 /* Dump the v6 addr to the seq file. */
@@ -805,9 +807,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
 		addr->v6.sin6_flowinfo = 0;
 		addr->v6.sin6_port = sh->source;
 		addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
-		if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+		if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
 			addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
-		}
+		else
+			addr->v6.sin6_scope_id = 0;
 	}
 
 	*addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr);
@@ -880,8 +883,10 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
 			net = sock_net(&opt->inet.sk);
 			rcu_read_lock();
 			dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
-			if (!dev ||
-			    !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) {
+			if (!dev || !(opt->inet.freebind ||
+				      net->ipv6.sysctl.ip_nonlocal_bind ||
+				      ipv6_chk_addr(net, &addr->v6.sin6_addr,
+						    dev, 0))) {
 				rcu_read_unlock();
 				return 0;
 			}
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index 105ac3327b28..aeea6da81441 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -57,7 +57,7 @@ SCTP_DBG_OBJCNT(keys);
 /* An array to make it easy to pretty print the debug information
  * to the proc fs.
  */
-static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = {
+static struct sctp_dbg_objcnt_entry sctp_dbg_objcnt[] = {
 	SCTP_DBG_OBJCNT_ENTRY(sock),
 	SCTP_DBG_OBJCNT_ENTRY(ep),
 	SCTP_DBG_OBJCNT_ENTRY(assoc),
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9d8504985744..4a865cd06d76 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -57,15 +57,15 @@
 #include <net/sctp/checksum.h>
 
 /* Forward declarations for private helpers. */
-static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
-					      struct sctp_chunk *chunk);
-static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
-					   struct sctp_chunk *chunk);
+static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
+						 struct sctp_chunk *chunk);
+static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
+						  struct sctp_chunk *chunk);
 static void sctp_packet_append_data(struct sctp_packet *packet,
-					   struct sctp_chunk *chunk);
-static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
-					struct sctp_chunk *chunk,
-					u16 chunk_len);
+				    struct sctp_chunk *chunk);
+static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
+					   struct sctp_chunk *chunk,
+					   u16 chunk_len);
 
 static void sctp_packet_reset(struct sctp_packet *packet)
 {
@@ -181,11 +181,11 @@ void sctp_packet_free(struct sctp_packet *packet)
  * as it can fit in the packet, but any more data that does not fit in this
  * packet can be sent only after receiving the COOKIE_ACK.
  */
-sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
-				       struct sctp_chunk *chunk,
-				       int one_packet, gfp_t gfp)
+enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
+					  struct sctp_chunk *chunk,
+					  int one_packet, gfp_t gfp)
 {
-	sctp_xmit_t retval;
+	enum sctp_xmit retval;
 
 	pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__,
 		 packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
@@ -218,12 +218,12 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
 }
 
 /* Try to bundle an auth chunk into the packet. */
-static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
-					   struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
+					      struct sctp_chunk *chunk)
 {
 	struct sctp_association *asoc = pkt->transport->asoc;
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 	struct sctp_chunk *auth;
-	sctp_xmit_t retval = SCTP_XMIT_OK;
 
 	/* if we don't have an association, we can't do authentication */
 	if (!asoc)
@@ -254,10 +254,10 @@ static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
 }
 
 /* Try to bundle a SACK with the packet. */
-static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt,
-					   struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt,
+					      struct sctp_chunk *chunk)
 {
-	sctp_xmit_t retval = SCTP_XMIT_OK;
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 
 	/* If sending DATA and haven't aleady bundled a SACK, try to
 	 * bundle one in to the packet.
@@ -299,11 +299,11 @@ out:
 /* Append a chunk to the offered packet reporting back any inability to do
  * so.
  */
-static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
-					      struct sctp_chunk *chunk)
+static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
+						 struct sctp_chunk *chunk)
 {
-	sctp_xmit_t retval = SCTP_XMIT_OK;
 	__u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length));
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 
 	/* Check to see if this chunk will fit into the packet */
 	retval = sctp_packet_will_fit(packet, chunk, chunk_len);
@@ -353,10 +353,10 @@ finish:
 /* Append a chunk to the offered packet reporting back any inability to do
  * so.
  */
-sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
-				     struct sctp_chunk *chunk)
+enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
+					struct sctp_chunk *chunk)
 {
-	sctp_xmit_t retval = SCTP_XMIT_OK;
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 
 	pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);
 
@@ -653,8 +653,8 @@ out:
  ********************************************************************/
 
 /* This private function check to see if a chunk can be added */
-static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
-					   struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
+						  struct sctp_chunk *chunk)
 {
 	size_t datasize, rwnd, inflight, flight_size;
 	struct sctp_transport *transport = packet->transport;
@@ -762,12 +762,12 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
 	sctp_chunk_assign_ssn(chunk);
 }
 
-static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
-					struct sctp_chunk *chunk,
-					u16 chunk_len)
+static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
+					   struct sctp_chunk *chunk,
+					   u16 chunk_len)
 {
+	enum sctp_xmit retval = SCTP_XMIT_OK;
 	size_t psize, pmtu, maxsize;
-	sctp_xmit_t retval = SCTP_XMIT_OK;
 
 	psize = packet->size;
 	if (packet->transport->asoc)
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e8762702a313..4db012aa25f7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -50,6 +50,7 @@
 
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
 
 /* Declare internal functions here.  */
 static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
@@ -72,32 +73,38 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
 
 /* Add data to the front of the queue. */
 static inline void sctp_outq_head_data(struct sctp_outq *q,
-					struct sctp_chunk *ch)
+				       struct sctp_chunk *ch)
 {
+	struct sctp_stream_out_ext *oute;
+	__u16 stream;
+
 	list_add(&ch->list, &q->out_chunk_list);
 	q->out_qlen += ch->skb->len;
+
+	stream = sctp_chunk_stream_no(ch);
+	oute = q->asoc->stream.out[stream].ext;
+	list_add(&ch->stream_list, &oute->outq);
 }
 
 /* Take data from the front of the queue. */
 static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q)
 {
-	struct sctp_chunk *ch = NULL;
-
-	if (!list_empty(&q->out_chunk_list)) {
-		struct list_head *entry = q->out_chunk_list.next;
-
-		ch = list_entry(entry, struct sctp_chunk, list);
-		list_del_init(entry);
-		q->out_qlen -= ch->skb->len;
-	}
-	return ch;
+	return q->sched->dequeue(q);
 }
+
 /* Add data chunk to the end of the queue. */
 static inline void sctp_outq_tail_data(struct sctp_outq *q,
 				       struct sctp_chunk *ch)
 {
+	struct sctp_stream_out_ext *oute;
+	__u16 stream;
+
 	list_add_tail(&ch->list, &q->out_chunk_list);
 	q->out_qlen += ch->skb->len;
+
+	stream = sctp_chunk_stream_no(ch);
+	oute = q->asoc->stream.out[stream].ext;
+	list_add_tail(&ch->stream_list, &oute->outq);
 }
 
 /*
@@ -207,6 +214,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
 	INIT_LIST_HEAD(&q->retransmit);
 	INIT_LIST_HEAD(&q->sacked);
 	INIT_LIST_HEAD(&q->abandoned);
+	sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
 }
 
 /* Free the outqueue structure and any related pending chunks.
@@ -258,6 +266,7 @@ static void __sctp_outq_teardown(struct sctp_outq *q)
 
 	/* Throw away any leftover data chunks. */
 	while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
+		sctp_sched_dequeue_done(q, chunk);
 
 		/* Mark as send failure. */
 		sctp_chunk_fail(chunk, q->error);
@@ -366,7 +375,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
 		streamout = &asoc->stream.out[chk->sinfo.sinfo_stream];
 		asoc->sent_cnt_removable--;
 		asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
-		streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+		streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
 
 		if (!chk->tsn_gap_acked) {
 			if (chk->transport)
@@ -391,20 +400,21 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
 	struct sctp_outq *q = &asoc->outqueue;
 	struct sctp_chunk *chk, *temp;
 
+	q->sched->unsched_all(&asoc->stream);
+
 	list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
 		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
 		    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
 			continue;
 
-		list_del_init(&chk->list);
-		q->out_qlen -= chk->skb->len;
+		sctp_sched_dequeue_common(q, chk);
 		asoc->sent_cnt_removable--;
 		asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
 		if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
 			struct sctp_stream_out *streamout =
 				&asoc->stream.out[chk->sinfo.sinfo_stream];
 
-			streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+			streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
 		}
 
 		msg_len -= SCTP_DATA_SNDSIZE(chk) +
@@ -415,6 +425,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
 			break;
 	}
 
+	q->sched->sched_all(&asoc->stream);
+
 	return msg_len;
 }
 
@@ -534,7 +546,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
  * one packet out.
  */
 void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
-		     sctp_retransmit_reason_t reason)
+		     enum sctp_retransmit_reason reason)
 {
 	struct net *net = sock_net(q->asoc->base.sk);
 
@@ -594,14 +606,14 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			       int rtx_timeout, int *start_timer)
 {
-	struct list_head *lqueue;
 	struct sctp_transport *transport = pkt->transport;
-	sctp_xmit_t status;
 	struct sctp_chunk *chunk, *chunk1;
-	int fast_rtx;
+	struct list_head *lqueue;
+	enum sctp_xmit status;
 	int error = 0;
 	int timer = 0;
 	int done = 0;
+	int fast_rtx;
 
 	lqueue = &q->retransmit;
 	fast_rtx = q->fast_rtx;
@@ -781,7 +793,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 	struct sctp_transport *transport = NULL;
 	struct sctp_transport *new_transport;
 	struct sctp_chunk *chunk, *tmp;
-	sctp_xmit_t status;
+	enum sctp_xmit status;
 	int error = 0;
 	int start_timer = 0;
 	int one_packet = 0;
@@ -1033,22 +1045,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 		while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
 			__u32 sid = ntohs(chunk->subh.data_hdr->stream);
 
-			/* RFC 2960 6.5 Every DATA chunk MUST carry a valid
-			 * stream identifier.
-			 */
-			if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) {
-
-				/* Mark as failed send. */
-				sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
-				if (asoc->peer.prsctp_capable &&
-				    SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
-					asoc->sent_cnt_removable--;
-				sctp_chunk_free(chunk);
-				continue;
-			}
-
 			/* Has this chunk expired? */
 			if (sctp_chunk_abandoned(chunk)) {
+				sctp_sched_dequeue_done(q, chunk);
 				sctp_chunk_fail(chunk, 0);
 				sctp_chunk_free(chunk);
 				continue;
@@ -1070,6 +1069,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 				new_transport = asoc->peer.active_path;
 			if (new_transport->state == SCTP_UNCONFIRMED) {
 				WARN_ONCE(1, "Attempt to send packet on unconfirmed path.");
+				sctp_sched_dequeue_done(q, chunk);
 				sctp_chunk_fail(chunk, 0);
 				sctp_chunk_free(chunk);
 				continue;
@@ -1133,6 +1133,11 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 				else
 					asoc->stats.oodchunks++;
 
+				/* Only now it's safe to consider this
+				 * chunk as sent, sched-wise.
+				 */
+				sctp_sched_dequeue_done(q, chunk);
+
 				break;
 
 			default:
@@ -1197,7 +1202,7 @@ sctp_flush_out:
 static void sctp_sack_update_unack_data(struct sctp_association *assoc,
 					struct sctp_sackhdr *sack)
 {
-	sctp_sack_variable_t *frags;
+	union sctp_sack_variable *frags;
 	__u16 unack_data;
 	int i;
 
@@ -1224,7 +1229,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
 	struct sctp_transport *transport;
 	struct sctp_chunk *tchunk = NULL;
 	struct list_head *lchunk, *transport_list, *temp;
-	sctp_sack_variable_t *frags = sack->variable;
+	union sctp_sack_variable *frags = sack->variable;
 	__u32 sack_ctsn, ctsn, tsn;
 	__u32 highest_tsn, highest_new_tsn;
 	__u32 sack_a_rwnd;
@@ -1736,10 +1741,10 @@ static void sctp_mark_missing(struct sctp_outq *q,
 /* Is the given TSN acked by this packet?  */
 static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn)
 {
-	int i;
-	sctp_sack_variable_t *frags;
-	__u16 tsn_offset, blocks;
 	__u32 ctsn = ntohl(sack->cum_tsn_ack);
+	union sctp_sack_variable *frags;
+	__u16 tsn_offset, blocks;
+	int i;
 
 	if (TSN_lte(tsn, ctsn))
 		goto pass;
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index f0553a022859..c0817f7a8964 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -53,8 +53,8 @@
 int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
 			    void *arg) { \
 	int error = 0; \
-	sctp_event_t event_type; sctp_subtype_t subtype; \
-	sctp_state_t state; \
+	enum sctp_event event_type; union sctp_subtype subtype; \
+	enum sctp_state state; \
 	struct sctp_endpoint *ep; \
 	\
 	event_type = SCTP_EVENT_T_PRIMITIVE; \
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 6cc2152e0740..1280f85a598d 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -127,12 +127,13 @@ static const struct file_operations sctpprobe_fops = {
 	.llseek = noop_llseek,
 };
 
-static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
-					    const struct sctp_endpoint *ep,
-					    const struct sctp_association *asoc,
-					    const sctp_subtype_t type,
-					    void *arg,
-					    sctp_cmd_seq_t *commands)
+static enum sctp_disposition jsctp_sf_eat_sack(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sk_buff *skb = chunk->skb;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 989a900383b5..f5172c21349b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -196,7 +196,7 @@ static void sctp_free_local_addr_list(struct net *net)
 
 /* Copy the local addresses which are valid for 'scope' into 'bp'.  */
 int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
-			      sctp_scope_t scope, gfp_t gfp, int copy_flags)
+			      enum sctp_scope scope, gfp_t gfp, int copy_flags)
 {
 	struct sctp_sockaddr_entry *addr;
 	union sctp_addr laddr;
@@ -292,7 +292,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr,
 static int sctp_v4_to_addr_param(const union sctp_addr *addr,
 				 union sctp_addr_param *param)
 {
-	int length = sizeof(sctp_ipv4addr_param_t);
+	int length = sizeof(struct sctp_ipv4addr_param);
 
 	param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS;
 	param->v4.param_hdr.length = htons(length);
@@ -400,9 +400,9 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
  * IPv4 scoping can be controlled through sysctl option
  * net.sctp.addr_scope_policy
  */
-static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
+static enum sctp_scope sctp_v4_scope(union sctp_addr *addr)
 {
-	sctp_scope_t retval;
+	enum sctp_scope retval;
 
 	/* Check for unusable SCTP addresses. */
 	if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) {
@@ -622,9 +622,9 @@ static void sctp_v4_ecn_capable(struct sock *sk)
 	INET_ECN_xmit(sk);
 }
 
-static void sctp_addr_wq_timeout_handler(unsigned long arg)
+static void sctp_addr_wq_timeout_handler(struct timer_list *t)
 {
-	struct net *net = (struct net *)arg;
+	struct net *net = from_timer(net, t, sctp.addr_wq_timer);
 	struct sctp_sockaddr_entry *addrw, *temp;
 	struct sctp_sock *sp;
 
@@ -1304,8 +1304,7 @@ static int __net_init sctp_defaults_init(struct net *net)
 	INIT_LIST_HEAD(&net->sctp.auto_asconf_splist);
 	spin_lock_init(&net->sctp.addr_wq_lock);
 	net->sctp.addr_wq_timer.expires = 0;
-	setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler,
-		    (unsigned long)net);
+	timer_setup(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, 0);
 
 	return 0;
 
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 9a647214a91e..a72a7d925d46 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -70,7 +70,8 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
 
 	info = nla_data(attr);
 	list_for_each_entry_rcu(laddr, address_list, list) {
-		memcpy(info, &laddr->a, addrlen);
+		memcpy(info, &laddr->a, sizeof(laddr->a));
+		memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a));
 		info += addrlen;
 	}
 
@@ -93,7 +94,9 @@ static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb,
 	info = nla_data(attr);
 	list_for_each_entry(from, &asoc->peer.transport_addr_list,
 			    transports) {
-		memcpy(info, &from->ipaddr, addrlen);
+		memcpy(info, &from->ipaddr, sizeof(from->ipaddr));
+		memset(info + sizeof(from->ipaddr), 0,
+		       addrlen - sizeof(from->ipaddr));
 		info += addrlen;
 	}
 
@@ -276,9 +279,11 @@ out:
 	return err;
 }
 
-static int sctp_sock_dump(struct sock *sk, void *p)
+static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
 {
+	struct sctp_endpoint *ep = tsp->asoc->ep;
 	struct sctp_comm_param *commp = p;
+	struct sock *sk = ep->base.sk;
 	struct sk_buff *skb = commp->skb;
 	struct netlink_callback *cb = commp->cb;
 	const struct inet_diag_req_v2 *r = commp->r;
@@ -286,9 +291,7 @@ static int sctp_sock_dump(struct sock *sk, void *p)
 	int err = 0;
 
 	lock_sock(sk);
-	if (!sctp_sk(sk)->ep)
-		goto release;
-	list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) {
+	list_for_each_entry(assoc, &ep->asocs, asocs) {
 		if (cb->args[4] < cb->args[1])
 			goto next;
 
@@ -306,7 +309,6 @@ static int sctp_sock_dump(struct sock *sk, void *p)
 					cb->nlh->nlmsg_seq,
 					NLM_F_MULTI, cb->nlh,
 					commp->net_admin) < 0) {
-			cb->args[3] = 1;
 			err = 1;
 			goto release;
 		}
@@ -324,40 +326,30 @@ next:
 		cb->args[4]++;
 	}
 	cb->args[1] = 0;
-	cb->args[2]++;
 	cb->args[3] = 0;
 	cb->args[4] = 0;
 release:
 	release_sock(sk);
-	sock_put(sk);
 	return err;
 }
 
-static int sctp_get_sock(struct sctp_transport *tsp, void *p)
+static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
 {
 	struct sctp_endpoint *ep = tsp->asoc->ep;
 	struct sctp_comm_param *commp = p;
 	struct sock *sk = ep->base.sk;
-	struct netlink_callback *cb = commp->cb;
 	const struct inet_diag_req_v2 *r = commp->r;
 	struct sctp_association *assoc =
 		list_entry(ep->asocs.next, struct sctp_association, asocs);
 
 	/* find the ep only once through the transports by this condition */
 	if (tsp->asoc != assoc)
-		goto out;
+		return 0;
 
 	if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
-		goto out;
-
-	sock_hold(sk);
-	cb->args[5] = (long)sk;
+		return 0;
 
 	return 1;
-
-out:
-	cb->args[2]++;
-	return 0;
 }
 
 static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
@@ -471,6 +463,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		.r = r,
 		.net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
 	};
+	int pos = cb->args[2];
 
 	/* eps hashtable dumps
 	 * args:
@@ -500,12 +493,9 @@ skip:
 	if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
 		goto done;
 
-next:
-	cb->args[5] = 0;
-	sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp);
-
-	if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp))
-		goto next;
+	sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
+				net, &pos, &commp);
+	cb->args[2] = pos;
 
 done:
 	cb->args[1] = cb->args[4];
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 6110447fe51d..9bf575f2e8ed 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -69,7 +69,8 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
 static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
 					   __u8 type, __u8 flags, int paylen,
 					   gfp_t gfp);
-static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
+static struct sctp_cookie_param *sctp_pack_cookie(
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const struct sctp_chunk *init_chunk,
 					int *cookie_len,
@@ -131,17 +132,17 @@ static const struct sctp_paramhdr prsctp_param = {
  * provided chunk, as most cause codes will be embedded inside an
  * abort chunk.
  */
-void  sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
-		      size_t paylen)
+void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
+		     size_t paylen)
 {
-	sctp_errhdr_t err;
+	struct sctp_errhdr err;
 	__u16 len;
 
 	/* Cause code constants are now defined in network order.  */
 	err.cause = cause_code;
-	len = sizeof(sctp_errhdr_t) + paylen;
+	len = sizeof(err) + paylen;
 	err.length  = htons(len);
-	chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err);
+	chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err);
 }
 
 /* A helper to initialize an op error inside a
@@ -150,21 +151,21 @@ void  sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
  * if there isn't enough space in the op error chunk
  */
 static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
-		      size_t paylen)
+				 size_t paylen)
 {
-	sctp_errhdr_t err;
+	struct sctp_errhdr err;
 	__u16 len;
 
 	/* Cause code constants are now defined in network order.  */
 	err.cause = cause_code;
-	len = sizeof(sctp_errhdr_t) + paylen;
+	len = sizeof(err) + paylen;
 	err.length  = htons(len);
 
 	if (skb_tailroom(chunk->skb) < len)
 		return -ENOSPC;
-	chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
-						     sizeof(sctp_errhdr_t),
-						     &err);
+
+	chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(err), &err);
+
 	return 0;
 }
 /* 3.3.2 Initiation (INIT) (1)
@@ -212,32 +213,31 @@ static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
  * Supported Address Types (Note 4)    Optional    12
  */
 struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
-			     const struct sctp_bind_addr *bp,
-			     gfp_t gfp, int vparam_len)
+				  const struct sctp_bind_addr *bp,
+				  gfp_t gfp, int vparam_len)
 {
 	struct net *net = sock_net(asoc->base.sk);
+	struct sctp_supported_ext_param ext_param;
+	struct sctp_adaptation_ind_param aiparam;
+	struct sctp_paramhdr *auth_chunks = NULL;
+	struct sctp_paramhdr *auth_hmacs = NULL;
+	struct sctp_supported_addrs_param sat;
 	struct sctp_endpoint *ep = asoc->ep;
-	struct sctp_inithdr init;
-	union sctp_params addrs;
-	size_t chunksize;
 	struct sctp_chunk *retval = NULL;
 	int num_types, addrs_len = 0;
+	struct sctp_inithdr init;
+	union sctp_params addrs;
 	struct sctp_sock *sp;
-	sctp_supported_addrs_param_t sat;
+	__u8 extensions[4];
+	size_t chunksize;
 	__be16 types[2];
-	sctp_adaptation_ind_param_t aiparam;
-	sctp_supported_ext_param_t ext_param;
 	int num_ext = 0;
-	__u8 extensions[4];
-	struct sctp_paramhdr *auth_chunks = NULL,
-			*auth_hmacs = NULL;
 
 	/* RFC 2960 3.3.2 Initiation (INIT) (1)
 	 *
 	 * Note 1: The INIT chunks can contain multiple addresses that
 	 * can be IPv4 and/or IPv6 in any combination.
 	 */
-	retval = NULL;
 
 	/* Convert the provided bind address list to raw format. */
 	addrs = sctp_bind_addrs_to_raw(bp, &addrs_len, gfp);
@@ -305,8 +305,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 
 	/* If we have any extensions to report, account for that */
 	if (num_ext)
-		chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
-				       num_ext);
+		chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext);
 
 	/* RFC 2960 3.3.2 Initiation (INIT) (1)
 	 *
@@ -348,10 +347,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	 */
 	if (num_ext) {
 		ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT;
-		ext_param.param_hdr.length =
-			    htons(sizeof(sctp_supported_ext_param_t) + num_ext);
-		sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
-				&ext_param);
+		ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext);
+		sctp_addto_chunk(retval, sizeof(ext_param), &ext_param);
 		sctp_addto_param(retval, num_ext, extensions);
 	}
 
@@ -382,26 +379,24 @@ nodata:
 }
 
 struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
-				 const struct sctp_chunk *chunk,
-				 gfp_t gfp, int unkparam_len)
+				      const struct sctp_chunk *chunk,
+				      gfp_t gfp, int unkparam_len)
 {
+	struct sctp_supported_ext_param ext_param;
+	struct sctp_adaptation_ind_param aiparam;
+	struct sctp_paramhdr *auth_chunks = NULL;
+	struct sctp_paramhdr *auth_random = NULL;
+	struct sctp_paramhdr *auth_hmacs = NULL;
+	struct sctp_chunk *retval = NULL;
+	struct sctp_cookie_param *cookie;
 	struct sctp_inithdr initack;
-	struct sctp_chunk *retval;
 	union sctp_params addrs;
 	struct sctp_sock *sp;
-	int addrs_len;
-	sctp_cookie_param_t *cookie;
-	int cookie_len;
+	__u8 extensions[4];
 	size_t chunksize;
-	sctp_adaptation_ind_param_t aiparam;
-	sctp_supported_ext_param_t ext_param;
 	int num_ext = 0;
-	__u8 extensions[4];
-	struct sctp_paramhdr *auth_chunks = NULL,
-			*auth_hmacs = NULL,
-			*auth_random = NULL;
-
-	retval = NULL;
+	int cookie_len;
+	int addrs_len;
 
 	/* Note: there may be no addresses to embed. */
 	addrs = sctp_bind_addrs_to_raw(&asoc->base.bind_addr, &addrs_len, gfp);
@@ -468,8 +463,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	}
 
 	if (num_ext)
-		chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
-				       num_ext);
+		chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext);
 
 	/* Now allocate and fill out the chunk.  */
 	retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
@@ -495,10 +489,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 		sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
 	if (num_ext) {
 		ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT;
-		ext_param.param_hdr.length =
-			    htons(sizeof(sctp_supported_ext_param_t) + num_ext);
-		sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
-				 &ext_param);
+		ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext);
+		sctp_addto_chunk(retval, sizeof(ext_param), &ext_param);
 		sctp_addto_param(retval, num_ext, extensions);
 	}
 	if (asoc->peer.prsctp_capable)
@@ -567,11 +559,11 @@ nomem_cookie:
  *   to insure interoperability.
  */
 struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
-				    const struct sctp_chunk *chunk)
+					 const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
-	void *cookie;
 	int cookie_len;
+	void *cookie;
 
 	cookie = asoc->peer.cookie;
 	cookie_len = asoc->peer.cookie_len;
@@ -619,7 +611,7 @@ nodata:
  *   Set to zero on transmit and ignored on receipt.
  */
 struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
-				   const struct sctp_chunk *chunk)
+					const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
 
@@ -664,15 +656,15 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
  *     Note: The CWR is considered a Control chunk.
  */
 struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
-			    const __u32 lowest_tsn,
-			    const struct sctp_chunk *chunk)
+				 const __u32 lowest_tsn,
+				 const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
-	sctp_cwrhdr_t cwr;
+	struct sctp_cwrhdr cwr;
 
 	cwr.lowest_tsn = htonl(lowest_tsn);
 	retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0,
-				   sizeof(sctp_cwrhdr_t), GFP_ATOMIC);
+				   sizeof(cwr), GFP_ATOMIC);
 
 	if (!retval)
 		goto nodata;
@@ -699,14 +691,14 @@ nodata:
 
 /* Make an ECNE chunk.  This is a congestion experienced report.  */
 struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
-			     const __u32 lowest_tsn)
+				  const __u32 lowest_tsn)
 {
 	struct sctp_chunk *retval;
-	sctp_ecnehdr_t ecne;
+	struct sctp_ecnehdr ecne;
 
 	ecne.lowest_tsn = htonl(lowest_tsn);
 	retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0,
-				   sizeof(sctp_ecnehdr_t), GFP_ATOMIC);
+				   sizeof(ecne), GFP_ATOMIC);
 	if (!retval)
 		goto nodata;
 	retval->subh.ecne_hdr =
@@ -720,9 +712,9 @@ nodata:
  * parameters.  However, do not populate the data payload.
  */
 struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
-				       const struct sctp_sndrcvinfo *sinfo,
-				       int data_len, __u8 flags, __u16 ssn,
-				       gfp_t gfp)
+					    const struct sctp_sndrcvinfo *sinfo,
+					    int data_len, __u8 flags, __u16 ssn,
+					    gfp_t gfp)
 {
 	struct sctp_chunk *retval;
 	struct sctp_datahdr dp;
@@ -760,15 +752,15 @@ nodata:
  */
 struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
 {
-	struct sctp_chunk *retval;
-	struct sctp_sackhdr sack;
-	int len;
-	__u32 ctsn;
-	__u16 num_gabs, num_dup_tsns;
-	struct sctp_association *aptr = (struct sctp_association *)asoc;
 	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
+	struct sctp_association *aptr = (struct sctp_association *)asoc;
 	struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
+	__u16 num_gabs, num_dup_tsns;
 	struct sctp_transport *trans;
+	struct sctp_chunk *retval;
+	struct sctp_sackhdr sack;
+	__u32 ctsn;
+	int len;
 
 	memset(gabs, 0, sizeof(gabs));
 	ctsn = sctp_tsnmap_get_ctsn(map);
@@ -862,15 +854,15 @@ nodata:
 struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
 				      const struct sctp_chunk *chunk)
 {
+	struct sctp_shutdownhdr shut;
 	struct sctp_chunk *retval;
-	sctp_shutdownhdr_t shut;
 	__u32 ctsn;
 
 	ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
 	shut.cum_tsn_ack = htonl(ctsn);
 
 	retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
-				   sizeof(sctp_shutdownhdr_t), GFP_ATOMIC);
+				   sizeof(shut), GFP_ATOMIC);
 	if (!retval)
 		goto nodata;
 
@@ -884,7 +876,7 @@ nodata:
 }
 
 struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
-				     const struct sctp_chunk *chunk)
+					  const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
 
@@ -907,8 +899,8 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
 }
 
 struct sctp_chunk *sctp_make_shutdown_complete(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *retval;
 	__u8 flags = 0;
@@ -941,8 +933,8 @@ struct sctp_chunk *sctp_make_shutdown_complete(
  * association, except when responding to an INIT (sctpimpguide 2.41).
  */
 struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
-			      const struct sctp_chunk *chunk,
-			      const size_t hint)
+				   const struct sctp_chunk *chunk,
+				   const size_t hint)
 {
 	struct sctp_chunk *retval;
 	__u8 flags = 0;
@@ -978,14 +970,15 @@ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
 
 /* Helper to create ABORT with a NO_USER_DATA error.  */
 struct sctp_chunk *sctp_make_abort_no_data(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk, __u32 tsn)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk,
+					__u32 tsn)
 {
 	struct sctp_chunk *retval;
 	__be32 payload;
 
-	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t)
-				 + sizeof(tsn));
+	retval = sctp_make_abort(asoc, chunk,
+				 sizeof(struct sctp_errhdr) + sizeof(tsn));
 
 	if (!retval)
 		goto no_mem;
@@ -1020,7 +1013,8 @@ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
 	void *payload = NULL;
 	int err;
 
-	retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen);
+	retval = sctp_make_abort(asoc, NULL,
+				 sizeof(struct sctp_errhdr) + paylen);
 	if (!retval)
 		goto err_chunk;
 
@@ -1058,8 +1052,8 @@ err_chunk:
 static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
 			      const void *data)
 {
-	void *target;
 	int chunklen = ntohs(chunk->chunk_hdr->length);
+	void *target;
 
 	target = skb_put(chunk->skb, len);
 
@@ -1077,16 +1071,16 @@ static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
 
 /* Make an ABORT chunk with a PROTOCOL VIOLATION cause code. */
 struct sctp_chunk *sctp_make_abort_violation(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk,
-	const __u8   *payload,
-	const size_t paylen)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk,
+					const __u8 *payload,
+					const size_t paylen)
 {
 	struct sctp_chunk  *retval;
 	struct sctp_paramhdr phdr;
 
-	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen +
-					      sizeof(phdr));
+	retval = sctp_make_abort(asoc, chunk, sizeof(struct sctp_errhdr) +
+					      paylen + sizeof(phdr));
 	if (!retval)
 		goto end;
 
@@ -1103,14 +1097,14 @@ end:
 }
 
 struct sctp_chunk *sctp_make_violation_paramlen(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk,
-	struct sctp_paramhdr *param)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk,
+					struct sctp_paramhdr *param)
 {
-	struct sctp_chunk *retval;
 	static const char error[] = "The following parameter had invalid length:";
-	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
+	size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr) +
 			     sizeof(*param);
+	struct sctp_chunk *retval;
 
 	retval = sctp_make_abort(asoc, chunk, payload_len);
 	if (!retval)
@@ -1126,12 +1120,12 @@ nodata:
 }
 
 struct sctp_chunk *sctp_make_violation_max_retrans(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk)
 {
-	struct sctp_chunk *retval;
 	static const char error[] = "Association exceeded its max_retans count";
-	size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t);
+	size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr);
+	struct sctp_chunk *retval;
 
 	retval = sctp_make_abort(asoc, chunk, payload_len);
 	if (!retval)
@@ -1146,10 +1140,10 @@ nodata:
 
 /* Make a HEARTBEAT chunk.  */
 struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
-				  const struct sctp_transport *transport)
+				       const struct sctp_transport *transport)
 {
+	struct sctp_sender_hb_info hbinfo;
 	struct sctp_chunk *retval;
-	sctp_sender_hb_info_t hbinfo;
 
 	retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0,
 				   sizeof(hbinfo), GFP_ATOMIC);
@@ -1158,7 +1152,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
 		goto nodata;
 
 	hbinfo.param_hdr.type = SCTP_PARAM_HEARTBEAT_INFO;
-	hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t));
+	hbinfo.param_hdr.length = htons(sizeof(hbinfo));
 	hbinfo.daddr = transport->ipaddr;
 	hbinfo.sent_at = jiffies;
 	hbinfo.hb_nonce = transport->hb_nonce;
@@ -1175,8 +1169,9 @@ nodata:
 }
 
 struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
-				      const struct sctp_chunk *chunk,
-				      const void *payload, const size_t paylen)
+					   const struct sctp_chunk *chunk,
+					   const void *payload,
+					   const size_t paylen)
 {
 	struct sctp_chunk *retval;
 
@@ -1207,14 +1202,15 @@ nodata:
  * This routine can be used for containing multiple causes in the chunk.
  */
 static struct sctp_chunk *sctp_make_op_error_space(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk,
-	size_t size)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk,
+					size_t size)
 {
 	struct sctp_chunk *retval;
 
 	retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
-				   sizeof(sctp_errhdr_t) + size, GFP_ATOMIC);
+				   sizeof(struct sctp_errhdr) + size,
+				   GFP_ATOMIC);
 	if (!retval)
 		goto nodata;
 
@@ -1240,8 +1236,8 @@ nodata:
  * to report all the errors, if the incoming chunk is large
  */
 static inline struct sctp_chunk *sctp_make_op_error_fixed(
-	const struct sctp_association *asoc,
-	const struct sctp_chunk *chunk)
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk)
 {
 	size_t size = asoc ? asoc->pathmtu : 0;
 
@@ -1253,9 +1249,9 @@ static inline struct sctp_chunk *sctp_make_op_error_fixed(
 
 /* Create an Operation Error chunk.  */
 struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
-				 const struct sctp_chunk *chunk,
-				 __be16 cause_code, const void *payload,
-				 size_t paylen, size_t reserve_tail)
+				      const struct sctp_chunk *chunk,
+				      __be16 cause_code, const void *payload,
+				      size_t paylen, size_t reserve_tail)
 {
 	struct sctp_chunk *retval;
 
@@ -1274,9 +1270,9 @@ nodata:
 
 struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
 {
-	struct sctp_chunk *retval;
-	struct sctp_hmac *hmac_desc;
 	struct sctp_authhdr auth_hdr;
+	struct sctp_hmac *hmac_desc;
+	struct sctp_chunk *retval;
 	__u8 *hmac;
 
 	/* Get the first hmac that the peer told us to use */
@@ -1285,16 +1281,16 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
 		return NULL;
 
 	retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0,
-			hmac_desc->hmac_len + sizeof(sctp_authhdr_t),
-			GFP_ATOMIC);
+				   hmac_desc->hmac_len + sizeof(auth_hdr),
+				   GFP_ATOMIC);
 	if (!retval)
 		return NULL;
 
 	auth_hdr.hmac_id = htons(hmac_desc->hmac_id);
 	auth_hdr.shkey_id = htons(asoc->active_key_id);
 
-	retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t),
-						&auth_hdr);
+	retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
+						 &auth_hdr);
 
 	hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len);
 
@@ -1322,8 +1318,8 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
  *
  */
 struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
-			    const struct sctp_association *asoc,
-			    struct sock *sk, gfp_t gfp)
+				 const struct sctp_association *asoc,
+				 struct sock *sk, gfp_t gfp)
 {
 	struct sctp_chunk *retval;
 
@@ -1375,11 +1371,11 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk)
  * arguments, reserving enough space for a 'paylen' byte payload.
  */
 static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
-					    __u8 type, __u8 flags, int paylen,
-					    gfp_t gfp)
+					   __u8 type, __u8 flags, int paylen,
+					   gfp_t gfp)
 {
-	struct sctp_chunk *retval;
 	struct sctp_chunkhdr *chunk_hdr;
+	struct sctp_chunk *retval;
 	struct sk_buff *skb;
 	struct sock *sk;
 
@@ -1473,9 +1469,9 @@ void sctp_chunk_put(struct sctp_chunk *ch)
  */
 void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
 {
-	void *target;
 	int chunklen = ntohs(chunk->chunk_hdr->length);
 	int padlen = SCTP_PAD4(chunklen) - chunklen;
+	void *target;
 
 	skb_put_zero(chunk->skb, padlen);
 	target = skb_put_data(chunk->skb, data, len);
@@ -1528,11 +1524,10 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len,
  */
 void sctp_chunk_assign_ssn(struct sctp_chunk *chunk)
 {
-	struct sctp_datamsg *msg;
-	struct sctp_chunk *lchunk;
 	struct sctp_stream *stream;
-	__u16 ssn;
-	__u16 sid;
+	struct sctp_chunk *lchunk;
+	struct sctp_datamsg *msg;
+	__u16 ssn, sid;
 
 	if (chunk->has_ssn)
 		return;
@@ -1577,12 +1572,12 @@ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk)
 
 /* Create a CLOSED association to use with an incoming packet.  */
 struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
-					struct sctp_chunk *chunk,
-					gfp_t gfp)
+					     struct sctp_chunk *chunk,
+					     gfp_t gfp)
 {
 	struct sctp_association *asoc;
+	enum sctp_scope scope;
 	struct sk_buff *skb;
-	sctp_scope_t scope;
 
 	/* Create the bare association.  */
 	scope = sctp_scope(sctp_source(chunk));
@@ -1601,14 +1596,15 @@ nodata:
 /* Build a cookie representing asoc.
  * This INCLUDES the param header needed to put the cookie in the INIT ACK.
  */
-static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
-				      const struct sctp_association *asoc,
-				      const struct sctp_chunk *init_chunk,
-				      int *cookie_len,
-				      const __u8 *raw_addrs, int addrs_len)
+static struct sctp_cookie_param *sctp_pack_cookie(
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *init_chunk,
+					int *cookie_len, const __u8 *raw_addrs,
+					int addrs_len)
 {
-	sctp_cookie_param_t *retval;
 	struct sctp_signed_cookie *cookie;
+	struct sctp_cookie_param *retval;
 	int headersize, bodysize;
 
 	/* Header size is static data prior to the actual cookie, including
@@ -1692,19 +1688,19 @@ nodata:
 
 /* Unpack the cookie from COOKIE ECHO chunk, recreating the association.  */
 struct sctp_association *sctp_unpack_cookie(
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	struct sctp_chunk *chunk, gfp_t gfp,
-	int *error, struct sctp_chunk **errp)
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk, gfp_t gfp,
+					int *error, struct sctp_chunk **errp)
 {
 	struct sctp_association *retval = NULL;
+	int headersize, bodysize, fixed_size;
 	struct sctp_signed_cookie *cookie;
+	struct sk_buff *skb = chunk->skb;
 	struct sctp_cookie *bear_cookie;
-	int headersize, bodysize, fixed_size;
 	__u8 *digest = ep->digest;
+	enum sctp_scope scope;
 	unsigned int len;
-	sctp_scope_t scope;
-	struct sk_buff *skb = chunk->skb;
 	ktime_t kt;
 
 	/* Header size is static data prior to the actual cookie, including
@@ -1976,8 +1972,8 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
 static int sctp_verify_ext_param(struct net *net, union sctp_params param)
 {
 	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
-	int have_auth = 0;
 	int have_asconf = 0;
+	int have_auth = 0;
 	int i;
 
 	for (i = 0; i < num_ext; i++) {
@@ -2007,10 +2003,10 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param)
 }
 
 static void sctp_process_ext_param(struct sctp_association *asoc,
-				    union sctp_params param)
+				   union sctp_params param)
 {
-	struct net *net = sock_net(asoc->base.sk);
 	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
+	struct net *net = sock_net(asoc->base.sk);
 	int i;
 
 	for (i = 0; i < num_ext; i++) {
@@ -2067,10 +2063,11 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
  * 	SCTP_IERROR_ERROR    - stop and report an error.
  * 	SCTP_IERROR_NOMEME   - out of memory.
  */
-static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
-					    union sctp_params param,
-					    struct sctp_chunk *chunk,
-					    struct sctp_chunk **errp)
+static enum sctp_ierror sctp_process_unk_param(
+					const struct sctp_association *asoc,
+					union sctp_params param,
+					struct sctp_chunk *chunk,
+					struct sctp_chunk **errp)
 {
 	int retval = SCTP_IERROR_NO_ERROR;
 
@@ -2119,13 +2116,13 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
  *	SCTP_IERROR_ERROR - stop processing, trigger an ERROR
  * 	SCTP_IERROR_NO_ERROR - continue with the chunk
  */
-static sctp_ierror_t sctp_verify_param(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					union sctp_params param,
-					enum sctp_cid cid,
-					struct sctp_chunk *chunk,
-					struct sctp_chunk **err_chunk)
+static enum sctp_ierror sctp_verify_param(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  union sctp_params param,
+					  enum sctp_cid cid,
+					  struct sctp_chunk *chunk,
+					  struct sctp_chunk **err_chunk)
 {
 	struct sctp_hmac_algo_param *hmacs;
 	int retval = SCTP_IERROR_NO_ERROR;
@@ -2310,13 +2307,13 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 		      struct sctp_init_chunk *peer_init, gfp_t gfp)
 {
 	struct net *net = sock_net(asoc->base.sk);
-	union sctp_params param;
 	struct sctp_transport *transport;
 	struct list_head *pos, *temp;
-	struct sctp_af *af;
+	union sctp_params param;
 	union sctp_addr addr;
-	char *cookie;
+	struct sctp_af *af;
 	int src_match = 0;
+	char *cookie;
 
 	/* We must include the address that the INIT packet came from.
 	 * This is the only address that matters for an INIT packet.
@@ -2500,16 +2497,15 @@ static int sctp_process_param(struct sctp_association *asoc,
 			      gfp_t gfp)
 {
 	struct net *net = sock_net(asoc->base.sk);
-	union sctp_addr addr;
-	int i;
-	__u16 sat;
-	int retval = 1;
-	sctp_scope_t scope;
-	u32 stale;
-	struct sctp_af *af;
+	struct sctp_endpoint *ep = asoc->ep;
 	union sctp_addr_param *addr_param;
 	struct sctp_transport *t;
-	struct sctp_endpoint *ep = asoc->ep;
+	enum sctp_scope scope;
+	union sctp_addr addr;
+	struct sctp_af *af;
+	int retval = 1, i;
+	u32 stale;
+	__u16 sat;
 
 	/* We maintain all INIT parameters in network byte order all the
 	 * time.  This allows us to not worry about whether the parameters
@@ -2617,7 +2613,7 @@ do_addr_param:
 		if (!net->sctp.addip_enable)
 			goto fall_through;
 
-		addr_param = param.v + sizeof(sctp_addip_param_t);
+		addr_param = param.v + sizeof(struct sctp_addip_param);
 
 		af = sctp_get_af_specific(param_type2af(addr_param->p.type));
 		if (af == NULL)
@@ -2754,7 +2750,7 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
 					   union sctp_addr *addr,
 					   int vparam_len)
 {
-	sctp_addiphdr_t asconf;
+	struct sctp_addiphdr asconf;
 	struct sctp_chunk *retval;
 	int length = sizeof(asconf) + vparam_len;
 	union sctp_addr_param addrparam;
@@ -2807,22 +2803,20 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
  *
  */
 struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
-					      union sctp_addr	      *laddr,
-					      struct sockaddr	      *addrs,
-					      int		      addrcnt,
-					      __be16		      flags)
-{
-	sctp_addip_param_t	param;
-	struct sctp_chunk	*retval;
-	union sctp_addr_param	addr_param;
-	union sctp_addr		*addr;
-	void			*addr_buf;
-	struct sctp_af		*af;
-	int			paramlen = sizeof(param);
-	int			addr_param_len = 0;
-	int 			totallen = 0;
-	int 			i;
-	int			del_pickup = 0;
+					      union sctp_addr *laddr,
+					      struct sockaddr *addrs,
+					      int addrcnt, __be16 flags)
+{
+	union sctp_addr_param addr_param;
+	struct sctp_addip_param	param;
+	int paramlen = sizeof(param);
+	struct sctp_chunk *retval;
+	int addr_param_len = 0;
+	union sctp_addr *addr;
+	int totallen = 0, i;
+	int del_pickup = 0;
+	struct sctp_af *af;
+	void *addr_buf;
 
 	/* Get total length of all the address parameters. */
 	addr_buf = addrs;
@@ -2860,7 +2854,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
 		addr_param_len = af->to_addr_param(addr, &addr_param);
 		param.param_hdr.type = flags;
 		param.param_hdr.length = htons(paramlen + addr_param_len);
-		param.crr_id = i;
+		param.crr_id = htonl(i);
 
 		sctp_addto_chunk(retval, paramlen, &param);
 		sctp_addto_chunk(retval, addr_param_len, &addr_param);
@@ -2873,7 +2867,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
 		addr_param_len = af->to_addr_param(addr, &addr_param);
 		param.param_hdr.type = SCTP_PARAM_DEL_IP;
 		param.param_hdr.length = htons(paramlen + addr_param_len);
-		param.crr_id = i;
+		param.crr_id = htonl(i);
 
 		sctp_addto_chunk(retval, paramlen, &param);
 		sctp_addto_chunk(retval, addr_param_len, &addr_param);
@@ -2898,12 +2892,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
 struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
 					     union sctp_addr *addr)
 {
-	sctp_addip_param_t	param;
-	struct sctp_chunk 	*retval;
-	int 			len = sizeof(param);
-	union sctp_addr_param	addrparam;
-	int			addrlen;
-	struct sctp_af		*af = sctp_get_af_specific(addr->v4.sin_family);
+	struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family);
+	union sctp_addr_param addrparam;
+	struct sctp_addip_param	param;
+	struct sctp_chunk *retval;
+	int len = sizeof(param);
+	int addrlen;
 
 	addrlen = af->to_addr_param(addr, &addrparam);
 	if (!addrlen)
@@ -2947,9 +2941,9 @@ struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
 static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *asoc,
 					       __u32 serial, int vparam_len)
 {
-	sctp_addiphdr_t		asconf;
-	struct sctp_chunk	*retval;
-	int			length = sizeof(asconf) + vparam_len;
+	struct sctp_addiphdr asconf;
+	struct sctp_chunk *retval;
+	int length = sizeof(asconf) + vparam_len;
 
 	/* Create the chunk.  */
 	retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length,
@@ -2967,13 +2961,14 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as
 
 /* Add response parameters to an ASCONF_ACK chunk. */
 static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
-			      __be16 err_code, sctp_addip_param_t *asconf_param)
+				     __be16 err_code,
+				     struct sctp_addip_param *asconf_param)
 {
-	sctp_addip_param_t 	ack_param;
-	sctp_errhdr_t		err_param;
-	int			asconf_param_len = 0;
-	int			err_param_len = 0;
-	__be16			response_type;
+	struct sctp_addip_param ack_param;
+	struct sctp_errhdr err_param;
+	int asconf_param_len = 0;
+	int err_param_len = 0;
+	__be16 response_type;
 
 	if (SCTP_ERROR_NO_ERROR == err_code) {
 		response_type = SCTP_PARAM_SUCCESS_REPORT;
@@ -3008,15 +3003,15 @@ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
 
 /* Process a asconf parameter. */
 static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
-				       struct sctp_chunk *asconf,
-				       sctp_addip_param_t *asconf_param)
+					struct sctp_chunk *asconf,
+					struct sctp_addip_param *asconf_param)
 {
+	union sctp_addr_param *addr_param;
 	struct sctp_transport *peer;
-	struct sctp_af *af;
 	union sctp_addr	addr;
-	union sctp_addr_param *addr_param;
+	struct sctp_af *af;
 
-	addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t);
+	addr_param = (void *)asconf_param + sizeof(*asconf_param);
 
 	if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP &&
 	    asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP &&
@@ -3141,10 +3136,11 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 			struct sctp_chunk *chunk, bool addr_param_needed,
 			struct sctp_paramhdr **errp)
 {
-	sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr;
-	union sctp_params param;
+	struct sctp_addip_chunk *addip;
 	bool addr_param_seen = false;
+	union sctp_params param;
 
+	addip = (struct sctp_addip_chunk *)chunk->chunk_hdr;
 	sctp_walk_params(param, addip, addip_hdr.params) {
 		size_t length = ntohs(param.p->length);
 
@@ -3153,7 +3149,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 		case SCTP_PARAM_ERR_CAUSE:
 			break;
 		case SCTP_PARAM_IPV4_ADDRESS:
-			if (length != sizeof(sctp_ipv4addr_param_t))
+			if (length != sizeof(struct sctp_ipv4addr_param))
 				return false;
 			/* ensure there is only one addr param and it's in the
 			 * beginning of addip_hdr params, or we reject it.
@@ -3163,7 +3159,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 			addr_param_seen = true;
 			break;
 		case SCTP_PARAM_IPV6_ADDRESS:
-			if (length != sizeof(sctp_ipv6addr_param_t))
+			if (length != sizeof(struct sctp_ipv6addr_param))
 				return false;
 			if (param.v != addip->addip_hdr.params)
 				return false;
@@ -3176,13 +3172,13 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 			if (addr_param_needed && !addr_param_seen)
 				return false;
 			length = ntohs(param.addip->param_hdr.length);
-			if (length < sizeof(sctp_addip_param_t) +
+			if (length < sizeof(struct sctp_addip_param) +
 				     sizeof(**errp))
 				return false;
 			break;
 		case SCTP_PARAM_SUCCESS_REPORT:
 		case SCTP_PARAM_ADAPTATION_LAYER_IND:
-			if (length != sizeof(sctp_addip_param_t))
+			if (length != sizeof(struct sctp_addip_param))
 				return false;
 			break;
 		default:
@@ -3208,24 +3204,24 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 				       struct sctp_chunk *asconf)
 {
-	sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr;
+	union sctp_addr_param *addr_param;
+	struct sctp_addip_chunk *addip;
+	struct sctp_chunk *asconf_ack;
 	bool all_param_pass = true;
+	struct sctp_addiphdr *hdr;
+	int length = 0, chunk_len;
 	union sctp_params param;
-	sctp_addiphdr_t		*hdr;
-	union sctp_addr_param	*addr_param;
-	struct sctp_chunk	*asconf_ack;
-	__be16	err_code;
-	int	length = 0;
-	int	chunk_len;
-	__u32	serial;
+	__be16 err_code;
+	__u32 serial;
 
+	addip = (struct sctp_addip_chunk *)asconf->chunk_hdr;
 	chunk_len = ntohs(asconf->chunk_hdr->length) -
 		    sizeof(struct sctp_chunkhdr);
-	hdr = (sctp_addiphdr_t *)asconf->skb->data;
+	hdr = (struct sctp_addiphdr *)asconf->skb->data;
 	serial = ntohl(hdr->serial);
 
 	/* Skip the addiphdr and store a pointer to address parameter.  */
-	length = sizeof(sctp_addiphdr_t);
+	length = sizeof(*hdr);
 	addr_param = (union sctp_addr_param *)(asconf->skb->data + length);
 	chunk_len -= length;
 
@@ -3291,16 +3287,16 @@ done:
 
 /* Process a asconf parameter that is successfully acked. */
 static void sctp_asconf_param_success(struct sctp_association *asoc,
-				     sctp_addip_param_t *asconf_param)
+				      struct sctp_addip_param *asconf_param)
 {
-	struct sctp_af *af;
-	union sctp_addr	addr;
 	struct sctp_bind_addr *bp = &asoc->base.bind_addr;
 	union sctp_addr_param *addr_param;
-	struct sctp_transport *transport;
 	struct sctp_sockaddr_entry *saddr;
+	struct sctp_transport *transport;
+	union sctp_addr	addr;
+	struct sctp_af *af;
 
-	addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t);
+	addr_param = (void *)asconf_param + sizeof(*asconf_param);
 
 	/* We have checked the packet before, so we do not check again.	*/
 	af = sctp_get_af_specific(param_type2af(addr_param->p.type));
@@ -3351,14 +3347,14 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
  * specific success indication is present for the parameter.
  */
 static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
-				      sctp_addip_param_t *asconf_param,
-				      int no_err)
+				       struct sctp_addip_param *asconf_param,
+				       int no_err)
 {
-	sctp_addip_param_t	*asconf_ack_param;
-	sctp_errhdr_t		*err_param;
-	int			length;
-	int			asconf_ack_len;
-	__be16			err_code;
+	struct sctp_addip_param	*asconf_ack_param;
+	struct sctp_errhdr *err_param;
+	int asconf_ack_len;
+	__be16 err_code;
+	int length;
 
 	if (no_err)
 		err_code = SCTP_ERROR_NO_ERROR;
@@ -3371,9 +3367,9 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
 	/* Skip the addiphdr from the asconf_ack chunk and store a pointer to
 	 * the first asconf_ack parameter.
 	 */
-	length = sizeof(sctp_addiphdr_t);
-	asconf_ack_param = (sctp_addip_param_t *)(asconf_ack->skb->data +
-						  length);
+	length = sizeof(struct sctp_addiphdr);
+	asconf_ack_param = (struct sctp_addip_param *)(asconf_ack->skb->data +
+						       length);
 	asconf_ack_len -= length;
 
 	while (asconf_ack_len > 0) {
@@ -3382,7 +3378,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
 			case SCTP_PARAM_SUCCESS_REPORT:
 				return SCTP_ERROR_NO_ERROR;
 			case SCTP_PARAM_ERR_CAUSE:
-				length = sizeof(sctp_addip_param_t);
+				length = sizeof(*asconf_ack_param);
 				err_param = (void *)asconf_ack_param + length;
 				asconf_ack_len -= length;
 				if (asconf_ack_len > 0)
@@ -3407,20 +3403,20 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
 int sctp_process_asconf_ack(struct sctp_association *asoc,
 			    struct sctp_chunk *asconf_ack)
 {
-	struct sctp_chunk	*asconf = asoc->addip_last_asconf;
-	union sctp_addr_param	*addr_param;
-	sctp_addip_param_t	*asconf_param;
-	int	length = 0;
-	int	asconf_len = asconf->skb->len;
-	int	all_param_pass = 0;
-	int	no_err = 1;
-	int	retval = 0;
-	__be16	err_code = SCTP_ERROR_NO_ERROR;
+	struct sctp_chunk *asconf = asoc->addip_last_asconf;
+	struct sctp_addip_param *asconf_param;
+	__be16 err_code = SCTP_ERROR_NO_ERROR;
+	union sctp_addr_param *addr_param;
+	int asconf_len = asconf->skb->len;
+	int all_param_pass = 0;
+	int length = 0;
+	int no_err = 1;
+	int retval = 0;
 
 	/* Skip the chunkhdr and addiphdr from the last asconf sent and store
 	 * a pointer to address parameter.
 	 */
-	length = sizeof(sctp_addip_chunk_t);
+	length = sizeof(struct sctp_addip_chunk);
 	addr_param = (union sctp_addr_param *)(asconf->skb->data + length);
 	asconf_len -= length;
 
@@ -3436,7 +3432,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
 	 * failures are indicated, then all request(s) are considered
 	 * successful.
 	 */
-	if (asconf_ack->skb->len == sizeof(sctp_addiphdr_t))
+	if (asconf_ack->skb->len == sizeof(struct sctp_addiphdr))
 		all_param_pass = 1;
 
 	/* Process the TLVs contained in the last sent ASCONF chunk. */
@@ -3542,9 +3538,8 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
  *  \                                                               \
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
-static struct sctp_chunk *sctp_make_reconf(
-				const struct sctp_association *asoc,
-				int length)
+static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc,
+					   int length)
 {
 	struct sctp_reconf_chunk *reconf;
 	struct sctp_chunk *retval;
@@ -3595,12 +3590,12 @@ static struct sctp_chunk *sctp_make_reconf(
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
 struct sctp_chunk *sctp_make_strreset_req(
-				const struct sctp_association *asoc,
-				__u16 stream_num, __u16 *stream_list,
-				bool out, bool in)
+					const struct sctp_association *asoc,
+					__u16 stream_num, __be16 *stream_list,
+					bool out, bool in)
 {
+	__u16 stream_len = stream_num * sizeof(__u16);
 	struct sctp_strreset_outreq outreq;
-	__u16 stream_len = stream_num * 2;
 	struct sctp_strreset_inreq inreq;
 	struct sctp_chunk *retval;
 	__u16 outlen, inlen;
@@ -3649,7 +3644,7 @@ struct sctp_chunk *sctp_make_strreset_req(
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
 struct sctp_chunk *sctp_make_strreset_tsnreq(
-				const struct sctp_association *asoc)
+					const struct sctp_association *asoc)
 {
 	struct sctp_strreset_tsnreq tsnreq;
 	__u16 length = sizeof(tsnreq);
@@ -3680,8 +3675,8 @@ struct sctp_chunk *sctp_make_strreset_tsnreq(
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
 struct sctp_chunk *sctp_make_strreset_addstrm(
-				const struct sctp_association *asoc,
-				__u16 out, __u16 in)
+					const struct sctp_association *asoc,
+					__u16 out, __u16 in)
 {
 	struct sctp_strreset_addstrm addstrm;
 	__u16 size = sizeof(addstrm);
@@ -3725,9 +3720,8 @@ struct sctp_chunk *sctp_make_strreset_addstrm(
  *  |                            Result                             |
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
-struct sctp_chunk *sctp_make_strreset_resp(
-				const struct sctp_association *asoc,
-				__u32 result, __u32 sn)
+struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc,
+					   __u32 result, __u32 sn)
 {
 	struct sctp_strreset_resp resp;
 	__u16 length = sizeof(resp);
@@ -3762,10 +3756,10 @@ struct sctp_chunk *sctp_make_strreset_resp(
  *  |                  Receiver's Next TSN (optional)               |
  *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
-struct sctp_chunk *sctp_make_strreset_tsnresp(
-					struct sctp_association *asoc,
-					__u32 result, __u32 sn,
-					__u32 sender_tsn, __u32 receiver_tsn)
+struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc,
+					      __u32 result, __u32 sn,
+					      __u32 sender_tsn,
+					      __u32 receiver_tsn)
 {
 	struct sctp_strreset_resptsn tsnresp;
 	__u16 length = sizeof(tsnresp);
@@ -3794,7 +3788,8 @@ bool sctp_verify_reconf(const struct sctp_association *asoc,
 {
 	struct sctp_reconf_chunk *hdr;
 	union sctp_params param;
-	__u16 last = 0, cnt = 0;
+	__be16 last = 0;
+	__u16 cnt = 0;
 
 	hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
 	sctp_walk_params(param, hdr, params) {
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index d6e5e9e0fd6d..df94d77401e7 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -50,23 +50,25 @@
 #include <net/sock.h>
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
 
-static int sctp_cmd_interpreter(sctp_event_t event_type,
-				sctp_subtype_t subtype,
-				sctp_state_t state,
+static int sctp_cmd_interpreter(enum sctp_event event_type,
+				union sctp_subtype subtype,
+				enum sctp_state state,
 				struct sctp_endpoint *ep,
 				struct sctp_association *asoc,
 				void *event_arg,
-				sctp_disposition_t status,
-				sctp_cmd_seq_t *commands,
+				enum sctp_disposition status,
+				struct sctp_cmd_seq *commands,
 				gfp_t gfp);
-static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
-			     sctp_state_t state,
+static int sctp_side_effects(enum sctp_event event_type,
+			     union sctp_subtype subtype,
+			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
 			     void *event_arg,
-			     sctp_disposition_t status,
-			     sctp_cmd_seq_t *commands,
+			     enum sctp_disposition status,
+			     struct sctp_cmd_seq *commands,
 			     gfp_t gfp);
 
 /********************************************************************
@@ -96,8 +98,8 @@ static void sctp_do_ecn_ce_work(struct sctp_association *asoc,
  * that was originally marked with the CE bit.
  */
 static struct sctp_chunk *sctp_do_ecn_ecne_work(struct sctp_association *asoc,
-					   __u32 lowest_tsn,
-					   struct sctp_chunk *chunk)
+						__u32 lowest_tsn,
+						struct sctp_chunk *chunk)
 {
 	struct sctp_chunk *repl;
 
@@ -149,11 +151,11 @@ static void sctp_do_ecn_cwr_work(struct sctp_association *asoc,
 
 /* Generate SACK if necessary.  We call this at the end of a packet.  */
 static int sctp_gen_sack(struct sctp_association *asoc, int force,
-			 sctp_cmd_seq_t *commands)
+			 struct sctp_cmd_seq *commands)
 {
+	struct sctp_transport *trans = asoc->peer.last_data_from;
 	__u32 ctsn, max_tsn_seen;
 	struct sctp_chunk *sack;
-	struct sctp_transport *trans = asoc->peer.last_data_from;
 	int error = 0;
 
 	if (force ||
@@ -241,13 +243,14 @@ nomem:
 /* When the T3-RTX timer expires, it calls this function to create the
  * relevant state machine event.
  */
-void sctp_generate_t3_rtx_event(unsigned long peer)
+void sctp_generate_t3_rtx_event(struct timer_list *t)
 {
-	int error;
-	struct sctp_transport *transport = (struct sctp_transport *) peer;
+	struct sctp_transport *transport =
+		from_timer(transport, t, T3_rtx_timer);
 	struct sctp_association *asoc = transport->asoc;
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
+	int error;
 
 	/* Check whether a task is in the sock.  */
 
@@ -280,7 +283,7 @@ out_unlock:
  * for timeouts which use the association as their parameter.
  */
 static void sctp_generate_timeout_event(struct sctp_association *asoc,
-					sctp_event_timeout_t timeout_type)
+					enum sctp_event_timeout timeout_type)
 {
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
@@ -317,55 +320,68 @@ out_unlock:
 	sctp_association_put(asoc);
 }
 
-static void sctp_generate_t1_cookie_event(unsigned long data)
+static void sctp_generate_t1_cookie_event(struct timer_list *t)
 {
-	struct sctp_association *asoc = (struct sctp_association *) data;
+	struct sctp_association *asoc =
+		from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_COOKIE]);
+
 	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE);
 }
 
-static void sctp_generate_t1_init_event(unsigned long data)
+static void sctp_generate_t1_init_event(struct timer_list *t)
 {
-	struct sctp_association *asoc = (struct sctp_association *) data;
+	struct sctp_association *asoc =
+		from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_INIT]);
+
 	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT);
 }
 
-static void sctp_generate_t2_shutdown_event(unsigned long data)
+static void sctp_generate_t2_shutdown_event(struct timer_list *t)
 {
-	struct sctp_association *asoc = (struct sctp_association *) data;
+	struct sctp_association *asoc =
+		from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN]);
+
 	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN);
 }
 
-static void sctp_generate_t4_rto_event(unsigned long data)
+static void sctp_generate_t4_rto_event(struct timer_list *t)
 {
-	struct sctp_association *asoc = (struct sctp_association *) data;
+	struct sctp_association *asoc =
+		from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T4_RTO]);
+
 	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO);
 }
 
-static void sctp_generate_t5_shutdown_guard_event(unsigned long data)
+static void sctp_generate_t5_shutdown_guard_event(struct timer_list *t)
 {
-	struct sctp_association *asoc = (struct sctp_association *)data;
+	struct sctp_association *asoc =
+		from_timer(asoc, t,
+			   timers[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]);
+
 	sctp_generate_timeout_event(asoc,
 				    SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD);
 
 } /* sctp_generate_t5_shutdown_guard_event() */
 
-static void sctp_generate_autoclose_event(unsigned long data)
+static void sctp_generate_autoclose_event(struct timer_list *t)
 {
-	struct sctp_association *asoc = (struct sctp_association *) data;
+	struct sctp_association *asoc =
+		from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]);
+
 	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE);
 }
 
 /* Generate a heart beat event.  If the sock is busy, reschedule.   Make
  * sure that the transport is still valid.
  */
-void sctp_generate_heartbeat_event(unsigned long data)
+void sctp_generate_heartbeat_event(struct timer_list *t)
 {
-	int error = 0;
-	struct sctp_transport *transport = (struct sctp_transport *) data;
+	struct sctp_transport *transport = from_timer(transport, t, hb_timer);
 	struct sctp_association *asoc = transport->asoc;
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
 	u32 elapsed, timeout;
+	int error = 0;
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -403,9 +419,10 @@ out_unlock:
 /* Handle the timeout of the ICMP protocol unreachable timer.  Trigger
  * the correct state machine transition that will close the association.
  */
-void sctp_generate_proto_unreach_event(unsigned long data)
+void sctp_generate_proto_unreach_event(struct timer_list *t)
 {
-	struct sctp_transport *transport = (struct sctp_transport *) data;
+	struct sctp_transport *transport =
+		from_timer(transport, t, proto_unreach_timer);
 	struct sctp_association *asoc = transport->asoc;
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
@@ -437,9 +454,10 @@ out_unlock:
 }
 
  /* Handle the timeout of the RE-CONFIG timer. */
-void sctp_generate_reconf_event(unsigned long data)
+void sctp_generate_reconf_event(struct timer_list *t)
 {
-	struct sctp_transport *transport = (struct sctp_transport *)data;
+	struct sctp_transport *transport =
+		from_timer(transport, t, reconf_timer);
 	struct sctp_association *asoc = transport->asoc;
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
@@ -469,24 +487,27 @@ out_unlock:
 }
 
 /* Inject a SACK Timeout event into the state machine.  */
-static void sctp_generate_sack_event(unsigned long data)
+static void sctp_generate_sack_event(struct timer_list *t)
 {
-	struct sctp_association *asoc = (struct sctp_association *) data;
+	struct sctp_association *asoc =
+		from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_SACK]);
+
 	sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK);
 }
 
 sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
-	NULL,
-	sctp_generate_t1_cookie_event,
-	sctp_generate_t1_init_event,
-	sctp_generate_t2_shutdown_event,
-	NULL,
-	sctp_generate_t4_rto_event,
-	sctp_generate_t5_shutdown_guard_event,
-	NULL,
-	NULL,
-	sctp_generate_sack_event,
-	sctp_generate_autoclose_event,
+	[SCTP_EVENT_TIMEOUT_NONE] =		NULL,
+	[SCTP_EVENT_TIMEOUT_T1_COOKIE] =	sctp_generate_t1_cookie_event,
+	[SCTP_EVENT_TIMEOUT_T1_INIT] =		sctp_generate_t1_init_event,
+	[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] =	sctp_generate_t2_shutdown_event,
+	[SCTP_EVENT_TIMEOUT_T3_RTX] =		NULL,
+	[SCTP_EVENT_TIMEOUT_T4_RTO] =		sctp_generate_t4_rto_event,
+	[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD] =
+					sctp_generate_t5_shutdown_guard_event,
+	[SCTP_EVENT_TIMEOUT_HEARTBEAT] =	NULL,
+	[SCTP_EVENT_TIMEOUT_RECONF] =		NULL,
+	[SCTP_EVENT_TIMEOUT_SACK] =		sctp_generate_sack_event,
+	[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =	sctp_generate_autoclose_event,
 };
 
 
@@ -505,7 +526,7 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
  * notification SHOULD be sent to the upper layer.
  *
  */
-static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
+static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands,
 					 struct sctp_association *asoc,
 					 struct sctp_transport *transport,
 					 int is_hb)
@@ -577,7 +598,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
 }
 
 /* Worker routine to handle INIT command failure.  */
-static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
+static void sctp_cmd_init_failed(struct sctp_cmd_seq *commands,
 				 struct sctp_association *asoc,
 				 unsigned int error)
 {
@@ -600,15 +621,16 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
 }
 
 /* Worker routine to handle SCTP_CMD_ASSOC_FAILED.  */
-static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
+static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands,
 				  struct sctp_association *asoc,
-				  sctp_event_t event_type,
-				  sctp_subtype_t subtype,
+				  enum sctp_event event_type,
+				  union sctp_subtype subtype,
 				  struct sctp_chunk *chunk,
 				  unsigned int error)
 {
 	struct sctp_ulpevent *event;
 	struct sctp_chunk *abort;
+
 	/* Cancel any partial delivery in progress. */
 	sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
 
@@ -644,7 +666,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
  * since all other cases use "temporary" associations and can do all
  * their work in statefuns directly.
  */
-static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
+static int sctp_cmd_process_init(struct sctp_cmd_seq *commands,
 				 struct sctp_association *asoc,
 				 struct sctp_chunk *chunk,
 				 struct sctp_init_chunk *peer_init,
@@ -666,7 +688,7 @@ static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
 }
 
 /* Helper function to break out starting up of heartbeat timers.  */
-static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_hb_timers_start(struct sctp_cmd_seq *cmds,
 				     struct sctp_association *asoc)
 {
 	struct sctp_transport *t;
@@ -679,7 +701,7 @@ static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds,
 		sctp_transport_reset_hb_timer(t);
 }
 
-static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_hb_timers_stop(struct sctp_cmd_seq *cmds,
 				    struct sctp_association *asoc)
 {
 	struct sctp_transport *t;
@@ -694,7 +716,7 @@ static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds,
 }
 
 /* Helper function to stop any pending T3-RTX timers */
-static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_t3_rtx_timers_stop(struct sctp_cmd_seq *cmds,
 					struct sctp_association *asoc)
 {
 	struct sctp_transport *t;
@@ -708,12 +730,12 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
 
 
 /* Helper function to handle the reception of an HEARTBEAT ACK.  */
-static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_transport_on(struct sctp_cmd_seq *cmds,
 				  struct sctp_association *asoc,
 				  struct sctp_transport *t,
 				  struct sctp_chunk *chunk)
 {
-	sctp_sender_hb_info_t *hbinfo;
+	struct sctp_sender_hb_info *hbinfo;
 	int was_unconfirmed = 0;
 
 	/* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the
@@ -767,7 +789,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
 	if (t->rto_pending == 0)
 		t->rto_pending = 1;
 
-	hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+	hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data;
 	sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at));
 
 	/* Update the heartbeat timer.  */
@@ -779,7 +801,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
 
 
 /* Helper function to process the process SACK command.  */
-static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
+static int sctp_cmd_process_sack(struct sctp_cmd_seq *cmds,
 				 struct sctp_association *asoc,
 				 struct sctp_chunk *chunk)
 {
@@ -801,7 +823,7 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
 /* Helper function to set the timeout value for T2-SHUTDOWN timer and to set
  * the transport for a shutdown chunk.
  */
-static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_setup_t2(struct sctp_cmd_seq *cmds,
 			      struct sctp_association *asoc,
 			      struct sctp_chunk *chunk)
 {
@@ -818,7 +840,7 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
 }
 
-static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_assoc_update(struct sctp_cmd_seq *cmds,
 				  struct sctp_association *asoc,
 				  struct sctp_association *new)
 {
@@ -828,7 +850,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
 	if (!sctp_assoc_update(asoc, new))
 		return;
 
-	abort = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t));
+	abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr));
 	if (abort) {
 		sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
 		sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
@@ -841,9 +863,9 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
 }
 
 /* Helper function to change the state of an association. */
-static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
 			       struct sctp_association *asoc,
-			       sctp_state_t state)
+			       enum sctp_state state)
 {
 	struct sock *sk = asoc->base.sk;
 
@@ -901,7 +923,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
 }
 
 /* Helper function to delete an association. */
-static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_delete_tcb(struct sctp_cmd_seq *cmds,
 				struct sctp_association *asoc)
 {
 	struct sock *sk = asoc->base.sk;
@@ -923,9 +945,9 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
  * destination address (we use active path instead of primary path just
  * because primary path may be inactive.
  */
-static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds,
-				struct sctp_association *asoc,
-				struct sctp_chunk *chunk)
+static void sctp_cmd_setup_t4(struct sctp_cmd_seq *cmds,
+			      struct sctp_association *asoc,
+			      struct sctp_chunk *chunk)
 {
 	struct sctp_transport *t;
 
@@ -935,7 +957,7 @@ static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds,
 }
 
 /* Process an incoming Operation Error Chunk. */
-static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
 				   struct sctp_association *asoc,
 				   struct sctp_chunk *chunk)
 {
@@ -990,6 +1012,7 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
 				    struct sctp_chunk *chunk)
 {
 	struct sctp_fwdtsn_skip *skip;
+
 	/* Walk through all the skipped SSNs */
 	sctp_walk_fwdtsn(skip, chunk) {
 		sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
@@ -1002,8 +1025,8 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
 static void sctp_cmd_del_non_primary(struct sctp_association *asoc)
 {
 	struct sctp_transport *t;
-	struct list_head *pos;
 	struct list_head *temp;
+	struct list_head *pos;
 
 	list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
 		t = list_entry(pos, struct sctp_transport, transports);
@@ -1024,9 +1047,9 @@ static void sctp_cmd_set_sk_err(struct sctp_association *asoc, int error)
 }
 
 /* Helper function to generate an association change event */
-static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands,
-				 struct sctp_association *asoc,
-				 u8 state)
+static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
+				  struct sctp_association *asoc,
+				  u8 state)
 {
 	struct sctp_ulpevent *ev;
 
@@ -1039,7 +1062,7 @@ static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands,
 }
 
 /* Helper function to generate an adaptation indication event */
-static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
+static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
 				    struct sctp_association *asoc)
 {
 	struct sctp_ulpevent *ev;
@@ -1052,8 +1075,8 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
 
 
 static void sctp_cmd_t1_timer_update(struct sctp_association *asoc,
-				    sctp_event_timeout_t timer,
-				    char *name)
+				     enum sctp_event_timeout timer,
+				     char *name)
 {
 	struct sctp_transport *t;
 
@@ -1086,6 +1109,8 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc,
 
 	list_for_each_entry(chunk, &msg->chunks, frag_list)
 		sctp_outq_tail(&asoc->outqueue, chunk, gfp);
+
+	asoc->outqueue.sched->enqueue(&asoc->outqueue, msg);
 }
 
 
@@ -1139,22 +1164,20 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
  * If you want to understand all of lksctp, this is a
  * good place to start.
  */
-int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
-	       sctp_state_t state,
-	       struct sctp_endpoint *ep,
-	       struct sctp_association *asoc,
-	       void *event_arg,
-	       gfp_t gfp)
+int sctp_do_sm(struct net *net, enum sctp_event event_type,
+	       union sctp_subtype subtype, enum sctp_state state,
+	       struct sctp_endpoint *ep, struct sctp_association *asoc,
+	       void *event_arg, gfp_t gfp)
 {
-	sctp_cmd_seq_t commands;
-	const sctp_sm_table_entry_t *state_fn;
-	sctp_disposition_t status;
-	int error = 0;
-	typedef const char *(printfn_t)(sctp_subtype_t);
+	typedef const char *(printfn_t)(union sctp_subtype);
 	static printfn_t *table[] = {
 		NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname,
 	};
 	printfn_t *debug_fn  __attribute__ ((unused)) = table[event_type];
+	const struct sctp_sm_table_entry *state_fn;
+	struct sctp_cmd_seq commands;
+	enum sctp_disposition status;
+	int error = 0;
 
 	/* Look up the state function, run it, and then process the
 	 * side effects.  These three steps are the heart of lksctp.
@@ -1178,13 +1201,14 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
 /*****************************************************************
  * This the master state function side effect processing function.
  *****************************************************************/
-static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
-			     sctp_state_t state,
+static int sctp_side_effects(enum sctp_event event_type,
+			     union sctp_subtype subtype,
+			     enum sctp_state state,
 			     struct sctp_endpoint *ep,
 			     struct sctp_association **asoc,
 			     void *event_arg,
-			     sctp_disposition_t status,
-			     sctp_cmd_seq_t *commands,
+			     enum sctp_disposition status,
+			     struct sctp_cmd_seq *commands,
 			     gfp_t gfp)
 {
 	int error;
@@ -1263,29 +1287,27 @@ bail:
  ********************************************************************/
 
 /* This is the side-effect interpreter.  */
-static int sctp_cmd_interpreter(sctp_event_t event_type,
-				sctp_subtype_t subtype,
-				sctp_state_t state,
+static int sctp_cmd_interpreter(enum sctp_event event_type,
+				union sctp_subtype subtype,
+				enum sctp_state state,
 				struct sctp_endpoint *ep,
 				struct sctp_association *asoc,
 				void *event_arg,
-				sctp_disposition_t status,
-				sctp_cmd_seq_t *commands,
+				enum sctp_disposition status,
+				struct sctp_cmd_seq *commands,
 				gfp_t gfp)
 {
-	struct sock *sk = ep->base.sk;
-	struct sctp_sock *sp = sctp_sk(sk);
-	int error = 0;
-	int force;
-	sctp_cmd_t *cmd;
-	struct sctp_chunk *new_obj;
-	struct sctp_chunk *chunk = NULL;
+	struct sctp_sock *sp = sctp_sk(ep->base.sk);
+	struct sctp_chunk *chunk = NULL, *new_obj;
 	struct sctp_packet *packet;
+	struct sctp_sackhdr sackh;
 	struct timer_list *timer;
-	unsigned long timeout;
 	struct sctp_transport *t;
-	struct sctp_sackhdr sackh;
+	unsigned long timeout;
+	struct sctp_cmd *cmd;
 	int local_cork = 0;
+	int error = 0;
+	int force;
 
 	if (SCTP_EVENT_T_TIMEOUT != event_type)
 		chunk = event_arg;
@@ -1607,12 +1629,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			break;
 
 		case SCTP_CMD_INIT_FAILED:
-			sctp_cmd_init_failed(commands, asoc, cmd->obj.err);
+			sctp_cmd_init_failed(commands, asoc, cmd->obj.u32);
 			break;
 
 		case SCTP_CMD_ASSOC_FAILED:
 			sctp_cmd_assoc_failed(commands, asoc, event_type,
-					      subtype, chunk, cmd->obj.err);
+					      subtype, chunk, cmd->obj.u32);
 			break;
 
 		case SCTP_CMD_INIT_COUNTER_INC:
@@ -1680,8 +1702,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 		case SCTP_CMD_PROCESS_CTSN:
 			/* Dummy up a SACK for processing. */
 			sackh.cum_tsn_ack = cmd->obj.be32;
-			sackh.a_rwnd = asoc->peer.rwnd +
-					asoc->outqueue.outstanding_bytes;
+			sackh.a_rwnd = htonl(asoc->peer.rwnd +
+					     asoc->outqueue.outstanding_bytes);
 			sackh.num_gap_ack_blocks = 0;
 			sackh.num_dup_tsns = 0;
 			chunk->subh.sack_hdr = &sackh;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index b2a74c3823ee..8f8ccded13e4 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -59,103 +59,110 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/structs.h>
 
-static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
-				  const struct sctp_endpoint *ep,
-				  const struct sctp_association *asoc,
-				  struct sctp_chunk *chunk,
-				  const void *payload,
-				  size_t paylen);
+static struct sctp_packet *sctp_abort_pkt_new(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk,
+					const void *payload, size_t paylen);
 static int sctp_eat_data(const struct sctp_association *asoc,
 			 struct sctp_chunk *chunk,
-			 sctp_cmd_seq_t *commands);
-static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
-					     const struct sctp_association *asoc,
-					     const struct sctp_chunk *chunk);
+			 struct sctp_cmd_seq *commands);
+static struct sctp_packet *sctp_ootb_pkt_new(
+					struct net *net,
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk);
 static void sctp_send_stale_cookie_err(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const struct sctp_chunk *chunk,
-				       sctp_cmd_seq_t *commands,
+				       struct sctp_cmd_seq *commands,
 				       struct sctp_chunk *err_chunk);
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
-						 const struct sctp_endpoint *ep,
-						 const struct sctp_association *asoc,
-						 const sctp_subtype_t type,
-						 void *arg,
-						 sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
-					     const struct sctp_endpoint *ep,
-					     const struct sctp_association *asoc,
-					     const sctp_subtype_t type,
-					     void *arg,
-					     sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+static enum sctp_disposition sctp_sf_do_5_2_6_stale(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_shut_8_4_5(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_tabort_8_4_8(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands);
+					struct sctp_cmd_seq *commands);
 static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
 
-static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
-					   sctp_cmd_seq_t *commands,
-					   __be16 error, int sk_err,
-					   const struct sctp_association *asoc,
-					   struct sctp_transport *transport);
+static enum sctp_disposition sctp_stop_t1_and_abort(
+					struct net *net,
+					struct sctp_cmd_seq *commands,
+					__be16 error, int sk_err,
+					const struct sctp_association *asoc,
+					struct sctp_transport *transport);
 
-static sctp_disposition_t sctp_sf_abort_violation(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     void *arg,
-				     sctp_cmd_seq_t *commands,
-				     const __u8 *payload,
-				     const size_t paylen);
+static enum sctp_disposition sctp_sf_abort_violation(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					void *arg,
+					struct sctp_cmd_seq *commands,
+					const __u8 *payload,
+					const size_t paylen);
 
-static sctp_disposition_t sctp_sf_violation_chunklen(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_chunklen(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
 
-static sctp_disposition_t sctp_sf_violation_paramlen(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg, void *ext,
-				     sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_paramlen(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg, void *ext,
+					struct sctp_cmd_seq *commands);
 
-static sctp_disposition_t sctp_sf_violation_ctsn(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_ctsn(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
 
-static sctp_disposition_t sctp_sf_violation_chunk(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_chunk(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands);
 
-static sctp_ierror_t sctp_sf_authenticate(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    struct sctp_chunk *chunk);
+static enum sctp_ierror sctp_sf_authenticate(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					struct sctp_chunk *chunk);
 
-static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+static enum sctp_disposition __sctp_sf_do_9_1_abort(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands);
+					struct sctp_cmd_seq *commands);
 
 /* Small helper function that checks if the chunk length
  * is of the appropriate length.  The 'required_length' argument
@@ -164,8 +171,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
  * 		   false = Invalid length
  *
  */
-static inline bool
-sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length)
+static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk,
+					   __u16 required_length)
 {
 	__u16 chunk_length = ntohs(chunk->chunk_hdr->length);
 
@@ -213,12 +220,11 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length)
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_4_C(struct net *net,
-				  const struct sctp_endpoint *ep,
-				  const struct sctp_association *asoc,
-				  const sctp_subtype_t type,
-				  void *arg,
-				  sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_4_C(struct net *net,
+				     const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const union sctp_subtype type,
+				     void *arg, struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ulpevent *ev;
@@ -299,19 +305,17 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = arg;
-	struct sctp_chunk *repl;
+	struct sctp_chunk *chunk = arg, *repl, *err_chunk;
+	struct sctp_unrecognized_param *unk_param;
 	struct sctp_association *new_asoc;
-	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
-	sctp_unrecognized_param_t *unk_param;
 	int len;
 
 	/* 6.10 Bundling
@@ -435,7 +439,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 		 * construct the parameters in INIT ACK by copying the
 		 * ERROR causes over.
 		 */
-		unk_param = (sctp_unrecognized_param_t *)
+		unk_param = (struct sctp_unrecognized_param *)
 			    ((__u8 *)(err_chunk->chunk_hdr) +
 			    sizeof(struct sctp_chunkhdr));
 		/* Replace the cause code with the "Unrecognized parameter"
@@ -495,15 +499,15 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
-				       void *arg,
-				       sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = arg;
 	struct sctp_init_chunk *initchunk;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
 
@@ -518,7 +522,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 		return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the INIT-ACK chunk has a valid length */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_initack_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* Grab the INIT header.  */
@@ -530,7 +534,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 			      (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 
-		sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
+		enum sctp_error error = SCTP_ERROR_NO_RESOURCE;
 
 		/* This chunk contains fatal error. It is to be discarded.
 		 * Send an ABORT, with causes.  If there are no causes,
@@ -645,20 +649,21 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
-				      const struct sctp_endpoint *ep,
-				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type, void *arg,
-				      sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
+					 const struct sctp_endpoint *ep,
+					 const struct sctp_association *asoc,
+					 const union sctp_subtype type,
+					 void *arg,
+					 struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = arg;
+	struct sctp_ulpevent *ev, *ai_ev = NULL;
 	struct sctp_association *new_asoc;
 	struct sctp_init_chunk *peer_init;
-	struct sctp_chunk *repl;
-	struct sctp_ulpevent *ev, *ai_ev = NULL;
-	int error = 0;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *err_chk_p;
+	struct sctp_chunk *repl;
 	struct sock *sk;
+	int error = 0;
 
 	/* If the packet is an OOTB packet which is temporarily on the
 	 * control endpoint, respond with an ABORT.
@@ -758,7 +763,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
 	 */
 	if (chunk->auth_chunk) {
 		struct sctp_chunk auth;
-		sctp_ierror_t ret;
+		enum sctp_ierror ret;
 
 		/* Make sure that we and the peer are AUTH capable */
 		if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
@@ -872,11 +877,12 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
-				      const struct sctp_endpoint *ep,
-				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type, void *arg,
-				      sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
+					 const struct sctp_endpoint *ep,
+					 const struct sctp_association *asoc,
+					 const union sctp_subtype type,
+					 void *arg,
+					 struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ulpevent *ev;
@@ -950,11 +956,12 @@ nomem:
 }
 
 /* Generate and sendout a heartbeat packet.  */
-static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
-					    const struct sctp_association *asoc,
-					    const sctp_subtype_t type,
-					    void *arg,
-					    sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_heartbeat(
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = (struct sctp_transport *) arg;
 	struct sctp_chunk *reply;
@@ -975,12 +982,12 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
 }
 
 /* Generate a HEARTBEAT packet on the given transport.  */
-sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_sendbeat_8_3(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = (struct sctp_transport *) arg;
 
@@ -1023,11 +1030,12 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
 }
 
 /* resend asoc strreset_chunk.  */
-sctp_disposition_t sctp_sf_send_reconf(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type, void *arg,
-				       sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_send_reconf(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = arg;
 
@@ -1074,12 +1082,11 @@ sctp_disposition_t sctp_sf_send_reconf(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    void *arg,
-				    sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_beat_8_3(struct net *net,
+				       const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const union sctp_subtype type,
+				       void *arg, struct sctp_cmd_seq *commands)
 {
 	struct sctp_paramhdr *param_hdr;
 	struct sctp_chunk *chunk = arg;
@@ -1090,7 +1097,8 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the HEARTBEAT chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk,
+				     sizeof(struct sctp_heartbeat_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -1098,7 +1106,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
 	 * respond with a HEARTBEAT ACK that contains the Heartbeat
 	 * Information field copied from the received HEARTBEAT chunk.
 	 */
-	chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data;
+	chunk->subh.hb_hdr = (struct sctp_heartbeathdr *)chunk->skb->data;
 	param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr;
 	paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr);
 
@@ -1148,34 +1156,32 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
+	struct sctp_sender_hb_info *hbinfo;
 	struct sctp_chunk *chunk = arg;
-	union sctp_addr from_addr;
 	struct sctp_transport *link;
-	sctp_sender_hb_info_t *hbinfo;
 	unsigned long max_interval;
+	union sctp_addr from_addr;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the HEARTBEAT-ACK chunk has a valid length.  */
 	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) +
-					    sizeof(sctp_sender_hb_info_t)))
+					    sizeof(*hbinfo)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+	hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data;
 	/* Make sure that the length of the parameter is what we expect */
-	if (ntohs(hbinfo->param_hdr.length) !=
-				    sizeof(sctp_sender_hb_info_t)) {
+	if (ntohs(hbinfo->param_hdr.length) != sizeof(*hbinfo))
 		return SCTP_DISPOSITION_DISCARD;
-	}
 
 	from_addr = hbinfo->daddr;
 	link = sctp_assoc_lookup_paddr(asoc, &from_addr);
@@ -1227,15 +1233,15 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
  */
 static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 				      struct sctp_chunk *init,
-				      sctp_cmd_seq_t *commands)
+				      struct sctp_cmd_seq *commands)
 {
-	int len;
-	struct sctp_packet *pkt;
+	struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
 	union sctp_addr_param *addrparm;
 	struct sctp_errhdr *errhdr;
+	char buffer[sizeof(*errhdr) + sizeof(*addrparm)];
 	struct sctp_endpoint *ep;
-	char buffer[sizeof(struct sctp_errhdr)+sizeof(union sctp_addr_param)];
-	struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
+	struct sctp_packet *pkt;
+	int len;
 
 	/* Build the error on the stack.   We are way to malloc crazy
 	 * throughout the code today.
@@ -1245,7 +1251,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 
 	/* Copy into a parm format. */
 	len = af->to_addr_param(ssa, addrparm);
-	len += sizeof(sctp_errhdr_t);
+	len += sizeof(*errhdr);
 
 	errhdr->cause = SCTP_ERROR_RESTART;
 	errhdr->length = htons(len);
@@ -1292,7 +1298,7 @@ static bool list_has_sctp_addr(const struct list_head *list,
 static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
 				       const struct sctp_association *asoc,
 				       struct sctp_chunk *init,
-				       sctp_cmd_seq_t *commands)
+				       struct sctp_cmd_seq *commands)
 {
 	struct net *net = sock_net(new_asoc->base.sk);
 	struct sctp_transport *new_addr;
@@ -1412,20 +1418,19 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc,
 /* Common helper routine for both duplicate and simulataneous INIT
  * chunk handling.
  */
-static sctp_disposition_t sctp_sf_do_unexpected_init(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg, sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_do_unexpected_init(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	sctp_disposition_t retval;
-	struct sctp_chunk *chunk = arg;
-	struct sctp_chunk *repl;
+	struct sctp_chunk *chunk = arg, *repl, *err_chunk;
+	struct sctp_unrecognized_param *unk_param;
 	struct sctp_association *new_asoc;
-	struct sctp_chunk *err_chunk;
+	enum sctp_disposition retval;
 	struct sctp_packet *packet;
-	sctp_unrecognized_param_t *unk_param;
 	int len;
 
 	/* 6.10 Bundling
@@ -1555,7 +1560,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 		 * construct the parameters in INIT ACK by copying the
 		 * ERROR causes over.
 		 */
-		unk_param = (sctp_unrecognized_param_t *)
+		unk_param = (struct sctp_unrecognized_param *)
 			    ((__u8 *)(err_chunk->chunk_hdr) +
 			    sizeof(struct sctp_chunkhdr));
 		/* Replace the cause code with the "Unrecognized parameter"
@@ -1626,12 +1631,13 @@ cleanup:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    void *arg,
-				    sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_2_1_siminit(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* Call helper to do the real work for both simulataneous and
 	 * duplicate INIT chunk handling.
@@ -1680,12 +1686,13 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
+enum sctp_disposition sctp_sf_do_5_2_2_dupinit(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	/* Call helper to do the real work for both simulataneous and
 	 * duplicate INIT chunk handling.
@@ -1703,11 +1710,13 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
  * An unexpected INIT ACK usually indicates the processing of an old or
  * duplicated INIT chunk.
 */
-sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
-					    const struct sctp_endpoint *ep,
-					    const struct sctp_association *asoc,
-					    const sctp_subtype_t type,
-					    void *arg, sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_2_3_initack(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* Per the above section, we'll discard the chunk if we have an
 	 * endpoint.  If this is an OOTB INIT-ACK, treat it as such.
@@ -1723,18 +1732,19 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
  * Section 5.2.4
  *  A)  In this case, the peer may have restarted.
  */
-static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_a(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
-					sctp_cmd_seq_t *commands,
+					struct sctp_cmd_seq *commands,
 					struct sctp_association *new_asoc)
 {
 	struct sctp_init_chunk *peer_init;
+	enum sctp_disposition disposition;
 	struct sctp_ulpevent *ev;
 	struct sctp_chunk *repl;
 	struct sctp_chunk *err;
-	sctp_disposition_t disposition;
 
 	/* new_asoc is a brand-new association, so these are not yet
 	 * side effects--it is safe to run them here.
@@ -1838,11 +1848,12 @@ nomem:
  *      after responding to the local endpoint's INIT
  */
 /* This case represents an initialization collision.  */
-static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_b(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
-					sctp_cmd_seq_t *commands,
+					struct sctp_cmd_seq *commands,
 					struct sctp_association *new_asoc)
 {
 	struct sctp_init_chunk *peer_init;
@@ -1909,11 +1920,12 @@ nomem:
  *     but a new tag of its own.
  */
 /* This case represents an initialization collision.  */
-static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_c(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
-					sctp_cmd_seq_t *commands,
+					struct sctp_cmd_seq *commands,
 					struct sctp_association *new_asoc)
 {
 	/* The cookie should be silently discarded.
@@ -1931,11 +1943,12 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
  *    enter the ESTABLISHED state, if it has not already done so.
  */
 /* This case represents an initialization collision.  */
-static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_d(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
-					sctp_cmd_seq_t *commands,
+					struct sctp_cmd_seq *commands,
 					struct sctp_association *new_asoc)
 {
 	struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
@@ -2026,19 +2039,20 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
+enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
-	sctp_disposition_t retval;
-	struct sctp_chunk *chunk = arg;
 	struct sctp_association *new_asoc;
+	struct sctp_chunk *chunk = arg;
+	enum sctp_disposition retval;
+	struct sctp_chunk *err_chk_p;
 	int error = 0;
 	char action;
-	struct sctp_chunk *err_chk_p;
 
 	/* Make sure that the chunk has a valid length from the protocol
 	 * perspective.  In this case check to make sure we have at least
@@ -2144,13 +2158,13 @@ nomem:
  *
  * See sctp_sf_do_9_1_abort().
  */
-sctp_disposition_t sctp_sf_shutdown_pending_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_pending_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -2167,7 +2181,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* ADD-IP: Special case for ABORT chunks
@@ -2187,12 +2201,13 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
  *
  * See sctp_sf_do_9_1_abort().
  */
-sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
+enum sctp_disposition sctp_sf_shutdown_sent_abort(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -2209,7 +2224,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* ADD-IP: Special case for ABORT chunks
@@ -2237,13 +2252,13 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
  *
  * See sctp_sf_do_9_1_abort().
  */
-sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_ack_sent_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* The same T2 timer, so we should be able to use
 	 * common function with the SHUTDOWN-SENT state.
@@ -2265,15 +2280,16 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
+enum sctp_disposition sctp_sf_cookie_echoed_err(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_errhdr_t *err;
+	struct sctp_errhdr *err;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2281,7 +2297,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
 	/* Make sure that the ERROR chunk has a valid length.
 	 * The parameter walking depends on this as well.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -2329,20 +2345,20 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
-						 const struct sctp_endpoint *ep,
-						 const struct sctp_association *asoc,
-						 const sctp_subtype_t type,
-						 void *arg,
-						 sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_do_5_2_6_stale(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = arg;
-	u32 stale;
-	sctp_cookie_preserve_param_t bht;
-	sctp_errhdr_t *err;
-	struct sctp_chunk *reply;
-	struct sctp_bind_addr *bp;
 	int attempts = asoc->init_err_counter + 1;
+	struct sctp_chunk *chunk = arg, *reply;
+	struct sctp_cookie_preserve_param bht;
+	struct sctp_bind_addr *bp;
+	struct sctp_errhdr *err;
+	u32 stale;
 
 	if (attempts > asoc->max_init_attempts) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
@@ -2352,7 +2368,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 		return SCTP_DISPOSITION_DELETE_TCB;
 	}
 
-	err = (sctp_errhdr_t *)(chunk->skb->data);
+	err = (struct sctp_errhdr *)(chunk->skb->data);
 
 	/* When calculating the time extension, an implementation
 	 * SHOULD use the RTT information measured based on the
@@ -2368,7 +2384,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
 	 * to give ample time to retransmit the new cookie and thus
 	 * yield a higher probability of success on the reattempt.
 	 */
-	stale = ntohl(*(__be32 *)((u8 *)err + sizeof(sctp_errhdr_t)));
+	stale = ntohl(*(__be32 *)((u8 *)err + sizeof(*err)));
 	stale = (stale * 2) / 1000;
 
 	bht.param_hdr.type = SCTP_PARAM_COOKIE_PRESERVATIVE;
@@ -2452,12 +2468,13 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
+enum sctp_disposition sctp_sf_do_9_1_abort(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -2474,7 +2491,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* ADD-IP: Special case for ABORT chunks
@@ -2489,27 +2506,29 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
 	return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
 }
 
-static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+static enum sctp_disposition __sctp_sf_do_9_1_abort(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
+	__be16 error = SCTP_ERROR_NO_ERROR;
 	struct sctp_chunk *chunk = arg;
 	unsigned int len;
-	__be16 error = SCTP_ERROR_NO_ERROR;
 
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
 	if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
+		struct sctp_errhdr *err;
 
-		sctp_errhdr_t *err;
 		sctp_walk_errors(err, chunk->chunk_hdr);
 		if ((void *)err != (void *)chunk->chunk_end)
-			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg,
+						commands);
 
-		error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+		error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
 	}
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
@@ -2526,16 +2545,17 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
  *
  * See sctp_sf_do_9_1_abort() above.
  */
-sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_wait_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
+	__be16 error = SCTP_ERROR_NO_ERROR;
 	struct sctp_chunk *chunk = arg;
 	unsigned int len;
-	__be16 error = SCTP_ERROR_NO_ERROR;
 
 	if (!sctp_vtag_verify_either(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2550,13 +2570,13 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
 	if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
-		error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+		error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
 
 	return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc,
 				      chunk->transport);
@@ -2565,12 +2585,13 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
 /*
  * Process an incoming ICMP as an ABORT.  (COOKIE-WAIT state)
  */
-sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
+enum sctp_disposition sctp_sf_cookie_wait_icmp_abort(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR,
 				      ENOPROTOOPT, asoc,
@@ -2580,12 +2601,13 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
 /*
  * Process an ABORT.  (COOKIE-ECHOED state)
  */
-sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
-					       const struct sctp_endpoint *ep,
-					       const struct sctp_association *asoc,
-					       const sctp_subtype_t type,
-					       void *arg,
-					       sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
@@ -2598,11 +2620,12 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
  *
  * This is common code called by several sctp_sf_*_abort() functions above.
  */
-static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
-					   sctp_cmd_seq_t *commands,
-					   __be16 error, int sk_err,
-					   const struct sctp_association *asoc,
-					   struct sctp_transport *transport)
+static enum sctp_disposition sctp_stop_t1_and_abort(
+					struct net *net,
+					struct sctp_cmd_seq *commands,
+					__be16 error, int sk_err,
+					const struct sctp_association *asoc,
+					struct sctp_transport *transport)
 {
 	pr_debug("%s: ABORT received (INIT)\n", __func__);
 
@@ -2652,16 +2675,17 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
-					   void *arg,
-					   sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
+	enum sctp_disposition disposition;
 	struct sctp_chunk *chunk = arg;
-	sctp_shutdownhdr_t *sdh;
-	sctp_disposition_t disposition;
+	struct sctp_shutdownhdr *sdh;
 	struct sctp_ulpevent *ev;
 	__u32 ctsn;
 
@@ -2669,14 +2693,13 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk,
-				      sizeof(struct sctp_shutdown_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* Convert the elaborate header.  */
-	sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
-	skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t));
+	sdh = (struct sctp_shutdownhdr *)chunk->skb->data;
+	skb_pull(chunk->skb, sizeof(*sdh));
 	chunk->subh.shutdown_hdr = sdh;
 	ctsn = ntohl(sdh->cum_tsn_ack);
 
@@ -2742,27 +2765,27 @@ out:
  * The Cumulative TSN Ack of the received SHUTDOWN chunk
  * MUST be processed.
  */
-sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
-					   void *arg,
-					   sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_shut_ctsn(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_shutdownhdr_t *sdh;
+	struct sctp_shutdownhdr *sdh;
 	__u32 ctsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk,
-				      sizeof(struct sctp_shutdown_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
+	sdh = (struct sctp_shutdownhdr *)chunk->skb->data;
 	ctsn = ntohl(sdh->cum_tsn_ack);
 
 	if (TSN_lt(ctsn, asoc->ctsn_ack_point)) {
@@ -2796,14 +2819,15 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
  * that belong to this association, it should discard the INIT chunk and
  * retransmit the SHUTDOWN ACK chunk.
  */
-sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    void *arg,
-				    sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_reshutack(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
 
 	/* Make sure that the chunk has a valid length */
@@ -2860,26 +2884,26 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
-				      const struct sctp_endpoint *ep,
-				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type,
-				      void *arg,
-				      sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_ecn_cwr(struct net *net,
+					 const struct sctp_endpoint *ep,
+					 const struct sctp_association *asoc,
+					 const union sctp_subtype type,
+					 void *arg,
+					 struct sctp_cmd_seq *commands)
 {
-	sctp_cwrhdr_t *cwr;
 	struct sctp_chunk *chunk = arg;
+	struct sctp_cwrhdr *cwr;
 	u32 lowest_tsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	cwr = (sctp_cwrhdr_t *) chunk->skb->data;
-	skb_pull(chunk->skb, sizeof(sctp_cwrhdr_t));
+	cwr = (struct sctp_cwrhdr *)chunk->skb->data;
+	skb_pull(chunk->skb, sizeof(*cwr));
 
 	lowest_tsn = ntohl(cwr->lowest_tsn);
 
@@ -2916,25 +2940,24 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_ecne(struct net *net,
-				   const struct sctp_endpoint *ep,
-				   const struct sctp_association *asoc,
-				   const sctp_subtype_t type,
-				   void *arg,
-				   sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_ecne(struct net *net,
+				      const struct sctp_endpoint *ep,
+				      const struct sctp_association *asoc,
+				      const union sctp_subtype type,
+				      void *arg, struct sctp_cmd_seq *commands)
 {
-	sctp_ecnehdr_t *ecne;
 	struct sctp_chunk *chunk = arg;
+	struct sctp_ecnehdr *ecne;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	ecne = (sctp_ecnehdr_t *) chunk->skb->data;
-	skb_pull(chunk->skb, sizeof(sctp_ecnehdr_t));
+	ecne = (struct sctp_ecnehdr *)chunk->skb->data;
+	skb_pull(chunk->skb, sizeof(*ecne));
 
 	/* If this is a newer ECNE than the last CWR packet we sent out */
 	sctp_add_cmd_sf(commands, SCTP_CMD_ECN_ECNE,
@@ -2973,15 +2996,15 @@ sctp_disposition_t sctp_sf_do_ecne(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
+	union sctp_arg force = SCTP_NOFORCE();
 	struct sctp_chunk *chunk = arg;
-	sctp_arg_t force = SCTP_NOFORCE();
 	int error;
 
 	if (!sctp_vtag_verify(chunk, asoc)) {
@@ -3093,12 +3116,13 @@ discard_noforce:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_data_fast_4_4(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	int error;
@@ -3184,22 +3208,22 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_sackhdr_t *sackh;
+	struct sctp_sackhdr *sackh;
 	__u32 ctsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SACK chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_sack_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
@@ -3258,12 +3282,13 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
  *
  * The return value is the disposition of the chunk.
 */
-static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+static enum sctp_disposition sctp_sf_tabort_8_4_8(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk = arg;
@@ -3308,21 +3333,21 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
  *
  * The return value is the disposition of the chunk.
 */
-sctp_disposition_t sctp_sf_operr_notify(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_operr_notify(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	sctp_errhdr_t *err;
+	struct sctp_errhdr *err;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ERROR chunk has a valid length. */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	sctp_walk_errors(err, chunk->chunk_hdr);
@@ -3346,12 +3371,12 @@ sctp_disposition_t sctp_sf_operr_notify(struct net *net,
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_final(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
@@ -3429,20 +3454,19 @@ nomem:
  *    receiver of the OOTB packet shall discard the OOTB packet and take
  *    no further action.
  */
-sctp_disposition_t sctp_sf_ootb(struct net *net,
-				const struct sctp_endpoint *ep,
-				const struct sctp_association *asoc,
-				const sctp_subtype_t type,
-				void *arg,
-				sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_ootb(struct net *net,
+				   const struct sctp_endpoint *ep,
+				   const struct sctp_association *asoc,
+				   const union sctp_subtype type,
+				   void *arg, struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sk_buff *skb = chunk->skb;
 	struct sctp_chunkhdr *ch;
-	sctp_errhdr_t *err;
-	__u8 *ch_end;
-	int ootb_shut_ack = 0;
+	struct sctp_errhdr *err;
 	int ootb_cookie_ack = 0;
+	int ootb_shut_ack = 0;
+	__u8 *ch_end;
 
 	SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
 
@@ -3518,16 +3542,17 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
  * (endpoint, asoc, type, arg, commands)
  *
  * Outputs
- * (sctp_disposition_t)
+ * (enum sctp_disposition)
  *
  * The return value is the disposition of the chunk.
  */
-static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
-					     const struct sctp_endpoint *ep,
-					     const struct sctp_association *asoc,
-					     const sctp_subtype_t type,
-					     void *arg,
-					     sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_shut_8_4_5(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk = arg;
@@ -3584,12 +3609,12 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
  *   chunks. --piggy ]
  *
  */
-sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
-				      const struct sctp_endpoint *ep,
-				      const struct sctp_association *asoc,
-				      const sctp_subtype_t type,
-				      void *arg,
-				      sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const union sctp_subtype type,
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -3609,17 +3634,18 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
 }
 
 /* ADDIP Section 4.2 Upon reception of an ASCONF Chunk.  */
-sctp_disposition_t sctp_sf_do_asconf(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type, void *arg,
-				     sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_asconf(struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk	*chunk = arg;
-	struct sctp_chunk	*asconf_ack = NULL;
-	struct sctp_paramhdr	*err_param = NULL;
-	sctp_addiphdr_t		*hdr;
-	__u32			serial;
+	struct sctp_paramhdr *err_param = NULL;
+	struct sctp_chunk *asconf_ack = NULL;
+	struct sctp_chunk *chunk = arg;
+	struct sctp_addiphdr *hdr;
+	__u32 serial;
 
 	if (!sctp_vtag_verify(chunk, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3634,14 +3660,15 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
 	 * described in [I-D.ietf-tsvwg-sctp-auth].
 	 */
 	if (!net->sctp.addip_noauth && !chunk->auth)
-		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+		return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
+					     commands);
 
 	/* Make sure that the ASCONF ADDIP chunk has a valid length.  */
-	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
+	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	hdr = (sctp_addiphdr_t *)chunk->skb->data;
+	hdr = (struct sctp_addiphdr *)chunk->skb->data;
 	serial = ntohl(hdr->serial);
 
 	/* Verify the ASCONF chunk before processing it. */
@@ -3725,18 +3752,19 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
  * When building TLV parameters for the ASCONF Chunk that will add or
  * delete IP addresses the D0 to D13 rules should be applied:
  */
-sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
-					 const struct sctp_endpoint *ep,
-					 const struct sctp_association *asoc,
-					 const sctp_subtype_t type, void *arg,
-					 sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const union sctp_subtype type,
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk	*asconf_ack = arg;
-	struct sctp_chunk	*last_asconf = asoc->addip_last_asconf;
-	struct sctp_chunk	*abort;
-	struct sctp_paramhdr	*err_param = NULL;
-	sctp_addiphdr_t		*addip_hdr;
-	__u32			sent_serial, rcvd_serial;
+	struct sctp_chunk *last_asconf = asoc->addip_last_asconf;
+	struct sctp_paramhdr *err_param = NULL;
+	struct sctp_chunk *asconf_ack = arg;
+	struct sctp_addiphdr *addip_hdr;
+	__u32 sent_serial, rcvd_serial;
+	struct sctp_chunk *abort;
 
 	if (!sctp_vtag_verify(asconf_ack, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3751,14 +3779,16 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 	 * described in [I-D.ietf-tsvwg-sctp-auth].
 	 */
 	if (!net->sctp.addip_noauth && !asconf_ack->auth)
-		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+		return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
+					     commands);
 
 	/* Make sure that the ADDIP chunk has a valid length.  */
-	if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
+	if (!sctp_chunk_length_valid(asconf_ack,
+				     sizeof(struct sctp_addip_chunk)))
 		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
-	addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data;
+	addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data;
 	rcvd_serial = ntohl(addip_hdr->serial);
 
 	/* Verify the ASCONF-ACK chunk before processing it. */
@@ -3767,7 +3797,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 			   (void *)err_param, commands);
 
 	if (last_asconf) {
-		addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr;
+		addip_hdr = (struct sctp_addiphdr *)last_asconf->subh.addip_hdr;
 		sent_serial = ntohl(addip_hdr->serial);
 	} else {
 		sent_serial = asoc->addip_serial - 1;
@@ -3782,7 +3812,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 	if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) &&
 	    !(asoc->addip_last_asconf)) {
 		abort = sctp_make_abort(asoc, asconf_ack,
-					sizeof(sctp_errhdr_t));
+					sizeof(struct sctp_errhdr));
 		if (abort) {
 			sctp_init_cause(abort, SCTP_ERROR_ASCONF_ACK, 0);
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -3818,7 +3848,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 		}
 
 		abort = sctp_make_abort(asoc, asconf_ack,
-					sizeof(sctp_errhdr_t));
+					sizeof(struct sctp_errhdr));
 		if (abort) {
 			sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -3841,11 +3871,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 }
 
 /* RE-CONFIG Section 5.2 Upon reception of an RECONF Chunk. */
-sctp_disposition_t sctp_sf_do_reconf(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type, void *arg,
-				     sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_reconf(struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_paramhdr *err_param = NULL;
 	struct sctp_chunk *chunk = arg;
@@ -3917,15 +3948,15 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
-				       void *arg,
-				       sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = arg;
 	struct sctp_fwdtsn_hdr *fwdtsn_hdr;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_fwdtsn_skip *skip;
 	__u16 len;
 	__u32 tsn;
@@ -3987,16 +4018,16 @@ discard_noforce:
 	return SCTP_DISPOSITION_DISCARD;
 }
 
-sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = arg;
 	struct sctp_fwdtsn_hdr *fwdtsn_hdr;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_fwdtsn_skip *skip;
 	__u16 len;
 	__u32 tsn;
@@ -4079,23 +4110,23 @@ gen_shutdown:
  *
  * The return value is the disposition of the chunk.
  */
-static sctp_ierror_t sctp_sf_authenticate(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    struct sctp_chunk *chunk)
+static enum sctp_ierror sctp_sf_authenticate(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					struct sctp_chunk *chunk)
 {
 	struct sctp_authhdr *auth_hdr;
+	__u8 *save_digest, *digest;
 	struct sctp_hmac *hmac;
 	unsigned int sig_len;
 	__u16 key_id;
-	__u8 *save_digest;
-	__u8 *digest;
 
 	/* Pull in the auth header, so we can do some more verification */
 	auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
 	chunk->subh.auth_hdr = auth_hdr;
-	skb_pull(chunk->skb, sizeof(struct sctp_authhdr));
+	skb_pull(chunk->skb, sizeof(*auth_hdr));
 
 	/* Make sure that we support the HMAC algorithm from the auth
 	 * chunk.
@@ -4114,7 +4145,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net,
 	/* Make sure that the length of the signature matches what
 	 * we expect.
 	 */
-	sig_len = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_auth_chunk_t);
+	sig_len = ntohs(chunk->chunk_hdr->length) -
+		  sizeof(struct sctp_auth_chunk);
 	hmac = sctp_auth_get_hmac(ntohs(auth_hdr->hmac_id));
 	if (sig_len != hmac->hmac_len)
 		return SCTP_IERROR_PROTO_VIOLATION;
@@ -4136,8 +4168,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net,
 	memset(digest, 0, sig_len);
 
 	sctp_auth_calculate_hmac(asoc, chunk->skb,
-				(struct sctp_auth_chunk *)chunk->chunk_hdr,
-				GFP_ATOMIC);
+				 (struct sctp_auth_chunk *)chunk->chunk_hdr,
+				 GFP_ATOMIC);
 
 	/* Discard the packet if the digests do not match */
 	if (memcmp(save_digest, digest, sig_len)) {
@@ -4153,17 +4185,16 @@ nomem:
 	return SCTP_IERROR_NOMEM;
 }
 
-sctp_disposition_t sctp_sf_eat_auth(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    void *arg,
-				    sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_auth(struct net *net,
+				       const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const union sctp_subtype type,
+				       void *arg, struct sctp_cmd_seq *commands)
 {
-	struct sctp_authhdr *auth_hdr;
 	struct sctp_chunk *chunk = arg;
+	struct sctp_authhdr *auth_hdr;
 	struct sctp_chunk *err_chunk;
-	sctp_ierror_t error;
+	enum sctp_ierror error;
 
 	/* Make sure that the peer has AUTH capable */
 	if (!asoc->peer.auth_capable)
@@ -4250,12 +4281,12 @@ sctp_disposition_t sctp_sf_eat_auth(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_unk_chunk(struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *unk_chunk = arg;
 	struct sctp_chunk *err_chunk;
@@ -4330,12 +4361,12 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
-					 const struct sctp_endpoint *ep,
-					 const struct sctp_association *asoc,
-					 const sctp_subtype_t type,
-					 void *arg,
-					 sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_discard_chunk(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const union sctp_subtype type,
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -4370,12 +4401,11 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_pdiscard(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    void *arg,
-				    sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_pdiscard(struct net *net,
+				       const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const union sctp_subtype type,
+				       void *arg, struct sctp_cmd_seq *commands)
 {
 	SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
@@ -4398,12 +4428,12 @@ sctp_disposition_t sctp_sf_pdiscard(struct net *net,
  * We simply tag the chunk as a violation.  The state machine will log
  * the violation and continue.
  */
-sctp_disposition_t sctp_sf_violation(struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_violation(struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -4418,14 +4448,14 @@ sctp_disposition_t sctp_sf_violation(struct net *net,
 /*
  * Common function to handle a protocol violation.
  */
-static sctp_disposition_t sctp_sf_abort_violation(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     void *arg,
-				     sctp_cmd_seq_t *commands,
-				     const __u8 *payload,
-				     const size_t paylen)
+static enum sctp_disposition sctp_sf_abort_violation(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					void *arg,
+					struct sctp_cmd_seq *commands,
+					const __u8 *payload,
+					const size_t paylen)
 {
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk =  arg;
@@ -4454,11 +4484,10 @@ static sctp_disposition_t sctp_sf_abort_violation(
 		/* Treat INIT-ACK as a special case during COOKIE-WAIT. */
 		if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK &&
 		    !asoc->peer.i.init_tag) {
-			sctp_initack_chunk_t *initack;
+			struct sctp_initack_chunk *initack;
 
-			initack = (sctp_initack_chunk_t *)chunk->chunk_hdr;
-			if (!sctp_chunk_length_valid(chunk,
-						     sizeof(sctp_initack_chunk_t)))
+			initack = (struct sctp_initack_chunk *)chunk->chunk_hdr;
+			if (!sctp_chunk_length_valid(chunk, sizeof(*initack)))
 				abort->chunk_hdr->flags |= SCTP_CHUNK_FLAG_T;
 			else {
 				unsigned int inittag;
@@ -4521,7 +4550,7 @@ nomem:
  * Handle a protocol violation when the chunk length is invalid.
  * "Invalid" length is identified as smaller than the minimal length a
  * given chunk can be.  For example, a SACK chunk has invalid length
- * if its length is set to be smaller than the size of sctp_sack_chunk_t.
+ * if its length is set to be smaller than the size of struct sctp_sack_chunk.
  *
  * We inform the other end by sending an ABORT with a Protocol Violation
  * error code.
@@ -4536,18 +4565,18 @@ nomem:
  *
  * Generate an  ABORT chunk and terminate the association.
  */
-static sctp_disposition_t sctp_sf_violation_chunklen(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_chunklen(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	static const char err_str[] = "The following chunk had invalid length:";
 
 	return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
-					sizeof(err_str));
+				       sizeof(err_str));
 }
 
 /*
@@ -4556,17 +4585,17 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
  * or accumulated length in multi parameters exceeds the end of the chunk,
  * the length is considered as invalid.
  */
-static sctp_disposition_t sctp_sf_violation_paramlen(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg, void *ext,
-				     sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_paramlen(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg, void *ext,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk =  arg;
 	struct sctp_paramhdr *param = ext;
 	struct sctp_chunk *abort = NULL;
+	struct sctp_chunk *chunk = arg;
 
 	if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
 		goto discard;
@@ -4599,18 +4628,18 @@ nomem:
  * We inform the other end by sending an ABORT with a Protocol Violation
  * error code.
  */
-static sctp_disposition_t sctp_sf_violation_ctsn(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_ctsn(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	static const char err_str[] = "The cumulative tsn ack beyond the max tsn currently sent:";
 
 	return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
-					sizeof(err_str));
+				       sizeof(err_str));
 }
 
 /* Handle protocol violation of an invalid chunk bundling.  For example,
@@ -4619,13 +4648,13 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
  * statement from the specs.  Additionally, there might be an attacker
  * on the path and we may not want to continue this communication.
  */
-static sctp_disposition_t sctp_sf_violation_chunk(
-				     struct net *net,
-				     const struct sctp_endpoint *ep,
-				     const struct sctp_association *asoc,
-				     const sctp_subtype_t type,
-				     void *arg,
-				     sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_chunk(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	static const char err_str[] = "The following chunk violates protocol:";
 
@@ -4633,7 +4662,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 		return sctp_sf_violation(net, ep, asoc, type, arg, commands);
 
 	return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
-					sizeof(err_str));
+				       sizeof(err_str));
 }
 /***************************************************************************
  * These are the state functions for handling primitive (Section 10) events.
@@ -4695,15 +4724,15 @@ static sctp_disposition_t sctp_sf_violation_chunk(
  *
  * The return value is a disposition.
  */
-sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
-				       void *arg,
-				       sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_asoc(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *repl;
 	struct sctp_association *my_asoc;
+	struct sctp_chunk *repl;
 
 	/* The comment below says that we enter COOKIE-WAIT AFTER
 	 * sending the INIT, but that doesn't actually work in our
@@ -4807,12 +4836,12 @@ nomem:
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
-				       void *arg,
-				       sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_send(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
 	struct sctp_datamsg *msg = arg;
 
@@ -4846,15 +4875,15 @@ sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_prm_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	int disposition;
+	enum sctp_disposition disposition;
 
 	/* From 9.2 Shutdown of an Association
 	 * Upon receipt of the SHUTDOWN primitive from its upper
@@ -4872,6 +4901,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
 		disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
 							    arg, commands);
 	}
+
 	return disposition;
 }
 
@@ -4902,13 +4932,13 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_1_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_1_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* From 9.1 Abort of an Association
 	 * Upon receipt of the ABORT primitive from its upper
@@ -4940,12 +4970,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 }
 
 /* We tried an illegal operation on an association which is closed.  */
-sctp_disposition_t sctp_sf_error_closed(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_error_closed(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR, SCTP_ERROR(-EINVAL));
 	return SCTP_DISPOSITION_CONSUME;
@@ -4954,12 +4984,13 @@ sctp_disposition_t sctp_sf_error_closed(struct net *net,
 /* We tried an illegal operation on an association which is shutting
  * down.
  */
-sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
-					  const struct sctp_endpoint *ep,
-					  const struct sctp_association *asoc,
-					  const sctp_subtype_t type,
-					  void *arg,
-					  sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_error_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR,
 			SCTP_ERROR(-ESHUTDOWN));
@@ -4980,13 +5011,13 @@ sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_wait_prm_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
@@ -5015,12 +5046,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg, sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_prm_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
@@ -5042,13 +5074,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_wait_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *abort = arg;
 
@@ -5091,13 +5123,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
@@ -5117,13 +5149,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_pending_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* Stop the T5-shutdown guard timer.  */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -5144,13 +5176,13 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_sent_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* Stop the T2-shutdown timer.  */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -5175,13 +5207,13 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
  * Outputs
  * (timers)
  */
-sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_ack_sent_prm_abort(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	/* The same T2 timer, so we should be able to use
 	 * common function with the SHUTDOWN-SENT state.
@@ -5211,13 +5243,13 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
  * o destination transport address - the transport address of the
  *   association on which a heartbeat should be issued.
  */
-sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
+enum sctp_disposition sctp_sf_do_prm_requestheartbeat(
 					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
+					const union sctp_subtype type,
 					void *arg,
-					sctp_cmd_seq_t *commands)
+					struct sctp_cmd_seq *commands)
 {
 	if (SCTP_DISPOSITION_NOMEM == sctp_sf_heartbeat(ep, asoc, type,
 				      (struct sctp_transport *)arg, commands))
@@ -5244,12 +5276,12 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
  * When an endpoint has an ASCONF signaled change to be sent to the
  * remote endpoint it should do A1 to A9
  */
-sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_asconf(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const union sctp_subtype type,
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -5261,11 +5293,12 @@ sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
 }
 
 /* RE-CONFIG Section 5.1 RECONF Chunk Procedures */
-sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
-					 const struct sctp_endpoint *ep,
-					 const struct sctp_association *asoc,
-					 const sctp_subtype_t type,
-					 void *arg, sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_reconf(struct net *net,
+					    const struct sctp_endpoint *ep,
+					    const struct sctp_association *asoc,
+					    const union sctp_subtype type,
+					    void *arg,
+					    struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = arg;
 
@@ -5278,13 +5311,13 @@ sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
  *
  * The return value is the disposition of the primitive.
  */
-sctp_disposition_t sctp_sf_ignore_primitive(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_ignore_primitive(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	pr_debug("%s: primitive type:%d is ignored\n", __func__,
 		 type.primitive);
@@ -5302,13 +5335,13 @@ sctp_disposition_t sctp_sf_ignore_primitive(
  * subscribes to this event, if there is no data to be sent or
  * retransmit, the stack will immediately send up this notification.
  */
-sctp_disposition_t sctp_sf_do_no_pending_tsn(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_no_pending_tsn(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_ulpevent *event;
 
@@ -5334,13 +5367,13 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn(
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_start_shutdown(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *reply;
 
@@ -5404,15 +5437,15 @@ nomem:
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_shutdown_ack(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
+	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
 
 	/* There are 2 ways of getting here:
@@ -5424,12 +5457,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
 	 */
 	if (chunk) {
 		if (!sctp_vtag_verify(chunk, asoc))
-			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg,
+						commands);
 
 		/* Make sure that the SHUTDOWN chunk has a valid length. */
-		if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t)))
-			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
-							  commands);
+		if (!sctp_chunk_length_valid(
+				chunk, sizeof(struct sctp_shutdown_chunk)))
+			return sctp_sf_violation_chunklen(net, ep, asoc, type,
+							  arg, commands);
 	}
 
 	/* If it has no more outstanding DATA chunks, the SHUTDOWN receiver
@@ -5476,12 +5511,12 @@ nomem:
  *
  * The return value is the disposition of the event.
  */
-sctp_disposition_t sctp_sf_ignore_other(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_ignore_other(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	pr_debug("%s: the event other type:%d is ignored\n",
 		 __func__, type.other);
@@ -5504,12 +5539,12 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_6_3_3_rtx(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	struct sctp_transport *transport = arg;
 
@@ -5592,12 +5627,12 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
  * allow. However, an SCTP transmitter MUST NOT be more aggressive than
  * the following algorithms allow.
  */
-sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
-				       const struct sctp_endpoint *ep,
-				       const struct sctp_association *asoc,
-				       const sctp_subtype_t type,
-				       void *arg,
-				       sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_6_2_sack(struct net *net,
+					  const struct sctp_endpoint *ep,
+					  const struct sctp_association *asoc,
+					  const union sctp_subtype type,
+					  void *arg,
+					  struct sctp_cmd_seq *commands)
 {
 	SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
@@ -5623,16 +5658,17 @@ sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
  * (timers, events)
  *
  */
-sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
-					   void *arg,
-					   sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t1_init_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
+	int attempts = asoc->init_err_counter + 1;
 	struct sctp_chunk *repl = NULL;
 	struct sctp_bind_addr *bp;
-	int attempts = asoc->init_err_counter + 1;
 
 	pr_debug("%s: timer T1 expired (INIT)\n", __func__);
 
@@ -5687,15 +5723,16 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
  * (timers, events)
  *
  */
-sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
-					   void *arg,
-					   sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t1_cookie_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	struct sctp_chunk *repl = NULL;
 	int attempts = asoc->init_err_counter + 1;
+	struct sctp_chunk *repl = NULL;
 
 	pr_debug("%s: timer T1 expired (COOKIE-ECHO)\n", __func__);
 
@@ -5737,12 +5774,13 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
  * the T2-Shutdown timer,  giving its peer ample opportunity to transmit
  * all of its queued DATA chunks that have not yet been sent.
  */
-sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
-					   void *arg,
-					   sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t2_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *reply = NULL;
 
@@ -5807,13 +5845,13 @@ nomem:
  * ADDIP Section 4.1 ASCONF CHunk Procedures
  * If the T4 RTO timer expires the endpoint should do B1 to B5
  */
-sctp_disposition_t sctp_sf_t4_timer_expire(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t4_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *chunk = asoc->addip_last_asconf;
 	struct sctp_transport *transport = chunk->transport;
@@ -5879,12 +5917,13 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
  * At the expiration of this timer the sender SHOULD abort the association
  * by sending an ABORT chunk.
  */
-sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
-					   const struct sctp_endpoint *ep,
-					   const struct sctp_association *asoc,
-					   const sctp_subtype_t type,
-					   void *arg,
-					   sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t5_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
 	struct sctp_chunk *reply = NULL;
 
@@ -5915,15 +5954,15 @@ nomem:
  * The work that needs to be done is same as when SHUTDOWN is initiated by
  * the user.  So this routine looks same as sctp_sf_do_9_2_prm_shutdown().
  */
-sctp_disposition_t sctp_sf_autoclose_timer_expire(
-	struct net *net,
-	const struct sctp_endpoint *ep,
-	const struct sctp_association *asoc,
-	const sctp_subtype_t type,
-	void *arg,
-	sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_autoclose_timer_expire(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					const union sctp_subtype type,
+					void *arg,
+					struct sctp_cmd_seq *commands)
 {
-	int disposition;
+	enum sctp_disposition disposition;
 
 	SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS);
 
@@ -5943,6 +5982,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 		disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
 							    arg, commands);
 	}
+
 	return disposition;
 }
 
@@ -5958,12 +5998,11 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_not_impl(struct net *net,
-				    const struct sctp_endpoint *ep,
-				    const struct sctp_association *asoc,
-				    const sctp_subtype_t type,
-				    void *arg,
-				    sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_not_impl(struct net *net,
+				       const struct sctp_endpoint *ep,
+				       const struct sctp_association *asoc,
+				       const union sctp_subtype type,
+				       void *arg, struct sctp_cmd_seq *commands)
 {
 	return SCTP_DISPOSITION_NOT_IMPL;
 }
@@ -5976,12 +6015,11 @@ sctp_disposition_t sctp_sf_not_impl(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_bug(struct net *net,
-			       const struct sctp_endpoint *ep,
-			       const struct sctp_association *asoc,
-			       const sctp_subtype_t type,
-			       void *arg,
-			       sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_bug(struct net *net,
+				  const struct sctp_endpoint *ep,
+				  const struct sctp_association *asoc,
+				  const union sctp_subtype type,
+				  void *arg, struct sctp_cmd_seq *commands)
 {
 	return SCTP_DISPOSITION_BUG;
 }
@@ -5997,12 +6035,12 @@ sctp_disposition_t sctp_sf_bug(struct net *net,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
-					const struct sctp_endpoint *ep,
-					const struct sctp_association *asoc,
-					const sctp_subtype_t type,
-					void *arg,
-					sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_timer_ignore(struct net *net,
+					   const struct sctp_endpoint *ep,
+					   const struct sctp_association *asoc,
+					   const union sctp_subtype type,
+					   void *arg,
+					   struct sctp_cmd_seq *commands)
 {
 	pr_debug("%s: timer %d ignored\n", __func__, type.chunk);
 
@@ -6017,9 +6055,9 @@ sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
 static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
 {
 	struct sctp_sackhdr *sack;
+	__u16 num_dup_tsns;
 	unsigned int len;
 	__u16 num_blocks;
-	__u16 num_dup_tsns;
 
 	/* Protect ourselves from reading too far into
 	 * the skb from a bogus sender.
@@ -6041,12 +6079,12 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
 /* Create an ABORT packet to be sent as a response, with the specified
  * error causes.
  */
-static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
-				  const struct sctp_endpoint *ep,
-				  const struct sctp_association *asoc,
-				  struct sctp_chunk *chunk,
-				  const void *payload,
-				  size_t paylen)
+static struct sctp_packet *sctp_abort_pkt_new(
+					struct net *net,
+					const struct sctp_endpoint *ep,
+					const struct sctp_association *asoc,
+					struct sctp_chunk *chunk,
+					const void *payload, size_t paylen)
 {
 	struct sctp_packet *packet;
 	struct sctp_chunk *abort;
@@ -6083,14 +6121,14 @@ static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
 }
 
 /* Allocate a packet for responding in the OOTB conditions.  */
-static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
-					     const struct sctp_association *asoc,
-					     const struct sctp_chunk *chunk)
+static struct sctp_packet *sctp_ootb_pkt_new(
+					struct net *net,
+					const struct sctp_association *asoc,
+					const struct sctp_chunk *chunk)
 {
-	struct sctp_packet *packet;
 	struct sctp_transport *transport;
-	__u16 sport;
-	__u16 dport;
+	struct sctp_packet *packet;
+	__u16 sport, dport;
 	__u32 vtag;
 
 	/* Get the source and destination port from the inbound packet.  */
@@ -6107,9 +6145,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
 		switch (chunk->chunk_hdr->type) {
 		case SCTP_CID_INIT_ACK:
 		{
-			sctp_initack_chunk_t *initack;
+			struct sctp_initack_chunk *initack;
 
-			initack = (sctp_initack_chunk_t *)chunk->chunk_hdr;
+			initack = (struct sctp_initack_chunk *)chunk->chunk_hdr;
 			vtag = ntohl(initack->init_hdr.init_tag);
 			break;
 		}
@@ -6168,7 +6206,7 @@ static void sctp_send_stale_cookie_err(struct net *net,
 				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const struct sctp_chunk *chunk,
-				       sctp_cmd_seq_t *commands,
+				       struct sctp_cmd_seq *commands,
 				       struct sctp_chunk *err_chunk)
 {
 	struct sctp_packet *packet;
@@ -6197,20 +6235,19 @@ static void sctp_send_stale_cookie_err(struct net *net,
 /* Process a data chunk */
 static int sctp_eat_data(const struct sctp_association *asoc,
 			 struct sctp_chunk *chunk,
-			 sctp_cmd_seq_t *commands)
+			 struct sctp_cmd_seq *commands)
 {
-	struct sctp_datahdr *data_hdr;
-	struct sctp_chunk *err;
-	size_t datalen;
-	sctp_verb_t deliver;
-	int tmp;
-	__u32 tsn;
 	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
 	struct sock *sk = asoc->base.sk;
 	struct net *net = sock_net(sk);
-	u16 ssn;
-	u16 sid;
+	struct sctp_datahdr *data_hdr;
+	struct sctp_chunk *err;
+	enum sctp_verb deliver;
+	size_t datalen;
 	u8 ordered = 0;
+	u16 ssn, sid;
+	__u32 tsn;
+	int tmp;
 
 	data_hdr = (struct sctp_datahdr *)chunk->skb->data;
 	chunk->subh.data_hdr = data_hdr;
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 3e958c1c4b95..79b6bee5b768 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -45,26 +45,27 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
 primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
 other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
 timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
 
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-							    enum sctp_cid cid,
-							    sctp_state_t state);
+static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
+						struct net *net,
+						enum sctp_cid cid,
+						enum sctp_state state);
 
 
-static const sctp_sm_table_entry_t bug = {
+static const struct sctp_sm_table_entry bug = {
 	.fn = sctp_sf_bug,
 	.name = "sctp_sf_bug"
 };
 
 #define DO_LOOKUP(_max, _type, _table)					\
 ({									\
-	const sctp_sm_table_entry_t *rtn;				\
+	const struct sctp_sm_table_entry *rtn;				\
 									\
 	if ((event_subtype._type > (_max))) {				\
 		pr_warn("table %p possible attack: event %d exceeds max %d\n", \
@@ -76,10 +77,11 @@ static const sctp_sm_table_entry_t bug = {
 	rtn;								\
 })
 
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
-						  sctp_event_t event_type,
-						  sctp_state_t state,
-						  sctp_subtype_t event_subtype)
+const struct sctp_sm_table_entry *sctp_sm_lookup_event(
+					struct net *net,
+					enum sctp_event event_type,
+					enum sctp_state state,
+					union sctp_subtype event_subtype)
 {
 	switch (event_type) {
 	case SCTP_EVENT_T_CHUNK:
@@ -392,7 +394,8 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
  *
  * For base protocol (RFC 2960).
  */
-static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_DATA,
 	TYPE_SCTP_INIT,
 	TYPE_SCTP_INIT_ACK,
@@ -451,7 +454,8 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
 /* The primary index for this table is the chunk type.
  * The secondary index for this table is the state.
  */
-static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_ASCONF,
 	TYPE_SCTP_ASCONF_ACK,
 }; /*state_fn_t addip_chunk_event_table[][] */
@@ -478,7 +482,8 @@ static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_
 /* The primary index for this table is the chunk type.
  * The secondary index for this table is the state.
  */
-static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_FWD_TSN,
 }; /*state_fn_t prsctp_chunk_event_table[][] */
 
@@ -504,7 +509,8 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN
 /* The primary index for this table is the chunk type.
  * The secondary index for this table is the state.
  */
-static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_RECONF,
 }; /*state_fn_t reconf_chunk_event_table[][] */
 
@@ -530,11 +536,12 @@ static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUN
 /* The primary index for this table is the chunk type.
  * The secondary index for this table is the state.
  */
-static const sctp_sm_table_entry_t auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_AUTH,
 }; /*state_fn_t auth_chunk_event_table[][] */
 
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
 chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
 	/* SCTP_STATE_CLOSED */
 	TYPE_SCTP_FUNC(sctp_sf_ootb),
@@ -691,7 +698,8 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
 /* The primary index for this table is the primitive type.
  * The secondary index for this table is the state.
  */
-static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_PRIMITIVE_ASSOCIATE,
 	TYPE_SCTP_PRIMITIVE_SHUTDOWN,
 	TYPE_SCTP_PRIMITIVE_ABORT,
@@ -739,7 +747,8 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE
 	TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
 }
 
-static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_OTHER_NO_PENDING_TSN,
 	TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH,
 };
@@ -953,7 +962,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
 	TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
 }
 
-static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
 	TYPE_SCTP_EVENT_TIMEOUT_NONE,
 	TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE,
 	TYPE_SCTP_EVENT_TIMEOUT_T1_INIT,
@@ -967,9 +977,10 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
 	TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
 };
 
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-							    enum sctp_cid cid,
-							    sctp_state_t state)
+static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
+						struct net *net,
+						enum sctp_cid cid,
+						enum sctp_state state)
 {
 	if (state > SCTP_STATE_MAX)
 		return &bug;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1db478e34520..3204a9b29407 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -79,12 +79,13 @@
 #include <net/sock.h>
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
 
 /* Forward declarations for internal helper functions. */
 static int sctp_writeable(struct sock *sk);
 static void sctp_wfree(struct sk_buff *skb);
-static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p,
-				size_t msg_len);
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+				size_t msg_len, struct sock **orig_sk);
 static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
 static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
 static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -100,8 +101,9 @@ static int sctp_send_asconf(struct sctp_association *asoc,
 			    struct sctp_chunk *chunk);
 static int sctp_do_bind(struct sock *, union sctp_addr *, int);
 static int sctp_autobind(struct sock *sk);
-static void sctp_sock_migrate(struct sock *, struct sock *,
-			      struct sctp_association *, sctp_socket_type_t);
+static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+			      struct sctp_association *assoc,
+			      enum sctp_socket_type type);
 
 static unsigned long sctp_memory_pressure;
 static atomic_long_t sctp_memory_allocated;
@@ -169,6 +171,36 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
 	sk_mem_charge(sk, chunk->skb->truesize);
 }
 
+static void sctp_clear_owner_w(struct sctp_chunk *chunk)
+{
+	skb_orphan(chunk->skb);	/* run per chunk by sctp_sock_migrate() */
+}
+
+/* Run cb() on every chunk queued on asoc's transports and outqueue. */
+static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
+				       void (*cb)(struct sctp_chunk *))
+{
+	struct sctp_outq *outq = &asoc->outqueue;
+	struct sctp_transport *tp;
+	struct sctp_chunk *ch;
+
+	list_for_each_entry(tp, &asoc->peer.transport_addr_list, transports)
+		list_for_each_entry(ch, &tp->transmitted, transmitted_list)
+			cb(ch);
+
+	list_for_each_entry(ch, &outq->retransmit, list)
+		cb(ch);
+
+	list_for_each_entry(ch, &outq->sacked, list)
+		cb(ch);
+
+	list_for_each_entry(ch, &outq->abandoned, list)
+		cb(ch);
+
+	list_for_each_entry(ch, &outq->out_chunk_list, list)
+		cb(ch);
+}
+
 /* Verify that this is a valid address. */
 static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
 				   int len)
@@ -1055,7 +1087,7 @@ static int __sctp_connect(struct sock *sk,
 	struct sctp_association *asoc2;
 	struct sctp_transport *transport;
 	union sctp_addr to;
-	sctp_scope_t scope;
+	enum sctp_scope scope;
 	long timeo;
 	int err = 0;
 	int addrcnt = 0;
@@ -1593,7 +1625,8 @@ static int sctp_error(struct sock *sk, int flags, int err)
  */
 /* BUG:  We do not implement the equivalent of sk_stream_wait_memory(). */
 
-static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
+static int sctp_msghdr_parse(const struct msghdr *msg,
+			     struct sctp_cmsgs *cmsgs);
 
 static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 {
@@ -1609,8 +1642,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	struct sctp_sndrcvinfo *sinfo;
 	struct sctp_initmsg *sinit;
 	sctp_assoc_t associd = 0;
-	sctp_cmsgs_t cmsgs = { NULL };
-	sctp_scope_t scope;
+	struct sctp_cmsgs cmsgs = { NULL };
+	enum sctp_scope scope;
 	bool fill_sinfo_ttl = false, wait_connect = false;
 	struct sctp_datamsg *datamsg;
 	int msg_flags = msg->msg_flags;
@@ -1925,14 +1958,28 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		goto out_free;
 	}
 
+	/* Allocate sctp_stream_out_ext if not already done */
+	if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
+		err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
+		if (err)
+			goto out_free;
+	}
+
 	if (sctp_wspace(asoc) < msg_len)
 		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
 
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 	if (!sctp_wspace(asoc)) {
-		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
-		if (err)
+		/* sk can be changed by peel off when waiting for buf. */
+		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
+		if (err) {
+			if (err == -ESRCH) {
+				/* asoc is already dead. */
+				new_asoc = NULL;
+				err = -EPIPE;
+			}
 			goto out_free;
+		}
 	}
 
 	/* If an address is passed with the sendto/sendmsg call, it is used
@@ -3093,9 +3140,9 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
  */
 static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
 {
+	struct sctp_sock *sp = sctp_sk(sk);
 	struct sctp_assoc_value params;
 	struct sctp_association *asoc;
-	struct sctp_sock *sp = sctp_sk(sk);
 	int val;
 
 	if (optlen == sizeof(int)) {
@@ -3111,26 +3158,35 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
 		if (copy_from_user(&params, optval, optlen))
 			return -EFAULT;
 		val = params.assoc_value;
-	} else
+	} else {
 		return -EINVAL;
+	}
 
-	if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN)))
-		return -EINVAL;
+	if (val) {
+		int min_len, max_len;
 
-	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (!asoc && params.assoc_id && sctp_style(sk, UDP))
-		return -EINVAL;
+		min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len;
+		min_len -= sizeof(struct sctphdr) +
+			   sizeof(struct sctp_data_chunk);
+
+		max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk);
+
+		if (val < min_len || val > max_len)
+			return -EINVAL;
+	}
 
+	asoc = sctp_id2assoc(sk, params.assoc_id);
 	if (asoc) {
 		if (val == 0) {
-			val = asoc->pathmtu;
-			val -= sp->pf->af->net_header_len;
+			val = asoc->pathmtu - sp->pf->af->net_header_len;
 			val -= sizeof(struct sctphdr) +
-					sizeof(struct sctp_data_chunk);
+			       sizeof(struct sctp_data_chunk);
 		}
 		asoc->user_frag = val;
 		asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
 	} else {
+		if (params.assoc_id && sctp_style(sk, UDP))
+			return -EINVAL;
 		sp->user_frag = val;
 	}
 
@@ -3905,6 +3961,64 @@ out:
 	return retval;
 }
 
+/* Handle setsockopt(SCTP_STREAM_SCHEDULER): select the stream scheduler
+ * of one association.  optval carries a struct sctp_assoc_value; any
+ * bytes past it are ignored.
+ * Returns -EINVAL for a short buffer, an assoc_value above SCTP_SS_MAX
+ * or an assoc_id that sctp_id2assoc() cannot resolve, -EFAULT on a
+ * failed copy, otherwise the result of sctp_sched_set_sched().
+ */
+static int sctp_setsockopt_scheduler(struct sock *sk,
+				     char __user *optval,
+				     unsigned int optlen)
+{
+	struct sctp_assoc_value req;
+	struct sctp_association *asoc;
+
+	if (optlen < sizeof(req))
+		return -EINVAL;
+
+	if (copy_from_user(&req, optval, sizeof(req)))
+		return -EFAULT;
+
+	if (req.assoc_value > SCTP_SS_MAX)
+		return -EINVAL;
+
+	asoc = sctp_id2assoc(sk, req.assoc_id);
+	if (!asoc)
+		return -EINVAL;
+
+	return sctp_sched_set_sched(asoc, req.assoc_value);
+}
+
+/* Handle setsockopt(SCTP_STREAM_SCHEDULER_VALUE): set the scheduler
+ * value of one stream.  optval carries a struct sctp_stream_value; any
+ * bytes past it are ignored.
+ * Returns -EINVAL for a short buffer or an assoc_id that
+ * sctp_id2assoc() cannot resolve, -EFAULT on a failed copy, otherwise
+ * the result of sctp_sched_set_value().
+ */
+static int sctp_setsockopt_scheduler_value(struct sock *sk,
+					   char __user *optval,
+					   unsigned int optlen)
+{
+	struct sctp_stream_value req;
+	struct sctp_association *asoc;
+
+	if (optlen < sizeof(req))
+		return -EINVAL;
+
+	if (copy_from_user(&req, optval, sizeof(req)))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, req.assoc_id);
+	if (!asoc)
+		return -EINVAL;
+
+	return sctp_sched_set_value(asoc, req.stream_id, req.stream_value,
+				    GFP_KERNEL);
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4086,6 +4200,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_ADD_STREAMS:
 		retval = sctp_setsockopt_add_streams(sk, optval, optlen);
 		break;
+	case SCTP_STREAM_SCHEDULER:
+		retval = sctp_setsockopt_scheduler(sk, optval, optlen);
+		break;
+	case SCTP_STREAM_SCHEDULER_VALUE:
+		retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -4538,8 +4658,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
 	info->sctpi_ictrlchunks = asoc->stats.ictrlchunks;
 
 	prim = asoc->peer.primary_path;
-	memcpy(&info->sctpi_p_address, &prim->ipaddr,
-	       sizeof(struct sockaddr_storage));
+	memcpy(&info->sctpi_p_address, &prim->ipaddr, sizeof(prim->ipaddr));
 	info->sctpi_p_state = prim->state;
 	info->sctpi_p_cwnd = prim->cwnd;
 	info->sctpi_p_srtt = prim->srtt;
@@ -4657,29 +4776,39 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
 EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
 
 int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
-			    struct net *net, int pos, void *p) {
+			    int (*cb_done)(struct sctp_transport *, void *),
+			    struct net *net, int *pos, void *p) {
 	struct rhashtable_iter hti;
-	void *obj;
-	int err;
-
-	err = sctp_transport_walk_start(&hti);
-	if (err)
-		return err;
+	struct sctp_transport *tsp;
+	int ret;
 
-	obj = sctp_transport_get_idx(net, &hti, pos + 1);
-	for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) {
-		struct sctp_transport *transport = obj;
+again:
+	ret = sctp_transport_walk_start(&hti);
+	if (ret)
+		return ret;
 
-		if (!sctp_transport_hold(transport))
+	tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
+	for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
+		if (!sctp_transport_hold(tsp))
 			continue;
-		err = cb(transport, p);
-		sctp_transport_put(transport);
-		if (err)
+		ret = cb(tsp, p);
+		if (ret)
 			break;
+		(*pos)++;
+		sctp_transport_put(tsp);
 	}
 	sctp_transport_walk_stop(&hti);
 
-	return err;
+	if (ret) {
+		if (cb_done && !cb_done(tsp, p)) {
+			(*pos)++;
+			sctp_transport_put(tsp);
+			goto again;
+		}
+		sctp_transport_put(tsp);
+	}
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(sctp_for_each_transport);
 
@@ -4895,14 +5024,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
 	struct socket *sock;
 	int err = 0;
 
-	if (!asoc)
+	/* Do not peel off from one netns to another one. */
+	if (!net_eq(current->nsproxy->net_ns, sock_net(sk)))
 		return -EINVAL;
 
-	/* If there is a thread waiting on more sndbuf space for
-	 * sending on this asoc, it cannot be peeled.
-	 */
-	if (waitqueue_active(&asoc->wait))
-		return -EBUSY;
+	if (!asoc)
+		return -EINVAL;
 
 	/* An association cannot be branched off from an already peeled-off
 	 * socket, nor is this supported for tcp style sockets.
@@ -6634,7 +6761,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
 					   char __user *optval,
 					   int __user *optlen)
 {
-	struct sctp_stream_out *streamout;
+	struct sctp_stream_out_ext *streamoute;
 	struct sctp_association *asoc;
 	struct sctp_prstatus params;
 	int retval = -EINVAL;
@@ -6657,21 +6784,29 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
 	if (!asoc || params.sprstat_sid >= asoc->stream.outcnt)
 		goto out;
 
-	streamout = &asoc->stream.out[params.sprstat_sid];
+	streamoute = asoc->stream.out[params.sprstat_sid].ext;
+	if (!streamoute) {
+		/* Not allocated yet, means all stats are 0 */
+		params.sprstat_abandoned_unsent = 0;
+		params.sprstat_abandoned_sent = 0;
+		retval = 0;
+		goto out;
+	}
+
 	if (policy == SCTP_PR_SCTP_NONE) {
 		params.sprstat_abandoned_unsent = 0;
 		params.sprstat_abandoned_sent = 0;
 		for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
 			params.sprstat_abandoned_unsent +=
-				streamout->abandoned_unsent[policy];
+				streamoute->abandoned_unsent[policy];
 			params.sprstat_abandoned_sent +=
-				streamout->abandoned_sent[policy];
+				streamoute->abandoned_sent[policy];
 		}
 	} else {
 		params.sprstat_abandoned_unsent =
-			streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+			streamoute->abandoned_unsent[__SCTP_PR_INDEX(policy)];
 		params.sprstat_abandoned_sent =
-			streamout->abandoned_sent[__SCTP_PR_INDEX(policy)];
+			streamoute->abandoned_sent[__SCTP_PR_INDEX(policy)];
 	}
 
 	if (put_user(len, optlen) || copy_to_user(optval, &params, len)) {
@@ -6767,6 +6902,85 @@ out:
 	return retval;
 }
 
+/* Handle getsockopt(SCTP_STREAM_SCHEDULER): report which stream
+ * scheduler an association uses.  optval carries a struct
+ * sctp_assoc_value whose assoc_id names the association; assoc_value
+ * is filled from sctp_sched_get_sched() and the struct is copied back
+ * after *optlen has been set to the struct size.
+ * Returns 0 on success, -EINVAL for a short buffer or an assoc_id that
+ * sctp_id2assoc() cannot resolve, -EFAULT on a failed user copy.
+ */
+static int sctp_getsockopt_scheduler(struct sock *sk, int len,
+				     char __user *optval,
+				     int __user *optlen)
+{
+	struct sctp_association *asoc;
+	struct sctp_assoc_value req;
+
+	if (len < sizeof(req))
+		return -EINVAL;
+
+	len = sizeof(req);
+	if (copy_from_user(&req, optval, len))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, req.assoc_id);
+	if (!asoc)
+		return -EINVAL;
+
+	req.assoc_value = sctp_sched_get_sched(asoc);
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	if (copy_to_user(optval, &req, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* Handle getsockopt(SCTP_STREAM_SCHEDULER_VALUE): report the scheduler
+ * value of one stream.  optval carries a struct sctp_stream_value whose
+ * assoc_id and stream_id select the stream; stream_value is filled by
+ * sctp_sched_get_value() and the struct is copied back after *optlen
+ * has been set to the struct size.
+ * Returns 0 on success, -EINVAL for a short buffer or an assoc_id that
+ * sctp_id2assoc() cannot resolve, -EFAULT on a failed user copy.
+ */
+static int sctp_getsockopt_scheduler_value(struct sock *sk, int len,
+					   char __user *optval,
+					   int __user *optlen)
+{
+	struct sctp_association *asoc;
+	struct sctp_stream_value req;
+	int err;
+
+	if (len < sizeof(req))
+		return -EINVAL;
+
+	len = sizeof(req);
+	if (copy_from_user(&req, optval, len))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, req.assoc_id);
+	if (!asoc)
+		return -EINVAL;
+
+	/* A non-zero result from the scheduler is returned unchanged. */
+	err = sctp_sched_get_value(asoc, req.stream_id, &req.stream_value);
+	if (err)
+		return err;
+
+	/* Write the length back first, then the filled-in struct. */
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	if (copy_to_user(optval, &req, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -6949,6 +7163,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_enable_strreset(sk, len, optval,
 							 optlen);
 		break;
+	case SCTP_STREAM_SCHEDULER:
+		retval = sctp_getsockopt_scheduler(sk, len, optval,
+						   optlen);
+		break;
+	case SCTP_STREAM_SCHEDULER_VALUE:
+		retval = sctp_getsockopt_scheduler_value(sk, len, optval,
+							 optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7445,10 +7667,10 @@ static int sctp_autobind(struct sock *sk)
  * msg_control
  * points here
  */
-static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
+static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
 {
-	struct cmsghdr *cmsg;
 	struct msghdr *my_msg = (struct msghdr *)msg;
+	struct cmsghdr *cmsg;
 
 	for_each_cmsghdr(cmsg, my_msg) {
 		if (!CMSG_OK(my_msg, cmsg))
@@ -7777,7 +7999,7 @@ void sctp_sock_rfree(struct sk_buff *skb)
 
 /* Helper function to wait for space in the sndbuf.  */
 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
-				size_t msg_len)
+				size_t msg_len, struct sock **orig_sk)
 {
 	struct sock *sk = asoc->base.sk;
 	int err = 0;
@@ -7794,10 +8016,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 	for (;;) {
 		prepare_to_wait_exclusive(&asoc->wait, &wait,
 					  TASK_INTERRUPTIBLE);
+		if (asoc->base.dead)
+			goto do_dead;
 		if (!*timeo_p)
 			goto do_nonblock;
-		if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
-		    asoc->base.dead)
+		if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
 			goto do_error;
 		if (signal_pending(current))
 			goto do_interrupted;
@@ -7810,11 +8033,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 		release_sock(sk);
 		current_timeo = schedule_timeout(current_timeo);
 		lock_sock(sk);
+		if (sk != asoc->base.sk) {
+			release_sock(sk);
+			sk = asoc->base.sk;
+			lock_sock(sk);
+		}
 
 		*timeo_p = current_timeo;
 	}
 
 out:
+	*orig_sk = sk;
 	finish_wait(&asoc->wait, &wait);
 
 	/* Release the association's refcnt.  */
@@ -7822,6 +8051,10 @@ out:
 
 	return err;
 
+do_dead:
+	err = -ESRCH;
+	goto out;
+
 do_error:
 	err = -EPIPE;
 	goto out;
@@ -8085,7 +8318,7 @@ static inline void sctp_copy_descendant(struct sock *sk_to,
  */
 static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 			      struct sctp_association *assoc,
-			      sctp_socket_type_t type)
+			      enum sctp_socket_type type)
 {
 	struct sctp_sock *oldsp = sctp_sk(oldsk);
 	struct sctp_sock *newsp = sctp_sk(newsk);
@@ -8197,7 +8430,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	 * paths won't try to lock it and then oldsk.
 	 */
 	lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
+	sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w);
 	sctp_assoc_migrate(assoc, newsk);
+	sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w);
 
 	/* If the association on the newsk is already closed before accept()
 	 * is called, set RCV_SHUTDOWN flag.
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 63ea15503714..a11db21dc8a0 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -32,44 +32,181 @@
  *    Xin Long <lucien.xin@gmail.com>
  */
 
+#include <linux/list.h>
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Migrates chunks from stream queues to new stream queues if needed,
+ * but not across associations. Also fails and removes the chunks queued
+ * on streams at or above the new max, and releases those streams' ->ext.
+ */
+static void sctp_stream_outq_migrate(struct sctp_stream *stream,
+				     struct sctp_stream *new, __u16 outcnt)
+{
+	struct sctp_association *asoc = container_of(stream,
+					struct sctp_association, stream);
+	struct sctp_outq *outq = &asoc->outqueue;
+	struct sctp_chunk *ch, *temp;
+	int i;
+
+	list_for_each_entry_safe(ch, temp, &outq->out_chunk_list, list) {
+		__u16 sid = sctp_chunk_stream_no(ch);
+
+		if (sid < outcnt)
+			continue;
+
+		sctp_sched_dequeue_common(outq, ch);
+		/* No need to call dequeue_done here because
+		 * the chunks are not scheduled by now.
+		 */
+
+		/* Mark as failed send. */
+		sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM);
+		if (asoc->peer.prsctp_capable &&
+		    SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags))
+			asoc->sent_cnt_removable--;
+		sctp_chunk_free(ch);
+	}
+
+	if (new) {
+		/* Here we actually move the old ext stuff into the new
+		 * buffer, because we want to keep it. Then
+		 * sctp_stream_update will swap ->out pointers.
+		 */
+		for (i = 0; i < outcnt; i++) {
+			kfree(new->out[i].ext);
+			new->out[i].ext = stream->out[i].ext;
+			stream->out[i].ext = NULL;
+		}
+	}
+
+	/* Clear each pointer: sctp_stream_free() kfree()s every ->ext again. */
+	for (i = outcnt; i < stream->outcnt; i++) {
+		kfree(stream->out[i].ext);
+		stream->out[i].ext = NULL;
+	}
+}
+
+static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
+				 gfp_t gfp)
+{
+	struct sctp_stream_out *out;
+
+	out = kmalloc_array(outcnt, sizeof(*out), gfp);
+	if (!out)
+		return -ENOMEM;
+
+	if (stream->out) {
+		memcpy(out, stream->out, min(outcnt, stream->outcnt) *
+					 sizeof(*out));
+		kfree(stream->out);
+	}
+
+	if (outcnt > stream->outcnt)
+		memset(out + stream->outcnt, 0,
+		       (outcnt - stream->outcnt) * sizeof(*out));
+
+	stream->out = out;
+
+	return 0;
+}
+
+static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
+				gfp_t gfp)
+{
+	struct sctp_stream_in *in;
+
+	in = kmalloc_array(incnt, sizeof(*stream->in), gfp);
+
+	if (!in)
+		return -ENOMEM;
+
+	if (stream->in) {
+		memcpy(in, stream->in, min(incnt, stream->incnt) *
+				       sizeof(*in));
+		kfree(stream->in);
+	}
+
+	if (incnt > stream->incnt)
+		memset(in + stream->incnt, 0,
+		       (incnt - stream->incnt) * sizeof(*in));
+
+	stream->in = in;
+
+	return 0;
+}
 
 int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 		     gfp_t gfp)
 {
-	int i;
+	struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+	int i, ret = 0;
+
+	gfp |= __GFP_NOWARN;
 
 	/* Initial stream->out size may be very big, so free it and alloc
-	 * a new one with new outcnt to save memory.
+	 * a new one with new outcnt to save memory if needed.
 	 */
-	kfree(stream->out);
+	if (outcnt == stream->outcnt)
+		goto in;
 
-	stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp);
-	if (!stream->out)
-		return -ENOMEM;
+	/* Filter out chunks queued on streams that won't exist anymore */
+	sched->unsched_all(stream);
+	sctp_stream_outq_migrate(stream, NULL, outcnt);
+	sched->sched_all(stream);
+
+	i = sctp_stream_alloc_out(stream, outcnt, gfp);
+	if (i)
+		return i;
 
 	stream->outcnt = outcnt;
 	for (i = 0; i < stream->outcnt; i++)
 		stream->out[i].state = SCTP_STREAM_OPEN;
 
+	sched->init(stream);
+
+in:
 	if (!incnt)
-		return 0;
+		goto out;
 
-	stream->in = kcalloc(incnt, sizeof(*stream->in), gfp);
-	if (!stream->in) {
-		kfree(stream->out);
-		stream->out = NULL;
-		return -ENOMEM;
+	i = sctp_stream_alloc_in(stream, incnt, gfp);
+	if (i) {
+		ret = -ENOMEM;
+		goto free;
 	}
 
 	stream->incnt = incnt;
+	goto out;
 
-	return 0;
+free:
+	sched->free(stream);
+	kfree(stream->out);
+	stream->out = NULL;
+out:
+	return ret;
+}
+
+int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+{
+	struct sctp_stream_out_ext *soute;
+
+	soute = kzalloc(sizeof(*soute), GFP_KERNEL);
+	if (!soute)
+		return -ENOMEM;
+	stream->out[sid].ext = soute;
+
+	return sctp_sched_init_sid(stream, sid, GFP_KERNEL);
 }
 
 void sctp_stream_free(struct sctp_stream *stream)
 {
+	struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+	int i;
+
+	sched->free(stream);
+	for (i = 0; i < stream->outcnt; i++)
+		kfree(stream->out[i].ext);
 	kfree(stream->out);
 	kfree(stream->in);
 }
@@ -87,6 +224,10 @@ void sctp_stream_clear(struct sctp_stream *stream)
 
 void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
 {
+	struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+
+	sched->unsched_all(stream);
+	sctp_stream_outq_migrate(stream, new, new->outcnt);
 	sctp_stream_free(stream);
 
 	stream->out = new->out;
@@ -94,6 +235,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
 	stream->outcnt = new->outcnt;
 	stream->incnt  = new->incnt;
 
+	sched->sched_all(stream);
+
 	new->out = NULL;
 	new->in  = NULL;
 }
@@ -118,6 +261,7 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
 	__u16 i, str_nums, *str_list;
 	struct sctp_chunk *chunk;
 	int retval = -EINVAL;
+	__be16 *nstr_list;
 	bool out, in;
 
 	if (!asoc->peer.reconf_capable ||
@@ -138,23 +282,44 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
 
 	str_nums = params->srs_number_streams;
 	str_list = params->srs_stream_list;
-	if (out && str_nums)
-		for (i = 0; i < str_nums; i++)
-			if (str_list[i] >= stream->outcnt)
-				goto out;
+	if (str_nums) {
+		int param_len = 0;
 
-	if (in && str_nums)
-		for (i = 0; i < str_nums; i++)
-			if (str_list[i] >= stream->incnt)
-				goto out;
+		if (out) {
+			for (i = 0; i < str_nums; i++)
+				if (str_list[i] >= stream->outcnt)
+					goto out;
 
-	for (i = 0; i < str_nums; i++)
-		str_list[i] = htons(str_list[i]);
+			param_len = str_nums * sizeof(__u16) +
+				    sizeof(struct sctp_strreset_outreq);
+		}
+
+		if (in) {
+			for (i = 0; i < str_nums; i++)
+				if (str_list[i] >= stream->incnt)
+					goto out;
 
-	chunk = sctp_make_strreset_req(asoc, str_nums, str_list, out, in);
+			param_len += str_nums * sizeof(__u16) +
+				     sizeof(struct sctp_strreset_inreq);
+		}
+
+		if (param_len > SCTP_MAX_CHUNK_LEN -
+				sizeof(struct sctp_reconf_chunk))
+			goto out;
+	}
+
+	nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL);
+	if (!nstr_list) {
+		retval = -ENOMEM;
+		goto out;
+	}
 
 	for (i = 0; i < str_nums; i++)
-		str_list[i] = ntohs(str_list[i]);
+		nstr_list[i] = htons(str_list[i]);
+
+	chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in);
+
+	kfree(nstr_list);
 
 	if (!chunk) {
 		retval = -ENOMEM;
@@ -244,7 +409,7 @@ int sctp_send_add_streams(struct sctp_association *asoc,
 {
 	struct sctp_stream *stream = &asoc->stream;
 	struct sctp_chunk *chunk = NULL;
-	int retval = -ENOMEM;
+	int retval;
 	__u32 outcnt, incnt;
 	__u16 out, in;
 
@@ -270,20 +435,16 @@ int sctp_send_add_streams(struct sctp_association *asoc,
 	}
 
 	if (out) {
-		struct sctp_stream_out *streamout;
-
-		streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
-				     GFP_KERNEL);
-		if (!streamout)
+		retval = sctp_stream_alloc_out(stream, outcnt, GFP_KERNEL);
+		if (retval)
 			goto out;
-
-		memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
-		stream->out = streamout;
 	}
 
 	chunk = sctp_make_strreset_addstrm(asoc, out, in);
-	if (!chunk)
+	if (!chunk) {
+		retval = -ENOMEM;
 		goto out;
+	}
 
 	asoc->strreset_chunk = chunk;
 	sctp_chunk_hold(asoc->strreset_chunk);
@@ -305,7 +466,7 @@ out:
 }
 
 static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param(
-			struct sctp_association *asoc, __u32 resp_seq,
+			struct sctp_association *asoc, __be32 resp_seq,
 			__be16 type)
 {
 	struct sctp_chunk *chunk = asoc->strreset_chunk;
@@ -345,8 +506,9 @@ struct sctp_chunk *sctp_process_strreset_outreq(
 {
 	struct sctp_strreset_outreq *outreq = param.v;
 	struct sctp_stream *stream = &asoc->stream;
-	__u16 i, nums, flags = 0, *str_p = NULL;
 	__u32 result = SCTP_STRRESET_DENIED;
+	__u16 i, nums, flags = 0;
+	__be16 *str_p = NULL;
 	__u32 request_seq;
 
 	request_seq = ntohl(outreq->request_seq);
@@ -439,8 +601,9 @@ struct sctp_chunk *sctp_process_strreset_inreq(
 	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
 	struct sctp_chunk *chunk = NULL;
-	__u16 i, nums, *str_p;
 	__u32 request_seq;
+	__u16 i, nums;
+	__be16 *str_p;
 
 	request_seq = ntohl(inreq->request_seq);
 	if (TSN_lt(asoc->strreset_inseq, request_seq) ||
@@ -601,7 +764,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
 	struct sctp_strreset_addstrm *addstrm = param.v;
 	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
-	struct sctp_stream_in *streamin;
 	__u32 request_seq, incnt;
 	__u16 in, i;
 
@@ -648,13 +810,9 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
 	if (!in || incnt > SCTP_MAX_STREAM)
 		goto out;
 
-	streamin = krealloc(stream->in, incnt * sizeof(*streamin),
-			    GFP_ATOMIC);
-	if (!streamin)
+	if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
 		goto out;
 
-	memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
-	stream->in = streamin;
 	stream->incnt = incnt;
 
 	result = SCTP_STRRESET_PERFORMED;
@@ -676,10 +834,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
 	struct sctp_strreset_addstrm *addstrm = param.v;
 	struct sctp_stream *stream = &asoc->stream;
 	__u32 result = SCTP_STRRESET_DENIED;
-	struct sctp_stream_out *streamout;
 	struct sctp_chunk *chunk = NULL;
 	__u32 request_seq, outcnt;
 	__u16 out, i;
+	int ret;
 
 	request_seq = ntohl(addstrm->request_seq);
 	if (TSN_lt(asoc->strreset_inseq, request_seq) ||
@@ -708,14 +866,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
 	if (!out || outcnt > SCTP_MAX_STREAM)
 		goto out;
 
-	streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
-			     GFP_ATOMIC);
-	if (!streamout)
+	ret = sctp_stream_alloc_out(stream, outcnt, GFP_ATOMIC);
+	if (ret)
 		goto out;
 
-	memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
-	stream->out = streamout;
-
 	chunk = sctp_make_strreset_addstrm(asoc, out, 0);
 	if (!chunk)
 		goto out;
@@ -769,7 +923,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
 
 	if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) {
 		struct sctp_strreset_outreq *outreq;
-		__u16 *str_p;
+		__be16 *str_p;
 
 		outreq = (struct sctp_strreset_outreq *)req;
 		str_p = outreq->list_of_streams;
@@ -794,7 +948,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
 			nums, str_p, GFP_ATOMIC);
 	} else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) {
 		struct sctp_strreset_inreq *inreq;
-		__u16 *str_p;
+		__be16 *str_p;
 
 		/* if the result is performed, it's impossible for inreq */
 		if (result == SCTP_STRRESET_PERFORMED)
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
new file mode 100644
index 000000000000..0b83ec51e43b
--- /dev/null
+++ b/net/sctp/stream_sched.c
@@ -0,0 +1,275 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ *    Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* First Come First Serve (a.k.a. FIFO)
+ * RFC DRAFT ndata Section 3.1
+ */
+static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid,
+			       __u16 value, gfp_t gfp)
+{
+	return 0;
+}
+
+static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid,
+			       __u16 *value)
+{
+	*value = 0;
+	return 0;
+}
+
+static int sctp_sched_fcfs_init(struct sctp_stream *stream)
+{
+	return 0;
+}
+
+static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid,
+				    gfp_t gfp)
+{
+	return 0;
+}
+
+static void sctp_sched_fcfs_free(struct sctp_stream *stream)
+{
+}
+
+static void sctp_sched_fcfs_enqueue(struct sctp_outq *q,
+				    struct sctp_datamsg *msg)
+{
+}
+
+static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q)
+{
+	struct sctp_stream *stream = &q->asoc->stream;
+	struct sctp_chunk *ch = NULL;
+	struct list_head *entry;
+
+	if (list_empty(&q->out_chunk_list))
+		goto out;
+
+	if (stream->out_curr) {
+		ch = list_entry(stream->out_curr->ext->outq.next,
+				struct sctp_chunk, stream_list);
+	} else {
+		entry = q->out_chunk_list.next;
+		ch = list_entry(entry, struct sctp_chunk, list);
+	}
+
+	sctp_sched_dequeue_common(q, ch);
+
+out:
+	return ch;
+}
+
+static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q,
+					 struct sctp_chunk *chunk)
+{
+}
+
+static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream)
+{
+}
+
+static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream)
+{
+}
+
+static struct sctp_sched_ops sctp_sched_fcfs = {
+	.set = sctp_sched_fcfs_set,
+	.get = sctp_sched_fcfs_get,
+	.init = sctp_sched_fcfs_init,
+	.init_sid = sctp_sched_fcfs_init_sid,
+	.free = sctp_sched_fcfs_free,
+	.enqueue = sctp_sched_fcfs_enqueue,
+	.dequeue = sctp_sched_fcfs_dequeue,
+	.dequeue_done = sctp_sched_fcfs_dequeue_done,
+	.sched_all = sctp_sched_fcfs_sched_all,
+	.unsched_all = sctp_sched_fcfs_unsched_all,
+};
+
+/* API to other parts of the stack */
+
+extern struct sctp_sched_ops sctp_sched_prio;
+extern struct sctp_sched_ops sctp_sched_rr;
+
+static struct sctp_sched_ops *sctp_sched_ops[] = {
+	&sctp_sched_fcfs,
+	&sctp_sched_prio,
+	&sctp_sched_rr,
+};
+
+/* Switch @asoc to scheduler @sched; returns 0 or a negative errno. */
+int sctp_sched_set_sched(struct sctp_association *asoc,
+			 enum sctp_sched_type sched)
+{
+	struct sctp_sched_ops *old = asoc->outqueue.sched, *n;
+	struct sctp_datamsg *msg = NULL;
+	struct sctp_chunk *ch;
+	int i, ret = 0;
+
+	/* Validate first: sched indexes sctp_sched_ops[] just below. */
+	if (sched > SCTP_SS_MAX)
+		return -EINVAL;
+	n = sctp_sched_ops[sched];
+	if (old == n)
+		return ret;
+
+	if (old) {
+		old->free(&asoc->stream);
+		/* Give the next scheduler a clean slate. */
+		for (i = 0; i < asoc->stream.outcnt; i++) {
+			void *p = asoc->stream.out[i].ext;
+
+			if (!p)
+				continue;
+
+			p += offsetofend(struct sctp_stream_out_ext, outq);
+			memset(p, 0, sizeof(struct sctp_stream_out_ext) -
+				     offsetofend(struct sctp_stream_out_ext, outq));
+		}
+	}
+
+	asoc->outqueue.sched = n;
+	n->init(&asoc->stream);
+	for (i = 0; i < asoc->stream.outcnt; i++) {
+		if (!asoc->stream.out[i].ext)
+			continue;
+
+		ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
+		if (ret)
+			goto err;
+	}
+
+	/* We have to requeue all chunks already queued. */
+	list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+		if (ch->msg == msg)
+			continue;
+		msg = ch->msg;
+		n->enqueue(&asoc->outqueue, msg);
+	}
+
+	return ret;
+
+err:
+	n->free(&asoc->stream);
+	asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */
+
+	return ret;
+}
+
+int sctp_sched_get_sched(struct sctp_association *asoc)
+{
+	int i;
+
+	for (i = 0; i <= SCTP_SS_MAX; i++)
+		if (asoc->outqueue.sched == sctp_sched_ops[i])
+			return i;
+
+	return 0;
+}
+
+int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid,
+			 __u16 value, gfp_t gfp)
+{
+	if (sid >= asoc->stream.outcnt)
+		return -EINVAL;
+
+	if (!asoc->stream.out[sid].ext) {
+		int ret;
+
+		ret = sctp_stream_init_ext(&asoc->stream, sid);
+		if (ret)
+			return ret;
+	}
+
+	return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp);
+}
+
+int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
+			 __u16 *value)
+{
+	if (sid >= asoc->stream.outcnt)
+		return -EINVAL;
+
+	if (!asoc->stream.out[sid].ext)
+		return 0;
+
+	return asoc->outqueue.sched->get(&asoc->stream, sid, value);
+}
+
+void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
+{
+	if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) {
+		struct sctp_stream_out *sout;
+		__u16 sid;
+
+		/* The datamsg is not finished, so save its stream as the
+		 * current one, in case the application switches scheduler
+		 * or a higher priority stream comes in.
+		 */
+		sid = sctp_chunk_stream_no(ch);
+		sout = &q->asoc->stream.out[sid];
+		q->asoc->stream.out_curr = sout;
+		return;
+	}
+
+	q->asoc->stream.out_curr = NULL;
+	q->sched->dequeue_done(q, ch);
+}
+
+/* Auxiliary functions for the schedulers */
+void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch)
+{
+	list_del_init(&ch->list);
+	list_del_init(&ch->stream_list);
+	q->out_qlen -= ch->skb->len;
+}
+
+int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp)
+{
+	struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+
+	INIT_LIST_HEAD(&stream->out[sid].ext->outq);
+	return sched->init_sid(stream, sid, gfp);
+}
+
+struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream)
+{
+	struct sctp_association *asoc;
+
+	asoc = container_of(stream, struct sctp_association, stream);
+
+	return asoc->outqueue.sched;
+}
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
new file mode 100644
index 000000000000..384dbf3c8760
--- /dev/null
+++ b/net/sctp/stream_sched_prio.c
@@ -0,0 +1,347 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ *    Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Priority handling
+ * RFC DRAFT ndata section 3.4
+ */
+
+static void sctp_sched_prio_unsched_all(struct sctp_stream *stream);
+
+static struct sctp_stream_priorities *sctp_sched_prio_new_head(
+			struct sctp_stream *stream, int prio, gfp_t gfp)
+{
+	struct sctp_stream_priorities *p;
+
+	p = kmalloc(sizeof(*p), gfp);
+	if (!p)
+		return NULL;
+
+	INIT_LIST_HEAD(&p->prio_sched);
+	INIT_LIST_HEAD(&p->active);
+	p->next = NULL;
+	p->prio = prio;
+
+	return p;
+}
+
+static struct sctp_stream_priorities *sctp_sched_prio_get_head(
+			struct sctp_stream *stream, int prio, gfp_t gfp)
+{
+	struct sctp_stream_priorities *p;
+	int i;
+
+	/* Look into scheduled priorities first, as they are sorted and
+	 * we can find it fast IF it's scheduled.
+	 */
+	list_for_each_entry(p, &stream->prio_list, prio_sched) {
+		if (p->prio == prio)
+			return p;
+		if (p->prio > prio)
+			break;
+	}
+
+	/* No luck. So we search on all streams now. */
+	for (i = 0; i < stream->outcnt; i++) {
+		if (!stream->out[i].ext)
+			continue;
+
+		p = stream->out[i].ext->prio_head;
+		if (!p)
+			/* Means all other streams won't be initialized
+			 * as well.
+			 */
+			break;
+		if (p->prio == prio)
+			return p;
+	}
+
+	/* If not even there, allocate a new one. */
+	return sctp_sched_prio_new_head(stream, prio, gfp);
+}
+
+static void sctp_sched_prio_next_stream(struct sctp_stream_priorities *p)
+{
+	struct list_head *pos;
+
+	pos = p->next->prio_list.next;
+	if (pos == &p->active)
+		pos = pos->next;
+	p->next = list_entry(pos, struct sctp_stream_out_ext, prio_list);
+}
+
+static bool sctp_sched_prio_unsched(struct sctp_stream_out_ext *soute)
+{
+	bool scheduled = false;
+
+	if (!list_empty(&soute->prio_list)) {
+		struct sctp_stream_priorities *prio_head = soute->prio_head;
+
+		/* Scheduled */
+		scheduled = true;
+
+		if (prio_head->next == soute)
+			/* Try to move to the next stream */
+			sctp_sched_prio_next_stream(prio_head);
+
+		list_del_init(&soute->prio_list);
+
+		/* Also unsched the priority if this was the last stream */
+		if (list_empty(&prio_head->active)) {
+			list_del_init(&prio_head->prio_sched);
+			/* If there is no stream left, clear next */
+			prio_head->next = NULL;
+		}
+	}
+
+	return scheduled;
+}
+
+static void sctp_sched_prio_sched(struct sctp_stream *stream,
+				  struct sctp_stream_out_ext *soute)
+{
+	struct sctp_stream_priorities *prio, *prio_head;
+
+	prio_head = soute->prio_head;
+
+	/* Nothing to do if already scheduled */
+	if (!list_empty(&soute->prio_list))
+		return;
+
+	/* Schedule the stream. If there is a next, we schedule the new
+	 * one before it, so it's the last in round robin order.
+	 * If there isn't, we also have to schedule the priority.
+	 */
+	if (prio_head->next) {
+		list_add(&soute->prio_list, prio_head->next->prio_list.prev);
+		return;
+	}
+
+	list_add(&soute->prio_list, &prio_head->active);
+	prio_head->next = soute;
+
+	list_for_each_entry(prio, &stream->prio_list, prio_sched) {
+		if (prio->prio > prio_head->prio) {
+			list_add(&prio_head->prio_sched, prio->prio_sched.prev);
+			return;
+		}
+	}
+
+	list_add_tail(&prio_head->prio_sched, &stream->prio_list);
+}
+
+static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
+			       __u16 prio, gfp_t gfp)
+{
+	struct sctp_stream_out *sout = &stream->out[sid];
+	struct sctp_stream_out_ext *soute = sout->ext;
+	struct sctp_stream_priorities *prio_head, *old;
+	bool reschedule = false;
+	int i;
+
+	prio_head = sctp_sched_prio_get_head(stream, prio, gfp);
+	if (!prio_head)
+		return -ENOMEM;
+
+	reschedule = sctp_sched_prio_unsched(soute);
+	old = soute->prio_head;
+	soute->prio_head = prio_head;
+	if (reschedule)
+		sctp_sched_prio_sched(stream, soute);
+
+	if (!old)
+		/* Happens when we set the priority for the first time */
+		return 0;
+
+	for (i = 0; i < stream->outcnt; i++) {
+		soute = stream->out[i].ext;
+		if (soute && soute->prio_head == old)
+			/* It's still in use, nothing else to do here. */
+			return 0;
+	}
+
+	/* No hits, we are good to free it. */
+	kfree(old);
+
+	return 0;
+}
+
+static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid,
+			       __u16 *value)
+{
+	*value = stream->out[sid].ext->prio_head->prio;
+	return 0;
+}
+
+static int sctp_sched_prio_init(struct sctp_stream *stream)
+{
+	INIT_LIST_HEAD(&stream->prio_list);
+
+	return 0;
+}
+
+static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
+				    gfp_t gfp)
+{
+	INIT_LIST_HEAD(&stream->out[sid].ext->prio_list);
+	return sctp_sched_prio_set(stream, sid, 0, gfp);
+}
+
+static void sctp_sched_prio_free(struct sctp_stream *stream)
+{
+	struct sctp_stream_priorities *prio, *n;
+	LIST_HEAD(list);
+	int i;
+
+	/* As we don't keep a list of priorities, to avoid multiple
+	 * frees we have to do it in 3 steps:
+	 *   1. unsched everyone, so the lists are free to use in 2.
+	 *   2. build the list of the priorities
+	 *   3. free the list
+	 */
+	sctp_sched_prio_unsched_all(stream);
+	for (i = 0; i < stream->outcnt; i++) {
+		if (!stream->out[i].ext)
+			continue;
+		prio = stream->out[i].ext->prio_head;
+		if (prio && list_empty(&prio->prio_sched))
+			list_add(&prio->prio_sched, &list);
+	}
+	list_for_each_entry_safe(prio, n, &list, prio_sched) {
+		list_del_init(&prio->prio_sched);
+		kfree(prio);
+	}
+}
+
+static void sctp_sched_prio_enqueue(struct sctp_outq *q,
+				    struct sctp_datamsg *msg)
+{
+	struct sctp_stream *stream;
+	struct sctp_chunk *ch;
+	__u16 sid;
+
+	ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
+	sid = sctp_chunk_stream_no(ch);
+	stream = &q->asoc->stream;
+	sctp_sched_prio_sched(stream, stream->out[sid].ext);
+}
+
+static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q)
+{
+	struct sctp_stream *stream = &q->asoc->stream;
+	struct sctp_stream_priorities *prio;
+	struct sctp_stream_out_ext *soute;
+	struct sctp_chunk *ch = NULL;
+
+	/* Bail out quickly if queue is empty */
+	if (list_empty(&q->out_chunk_list))
+		goto out;
+
+	/* Find which chunk is next. It's easy, it's either the current
+	 * one or the first chunk on the next active stream.
+	 */
+	if (stream->out_curr) {
+		soute = stream->out_curr->ext;
+	} else {
+		prio = list_entry(stream->prio_list.next,
+				  struct sctp_stream_priorities, prio_sched);
+		soute = prio->next;
+	}
+	ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
+	sctp_sched_dequeue_common(q, ch);
+
+out:
+	return ch;
+}
+
+static void sctp_sched_prio_dequeue_done(struct sctp_outq *q,
+					 struct sctp_chunk *ch)
+{
+	struct sctp_stream_priorities *prio;
+	struct sctp_stream_out_ext *soute;
+	__u16 sid;
+
+	/* Last chunk on that msg, move to the next stream on
+	 * this priority.
+	 */
+	sid = sctp_chunk_stream_no(ch);
+	soute = q->asoc->stream.out[sid].ext;
+	prio = soute->prio_head;
+
+	sctp_sched_prio_next_stream(prio);
+
+	if (list_empty(&soute->outq))
+		sctp_sched_prio_unsched(soute);
+}
+
+static void sctp_sched_prio_sched_all(struct sctp_stream *stream)
+{
+	struct sctp_association *asoc;
+	struct sctp_stream_out *sout;
+	struct sctp_chunk *ch;
+
+	asoc = container_of(stream, struct sctp_association, stream);
+	list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+		__u16 sid;
+
+		sid = sctp_chunk_stream_no(ch);
+		sout = &stream->out[sid];
+		if (sout->ext)
+			sctp_sched_prio_sched(stream, sout->ext);
+	}
+}
+
+static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
+{
+	struct sctp_stream_priorities *p, *tmp;
+	struct sctp_stream_out_ext *soute, *souttmp;
+
+	list_for_each_entry_safe(p, tmp, &stream->prio_list, prio_sched)
+		list_for_each_entry_safe(soute, souttmp, &p->active, prio_list)
+			sctp_sched_prio_unsched(soute);
+}
+
+struct sctp_sched_ops sctp_sched_prio = {
+	.set = sctp_sched_prio_set,
+	.get = sctp_sched_prio_get,
+	.init = sctp_sched_prio_init,
+	.init_sid = sctp_sched_prio_init_sid,
+	.free = sctp_sched_prio_free,
+	.enqueue = sctp_sched_prio_enqueue,
+	.dequeue = sctp_sched_prio_dequeue,
+	.dequeue_done = sctp_sched_prio_dequeue_done,
+	.sched_all = sctp_sched_prio_sched_all,
+	.unsched_all = sctp_sched_prio_unsched_all,
+};
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
new file mode 100644
index 000000000000..7612a438c5b9
--- /dev/null
+++ b/net/sctp/stream_sched_rr.c
@@ -0,0 +1,201 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ *    Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Round Robin handling
+ * RFC DRAFT ndata section 3.2
+ */
+static void sctp_sched_rr_unsched_all(struct sctp_stream *stream);
+
+static void sctp_sched_rr_next_stream(struct sctp_stream *stream)
+{
+	struct list_head *pos;
+
+	pos = stream->rr_next->rr_list.next;
+	if (pos == &stream->rr_list)
+		pos = pos->next;
+	stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list);
+}
+
+static void sctp_sched_rr_unsched(struct sctp_stream *stream,
+				  struct sctp_stream_out_ext *soute)
+{
+	if (stream->rr_next == soute)
+		/* Try to move to the next stream */
+		sctp_sched_rr_next_stream(stream);
+
+	list_del_init(&soute->rr_list);
+
+	/* If we have no other stream queued, clear next */
+	if (list_empty(&stream->rr_list))
+		stream->rr_next = NULL;
+}
+
+static void sctp_sched_rr_sched(struct sctp_stream *stream,
+				struct sctp_stream_out_ext *soute)
+{
+	if (!list_empty(&soute->rr_list))
+		/* Already scheduled. */
+		return;
+
+	/* Schedule the stream */
+	list_add_tail(&soute->rr_list, &stream->rr_list);
+
+	if (!stream->rr_next)
+		stream->rr_next = soute;
+}
+
+static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid,
+			     __u16 prio, gfp_t gfp)
+{
+	return 0;
+}
+
+static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid,
+			     __u16 *value)
+{
+	return 0;
+}
+
+static int sctp_sched_rr_init(struct sctp_stream *stream)
+{
+	INIT_LIST_HEAD(&stream->rr_list);
+	stream->rr_next = NULL;
+
+	return 0;
+}
+
+static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid,
+				  gfp_t gfp)
+{
+	INIT_LIST_HEAD(&stream->out[sid].ext->rr_list);
+
+	return 0;
+}
+
+static void sctp_sched_rr_free(struct sctp_stream *stream)
+{
+	sctp_sched_rr_unsched_all(stream);
+}
+
+static void sctp_sched_rr_enqueue(struct sctp_outq *q,
+				  struct sctp_datamsg *msg)
+{
+	struct sctp_stream *stream;
+	struct sctp_chunk *ch;
+	__u16 sid;
+
+	ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
+	sid = sctp_chunk_stream_no(ch);
+	stream = &q->asoc->stream;
+	sctp_sched_rr_sched(stream, stream->out[sid].ext);
+}
+
+static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q)
+{
+	struct sctp_stream *stream = &q->asoc->stream;
+	struct sctp_stream_out_ext *soute;
+	struct sctp_chunk *ch = NULL;
+
+	/* Bail out quickly if queue is empty */
+	if (list_empty(&q->out_chunk_list))
+		goto out;
+
+	/* Find which chunk is next */
+	if (stream->out_curr)
+		soute = stream->out_curr->ext;
+	else
+		soute = stream->rr_next;
+	ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
+
+	sctp_sched_dequeue_common(q, ch);
+
+out:
+	return ch;
+}
+
+static void sctp_sched_rr_dequeue_done(struct sctp_outq *q,
+				       struct sctp_chunk *ch)
+{
+	struct sctp_stream_out_ext *soute;
+	__u16 sid;
+
+	/* Last chunk on that msg, move to the next stream */
+	sid = sctp_chunk_stream_no(ch);
+	soute = q->asoc->stream.out[sid].ext;
+
+	sctp_sched_rr_next_stream(&q->asoc->stream);
+
+	if (list_empty(&soute->outq))
+		sctp_sched_rr_unsched(&q->asoc->stream, soute);
+}
+
+static void sctp_sched_rr_sched_all(struct sctp_stream *stream)
+{
+	struct sctp_association *asoc;
+	struct sctp_stream_out_ext *soute;
+	struct sctp_chunk *ch;
+
+	asoc = container_of(stream, struct sctp_association, stream);
+	list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+		__u16 sid;
+
+		sid = sctp_chunk_stream_no(ch);
+		soute = stream->out[sid].ext;
+		if (soute)
+			sctp_sched_rr_sched(stream, soute);
+	}
+}
+
+static void sctp_sched_rr_unsched_all(struct sctp_stream *stream)
+{
+	struct sctp_stream_out_ext *soute, *tmp;
+
+	list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list)
+		sctp_sched_rr_unsched(stream, soute);
+}
+
+struct sctp_sched_ops sctp_sched_rr = {
+	.set = sctp_sched_rr_set,
+	.get = sctp_sched_rr_get,
+	.init = sctp_sched_rr_init,
+	.init_sid = sctp_sched_rr_init_sid,
+	.free = sctp_sched_rr_free,
+	.enqueue = sctp_sched_rr_enqueue,
+	.dequeue = sctp_sched_rr_dequeue,
+	.dequeue_done = sctp_sched_rr_dequeue_done,
+	.sched_all = sctp_sched_rr_sched_all,
+	.unsched_all = sctp_sched_rr_unsched_all,
+};
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 0e732f68c2bf..ef7ca44d6e6a 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -46,7 +46,7 @@ static int timer_max = 86400000; /* ms in one day */
 static int int_max = INT_MAX;
 static int sack_timer_min = 1;
 static int sack_timer_max = 500;
-static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
+static int addr_scope_max = SCTP_SCOPE_POLICY_MAX;
 static int rwnd_scale_max = 16;
 static int rto_alpha_min = 0;
 static int rto_beta_min = 0;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 80a97c8501a7..1e5a22430cf5 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -87,14 +87,11 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
 	INIT_LIST_HEAD(&peer->send_ready);
 	INIT_LIST_HEAD(&peer->transports);
 
-	setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
-		    (unsigned long)peer);
-	setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
-		    (unsigned long)peer);
-	setup_timer(&peer->reconf_timer, sctp_generate_reconf_event,
-		    (unsigned long)peer);
-	setup_timer(&peer->proto_unreach_timer,
-		    sctp_generate_proto_unreach_event, (unsigned long)peer);
+	timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
+	timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
+	timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
+	timer_setup(&peer->proto_unreach_timer,
+		    sctp_generate_proto_unreach_event, 0);
 
 	/* Initialize the 64-bit random nonce sent with heartbeat. */
 	get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
@@ -490,7 +487,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
  * detected.
  */
 void sctp_transport_lower_cwnd(struct sctp_transport *transport,
-			       sctp_lower_cwnd_t reason)
+			       enum sctp_lower_cwnd reason)
 {
 	struct sctp_association *asoc = transport->asoc;
 
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 5f86c5062a98..5447228bf1a0 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -371,19 +371,19 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
 				struct sctp_chunk *chunk, __u16 flags,
 				gfp_t gfp)
 {
-	struct sctp_ulpevent *event;
 	struct sctp_remote_error *sre;
+	struct sctp_ulpevent *event;
+	struct sctp_errhdr *ch;
 	struct sk_buff *skb;
-	sctp_errhdr_t *ch;
 	__be16 cause;
 	int elen;
 
-	ch = (sctp_errhdr_t *)(chunk->skb->data);
+	ch = (struct sctp_errhdr *)(chunk->skb->data);
 	cause = ch->cause;
-	elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
+	elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(*ch);
 
 	/* Pull off the ERROR header.  */
-	skb_pull(chunk->skb, sizeof(sctp_errhdr_t));
+	skb_pull(chunk->skb, sizeof(*ch));
 
 	/* Copy the skb to a new skb with room for us to prepend
 	 * notification with.
@@ -847,7 +847,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event(
 
 struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event(
 	const struct sctp_association *asoc, __u16 flags, __u16 stream_num,
-	__u16 *stream_list, gfp_t gfp)
+	__be16 *stream_list, gfp_t gfp)
 {
 	struct sctp_stream_reset_event *sreset;
 	struct sctp_ulpevent *event;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 0225d62a869f..a71be33f3afe 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -265,7 +265,8 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 		sctp_ulpq_clear_pd(ulpq);
 
 	if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) {
-		sp->data_ready_signalled = 1;
+		if (!sock_owned_by_user(sk))
+			sp->data_ready_signalled = 1;
 		sk->sk_data_ready(sk);
 	}
 	return 1;
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 33954852f3f8..c717ef0896aa 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -8,10 +8,6 @@ config SMC
 	  The Linux implementation of the SMC-R solution is designed as
 	  a separate socket family SMC.
 
-	  Warning: SMC will expose all memory for remote reads and writes
-	  once a connection is established.  Don't enable this option except
-	  for tightly controlled lab environment.
-
 	  Select this option if you want to run SMC socket applications
 
 config SMC_DIAG
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6793d7348cc8..6451c5013e06 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -282,6 +282,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
 			 __be32 *subnet, u8 *prefix_len)
 {
 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
+	struct in_device *in_dev;
 	struct sockaddr_in addr;
 	int rc = -ENOENT;
 	int len;
@@ -298,14 +299,17 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
 	/* get address to which the internal TCP socket is bound */
 	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
 	/* analyze IPv4 specific data of net_device belonging to TCP socket */
-	for_ifa(dst->dev->ip_ptr) {
-		if (ifa->ifa_address != addr.sin_addr.s_addr)
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(dst->dev);
+	for_ifa(in_dev) {
+		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
 			continue;
 		*prefix_len = inet_mask_len(ifa->ifa_mask);
 		*subnet = ifa->ifa_address & ifa->ifa_mask;
 		rc = 0;
 		break;
-	} endfor_ifa(dst->dev->ip_ptr);
+	} endfor_ifa(in_dev);
+	rcu_read_unlock();
 
 out_rel:
 	dst_release(dst);
@@ -338,6 +342,12 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
 		return SMC_CLC_DECL_INTERR;
 
 	smc_wr_remember_qp_attr(link);
+
+	rc = smc_wr_reg_send(link,
+			     smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
+	if (rc)
+		return SMC_CLC_DECL_INTERR;
+
 	/* send CONFIRM LINK response over RoCE fabric */
 	rc = smc_llc_send_confirm_link(link,
 				       link->smcibdev->mac[link->ibport - 1],
@@ -380,6 +390,12 @@ static int smc_connect_rdma(struct smc_sock *smc)
 	int rc = 0;
 	u8 ibport;
 
+	if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
+		/* peer has not signalled SMC-capability */
+		smc->use_fallback = true;
+		goto out_connected;
+	}
+
 	/* IPSec connections opt out of SMC-R optimizations */
 	if (using_ipsec(smc)) {
 		reason_code = SMC_CLC_DECL_IPSEC;
@@ -430,12 +446,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
 
 	smc_conn_save_peer_info(smc, &aclc);
 
-	rc = smc_sndbuf_create(smc);
-	if (rc) {
-		reason_code = SMC_CLC_DECL_MEM;
-		goto decline_rdma_unlock;
-	}
-	rc = smc_rmb_create(smc);
+	/* create send buffer and rmb */
+	rc = smc_buf_create(smc);
 	if (rc) {
 		reason_code = SMC_CLC_DECL_MEM;
 		goto decline_rdma_unlock;
@@ -459,7 +471,20 @@ static int smc_connect_rdma(struct smc_sock *smc)
 			reason_code = SMC_CLC_DECL_INTERR;
 			goto decline_rdma_unlock;
 		}
+	} else {
+		struct smc_buf_desc *buf_desc = smc->conn.rmb_desc;
+
+		if (!buf_desc->reused) {
+			/* register memory region for new rmb */
+			rc = smc_wr_reg_send(link,
+					     buf_desc->mr_rx[SMC_SINGLE_LINK]);
+			if (rc) {
+				reason_code = SMC_CLC_DECL_INTERR;
+				goto decline_rdma_unlock;
+			}
+		}
 	}
+	smc_rmb_sync_sg_for_device(&smc->conn);
 
 	rc = smc_clc_send_confirm(smc);
 	if (rc)
@@ -494,7 +519,7 @@ decline_rdma:
 	/* RDMA setup failed, switch back to TCP */
 	smc->use_fallback = true;
 	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
-		rc = smc_clc_send_decline(smc, reason_code, 0);
+		rc = smc_clc_send_decline(smc, reason_code);
 		if (rc < sizeof(struct smc_clc_msg_decline))
 			goto out_err;
 	}
@@ -536,6 +561,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
 	}
 
 	smc_copy_sock_settings_to_clc(smc);
+	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
 	rc = kernel_connect(smc->clcsock, addr, alen, flags);
 	if (rc)
 		goto out;
@@ -692,6 +718,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 	int rc;
 
 	link = &lgr->lnk[SMC_SINGLE_LINK];
+
+	rc = smc_wr_reg_send(link,
+			     smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
+	if (rc)
+		return SMC_CLC_DECL_INTERR;
+
 	/* send CONFIRM LINK request to client over the RoCE fabric */
 	rc = smc_llc_send_confirm_link(link,
 				       link->smcibdev->mac[link->ibport - 1],
@@ -734,6 +766,12 @@ static void smc_listen_work(struct work_struct *work)
 	u8 prefix_len;
 	u8 ibport;
 
+	/* check if peer is smc capable */
+	if (!tcp_sk(newclcsock->sk)->syn_smc) {
+		new_smc->use_fallback = true;
+		goto out_connected;
+	}
+
 	/* do inband token exchange -
 	 *wait for and receive SMC Proposal CLC message
 	 */
@@ -779,46 +817,50 @@ static void smc_listen_work(struct work_struct *work)
 	mutex_lock(&smc_create_lgr_pending);
 	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
 					smcibdev, ibport, &pclc.lcl, 0);
-	if (local_contact == SMC_REUSE_CONTACT)
-		/* lock no longer needed, free it due to following
-		 * smc_clc_wait_msg() call
-		 */
-		mutex_unlock(&smc_create_lgr_pending);
 	if (local_contact < 0) {
 		rc = local_contact;
 		if (rc == -ENOMEM)
 			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
-		else if (rc == -ENOLINK)
-			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
-		goto decline_rdma;
+		goto decline_rdma_unlock;
 	}
 	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
 
-	rc = smc_sndbuf_create(new_smc);
+	/* create send buffer and rmb */
+	rc = smc_buf_create(new_smc);
 	if (rc) {
 		reason_code = SMC_CLC_DECL_MEM;
-		goto decline_rdma;
-	}
-	rc = smc_rmb_create(new_smc);
-	if (rc) {
-		reason_code = SMC_CLC_DECL_MEM;
-		goto decline_rdma;
+		goto decline_rdma_unlock;
 	}
 
 	smc_close_init(new_smc);
 	smc_rx_init(new_smc);
 
+	if (local_contact != SMC_FIRST_CONTACT) {
+		struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc;
+
+		if (!buf_desc->reused) {
+			/* register memory region for new rmb */
+			rc = smc_wr_reg_send(link,
+					     buf_desc->mr_rx[SMC_SINGLE_LINK]);
+			if (rc) {
+				reason_code = SMC_CLC_DECL_INTERR;
+				goto decline_rdma_unlock;
+			}
+		}
+	}
+	smc_rmb_sync_sg_for_device(&new_smc->conn);
+
 	rc = smc_clc_send_accept(new_smc, local_contact);
 	if (rc)
-		goto out_err;
+		goto out_err_unlock;
 
 	/* receive SMC Confirm CLC message */
 	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
 				       SMC_CLC_CONFIRM);
 	if (reason_code < 0)
-		goto out_err;
+		goto out_err_unlock;
 	if (reason_code > 0)
-		goto decline_rdma;
+		goto decline_rdma_unlock;
 	smc_conn_save_peer_info(new_smc, &cclc);
 	if (local_contact == SMC_FIRST_CONTACT)
 		smc_link_save_peer_info(link, &cclc);
@@ -826,35 +868,34 @@ static void smc_listen_work(struct work_struct *work)
 	rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
 	if (rc) {
 		reason_code = SMC_CLC_DECL_INTERR;
-		goto decline_rdma;
+		goto decline_rdma_unlock;
 	}
 
 	if (local_contact == SMC_FIRST_CONTACT) {
 		rc = smc_ib_ready_link(link);
 		if (rc) {
 			reason_code = SMC_CLC_DECL_INTERR;
-			goto decline_rdma;
+			goto decline_rdma_unlock;
 		}
 		/* QP confirmation over RoCE fabric */
 		reason_code = smc_serv_conf_first_link(new_smc);
 		if (reason_code < 0) {
 			/* peer is not aware of a problem */
 			rc = reason_code;
-			goto out_err;
+			goto out_err_unlock;
 		}
 		if (reason_code > 0)
-			goto decline_rdma;
+			goto decline_rdma_unlock;
 	}
 
 	smc_tx_init(new_smc);
+	mutex_unlock(&smc_create_lgr_pending);
 
 out_connected:
 	sk_refcnt_debug_inc(newsmcsk);
 	if (newsmcsk->sk_state == SMC_INIT)
 		newsmcsk->sk_state = SMC_ACTIVE;
 enqueue:
-	if (local_contact == SMC_FIRST_CONTACT)
-		mutex_unlock(&smc_create_lgr_pending);
 	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
 	if (lsmc->sk.sk_state == SMC_LISTEN) {
 		smc_accept_enqueue(&lsmc->sk, newsmcsk);
@@ -868,17 +909,21 @@ enqueue:
 	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
 	return;
 
+decline_rdma_unlock:
+	mutex_unlock(&smc_create_lgr_pending);
 decline_rdma:
 	/* RDMA setup failed, switch back to TCP */
 	smc_conn_free(&new_smc->conn);
 	new_smc->use_fallback = true;
 	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
-		rc = smc_clc_send_decline(new_smc, reason_code, 0);
+		rc = smc_clc_send_decline(new_smc, reason_code);
 		if (rc < sizeof(struct smc_clc_msg_decline))
 			goto out_err;
 	}
 	goto out_connected;
 
+out_err_unlock:
+	mutex_unlock(&smc_create_lgr_pending);
 out_err:
 	newsmcsk->sk_state = SMC_CLOSED;
 	smc_conn_free(&new_smc->conn);
@@ -935,6 +980,7 @@ static int smc_listen(struct socket *sock, int backlog)
 	 * them to the clc socket -- copy smc socket options to clc socket
 	 */
 	smc_copy_sock_settings_to_clc(smc);
+	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
 
 	rc = kernel_listen(smc->clcsock, backlog);
 	if (rc)
@@ -1377,6 +1423,7 @@ static int __init smc_init(void)
 		goto out_sock;
 	}
 
+	static_branch_enable(&tcp_have_smc);
 	return 0;
 
 out_sock:
@@ -1401,6 +1448,7 @@ static void __exit smc_exit(void)
 		list_del_init(&lgr->list);
 		smc_lgr_free(lgr); /* free link group */
 	}
+	static_branch_disable(&tcp_have_smc);
 	smc_ib_unregister_client();
 	sock_unregister(PF_SMC);
 	proto_unregister(&smc_proto);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 6e44313e4467..0bee9d16cf29 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -149,7 +150,7 @@ struct smc_connection {
 	atomic_t		sndbuf_space;	/* remaining space in sndbuf */
 	u16			tx_cdc_seq;	/* sequence # for CDC send */
 	spinlock_t		send_lock;	/* protect wr_sends */
-	struct work_struct	tx_work;	/* retry of smc_cdc_msg_send */
+	struct delayed_work	tx_work;	/* retry of smc_cdc_msg_send */
 
 	struct smc_host_cdc_msg	local_rx_ctrl;	/* filled during event_handl.
 						 * .prod cf. TCP rcv_nxt
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index a7294edbc221..87f7bede6eab 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -62,10 +63,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
 	bh_unlock_sock(&smc->sk);
 }
 
-int smc_cdc_get_free_slot(struct smc_link *link,
+int smc_cdc_get_free_slot(struct smc_connection *conn,
 			  struct smc_wr_buf **wr_buf,
 			  struct smc_cdc_tx_pend **pend)
 {
+	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+
 	return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
 				       (struct smc_wr_tx_pend_priv **)pend);
 }
@@ -118,8 +121,7 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
 	struct smc_wr_buf *wr_buf;
 	int rc;
 
-	rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
-				   &pend);
+	rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
 	if (rc)
 		return rc;
 
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 8e1d76f26007..149ceda1b088 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -206,7 +207,8 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
 
 struct smc_cdc_tx_pend;
 
-int smc_cdc_get_free_slot(struct smc_link *link, struct smc_wr_buf **wr_buf,
+int smc_cdc_get_free_slot(struct smc_connection *conn,
+			  struct smc_wr_buf **wr_buf,
 			  struct smc_cdc_tx_pend **pend);
 void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
 int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 03ec058d18df..1800e16b2a02 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -95,9 +96,10 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 	}
 	if (clcm->type == SMC_CLC_DECLINE) {
 		reason_code = SMC_CLC_DECL_REPLY;
-		if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis)
-			== SMC_CLC_DECL_SYNCERR)
+		if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
 			smc->conn.lgr->sync_err = true;
+			smc_lgr_terminate(smc->conn.lgr);
+		}
 	}
 
 out:
@@ -105,8 +107,7 @@ out:
 }
 
 /* send CLC DECLINE message across internal TCP socket */
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
-			 u8 out_of_sync)
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
 {
 	struct smc_clc_msg_decline dclc;
 	struct msghdr msg;
@@ -118,7 +119,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
 	dclc.hdr.type = SMC_CLC_DECLINE;
 	dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
 	dclc.hdr.version = SMC_CLC_V1;
-	dclc.hdr.flag = out_of_sync ? 1 : 0;
+	dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
 	memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
 	dclc.peer_diagnosis = htonl(peer_diag_info);
 	memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -204,13 +205,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
 	memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 	hton24(cclc.qpn, link->roce_qp->qp_num);
 	cclc.rmb_rkey =
-		htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
+		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
 	cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
 	cclc.rmbe_alert_token = htonl(conn->alert_token_local);
 	cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
 	cclc.rmbe_size = conn->rmbe_size_short;
-	cclc.rmb_dma_addr =
-		cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
+	cclc.rmb_dma_addr = cpu_to_be64(
+		(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
 	hton24(cclc.psn, link->psn_initial);
 
 	memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -256,13 +257,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
 	memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 	hton24(aclc.qpn, link->roce_qp->qp_num);
 	aclc.rmb_rkey =
-		htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
+		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
 	aclc.conn_idx = 1;			/* as long as 1 RMB = 1 RMBE */
 	aclc.rmbe_alert_token = htonl(conn->alert_token_local);
 	aclc.qp_mtu = link->path_mtu;
 	aclc.rmbe_size = conn->rmbe_size_short,
-	aclc.rmb_dma_addr =
-		cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
+	aclc.rmb_dma_addr = cpu_to_be64(
+		(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
 	hton24(aclc.psn, link->psn_initial);
 	memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 13db8ce177c9..12a9af1539a2 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -106,8 +107,7 @@ struct smc_ib_device;
 
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 		     u8 expected_type);
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
-			 u8 out_of_sync);
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
 int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
 			  u8 ibport);
 int smc_clc_send_confirm(struct smc_sock *smc);
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 3c2e166b5d22..48615d2ac4aa 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -174,15 +175,15 @@ int smc_close_active(struct smc_sock *smc)
 {
 	struct smc_cdc_conn_state_flags *txflags =
 		&smc->conn.local_tx_ctrl.conn_state_flags;
-	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
 	struct smc_connection *conn = &smc->conn;
 	struct sock *sk = &smc->sk;
 	int old_state;
+	long timeout;
 	int rc = 0;
 
-	if (sock_flag(sk, SOCK_LINGER) &&
-	    !(current->flags & PF_EXITING))
-		timeout = sk->sk_lingertime;
+	timeout = current->flags & PF_EXITING ?
+		  0 : sock_flag(sk, SOCK_LINGER) ?
+		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
 
 again:
 	old_state = sk->sk_state;
@@ -208,7 +209,7 @@ again:
 	case SMC_ACTIVE:
 		smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		if (sk->sk_state == SMC_ACTIVE) {
 			/* send close request */
@@ -234,7 +235,7 @@ again:
 		if (!smc_cdc_rxed_any_close(conn))
 			smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		if (sk->sk_err != ECONNABORTED) {
 			/* confirm close from peer */
@@ -263,7 +264,9 @@ again:
 		/* peer sending PeerConnectionClosed will cause transition */
 		break;
 	case SMC_PROCESSABORT:
-		cancel_work_sync(&conn->tx_work);
+		release_sock(sk);
+		cancel_delayed_work_sync(&conn->tx_work);
+		lock_sock(sk);
 		smc_close_abort(conn);
 		sk->sk_state = SMC_CLOSED;
 		smc_close_wait_tx_pends(smc);
@@ -358,7 +361,8 @@ static void smc_close_passive_work(struct work_struct *work)
 	case SMC_PEERCLOSEWAIT1:
 		if (rxflags->peer_done_writing)
 			sk->sk_state = SMC_PEERCLOSEWAIT2;
-		/* fall through to check for closing */
+		/* fall through */
+		/* to check for closing */
 	case SMC_PEERCLOSEWAIT2:
 	case SMC_PEERFINCLOSEWAIT:
 		if (!smc_cdc_rxed_any_close(&smc->conn))
@@ -411,13 +415,14 @@ void smc_close_sock_put_work(struct work_struct *work)
 int smc_close_shutdown_write(struct smc_sock *smc)
 {
 	struct smc_connection *conn = &smc->conn;
-	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
 	struct sock *sk = &smc->sk;
 	int old_state;
+	long timeout;
 	int rc = 0;
 
-	if (sock_flag(sk, SOCK_LINGER))
-		timeout = sk->sk_lingertime;
+	timeout = current->flags & PF_EXITING ?
+		  0 : sock_flag(sk, SOCK_LINGER) ?
+		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
 
 again:
 	old_state = sk->sk_state;
@@ -425,7 +430,7 @@ again:
 	case SMC_ACTIVE:
 		smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		/* send close wr request */
 		rc = smc_close_wr(conn);
@@ -439,7 +444,7 @@ again:
 		if (!smc_cdc_rxed_any_close(conn))
 			smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		/* confirm close from peer */
 		rc = smc_close_wr(conn);
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 4a3d99a8d7cb..ed82506b1b0a 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3ac09a629ea1..2578fbd95664 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -25,8 +26,9 @@
 #include "smc_cdc.h"
 #include "smc_close.h"
 
-#define SMC_LGR_NUM_INCR	256
-#define SMC_LGR_FREE_DELAY	(600 * HZ)
+#define SMC_LGR_NUM_INCR		256
+#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)	/* jiffies */
+#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
 
 static u32 smc_lgr_num;			/* unique link group number */
 
@@ -107,8 +109,15 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
 		__smc_lgr_unregister_conn(conn);
 	}
 	write_unlock_bh(&lgr->conns_lock);
-	if (reduced && !lgr->conns_num)
-		schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY);
+	if (!reduced || lgr->conns_num)
+		return;
+	/* client link group creation always follows the server link group
+	 * creation. For client use a somewhat higher removal delay time,
+	 * otherwise there is a risk of out-of-sync link groups.
+	 */
+	mod_delayed_work(system_wq, &lgr->free_work,
+			 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+						 SMC_LGR_FREE_DELAY_SERV);
 }
 
 static void smc_lgr_free_work(struct work_struct *work)
@@ -175,7 +184,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
 	rc = smc_wr_alloc_link_mem(lnk);
 	if (rc)
 		goto free_lgr;
-	init_waitqueue_head(&lnk->wr_tx_wait);
 	rc = smc_ib_create_protection_domain(lnk);
 	if (rc)
 		goto free_link_mem;
@@ -207,17 +215,14 @@ out:
 	return rc;
 }
 
-static void smc_sndbuf_unuse(struct smc_connection *conn)
+static void smc_buf_unuse(struct smc_connection *conn)
 {
 	if (conn->sndbuf_desc) {
 		conn->sndbuf_desc->used = 0;
 		conn->sndbuf_size = 0;
 	}
-}
-
-static void smc_rmb_unuse(struct smc_connection *conn)
-{
 	if (conn->rmb_desc) {
+		conn->rmb_desc->reused = true;
 		conn->rmb_desc->used = 0;
 		conn->rmbe_size = 0;
 	}
@@ -232,8 +237,7 @@ void smc_conn_free(struct smc_connection *conn)
 		return;
 	smc_cdc_tx_dismiss_slots(conn);
 	smc_lgr_unregister_conn(conn);
-	smc_rmb_unuse(conn);
-	smc_sndbuf_unuse(conn);
+	smc_buf_unuse(conn);
 }
 
 static void smc_link_clear(struct smc_link *lnk)
@@ -246,48 +250,57 @@ static void smc_link_clear(struct smc_link *lnk)
 	smc_wr_free_link_mem(lnk);
 }
 
-static void smc_lgr_free_sndbufs(struct smc_link_group *lgr)
+static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
+			 bool is_rmb)
 {
-	struct smc_buf_desc *sndbuf_desc, *bf_desc;
-	int i;
-
-	for (i = 0; i < SMC_RMBE_SIZES; i++) {
-		list_for_each_entry_safe(sndbuf_desc, bf_desc, &lgr->sndbufs[i],
-					 list) {
-			list_del(&sndbuf_desc->list);
-			smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-					 smc_uncompress_bufsize(i),
-					 sndbuf_desc, DMA_TO_DEVICE);
-			kfree(sndbuf_desc->cpu_addr);
-			kfree(sndbuf_desc);
-		}
+	if (is_rmb) {
+		if (buf_desc->mr_rx[SMC_SINGLE_LINK])
+			smc_ib_put_memory_region(
+					buf_desc->mr_rx[SMC_SINGLE_LINK]);
+		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
+				    DMA_FROM_DEVICE);
+	} else {
+		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
+				    DMA_TO_DEVICE);
 	}
+	sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
+	if (buf_desc->cpu_addr)
+		free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
+	kfree(buf_desc);
 }
 
-static void smc_lgr_free_rmbs(struct smc_link_group *lgr)
+static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
 {
-	struct smc_buf_desc *rmb_desc, *bf_desc;
 	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+	struct smc_buf_desc *buf_desc, *bf_desc;
+	struct list_head *buf_list;
 	int i;
 
 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
-		list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i],
+		if (is_rmb)
+			buf_list = &lgr->rmbs[i];
+		else
+			buf_list = &lgr->sndbufs[i];
+		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
 					 list) {
-			list_del(&rmb_desc->list);
-			smc_ib_buf_unmap(lnk->smcibdev,
-					 smc_uncompress_bufsize(i),
-					 rmb_desc, DMA_FROM_DEVICE);
-			kfree(rmb_desc->cpu_addr);
-			kfree(rmb_desc);
+			list_del(&buf_desc->list);
+			smc_buf_free(buf_desc, lnk, is_rmb);
 		}
 	}
 }
 
+static void smc_lgr_free_bufs(struct smc_link_group *lgr)
+{
+	/* free send buffers */
+	__smc_lgr_free_bufs(lgr, false);
+	/* free rmbs */
+	__smc_lgr_free_bufs(lgr, true);
+}
+
 /* remove a link group */
 void smc_lgr_free(struct smc_link_group *lgr)
 {
-	smc_lgr_free_rmbs(lgr);
-	smc_lgr_free_sndbufs(lgr);
+	smc_lgr_free_bufs(lgr);
 	smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
 	kfree(lgr);
 }
@@ -368,10 +381,14 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
 		if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
 				 &gattr))
 			continue;
-		if (gattr.ndev &&
-		    (vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id)) {
-			lnk->gid = gid;
-			return 0;
+		if (gattr.ndev) {
+			if (is_vlan_dev(gattr.ndev) &&
+			    vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
+				lnk->gid = gid;
+				dev_put(gattr.ndev);
+				return 0;
+			}
+			dev_put(gattr.ndev);
 		}
 	}
 	return -ENODEV;
@@ -452,45 +469,25 @@ out:
 	return rc ? rc : local_contact;
 }
 
-/* try to reuse a sndbuf description slot of the sndbufs list for a certain
- * buf_size; if not available, return NULL
+/* try to reuse a sndbuf or rmb description slot for a certain
+ * buffer size; if not available, return NULL
  */
 static inline
-struct smc_buf_desc *smc_sndbuf_get_slot(struct smc_link_group *lgr,
-					 int compressed_bufsize)
+struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
+				      int compressed_bufsize,
+				      rwlock_t *lock,
+				      struct list_head *buf_list)
 {
-	struct smc_buf_desc *sndbuf_slot;
-
-	read_lock_bh(&lgr->sndbufs_lock);
-	list_for_each_entry(sndbuf_slot, &lgr->sndbufs[compressed_bufsize],
-			    list) {
-		if (cmpxchg(&sndbuf_slot->used, 0, 1) == 0) {
-			read_unlock_bh(&lgr->sndbufs_lock);
-			return sndbuf_slot;
-		}
-	}
-	read_unlock_bh(&lgr->sndbufs_lock);
-	return NULL;
-}
+	struct smc_buf_desc *buf_slot;
 
-/* try to reuse an rmb description slot of the rmbs list for a certain
- * rmbe_size; if not available, return NULL
- */
-static inline
-struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr,
-				      int compressed_bufsize)
-{
-	struct smc_buf_desc *rmb_slot;
-
-	read_lock_bh(&lgr->rmbs_lock);
-	list_for_each_entry(rmb_slot, &lgr->rmbs[compressed_bufsize],
-			    list) {
-		if (cmpxchg(&rmb_slot->used, 0, 1) == 0) {
-			read_unlock_bh(&lgr->rmbs_lock);
-			return rmb_slot;
+	read_lock_bh(lock);
+	list_for_each_entry(buf_slot, buf_list, list) {
+		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
+			read_unlock_bh(lock);
+			return buf_slot;
 		}
 	}
-	read_unlock_bh(&lgr->rmbs_lock);
+	read_unlock_bh(lock);
 	return NULL;
 }
 
@@ -503,136 +500,186 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
 	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
 }
 
-/* create the tx buffer for an SMC socket */
-int smc_sndbuf_create(struct smc_sock *smc)
+static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
+					       bool is_rmb, int bufsize)
 {
-	struct smc_connection *conn = &smc->conn;
-	struct smc_link_group *lgr = conn->lgr;
-	int tmp_bufsize, tmp_bufsize_short;
-	struct smc_buf_desc *sndbuf_desc;
+	struct smc_buf_desc *buf_desc;
+	struct smc_link *lnk;
 	int rc;
 
-	/* use socket send buffer size (w/o overhead) as start value */
-	for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2);
-	     tmp_bufsize_short >= 0; tmp_bufsize_short--) {
-		tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
-		/* check for reusable sndbuf_slot in the link group */
-		sndbuf_desc = smc_sndbuf_get_slot(lgr, tmp_bufsize_short);
-		if (sndbuf_desc) {
-			memset(sndbuf_desc->cpu_addr, 0, tmp_bufsize);
-			break; /* found reusable slot */
-		}
-		/* try to alloc a new send buffer */
-		sndbuf_desc = kzalloc(sizeof(*sndbuf_desc), GFP_KERNEL);
-		if (!sndbuf_desc)
-			break; /* give up with -ENOMEM */
-		sndbuf_desc->cpu_addr = kzalloc(tmp_bufsize,
-						GFP_KERNEL | __GFP_NOWARN |
-						__GFP_NOMEMALLOC |
-						__GFP_NORETRY);
-		if (!sndbuf_desc->cpu_addr) {
-			kfree(sndbuf_desc);
-			sndbuf_desc = NULL;
-			/* if send buffer allocation has failed,
-			 * try a smaller one
-			 */
-			continue;
-		}
-		rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-				    tmp_bufsize, sndbuf_desc,
-				    DMA_TO_DEVICE);
+	/* try to alloc a new buffer */
+	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+	if (!buf_desc)
+		return ERR_PTR(-ENOMEM);
+
+	buf_desc->cpu_addr =
+		(void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
+					 __GFP_NOMEMALLOC |
+					 __GFP_NORETRY | __GFP_ZERO,
+					 get_order(bufsize));
+	if (!buf_desc->cpu_addr) {
+		kfree(buf_desc);
+		return ERR_PTR(-EAGAIN);
+	}
+	buf_desc->order = get_order(bufsize);
+
+	/* build the sg table from the pages */
+	lnk = &lgr->lnk[SMC_SINGLE_LINK];
+	rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
+			    GFP_KERNEL);
+	if (rc) {
+		smc_buf_free(buf_desc, lnk, is_rmb);
+		return ERR_PTR(rc);
+	}
+	sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
+		   buf_desc->cpu_addr, bufsize);
+
+	/* map sg table to DMA address */
+	rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
+			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	/* SMC protocol depends on mapping to one DMA address only */
+	if (rc != 1)  {
+		smc_buf_free(buf_desc, lnk, is_rmb);
+		return ERR_PTR(-EAGAIN);
+	}
+
+	/* create a new memory region for the RMB */
+	if (is_rmb) {
+		rc = smc_ib_get_memory_region(lnk->roce_pd,
+					      IB_ACCESS_REMOTE_WRITE |
+					      IB_ACCESS_LOCAL_WRITE,
+					      buf_desc);
 		if (rc) {
-			kfree(sndbuf_desc->cpu_addr);
-			kfree(sndbuf_desc);
-			sndbuf_desc = NULL;
-			continue; /* if mapping failed, try smaller one */
+			smc_buf_free(buf_desc, lnk, is_rmb);
+			return ERR_PTR(rc);
 		}
-		sndbuf_desc->used = 1;
-		write_lock_bh(&lgr->sndbufs_lock);
-		list_add(&sndbuf_desc->list,
-			 &lgr->sndbufs[tmp_bufsize_short]);
-		write_unlock_bh(&lgr->sndbufs_lock);
-		break;
-	}
-	if (sndbuf_desc && sndbuf_desc->cpu_addr) {
-		conn->sndbuf_desc = sndbuf_desc;
-		conn->sndbuf_size = tmp_bufsize;
-		smc->sk.sk_sndbuf = tmp_bufsize * 2;
-		atomic_set(&conn->sndbuf_space, tmp_bufsize);
-		return 0;
-	} else {
-		return -ENOMEM;
 	}
+
+	return buf_desc;
 }
 
-/* create the RMB for an SMC socket (even though the SMC protocol
- * allows more than one RMB-element per RMB, the Linux implementation
- * uses just one RMB-element per RMB, i.e. uses an extra RMB for every
- * connection in a link group
- */
-int smc_rmb_create(struct smc_sock *smc)
+static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
 {
 	struct smc_connection *conn = &smc->conn;
 	struct smc_link_group *lgr = conn->lgr;
-	int tmp_bufsize, tmp_bufsize_short;
-	struct smc_buf_desc *rmb_desc;
-	int rc;
+	struct smc_buf_desc *buf_desc = NULL;
+	struct list_head *buf_list;
+	int bufsize, bufsize_short;
+	int sk_buf_size;
+	rwlock_t *lock;
+
+	if (is_rmb)
+		/* use socket recv buffer size (w/o overhead) as start value */
+		sk_buf_size = smc->sk.sk_rcvbuf / 2;
+	else
+		/* use socket send buffer size (w/o overhead) as start value */
+		sk_buf_size = smc->sk.sk_sndbuf / 2;
+
+	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
+	     bufsize_short >= 0; bufsize_short--) {
+		/* each pass tries the next smaller buffer size */
+		if (is_rmb) {
+			lock = &lgr->rmbs_lock;
+			buf_list = &lgr->rmbs[bufsize_short];
+		} else {
+			lock = &lgr->sndbufs_lock;
+			buf_list = &lgr->sndbufs[bufsize_short];
+		}
+		bufsize = smc_uncompress_bufsize(bufsize_short);
+		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
+			continue;
 
-	/* use socket recv buffer size (w/o overhead) as start value */
-	for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_rcvbuf / 2);
-	     tmp_bufsize_short >= 0; tmp_bufsize_short--) {
-		tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
-		/* check for reusable rmb_slot in the link group */
-		rmb_desc = smc_rmb_get_slot(lgr, tmp_bufsize_short);
-		if (rmb_desc) {
-			memset(rmb_desc->cpu_addr, 0, tmp_bufsize);
+		/* check for reusable slot in the link group */
+		buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
+		if (buf_desc) {
+			memset(buf_desc->cpu_addr, 0, bufsize);
 			break; /* found reusable slot */
 		}
-		/* try to alloc a new RMB */
-		rmb_desc = kzalloc(sizeof(*rmb_desc), GFP_KERNEL);
-		if (!rmb_desc)
-			break; /* give up with -ENOMEM */
-		rmb_desc->cpu_addr = kzalloc(tmp_bufsize,
-					     GFP_KERNEL | __GFP_NOWARN |
-					     __GFP_NOMEMALLOC |
-					     __GFP_NORETRY);
-		if (!rmb_desc->cpu_addr) {
-			kfree(rmb_desc);
-			rmb_desc = NULL;
-			/* if RMB allocation has failed,
-			 * try a smaller one
-			 */
+
+		buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
+		if (PTR_ERR(buf_desc) == -ENOMEM)
+			break;
+		if (IS_ERR(buf_desc))
 			continue;
-		}
-		rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-				    tmp_bufsize, rmb_desc,
-				    DMA_FROM_DEVICE);
-		if (rc) {
-			kfree(rmb_desc->cpu_addr);
-			kfree(rmb_desc);
-			rmb_desc = NULL;
-			continue; /* if mapping failed, try smaller one */
-		}
-		rmb_desc->rkey[SMC_SINGLE_LINK] =
-			lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey;
-		rmb_desc->used = 1;
-		write_lock_bh(&lgr->rmbs_lock);
-		list_add(&rmb_desc->list,
-			 &lgr->rmbs[tmp_bufsize_short]);
-		write_unlock_bh(&lgr->rmbs_lock);
-		break;
+
+		buf_desc->used = 1;
+		write_lock_bh(lock);
+		list_add(&buf_desc->list, buf_list);
+		write_unlock_bh(lock);
+		break; /* found */
 	}
-	if (rmb_desc && rmb_desc->cpu_addr) {
-		conn->rmb_desc = rmb_desc;
-		conn->rmbe_size = tmp_bufsize;
-		conn->rmbe_size_short = tmp_bufsize_short;
-		smc->sk.sk_rcvbuf = tmp_bufsize * 2;
+
+	if (IS_ERR(buf_desc))
+		return -ENOMEM;
+
+	if (is_rmb) {
+		conn->rmb_desc = buf_desc;
+		conn->rmbe_size = bufsize;
+		conn->rmbe_size_short = bufsize_short;
+		smc->sk.sk_rcvbuf = bufsize * 2;
 		atomic_set(&conn->bytes_to_rcv, 0);
-		conn->rmbe_update_limit = smc_rmb_wnd_update_limit(tmp_bufsize);
-		return 0;
+		conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
 	} else {
-		return -ENOMEM;
+		conn->sndbuf_desc = buf_desc;
+		conn->sndbuf_size = bufsize;
+		smc->sk.sk_sndbuf = bufsize * 2;
+		atomic_set(&conn->sndbuf_space, bufsize);
 	}
+	return 0;
+}
+
+void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
+{
+	struct smc_link_group *lgr = conn->lgr;
+
+	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+			       conn->sndbuf_desc, DMA_TO_DEVICE);
+}
+
+void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
+{
+	struct smc_link_group *lgr = conn->lgr;
+
+	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+				  conn->sndbuf_desc, DMA_TO_DEVICE);
+}
+
+void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
+{
+	struct smc_link_group *lgr = conn->lgr;
+
+	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+			       conn->rmb_desc, DMA_FROM_DEVICE);
+}
+
+void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
+{
+	struct smc_link_group *lgr = conn->lgr;
+
+	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+				  conn->rmb_desc, DMA_FROM_DEVICE);
+}
+
+/* create the send and receive buffer for an SMC socket;
+ * receive buffers are called RMBs;
+ * (even though the SMC protocol allows more than one RMB-element per RMB,
+ * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
+ * extra RMB for every connection in a link group)
+ */
+int smc_buf_create(struct smc_sock *smc)
+{
+	int rc;
+
+	/* create send buffer */
+	rc = __smc_buf_create(smc, false);
+	if (rc)
+		return rc;
+	/* create rmb */
+	rc = __smc_buf_create(smc, true);
+	if (rc)
+		smc_buf_free(smc->conn.sndbuf_desc,
+			     &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
+	return rc;
 }
 
 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index b013cb43a327..fe691bf9af91 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -37,6 +38,14 @@ struct smc_wr_buf {
 	u8	raw[SMC_WR_BUF_SIZE];
 };
 
+#define SMC_WR_REG_MR_WAIT_TIME	(5 * HZ)/* wait time for ib_wr_reg_mr result */
+
+enum smc_wr_reg_state {
+	POSTED,		/* ib_wr_reg_mr request posted */
+	CONFIRMED,	/* ib_wr_reg_mr response: successful */
+	FAILED		/* ib_wr_reg_mr response: failure */
+};
+
 struct smc_link {
 	struct smc_ib_device	*smcibdev;	/* ib-device */
 	u8			ibport;		/* port - values 1 | 2 */
@@ -65,6 +74,10 @@ struct smc_link {
 	u64			wr_rx_id;	/* seq # of last recv WR */
 	u32			wr_rx_cnt;	/* number of WR recv buffers */
 
+	struct ib_reg_wr	wr_reg;		/* WR register memory region */
+	wait_queue_head_t	wr_reg_wait;	/* wait for wr_reg result */
+	enum smc_wr_reg_state	wr_reg_state;	/* state of wr_reg request */
+
 	union ib_gid		gid;		/* gid matching used vlan id */
 	u32			peer_qpn;	/* QP number of peer */
 	enum ib_mtu		path_mtu;	/* used mtu */
@@ -90,14 +103,15 @@ struct smc_link {
 /* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
 struct smc_buf_desc {
 	struct list_head	list;
-	u64			dma_addr[SMC_LINKS_PER_LGR_MAX];
-						/* mapped address of buffer */
 	void			*cpu_addr;	/* virtual address of buffer */
-	u32			rkey[SMC_LINKS_PER_LGR_MAX];
-						/* for rmb only:
-						 * rkey provided to peer
+	struct sg_table		sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
+	struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX];
+						/* for rmb only: memory region
+						 * incl. rkey provided to peer
 						 */
+	u32			order;		/* allocation order */
 	u32			used;		/* currently used / unused */
+	bool			reused;		/* newly created / reused */
 };
 
 struct smc_rtoken {				/* address/key of remote RMB */
@@ -173,9 +187,11 @@ struct smc_clc_msg_accept_confirm;
 
 void smc_lgr_free(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
-int smc_sndbuf_create(struct smc_sock *smc);
-int smc_rmb_create(struct smc_sock *smc);
+int smc_buf_create(struct smc_sock *smc);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
 			    struct smc_clc_msg_accept_confirm *clc);
-
+void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
+void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
+void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
+void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
 #endif
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index b31715505a35..90f1a7f9085c 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -13,6 +14,7 @@
 
 #include <linux/random.h>
 #include <linux/workqueue.h>
+#include <linux/scatterlist.h>
 #include <rdma/ib_verbs.h>
 
 #include "smc_pnet.h"
@@ -192,8 +194,7 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
 {
 	int rc;
 
-	lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
-				   IB_PD_UNSAFE_GLOBAL_RKEY);
+	lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
 	rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
 	if (IS_ERR(lnk->roce_pd))
 		lnk->roce_pd = NULL;
@@ -232,10 +233,10 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
 		.recv_cq = lnk->smcibdev->roce_cq_recv,
 		.srq = NULL,
 		.cap = {
-			.max_send_wr = SMC_WR_BUF_CNT,
 				/* include unsolicited rdma_writes as well,
 				 * there are max. 2 RDMA_WRITE per 1 WR_SEND
 				 */
+			.max_send_wr = SMC_WR_BUF_CNT * 3,
 			.max_recv_wr = SMC_WR_BUF_CNT * 3,
 			.max_send_sge = SMC_IB_MAX_SEND_SGE,
 			.max_recv_sge = 1,
@@ -254,56 +255,132 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
 	return rc;
 }
 
-/* map a new TX or RX buffer to DMA */
-int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
-		   struct smc_buf_desc *buf_slot,
-		   enum dma_data_direction data_direction)
+void smc_ib_put_memory_region(struct ib_mr *mr)
 {
-	int rc = 0;
+	ib_dereg_mr(mr);
+}
 
-	if (buf_slot->dma_addr[SMC_SINGLE_LINK])
-		return rc; /* already mapped */
-	buf_slot->dma_addr[SMC_SINGLE_LINK] =
-		ib_dma_map_single(smcibdev->ibdev, buf_slot->cpu_addr,
-				  buf_size, data_direction);
-	if (ib_dma_mapping_error(smcibdev->ibdev,
-				 buf_slot->dma_addr[SMC_SINGLE_LINK]))
-		rc = -EIO;
-	return rc;
+static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
+{
+	unsigned int offset = 0;
+	int sg_num;
+
+	/* map the largest prefix of a dma mapped SG list */
+	sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
+			      buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+			      buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+			      &offset, PAGE_SIZE);
+
+	return sg_num;
+}
+
+/* Allocate a memory region and map the dma mapped SG list of buf_slot */
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+			     struct smc_buf_desc *buf_slot)
+{
+	if (buf_slot->mr_rx[SMC_SINGLE_LINK])
+		return 0; /* already done */
+
+	buf_slot->mr_rx[SMC_SINGLE_LINK] =
+		ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
+	if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
+		int rc;
+
+		rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
+		buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
+		return rc;
+	}
+
+	if (smc_ib_map_mr_sg(buf_slot) != 1)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* synchronize buffer usage for cpu access */
+void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+			    struct smc_buf_desc *buf_slot,
+			    enum dma_data_direction data_direction)
+{
+	struct scatterlist *sg;
+	unsigned int i;
+
+	/* for now there is just one DMA address */
+	for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
+		    buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+		if (!sg_dma_len(sg))
+			break;
+		ib_dma_sync_single_for_cpu(smcibdev->ibdev,
+					   sg_dma_address(sg),
+					   sg_dma_len(sg),
+					   data_direction);
+	}
+}
+
+/* synchronize buffer usage for device access */
+void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+			       struct smc_buf_desc *buf_slot,
+			       enum dma_data_direction data_direction)
+{
+	struct scatterlist *sg;
+	unsigned int i;
+
+	/* for now there is just one DMA address */
+	for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
+		    buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+		if (!sg_dma_len(sg))
+			break;
+		ib_dma_sync_single_for_device(smcibdev->ibdev,
+					      sg_dma_address(sg),
+					      sg_dma_len(sg),
+					      data_direction);
+	}
 }
 
-void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int buf_size,
+/* Map a new TX or RX buffer SG-table to DMA */
+int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
 		      struct smc_buf_desc *buf_slot,
 		      enum dma_data_direction data_direction)
 {
-	if (!buf_slot->dma_addr[SMC_SINGLE_LINK])
+	int mapped_nents;
+
+	mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
+				     buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+				     buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+				     data_direction);
+	if (!mapped_nents)
+		return -ENOMEM;
+
+	return mapped_nents;
+}
+
+void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+			 struct smc_buf_desc *buf_slot,
+			 enum dma_data_direction data_direction)
+{
+	if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
 		return; /* already unmapped */
-	ib_dma_unmap_single(smcibdev->ibdev, *buf_slot->dma_addr, buf_size,
-			    data_direction);
-	buf_slot->dma_addr[SMC_SINGLE_LINK] = 0;
+
+	ib_dma_unmap_sg(smcibdev->ibdev,
+			buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+			buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+			data_direction);
+	buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
 }
 
 static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
 {
-	struct net_device *ndev;
+	struct ib_gid_attr gattr;
 	int rc;
 
 	rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
-			  &smcibdev->gid[ibport - 1], NULL);
-	/* the SMC protocol requires specification of the roce MAC address;
-	 * if net_device cannot be determined, it can be derived from gid 0
-	 */
-	ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport);
-	if (ndev) {
-		memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN);
-	} else if (!rc) {
-		memcpy(&smcibdev->mac[ibport - 1][0],
-		       &smcibdev->gid[ibport - 1].raw[8], 3);
-		memcpy(&smcibdev->mac[ibport - 1][3],
-		       &smcibdev->gid[ibport - 1].raw[13], 3);
-		smcibdev->mac[ibport - 1][0] &= ~0x02;
-	}
-	return rc;
+			  &smcibdev->gid[ibport - 1], &gattr);
+	if (rc || !gattr.ndev)
+		return -ENODEV;
+
+	memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
+	dev_put(gattr.ndev);
+	return 0;
 }
 
 /* Create an identifier unique for this instance of SMC-R.
@@ -334,6 +411,7 @@ int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
 			   &smcibdev->pattr[ibport - 1]);
 	if (rc)
 		goto out;
+	/* the SMC protocol requires specification of the RoCE MAC address */
 	rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
 	if (rc)
 		goto out;
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index b567152a526d..e90630dadf8e 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -51,12 +52,12 @@ int smc_ib_register_client(void) __init;
 void smc_ib_unregister_client(void);
 bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
 int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
-		   struct smc_buf_desc *buf_slot,
-		   enum dma_data_direction data_direction);
-void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int bufsize,
+int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
 		      struct smc_buf_desc *buf_slot,
 		      enum dma_data_direction data_direction);
+void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+			 struct smc_buf_desc *buf_slot,
+			 enum dma_data_direction data_direction);
 void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
 int smc_ib_create_protection_domain(struct smc_link *lnk);
 void smc_ib_destroy_queue_pair(struct smc_link *lnk);
@@ -65,6 +66,13 @@ int smc_ib_ready_link(struct smc_link *lnk);
 int smc_ib_modify_qp_rts(struct smc_link *lnk);
 int smc_ib_modify_qp_reset(struct smc_link *lnk);
 long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
-
-
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+			     struct smc_buf_desc *buf_slot);
+void smc_ib_put_memory_region(struct ib_mr *mr);
+void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+			    struct smc_buf_desc *buf_slot,
+			    enum dma_data_direction data_direction);
+void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+			       struct smc_buf_desc *buf_slot,
+			       enum dma_data_direction data_direction);
 #endif
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c2f9165d13ef..92fe4cc8c82c 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index b472f853953a..51b27ce90dbd 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 78f7af28ae4f..74568cdbca70 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -181,8 +182,10 @@ static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
 			     sizeof(new_pnetelem->ndev->name)) ||
 		    smc_pnet_same_ibname(pnetelem,
 					 new_pnetelem->smcibdev->ibdev->name,
-					 new_pnetelem->ib_port))
+					 new_pnetelem->ib_port)) {
+			dev_put(pnetelem->ndev);
 			goto found;
+		}
 	}
 	list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
 	rc = 0;
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index c4f1bccd4358..5a29519db976 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index f0c8b089f770..cbf58637ee14 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -148,6 +149,8 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
 				read_done = sock_intr_errno(timeo);
 				break;
 			}
+			if (!timeo)
+				return -EAGAIN;
 		}
 
 		if (!atomic_read(&conn->bytes_to_rcv)) {
@@ -170,6 +173,7 @@ copy:
 				  copylen, conn->rmbe_size - cons.count);
 		chunk_len_sum = chunk_len;
 		chunk_off = cons.count;
+		smc_rmb_sync_sg_for_cpu(conn);
 		for (chunk = 0; chunk < 2; chunk++) {
 			if (!(flags & MSG_TRUNC)) {
 				rc = memcpy_to_msg(msg, rcvbuf_base + chunk_off,
@@ -177,6 +181,7 @@ copy:
 				if (rc) {
 					if (!read_done)
 						read_done = -EFAULT;
+					smc_rmb_sync_sg_for_device(conn);
 					goto out;
 				}
 			}
@@ -190,6 +195,7 @@ copy:
 			chunk_len_sum += chunk_len;
 			chunk_off = 0; /* modulo offset in recv ring buffer */
 		}
+		smc_rmb_sync_sg_for_device(conn);
 
 		/* update cursors */
 		if (!(flags & MSG_PEEK)) {
diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h
index b5b80e1f8b0f..3a32b59bf06c 100644
--- a/net/smc/smc_rx.h
+++ b/net/smc/smc_rx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 21ec1832ab51..c48dc2d5fd3a 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -24,6 +25,8 @@
 #include "smc_cdc.h"
 #include "smc_tx.h"
 
+#define SMC_TX_WORK_DELAY	HZ
+
 /***************************** sndbuf producer *******************************/
 
 /* callback implementation for sk.sk_write_space()
@@ -174,10 +177,12 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
 				  copylen, conn->sndbuf_size - tx_cnt_prep);
 		chunk_len_sum = chunk_len;
 		chunk_off = tx_cnt_prep;
+		smc_sndbuf_sync_sg_for_cpu(conn);
 		for (chunk = 0; chunk < 2; chunk++) {
 			rc = memcpy_from_msg(sndbuf_base + chunk_off,
 					     msg, chunk_len);
 			if (rc) {
+				smc_sndbuf_sync_sg_for_device(conn);
 				if (send_done)
 					return send_done;
 				goto out_err;
@@ -192,6 +197,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
 			chunk_len_sum += chunk_len;
 			chunk_off = 0; /* modulo offset in send ring buffer */
 		}
+		smc_sndbuf_sync_sg_for_device(conn);
 		/* update cursors */
 		smc_curs_add(conn->sndbuf_size, &prep, copylen);
 		smc_curs_write(&conn->tx_curs_prep,
@@ -277,6 +283,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 	struct smc_link_group *lgr = conn->lgr;
 	int to_send, rmbespace;
 	struct smc_link *link;
+	dma_addr_t dma_addr;
 	int num_sges;
 	int rc;
 
@@ -334,12 +341,11 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 		src_len = conn->sndbuf_size - sent.count;
 	}
 	src_len_sum = src_len;
+	dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
 	for (dstchunk = 0; dstchunk < 2; dstchunk++) {
 		num_sges = 0;
 		for (srcchunk = 0; srcchunk < 2; srcchunk++) {
-			sges[srcchunk].addr =
-				conn->sndbuf_desc->dma_addr[SMC_SINGLE_LINK] +
-				src_off;
+			sges[srcchunk].addr = dma_addr + src_off;
 			sges[srcchunk].length = src_len;
 			sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
 			num_sges++;
@@ -391,8 +397,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
 	int rc;
 
 	spin_lock_bh(&conn->send_lock);
-	rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
-				   &pend);
+	rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
 	if (rc < 0) {
 		if (rc == -EBUSY) {
 			struct smc_sock *smc =
@@ -403,7 +408,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
 				goto out_unlock;
 			}
 			rc = 0;
-			schedule_work(&conn->tx_work);
+			schedule_delayed_work(&conn->tx_work,
+					      SMC_TX_WORK_DELAY);
 		}
 		goto out_unlock;
 	}
@@ -427,7 +433,7 @@ out_unlock:
  */
 static void smc_tx_work(struct work_struct *work)
 {
-	struct smc_connection *conn = container_of(work,
+	struct smc_connection *conn = container_of(to_delayed_work(work),
 						   struct smc_connection,
 						   tx_work);
 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
@@ -460,12 +466,12 @@ void smc_tx_consumer_update(struct smc_connection *conn)
 	    ((to_confirm > conn->rmbe_update_limit) &&
 	     ((to_confirm > (conn->rmbe_size / 2)) ||
 	      conn->local_rx_ctrl.prod_flags.write_blocked))) {
-		rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
-					   &wr_buf, &pend);
+		rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
 		if (!rc)
 			rc = smc_cdc_msg_send(conn, wr_buf, pend);
 		if (rc < 0) {
-			schedule_work(&conn->tx_work);
+			schedule_delayed_work(&conn->tx_work,
+					      SMC_TX_WORK_DELAY);
 			return;
 		}
 		smc_curs_write(&conn->rx_curs_confirmed,
@@ -484,6 +490,6 @@ void smc_tx_consumer_update(struct smc_connection *conn)
 void smc_tx_init(struct smc_sock *smc)
 {
 	smc->sk.sk_write_space = smc_tx_write_space;
-	INIT_WORK(&smc->conn.tx_work, smc_tx_work);
+	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
 	spin_lock_init(&smc->conn.send_lock);
 }
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
index 1d6a0dcdcfe6..78255964fa4d 100644
--- a/net/smc/smc_tx.h
+++ b/net/smc/smc_tx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 874ee9f9d796..de4537f66832 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -68,6 +69,16 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
 	int i;
 
 	link = wc->qp->qp_context;
+
+	if (wc->opcode == IB_WC_REG_MR) {
+		if (wc->status)
+			link->wr_reg_state = FAILED;
+		else
+			link->wr_reg_state = CONFIRMED;
+		wake_up(&link->wr_reg_wait);
+		return;
+	}
+
 	pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
 	if (pnd_snd_idx == link->wr_tx_cnt)
 		return;
@@ -234,7 +245,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
 	int rc;
 
 	ib_req_notify_cq(link->smcibdev->roce_cq_send,
-			 IB_CQ_SOLICITED_MASK | IB_CQ_REPORT_MISSED_EVENTS);
+			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
 	pend = container_of(priv, struct smc_wr_tx_pend, priv);
 	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
 			  &failed_wr);
@@ -243,6 +254,52 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
 	return rc;
 }
 
+/* Register a memory region and wait for result. */
+int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
+{
+	struct ib_send_wr *failed_wr = NULL;
+	int rc;
+
+	ib_req_notify_cq(link->smcibdev->roce_cq_send,
+			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+	link->wr_reg_state = POSTED;
+	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
+	link->wr_reg.mr = mr;
+	link->wr_reg.key = mr->rkey;
+	failed_wr = &link->wr_reg.wr;
+	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr);
+	WARN_ON(failed_wr != &link->wr_reg.wr);
+	if (rc)
+		return rc;
+
+	rc = wait_event_interruptible_timeout(link->wr_reg_wait,
+					      (link->wr_reg_state != POSTED),
+					      SMC_WR_REG_MR_WAIT_TIME);
+	if (!rc) {
+		/* timeout - terminate connections */
+		struct smc_link_group *lgr;
+
+		lgr = container_of(link, struct smc_link_group,
+				   lnk[SMC_SINGLE_LINK]);
+		smc_lgr_terminate(lgr);
+		return -EPIPE;
+	}
+	if (rc == -ERESTARTSYS)
+		return -EINTR;
+	switch (link->wr_reg_state) {
+	case CONFIRMED:
+		rc = 0;
+		break;
+	case FAILED:
+		rc = -EIO;
+		break;
+	case POSTED:
+		rc = -EPIPE;
+		break;
+	}
+	return rc;
+}
+
 void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
 			     smc_wr_tx_filter filter,
 			     smc_wr_tx_dismisser dismisser,
@@ -458,6 +515,11 @@ static void smc_wr_init_sge(struct smc_link *lnk)
 		lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
 		lnk->wr_rx_ibs[i].num_sge = 1;
 	}
+	lnk->wr_reg.wr.next = NULL;
+	lnk->wr_reg.wr.num_sge = 0;
+	lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
+	lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
+	lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
 }
 
 void smc_wr_free_link(struct smc_link *lnk)
@@ -602,6 +664,8 @@ int smc_wr_create_link(struct smc_link *lnk)
 	smc_wr_init_sge(lnk);
 	memset(lnk->wr_tx_mask, 0,
 	       BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
+	init_waitqueue_head(&lnk->wr_tx_wait);
+	init_waitqueue_head(&lnk->wr_reg_wait);
 	return rc;
 
 dma_unmap:
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 0b9beeda6053..2acf12b06063 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Shared Memory Communications over RDMA (SMC-R) and RoCE
  *
@@ -102,5 +103,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
 int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
 int smc_wr_rx_post_init(struct smc_link *link);
 void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
+int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr);
 
 #endif /* SMC_WR_H */
diff --git a/net/socket.c b/net/socket.c
index ad22df1ffbd1..42d8e9c9ccd5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -568,7 +568,6 @@ struct socket *sock_alloc(void)
 
 	sock = SOCKET_I(inode);
 
-	kmemcheck_annotate_bitfield(sock, type);
 	inode->i_ino = get_next_ino();
 	inode->i_mode = S_IFSOCK | S_IRWXUGO;
 	inode->i_uid = current_fsuid();
@@ -652,6 +651,20 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(kernel_sendmsg);
 
+int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
+			  struct kvec *vec, size_t num, size_t size)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (!sock->ops->sendmsg_locked)
+		return sock_no_sendmsg_locked(sk, msg, size);
+
+	iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+
+	return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
+}
+EXPORT_SYMBOL(kernel_sendmsg_locked);
+
 static bool skb_is_err_queue(const struct sk_buff *skb)
 {
 	/* pkt_type of skbs enqueued on the error queue are set to
@@ -3376,6 +3389,19 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 }
 EXPORT_SYMBOL(kernel_sendpage);
 
+int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
+			   size_t size, int flags)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sock->ops->sendpage_locked)
+		return sock->ops->sendpage_locked(sk, page, offset, size,
+						  flags);
+
+	return sock_no_sendpage_locked(sk, page, offset, size, flags);
+}
+EXPORT_SYMBOL(kernel_sendpage_locked);
+
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
 {
 	mm_segment_t oldfs = get_fs();
@@ -3405,7 +3431,6 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
 	struct inet_sock *inet;
 	struct ip_options_rcu *opt;
 	u32 overhead = 0;
-	bool owned_by_user;
 #if IS_ENABLED(CONFIG_IPV6)
 	struct ipv6_pinfo *np;
 	struct ipv6_txoptions *optv6 = NULL;
@@ -3414,13 +3439,12 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
 	if (!sk)
 		return overhead;
 
-	owned_by_user = sock_owned_by_user(sk);
 	switch (sk->sk_family) {
 	case AF_INET:
 		inet = inet_sk(sk);
 		overhead += sizeof(struct iphdr);
 		opt = rcu_dereference_protected(inet->inet_opt,
-						owned_by_user);
+						sock_owned_by_user(sk));
 		if (opt)
 			overhead += opt->opt.optlen;
 		return overhead;
@@ -3430,7 +3454,7 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
 		overhead += sizeof(struct ipv6hdr);
 		if (np)
 			optv6 = rcu_dereference_protected(np->opt,
-							  owned_by_user);
+							  sock_owned_by_user(sk));
 		if (optv6)
 			overhead += (optv6->opt_flen + optv6->opt_nflen);
 		return overhead;
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index b5c279b22680..c5fda15ba319 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -29,44 +29,46 @@
 
 static struct workqueue_struct *strp_wq;
 
-struct _strp_rx_msg {
-	/* Internal cb structure. struct strp_rx_msg must be first for passing
+struct _strp_msg {
+	/* Internal cb structure. struct strp_msg must be first for passing
 	 * to upper layer.
 	 */
-	struct strp_rx_msg strp;
+	struct strp_msg strp;
 	int accum_len;
 	int early_eaten;
 };
 
-static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb)
+static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
 {
-	return (struct _strp_rx_msg *)((void *)skb->cb +
+	return (struct _strp_msg *)((void *)skb->cb +
 		offsetof(struct qdisc_skb_cb, data));
 }
 
 /* Lower lock held */
-static void strp_abort_rx_strp(struct strparser *strp, int err)
+static void strp_abort_strp(struct strparser *strp, int err)
 {
-	struct sock *csk = strp->sk;
-
 	/* Unrecoverable error in receive */
 
-	del_timer(&strp->rx_msg_timer);
+	cancel_delayed_work(&strp->msg_timer_work);
 
-	if (strp->rx_stopped)
+	if (strp->stopped)
 		return;
 
-	strp->rx_stopped = 1;
+	strp->stopped = 1;
+
+	if (strp->sk) {
+		struct sock *sk = strp->sk;
 
-	/* Report an error on the lower socket */
-	csk->sk_err = err;
-	csk->sk_error_report(csk);
+		/* sk_err holds a positive errno; callers pass either sign */
+		sk->sk_err = err < 0 ? -err : err;
+		sk->sk_error_report(sk);
+	}
 }
 
-static void strp_start_rx_timer(struct strparser *strp)
+static void strp_start_timer(struct strparser *strp, long timeo)
 {
-	if (strp->sk->sk_rcvtimeo)
-		mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo);
+	if (timeo)
+		mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo);
 }
 
 /* Lower lock held */
@@ -74,46 +76,55 @@ static void strp_parser_err(struct strparser *strp, int err,
 			    read_descriptor_t *desc)
 {
 	desc->error = err;
-	kfree_skb(strp->rx_skb_head);
-	strp->rx_skb_head = NULL;
+	kfree_skb(strp->skb_head);
+	strp->skb_head = NULL;
 	strp->cb.abort_parser(strp, err);
 }
 
 static inline int strp_peek_len(struct strparser *strp)
 {
-	struct socket *sock = strp->sk->sk_socket;
+	if (strp->sk) {
+		struct socket *sock = strp->sk->sk_socket;
+
+		return sock->ops->peek_len(sock);
+	}
+
+	/* If we don't have an associated socket there's nothing to peek.
+	 * Return int max to avoid stopping the strparser.
+	 */
 
-	return sock->ops->peek_len(sock);
+	return INT_MAX;
 }
 
 /* Lower socket lock held */
-static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
-		     unsigned int orig_offset, size_t orig_len)
+static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+		       unsigned int orig_offset, size_t orig_len,
+		       size_t max_msg_size, long timeo)
 {
 	struct strparser *strp = (struct strparser *)desc->arg.data;
-	struct _strp_rx_msg *rxm;
+	struct _strp_msg *stm;
 	struct sk_buff *head, *skb;
 	size_t eaten = 0, cand_len;
 	ssize_t extra;
 	int err;
 	bool cloned_orig = false;
 
-	if (strp->rx_paused)
+	if (strp->paused)
 		return 0;
 
-	head = strp->rx_skb_head;
+	head = strp->skb_head;
 	if (head) {
 		/* Message already in progress */
 
-		rxm = _strp_rx_msg(head);
-		if (unlikely(rxm->early_eaten)) {
+		stm = _strp_msg(head);
+		if (unlikely(stm->early_eaten)) {
 			/* Already some number of bytes on the receive sock
-			 * data saved in rx_skb_head, just indicate they
+			 * data saved in skb_head, just indicate they
 			 * are consumed.
 			 */
-			eaten = orig_len <= rxm->early_eaten ?
-				orig_len : rxm->early_eaten;
-			rxm->early_eaten -= eaten;
+			eaten = orig_len <= stm->early_eaten ?
+				orig_len : stm->early_eaten;
+			stm->early_eaten -= eaten;
 
 			return eaten;
 		}
@@ -126,12 +137,12 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 			 */
 			orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
 			if (!orig_skb) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				desc->error = -ENOMEM;
 				return 0;
 			}
 			if (!pskb_pull(orig_skb, orig_offset)) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				kfree_skb(orig_skb);
 				desc->error = -ENOMEM;
 				return 0;
@@ -140,13 +151,13 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 			orig_offset = 0;
 		}
 
-		if (!strp->rx_skb_nextp) {
+		if (!strp->skb_nextp) {
 			/* We are going to append to the frags_list of head.
 			 * Need to unshare the frag_list.
 			 */
 			err = skb_unclone(head, GFP_ATOMIC);
 			if (err) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				desc->error = err;
 				return 0;
 			}
@@ -165,20 +176,20 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 
 				skb = alloc_skb(0, GFP_ATOMIC);
 				if (!skb) {
-					STRP_STATS_INCR(strp->stats.rx_mem_fail);
+					STRP_STATS_INCR(strp->stats.mem_fail);
 					desc->error = -ENOMEM;
 					return 0;
 				}
 				skb->len = head->len;
 				skb->data_len = head->len;
 				skb->truesize = head->truesize;
-				*_strp_rx_msg(skb) = *_strp_rx_msg(head);
-				strp->rx_skb_nextp = &head->next;
+				*_strp_msg(skb) = *_strp_msg(head);
+				strp->skb_nextp = &head->next;
 				skb_shinfo(skb)->frag_list = head;
-				strp->rx_skb_head = skb;
+				strp->skb_head = skb;
 				head = skb;
 			} else {
-				strp->rx_skb_nextp =
+				strp->skb_nextp =
 				    &skb_shinfo(head)->frag_list;
 			}
 		}
@@ -188,112 +199,112 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 		/* Always clone since we will consume something */
 		skb = skb_clone(orig_skb, GFP_ATOMIC);
 		if (!skb) {
-			STRP_STATS_INCR(strp->stats.rx_mem_fail);
+			STRP_STATS_INCR(strp->stats.mem_fail);
 			desc->error = -ENOMEM;
 			break;
 		}
 
 		cand_len = orig_len - eaten;
 
-		head = strp->rx_skb_head;
+		head = strp->skb_head;
 		if (!head) {
 			head = skb;
-			strp->rx_skb_head = head;
-			/* Will set rx_skb_nextp on next packet if needed */
-			strp->rx_skb_nextp = NULL;
-			rxm = _strp_rx_msg(head);
-			memset(rxm, 0, sizeof(*rxm));
-			rxm->strp.offset = orig_offset + eaten;
+			strp->skb_head = head;
+			/* Will set skb_nextp on next packet if needed */
+			strp->skb_nextp = NULL;
+			stm = _strp_msg(head);
+			memset(stm, 0, sizeof(*stm));
+			stm->strp.offset = orig_offset + eaten;
 		} else {
 			/* Unclone since we may be appending to an skb that we
 			 * already share a frag_list with.
 			 */
 			err = skb_unclone(skb, GFP_ATOMIC);
 			if (err) {
-				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				STRP_STATS_INCR(strp->stats.mem_fail);
 				desc->error = err;
 				break;
 			}
 
-			rxm = _strp_rx_msg(head);
-			*strp->rx_skb_nextp = skb;
-			strp->rx_skb_nextp = &skb->next;
+			stm = _strp_msg(head);
+			*strp->skb_nextp = skb;
+			strp->skb_nextp = &skb->next;
 			head->data_len += skb->len;
 			head->len += skb->len;
 			head->truesize += skb->truesize;
 		}
 
-		if (!rxm->strp.full_len) {
+		if (!stm->strp.full_len) {
 			ssize_t len;
 
 			len = (*strp->cb.parse_msg)(strp, head);
 
 			if (!len) {
 				/* Need more header to determine length */
-				if (!rxm->accum_len) {
+				if (!stm->accum_len) {
 					/* Start RX timer for new message */
-					strp_start_rx_timer(strp);
+					strp_start_timer(strp, timeo);
 				}
-				rxm->accum_len += cand_len;
+				stm->accum_len += cand_len;
 				eaten += cand_len;
-				STRP_STATS_INCR(strp->stats.rx_need_more_hdr);
+				STRP_STATS_INCR(strp->stats.need_more_hdr);
 				WARN_ON(eaten != orig_len);
 				break;
 			} else if (len < 0) {
-				if (len == -ESTRPIPE && rxm->accum_len) {
+				if (len == -ESTRPIPE && stm->accum_len) {
 					len = -ENODATA;
-					strp->rx_unrecov_intr = 1;
+					strp->unrecov_intr = 1;
 				} else {
-					strp->rx_interrupted = 1;
+					strp->interrupted = 1;
 				}
 				strp_parser_err(strp, len, desc);
 				break;
-			} else if (len > strp->sk->sk_rcvbuf) {
+			} else if (len > max_msg_size) {
 				/* Message length exceeds maximum allowed */
-				STRP_STATS_INCR(strp->stats.rx_msg_too_big);
+				STRP_STATS_INCR(strp->stats.msg_too_big);
 				strp_parser_err(strp, -EMSGSIZE, desc);
 				break;
 			} else if (len <= (ssize_t)head->len -
-					  skb->len - rxm->strp.offset) {
+					  skb->len - stm->strp.offset) {
 				/* Length must be into new skb (and also
 				 * greater than zero)
 				 */
-				STRP_STATS_INCR(strp->stats.rx_bad_hdr_len);
+				STRP_STATS_INCR(strp->stats.bad_hdr_len);
 				strp_parser_err(strp, -EPROTO, desc);
 				break;
 			}
 
-			rxm->strp.full_len = len;
+			stm->strp.full_len = len;
 		}
 
-		extra = (ssize_t)(rxm->accum_len + cand_len) -
-			rxm->strp.full_len;
+		extra = (ssize_t)(stm->accum_len + cand_len) -
+			stm->strp.full_len;
 
 		if (extra < 0) {
 			/* Message not complete yet. */
-			if (rxm->strp.full_len - rxm->accum_len >
+			if (stm->strp.full_len - stm->accum_len >
 			    strp_peek_len(strp)) {
-				/* Don't have the whole messages in the socket
-				 * buffer. Set strp->rx_need_bytes to wait for
+				/* Don't have the whole message in the socket
+				 * buffer. Set strp->need_bytes to wait for
 				 * the rest of the message. Also, set "early
 				 * eaten" since we've already buffered the skb
 				 * but don't consume yet per strp_read_sock.
 				 */
 
-				if (!rxm->accum_len) {
+				if (!stm->accum_len) {
 					/* Start RX timer for new message */
-					strp_start_rx_timer(strp);
+					strp_start_timer(strp, timeo);
 				}
 
-				strp->rx_need_bytes = rxm->strp.full_len -
-						       rxm->accum_len;
-				rxm->accum_len += cand_len;
-				rxm->early_eaten = cand_len;
-				STRP_STATS_ADD(strp->stats.rx_bytes, cand_len);
+				strp->need_bytes = stm->strp.full_len -
+						       stm->accum_len;
+				stm->accum_len += cand_len;
+				stm->early_eaten = cand_len;
+				STRP_STATS_ADD(strp->stats.bytes, cand_len);
 				desc->count = 0; /* Stop reading socket */
 				break;
 			}
-			rxm->accum_len += cand_len;
+			stm->accum_len += cand_len;
 			eaten += cand_len;
 			WARN_ON(eaten != orig_len);
 			break;
@@ -308,14 +319,14 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 		eaten += (cand_len - extra);
 
 		/* Hurray, we have a new message! */
-		del_timer(&strp->rx_msg_timer);
-		strp->rx_skb_head = NULL;
-		STRP_STATS_INCR(strp->stats.rx_msgs);
+		cancel_delayed_work(&strp->msg_timer_work);
+		strp->skb_head = NULL;
+		STRP_STATS_INCR(strp->stats.msgs);
 
 		/* Give skb to upper layer */
 		strp->cb.rcv_msg(strp, head);
 
-		if (unlikely(strp->rx_paused)) {
+		if (unlikely(strp->paused)) {
 			/* Upper layer paused strp */
 			break;
 		}
@@ -324,11 +335,33 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 	if (cloned_orig)
 		kfree_skb(orig_skb);
 
-	STRP_STATS_ADD(strp->stats.rx_bytes, eaten);
+	STRP_STATS_ADD(strp->stats.bytes, eaten);
 
 	return eaten;
 }
 
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+		 unsigned int orig_offset, size_t orig_len,
+		 size_t max_msg_size, long timeo)
+{
+	read_descriptor_t desc; /* Dummy arg to __strp_recv */
+
+	desc.arg.data = strp;
+
+	return __strp_recv(&desc, orig_skb, orig_offset, orig_len,
+			   max_msg_size, timeo);
+}
+EXPORT_SYMBOL_GPL(strp_process);
+
+static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+		     unsigned int orig_offset, size_t orig_len)
+{
+	struct strparser *strp = (struct strparser *)desc->arg.data;
+
+	return __strp_recv(desc, orig_skb, orig_offset, orig_len,
+			   strp->sk->sk_rcvbuf, strp->sk->sk_rcvtimeo);
+}
+
 static int default_read_sock_done(struct strparser *strp, int err)
 {
 	return err;
@@ -340,6 +373,9 @@ static int strp_read_sock(struct strparser *strp)
 	struct socket *sock = strp->sk->sk_socket;
 	read_descriptor_t desc;
 
+	if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+		return -EBUSY;
+
 	desc.arg.data = strp;
 	desc.error = 0;
 	desc.count = 1; /* give more than one skb per call */
@@ -355,101 +391,123 @@ static int strp_read_sock(struct strparser *strp)
 /* Lower sock lock held */
 void strp_data_ready(struct strparser *strp)
 {
-	if (unlikely(strp->rx_stopped))
+	if (unlikely(strp->stopped))
 		return;
 
-	/* This check is needed to synchronize with do_strp_rx_work.
-	 * do_strp_rx_work acquires a process lock (lock_sock) whereas
+	/* This check is needed to synchronize with do_strp_work.
+	 * do_strp_work acquires a process lock (lock_sock) whereas
 	 * the lock held here is bh_lock_sock. The two locks can be
 	 * held by different threads at the same time, but bh_lock_sock
 	 * allows a thread in BH context to safely check if the process
 	 * lock is held. In this case, if the lock is held, queue work.
 	 */
 	if (sock_owned_by_user(strp->sk)) {
-		queue_work(strp_wq, &strp->rx_work);
+		queue_work(strp_wq, &strp->work);
 		return;
 	}
 
-	if (strp->rx_paused)
+	if (strp->paused)
 		return;
 
-	if (strp->rx_need_bytes) {
-		if (strp_peek_len(strp) >= strp->rx_need_bytes)
-			strp->rx_need_bytes = 0;
+	if (strp->need_bytes) {
+		if (strp_peek_len(strp) >= strp->need_bytes)
+			strp->need_bytes = 0;
 		else
 			return;
 	}
 
 	if (strp_read_sock(strp) == -ENOMEM)
-		queue_work(strp_wq, &strp->rx_work);
+		queue_work(strp_wq, &strp->work);
 }
 EXPORT_SYMBOL_GPL(strp_data_ready);
 
-static void do_strp_rx_work(struct strparser *strp)
+static void do_strp_work(struct strparser *strp)
 {
 	read_descriptor_t rd_desc;
-	struct sock *csk = strp->sk;
 
 	/* We need the read lock to synchronize with strp_data_ready. We
 	 * need the socket lock for calling strp_read_sock.
 	 */
-	lock_sock(csk);
+	strp->cb.lock(strp);
 
-	if (unlikely(strp->rx_stopped))
+	if (unlikely(strp->stopped))
 		goto out;
 
-	if (strp->rx_paused)
+	if (strp->paused)
 		goto out;
 
 	rd_desc.arg.data = strp;
 
 	if (strp_read_sock(strp) == -ENOMEM)
-		queue_work(strp_wq, &strp->rx_work);
+		queue_work(strp_wq, &strp->work);
 
 out:
-	release_sock(csk);
+	strp->cb.unlock(strp);
 }
 
-static void strp_rx_work(struct work_struct *w)
+static void strp_work(struct work_struct *w)
 {
-	do_strp_rx_work(container_of(w, struct strparser, rx_work));
+	do_strp_work(container_of(w, struct strparser, work));
 }
 
-static void strp_rx_msg_timeout(unsigned long arg)
+static void strp_msg_timeout(struct work_struct *w)
 {
-	struct strparser *strp = (struct strparser *)arg;
+	struct strparser *strp = container_of(w, struct strparser,
+					      msg_timer_work.work);
 
 	/* Message assembly timed out */
-	STRP_STATS_INCR(strp->stats.rx_msg_timeouts);
-	lock_sock(strp->sk);
+	STRP_STATS_INCR(strp->stats.msg_timeouts);
+	strp->cb.lock(strp);
 	strp->cb.abort_parser(strp, ETIMEDOUT);
+	strp->cb.unlock(strp);
+}
+
+static void strp_sock_lock(struct strparser *strp)
+{
+	lock_sock(strp->sk);
+}
+
+static void strp_sock_unlock(struct strparser *strp)
+{
 	release_sock(strp->sk);
 }
 
-int strp_init(struct strparser *strp, struct sock *csk,
-	      struct strp_callbacks *cb)
+int strp_init(struct strparser *strp, struct sock *sk,
+	      const struct strp_callbacks *cb)
 {
-	struct socket *sock = csk->sk_socket;
 
 	if (!cb || !cb->rcv_msg || !cb->parse_msg)
 		return -EINVAL;
 
-	if (!sock->ops->read_sock || !sock->ops->peek_len)
-		return -EAFNOSUPPORT;
-
-	memset(strp, 0, sizeof(*strp));
+	/* The sk (sock) arg determines the mode of the stream parser.
+	 *
+	 * If the sock is set then the strparser is in receive callback mode.
+	 * The upper layer calls strp_data_ready to kick receive processing
+	 * and strparser calls the read_sock function on the socket to
+	 * get packets.
+	 *
+	 * If the sock is not set then the strparser is in general mode.
+	 * The upper layer calls strp_process for each skb to be parsed.
+	 */
 
-	strp->sk = csk;
+	if (!sk) {
+		if (!cb->lock || !cb->unlock)
+			return -EINVAL;
+	}
 
-	setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout,
-		    (unsigned long)strp);
+	memset(strp, 0, sizeof(*strp));
 
-	INIT_WORK(&strp->rx_work, strp_rx_work);
+	strp->sk = sk;
 
+	strp->cb.lock = cb->lock ? : strp_sock_lock;
+	strp->cb.unlock = cb->unlock ? : strp_sock_unlock;
 	strp->cb.rcv_msg = cb->rcv_msg;
 	strp->cb.parse_msg = cb->parse_msg;
 	strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
-	strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp;
+	strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp;
+
+	INIT_DELAYED_WORK(&strp->msg_timer_work, strp_msg_timeout);
+	INIT_WORK(&strp->work, strp_work);
 
 	return 0;
 }
@@ -457,12 +515,12 @@ EXPORT_SYMBOL_GPL(strp_init);
 
 void strp_unpause(struct strparser *strp)
 {
-	strp->rx_paused = 0;
+	strp->paused = 0;
 
-	/* Sync setting rx_paused with RX work */
+	/* Sync setting paused with RX work */
 	smp_mb();
 
-	queue_work(strp_wq, &strp->rx_work);
+	queue_work(strp_wq, &strp->work);
 }
 EXPORT_SYMBOL_GPL(strp_unpause);
 
@@ -471,27 +529,27 @@ EXPORT_SYMBOL_GPL(strp_unpause);
  */
 void strp_done(struct strparser *strp)
 {
-	WARN_ON(!strp->rx_stopped);
+	WARN_ON(!strp->stopped);
 
-	del_timer_sync(&strp->rx_msg_timer);
-	cancel_work_sync(&strp->rx_work);
+	cancel_delayed_work_sync(&strp->msg_timer_work);
+	cancel_work_sync(&strp->work);
 
-	if (strp->rx_skb_head) {
-		kfree_skb(strp->rx_skb_head);
-		strp->rx_skb_head = NULL;
+	if (strp->skb_head) {
+		kfree_skb(strp->skb_head);
+		strp->skb_head = NULL;
 	}
 }
 EXPORT_SYMBOL_GPL(strp_done);
 
 void strp_stop(struct strparser *strp)
 {
-	strp->rx_stopped = 1;
+	strp->stopped = 1;
 }
 EXPORT_SYMBOL_GPL(strp_stop);
 
 void strp_check_rcv(struct strparser *strp)
 {
-	queue_work(strp_wq, &strp->rx_work);
+	queue_work(strp_wq, &strp->work);
 }
 EXPORT_SYMBOL_GPL(strp_check_rcv);
 
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index ea7ffa12e0f9..090658c3da12 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for Linux kernel SUN RPC
 #
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 14e9e53e63d5..c374268b008f 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for Linux kernel rpcsec_gss implementation
 #
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 7b1ee5a0b03c..73165e9ca5bf 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
 		return stat;
 	if (integ_len > buf->len)
 		return stat;
-	if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
-		BUG();
+	if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
+		WARN_ON_ONCE(1);
+		return stat;
+	}
 	/* copy out mic... */
 	if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
-		BUG();
+		return stat;
 	if (mic.len > RPC_MAX_AUTH_SIZE)
 		return stat;
 	mic.data = kmalloc(mic.len, GFP_KERNEL);
@@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
 	BUG_ON(integ_len % 4);
 	*p++ = htonl(integ_len);
 	*p++ = htonl(gc->gc_seq);
-	if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len))
-		BUG();
+	if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
+		WARN_ON_ONCE(1);
+		goto out_err;
+	}
 	if (resbuf->tail[0].iov_base == NULL) {
 		if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
 			goto out_err;
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 5f3d527dff65..75d72e109a04 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/net/sunrpc/auth_null.c
  *
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 82337e1ec9cd..dafd6b870ba3 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/net/sunrpc/auth_unix.c
  *
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index ac701c28f44f..c2c68a15b59d 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -171,10 +171,10 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
 	/*
 	 * Add the temporary list to the backchannel preallocation list
 	 */
-	spin_lock_bh(&xprt->bc_pa_lock);
+	spin_lock(&xprt->bc_pa_lock);
 	list_splice(&tmp_list, &xprt->bc_pa_list);
 	xprt_inc_alloc_count(xprt, min_reqs);
-	spin_unlock_bh(&xprt->bc_pa_lock);
+	spin_unlock(&xprt->bc_pa_lock);
 
 	dprintk("RPC:       setup backchannel transport done\n");
 	return 0;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2e49d1f892b7..a801da812f86 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1491,7 +1491,6 @@ rpc_restart_call(struct rpc_task *task)
 }
 EXPORT_SYMBOL_GPL(rpc_restart_call);
 
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 const char
 *rpc_proc_name(const struct rpc_task *task)
 {
@@ -1505,7 +1504,6 @@ const char
 	} else
 		return "no proc";
 }
-#endif
 
 /*
  * 0.  Initial state
@@ -1519,6 +1517,7 @@ call_start(struct rpc_task *task)
 	struct rpc_clnt	*clnt = task->tk_client;
 	int idx = task->tk_msg.rpc_proc->p_statidx;
 
+	trace_rpc_request(task);
 	dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
 			clnt->cl_program->name, clnt->cl_vers,
 			rpc_proc_name(task),
@@ -1586,6 +1585,7 @@ call_reserveresult(struct rpc_task *task)
 	switch (status) {
 	case -ENOMEM:
 		rpc_delay(task, HZ >> 2);
+		/* fall through */
 	case -EAGAIN:	/* woken up; retry */
 		task->tk_action = call_retry_reserve;
 		return;
@@ -1647,10 +1647,13 @@ call_refreshresult(struct rpc_task *task)
 		/* Use rate-limiting and a max number of retries if refresh
 		 * had status 0 but failed to update the cred.
 		 */
+		/* fall through */
 	case -ETIMEDOUT:
 		rpc_delay(task, 3*HZ);
+		/* fall through */
 	case -EAGAIN:
 		status = -EACCES;
+		/* fall through */
 	case -EKEYEXPIRED:
 		if (!task->tk_cred_retry)
 			break;
@@ -1903,6 +1906,15 @@ call_connect_status(struct rpc_task *task)
 	task->tk_status = 0;
 	switch (status) {
 	case -ECONNREFUSED:
+		/* A positive refusal suggests a rebind is needed. */
+		if (RPC_IS_SOFTCONN(task))
+			break;
+		if (clnt->cl_autobind) {
+			rpc_force_rebind(clnt);
+			task->tk_action = call_bind;
+			return;
+		}
+		/* fall through */
 	case -ECONNRESET:
 	case -ECONNABORTED:
 	case -ENETUNREACH:
@@ -1916,6 +1928,7 @@ call_connect_status(struct rpc_task *task)
 			break;
 		/* retry with existing socket, after a delay */
 		rpc_delay(task, 3*HZ);
+		/* fall through */
 	case -EAGAIN:
 		/* Check for timeouts before looping back to call_bind */
 	case -ETIMEDOUT:
@@ -2017,6 +2030,7 @@ call_transmit_status(struct rpc_task *task)
 			rpc_exit(task, task->tk_status);
 			break;
 		}
+		/* fall through */
 	case -ECONNRESET:
 	case -ECONNABORTED:
 	case -EADDRINUSE:
@@ -2137,25 +2151,25 @@ call_status(struct rpc_task *task)
 		 * were a timeout.
 		 */
 		rpc_delay(task, 3*HZ);
+		/* fall through */
 	case -ETIMEDOUT:
 		task->tk_action = call_timeout;
-		if (!(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT)
-		    && task->tk_client->cl_discrtry)
-			xprt_conditional_disconnect(req->rq_xprt,
-					req->rq_connect_cookie);
 		break;
 	case -ECONNREFUSED:
 	case -ECONNRESET:
 	case -ECONNABORTED:
 		rpc_force_rebind(clnt);
+		/* fall through */
 	case -EADDRINUSE:
 		rpc_delay(task, 3*HZ);
+		/* fall through */
 	case -EPIPE:
 	case -ENOTCONN:
 		task->tk_action = call_bind;
 		break;
 	case -ENOBUFS:
 		rpc_delay(task, HZ>>2);
+		/* fall through */
 	case -EAGAIN:
 		task->tk_action = call_transmit;
 		break;
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index c8fd0b6c1618..e980d2a493de 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /**
  * debugfs interface for sunrpc
  *
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 394ce523174c..7ec10b92bea1 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __SUNRPC_NETNS_H__
 #define __SUNRPC_NETNS_H__
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 61a504fb1ae2..7803f3b6aa53 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1410,8 +1410,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
 		return PTR_ERR(gssd_dentry);
 	}
 
-	dprintk("RPC:       sending pipefs MOUNT notification for net %p%s\n",
-		net, NET_NAME(net));
+	dprintk("RPC:       sending pipefs MOUNT notification for net %x%s\n",
+		net->ns.inum, NET_NAME(net));
 	mutex_lock(&sn->pipefs_sb_lock);
 	sn->pipefs_sb = sb;
 	err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
@@ -1462,8 +1462,8 @@ static void rpc_kill_sb(struct super_block *sb)
 		goto out;
 	}
 	sn->pipefs_sb = NULL;
-	dprintk("RPC:       sending pipefs UMOUNT notification for net %p%s\n",
-		net, NET_NAME(net));
+	dprintk("RPC:       sending pipefs UMOUNT notification for net %x%s\n",
+		net->ns.inum, NET_NAME(net));
 	blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
 					   RPC_PIPEFS_UMOUNT,
 					   sb);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index ea0676f199c8..c526f8fb37c9 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -216,9 +216,9 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
 	smp_wmb(); 
 	sn->rpcb_users = 1;
 	dprintk("RPC:       created new rpcb local clients (rpcb_local_clnt: "
-			"%p, rpcb_local_clnt4: %p) for net %p%s\n",
-			sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
-			net, (net == &init_net) ? " (init_net)" : "");
+		"%p, rpcb_local_clnt4: %p) for net %x%s\n",
+		sn->rpcb_local_clnt, sn->rpcb_local_clnt4,
+		net->ns.inum, (net == &init_net) ? " (init_net)" : "");
 }
 
 /*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 0cc83839c13c..b1b49edd7c4d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -44,7 +44,7 @@ static mempool_t	*rpc_buffer_mempool __read_mostly;
 
 static void			rpc_async_schedule(struct work_struct *);
 static void			 rpc_release_task(struct rpc_task *task);
-static void __rpc_queue_timer_fn(unsigned long ptr);
+static void __rpc_queue_timer_fn(struct timer_list *t);
 
 /*
  * RPC tasks sit here while waiting for conditions to improve.
@@ -228,7 +228,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
 	queue->maxpriority = nr_queues - 1;
 	rpc_reset_waitqueue_priority(queue);
 	queue->qlen = 0;
-	setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue);
+	timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
 	INIT_LIST_HEAD(&queue->timer_list.list);
 	rpc_assign_waitqueue_name(queue, qname);
 }
@@ -274,10 +274,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
 
 static void rpc_set_active(struct rpc_task *task)
 {
-	trace_rpc_task_begin(task->tk_client, task, NULL);
-
 	rpc_task_set_debuginfo(task);
 	set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
+	trace_rpc_task_begin(task->tk_client, task, NULL);
 }
 
 /*
@@ -635,9 +634,9 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
 }
 EXPORT_SYMBOL_GPL(rpc_wake_up_status);
 
-static void __rpc_queue_timer_fn(unsigned long ptr)
+static void __rpc_queue_timer_fn(struct timer_list *t)
 {
-	struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr;
+	struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer);
 	struct rpc_task *task, *n;
 	unsigned long expires, now, timeo;
 
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c73de181467a..56f9eff74150 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -65,10 +65,13 @@ err_proc:
 
 static __net_exit void sunrpc_exit_net(struct net *net)
 {
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
 	rpc_pipefs_exit_net(net);
 	unix_gid_cache_destroy(net);
 	ip_map_cache_destroy(net);
 	rpc_proc_exit(net);
+	WARN_ON_ONCE(!list_empty(&sn->all_clients));
 }
 
 static struct pernet_operations sunrpc_net_ops = {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 85ce0db5b0a6..387cc4add6f6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL_GPL(svc_pool_map);
 static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
 
 static int
-param_set_pool_mode(const char *val, struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
 {
 	int *ip = (int *)kp->arg;
 	struct svc_pool_map *m = &svc_pool_map;
@@ -80,7 +80,7 @@ out:
 }
 
 static int
-param_get_pool_mode(char *buf, struct kernel_param *kp)
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
 {
 	int *ip = (int *)kp->arg;
 
@@ -421,7 +421,7 @@ __svc_init_bc(struct svc_serv *serv)
  */
 static struct svc_serv *
 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
-	     struct svc_serv_ops *ops)
+	     const struct svc_serv_ops *ops)
 {
 	struct svc_serv	*serv;
 	unsigned int vers;
@@ -455,7 +455,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 	serv->sv_xdrsize   = xdrsize;
 	INIT_LIST_HEAD(&serv->sv_tempsocks);
 	INIT_LIST_HEAD(&serv->sv_permsocks);
-	init_timer(&serv->sv_temptimer);
+	timer_setup(&serv->sv_temptimer, NULL, 0);
 	spin_lock_init(&serv->sv_lock);
 
 	__svc_init_bc(serv);
@@ -486,7 +486,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
 struct svc_serv *
 svc_create(struct svc_program *prog, unsigned int bufsize,
-	   struct svc_serv_ops *ops)
+	   const struct svc_serv_ops *ops)
 {
 	return __svc_create(prog, bufsize, /*npools*/1, ops);
 }
@@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create);
 
 struct svc_serv *
 svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
-		  struct svc_serv_ops *ops)
+		  const struct svc_serv_ops *ops)
 {
 	struct svc_serv *serv;
 	unsigned int npools = svc_pool_map_get();
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d16a8b423c20..e8e0831229cf 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -28,7 +28,7 @@ module_param(svc_rpc_per_connection_limit, uint, 0644);
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
-static void svc_age_temp_xprts(unsigned long closure);
+static void svc_age_temp_xprts(struct timer_list *t);
 static void svc_delete_xprt(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
@@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
 	svc_xprt_received(new);
 }
 
-int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
-		    struct net *net, const int family,
-		    const unsigned short port, int flags)
+static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+			    struct net *net, const int family,
+			    const unsigned short port, int flags)
 {
 	struct svc_xprt_class *xcl;
 
@@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp = NULL;
 	int cpu;
-	bool queued = false;
 
 	if (!svc_xprt_has_something_to_do(xprt))
 		goto out;
@@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 
 	atomic_long_inc(&pool->sp_stats.packets);
 
-redo_search:
+	dprintk("svc: transport %p put into queue\n", xprt);
+	spin_lock_bh(&pool->sp_lock);
+	list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+	pool->sp_stats.sockets_queued++;
+	spin_unlock_bh(&pool->sp_lock);
+
 	/* find a thread for this xprt */
 	rcu_read_lock();
 	list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
-		/* Do a lockless check first */
-		if (test_bit(RQ_BUSY, &rqstp->rq_flags))
+		if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
 			continue;
-
-		/*
-		 * Once the xprt has been queued, it can only be dequeued by
-		 * the task that intends to service it. All we can do at that
-		 * point is to try to wake this thread back up so that it can
-		 * do so.
-		 */
-		if (!queued) {
-			spin_lock_bh(&rqstp->rq_lock);
-			if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
-				/* already busy, move on... */
-				spin_unlock_bh(&rqstp->rq_lock);
-				continue;
-			}
-
-			/* this one will do */
-			rqstp->rq_xprt = xprt;
-			svc_xprt_get(xprt);
-			spin_unlock_bh(&rqstp->rq_lock);
-		}
-		rcu_read_unlock();
-
 		atomic_long_inc(&pool->sp_stats.threads_woken);
 		wake_up_process(rqstp->rq_task);
-		put_cpu();
-		goto out;
-	}
-	rcu_read_unlock();
-
-	/*
-	 * We didn't find an idle thread to use, so we need to queue the xprt.
-	 * Do so and then search again. If we find one, we can't hook this one
-	 * up to it directly but we can wake the thread up in the hopes that it
-	 * will pick it up once it searches for a xprt to service.
-	 */
-	if (!queued) {
-		queued = true;
-		dprintk("svc: transport %p put into queue\n", xprt);
-		spin_lock_bh(&pool->sp_lock);
-		list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
-		pool->sp_stats.sockets_queued++;
-		spin_unlock_bh(&pool->sp_lock);
-		goto redo_search;
+		goto out_unlock;
 	}
+	set_bit(SP_CONGESTED, &pool->sp_flags);
 	rqstp = NULL;
+out_unlock:
+	rcu_read_unlock();
 	put_cpu();
 out:
 	trace_svc_xprt_do_enqueue(xprt, rqstp);
@@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp)
 
 static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 {
-	struct svc_xprt *xprt;
 	struct svc_pool		*pool = rqstp->rq_pool;
 	long			time_left = 0;
 
 	/* rq_xprt should be clear on entry */
 	WARN_ON_ONCE(rqstp->rq_xprt);
 
-	/* Normally we will wait up to 5 seconds for any required
-	 * cache information to be provided.
-	 */
-	rqstp->rq_chandle.thread_wait = 5*HZ;
-
-	xprt = svc_xprt_dequeue(pool);
-	if (xprt) {
-		rqstp->rq_xprt = xprt;
-
-		/* As there is a shortage of threads and this request
-		 * had to be queued, don't allow the thread to wait so
-		 * long for cache updates.
-		 */
-		rqstp->rq_chandle.thread_wait = 1*HZ;
-		clear_bit(SP_TASK_PENDING, &pool->sp_flags);
-		return xprt;
-	}
+	rqstp->rq_xprt = svc_xprt_dequeue(pool);
+	if (rqstp->rq_xprt)
+		goto out_found;
 
 	/*
 	 * We have to be able to interrupt this wait
 	 * to bring down the daemons ...
 	 */
 	set_current_state(TASK_INTERRUPTIBLE);
+	smp_mb__before_atomic();
+	clear_bit(SP_CONGESTED, &pool->sp_flags);
 	clear_bit(RQ_BUSY, &rqstp->rq_flags);
-	smp_mb();
+	smp_mb__after_atomic();
 
 	if (likely(rqst_should_sleep(rqstp)))
 		time_left = schedule_timeout(timeout);
@@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 
 	try_to_freeze();
 
-	spin_lock_bh(&rqstp->rq_lock);
 	set_bit(RQ_BUSY, &rqstp->rq_flags);
-	spin_unlock_bh(&rqstp->rq_lock);
-
-	xprt = rqstp->rq_xprt;
-	if (xprt != NULL)
-		return xprt;
+	smp_mb__after_atomic();
+	rqstp->rq_xprt = svc_xprt_dequeue(pool);
+	if (rqstp->rq_xprt)
+		goto out_found;
 
 	if (!time_left)
 		atomic_long_inc(&pool->sp_stats.threads_timedout);
@@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 	if (signalled() || kthread_should_stop())
 		return ERR_PTR(-EINTR);
 	return ERR_PTR(-EAGAIN);
+out_found:
+	/* Normally we will wait up to 5 seconds for any required
+	 * cache information to be provided.
+	 */
+	if (!test_bit(SP_CONGESTED, &pool->sp_flags))
+		rqstp->rq_chandle.thread_wait = 5*HZ;
+	else
+		rqstp->rq_chandle.thread_wait = 1*HZ;
+	return rqstp->rq_xprt;
 }
 
 static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
@@ -785,8 +745,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt
 	serv->sv_tmpcnt++;
 	if (serv->sv_temptimer.function == NULL) {
 		/* setup timer to age temp transports */
-		setup_timer(&serv->sv_temptimer, svc_age_temp_xprts,
-			    (unsigned long)serv);
+		serv->sv_temptimer.function = (TIMER_FUNC_TYPE)svc_age_temp_xprts;
 		mod_timer(&serv->sv_temptimer,
 			  jiffies + svc_conn_age_period * HZ);
 	}
@@ -960,9 +919,9 @@ out:
  * Timer function to close old temporary transports, using
  * a mark-and-sweep algorithm.
  */
-static void svc_age_temp_xprts(unsigned long closure)
+static void svc_age_temp_xprts(struct timer_list *t)
 {
-	struct svc_serv *serv = (struct svc_serv *)closure;
+	struct svc_serv *serv = from_timer(serv, t, sv_temptimer);
 	struct svc_xprt *xprt;
 	struct list_head *le, *next;
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2b720fa35c4f..ff8e06cd067e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -421,6 +421,9 @@ static void svc_data_ready(struct sock *sk)
 		dprintk("svc: socket %p(inet %p), busy=%d\n",
 			svsk, sk,
 			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
 		svsk->sk_odata(sk);
 		if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
 			svc_xprt_enqueue(&svsk->sk_xprt);
@@ -437,6 +440,9 @@ static void svc_write_space(struct sock *sk)
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
 			svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
 		svsk->sk_owspace(sk);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
@@ -687,7 +693,7 @@ static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
 	return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
 }
 
-static struct svc_xprt_ops svc_udp_ops = {
+static const struct svc_xprt_ops svc_udp_ops = {
 	.xpo_create = svc_udp_create,
 	.xpo_recvfrom = svc_udp_recvfrom,
 	.xpo_sendto = svc_udp_sendto,
@@ -760,8 +766,12 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
 	dprintk("svc: socket %p TCP (listen) state change %d\n",
 		sk, sk->sk_state);
 
-	if (svsk)
+	if (svsk) {
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
 		svsk->sk_odata(sk);
+	}
+
 	/*
 	 * This callback may called twice when a new connection
 	 * is established as a child socket inherits everything
@@ -794,6 +804,8 @@ static void svc_tcp_state_change(struct sock *sk)
 	if (!svsk)
 		printk("svc: socket %p: no user data\n", sk);
 	else {
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
 		svsk->sk_ostate(sk);
 		if (sk->sk_state != TCP_ESTABLISHED) {
 			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1001,7 +1013,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
 
 	if (!bc_xprt)
 		return -EAGAIN;
-	spin_lock_bh(&bc_xprt->transport_lock);
+	spin_lock(&bc_xprt->recv_lock);
 	req = xprt_lookup_rqst(bc_xprt, xid);
 	if (!req)
 		goto unlock_notfound;
@@ -1019,7 +1031,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
 	memcpy(dst->iov_base, src->iov_base, src->iov_len);
 	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
 	rqstp->rq_arg.len = 0;
-	spin_unlock_bh(&bc_xprt->transport_lock);
+	spin_unlock(&bc_xprt->recv_lock);
 	return 0;
 unlock_notfound:
 	printk(KERN_NOTICE
@@ -1028,7 +1040,7 @@ unlock_notfound:
 		__func__, ntohl(calldir),
 		bc_xprt, ntohl(xid));
 unlock_eagain:
-	spin_unlock_bh(&bc_xprt->transport_lock);
+	spin_unlock(&bc_xprt->recv_lock);
 	return -EAGAIN;
 }
 
@@ -1229,7 +1241,7 @@ static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
 {
 }
 
-static struct svc_xprt_ops svc_tcp_bc_ops = {
+static const struct svc_xprt_ops svc_tcp_bc_ops = {
 	.xpo_create = svc_bc_tcp_create,
 	.xpo_detach = svc_bc_tcp_sock_detach,
 	.xpo_free = svc_bc_sock_free,
@@ -1263,7 +1275,7 @@ static void svc_cleanup_bc_xprt_sock(void)
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-static struct svc_xprt_ops svc_tcp_ops = {
+static const struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_create = svc_tcp_create,
 	.xpo_recvfrom = svc_tcp_recvfrom,
 	.xpo_sendto = svc_tcp_sendto,
@@ -1381,12 +1393,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 		return ERR_PTR(err);
 	}
 
-	inet->sk_user_data = svsk;
 	svsk->sk_sock = sock;
 	svsk->sk_sk = inet;
 	svsk->sk_ostate = inet->sk_state_change;
 	svsk->sk_odata = inet->sk_data_ready;
 	svsk->sk_owspace = inet->sk_write_space;
+	/*
+	 * This barrier is necessary in order to prevent a race condition
+	 * with svc_data_ready(), svc_tcp_listen_data_ready() and others
+	 * when calling callbacks above.
+	 */
+	wmb();
+	inet->sk_user_data = svsk;
 
 	/* Initialize the socket */
 	if (sock->type == SOCK_DGRAM)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 4654a9934269..333b9d697ae5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -696,9 +696,9 @@ xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
 }
 
 static void
-xprt_init_autodisconnect(unsigned long data)
+xprt_init_autodisconnect(struct timer_list *t)
 {
-	struct rpc_xprt *xprt = (struct rpc_xprt *)data;
+	struct rpc_xprt *xprt = from_timer(xprt, t, timer);
 
 	spin_lock(&xprt->transport_lock);
 	if (!list_empty(&xprt->recv))
@@ -844,6 +844,50 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 }
 EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
 
+/**
+ * xprt_pin_rqst - Pin a request on the transport receive list
+ * @req: Request to pin
+ *
+ * Caller must ensure this is atomic with the call to xprt_lookup_rqst()
+ * so should be holding the xprt receive lock (recv_lock).
+ */
+void xprt_pin_rqst(struct rpc_rqst *req)
+{
+	set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
+}
+EXPORT_SYMBOL_GPL(xprt_pin_rqst);
+
+/**
+ * xprt_unpin_rqst - Unpin a request on the transport receive list
+ * @req: Request to unpin
+ *
+ * Caller should be holding the xprt transport lock.
+ */
+void xprt_unpin_rqst(struct rpc_rqst *req)
+{
+	struct rpc_task *task = req->rq_task;
+
+	clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
+	if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
+		wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
+}
+EXPORT_SYMBOL_GPL(xprt_unpin_rqst);
+
+static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
+__must_hold(&req->rq_xprt->recv_lock)
+{
+	struct rpc_task *task = req->rq_task;
+	
+	if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
+		spin_unlock(&req->rq_xprt->recv_lock);
+		set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
+		wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
+				TASK_UNINTERRUPTIBLE);
+		clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
+		spin_lock(&req->rq_xprt->recv_lock);
+	}
+}
+
 static void xprt_update_rtt(struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
@@ -966,13 +1010,13 @@ void xprt_transmit(struct rpc_task *task)
 			/*
 			 * Add to the list only if we're expecting a reply
 			 */
-			spin_lock_bh(&xprt->transport_lock);
 			/* Update the softirq receive buffer */
 			memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
 					sizeof(req->rq_private_buf));
 			/* Add request to the receive list */
+			spin_lock(&xprt->recv_lock);
 			list_add_tail(&req->rq_list, &xprt->recv);
-			spin_unlock_bh(&xprt->transport_lock);
+			spin_unlock(&xprt->recv_lock);
 			xprt_reset_majortimeo(req);
 			/* Turn off autodisconnect */
 			del_singleshot_timer_sync(&xprt->timer);
@@ -1095,6 +1139,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 	case -EAGAIN:
 		xprt_add_backlog(xprt, task);
 		dprintk("RPC:       waiting for request slot\n");
+		/* fall through */
 	default:
 		task->tk_status = -EAGAIN;
 	}
@@ -1287,12 +1332,16 @@ void xprt_release(struct rpc_task *task)
 		task->tk_ops->rpc_count_stats(task, task->tk_calldata);
 	else if (task->tk_client)
 		rpc_count_iostats(task, task->tk_client->cl_metrics);
+	spin_lock(&xprt->recv_lock);
+	if (!list_empty(&req->rq_list)) {
+		list_del_init(&req->rq_list);
+		xprt_wait_on_pinned_rqst(req);
+	}
+	spin_unlock(&xprt->recv_lock);
 	spin_lock_bh(&xprt->transport_lock);
 	xprt->ops->release_xprt(xprt, task);
 	if (xprt->ops->release_request)
 		xprt->ops->release_request(task);
-	if (!list_empty(&req->rq_list))
-		list_del(&req->rq_list);
 	xprt->last_used = jiffies;
 	xprt_schedule_autodisconnect(xprt);
 	spin_unlock_bh(&xprt->transport_lock);
@@ -1318,6 +1367,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
 
 	spin_lock_init(&xprt->transport_lock);
 	spin_lock_init(&xprt->reserve_lock);
+	spin_lock_init(&xprt->recv_lock);
 
 	INIT_LIST_HEAD(&xprt->free);
 	INIT_LIST_HEAD(&xprt->recv);
@@ -1373,10 +1423,9 @@ found:
 		xprt->idle_timeout = 0;
 	INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
 	if (xprt_has_timer(xprt))
-		setup_timer(&xprt->timer, xprt_init_autodisconnect,
-			    (unsigned long)xprt);
+		timer_setup(&xprt->timer, xprt_init_autodisconnect, 0);
 	else
-		init_timer(&xprt->timer);
+		timer_setup(&xprt->timer, NULL, 0);
 
 	if (strlen(args->servername) > RPC_MAXNETNAMELEN) {
 		xprt_destroy(xprt);
@@ -1396,6 +1445,23 @@ out:
 	return xprt;
 }
 
+static void xprt_destroy_cb(struct work_struct *work)
+{
+	struct rpc_xprt *xprt =
+		container_of(work, struct rpc_xprt, task_cleanup);
+
+	rpc_xprt_debugfs_unregister(xprt);
+	rpc_destroy_wait_queue(&xprt->binding);
+	rpc_destroy_wait_queue(&xprt->pending);
+	rpc_destroy_wait_queue(&xprt->sending);
+	rpc_destroy_wait_queue(&xprt->backlog);
+	kfree(xprt->servername);
+	/*
+	 * Tear down transport state and free the rpc_xprt
+	 */
+	xprt->ops->destroy(xprt);
+}
+
 /**
  * xprt_destroy - destroy an RPC transport, killing off all requests.
  * @xprt: transport to destroy
@@ -1405,22 +1471,19 @@ static void xprt_destroy(struct rpc_xprt *xprt)
 {
 	dprintk("RPC:       destroying transport %p\n", xprt);
 
-	/* Exclude transport connect/disconnect handlers */
+	/*
+	 * Exclude transport connect/disconnect handlers and autoclose
+	 */
 	wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
 
 	del_timer_sync(&xprt->timer);
 
-	rpc_xprt_debugfs_unregister(xprt);
-	rpc_destroy_wait_queue(&xprt->binding);
-	rpc_destroy_wait_queue(&xprt->pending);
-	rpc_destroy_wait_queue(&xprt->sending);
-	rpc_destroy_wait_queue(&xprt->backlog);
-	cancel_work_sync(&xprt->task_cleanup);
-	kfree(xprt->servername);
 	/*
-	 * Tear down transport state and free the rpc_xprt
+	 * Destroy sockets etc from the system workqueue so they can
+	 * safely flush receive work running on rpciod.
 	 */
-	xprt->ops->destroy(xprt);
+	INIT_WORK(&xprt->task_cleanup, xprt_destroy_cb);
+	schedule_work(&xprt->task_cleanup);
 }
 
 static void xprt_destroy_kref(struct kref *kref)
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index ae92a9e9ba52..e2d64c7138c3 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Multipath support for RPC
  *
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index b8213ddce2f2..8bf19e142b6b 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
 
 rpcrdma-y := transport.o rpc_rdma.o verbs.o \
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 03f6b5840764..8b818bb3518a 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015 Oracle.  All rights reserved.
  *
@@ -42,13 +43,14 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
 	req = rpcrdma_create_req(r_xprt);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
-	req->rl_backchannel = true;
+	__set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);
 
 	rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
 				  DMA_TO_DEVICE, GFP_KERNEL);
 	if (IS_ERR(rb))
 		goto out_fail;
 	req->rl_rdmabuf = rb;
+	xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
 
 	size = r_xprt->rx_data.inline_rsize;
 	rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
@@ -202,23 +204,27 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
  */
 int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 {
-	struct rpc_xprt *xprt = rqst->rq_xprt;
-	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
-	struct rpcrdma_msg *headerp;
-
-	headerp = rdmab_to_msg(req->rl_rdmabuf);
-	headerp->rm_xid = rqst->rq_xid;
-	headerp->rm_vers = rpcrdma_version;
-	headerp->rm_credit =
-			cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests);
-	headerp->rm_type = rdma_msg;
-	headerp->rm_body.rm_chunks[0] = xdr_zero;
-	headerp->rm_body.rm_chunks[1] = xdr_zero;
-	headerp->rm_body.rm_chunks[2] = xdr_zero;
-
-	if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN,
-				       &rqst->rq_snd_buf, rpcrdma_noch))
+	__be32 *p;
+
+	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
+	xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
+			req->rl_rdmabuf->rg_base);
+
+	p = xdr_reserve_space(&req->rl_stream, 28);
+	if (unlikely(!p))
+		return -EIO;
+	*p++ = rqst->rq_xid;
+	*p++ = rpcrdma_version;
+	*p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests);
+	*p++ = rdma_msg;
+	*p++ = xdr_zero;
+	*p++ = xdr_zero;
+	*p = xdr_zero;
+
+	if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
+				      &rqst->rq_snd_buf, rpcrdma_noch))
 		return -EIO;
 	return 0;
 }
@@ -271,9 +277,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
  * @xprt: transport receiving the call
  * @rep: receive buffer containing the call
  *
- * Called in the RPC reply handler, which runs in a tasklet.
- * Be quick about it.
- *
  * Operational assumptions:
  *    o Backchannel credits are ignored, just as the NFS server
  *      forechannel currently does
@@ -284,7 +287,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 			     struct rpcrdma_rep *rep)
 {
 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-	struct rpcrdma_msg *headerp;
 	struct svc_serv *bc_serv;
 	struct rpcrdma_req *req;
 	struct rpc_rqst *rqst;
@@ -292,24 +294,15 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 	size_t size;
 	__be32 *p;
 
-	headerp = rdmab_to_msg(rep->rr_rdmabuf);
+	p = xdr_inline_decode(&rep->rr_stream, 0);
+	size = xdr_stream_remaining(&rep->rr_stream);
+
 #ifdef RPCRDMA_BACKCHANNEL_DEBUG
 	pr_info("RPC:       %s: callback XID %08x, length=%u\n",
-		__func__, be32_to_cpu(headerp->rm_xid), rep->rr_len);
-	pr_info("RPC:       %s: %*ph\n", __func__, rep->rr_len, headerp);
+		__func__, be32_to_cpup(p), size);
+	pr_info("RPC:       %s: %*ph\n", __func__, size, p);
 #endif
 
-	/* Sanity check:
-	 * Need at least enough bytes for RPC/RDMA header, as code
-	 * here references the header fields by array offset. Also,
-	 * backward calls are always inline, so ensure there
-	 * are some bytes beyond the RPC/RDMA header.
-	 */
-	if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24)
-		goto out_short;
-	p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN);
-	size = rep->rr_len - RPCRDMA_HDRLEN_MIN;
-
 	/* Grab a free bc rqst */
 	spin_lock(&xprt->bc_pa_lock);
 	if (list_empty(&xprt->bc_pa_list)) {
@@ -325,7 +318,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 	/* Prepare rqst */
 	rqst->rq_reply_bytes_recvd = 0;
 	rqst->rq_bytes_sent = 0;
-	rqst->rq_xid = headerp->rm_xid;
+	rqst->rq_xid = *p;
 
 	rqst->rq_private_buf.len = size;
 	set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
@@ -337,9 +330,9 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 	buf->len = size;
 
 	/* The receive buffer has to be hooked to the rpcrdma_req
-	 * so that it can be reposted after the server is done
-	 * parsing it but just before sending the backward
-	 * direction reply.
+	 * so that it is not released while the req is pointing
+	 * to its buffer, and so that it can be reposted after
+	 * the Upper Layer is done decoding it.
 	 */
 	req = rpcr_to_rdmar(rqst);
 	dprintk("RPC:       %s: attaching rep %p to req %p\n",
@@ -367,13 +360,4 @@ out_overflow:
 	 * when the connection is re-established.
 	 */
 	return;
-
-out_short:
-	pr_warn("RPC/RDMA short backward direction call\n");
-
-	if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
-		xprt_disconnect_done(xprt);
-	else
-		pr_warn("RPC:       %s: reposting rep %p\n",
-			__func__, rep);
 }
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index d3f84bb1d443..29fc84c7ff98 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015 Oracle.  All rights reserved.
  * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -177,7 +178,7 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 /* Use the ib_map_phys_fmr() verb to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  */
-static int
+static struct rpcrdma_mr_seg *
 fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	   int nsegs, bool writing, struct rpcrdma_mw **out)
 {
@@ -188,7 +189,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 
 	mw = rpcrdma_get_mw(r_xprt);
 	if (!mw)
-		return -ENOBUFS;
+		return ERR_PTR(-ENOBUFS);
 
 	pageoff = offset_in_page(seg1->mr_offset);
 	seg1->mr_offset -= pageoff;	/* start of page */
@@ -232,13 +233,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	mw->mw_offset = dma_pages[0] + pageoff;
 
 	*out = mw;
-	return mw->mw_nents;
+	return seg;
 
 out_dmamap_err:
 	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
 	       mw->mw_sg, i);
 	rpcrdma_put_mw(r_xprt, mw);
-	return -EIO;
+	return ERR_PTR(-EIO);
 
 out_maperr:
 	pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
@@ -247,7 +248,7 @@ out_maperr:
 	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
 			mw->mw_sg, mw->mw_nents, mw->mw_dir);
 	rpcrdma_put_mw(r_xprt, mw);
-	return -EIO;
+	return ERR_PTR(-EIO);
 }
 
 /* Invalidate all memory regions that were registered for "req".
@@ -305,28 +306,9 @@ out_reset:
 	}
 }
 
-/* Use a slow, safe mechanism to invalidate all memory regions
- * that were registered for "req".
- */
-static void
-fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-		  bool sync)
-{
-	struct rpcrdma_mw *mw;
-
-	while (!list_empty(&req->rl_registered)) {
-		mw = rpcrdma_pop_mw(&req->rl_registered);
-		if (sync)
-			fmr_op_recover_mr(mw);
-		else
-			rpcrdma_defer_mr_recovery(mw);
-	}
-}
-
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
 	.ro_map				= fmr_op_map,
 	.ro_unmap_sync			= fmr_op_unmap_sync,
-	.ro_unmap_safe			= fmr_op_unmap_safe,
 	.ro_recover_mr			= fmr_op_recover_mr,
 	.ro_open			= fmr_op_open,
 	.ro_maxpages			= fmr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 6aea36a38bfd..773e66e10a15 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015 Oracle.  All rights reserved.
  * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -344,7 +345,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
 /* Post a REG_MR Work Request to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  */
-static int
+static struct rpcrdma_mr_seg *
 frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	    int nsegs, bool writing, struct rpcrdma_mw **out)
 {
@@ -364,7 +365,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 			rpcrdma_defer_mr_recovery(mw);
 		mw = rpcrdma_get_mw(r_xprt);
 		if (!mw)
-			return -ENOBUFS;
+			return ERR_PTR(-ENOBUFS);
 	} while (mw->frmr.fr_state != FRMR_IS_INVALID);
 	frmr = &mw->frmr;
 	frmr->fr_state = FRMR_IS_VALID;
@@ -401,7 +402,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	if (unlikely(n != mw->mw_nents))
 		goto out_mapmr_err;
 
-	dprintk("RPC:       %s: Using frmr %p to map %u segments (%u bytes)\n",
+	dprintk("RPC:       %s: Using frmr %p to map %u segments (%llu bytes)\n",
 		__func__, frmr, mw->mw_nents, mr->length);
 
 	key = (u8)(mr->rkey & 0x000000FF);
@@ -419,7 +420,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
 			 IB_ACCESS_REMOTE_READ;
 
-	rpcrdma_set_signaled(&r_xprt->rx_ep, &reg_wr->wr);
 	rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
 	if (rc)
 		goto out_senderr;
@@ -429,25 +429,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	mw->mw_offset = mr->iova;
 
 	*out = mw;
-	return mw->mw_nents;
+	return seg;
 
 out_dmamap_err:
 	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
 	       mw->mw_sg, i);
 	frmr->fr_state = FRMR_IS_INVALID;
 	rpcrdma_put_mw(r_xprt, mw);
-	return -EIO;
+	return ERR_PTR(-EIO);
 
 out_mapmr_err:
 	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
 	       frmr->fr_mr, n, mw->mw_nents);
 	rpcrdma_defer_mr_recovery(mw);
-	return -EIO;
+	return ERR_PTR(-EIO);
 
 out_senderr:
 	pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
 	rpcrdma_defer_mr_recovery(mw);
-	return -ENOTCONN;
+	return ERR_PTR(-ENOTCONN);
 }
 
 /* Invalidate all memory regions that were registered for "req".
@@ -507,12 +507,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 	f->fr_cqe.done = frwr_wc_localinv_wake;
 	reinit_completion(&f->fr_linv_done);
 
-	/* Initialize CQ count, since there is always a signaled
-	 * WR being posted here.  The new cqcount depends on how
-	 * many SQEs are about to be consumed.
-	 */
-	rpcrdma_init_cqcount(&r_xprt->rx_ep, count);
-
 	/* Transport disconnect drains the receive CQ before it
 	 * replaces the QP. The RPC reply handler won't call us
 	 * unless ri_id->qp is a valid pointer.
@@ -545,7 +539,6 @@ reset_mrs:
 	/* Find and reset the MRs in the LOCAL_INV WRs that did not
 	 * get posted.
 	 */
-	rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
 	while (bad_wr) {
 		f = container_of(bad_wr, struct rpcrdma_frmr,
 				 fr_invwr);
@@ -558,28 +551,9 @@ reset_mrs:
 	goto unmap;
 }
 
-/* Use a slow, safe mechanism to invalidate all memory regions
- * that were registered for "req".
- */
-static void
-frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-		   bool sync)
-{
-	struct rpcrdma_mw *mw;
-
-	while (!list_empty(&req->rl_registered)) {
-		mw = rpcrdma_pop_mw(&req->rl_registered);
-		if (sync)
-			frwr_op_recover_mr(mw);
-		else
-			rpcrdma_defer_mr_recovery(mw);
-	}
-}
-
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map				= frwr_op_map,
 	.ro_unmap_sync			= frwr_op_unmap_sync,
-	.ro_unmap_safe			= frwr_op_unmap_safe,
 	.ro_recover_mr			= frwr_op_recover_mr,
 	.ro_open			= frwr_op_open,
 	.ro_maxpages			= frwr_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index ca4d6e4528f3..ed34dc0f144c 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014-2017 Oracle.  All rights reserved.
  * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -75,11 +76,11 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
 
 	/* Maximum Read list size */
 	maxsegs += 2;	/* segment for head and tail buffers */
-	size = maxsegs * sizeof(struct rpcrdma_read_chunk);
+	size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
 
 	/* Minimal Read chunk size */
 	size += sizeof(__be32);	/* segment count */
-	size += sizeof(struct rpcrdma_segment);
+	size += rpcrdma_segment_maxsz * sizeof(__be32);
 	size += sizeof(__be32);	/* list discriminator */
 
 	dprintk("RPC:       %s: max call header size = %u\n",
@@ -102,7 +103,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
 	/* Maximum Write list size */
 	maxsegs += 2;	/* segment for head and tail buffers */
 	size = sizeof(__be32);		/* segment count */
-	size += maxsegs * sizeof(struct rpcrdma_segment);
+	size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
 	size += sizeof(__be32);	/* list discriminator */
 
 	dprintk("RPC:       %s: max reply header size = %u\n",
@@ -169,40 +170,41 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
 	return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
 }
 
-/* Split "vec" on page boundaries into segments. FMR registers pages,
- * not a byte range. Other modes coalesce these segments into a single
- * MR when they can.
+/* Split @vec on page boundaries into SGEs. FMR registers pages, not
+ * a byte range. Other modes coalesce these SGEs into a single MR
+ * when they can.
+ *
+ * Returns pointer to next available SGE, and bumps the total number
+ * of SGEs consumed.
  */
-static int
-rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
+static struct rpcrdma_mr_seg *
+rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
+		     unsigned int *n)
 {
-	size_t page_offset;
-	u32 remaining;
+	u32 remaining, page_offset;
 	char *base;
 
 	base = vec->iov_base;
 	page_offset = offset_in_page(base);
 	remaining = vec->iov_len;
-	while (remaining && n < RPCRDMA_MAX_SEGS) {
-		seg[n].mr_page = NULL;
-		seg[n].mr_offset = base;
-		seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
-		remaining -= seg[n].mr_len;
-		base += seg[n].mr_len;
-		++n;
+	while (remaining) {
+		seg->mr_page = NULL;
+		seg->mr_offset = base;
+		seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
+		remaining -= seg->mr_len;
+		base += seg->mr_len;
+		++seg;
+		++(*n);
 		page_offset = 0;
 	}
-	return n;
+	return seg;
 }
 
-/*
- * Chunk assembly from upper layer xdr_buf.
+/* Convert @xdrbuf into SGEs no larger than a page each. As they
+ * are registered, these SGEs are then coalesced into RDMA segments
+ * when the selected memreg mode supports it.
  *
- * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk
- * elements. Segments are then coalesced when registered, if possible
- * within the selected memreg mode.
- *
- * Returns positive number of segments converted, or a negative errno.
+ * Returns positive number of SGEs consumed, or a negative errno.
  */
 
 static int
@@ -210,47 +212,41 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
 		     unsigned int pos, enum rpcrdma_chunktype type,
 		     struct rpcrdma_mr_seg *seg)
 {
-	int len, n, p, page_base;
+	unsigned long page_base;
+	unsigned int len, n;
 	struct page **ppages;
 
 	n = 0;
-	if (pos == 0) {
-		n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n);
-		if (n == RPCRDMA_MAX_SEGS)
-			goto out_overflow;
-	}
+	if (pos == 0)
+		seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n);
 
 	len = xdrbuf->page_len;
 	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
 	page_base = offset_in_page(xdrbuf->page_base);
-	p = 0;
-	while (len && n < RPCRDMA_MAX_SEGS) {
-		if (!ppages[p]) {
-			/* alloc the pagelist for receiving buffer */
-			ppages[p] = alloc_page(GFP_ATOMIC);
-			if (!ppages[p])
+	while (len) {
+		if (unlikely(!*ppages)) {
+			/* XXX: Certain upper layer operations do
+			 *	not provide receive buffer pages.
+			 */
+			*ppages = alloc_page(GFP_ATOMIC);
+			if (!*ppages)
 				return -EAGAIN;
 		}
-		seg[n].mr_page = ppages[p];
-		seg[n].mr_offset = (void *)(unsigned long) page_base;
-		seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
-		if (seg[n].mr_len > PAGE_SIZE)
-			goto out_overflow;
-		len -= seg[n].mr_len;
+		seg->mr_page = *ppages;
+		seg->mr_offset = (char *)page_base;
+		seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
+		len -= seg->mr_len;
+		++ppages;
+		++seg;
 		++n;
-		++p;
-		page_base = 0;	/* page offset only applies to first page */
+		page_base = 0;
 	}
 
-	/* Message overflows the seg array */
-	if (len && n == RPCRDMA_MAX_SEGS)
-		goto out_overflow;
-
 	/* When encoding a Read chunk, the tail iovec contains an
 	 * XDR pad and may be omitted.
 	 */
 	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
-		return n;
+		goto out;
 
 	/* When encoding a Write chunk, some servers need to see an
 	 * extra segment for non-XDR-aligned Write chunks. The upper
@@ -258,30 +254,81 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
 	 * for this purpose.
 	 */
 	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
-		return n;
+		goto out;
 
-	if (xdrbuf->tail[0].iov_len) {
-		n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n);
-		if (n == RPCRDMA_MAX_SEGS)
-			goto out_overflow;
-	}
+	if (xdrbuf->tail[0].iov_len)
+		seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
 
+out:
+	if (unlikely(n > RPCRDMA_MAX_SEGS))
+		return -EIO;
 	return n;
+}
+
+static inline int
+encode_item_present(struct xdr_stream *xdr)
+{
+	__be32 *p;
 
-out_overflow:
-	pr_err("rpcrdma: segment array overflow\n");
-	return -EIO;
+	p = xdr_reserve_space(xdr, sizeof(*p));
+	if (unlikely(!p))
+		return -EMSGSIZE;
+
+	*p = xdr_one;
+	return 0;
 }
 
-static inline __be32 *
+static inline int
+encode_item_not_present(struct xdr_stream *xdr)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, sizeof(*p));
+	if (unlikely(!p))
+		return -EMSGSIZE;
+
+	*p = xdr_zero;
+	return 0;
+}
+
+static void
 xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
 {
 	*iptr++ = cpu_to_be32(mw->mw_handle);
 	*iptr++ = cpu_to_be32(mw->mw_length);
-	return xdr_encode_hyper(iptr, mw->mw_offset);
+	xdr_encode_hyper(iptr, mw->mw_offset);
+}
+
+static int
+encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+	if (unlikely(!p))
+		return -EMSGSIZE;
+
+	xdr_encode_rdma_segment(p, mw);
+	return 0;
 }
 
-/* XDR-encode the Read list. Supports encoding a list of read
+static int
+encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
+		    u32 position)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 6 * sizeof(*p));
+	if (unlikely(!p))
+		return -EMSGSIZE;
+
+	*p++ = xdr_one;			/* Item present */
+	*p++ = cpu_to_be32(position);
+	xdr_encode_rdma_segment(p, mw);
+	return 0;
+}
+
+/* Register and XDR encode the Read list. Supports encoding a list of read
  * segments that belong to a single read chunk.
  *
  * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
@@ -290,23 +337,20 @@ xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
  *   N elements, position P (same P for all chunks of same arg!):
  *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
  *
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Read list, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
+ *
+ * Only a single @pos value is currently supported.
  */
-static __be32 *
-rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
-			 struct rpcrdma_req *req, struct rpc_rqst *rqst,
-			 __be32 *iptr, enum rpcrdma_chunktype rtype)
+static noinline int
+rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+			 struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype)
 {
+	struct xdr_stream *xdr = &req->rl_stream;
 	struct rpcrdma_mr_seg *seg;
 	struct rpcrdma_mw *mw;
 	unsigned int pos;
-	int n, nsegs;
-
-	if (rtype == rpcrdma_noch) {
-		*iptr++ = xdr_zero;	/* item not present */
-		return iptr;
-	}
+	int nsegs;
 
 	pos = rqst->rq_snd_buf.head[0].iov_len;
 	if (rtype == rpcrdma_areadch)
@@ -315,40 +359,33 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
 	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
 				     rtype, seg);
 	if (nsegs < 0)
-		return ERR_PTR(nsegs);
+		return nsegs;
 
 	do {
-		n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
-						 false, &mw);
-		if (n < 0)
-			return ERR_PTR(n);
+		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
+						   false, &mw);
+		if (IS_ERR(seg))
+			return PTR_ERR(seg);
 		rpcrdma_push_mw(mw, &req->rl_registered);
 
-		*iptr++ = xdr_one;	/* item present */
-
-		/* All read segments in this chunk
-		 * have the same "position".
-		 */
-		*iptr++ = cpu_to_be32(pos);
-		iptr = xdr_encode_rdma_segment(iptr, mw);
+		if (encode_read_segment(xdr, mw, pos) < 0)
+			return -EMSGSIZE;
 
 		dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n",
 			rqst->rq_task->tk_pid, __func__, pos,
 			mw->mw_length, (unsigned long long)mw->mw_offset,
-			mw->mw_handle, n < nsegs ? "more" : "last");
+			mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
 
 		r_xprt->rx_stats.read_chunk_count++;
-		seg += n;
-		nsegs -= n;
+		nsegs -= mw->mw_nents;
 	} while (nsegs);
 
-	/* Finish Read list */
-	*iptr++ = xdr_zero;	/* Next item not present */
-	return iptr;
+	return 0;
 }
 
-/* XDR-encode the Write list. Supports encoding a list containing
- * one array of plain segments that belong to a single write chunk.
+/* Register and XDR encode the Write list. Supports encoding a list
+ * containing one array of plain segments that belong to a single
+ * write chunk.
  *
  * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
  *
@@ -356,66 +393,65 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
  *   N elements:
  *    1 - N - HLOO - HLOO - ... - HLOO - 0
  *
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Write list, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
+ *
+ * Only a single Write chunk is currently supported.
  */
-static __be32 *
+static noinline int
 rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-			  struct rpc_rqst *rqst, __be32 *iptr,
-			  enum rpcrdma_chunktype wtype)
+			  struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
 {
+	struct xdr_stream *xdr = &req->rl_stream;
 	struct rpcrdma_mr_seg *seg;
 	struct rpcrdma_mw *mw;
-	int n, nsegs, nchunks;
+	int nsegs, nchunks;
 	__be32 *segcount;
 
-	if (wtype != rpcrdma_writech) {
-		*iptr++ = xdr_zero;	/* no Write list present */
-		return iptr;
-	}
-
 	seg = req->rl_segments;
 	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
 				     rqst->rq_rcv_buf.head[0].iov_len,
 				     wtype, seg);
 	if (nsegs < 0)
-		return ERR_PTR(nsegs);
+		return nsegs;
 
-	*iptr++ = xdr_one;	/* Write list present */
-	segcount = iptr++;	/* save location of segment count */
+	if (encode_item_present(xdr) < 0)
+		return -EMSGSIZE;
+	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
+	if (unlikely(!segcount))
+		return -EMSGSIZE;
+	/* Actual value encoded below */
 
 	nchunks = 0;
 	do {
-		n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
-						 true, &mw);
-		if (n < 0)
-			return ERR_PTR(n);
+		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
+						   true, &mw);
+		if (IS_ERR(seg))
+			return PTR_ERR(seg);
 		rpcrdma_push_mw(mw, &req->rl_registered);
 
-		iptr = xdr_encode_rdma_segment(iptr, mw);
+		if (encode_rdma_segment(xdr, mw) < 0)
+			return -EMSGSIZE;
 
-		dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n",
+		dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n",
 			rqst->rq_task->tk_pid, __func__,
 			mw->mw_length, (unsigned long long)mw->mw_offset,
-			mw->mw_handle, n < nsegs ? "more" : "last");
+			mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
 
 		r_xprt->rx_stats.write_chunk_count++;
-		r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+		r_xprt->rx_stats.total_rdma_request += mw->mw_length;
 		nchunks++;
-		seg   += n;
-		nsegs -= n;
+		nsegs -= mw->mw_nents;
 	} while (nsegs);
 
 	/* Update count of segments in this Write chunk */
 	*segcount = cpu_to_be32(nchunks);
 
-	/* Finish Write list */
-	*iptr++ = xdr_zero;	/* Next item not present */
-	return iptr;
+	return 0;
 }
 
-/* XDR-encode the Reply chunk. Supports encoding an array of plain
- * segments that belong to a single write (reply) chunk.
+/* Register and XDR encode the Reply chunk. Supports encoding an array
+ * of plain segments that belong to a single write (reply) chunk.
  *
  * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
  *
@@ -423,81 +459,113 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
  *   N elements:
  *    1 - N - HLOO - HLOO - ... - HLOO
  *
- * Returns a pointer to the XDR word in the RDMA header following
- * the end of the Reply chunk, or an error pointer.
+ * Returns zero on success, or a negative errno if a failure occurred.
+ * @xdr is advanced to the next position in the stream.
  */
-static __be32 *
-rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
-			   struct rpcrdma_req *req, struct rpc_rqst *rqst,
-			   __be32 *iptr, enum rpcrdma_chunktype wtype)
+static noinline int
+rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+			   struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype)
 {
+	struct xdr_stream *xdr = &req->rl_stream;
 	struct rpcrdma_mr_seg *seg;
 	struct rpcrdma_mw *mw;
-	int n, nsegs, nchunks;
+	int nsegs, nchunks;
 	__be32 *segcount;
 
-	if (wtype != rpcrdma_replych) {
-		*iptr++ = xdr_zero;	/* no Reply chunk present */
-		return iptr;
-	}
-
 	seg = req->rl_segments;
 	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
 	if (nsegs < 0)
-		return ERR_PTR(nsegs);
+		return nsegs;
 
-	*iptr++ = xdr_one;	/* Reply chunk present */
-	segcount = iptr++;	/* save location of segment count */
+	if (encode_item_present(xdr) < 0)
+		return -EMSGSIZE;
+	segcount = xdr_reserve_space(xdr, sizeof(*segcount));
+	if (unlikely(!segcount))
+		return -EMSGSIZE;
+	/* Actual value encoded below */
 
 	nchunks = 0;
 	do {
-		n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
-						 true, &mw);
-		if (n < 0)
-			return ERR_PTR(n);
+		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
+						   true, &mw);
+		if (IS_ERR(seg))
+			return PTR_ERR(seg);
 		rpcrdma_push_mw(mw, &req->rl_registered);
 
-		iptr = xdr_encode_rdma_segment(iptr, mw);
+		if (encode_rdma_segment(xdr, mw) < 0)
+			return -EMSGSIZE;
 
 		dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n",
 			rqst->rq_task->tk_pid, __func__,
 			mw->mw_length, (unsigned long long)mw->mw_offset,
-			mw->mw_handle, n < nsegs ? "more" : "last");
+			mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
 
 		r_xprt->rx_stats.reply_chunk_count++;
-		r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+		r_xprt->rx_stats.total_rdma_request += mw->mw_length;
 		nchunks++;
-		seg   += n;
-		nsegs -= n;
+		nsegs -= mw->mw_nents;
 	} while (nsegs);
 
 	/* Update count of segments in the Reply chunk */
 	*segcount = cpu_to_be32(nchunks);
 
-	return iptr;
+	return 0;
+}
+
+/**
+ * rpcrdma_unmap_sendctx - DMA-unmap Send buffers
+ * @sc: sendctx containing SGEs to unmap
+ *
+ */
+void
+rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
+{
+	struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
+	struct ib_sge *sge;
+	unsigned int count;
+
+	dprintk("RPC:       %s: unmapping %u sges for sc=%p\n",
+		__func__, sc->sc_unmap_count, sc);
+
+	/* The first two SGEs contain the transport header and
+	 * the inline buffer. These are always left mapped so
+	 * they can be cheaply re-used.
+	 */
+	sge = &sc->sc_sges[2];
+	for (count = sc->sc_unmap_count; count; ++sge, --count)
+		ib_dma_unmap_page(ia->ri_device,
+				  sge->addr, sge->length, DMA_TO_DEVICE);
+
+	if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
+		smp_mb__after_atomic();
+		wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
+	}
 }
 
-/* Prepare the RPC-over-RDMA header SGE.
+/* Prepare an SGE for the RPC-over-RDMA transport header.
  */
 static bool
 rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 			u32 len)
 {
+	struct rpcrdma_sendctx *sc = req->rl_sendctx;
 	struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
-	struct ib_sge *sge = &req->rl_send_sge[0];
+	struct ib_sge *sge = sc->sc_sges;
 
-	if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) {
-		if (!__rpcrdma_dma_map_regbuf(ia, rb))
-			return false;
-		sge->addr = rdmab_addr(rb);
-		sge->lkey = rdmab_lkey(rb);
-	}
+	if (!rpcrdma_dma_map_regbuf(ia, rb))
+		goto out_regbuf;
+	sge->addr = rdmab_addr(rb);
 	sge->length = len;
+	sge->lkey = rdmab_lkey(rb);
 
 	ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
 				      sge->length, DMA_TO_DEVICE);
-	req->rl_send_wr.num_sge++;
+	sc->sc_wr.num_sge++;
 	return true;
+
+out_regbuf:
+	pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+	return false;
 }
 
 /* Prepare the Send SGEs. The head and tail iovec, and each entry
@@ -507,10 +575,11 @@ static bool
 rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 			 struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
 {
+	struct rpcrdma_sendctx *sc = req->rl_sendctx;
 	unsigned int sge_no, page_base, len, remaining;
 	struct rpcrdma_regbuf *rb = req->rl_sendbuf;
 	struct ib_device *device = ia->ri_device;
-	struct ib_sge *sge = req->rl_send_sge;
+	struct ib_sge *sge = sc->sc_sges;
 	u32 lkey = ia->ri_pd->local_dma_lkey;
 	struct page *page, **ppages;
 
@@ -518,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 	 * DMA-mapped. Sync the content that has changed.
 	 */
 	if (!rpcrdma_dma_map_regbuf(ia, rb))
-		return false;
+		goto out_regbuf;
 	sge_no = 1;
 	sge[sge_no].addr = rdmab_addr(rb);
 	sge[sge_no].length = xdr->head[0].iov_len;
@@ -573,7 +642,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 			sge[sge_no].length = len;
 			sge[sge_no].lkey = lkey;
 
-			req->rl_mapped_sges++;
+			sc->sc_unmap_count++;
 			ppages++;
 			remaining -= len;
 			page_base = 0;
@@ -599,89 +668,109 @@ map_tail:
 			goto out_mapping_err;
 		sge[sge_no].length = len;
 		sge[sge_no].lkey = lkey;
-		req->rl_mapped_sges++;
+		sc->sc_unmap_count++;
 	}
 
 out:
-	req->rl_send_wr.num_sge = sge_no + 1;
+	sc->sc_wr.num_sge += sge_no;
+	if (sc->sc_unmap_count)
+		__set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
 	return true;
 
+out_regbuf:
+	pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+	return false;
+
 out_mapping_overflow:
+	rpcrdma_unmap_sendctx(sc);
 	pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
 	return false;
 
 out_mapping_err:
+	rpcrdma_unmap_sendctx(sc);
 	pr_err("rpcrdma: Send mapping error\n");
 	return false;
 }
 
-bool
-rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
-			  u32 hdrlen, struct xdr_buf *xdr,
-			  enum rpcrdma_chunktype rtype)
+/**
+ * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR
+ * @r_xprt: controlling transport
+ * @req: context of RPC Call being marshalled
+ * @hdrlen: size of transport header, in bytes
+ * @xdr: xdr_buf containing RPC Call
+ * @rtype: chunk type being encoded
+ *
+ * Returns 0 on success; otherwise a negative errno is returned.
+ */
+int
+rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
+			  struct rpcrdma_req *req, u32 hdrlen,
+			  struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
 {
-	req->rl_send_wr.num_sge = 0;
-	req->rl_mapped_sges = 0;
+	req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
+	if (!req->rl_sendctx)
+		return -ENOBUFS;
+	req->rl_sendctx->sc_wr.num_sge = 0;
+	req->rl_sendctx->sc_unmap_count = 0;
+	req->rl_sendctx->sc_req = req;
+	__clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
 
-	if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen))
-		goto out_map;
+	if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
+		return -EIO;
 
 	if (rtype != rpcrdma_areadch)
-		if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype))
-			goto out_map;
-
-	return true;
-
-out_map:
-	pr_err("rpcrdma: failed to DMA map a Send buffer\n");
-	return false;
-}
-
-void
-rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
-{
-	struct ib_device *device = ia->ri_device;
-	struct ib_sge *sge;
-	int count;
+		if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
+			return -EIO;
 
-	sge = &req->rl_send_sge[2];
-	for (count = req->rl_mapped_sges; count--; sge++)
-		ib_dma_unmap_page(device, sge->addr, sge->length,
-				  DMA_TO_DEVICE);
-	req->rl_mapped_sges = 0;
+	return 0;
 }
 
-/*
- * Marshal a request: the primary job of this routine is to choose
- * the transfer modes. See comments below.
+/**
+ * rpcrdma_marshal_req - Marshal and send one RPC request
+ * @r_xprt: controlling transport
+ * @rqst: RPC request to be marshaled
+ *
+ * For the RPC in "rqst", this function:
+ *  - Chooses the transfer mode (e.g., RDMA_MSG or RDMA_NOMSG)
+ *  - Registers Read, Write, and Reply chunks
+ *  - Constructs the transport header
+ *  - Posts a Send WR to send the transport header and request
  *
- * Returns zero on success, otherwise a negative errno.
+ * Returns:
+ *	%0 if the RPC was sent successfully,
+ *	%-ENOTCONN if the connection was lost,
+ *	%-EAGAIN if not enough pages are available for on-demand reply buffer,
+ *	%-ENOBUFS if no MRs are available to register chunks,
+ *	%-EMSGSIZE if the transport header is too small,
+ *	%-EIO if a permanent problem occurred while marshaling.
  */
-
 int
-rpcrdma_marshal_req(struct rpc_rqst *rqst)
+rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
 {
-	struct rpc_xprt *xprt = rqst->rq_xprt;
-	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+	struct xdr_stream *xdr = &req->rl_stream;
 	enum rpcrdma_chunktype rtype, wtype;
-	struct rpcrdma_msg *headerp;
 	bool ddp_allowed;
-	ssize_t hdrlen;
-	size_t rpclen;
-	__be32 *iptr;
+	__be32 *p;
+	int ret;
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
 		return rpcrdma_bc_marshal_reply(rqst);
 #endif
 
-	headerp = rdmab_to_msg(req->rl_rdmabuf);
-	/* don't byte-swap XID, it's already done in request */
-	headerp->rm_xid = rqst->rq_xid;
-	headerp->rm_vers = rpcrdma_version;
-	headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_max_requests);
-	headerp->rm_type = rdma_msg;
+	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
+	xdr_init_encode(xdr, &req->rl_hdrbuf,
+			req->rl_rdmabuf->rg_base);
+
+	/* Fixed header fields */
+	ret = -EMSGSIZE;
+	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+	if (!p)
+		goto out_err;
+	*p++ = rqst->rq_xid;
+	*p++ = rpcrdma_version;
+	*p++ = cpu_to_be32(r_xprt->rx_buf.rb_max_requests);
 
 	/* When the ULP employs a GSS flavor that guarantees integrity
 	 * or privacy, direct data placement of individual data items
@@ -721,22 +810,17 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * by themselves are larger than the inline threshold.
 	 */
 	if (rpcrdma_args_inline(r_xprt, rqst)) {
+		*p++ = rdma_msg;
 		rtype = rpcrdma_noch;
-		rpclen = rqst->rq_snd_buf.len;
 	} else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) {
+		*p++ = rdma_msg;
 		rtype = rpcrdma_readch;
-		rpclen = rqst->rq_snd_buf.head[0].iov_len +
-			 rqst->rq_snd_buf.tail[0].iov_len;
 	} else {
 		r_xprt->rx_stats.nomsg_call_count++;
-		headerp->rm_type = htonl(RDMA_NOMSG);
+		*p++ = rdma_nomsg;
 		rtype = rpcrdma_areadch;
-		rpclen = 0;
 	}
 
-	req->rl_xid = rqst->rq_xid;
-	rpcrdma_insert_req(&r_xprt->rx_buf, req);
-
 	/* This implementation supports the following combinations
 	 * of chunk lists in one RPC-over-RDMA Call message:
 	 *
@@ -759,79 +843,48 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * send a Call message with a Position Zero Read chunk and a
 	 * regular Read chunk at the same time.
 	 */
-	iptr = headerp->rm_body.rm_chunks;
-	iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype);
-	if (IS_ERR(iptr))
+	if (rtype != rpcrdma_noch) {
+		ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
+		if (ret)
+			goto out_err;
+	}
+	ret = encode_item_not_present(xdr);
+	if (ret)
 		goto out_err;
-	iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype);
-	if (IS_ERR(iptr))
+
+	if (wtype == rpcrdma_writech) {
+		ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
+		if (ret)
+			goto out_err;
+	}
+	ret = encode_item_not_present(xdr);
+	if (ret)
 		goto out_err;
-	iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype);
-	if (IS_ERR(iptr))
+
+	if (wtype != rpcrdma_replych)
+		ret = encode_item_not_present(xdr);
+	else
+		ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
+	if (ret)
 		goto out_err;
-	hdrlen = (unsigned char *)iptr - (unsigned char *)headerp;
 
-	dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n",
+	dprintk("RPC: %5u %s: %s/%s: hdrlen %u\n",
 		rqst->rq_task->tk_pid, __func__,
 		transfertypes[rtype], transfertypes[wtype],
-		hdrlen, rpclen);
+		xdr_stream_pos(xdr));
 
-	if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen,
-				       &rqst->rq_snd_buf, rtype)) {
-		iptr = ERR_PTR(-EIO);
+	ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
+					&rqst->rq_snd_buf, rtype);
+	if (ret)
 		goto out_err;
-	}
 	return 0;
 
 out_err:
-	if (PTR_ERR(iptr) != -ENOBUFS) {
-		pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
-		       PTR_ERR(iptr));
+	if (ret != -ENOBUFS) {
+		pr_err("rpcrdma: header marshaling failed (%d)\n", ret);
 		r_xprt->rx_stats.failed_marshal_count++;
 	}
-	return PTR_ERR(iptr);
-}
-
-/*
- * Chase down a received write or reply chunklist to get length
- * RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
- */
-static int
-rpcrdma_count_chunks(struct rpcrdma_rep *rep, int wrchunk, __be32 **iptrp)
-{
-	unsigned int i, total_len;
-	struct rpcrdma_write_chunk *cur_wchunk;
-	char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf);
-
-	i = be32_to_cpu(**iptrp);
-	cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
-	total_len = 0;
-	while (i--) {
-		struct rpcrdma_segment *seg = &cur_wchunk->wc_target;
-		ifdebug(FACILITY) {
-			u64 off;
-			xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
-			dprintk("RPC:       %s: chunk %d@0x%016llx:0x%08x\n",
-				__func__,
-				be32_to_cpu(seg->rs_length),
-				(unsigned long long)off,
-				be32_to_cpu(seg->rs_handle));
-		}
-		total_len += be32_to_cpu(seg->rs_length);
-		++cur_wchunk;
-	}
-	/* check and adjust for properly terminated write chunk */
-	if (wrchunk) {
-		__be32 *w = (__be32 *) cur_wchunk;
-		if (*w++ != xdr_zero)
-			return -1;
-		cur_wchunk = (struct rpcrdma_write_chunk *) w;
-	}
-	if ((char *)cur_wchunk > base + rep->rr_len)
-		return -1;
-
-	*iptrp = (__be32 *) cur_wchunk;
-	return total_len;
+	return ret;
 }
 
 /**
@@ -949,196 +1002,417 @@ rpcrdma_mark_remote_invalidation(struct list_head *mws,
 		}
 }
 
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /* By convention, backchannel calls arrive via rdma_msg type
  * messages, and never populate the chunk lists. This makes
  * the RPC/RDMA header small and fixed in size, so it is
  * straightforward to check the RPC header's direction field.
  */
 static bool
-rpcrdma_is_bcall(struct rpcrdma_msg *headerp)
+rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 {
-	__be32 *p = (__be32 *)headerp;
+	struct xdr_stream *xdr = &rep->rr_stream;
+	__be32 *p;
 
-	if (headerp->rm_type != rdma_msg)
+	if (rep->rr_proc != rdma_msg)
 		return false;
-	if (headerp->rm_body.rm_chunks[0] != xdr_zero)
+
+	/* Peek at stream contents without advancing. */
+	p = xdr_inline_decode(xdr, 0);
+
+	/* Chunk lists */
+	if (*p++ != xdr_zero)
 		return false;
-	if (headerp->rm_body.rm_chunks[1] != xdr_zero)
+	if (*p++ != xdr_zero)
 		return false;
-	if (headerp->rm_body.rm_chunks[2] != xdr_zero)
+	if (*p++ != xdr_zero)
 		return false;
 
-	/* sanity */
-	if (p[7] != headerp->rm_xid)
+	/* RPC header */
+	if (*p++ != rep->rr_xid)
 		return false;
-	/* call direction */
-	if (p[8] != cpu_to_be32(RPC_CALL))
+	if (*p != cpu_to_be32(RPC_CALL))
 		return false;
 
+	/* Now that we are sure this is a backchannel call,
+	 * advance to the RPC header.
+	 */
+	p = xdr_inline_decode(xdr, 3 * sizeof(*p));
+	if (unlikely(!p))
+		goto out_short;
+
+	rpcrdma_bc_receive_call(r_xprt, rep);
+	return true;
+
+out_short:
+	pr_warn("RPC/RDMA short backward direction call\n");
+	if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
+		xprt_disconnect_done(&r_xprt->rx_xprt);
 	return true;
 }
+#else	/* CONFIG_SUNRPC_BACKCHANNEL */
+{
+	return false;
+}
 #endif	/* CONFIG_SUNRPC_BACKCHANNEL */
 
+static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4 * sizeof(*p));
+	if (unlikely(!p))
+		return -EIO;
+
+	ifdebug(FACILITY) {
+		u64 offset;
+		u32 handle;
+
+		handle = be32_to_cpup(p++);
+		*length = be32_to_cpup(p++);
+		xdr_decode_hyper(p, &offset);
+		dprintk("RPC:       %s:   segment %u@0x%016llx:0x%08x\n",
+			__func__, *length, (unsigned long long)offset,
+			handle);
+	} else {
+		*length = be32_to_cpup(p + 1);
+	}
+
+	return 0;
+}
+
+static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
+{
+	u32 segcount, seglength;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, sizeof(*p));
+	if (unlikely(!p))
+		return -EIO;
+
+	*length = 0;
+	segcount = be32_to_cpup(p);
+	while (segcount--) {
+		if (decode_rdma_segment(xdr, &seglength))
+			return -EIO;
+		*length += seglength;
+	}
+
+	dprintk("RPC:       %s: segcount=%u, %u bytes\n",
+		__func__, be32_to_cpup(p), *length);
+	return 0;
+}
+
+/* In RPC-over-RDMA Version One replies, a Read list is never
+ * expected. This decoder is a stub that returns an error if
+ * a Read list is present.
+ */
+static int decode_read_list(struct xdr_stream *xdr)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, sizeof(*p));
+	if (unlikely(!p))
+		return -EIO;
+	if (unlikely(*p != xdr_zero))
+		return -EIO;
+	return 0;
+}
+
+/* Supports only one Write chunk in the Write list
+ */
+static int decode_write_list(struct xdr_stream *xdr, u32 *length)
+{
+	u32 chunklen;
+	bool first;
+	__be32 *p;
+
+	*length = 0;
+	first = true;
+	do {
+		p = xdr_inline_decode(xdr, sizeof(*p));
+		if (unlikely(!p))
+			return -EIO;
+		if (*p == xdr_zero)
+			break;
+		if (!first)
+			return -EIO;
+
+		if (decode_write_chunk(xdr, &chunklen))
+			return -EIO;
+		*length += chunklen;
+		first = false;
+	} while (true);
+	return 0;
+}
+
+static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, sizeof(*p));
+	if (unlikely(!p))
+		return -EIO;
+
+	*length = 0;
+	if (*p != xdr_zero)
+		if (decode_write_chunk(xdr, length))
+			return -EIO;
+	return 0;
+}
+
+static int
+rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
+		   struct rpc_rqst *rqst)
+{
+	struct xdr_stream *xdr = &rep->rr_stream;
+	u32 writelist, replychunk, rpclen;
+	char *base;
+
+	/* Decode the chunk lists */
+	if (decode_read_list(xdr))
+		return -EIO;
+	if (decode_write_list(xdr, &writelist))
+		return -EIO;
+	if (decode_reply_chunk(xdr, &replychunk))
+		return -EIO;
+
+	/* RDMA_MSG sanity checks */
+	if (unlikely(replychunk))
+		return -EIO;
+
+	/* Build the RPC reply's Payload stream in rqst->rq_rcv_buf */
+	base = (char *)xdr_inline_decode(xdr, 0);
+	rpclen = xdr_stream_remaining(xdr);
+	r_xprt->rx_stats.fixup_copy_count +=
+		rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3);
+
+	r_xprt->rx_stats.total_rdma_reply += writelist;
+	return rpclen + xdr_align_size(writelist);
+}
+
+static noinline int
+rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
+{
+	struct xdr_stream *xdr = &rep->rr_stream;
+	u32 writelist, replychunk;
+
+	/* Decode the chunk lists */
+	if (decode_read_list(xdr))
+		return -EIO;
+	if (decode_write_list(xdr, &writelist))
+		return -EIO;
+	if (decode_reply_chunk(xdr, &replychunk))
+		return -EIO;
+
+	/* RDMA_NOMSG sanity checks */
+	if (unlikely(writelist))
+		return -EIO;
+	if (unlikely(!replychunk))
+		return -EIO;
+
+	/* Reply chunk buffer already is the reply vector */
+	r_xprt->rx_stats.total_rdma_reply += replychunk;
+	return replychunk;
+}
+
+static noinline int
+rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
+		     struct rpc_rqst *rqst)
+{
+	struct xdr_stream *xdr = &rep->rr_stream;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, sizeof(*p));
+	if (unlikely(!p))
+		return -EIO;
+
+	switch (*p) {
+	case err_vers:
+		p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+		if (!p)
+			break;
+		dprintk("RPC: %5u: %s: server reports version error (%u-%u)\n",
+			rqst->rq_task->tk_pid, __func__,
+			be32_to_cpup(p), be32_to_cpu(*(p + 1)));
+		break;
+	case err_chunk:
+		dprintk("RPC: %5u: %s: server reports header decoding error\n",
+			rqst->rq_task->tk_pid, __func__);
+		break;
+	default:
+		dprintk("RPC: %5u: %s: server reports unrecognized error %d\n",
+			rqst->rq_task->tk_pid, __func__, be32_to_cpup(p));
+	}
+
+	r_xprt->rx_stats.bad_reply_count++;
+	return -EREMOTEIO;
+}
+
+/* Reconstruct the RPC reply, then complete the RPC and unpin
+ * the rqst while holding the transport lock to ensure the
+ * rqst and rq_task pointers remain stable.
+ */
+void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
+{
+	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+	struct rpc_rqst *rqst = rep->rr_rqst;
+	unsigned long cwnd;
+	int status;
+
+	xprt->reestablish_timeout = 0;
+
+	switch (rep->rr_proc) {
+	case rdma_msg:
+		status = rpcrdma_decode_msg(r_xprt, rep, rqst);
+		break;
+	case rdma_nomsg:
+		status = rpcrdma_decode_nomsg(r_xprt, rep);
+		break;
+	case rdma_error:
+		status = rpcrdma_decode_error(r_xprt, rep, rqst);
+		break;
+	default:
+		status = -EIO;
+	}
+	if (status < 0)
+		goto out_badheader;
+
+out:
+	spin_lock(&xprt->recv_lock);
+	cwnd = xprt->cwnd;
+	xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
+	if (xprt->cwnd > cwnd)
+		xprt_release_rqst_cong(rqst->rq_task);
+
+	xprt_complete_rqst(rqst->rq_task, status);
+	xprt_unpin_rqst(rqst);
+	spin_unlock(&xprt->recv_lock);
+	return;
+
+/* If the incoming reply terminated a pending RPC, the next
+ * RPC call will post a replacement receive buffer as it is
+ * being marshaled.
+ */
+out_badheader:
+	dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
+		rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
+	r_xprt->rx_stats.bad_reply_count++;
+	status = -EIO;
+	goto out;
+}
+
+void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+	/* Invalidate and unmap the data payloads before waking
+	 * the waiting application. This guarantees the memory
+	 * regions are properly fenced from the server before the
+	 * application accesses the data. It also ensures proper
+	 * send flow control: waking the next RPC waits until this
+	 * RPC has relinquished all its Send Queue entries.
+	 */
+	if (!list_empty(&req->rl_registered))
+		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
+						    &req->rl_registered);
+
+	/* Ensure that any DMA mapped pages associated with the Send of
+	 * the RPC Call have been unmapped before the RPC completes: sleep
+	 * uninterruptibly until RPCRDMA_REQ_F_TX_RESOURCES is cleared.
+	 * This protects argument memory not controlled by the RPC client
+	 * from being re-used before we're done with it.
+	 */
+	if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+		r_xprt->rx_stats.reply_waits_for_send++;
+		out_of_line_wait_on_bit(&req->rl_flags,
+					RPCRDMA_REQ_F_TX_RESOURCES,
+					bit_wait,
+					TASK_UNINTERRUPTIBLE);
+	}
+}
+
+/* Reply handling runs in the poll worker thread. Anything that might
+ * wait is deferred to this rr_work handler on a separate workqueue.
+ */
+void rpcrdma_deferred_completion(struct work_struct *work)
+{
+	struct rpcrdma_rep *rep =
+			container_of(work, struct rpcrdma_rep, rr_work);
+	struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
+
+	rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
+	rpcrdma_release_rqst(rep->rr_rxprt, req);
+	rpcrdma_complete_rqst(rep);
+}
+
 /* Process received RPC/RDMA messages.
  *
  * Errors must result in the RPC task either being awakened, or
  * allowed to timeout, to discover the errors at that time.
  */
-void
-rpcrdma_reply_handler(struct work_struct *work)
+void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 {
-	struct rpcrdma_rep *rep =
-			container_of(work, struct rpcrdma_rep, rr_work);
 	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
-	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-	struct rpcrdma_msg *headerp;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	struct rpcrdma_req *req;
 	struct rpc_rqst *rqst;
-	__be32 *iptr;
-	int rdmalen, status, rmerr;
-	unsigned long cwnd;
-	struct list_head mws;
+	u32 credits;
+	__be32 *p;
 
 	dprintk("RPC:       %s: incoming rep %p\n", __func__, rep);
 
-	if (rep->rr_len == RPCRDMA_BAD_LEN)
+	if (rep->rr_hdrbuf.head[0].iov_len == 0)
 		goto out_badstatus;
-	if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
+
+	xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
+			rep->rr_hdrbuf.head[0].iov_base);
+
+	/* Fixed transport header fields */
+	p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
+	if (unlikely(!p))
 		goto out_shortreply;
+	rep->rr_xid = *p++;
+	rep->rr_vers = *p++;
+	credits = be32_to_cpu(*p++);
+	rep->rr_proc = *p++;
 
-	headerp = rdmab_to_msg(rep->rr_rdmabuf);
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-	if (rpcrdma_is_bcall(headerp))
-		goto out_bcall;
-#endif
+	if (rep->rr_vers != rpcrdma_version)
+		goto out_badversion;
+
+	if (rpcrdma_is_bcall(r_xprt, rep))
+		return;
 
 	/* Match incoming rpcrdma_rep to an rpcrdma_req to
 	 * get context for handling any incoming chunks.
 	 */
-	spin_lock(&buf->rb_lock);
-	req = rpcrdma_lookup_req_locked(&r_xprt->rx_buf,
-					headerp->rm_xid);
-	if (!req)
-		goto out_nomatch;
-	if (req->rl_reply)
-		goto out_duplicate;
-
-	list_replace_init(&req->rl_registered, &mws);
-	rpcrdma_mark_remote_invalidation(&mws, rep);
-
-	/* Avoid races with signals and duplicate replies
-	 * by marking this req as matched.
-	 */
-	req->rl_reply = rep;
-	spin_unlock(&buf->rb_lock);
-
-	dprintk("RPC:       %s: reply %p completes request %p (xid 0x%08x)\n",
-		__func__, rep, req, be32_to_cpu(headerp->rm_xid));
-
-	/* Invalidate and unmap the data payloads before waking the
-	 * waiting application. This guarantees the memory regions
-	 * are properly fenced from the server before the application
-	 * accesses the data. It also ensures proper send flow control:
-	 * waking the next RPC waits until this RPC has relinquished
-	 * all its Send Queue entries.
-	 */
-	if (!list_empty(&mws))
-		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws);
-
-	/* Perform XID lookup, reconstruction of the RPC reply, and
-	 * RPC completion while holding the transport lock to ensure
-	 * the rep, rqst, and rq_task pointers remain stable.
-	 */
-	spin_lock_bh(&xprt->transport_lock);
-	rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
+	spin_lock(&xprt->recv_lock);
+	rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
 	if (!rqst)
 		goto out_norqst;
-	xprt->reestablish_timeout = 0;
-	if (headerp->rm_vers != rpcrdma_version)
-		goto out_badversion;
-
-	/* check for expected message types */
-	/* The order of some of these tests is important. */
-	switch (headerp->rm_type) {
-	case rdma_msg:
-		/* never expect read chunks */
-		/* never expect reply chunks (two ways to check) */
-		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
-		    (headerp->rm_body.rm_chunks[1] == xdr_zero &&
-		     headerp->rm_body.rm_chunks[2] != xdr_zero))
-			goto badheader;
-		if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
-			/* count any expected write chunks in read reply */
-			/* start at write chunk array count */
-			iptr = &headerp->rm_body.rm_chunks[2];
-			rdmalen = rpcrdma_count_chunks(rep, 1, &iptr);
-			/* check for validity, and no reply chunk after */
-			if (rdmalen < 0 || *iptr++ != xdr_zero)
-				goto badheader;
-			rep->rr_len -=
-			    ((unsigned char *)iptr - (unsigned char *)headerp);
-			status = rep->rr_len + rdmalen;
-			r_xprt->rx_stats.total_rdma_reply += rdmalen;
-			/* special case - last chunk may omit padding */
-			if (rdmalen &= 3) {
-				rdmalen = 4 - rdmalen;
-				status += rdmalen;
-			}
-		} else {
-			/* else ordinary inline */
-			rdmalen = 0;
-			iptr = (__be32 *)((unsigned char *)headerp +
-							RPCRDMA_HDRLEN_MIN);
-			rep->rr_len -= RPCRDMA_HDRLEN_MIN;
-			status = rep->rr_len;
-		}
+	xprt_pin_rqst(rqst);
 
-		r_xprt->rx_stats.fixup_copy_count +=
-			rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len,
-					     rdmalen);
-		break;
+	if (credits == 0)
+		credits = 1;	/* don't deadlock */
+	else if (credits > buf->rb_max_requests)
+		credits = buf->rb_max_requests;
+	buf->rb_credits = credits;
 
-	case rdma_nomsg:
-		/* never expect read or write chunks, always reply chunks */
-		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
-		    headerp->rm_body.rm_chunks[1] != xdr_zero ||
-		    headerp->rm_body.rm_chunks[2] != xdr_one)
-			goto badheader;
-		iptr = (__be32 *)((unsigned char *)headerp +
-							RPCRDMA_HDRLEN_MIN);
-		rdmalen = rpcrdma_count_chunks(rep, 0, &iptr);
-		if (rdmalen < 0)
-			goto badheader;
-		r_xprt->rx_stats.total_rdma_reply += rdmalen;
-		/* Reply chunk buffer already is the reply vector - no fixup. */
-		status = rdmalen;
-		break;
+	spin_unlock(&xprt->recv_lock);
 
-	case rdma_error:
-		goto out_rdmaerr;
-
-badheader:
-	default:
-		dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
-			rqst->rq_task->tk_pid, __func__,
-			be32_to_cpu(headerp->rm_type));
-		status = -EIO;
-		r_xprt->rx_stats.bad_reply_count++;
-		break;
-	}
+	req = rpcr_to_rdmar(rqst);
+	req->rl_reply = rep;
+	rep->rr_rqst = rqst;
+	clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
 
-out:
-	cwnd = xprt->cwnd;
-	xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
-	if (xprt->cwnd > cwnd)
-		xprt_release_rqst_cong(rqst->rq_task);
+	dprintk("RPC:       %s: reply %p completes request %p (xid 0x%08x)\n",
+		__func__, rep, req, be32_to_cpu(rep->rr_xid));
 
-	xprt_complete_rqst(rqst->rq_task, status);
-	spin_unlock_bh(&xprt->transport_lock);
-	dprintk("RPC:       %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
-		__func__, xprt, rqst, status);
+	if (list_empty(&req->rl_registered) &&
+	    !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags))
+		rpcrdma_complete_rqst(rep);
+	else
+		queue_work(rpcrdma_receive_wq, &rep->rr_work);
 	return;
 
 out_badstatus:
@@ -1149,71 +1423,22 @@ out_badstatus:
 	}
 	return;
 
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-out_bcall:
-	rpcrdma_bc_receive_call(r_xprt, rep);
-	return;
-#endif
-
-/* If the incoming reply terminated a pending RPC, the next
- * RPC call will post a replacement receive buffer as it is
- * being marshaled.
- */
 out_badversion:
 	dprintk("RPC:       %s: invalid version %d\n",
-		__func__, be32_to_cpu(headerp->rm_vers));
-	status = -EIO;
-	r_xprt->rx_stats.bad_reply_count++;
-	goto out;
-
-out_rdmaerr:
-	rmerr = be32_to_cpu(headerp->rm_body.rm_error.rm_err);
-	switch (rmerr) {
-	case ERR_VERS:
-		pr_err("%s: server reports header version error (%u-%u)\n",
-		       __func__,
-		       be32_to_cpu(headerp->rm_body.rm_error.rm_vers_low),
-		       be32_to_cpu(headerp->rm_body.rm_error.rm_vers_high));
-		break;
-	case ERR_CHUNK:
-		pr_err("%s: server reports header decoding error\n",
-		       __func__);
-		break;
-	default:
-		pr_err("%s: server reports unknown error %d\n",
-		       __func__, rmerr);
-	}
-	status = -EREMOTEIO;
-	r_xprt->rx_stats.bad_reply_count++;
-	goto out;
+		__func__, be32_to_cpu(rep->rr_vers));
+	goto repost;
 
-/* The req was still available, but by the time the transport_lock
- * was acquired, the rqst and task had been released. Thus the RPC
- * has already been terminated.
+/* The RPC transaction has already been terminated, or the header
+ * is corrupt.
  */
 out_norqst:
-	spin_unlock_bh(&xprt->transport_lock);
-	rpcrdma_buffer_put(req);
-	dprintk("RPC:       %s: race, no rqst left for req %p\n",
-		__func__, req);
-	return;
+	spin_unlock(&xprt->recv_lock);
+	dprintk("RPC:       %s: no match for incoming xid 0x%08x\n",
+		__func__, be32_to_cpu(rep->rr_xid));
+	goto repost;
 
 out_shortreply:
 	dprintk("RPC:       %s: short/invalid reply\n", __func__);
-	goto repost;
-
-out_nomatch:
-	spin_unlock(&buf->rb_lock);
-	dprintk("RPC:       %s: no match for incoming xid 0x%08x len %d\n",
-		__func__, be32_to_cpu(headerp->rm_xid),
-		rep->rr_len);
-	goto repost;
-
-out_duplicate:
-	spin_unlock(&buf->rb_lock);
-	dprintk("RPC:       %s: "
-		"duplicate reply %p to RPC request %p: xid 0x%08x\n",
-		__func__, rep, req, be32_to_cpu(headerp->rm_xid));
 
 /* If no pending RPC transaction was matched, post a replacement
  * receive buffer before returning.
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index c676ed0efb5a..af7893501e40 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2015 Oracle.  All rights reserved.
  *
@@ -52,7 +53,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
 	if (src->iov_len < 24)
 		goto out_shortreply;
 
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->recv_lock);
 	req = xprt_lookup_rqst(xprt, xid);
 	if (!req)
 		goto out_notfound;
@@ -69,17 +70,20 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
 	else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
 		credits = r_xprt->rx_buf.rb_bc_max_requests;
 
+	spin_lock_bh(&xprt->transport_lock);
 	cwnd = xprt->cwnd;
 	xprt->cwnd = credits << RPC_CWNDSHIFT;
 	if (xprt->cwnd > cwnd)
 		xprt_release_rqst_cong(req->rq_task);
+	spin_unlock_bh(&xprt->transport_lock);
+
 
 	ret = 0;
 	xprt_complete_rqst(req->rq_task, rcvbuf->len);
 	rcvbuf->len = 0;
 
 out_unlock:
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->recv_lock);
 out:
 	return ret;
 
@@ -129,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 	if (ret)
 		goto out_err;
 
+	/* Bump page refcnt so Send completion doesn't release
+	 * the rq_buffer before all retransmits are complete.
+	 */
+	get_page(virt_to_page(rqst->rq_buffer));
 	ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
 	if (ret)
 		goto out_unmap;
@@ -161,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
 		return -EINVAL;
 	}
 
-	/* svc_rdma_sendto releases this page */
 	page = alloc_page(RPCRDMA_DEF_GFP);
 	if (!page)
 		return -ENOMEM;
@@ -180,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task)
 {
 	struct rpc_rqst *rqst = task->tk_rqstp;
 
+	put_page(virt_to_page(rqst->rq_buffer));
 	kfree(rqst->rq_rbuffer);
 }
 
@@ -266,7 +274,7 @@ xprt_rdma_bc_put(struct rpc_xprt *xprt)
 	module_put(THIS_MODULE);
 }
 
-static struct rpc_xprt_ops xprt_rdma_bc_procs = {
+static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
 	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong,
 	.alloc_slot		= xprt_alloc_slot,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 933f79bed270..9bd04549a1ad 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2016 Oracle.  All rights reserved.
  *
@@ -660,19 +661,21 @@ out_initerr:
 	return -EIO;
 }
 
+/* Walk the segments in the Read chunk starting at @p and construct
+ * RDMA Read operations to pull the chunk to the server.
+ */
 static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
 				     struct svc_rdma_read_info *info,
 				     __be32 *p)
 {
 	int ret;
 
+	ret = -EINVAL;
 	info->ri_chunklen = 0;
-	while (*p++ != xdr_zero) {
+	while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) {
 		u32 rs_handle, rs_length;
 		u64 rs_offset;
 
-		if (be32_to_cpup(p++) != info->ri_position)
-			break;
 		rs_handle = be32_to_cpup(p++);
 		rs_length = be32_to_cpup(p++);
 		p = xdr_decode_hyper(p, &rs_offset);
@@ -689,78 +692,6 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
 	return ret;
 }
 
-/* If there is inline content following the Read chunk, append it to
- * the page list immediately following the data payload. This has to
- * be done after the reader function has determined how many pages
- * were consumed for RDMA Read.
- *
- * On entry, ri_pageno and ri_pageoff point directly to the end of the
- * page list. On exit, both have been updated to the new "next byte".
- *
- * Assumptions:
- *	- Inline content fits entirely in rq_pages[0]
- *	- Trailing content is only a handful of bytes
- */
-static int svc_rdma_copy_tail(struct svc_rqst *rqstp,
-			      struct svc_rdma_read_info *info)
-{
-	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
-	unsigned int tail_length, remaining;
-	u8 *srcp, *destp;
-
-	/* Assert that all inline content fits in page 0. This is an
-	 * implementation limit, not a protocol limit.
-	 */
-	if (head->arg.head[0].iov_len > PAGE_SIZE) {
-		pr_warn_once("svcrdma: too much trailing inline content\n");
-		return -EINVAL;
-	}
-
-	srcp = head->arg.head[0].iov_base;
-	srcp += info->ri_position;
-	tail_length = head->arg.head[0].iov_len - info->ri_position;
-	remaining = tail_length;
-
-	/* If there is room on the last page in the page list, try to
-	 * fit the trailing content there.
-	 */
-	if (info->ri_pageoff > 0) {
-		unsigned int len;
-
-		len = min_t(unsigned int, remaining,
-			    PAGE_SIZE - info->ri_pageoff);
-		destp = page_address(rqstp->rq_pages[info->ri_pageno]);
-		destp += info->ri_pageoff;
-
-		memcpy(destp, srcp, len);
-		srcp += len;
-		destp += len;
-		info->ri_pageoff += len;
-		remaining -= len;
-
-		if (info->ri_pageoff == PAGE_SIZE) {
-			info->ri_pageno++;
-			info->ri_pageoff = 0;
-		}
-	}
-
-	/* Otherwise, a fresh page is needed. */
-	if (remaining) {
-		head->arg.pages[info->ri_pageno] =
-				rqstp->rq_pages[info->ri_pageno];
-		head->count++;
-
-		destp = page_address(rqstp->rq_pages[info->ri_pageno]);
-		memcpy(destp, srcp, remaining);
-		info->ri_pageoff += remaining;
-	}
-
-	head->arg.page_len += tail_length;
-	head->arg.len += tail_length;
-	head->arg.buflen += tail_length;
-	return 0;
-}
-
 /* Construct RDMA Reads to pull over a normal Read chunk. The chunk
  * data lands in the page list of head->arg.pages.
  *
@@ -785,34 +716,28 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
 	if (ret < 0)
 		goto out;
 
-	/* Read chunk may need XDR round-up (see RFC 5666, s. 3.7).
+	/* Split the Receive buffer between the head and tail
+	 * buffers at Read chunk's position. XDR roundup of the
+	 * chunk is not included in either the pagelist or in
+	 * the tail.
 	 */
-	if (info->ri_chunklen & 3) {
-		u32 padlen = 4 - (info->ri_chunklen & 3);
-
-		info->ri_chunklen += padlen;
+	head->arg.tail[0].iov_base =
+		head->arg.head[0].iov_base + info->ri_position;
+	head->arg.tail[0].iov_len =
+		head->arg.head[0].iov_len - info->ri_position;
+	head->arg.head[0].iov_len = info->ri_position;
 
-		/* NB: data payload always starts on XDR alignment,
-		 * thus the pad can never contain a page boundary.
-		 */
-		info->ri_pageoff += padlen;
-		if (info->ri_pageoff == PAGE_SIZE) {
-			info->ri_pageno++;
-			info->ri_pageoff = 0;
-		}
-	}
+	/* Read chunk may need XDR roundup (see RFC 5666, s. 3.7).
+	 * NFSv2/3 write decoders need the length of the tail to
+	 * contain the size of the roundup padding.
+	 * NOTE(review): this adds 4, not 0, when ri_chunklen % 4 == 0.
+	 */
+	head->arg.tail[0].iov_len += 4 - (info->ri_chunklen & 3);
 
 	head->arg.page_len = info->ri_chunklen;
 	head->arg.len += info->ri_chunklen;
 	head->arg.buflen += info->ri_chunklen;
 
-	if (info->ri_position < head->arg.head[0].iov_len) {
-		ret = svc_rdma_copy_tail(rqstp, info);
-		if (ret < 0)
-			goto out;
-	}
-	head->arg.head[0].iov_len = info->ri_position;
-
 out:
 	return ret;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e660d4965b18..46ec069150d5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -51,6 +51,7 @@
 #include <linux/workqueue.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
+#include <rdma/rw.h>
 #include <linux/sunrpc/svc_rdma.h>
 #include <linux/export.h>
 #include "xprt_rdma.h"
@@ -70,7 +71,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt);
 static int svc_rdma_secure_port(struct svc_rqst *);
 static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
 
-static struct svc_xprt_ops svc_rdma_ops = {
+static const struct svc_xprt_ops svc_rdma_ops = {
 	.xpo_create = svc_rdma_create,
 	.xpo_recvfrom = svc_rdma_recvfrom,
 	.xpo_sendto = svc_rdma_sendto,
@@ -98,7 +99,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
 static void svc_rdma_bc_detach(struct svc_xprt *);
 static void svc_rdma_bc_free(struct svc_xprt *);
 
-static struct svc_xprt_ops svc_rdma_bc_ops = {
+static const struct svc_xprt_ops svc_rdma_bc_ops = {
 	.xpo_create = svc_rdma_bc_create,
 	.xpo_detach = svc_rdma_bc_detach,
 	.xpo_free = svc_rdma_bc_free,
@@ -167,8 +168,8 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
 {
 	unsigned int i;
 
-	/* Each RPC/RDMA credit can consume a number of send
-	 * and receive WQEs. One ctxt is allocated for each.
+	/* Each RPC/RDMA credit can consume one Receive and
+	 * one Send WQE at the same time.
 	 */
 	i = xprt->sc_sq_depth + xprt->sc_rq_depth;
 
@@ -289,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
 			ib_event_msg(event->event), event->event,
 			event->element.qp);
 		set_bit(XPT_CLOSE, &xprt->xpt_flags);
+		svc_xprt_enqueue(xprt);
 		break;
 	}
 }
@@ -321,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 	if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
 		goto out;
-	svc_xprt_enqueue(&xprt->sc_xprt);
-	goto out;
+	goto out_enqueue;
 
 flushed:
 	if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -332,6 +333,8 @@ flushed:
 	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
 	svc_rdma_put_context(ctxt, 1);
 
+out_enqueue:
+	svc_xprt_enqueue(&xprt->sc_xprt);
 out:
 	svc_xprt_put(&xprt->sc_xprt);
 }
@@ -357,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		svc_xprt_enqueue(&xprt->sc_xprt);
 		if (wc->status != IB_WC_WR_FLUSH_ERR)
 			pr_err("svcrdma: Send: %s (%u/0x%x)\n",
 			       ib_wc_status_msg(wc->status),
@@ -568,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
 			xprt, cma_id);
-		if (xprt)
+		if (xprt) {
 			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			svc_xprt_enqueue(&xprt->sc_xprt);
+		}
 		break;
 
 	default:
@@ -713,7 +719,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	struct ib_qp_init_attr qp_attr;
 	struct ib_device *dev;
 	struct sockaddr *sap;
-	unsigned int i;
+	unsigned int i, ctxts;
 	int ret = 0;
 
 	listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
@@ -742,14 +748,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
 				  (size_t)RPCSVC_MAXPAGES);
 	newxprt->sc_max_req_size = svcrdma_max_req_size;
-	newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
-					 svcrdma_max_requests);
-	newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
-	newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
-					    svcrdma_max_bc_requests);
+	newxprt->sc_max_requests = svcrdma_max_requests;
+	newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
 	newxprt->sc_rq_depth = newxprt->sc_max_requests +
 			       newxprt->sc_max_bc_requests;
-	newxprt->sc_sq_depth = newxprt->sc_rq_depth;
+	if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) {
+		pr_warn("svcrdma: reducing receive depth to %d\n",
+			dev->attrs.max_qp_wr);
+		newxprt->sc_rq_depth = dev->attrs.max_qp_wr;
+		newxprt->sc_max_requests = newxprt->sc_rq_depth - 2;
+		newxprt->sc_max_bc_requests = 2;
+	}
+	newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
+	ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
+	ctxts *= newxprt->sc_max_requests;
+	newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts;
+	if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
+		pr_warn("svcrdma: reducing send depth to %d\n",
+			dev->attrs.max_qp_wr);
+		newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
+	}
 	atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
 
 	if (!svc_rdma_prealloc_ctxts(newxprt))
@@ -784,8 +802,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	qp_attr.event_handler = qp_event_handler;
 	qp_attr.qp_context = &newxprt->sc_xprt;
 	qp_attr.port_num = newxprt->sc_port_num;
-	qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
-	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
+	qp_attr.cap.max_rdma_ctxs = ctxts;
+	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts;
 	qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
 	qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
 	qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
@@ -853,6 +871,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	dprintk("    remote address  : %pIS:%u\n", sap, rpc_get_port(sap));
 	dprintk("    max_sge         : %d\n", newxprt->sc_max_sge);
 	dprintk("    sq_depth        : %d\n", newxprt->sc_sq_depth);
+	dprintk("    rdma_rw_ctxs    : %d\n", ctxts);
 	dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
 	dprintk("    ord             : %d\n", newxprt->sc_ord);
 
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index d1c458e5ec4d..646c24494ea7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014-2017 Oracle.  All rights reserved.
  * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -149,7 +150,7 @@ static struct ctl_table sunrpc_table[] = {
 
 #endif
 
-static struct rpc_xprt_ops xprt_rdma_procs;	/*forward reference */
+static const struct rpc_xprt_ops xprt_rdma_procs;
 
 static void
 xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
@@ -559,6 +560,7 @@ rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 
 	r_xprt->rx_stats.hardway_register_count += size;
 	req->rl_rdmabuf = rb;
+	xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
 	return true;
 }
 
@@ -677,17 +679,14 @@ xprt_rdma_free(struct rpc_task *task)
 	struct rpc_rqst *rqst = task->tk_rqstp;
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
-	if (req->rl_backchannel)
+	if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
 		return;
 
 	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
 
-	rpcrdma_remove_req(&r_xprt->rx_buf, req);
-	if (!list_empty(&req->rl_registered))
-		ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
-	rpcrdma_unmap_sges(ia, req);
+	if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
+		rpcrdma_release_rqst(r_xprt, req);
 	rpcrdma_buffer_put(req);
 }
 
@@ -728,9 +727,10 @@ xprt_rdma_send_request(struct rpc_task *task)
 
 	/* On retransmit, remove any previously registered chunks */
 	if (unlikely(!list_empty(&req->rl_registered)))
-		r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
+		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
+						    &req->rl_registered);
 
-	rc = rpcrdma_marshal_req(rqst);
+	rc = rpcrdma_marshal_req(r_xprt, rqst);
 	if (rc < 0)
 		goto failed_marshal;
 
@@ -742,6 +742,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 		goto drop_connection;
 	req->rl_connect_cookie = xprt->connect_cookie;
 
+	set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
 	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
 		goto drop_connection;
 
@@ -789,11 +790,13 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 		   r_xprt->rx_stats.failed_marshal_count,
 		   r_xprt->rx_stats.bad_reply_count,
 		   r_xprt->rx_stats.nomsg_call_count);
-	seq_printf(seq, "%lu %lu %lu %lu\n",
+	seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
 		   r_xprt->rx_stats.mrs_recovered,
 		   r_xprt->rx_stats.mrs_orphaned,
 		   r_xprt->rx_stats.mrs_allocated,
-		   r_xprt->rx_stats.local_inv_needed);
+		   r_xprt->rx_stats.local_inv_needed,
+		   r_xprt->rx_stats.empty_sendctx_q,
+		   r_xprt->rx_stats.reply_waits_for_send);
 }
 
 static int
@@ -811,7 +814,7 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt)
  * Plumbing for rpc transport switch and kernel module
  */
 
-static struct rpc_xprt_ops xprt_rdma_procs = {
+static const struct rpc_xprt_ops xprt_rdma_procs = {
 	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong, /* sunrpc/xprt.c */
 	.alloc_slot		= xprt_alloc_slot,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e4171f2abe37..710b3f77db82 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014-2017 Oracle.  All rights reserved.
  * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -49,9 +50,10 @@
 
 #include <linux/interrupt.h>
 #include <linux/slab.h>
-#include <linux/prefetch.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc_rdma.h>
+
+#include <asm-generic/barrier.h>
 #include <asm/bitops.h>
 
 #include <rdma/ib_cm.h>
@@ -73,7 +75,7 @@ static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
 static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
 static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
 
-static struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
+struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
 
 int
 rpcrdma_alloc_wq(void)
@@ -126,33 +128,17 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
 static void
 rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 {
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct rpcrdma_sendctx *sc =
+		container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
+
 	/* WARNING: Only wr_cqe and status are reliable at this point */
 	if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
 		pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
 		       ib_wc_status_msg(wc->status),
 		       wc->status, wc->vendor_err);
-}
-
-/* Perform basic sanity checking to avoid using garbage
- * to update the credit grant value.
- */
-static void
-rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
-{
-	struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf);
-	struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
-	u32 credits;
-
-	if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
-		return;
-
-	credits = be32_to_cpu(rmsgp->rm_credit);
-	if (credits == 0)
-		credits = 1;	/* don't deadlock */
-	else if (credits > buffer->rb_max_requests)
-		credits = buffer->rb_max_requests;
 
-	atomic_set(&buffer->rb_credits, credits);
+	rpcrdma_sendctx_put_locked(sc);
 }
 
 /**
@@ -173,24 +159,19 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 		goto out_fail;
 
 	/* status == SUCCESS means all fields in wc are trustworthy */
-	if (wc->opcode != IB_WC_RECV)
-		return;
-
 	dprintk("RPC:       %s: rep %p opcode 'recv', length %u: success\n",
 		__func__, rep, wc->byte_len);
 
-	rep->rr_len = wc->byte_len;
+	rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
 	rep->rr_wc_flags = wc->wc_flags;
 	rep->rr_inv_rkey = wc->ex.invalidate_rkey;
 
 	ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
 				   rdmab_addr(rep->rr_rdmabuf),
-				   rep->rr_len, DMA_FROM_DEVICE);
-
-	rpcrdma_update_granted_credits(rep);
+				   wc->byte_len, DMA_FROM_DEVICE);
 
 out_schedule:
-	queue_work(rpcrdma_receive_wq, &rep->rr_work);
+	rpcrdma_reply_handler(rep);
 	return;
 
 out_fail:
@@ -198,7 +179,7 @@ out_fail:
 		pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
 		       ib_wc_status_msg(wc->status),
 		       wc->status, wc->vendor_err);
-	rep->rr_len = RPCRDMA_BAD_LEN;
+	rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0);
 	goto out_schedule;
 }
 
@@ -300,7 +281,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 	case RDMA_CM_EVENT_DISCONNECTED:
 		connstate = -ECONNABORTED;
 connected:
-		atomic_set(&xprt->rx_buf.rb_credits, 1);
+		xprt->rx_buf.rb_credits = 1;
 		ep->rep_connected = connstate;
 		rpcrdma_conn_func(ep);
 		wake_up_all(&ep->rep_connect_wait);
@@ -569,16 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 		ep->rep_attr.cap.max_recv_sge);
 
 	/* set trigger for requesting send completion */
-	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
-	if (ep->rep_cqinit <= 2)
-		ep->rep_cqinit = 0;	/* always signal? */
-	rpcrdma_init_cqcount(ep, 0);
+	ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
+				   cdata->max_requests >> 2);
+	ep->rep_send_count = ep->rep_send_batch;
 	init_waitqueue_head(&ep->rep_connect_wait);
 	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
 
 	sendcq = ib_alloc_cq(ia->ri_device, NULL,
 			     ep->rep_attr.cap.max_send_wr + 1,
-			     0, IB_POLL_SOFTIRQ);
+			     1, IB_POLL_WORKQUEUE);
 	if (IS_ERR(sendcq)) {
 		rc = PTR_ERR(sendcq);
 		dprintk("RPC:       %s: failed to create send CQ: %i\n",
@@ -588,7 +568,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
 	recvcq = ib_alloc_cq(ia->ri_device, NULL,
 			     ep->rep_attr.cap.max_recv_wr + 1,
-			     0, IB_POLL_SOFTIRQ);
+			     0, IB_POLL_WORKQUEUE);
 	if (IS_ERR(recvcq)) {
 		rc = PTR_ERR(recvcq);
 		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
@@ -851,6 +831,168 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 	ib_drain_qp(ia->ri_id->qp);
 }
 
+/* Fixed-size circular FIFO queue. This implementation is wait-free and
+ * lock-free.
+ *
+ * Consumer is the code path that posts Sends. This path dequeues a
+ * sendctx for use by a Send operation. Multiple consumer threads
+ * are serialized by the RPC transport lock, which allows only one
+ * ->send_request call at a time.
+ *
+ * Producer is the code path that handles Send completions. This path
+ * enqueues a sendctx that has been completed. Multiple producer
+ * threads are serialized by the ib_poll_cq() function.
+ */
+
+/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
+ * queue activity, and ib_drain_qp has flushed all remaining Send
+ * requests.
+ */
+static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
+{
+	unsigned long i;
+
+	for (i = 0; i <= buf->rb_sc_last; i++)
+		kfree(buf->rb_sc_ctxs[i]);
+	kfree(buf->rb_sc_ctxs);
+}
+
+static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_sendctx *sc;
+
+	sc = kzalloc(sizeof(*sc) +
+		     ia->ri_max_send_sges * sizeof(struct ib_sge),
+		     GFP_KERNEL);
+	if (!sc)
+		return NULL;
+
+	sc->sc_wr.wr_cqe = &sc->sc_cqe;
+	sc->sc_wr.sg_list = sc->sc_sges;
+	sc->sc_wr.opcode = IB_WR_SEND;
+	sc->sc_cqe.done = rpcrdma_wc_send;
+	return sc;
+}
+
+static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpcrdma_sendctx *sc;
+	unsigned long i;
+
+	/* Maximum number of concurrent outstanding Send WRs. Capping
+	 * the circular queue size stops Send Queue overflow by causing
+	 * the ->send_request call to fail temporarily before too many
+	 * Sends are posted.
+	 */
+	i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS;
+	dprintk("RPC:       %s: allocating %lu send_ctxs\n", __func__, i);
+	buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
+	if (!buf->rb_sc_ctxs)
+		return -ENOMEM;
+
+	buf->rb_sc_last = i - 1;
+	for (i = 0; i <= buf->rb_sc_last; i++) {
+		sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
+		if (!sc) {
+			/* The caller's error path frees the sendctxs via
+			 * rpcrdma_buffer_destroy(); do not free them here.
+			 */
+			return -ENOMEM;
+		}
+
+		sc->sc_xprt = r_xprt;
+		buf->rb_sc_ctxs[i] = sc;
+	}
+
+	return 0;
+}
+
+/* The sendctx queue is not guaranteed to have a size that is a
+ * power of two, thus the helpers in circ_buf.h cannot be used.
+ * The other option is to use modulus (%), which can be expensive.
+ */
+static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
+					  unsigned long item)
+{
+	return likely(item < buf->rb_sc_last) ? item + 1 : 0;
+}
+
+/**
+ * rpcrdma_sendctx_get_locked - Acquire a send context
+ * @buf: transport buffers from which to acquire an unused context
+ *
+ * Returns pointer to a free send completion context; or NULL if
+ * the queue is empty.
+ *
+ * Usage: Called to acquire an SGE array before preparing a Send WR.
+ *
+ * The caller serializes calls to this function (per rpcrdma_buffer),
+ * and provides an effective memory barrier that flushes the new value
+ * of rb_sc_head.
+ */
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
+{
+	struct rpcrdma_xprt *r_xprt;
+	struct rpcrdma_sendctx *sc;
+	unsigned long next_head;
+
+	next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);
+
+	if (next_head == READ_ONCE(buf->rb_sc_tail))
+		goto out_emptyq;
+
+	/* ORDER: item must be accessed _before_ head is updated */
+	sc = buf->rb_sc_ctxs[next_head];
+
+	/* Releasing the lock in the caller acts as a memory
+	 * barrier that flushes rb_sc_head.
+	 */
+	buf->rb_sc_head = next_head;
+
+	return sc;
+
+out_emptyq:
+	/* The queue is "empty" if there have not been enough Send
+	 * completions recently. This is a sign the Send Queue is
+	 * backing up. Cause the caller to pause and try again.
+	 */
+	dprintk("RPC:       %s: empty sendctx queue\n", __func__);
+	r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
+	r_xprt->rx_stats.empty_sendctx_q++;
+	return NULL;
+}
+
+/**
+ * rpcrdma_sendctx_put_locked - Release a send context
+ * @sc: send context to release
+ *
+ * Usage: Called from Send completion to return a sendctx
+ * to the queue.
+ *
+ * The caller serializes calls to this function (per rpcrdma_buffer).
+ */
+void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
+{
+	struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
+	unsigned long next_tail;
+
+	/* Unmap SGEs of previously completed but unsignaled
+	 * Sends by walking up the queue until @sc is found.
+	 */
+	next_tail = buf->rb_sc_tail;
+	do {
+		next_tail = rpcrdma_sendctx_next(buf, next_tail);
+
+		/* ORDER: item must be accessed _before_ tail is updated */
+		rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
+
+	} while (buf->rb_sc_ctxs[next_tail] != sc);
+
+	/* Paired with READ_ONCE */
+	smp_store_release(&buf->rb_sc_tail, next_tail);
+}
+
 static void
 rpcrdma_mr_recovery_worker(struct work_struct *work)
 {
@@ -946,13 +1088,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
 	spin_lock(&buffer->rb_reqslock);
 	list_add(&req->rl_all, &buffer->rb_allreqs);
 	spin_unlock(&buffer->rb_reqslock);
-	req->rl_cqe.done = rpcrdma_wc_send;
 	req->rl_buffer = &r_xprt->rx_buf;
 	INIT_LIST_HEAD(&req->rl_registered);
-	req->rl_send_wr.next = NULL;
-	req->rl_send_wr.wr_cqe = &req->rl_cqe;
-	req->rl_send_wr.sg_list = req->rl_send_sge;
-	req->rl_send_wr.opcode = IB_WR_SEND;
 	return req;
 }
 
@@ -974,10 +1111,12 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
 		rc = PTR_ERR(rep->rr_rdmabuf);
 		goto out_free;
 	}
+	xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base,
+		     rdmab_length(rep->rr_rdmabuf));
 
 	rep->rr_cqe.done = rpcrdma_wc_receive;
 	rep->rr_rxprt = r_xprt;
-	INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
+	INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
 	rep->rr_recv_wr.next = NULL;
 	rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
 	rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
@@ -998,13 +1137,11 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 
 	buf->rb_max_requests = r_xprt->rx_data.max_requests;
 	buf->rb_bc_srv_max_requests = 0;
-	atomic_set(&buf->rb_credits, 1);
 	spin_lock_init(&buf->rb_mwlock);
 	spin_lock_init(&buf->rb_lock);
 	spin_lock_init(&buf->rb_recovery_lock);
 	INIT_LIST_HEAD(&buf->rb_mws);
 	INIT_LIST_HEAD(&buf->rb_all);
-	INIT_LIST_HEAD(&buf->rb_pending);
 	INIT_LIST_HEAD(&buf->rb_stale_mrs);
 	INIT_DELAYED_WORK(&buf->rb_refresh_worker,
 			  rpcrdma_mr_refresh_worker);
@@ -1026,7 +1163,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 			rc = PTR_ERR(req);
 			goto out;
 		}
-		req->rl_backchannel = false;
 		list_add(&req->rl_list, &buf->rb_send_bufs);
 	}
 
@@ -1044,6 +1180,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 		list_add(&rep->rr_list, &buf->rb_recv_bufs);
 	}
 
+	rc = rpcrdma_sendctxs_create(r_xprt);
+	if (rc)
+		goto out;
+
 	return 0;
 out:
 	rpcrdma_buffer_destroy(buf);
@@ -1120,6 +1260,8 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 	cancel_delayed_work_sync(&buf->rb_recovery_worker);
 	cancel_delayed_work_sync(&buf->rb_refresh_worker);
 
+	rpcrdma_sendctxs_destroy(buf);
+
 	while (!list_empty(&buf->rb_recv_bufs)) {
 		struct rpcrdma_rep *rep;
 
@@ -1235,7 +1377,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
 	struct rpcrdma_buffer *buffers = req->rl_buffer;
 	struct rpcrdma_rep *rep = req->rl_reply;
 
-	req->rl_send_wr.num_sge = 0;
 	req->rl_reply = NULL;
 
 	spin_lock(&buffers->rb_lock);
@@ -1367,7 +1508,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
 		struct rpcrdma_ep *ep,
 		struct rpcrdma_req *req)
 {
-	struct ib_send_wr *send_wr = &req->rl_send_wr;
+	struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
 	struct ib_send_wr *send_wr_fail;
 	int rc;
 
@@ -1381,7 +1522,14 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
 	dprintk("RPC:       %s: posting %d s/g entries\n",
 		__func__, send_wr->num_sge);
 
-	rpcrdma_set_signaled(ep, send_wr);
+	if (!ep->rep_send_count ||
+	    test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+		send_wr->send_flags |= IB_SEND_SIGNALED;
+		ep->rep_send_count = ep->rep_send_batch;
+	} else {
+		send_wr->send_flags &= ~IB_SEND_SIGNALED;
+		--ep->rep_send_count;
+	}
 	rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
 	if (rc)
 		goto out_postsend_err;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index b282d3f8cdd8..51686d9eac5f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014-2017 Oracle.  All rights reserved.
  * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -93,8 +94,8 @@ enum {
  */
 
 struct rpcrdma_ep {
-	atomic_t		rep_cqcount;
-	int			rep_cqinit;
+	unsigned int		rep_send_count;
+	unsigned int		rep_send_batch;
 	int			rep_connected;
 	struct ib_qp_init_attr	rep_attr;
 	wait_queue_head_t 	rep_connect_wait;
@@ -104,25 +105,6 @@ struct rpcrdma_ep {
 	struct delayed_work	rep_connect_worker;
 };
 
-static inline void
-rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count)
-{
-	atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count);
-}
-
-/* To update send queue accounting, provider must take a
- * send completion every now and then.
- */
-static inline void
-rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr)
-{
-	send_wr->send_flags = 0;
-	if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) {
-		rpcrdma_init_cqcount(ep, 0);
-		send_wr->send_flags = IB_SEND_SIGNALED;
-	}
-}
-
 /* Pre-allocate extra Work Requests for handling backward receives
  * and sends. This is a fixed value because the Work Queues are
  * allocated when the forward channel is set up.
@@ -164,12 +146,6 @@ rdmab_lkey(struct rpcrdma_regbuf *rb)
 	return rb->rg_iov.lkey;
 }
 
-static inline struct rpcrdma_msg *
-rdmab_to_msg(struct rpcrdma_regbuf *rb)
-{
-	return (struct rpcrdma_msg *)rb->rg_base;
-}
-
 static inline struct ib_device *
 rdmab_device(struct rpcrdma_regbuf *rb)
 {
@@ -202,33 +178,58 @@ enum {
 };
 
 /*
- * struct rpcrdma_rep -- this structure encapsulates state required to recv
- * and complete a reply, asychronously. It needs several pieces of
- * state:
- *   o recv buffer (posted to provider)
- *   o ib_sge (also donated to provider)
- *   o status of reply (length, success or not)
- *   o bookkeeping state to get run by reply handler (list, etc)
+ * struct rpcrdma_rep -- this structure encapsulates state required
+ * to receive and complete an RPC Reply, asynchronously. It needs
+ * several pieces of state:
  *
- * These are allocated during initialization, per-transport instance.
+ *   o receive buffer and ib_sge (donated to provider)
+ *   o status of receive (success or not, length, inv rkey)
+ *   o bookkeeping state to get run by reply handler (XDR stream)
  *
- * N of these are associated with a transport instance, and stored in
- * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ * These structures are allocated during transport initialization.
+ * N of these are associated with a transport instance, managed by
+ * struct rpcrdma_buffer. N is the max number of outstanding RPCs.
  */
 
 struct rpcrdma_rep {
 	struct ib_cqe		rr_cqe;
-	unsigned int		rr_len;
+	__be32			rr_xid;
+	__be32			rr_vers;
+	__be32			rr_proc;
 	int			rr_wc_flags;
 	u32			rr_inv_rkey;
+	struct rpcrdma_regbuf	*rr_rdmabuf;
 	struct rpcrdma_xprt	*rr_rxprt;
 	struct work_struct	rr_work;
+	struct xdr_buf		rr_hdrbuf;
+	struct xdr_stream	rr_stream;
+	struct rpc_rqst		*rr_rqst;
 	struct list_head	rr_list;
 	struct ib_recv_wr	rr_recv_wr;
-	struct rpcrdma_regbuf	*rr_rdmabuf;
 };
 
-#define RPCRDMA_BAD_LEN		(~0U)
+/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
+ */
+struct rpcrdma_req;
+struct rpcrdma_xprt;
+struct rpcrdma_sendctx {
+	struct ib_send_wr	sc_wr;
+	struct ib_cqe		sc_cqe;
+	struct rpcrdma_xprt	*sc_xprt;
+	struct rpcrdma_req	*sc_req;
+	unsigned int		sc_unmap_count;
+	struct ib_sge		sc_sges[];
+};
+
+/* Limit the number of SGEs that can be unmapped during one
+ * Send completion. This caps the amount of work a single
+ * completion can do before returning to the provider.
+ *
+ * Setting this to zero disables Send completion batching.
+ */
+enum {
+	RPCRDMA_MAX_SEND_BATCH = 7,
+};
 
 /*
  * struct rpcrdma_mw - external memory region metadata
@@ -341,25 +342,30 @@ enum {
 struct rpcrdma_buffer;
 struct rpcrdma_req {
 	struct list_head	rl_list;
-	__be32			rl_xid;
-	unsigned int		rl_mapped_sges;
 	unsigned int		rl_connect_cookie;
 	struct rpcrdma_buffer	*rl_buffer;
 	struct rpcrdma_rep	*rl_reply;
-	struct ib_send_wr	rl_send_wr;
-	struct ib_sge		rl_send_sge[RPCRDMA_MAX_SEND_SGES];
+	struct xdr_stream	rl_stream;
+	struct xdr_buf		rl_hdrbuf;
+	struct rpcrdma_sendctx	*rl_sendctx;
 	struct rpcrdma_regbuf	*rl_rdmabuf;	/* xprt header */
 	struct rpcrdma_regbuf	*rl_sendbuf;	/* rq_snd_buf */
 	struct rpcrdma_regbuf	*rl_recvbuf;	/* rq_rcv_buf */
 
-	struct ib_cqe		rl_cqe;
 	struct list_head	rl_all;
-	bool			rl_backchannel;
+	unsigned long		rl_flags;
 
 	struct list_head	rl_registered;	/* registered segments */
 	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
 };
 
+/* rl_flags */
+enum {
+	RPCRDMA_REQ_F_BACKCHANNEL = 0,
+	RPCRDMA_REQ_F_PENDING,
+	RPCRDMA_REQ_F_TX_RESOURCES,
+};
+
 static inline void
 rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
 {
@@ -399,13 +405,17 @@ struct rpcrdma_buffer {
 	struct list_head	rb_mws;
 	struct list_head	rb_all;
 
+	unsigned long		rb_sc_head;
+	unsigned long		rb_sc_tail;
+	unsigned long		rb_sc_last;
+	struct rpcrdma_sendctx	**rb_sc_ctxs;
+
 	spinlock_t		rb_lock;	/* protect buf lists */
 	int			rb_send_count, rb_recv_count;
 	struct list_head	rb_send_bufs;
 	struct list_head	rb_recv_bufs;
-	struct list_head	rb_pending;
 	u32			rb_max_requests;
-	atomic_t		rb_credits;	/* most recent credit grant */
+	u32			rb_credits;	/* most recent credit grant */
 
 	u32			rb_bc_srv_max_requests;
 	spinlock_t		rb_reqslock;	/* protect rb_allreqs */
@@ -440,24 +450,29 @@ struct rpcrdma_create_data_internal {
  * Statistics for RPCRDMA
  */
 struct rpcrdma_stats {
+	/* accessed when sending a call */
 	unsigned long		read_chunk_count;
 	unsigned long		write_chunk_count;
 	unsigned long		reply_chunk_count;
-
 	unsigned long long	total_rdma_request;
-	unsigned long long	total_rdma_reply;
 
+	/* rarely accessed error counters */
 	unsigned long long	pullup_copy_count;
-	unsigned long long	fixup_copy_count;
 	unsigned long		hardway_register_count;
 	unsigned long		failed_marshal_count;
 	unsigned long		bad_reply_count;
-	unsigned long		nomsg_call_count;
-	unsigned long		bcall_count;
 	unsigned long		mrs_recovered;
 	unsigned long		mrs_orphaned;
 	unsigned long		mrs_allocated;
+	unsigned long		empty_sendctx_q;
+
+	/* accessed when receiving a reply */
+	unsigned long long	total_rdma_reply;
+	unsigned long long	fixup_copy_count;
+	unsigned long		reply_waits_for_send;
 	unsigned long		local_inv_needed;
+	unsigned long		nomsg_call_count;
+	unsigned long		bcall_count;
 };
 
 /*
@@ -465,13 +480,12 @@ struct rpcrdma_stats {
  */
 struct rpcrdma_xprt;
 struct rpcrdma_memreg_ops {
-	int		(*ro_map)(struct rpcrdma_xprt *,
+	struct rpcrdma_mr_seg *
+			(*ro_map)(struct rpcrdma_xprt *,
 				  struct rpcrdma_mr_seg *, int, bool,
 				  struct rpcrdma_mw **);
 	void		(*ro_unmap_sync)(struct rpcrdma_xprt *,
 					 struct list_head *);
-	void		(*ro_unmap_safe)(struct rpcrdma_xprt *,
-					 struct rpcrdma_req *, bool);
 	void		(*ro_recover_mr)(struct rpcrdma_mw *);
 	int		(*ro_open)(struct rpcrdma_ia *,
 				   struct rpcrdma_ep *,
@@ -529,6 +543,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
 bool frwr_is_supported(struct rpcrdma_ia *);
 bool fmr_is_supported(struct rpcrdma_ia *);
 
+extern struct workqueue_struct *rpcrdma_receive_wq;
+
 /*
  * Endpoint calls - xprtrdma/verbs.c
  */
@@ -551,34 +567,8 @@ struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
 void rpcrdma_destroy_req(struct rpcrdma_req *);
 int rpcrdma_buffer_create(struct rpcrdma_xprt *);
 void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
-
-static inline void
-rpcrdma_insert_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
-{
-	spin_lock(&buffers->rb_lock);
-	if (list_empty(&req->rl_list))
-		list_add_tail(&req->rl_list, &buffers->rb_pending);
-	spin_unlock(&buffers->rb_lock);
-}
-
-static inline struct rpcrdma_req *
-rpcrdma_lookup_req_locked(struct rpcrdma_buffer *buffers, __be32 xid)
-{
-	struct rpcrdma_req *pos;
-
-	list_for_each_entry(pos, &buffers->rb_pending, rl_list)
-		if (pos->rl_xid == xid)
-			return pos;
-	return NULL;
-}
-
-static inline void
-rpcrdma_remove_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
-{
-	spin_lock(&buffers->rb_lock);
-	list_del(&req->rl_list);
-	spin_unlock(&buffers->rb_lock);
-}
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
+void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
 
 struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
 void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
@@ -635,12 +625,24 @@ enum rpcrdma_chunktype {
 	rpcrdma_replych
 };
 
-bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *,
-			       u32, struct xdr_buf *, enum rpcrdma_chunktype);
-void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *);
-int rpcrdma_marshal_req(struct rpc_rqst *);
+int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
+			      struct rpcrdma_req *req, u32 hdrlen,
+			      struct xdr_buf *xdr,
+			      enum rpcrdma_chunktype rtype);
+void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
+int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
-void rpcrdma_reply_handler(struct work_struct *work);
+void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
+void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
+void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
+			  struct rpcrdma_req *req);
+void rpcrdma_deferred_completion(struct work_struct *work);
+
+static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
+{
+	xdr->head[0].iov_len = len;
+	xdr->len = len;
+}
 
 /* RPC/RDMA module init - xprtrdma/transport.c
  */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4f154d388748..9cc850c2719e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/net/sunrpc/xprtsock.c
  *
@@ -551,6 +552,7 @@ static int xs_local_send_request(struct rpc_task *task)
 	default:
 		dprintk("RPC:       sendmsg returned unrecognized error %d\n",
 			-status);
+		/* fall through */
 	case -EPIPE:
 		xs_close(xprt);
 		status = -ENOTCONN;
@@ -969,10 +971,12 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
 		return;
 
 	/* Look up and lock the request corresponding to the given XID */
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->recv_lock);
 	rovr = xprt_lookup_rqst(xprt, *xp);
 	if (!rovr)
 		goto out_unlock;
+	xprt_pin_rqst(rovr);
+	spin_unlock(&xprt->recv_lock);
 	task = rovr->rq_task;
 
 	copied = rovr->rq_private_buf.buflen;
@@ -981,13 +985,16 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
 
 	if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
 		dprintk("RPC:       sk_buff copy failed\n");
-		goto out_unlock;
+		spin_lock(&xprt->recv_lock);
+		goto out_unpin;
 	}
 
+	spin_lock(&xprt->recv_lock);
 	xprt_complete_rqst(task, copied);
-
+out_unpin:
+	xprt_unpin_rqst(rovr);
  out_unlock:
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->recv_lock);
 }
 
 static void xs_local_data_receive(struct sock_xprt *transport)
@@ -1050,10 +1057,12 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
 		return;
 
 	/* Look up and lock the request corresponding to the given XID */
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->recv_lock);
 	rovr = xprt_lookup_rqst(xprt, *xp);
 	if (!rovr)
 		goto out_unlock;
+	xprt_pin_rqst(rovr);
+	spin_unlock(&xprt->recv_lock);
 	task = rovr->rq_task;
 
 	if ((copied = rovr->rq_private_buf.buflen) > repsize)
@@ -1062,16 +1071,21 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
 	/* Suck it into the iovec, verify checksum if not done by hw. */
 	if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
 		__UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
-		goto out_unlock;
+		spin_lock(&xprt->recv_lock);
+		goto out_unpin;
 	}
 
 	__UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
 
+	spin_lock_bh(&xprt->transport_lock);
 	xprt_adjust_cwnd(xprt, task, copied);
+	spin_unlock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->recv_lock);
 	xprt_complete_rqst(task, copied);
-
+out_unpin:
+	xprt_unpin_rqst(rovr);
  out_unlock:
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&xprt->recv_lock);
 }
 
 static void xs_udp_data_receive(struct sock_xprt *transport)
@@ -1277,25 +1291,12 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
 	}
 
 	len = desc->count;
-	if (len > transport->tcp_reclen - transport->tcp_offset) {
-		struct xdr_skb_reader my_desc;
-
-		len = transport->tcp_reclen - transport->tcp_offset;
-		memcpy(&my_desc, desc, sizeof(my_desc));
-		my_desc.count = len;
-		r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
-					  &my_desc, xdr_skb_read_bits);
-		desc->count -= r;
-		desc->offset += r;
-	} else
-		r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
+	if (len > transport->tcp_reclen - transport->tcp_offset)
+		desc->count = transport->tcp_reclen - transport->tcp_offset;
+	r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
 					  desc, xdr_skb_read_bits);
 
-	if (r > 0) {
-		transport->tcp_copied += r;
-		transport->tcp_offset += r;
-	}
-	if (r != len) {
+	if (desc->count) {
 		/* Error when copying to the receive buffer,
 		 * usually because we weren't able to allocate
 		 * additional buffer pages. All we can do now
@@ -1315,6 +1316,10 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
 		return;
 	}
 
+	transport->tcp_copied += r;
+	transport->tcp_offset += r;
+	desc->count = len - r;
+
 	dprintk("RPC:       XID %08x read %zd bytes\n",
 			ntohl(transport->tcp_xid), r);
 	dprintk("RPC:       xprt = %p, tcp_copied = %lu, tcp_offset = %u, "
@@ -1343,21 +1348,24 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
 	dprintk("RPC:       read reply XID %08x\n", ntohl(transport->tcp_xid));
 
 	/* Find and lock the request corresponding to this xid */
-	spin_lock_bh(&xprt->transport_lock);
+	spin_lock(&xprt->recv_lock);
 	req = xprt_lookup_rqst(xprt, transport->tcp_xid);
 	if (!req) {
 		dprintk("RPC:       XID %08x request not found!\n",
 				ntohl(transport->tcp_xid));
-		spin_unlock_bh(&xprt->transport_lock);
+		spin_unlock(&xprt->recv_lock);
 		return -1;
 	}
+	xprt_pin_rqst(req);
+	spin_unlock(&xprt->recv_lock);
 
 	xs_tcp_read_common(xprt, desc, req);
 
+	spin_lock(&xprt->recv_lock);
 	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
 		xprt_complete_rqst(req->rq_task, transport->tcp_copied);
-
-	spin_unlock_bh(&xprt->transport_lock);
+	xprt_unpin_rqst(req);
+	spin_unlock(&xprt->recv_lock);
 	return 0;
 }
 
@@ -1376,11 +1384,9 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
 				container_of(xprt, struct sock_xprt, xprt);
 	struct rpc_rqst *req;
 
-	/* Look up and lock the request corresponding to the given XID */
-	spin_lock_bh(&xprt->transport_lock);
+	/* Look up the request corresponding to the given XID */
 	req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
 	if (req == NULL) {
-		spin_unlock_bh(&xprt->transport_lock);
 		printk(KERN_WARNING "Callback slot table overflowed\n");
 		xprt_force_disconnect(xprt);
 		return -1;
@@ -1391,7 +1397,6 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
 
 	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
 		xprt_complete_bc_request(req, transport->tcp_copied);
-	spin_unlock_bh(&xprt->transport_lock);
 
 	return 0;
 }
@@ -1516,6 +1521,7 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
 		.arg.data = xprt,
 	};
 	unsigned long total = 0;
+	int loop;
 	int read = 0;
 
 	mutex_lock(&transport->recv_mutex);
@@ -1524,20 +1530,20 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
 		goto out;
 
 	/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
-	for (;;) {
+	for (loop = 0; loop < 64; loop++) {
 		lock_sock(sk);
 		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
 		if (read <= 0) {
 			clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
 			release_sock(sk);
-			if (!test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
-				break;
-		} else {
-			release_sock(sk);
-			total += read;
+			break;
 		}
+		release_sock(sk);
+		total += read;
 		rd_desc.count = 65536;
 	}
+	if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+		queue_work(xprtiod_workqueue, &transport->recv_worker);
 out:
 	mutex_unlock(&transport->recv_mutex);
 	trace_xs_tcp_data_ready(xprt, read, total);
@@ -1606,6 +1612,7 @@ static void xs_tcp_state_change(struct sock *sk)
 		xprt->connect_cookie++;
 		clear_bit(XPRT_CONNECTED, &xprt->state);
 		xs_tcp_force_close(xprt);
+		/* fall through */
 	case TCP_CLOSING:
 		/*
 		 * If the server closed down the connection, make sure that
@@ -2199,7 +2206,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
 	struct sock_xprt *transport =
 		container_of(work, struct sock_xprt, connect_worker.work);
 	struct rpc_xprt *xprt = &transport->xprt;
-	struct socket *sock = transport->sock;
+	struct socket *sock;
 	int status = -EIO;
 
 	sock = xs_create_sock(xprt, transport,
@@ -2363,6 +2370,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	switch (ret) {
 	case 0:
 		xs_set_srcport(transport, sock);
+		/* fall through */
 	case -EINPROGRESS:
 		/* SYN_SENT! */
 		if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
@@ -2414,6 +2422,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
 	default:
 		printk("%s: connect returned unhandled error %d\n",
 			__func__, status);
+		/* fall through */
 	case -EADDRNOTAVAIL:
 		/* We're probably in TIME_WAIT. Get rid of existing socket,
 		 * and retry
@@ -2724,7 +2733,7 @@ static void bc_destroy(struct rpc_xprt *xprt)
 	module_put(THIS_MODULE);
 }
 
-static struct rpc_xprt_ops xs_local_ops = {
+static const struct rpc_xprt_ops xs_local_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xs_tcp_release_xprt,
 	.alloc_slot		= xprt_alloc_slot,
@@ -2742,7 +2751,7 @@ static struct rpc_xprt_ops xs_local_ops = {
 	.disable_swap		= xs_disable_swap,
 };
 
-static struct rpc_xprt_ops xs_udp_ops = {
+static const struct rpc_xprt_ops xs_udp_ops = {
 	.set_buffer_size	= xs_udp_set_buffer_size,
 	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong,
@@ -2764,7 +2773,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
 	.inject_disconnect	= xs_inject_disconnect,
 };
 
-static struct rpc_xprt_ops xs_tcp_ops = {
+static const struct rpc_xprt_ops xs_tcp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xs_tcp_release_xprt,
 	.alloc_slot		= xprt_lock_and_alloc_slot,
@@ -2795,7 +2804,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
  * The rpc_xprt_ops for the server backchannel
  */
 
-static struct rpc_xprt_ops bc_tcp_ops = {
+static const struct rpc_xprt_ops bc_tcp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xprt_release_xprt,
 	.alloc_slot		= xprt_alloc_slot,
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 25dc67ef9d37..74b9d916a58b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -343,10 +343,10 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		return sizeof(struct switchdev_obj_port_vlan);
-	case SWITCHDEV_OBJ_ID_PORT_FDB:
-		return sizeof(struct switchdev_obj_port_fdb);
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
 		return sizeof(struct switchdev_obj_port_mdb);
+	case SWITCHDEV_OBJ_ID_HOST_MDB:
+		return sizeof(struct switchdev_obj_port_mdb);
 	default:
 		BUG();
 	}
@@ -534,43 +534,6 @@ int switchdev_port_obj_del(struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
 
-/**
- *	switchdev_port_obj_dump - Dump port objects
- *
- *	@dev: port device
- *	@id: object ID
- *	@obj: object to dump
- *	@cb: function to call with a filled object
- *
- *	rtnl_lock must be held.
- */
-int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
-			    switchdev_obj_dump_cb_t *cb)
-{
-	const struct switchdev_ops *ops = dev->switchdev_ops;
-	struct net_device *lower_dev;
-	struct list_head *iter;
-	int err = -EOPNOTSUPP;
-
-	ASSERT_RTNL();
-
-	if (ops && ops->switchdev_port_obj_dump)
-		return ops->switchdev_port_obj_dump(dev, obj, cb);
-
-	/* Switch device port(s) may be stacked under
-	 * bond/team/vlan dev, so recurse down to dump objects on
-	 * first port at bottom of stack.
-	 */
-
-	netdev_for_each_lower_dev(dev, lower_dev, iter) {
-		err = switchdev_port_obj_dump(lower_dev, obj, cb);
-		break;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
-
 static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
 
 /**
@@ -613,486 +576,6 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
 
-struct switchdev_vlan_dump {
-	struct switchdev_obj_port_vlan vlan;
-	struct sk_buff *skb;
-	u32 filter_mask;
-	u16 flags;
-	u16 begin;
-	u16 end;
-};
-
-static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
-{
-	struct bridge_vlan_info vinfo;
-
-	vinfo.flags = dump->flags;
-
-	if (dump->begin == 0 && dump->end == 0) {
-		return 0;
-	} else if (dump->begin == dump->end) {
-		vinfo.vid = dump->begin;
-		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
-			    sizeof(vinfo), &vinfo))
-			return -EMSGSIZE;
-	} else {
-		vinfo.vid = dump->begin;
-		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
-		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
-			    sizeof(vinfo), &vinfo))
-			return -EMSGSIZE;
-		vinfo.vid = dump->end;
-		vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
-		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
-		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
-			    sizeof(vinfo), &vinfo))
-			return -EMSGSIZE;
-	}
-
-	return 0;
-}
-
-static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
-{
-	struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
-	struct switchdev_vlan_dump *dump =
-		container_of(vlan, struct switchdev_vlan_dump, vlan);
-	int err = 0;
-
-	if (vlan->vid_begin > vlan->vid_end)
-		return -EINVAL;
-
-	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
-		dump->flags = vlan->flags;
-		for (dump->begin = dump->end = vlan->vid_begin;
-		     dump->begin <= vlan->vid_end;
-		     dump->begin++, dump->end++) {
-			err = switchdev_port_vlan_dump_put(dump);
-			if (err)
-				return err;
-		}
-	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
-		if (dump->begin > vlan->vid_begin &&
-		    dump->begin >= vlan->vid_end) {
-			if ((dump->begin - 1) == vlan->vid_end &&
-			    dump->flags == vlan->flags) {
-				/* prepend */
-				dump->begin = vlan->vid_begin;
-			} else {
-				err = switchdev_port_vlan_dump_put(dump);
-				dump->flags = vlan->flags;
-				dump->begin = vlan->vid_begin;
-				dump->end = vlan->vid_end;
-			}
-		} else if (dump->end <= vlan->vid_begin &&
-		           dump->end < vlan->vid_end) {
-			if ((dump->end  + 1) == vlan->vid_begin &&
-			    dump->flags == vlan->flags) {
-				/* append */
-				dump->end = vlan->vid_end;
-			} else {
-				err = switchdev_port_vlan_dump_put(dump);
-				dump->flags = vlan->flags;
-				dump->begin = vlan->vid_begin;
-				dump->end = vlan->vid_end;
-			}
-		} else {
-			err = -EINVAL;
-		}
-	}
-
-	return err;
-}
-
-static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
-				    u32 filter_mask)
-{
-	struct switchdev_vlan_dump dump = {
-		.vlan.obj.orig_dev = dev,
-		.vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
-		.skb = skb,
-		.filter_mask = filter_mask,
-	};
-	int err = 0;
-
-	if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
-	    (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
-		err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
-					      switchdev_port_vlan_dump_cb);
-		if (err)
-			goto err_out;
-		if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
-			/* last one */
-			err = switchdev_port_vlan_dump_put(&dump);
-	}
-
-err_out:
-	return err == -EOPNOTSUPP ? 0 : err;
-}
-
-/**
- *	switchdev_port_bridge_getlink - Get bridge port attributes
- *
- *	@dev: port device
- *
- *	Called for SELF on rtnl_bridge_getlink to get bridge port
- *	attributes.
- */
-int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-				  struct net_device *dev, u32 filter_mask,
-				  int nlflags)
-{
-	struct switchdev_attr attr = {
-		.orig_dev = dev,
-		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
-	};
-	u16 mode = BRIDGE_MODE_UNDEF;
-	u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD;
-	int err;
-
-	if (!netif_is_bridge_port(dev))
-		return -EOPNOTSUPP;
-
-	err = switchdev_port_attr_get(dev, &attr);
-	if (err && err != -EOPNOTSUPP)
-		return err;
-
-	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
-				       attr.u.brport_flags, mask, nlflags,
-				       filter_mask, switchdev_port_vlan_fill);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
-
-static int switchdev_port_br_setflag(struct net_device *dev,
-				     struct nlattr *nlattr,
-				     unsigned long brport_flag)
-{
-	struct switchdev_attr attr = {
-		.orig_dev = dev,
-		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
-	};
-	u8 flag = nla_get_u8(nlattr);
-	int err;
-
-	err = switchdev_port_attr_get(dev, &attr);
-	if (err)
-		return err;
-
-	if (flag)
-		attr.u.brport_flags |= brport_flag;
-	else
-		attr.u.brport_flags &= ~brport_flag;
-
-	return switchdev_port_attr_set(dev, &attr);
-}
-
-static const struct nla_policy
-switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
-	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
-	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
-	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
-	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
-	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
-	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
-	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
-	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
-	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
-	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
-};
-
-static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
-					      struct nlattr *protinfo)
-{
-	struct nlattr *attr;
-	int rem;
-	int err;
-
-	err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
-				  switchdev_port_bridge_policy, NULL);
-	if (err)
-		return err;
-
-	nla_for_each_nested(attr, protinfo, rem) {
-		switch (nla_type(attr)) {
-		case IFLA_BRPORT_LEARNING:
-			err = switchdev_port_br_setflag(dev, attr,
-							BR_LEARNING);
-			break;
-		case IFLA_BRPORT_LEARNING_SYNC:
-			err = switchdev_port_br_setflag(dev, attr,
-							BR_LEARNING_SYNC);
-			break;
-		case IFLA_BRPORT_UNICAST_FLOOD:
-			err = switchdev_port_br_setflag(dev, attr, BR_FLOOD);
-			break;
-		default:
-			err = -EOPNOTSUPP;
-			break;
-		}
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int switchdev_port_br_afspec(struct net_device *dev,
-				    struct nlattr *afspec,
-				    int (*f)(struct net_device *dev,
-					     const struct switchdev_obj *obj))
-{
-	struct nlattr *attr;
-	struct bridge_vlan_info *vinfo;
-	struct switchdev_obj_port_vlan vlan = {
-		.obj.orig_dev = dev,
-		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
-	};
-	int rem;
-	int err;
-
-	nla_for_each_nested(attr, afspec, rem) {
-		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
-			continue;
-		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
-			return -EINVAL;
-		vinfo = nla_data(attr);
-		if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
-			return -EINVAL;
-		vlan.flags = vinfo->flags;
-		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
-			if (vlan.vid_begin)
-				return -EINVAL;
-			vlan.vid_begin = vinfo->vid;
-			/* don't allow range of pvids */
-			if (vlan.flags & BRIDGE_VLAN_INFO_PVID)
-				return -EINVAL;
-		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
-			if (!vlan.vid_begin)
-				return -EINVAL;
-			vlan.vid_end = vinfo->vid;
-			if (vlan.vid_end <= vlan.vid_begin)
-				return -EINVAL;
-			err = f(dev, &vlan.obj);
-			if (err)
-				return err;
-			vlan.vid_begin = 0;
-		} else {
-			if (vlan.vid_begin)
-				return -EINVAL;
-			vlan.vid_begin = vinfo->vid;
-			vlan.vid_end = vinfo->vid;
-			err = f(dev, &vlan.obj);
-			if (err)
-				return err;
-			vlan.vid_begin = 0;
-		}
-	}
-
-	return 0;
-}
-
-/**
- *	switchdev_port_bridge_setlink - Set bridge port attributes
- *
- *	@dev: port device
- *	@nlh: netlink header
- *	@flags: netlink flags
- *
- *	Called for SELF on rtnl_bridge_setlink to set bridge port
- *	attributes.
- */
-int switchdev_port_bridge_setlink(struct net_device *dev,
-				  struct nlmsghdr *nlh, u16 flags)
-{
-	struct nlattr *protinfo;
-	struct nlattr *afspec;
-	int err = 0;
-
-	if (!netif_is_bridge_port(dev))
-		return -EOPNOTSUPP;
-
-	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
-				   IFLA_PROTINFO);
-	if (protinfo) {
-		err = switchdev_port_br_setlink_protinfo(dev, protinfo);
-		if (err)
-			return err;
-	}
-
-	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
-				 IFLA_AF_SPEC);
-	if (afspec)
-		err = switchdev_port_br_afspec(dev, afspec,
-					       switchdev_port_obj_add);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
-
-/**
- *	switchdev_port_bridge_dellink - Set bridge port attributes
- *
- *	@dev: port device
- *	@nlh: netlink header
- *	@flags: netlink flags
- *
- *	Called for SELF on rtnl_bridge_dellink to set bridge port
- *	attributes.
- */
-int switchdev_port_bridge_dellink(struct net_device *dev,
-				  struct nlmsghdr *nlh, u16 flags)
-{
-	struct nlattr *afspec;
-
-	if (!netif_is_bridge_port(dev))
-		return -EOPNOTSUPP;
-
-	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
-				 IFLA_AF_SPEC);
-	if (afspec)
-		return switchdev_port_br_afspec(dev, afspec,
-						switchdev_port_obj_del);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
-
-/**
- *	switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
- *
- *	@ndmsg: netlink hdr
- *	@nlattr: netlink attributes
- *	@dev: port device
- *	@addr: MAC address to add
- *	@vid: VLAN to add
- *
- *	Add FDB entry to switch device.
- */
-int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-			   struct net_device *dev, const unsigned char *addr,
-			   u16 vid, u16 nlm_flags)
-{
-	struct switchdev_obj_port_fdb fdb = {
-		.obj.orig_dev = dev,
-		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
-		.vid = vid,
-	};
-
-	ether_addr_copy(fdb.addr, addr);
-	return switchdev_port_obj_add(dev, &fdb.obj);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
-
-/**
- *	switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
- *
- *	@ndmsg: netlink hdr
- *	@nlattr: netlink attributes
- *	@dev: port device
- *	@addr: MAC address to delete
- *	@vid: VLAN to delete
- *
- *	Delete FDB entry from switch device.
- */
-int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-			   struct net_device *dev, const unsigned char *addr,
-			   u16 vid)
-{
-	struct switchdev_obj_port_fdb fdb = {
-		.obj.orig_dev = dev,
-		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
-		.vid = vid,
-	};
-
-	ether_addr_copy(fdb.addr, addr);
-	return switchdev_port_obj_del(dev, &fdb.obj);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
-
-struct switchdev_fdb_dump {
-	struct switchdev_obj_port_fdb fdb;
-	struct net_device *dev;
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
-	int idx;
-};
-
-static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
-{
-	struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
-	struct switchdev_fdb_dump *dump =
-		container_of(fdb, struct switchdev_fdb_dump, fdb);
-	u32 portid = NETLINK_CB(dump->cb->skb).portid;
-	u32 seq = dump->cb->nlh->nlmsg_seq;
-	struct nlmsghdr *nlh;
-	struct ndmsg *ndm;
-
-	if (dump->idx < dump->cb->args[2])
-		goto skip;
-
-	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
-			sizeof(*ndm), NLM_F_MULTI);
-	if (!nlh)
-		return -EMSGSIZE;
-
-	ndm = nlmsg_data(nlh);
-	ndm->ndm_family  = AF_BRIDGE;
-	ndm->ndm_pad1    = 0;
-	ndm->ndm_pad2    = 0;
-	ndm->ndm_flags   = NTF_SELF;
-	ndm->ndm_type    = 0;
-	ndm->ndm_ifindex = dump->dev->ifindex;
-	ndm->ndm_state   = fdb->ndm_state;
-
-	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
-		goto nla_put_failure;
-
-	if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
-		goto nla_put_failure;
-
-	nlmsg_end(dump->skb, nlh);
-
-skip:
-	dump->idx++;
-	return 0;
-
-nla_put_failure:
-	nlmsg_cancel(dump->skb, nlh);
-	return -EMSGSIZE;
-}
-
-/**
- *	switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
- *
- *	@skb: netlink skb
- *	@cb: netlink callback
- *	@dev: port device
- *	@filter_dev: filter device
- *	@idx:
- *
- *	Dump FDB entries from switch device.
- */
-int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
-			    struct net_device *dev,
-			    struct net_device *filter_dev, int *idx)
-{
-	struct switchdev_fdb_dump dump = {
-		.fdb.obj.orig_dev = dev,
-		.fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
-		.dev = dev,
-		.skb = skb,
-		.cb = cb,
-		.idx = *idx,
-	};
-	int err;
-
-	err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
-				      switchdev_port_fdb_dump_cb);
-	*idx = dump.idx;
-	return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
-
 bool switchdev_port_same_parent_id(struct net_device *a,
 				   struct net_device *b)
 {
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 31b9f9c52974..37bb0bfbd936 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux TIPC layer
 #
@@ -8,7 +9,7 @@ tipc-y	+= addr.o bcast.o bearer.o \
 	   core.o link.o discover.o msg.o  \
 	   name_distr.o  subscr.o monitor.o name_table.o net.o  \
 	   netlink.o netlink_compat.o node.o socket.o eth_media.o \
-	   server.o socket.o
+	   server.o socket.o group.o
 
 tipc-$(CONFIG_TIPC_MEDIA_UDP)	+= udp_media.o
 tipc-$(CONFIG_TIPC_MEDIA_IB)	+= ib_media.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 7d99029df342..329325bd553e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -233,7 +233,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
 	struct sk_buff_head xmitq;
 	int rc = 0;
 
-	__skb_queue_head_init(&xmitq);
+	skb_queue_head_init(&xmitq);
 	tipc_bcast_lock(net);
 	if (tipc_link_bc_peers(l))
 		rc = tipc_link_xmit(l, pkts, &xmitq);
@@ -258,20 +258,20 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
 static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
 			   struct tipc_nlist *dests, u16 *cong_link_cnt)
 {
+	struct tipc_dest *dst, *tmp;
 	struct sk_buff_head _pkts;
-	struct u32_item *n, *tmp;
-	u32 dst, selector;
+	u32 dnode, selector;
 
 	selector = msg_link_selector(buf_msg(skb_peek(pkts)));
-	__skb_queue_head_init(&_pkts);
+	skb_queue_head_init(&_pkts);
 
-	list_for_each_entry_safe(n, tmp, &dests->list, list) {
-		dst = n->value;
-		if (!tipc_msg_pskb_copy(dst, pkts, &_pkts))
+	list_for_each_entry_safe(dst, tmp, &dests->list, list) {
+		dnode = dst->node;
+		if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts))
 			return -ENOMEM;
 
 		/* Any other return value than -ELINKCONG is ignored */
-		if (tipc_node_xmit(net, &_pkts, dst, selector) == -ELINKCONG)
+		if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG)
 			(*cong_link_cnt)++;
 	}
 	return 0;
@@ -554,7 +554,7 @@ void tipc_nlist_add(struct tipc_nlist *nl, u32 node)
 {
 	if (node == nl->self)
 		nl->local = true;
-	else if (u32_push(&nl->list, node))
+	else if (tipc_dest_push(&nl->list, node, 0))
 		nl->remote++;
 }
 
@@ -562,13 +562,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node)
 {
 	if (node == nl->self)
 		nl->local = false;
-	else if (u32_del(&nl->list, node))
+	else if (tipc_dest_del(&nl->list, node, 0))
 		nl->remote--;
 }
 
 void tipc_nlist_purge(struct tipc_nlist *nl)
 {
-	u32_list_purge(&nl->list);
+	tipc_dest_list_purge(&nl->list);
 	nl->remote = 0;
 	nl->local = 0;
 }
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 767e0537dde5..47ec121574ce 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -65,6 +65,8 @@ static struct tipc_bearer *bearer_get(struct net *net, int bearer_id)
 }
 
 static void bearer_disable(struct net *net, struct tipc_bearer *b);
+static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
+			   struct packet_type *pt, struct net_device *orig_dev);
 
 /**
  * tipc_media_find - locates specified media object by name
@@ -365,30 +367,6 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
 	return 0;
 }
 
-/* tipc_bearer_reset_all - reset all links on all bearers
- */
-void tipc_bearer_reset_all(struct net *net)
-{
-	struct tipc_bearer *b;
-	int i;
-
-	for (i = 0; i < MAX_BEARERS; i++) {
-		b = bearer_get(net, i);
-		if (b)
-			clear_bit_unlock(0, &b->up);
-	}
-	for (i = 0; i < MAX_BEARERS; i++) {
-		b = bearer_get(net, i);
-		if (b)
-			tipc_reset_bearer(net, b);
-	}
-	for (i = 0; i < MAX_BEARERS; i++) {
-		b = bearer_get(net, i);
-		if (b)
-			test_and_set_bit_lock(0, &b->up);
-	}
-}
-
 /**
  * bearer_disable
  *
@@ -428,6 +406,10 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
 
 	/* Associate TIPC bearer with L2 bearer */
 	rcu_assign_pointer(b->media_ptr, dev);
+	b->pt.dev = dev;
+	b->pt.type = htons(ETH_P_TIPC);
+	b->pt.func = tipc_l2_rcv_msg;
+	dev_add_pack(&b->pt);
 	memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
 	memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
 	b->bcast_addr.media_id = b->media->type_id;
@@ -447,6 +429,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b)
 	struct net_device *dev;
 
 	dev = (struct net_device *)rtnl_dereference(b->media_ptr);
+	dev_remove_pack(&b->pt);
 	RCU_INIT_POINTER(dev->tipc_ptr, NULL);
 	synchronize_net();
 	dev_put(dev);
@@ -594,11 +577,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
 	struct tipc_bearer *b;
 
 	rcu_read_lock();
-	b = rcu_dereference_rtnl(dev->tipc_ptr);
+	b = rcu_dereference_rtnl(dev->tipc_ptr) ?:
+		rcu_dereference_rtnl(orig_dev->tipc_ptr);
 	if (likely(b && test_bit(0, &b->up) &&
 		   (skb->pkt_type <= PACKET_MULTICAST))) {
 		skb->next = NULL;
-		tipc_rcv(dev_net(dev), skb, b);
+		tipc_rcv(dev_net(b->pt.dev), skb, b);
 		rcu_read_unlock();
 		return NET_RX_SUCCESS;
 	}
@@ -653,17 +637,12 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
 		break;
 	case NETDEV_UNREGISTER:
 	case NETDEV_CHANGENAME:
-		bearer_disable(dev_net(dev), b);
+		bearer_disable(net, b);
 		break;
 	}
 	return NOTIFY_OK;
 }
 
-static struct packet_type tipc_packet_type __read_mostly = {
-	.type = htons(ETH_P_TIPC),
-	.func = tipc_l2_rcv_msg,
-};
-
 static struct notifier_block notifier = {
 	.notifier_call  = tipc_l2_device_event,
 	.priority	= 0,
@@ -671,19 +650,12 @@ static struct notifier_block notifier = {
 
 int tipc_bearer_setup(void)
 {
-	int err;
-
-	err = register_netdevice_notifier(&notifier);
-	if (err)
-		return err;
-	dev_add_pack(&tipc_packet_type);
-	return 0;
+	return register_netdevice_notifier(&notifier);
 }
 
 void tipc_bearer_cleanup(void)
 {
 	unregister_netdevice_notifier(&notifier);
-	dev_remove_pack(&tipc_packet_type);
 }
 
 void tipc_bearer_stop(struct net *net)
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 635c9086e19a..42d6eeeb646d 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -131,6 +131,7 @@ struct tipc_media {
  * @name: bearer name (format = media:interface)
  * @media: ptr to media structure associated with bearer
  * @bcast_addr: media address used in broadcasting
+ * @pt: packet type for bearer
  * @rcu: rcu struct for tipc_bearer
  * @priority: default link priority for bearer
  * @window: default window size for bearer
@@ -151,6 +152,7 @@ struct tipc_bearer {
 	char name[TIPC_MAX_BEARER_NAME];
 	struct tipc_media *media;
 	struct tipc_media_addr bcast_addr;
+	struct packet_type pt;
 	struct rcu_head rcu;
 	u32 priority;
 	u32 window;
@@ -210,7 +212,6 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest);
 struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name);
 int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id);
 struct tipc_media *tipc_media_find(const char *name);
-void tipc_bearer_reset_all(struct net *net);
 int tipc_bearer_setup(void);
 void tipc_bearer_cleanup(void);
 void tipc_bearer_stop(struct net *net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5cc5398be722..964342689f2c 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -132,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
 	return &tipc_net(net)->node_list;
 }
 
+static inline struct tipc_server *tipc_topsrv(struct net *net)
+{
+	return tipc_net(net)->topsrv;
+}
+
 static inline unsigned int tipc_hashfn(u32 addr)
 {
 	return addr & (NODE_HTABLE_SIZE - 1);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 02462d67d191..92e4828c6b09 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -224,9 +224,9 @@ void tipc_disc_remove_dest(struct tipc_link_req *req)
  *
  * Called whenever a link setup request timer associated with a bearer expires.
  */
-static void disc_timeout(unsigned long data)
+static void disc_timeout(struct timer_list *t)
 {
-	struct tipc_link_req *req = (struct tipc_link_req *)data;
+	struct tipc_link_req *req = from_timer(req, t, timer);
 	struct sk_buff *skb;
 	int max_delay;
 
@@ -292,7 +292,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b,
 	req->num_nodes = 0;
 	req->timer_intv = TIPC_LINK_REQ_INIT;
 	spin_lock_init(&req->lock);
-	setup_timer(&req->timer, disc_timeout, (unsigned long)req);
+	timer_setup(&req->timer, disc_timeout, 0);
 	mod_timer(&req->timer, jiffies + req->timer_intv);
 	b->link_req = req;
 	*skb = skb_clone(req->buf, GFP_ATOMIC);
diff --git a/net/tipc/group.c b/net/tipc/group.c
new file mode 100644
index 000000000000..7821085a7dd8
--- /dev/null
+++ b/net/tipc/group.c
@@ -0,0 +1,871 @@
+/*
+ * net/tipc/group.c: TIPC group messaging code
+ *
+ * Copyright (c) 2017, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "addr.h"
+#include "group.h"
+#include "bcast.h"
+#include "server.h"
+#include "msg.h"
+#include "socket.h"
+#include "node.h"
+#include "name_table.h"
+#include "subscr.h"
+
+#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1)
+#define ADV_IDLE ADV_UNIT
+#define ADV_ACTIVE (ADV_UNIT * 12)
+
+enum mbr_state {
+	MBR_QUARANTINED,
+	MBR_DISCOVERED,
+	MBR_JOINING,
+	MBR_PUBLISHED,
+	MBR_JOINED,
+	MBR_PENDING,
+	MBR_ACTIVE,
+	MBR_RECLAIMING,
+	MBR_REMITTED,
+	MBR_LEAVING
+};
+
+struct tipc_member {
+	struct rb_node tree_node;
+	struct list_head list;
+	struct list_head congested;
+	struct sk_buff *event_msg;
+	struct sk_buff_head deferredq;
+	struct tipc_group *group;
+	u32 node;
+	u32 port;
+	u32 instance;
+	enum mbr_state state;
+	u16 advertised;
+	u16 window;
+	u16 bc_rcv_nxt;
+	u16 bc_syncpt;
+	u16 bc_acked;
+	bool usr_pending;
+};
+
+struct tipc_group {
+	struct rb_root members;
+	struct list_head congested;
+	struct list_head pending;
+	struct list_head active;
+	struct list_head reclaiming;
+	struct tipc_nlist dests;
+	struct net *net;
+	int subid;
+	u32 type;
+	u32 instance;
+	u32 domain;
+	u32 scope;
+	u32 portid;
+	u16 member_cnt;
+	u16 active_cnt;
+	u16 max_active;
+	u16 bc_snd_nxt;
+	u16 bc_ackers;
+	bool loopback;
+	bool events;
+};
+
+static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
+				  int mtyp, struct sk_buff_head *xmitq);
+
+static void tipc_group_decr_active(struct tipc_group *grp,
+				   struct tipc_member *m)
+{
+	if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+		grp->active_cnt--;
+}
+
+static int tipc_group_rcvbuf_limit(struct tipc_group *grp)
+{
+	int max_active, active_pool, idle_pool;
+	int mcnt = grp->member_cnt + 1;
+
+	/* Limit simultaneous reception from other members */
+	max_active = min(mcnt / 8, 64);
+	max_active = max(max_active, 16);
+	grp->max_active = max_active;
+
+	/* Reserve blocks for active and idle members */
+	active_pool = max_active * ADV_ACTIVE;
+	idle_pool = (mcnt - max_active) * ADV_IDLE;
+
+	/* Scale to bytes, considering worst-case truesize/msgsize ratio */
+	return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4;
+}
+
+u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
+{
+	return grp->bc_snd_nxt;
+}
+
+static bool tipc_group_is_enabled(struct tipc_member *m)
+{
+	return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING;
+}
+
+static bool tipc_group_is_receiver(struct tipc_member *m)
+{
+	return m && m->state >= MBR_JOINED;
+}
+
+u32 tipc_group_exclude(struct tipc_group *grp)
+{
+	if (!grp->loopback)
+		return grp->portid;
+	return 0;
+}
+
+int tipc_group_size(struct tipc_group *grp)
+{
+	return grp->member_cnt;
+}
+
+struct tipc_group *tipc_group_create(struct net *net, u32 portid,
+				     struct tipc_group_req *mreq)
+{
+	struct tipc_group *grp;
+	u32 type = mreq->type;
+
+	grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
+	if (!grp)
+		return NULL;
+	tipc_nlist_init(&grp->dests, tipc_own_addr(net));
+	INIT_LIST_HEAD(&grp->congested);
+	INIT_LIST_HEAD(&grp->active);
+	INIT_LIST_HEAD(&grp->pending);
+	INIT_LIST_HEAD(&grp->reclaiming);
+	grp->members = RB_ROOT;
+	grp->net = net;
+	grp->portid = portid;
+	grp->domain = addr_domain(net, mreq->scope);
+	grp->type = type;
+	grp->instance = mreq->instance;
+	grp->scope = mreq->scope;
+	grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
+	grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
+	if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid))
+		return grp;
+	kfree(grp);
+	return NULL;
+}
+
+void tipc_group_delete(struct net *net, struct tipc_group *grp)
+{
+	struct rb_root *tree = &grp->members;
+	struct tipc_member *m, *tmp;
+	struct sk_buff_head xmitq;
+
+	__skb_queue_head_init(&xmitq);
+
+	rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
+		tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
+		list_del(&m->list);
+		kfree(m);
+	}
+	tipc_node_distr_xmit(net, &xmitq);
+	tipc_nlist_purge(&grp->dests);
+	tipc_topsrv_kern_unsubscr(net, grp->subid);
+	kfree(grp);
+}
+
+struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
+					   u32 node, u32 port)
+{
+	struct rb_node *n = grp->members.rb_node;
+	u64 nkey, key = (u64)node << 32 | port;
+	struct tipc_member *m;
+
+	while (n) {
+		m = container_of(n, struct tipc_member, tree_node);
+		nkey = (u64)m->node << 32 | m->port;
+		if (key < nkey)
+			n = n->rb_left;
+		else if (key > nkey)
+			n = n->rb_right;
+		else
+			return m;
+	}
+	return NULL;
+}
+
+static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
+						u32 node, u32 port)
+{
+	struct tipc_member *m;
+
+	m = tipc_group_find_member(grp, node, port);
+	if (m && tipc_group_is_enabled(m))
+		return m;
+	return NULL;
+}
+
+static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
+						u32 node)
+{
+	struct tipc_member *m;
+	struct rb_node *n;
+
+	for (n = rb_first(&grp->members); n; n = rb_next(n)) {
+		m = container_of(n, struct tipc_member, tree_node);
+		if (m->node == node)
+			return m;
+	}
+	return NULL;
+}
+
+static void tipc_group_add_to_tree(struct tipc_group *grp,
+				   struct tipc_member *m)
+{
+	u64 nkey, key = (u64)m->node << 32 | m->port;
+	struct rb_node **n, *parent = NULL;
+	struct tipc_member *tmp;
+
+	n = &grp->members.rb_node;
+	while (*n) {
+		tmp = container_of(*n, struct tipc_member, tree_node);
+		parent = *n;
+		tmp = container_of(parent, struct tipc_member, tree_node);
+		nkey = (u64)tmp->node << 32 | tmp->port;
+		if (key < nkey)
+			n = &(*n)->rb_left;
+		else if (key > nkey)
+			n = &(*n)->rb_right;
+		else
+			return;
+	}
+	rb_link_node(&m->tree_node, parent, n);
+	rb_insert_color(&m->tree_node, &grp->members);
+}
+
+static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
+						    u32 node, u32 port,
+						    int state)
+{
+	struct tipc_member *m;
+
+	m = kzalloc(sizeof(*m), GFP_ATOMIC);
+	if (!m)
+		return NULL;
+	INIT_LIST_HEAD(&m->list);
+	INIT_LIST_HEAD(&m->congested);
+	__skb_queue_head_init(&m->deferredq);
+	m->group = grp;
+	m->node = node;
+	m->port = port;
+	m->bc_acked = grp->bc_snd_nxt - 1;
+	grp->member_cnt++;
+	tipc_group_add_to_tree(grp, m);
+	tipc_nlist_add(&grp->dests, m->node);
+	m->state = state;
+	return m;
+}
+
+void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port)
+{
+	tipc_group_create_member(grp, node, port, MBR_DISCOVERED);
+}
+
+static void tipc_group_delete_member(struct tipc_group *grp,
+				     struct tipc_member *m)
+{
+	rb_erase(&m->tree_node, &grp->members);
+	grp->member_cnt--;
+
+	/* Check if we were waiting for replicast ack from this member */
+	if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1))
+		grp->bc_ackers--;
+
+	list_del_init(&m->list);
+	list_del_init(&m->congested);
+	tipc_group_decr_active(grp, m);
+
+	/* If last member on a node, remove node from dest list */
+	if (!tipc_group_find_node(grp, m->node))
+		tipc_nlist_del(&grp->dests, m->node);
+
+	kfree(m);
+}
+
+struct tipc_nlist *tipc_group_dests(struct tipc_group *grp)
+{
+	return &grp->dests;
+}
+
+void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
+		     int *scope)
+{
+	seq->type = grp->type;
+	seq->lower = grp->instance;
+	seq->upper = grp->instance;
+	*scope = grp->scope;
+}
+
+void tipc_group_update_member(struct tipc_member *m, int len)
+{
+	struct tipc_group *grp = m->group;
+	struct tipc_member *_m, *tmp;
+
+	if (!tipc_group_is_enabled(m))
+		return;
+
+	m->window -= len;
+
+	if (m->window >= ADV_IDLE)
+		return;
+
+	if (!list_empty(&m->congested))
+		return;
+
+	/* Sort member into congested members' list */
+	list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
+		if (m->window > _m->window)
+			continue;
+		list_add_tail(&m->congested, &_m->congested);
+		return;
+	}
+	list_add_tail(&m->congested, &grp->congested);
+}
+
+void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
+{
+	u16 prev = grp->bc_snd_nxt - 1;
+	struct tipc_member *m;
+	struct rb_node *n;
+
+	for (n = rb_first(&grp->members); n; n = rb_next(n)) {
+		m = container_of(n, struct tipc_member, tree_node);
+		if (tipc_group_is_enabled(m)) {
+			tipc_group_update_member(m, len);
+			m->bc_acked = prev;
+		}
+	}
+
+	/* Mark number of acknowledgements to expect, if any */
+	if (ack)
+		grp->bc_ackers = grp->member_cnt;
+	grp->bc_snd_nxt++;
+}
+
+bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
+		     int len, struct tipc_member **mbr)
+{
+	struct sk_buff_head xmitq;
+	struct tipc_member *m;
+	int adv, state;
+
+	m = tipc_group_find_dest(grp, dnode, dport);
+	*mbr = m;	/* caller always gets the lookup result, NULL included */
+	if (!m)
+		return false;	/* unknown destination is reported as not congested */
+	if (m->usr_pending)
+		return true;	/* usr_pending already set: still congested */
+	if (m->window >= len)
+		return false;	/* window is large enough for len */
+	m->usr_pending = true;	/* cleared again in the proto_rcv/member_evt paths */
+
+	/* If not fully advertised, do it now to prevent mutual blocking */
+	adv = m->advertised;
+	state = m->state;
+	if (state < MBR_JOINED)
+		return true;
+	if (state == MBR_JOINED && adv == ADV_IDLE)
+		return true;
+	if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
+		return true;
+	if (state == MBR_PENDING && adv == ADV_IDLE)
+		return true;
+	skb_queue_head_init(&xmitq);	/* local queue used for this one message */
+	tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
+	tipc_node_distr_xmit(grp->net, &xmitq);
+	return true;	/* congested in every path past the window check */
+}
+
+bool tipc_group_bc_cong(struct tipc_group *grp, int len)
+{
+	struct tipc_member *m = NULL;
+
+	/* If prev bcast was replicast, reject until all receivers have acked */
+	if (grp->bc_ackers)
+		return true;
+
+	if (list_empty(&grp->congested))	/* no member is on the congested list */
+		return false;
+
+	m = list_first_entry(&grp->congested, struct tipc_member, congested);
+	if (m->window >= len)	/* list head (inserted smallest-window-first) has room */
+		return false;
+
+	return tipc_group_cong(grp, m->node, m->port, len, &m);	/* per-member check */
+}
+
+/* tipc_group_sort_msg() - queue skb on defq; bcast/mcast by bcast sequence
+ * number, everything else at the tail */
+static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq)
+{
+	struct tipc_msg *_hdr, *hdr = buf_msg(skb);
+	u16 bc_seqno = msg_grp_bc_seqno(hdr);
+	struct sk_buff *_skb, *tmp;
+	int mtyp = msg_type(hdr);
+
+	/* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */
+	if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
+		skb_queue_walk_safe(defq, _skb, tmp) {
+			_hdr = buf_msg(_skb);
+			if (!less(bc_seqno, msg_grp_bc_seqno(_hdr)))
+				continue;	/* ours is not less than this entry: go on */
+			__skb_queue_before(defq, _skb, skb);	/* first entry ours is less than */
+			return;
+		}
+		/* Bcast was not bypassed, - add to tail */
+	}
+	/* Unicasts are never bypassed, - always add to tail */
+	__skb_queue_tail(defq, skb);
+}
+
+/* tipc_group_filter_msg() - take one msg off inputq, sort it into its sender's
+ * deferred queue, then deliver/drop/ack whatever is now in sequence */
+void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
+			   struct sk_buff_head *xmitq)
+{
+	struct sk_buff *skb = __skb_dequeue(inputq);
+	bool ack, deliver, update, leave = false;
+	struct sk_buff_head *defq;
+	struct tipc_member *m;
+	struct tipc_msg *hdr;
+	u32 node, port;
+	int mtyp, blks;
+
+	if (!skb)
+		return;
+
+	hdr = buf_msg(skb);
+	node = msg_orignode(hdr);
+	port = msg_origport(hdr);
+
+	if (!msg_in_group(hdr))	/* not one of the TIPC_GRP_* message types */
+		goto drop;
+
+	m = tipc_group_find_member(grp, node, port);
+	if (!tipc_group_is_receiver(m))
+		goto drop;
+
+	if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))	/* older than next expected */
+		goto drop;
+
+	TIPC_SKB_CB(skb)->orig_member = m->instance;
+	defq = &m->deferredq;	/* NB: this queue lives inside *m */
+	tipc_group_sort_msg(skb, defq);
+
+	while ((skb = skb_peek(defq))) {
+		hdr = buf_msg(skb);
+		mtyp = msg_type(hdr);
+		blks = msg_blocks(hdr);	/* read now: hdr is gone once skb is freed below */
+		deliver = true;
+		ack = false;
+		update = false;
+
+		if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))	/* gap: leave it queued */
+			break;
+
+		/* Decide what to do with message */
+		switch (mtyp) {
+		case TIPC_GRP_MCAST_MSG:
+			if (msg_nameinst(hdr) != grp->instance) {	/* mcast not for us */
+				update = true;
+				deliver = false;
+			}
+			/* Fall thru */
+		case TIPC_GRP_BCAST_MSG:
+			m->bc_rcv_nxt++;
+			ack = msg_grp_bc_ack_req(hdr);
+			break;
+		case TIPC_GRP_UCAST_MSG:
+			break;
+		case TIPC_GRP_MEMBER_EVT:
+			if (m->state == MBR_LEAVING)
+				leave = true;
+			if (!grp->events)	/* group has events turned off: swallow it */
+				deliver = false;
+			break;
+		default:
+			break;
+		}
+
+		/* Execute decisions */
+		__skb_dequeue(defq);
+		if (deliver)
+			__skb_queue_tail(inputq, skb);
+		else
+			kfree_skb(skb);	/* skb and hdr must not be touched after this */
+
+		if (ack)
+			tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
+
+		if (leave) {
+			__skb_queue_purge(defq);	/* defq is part of m: empty it first */
+			tipc_group_delete_member(grp, m);	/* presumably releases m */
+			break;
+		}
+		if (!update)
+			continue;
+
+		tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
+	}
+	return;
+drop:
+	kfree_skb(skb);
+}
+
+void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
+			       u32 port, struct sk_buff_head *xmitq)
+{
+	struct list_head *active = &grp->active;
+	int max_active = grp->max_active;
+	int reclaim_limit = max_active * 3 / 4;	/* reclaiming starts at 3/4 of max */
+	int active_cnt = grp->active_cnt;	/* snapshot; grp->active_cnt may be bumped below */
+	struct tipc_member *m, *rm;
+
+	m = tipc_group_find_member(grp, node, port);
+	if (!m)
+		return;
+
+	m->advertised -= blks;	/* blks of what was advertised to m are now used */
+
+	switch (m->state) {
+	case MBR_JOINED:
+		/* Reclaim advertised space from least active member */
+		if (!list_empty(active) && active_cnt >= reclaim_limit) {
+			rm = list_first_entry(active, struct tipc_member, list);
+			rm->state = MBR_RECLAIMING;
+			list_move_tail(&rm->list, &grp->reclaiming);
+			tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
+		}
+		/* If max active, become pending and wait for reclaimed space */
+		if (active_cnt >= max_active) {
+			m->state = MBR_PENDING;
+			list_add_tail(&m->list, &grp->pending);
+			break;
+		}
+		/* Otherwise become active */
+		m->state = MBR_ACTIVE;
+		list_add_tail(&m->list, &grp->active);
+		grp->active_cnt++;
+		/* Fall through */
+	case MBR_ACTIVE:
+		if (!list_is_last(&m->list, &grp->active))
+			list_move_tail(&m->list, &grp->active);	/* latest receiver at tail */
+		if (m->advertised > (ADV_ACTIVE * 3 / 4))
+			break;	/* more than 3/4 of ADV_ACTIVE still outstanding */
+		tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+		break;
+	case MBR_REMITTED:
+		if (m->advertised > ADV_IDLE)
+			break;	/* stays REMITTED until advertised is down to ADV_IDLE */
+		m->state = MBR_JOINED;
+		if (m->advertised < ADV_IDLE) {
+			pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
+			tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+		}
+		break;
+	case MBR_RECLAIMING:
+	case MBR_DISCOVERED:
+	case MBR_JOINING:
+	case MBR_LEAVING:
+	default:
+		break;	/* in these states only m->advertised was adjusted above */
+	}
+}
+
+static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
+				  int mtyp, struct sk_buff_head *xmitq)
+{
+	struct tipc_msg *hdr;
+	struct sk_buff *skb;
+	int adv = 0;
+
+	skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0,
+			      m->node, tipc_own_addr(grp->net),
+			      m->port, grp->portid, 0);
+	if (!skb)
+		return;	/* creation failed: nothing is queued and no error is reported */
+
+	if (m->state == MBR_ACTIVE)
+		adv = ADV_ACTIVE - m->advertised;	/* amount that tops up to ADV_ACTIVE */
+	else if (m->state == MBR_JOINED || m->state == MBR_PENDING)
+		adv = ADV_IDLE - m->advertised;	/* amount that tops up to ADV_IDLE */
+
+	hdr = buf_msg(skb);
+
+	if (mtyp == GRP_JOIN_MSG) {
+		msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
+		msg_set_adv_win(hdr, adv);	/* NOTE(review): adv is int, field is 16 bits */
+		m->advertised += adv;
+	} else if (mtyp == GRP_LEAVE_MSG) {
+		msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
+	} else if (mtyp == GRP_ADV_MSG) {
+		msg_set_adv_win(hdr, adv);
+		m->advertised += adv;	/* remember what the peer has now been granted */
+	} else if (mtyp == GRP_ACK_MSG) {
+		msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
+	} else if (mtyp == GRP_REMIT_MSG) {
+		msg_set_grp_remitted(hdr, m->window);
+	}
+	__skb_queue_tail(xmitq, skb);	/* only queued here; nothing is sent in this function */
+}
+
+void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
+			  struct tipc_msg *hdr, struct sk_buff_head *inputq,
+			  struct sk_buff_head *xmitq)
+{
+	u32 node = msg_orignode(hdr);
+	u32 port = msg_origport(hdr);
+	struct tipc_member *m, *pm;
+	struct tipc_msg *ehdr;
+	u16 remitted, in_flight;
+
+	if (!grp)
+		return;
+
+	m = tipc_group_find_member(grp, node, port);	/* sender; NULL if unknown */
+
+	switch (msg_type(hdr)) {
+	case GRP_JOIN_MSG:
+		if (!m)
+			m = tipc_group_create_member(grp, node, port,
+						     MBR_QUARANTINED);
+		if (!m)
+			return;
+		m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+		m->bc_rcv_nxt = m->bc_syncpt;	/* next expected bcast = peer's sync point */
+		m->window += msg_adv_win(hdr);
+
+		/* Wait until PUBLISH event is received */
+		if (m->state == MBR_DISCOVERED) {
+			m->state = MBR_JOINING;
+		} else if (m->state == MBR_PUBLISHED) {
+			m->state = MBR_JOINED;
+			*usr_wakeup = true;
+			m->usr_pending = false;
+			tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+			ehdr = buf_msg(m->event_msg);	/* event stored by tipc_group_member_evt() */
+			msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
+			__skb_queue_tail(inputq, m->event_msg);
+		}
+		if (m->window < ADV_IDLE)
+			tipc_group_update_member(m, 0);	/* len 0: only (re)sorts into congested */
+		else
+			list_del_init(&m->congested);
+		return;
+	case GRP_LEAVE_MSG:
+		if (!m)
+			return;
+		m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+
+		/* Wait until WITHDRAW event is received */
+		if (m->state != MBR_LEAVING) {
+			tipc_group_decr_active(grp, m);
+			m->state = MBR_LEAVING;
+			return;
+		}
+		/* Otherwise deliver already received WITHDRAW event */
+		ehdr = buf_msg(m->event_msg);
+		msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
+		__skb_queue_tail(inputq, m->event_msg);
+		*usr_wakeup = true;
+		list_del_init(&m->congested);
+		return;
+	case GRP_ADV_MSG:
+		if (!m)
+			return;
+		m->window += msg_adv_win(hdr);	/* peer granted more send window */
+		*usr_wakeup = m->usr_pending;
+		m->usr_pending = false;
+		list_del_init(&m->congested);
+		return;
+	case GRP_ACK_MSG:
+		if (!m)
+			return;
+		m->bc_acked = msg_grp_bc_acked(hdr);
+		if (--grp->bc_ackers)	/* NOTE(review): unconditional; confirm it cannot be 0 here */
+			break;
+		*usr_wakeup = true;	/* last expected ack has arrived */
+		m->usr_pending = false;
+		return;
+	case GRP_RECLAIM_MSG:
+		if (!m)
+			return;
+		*usr_wakeup = m->usr_pending;
+		m->usr_pending = false;
+		tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);	/* carries m->window */
+		m->window = ADV_IDLE;	/* reset only after the old value was put in the REMIT */
+		return;
+	case GRP_REMIT_MSG:
+		if (!m || m->state != MBR_RECLAIMING)
+			return;
+
+		list_del_init(&m->list);
+		grp->active_cnt--;
+		remitted = msg_grp_remitted(hdr);
+
+		/* Messages preceding the REMIT still in receive queue */
+		if (m->advertised > remitted) {
+			m->state = MBR_REMITTED;
+			in_flight = m->advertised - remitted;
+		}
+		/* All messages preceding the REMIT have been read */
+		if (m->advertised <= remitted) {
+			m->state = MBR_JOINED;
+			in_flight = 0;
+		}
+		/* ..and the REMIT overtaken by more messages => re-advertise */
+		if (m->advertised < remitted)
+			tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+
+		m->advertised = ADV_IDLE + in_flight;	/* in_flight was set by one of the two ifs */
+
+		/* Set oldest pending member to active and advertise */
+		if (list_empty(&grp->pending))
+			return;
+		pm = list_first_entry(&grp->pending, struct tipc_member, list);
+		pm->state = MBR_ACTIVE;
+		list_move_tail(&pm->list, &grp->active);
+		grp->active_cnt++;
+		if (pm->advertised <= (ADV_ACTIVE * 3 / 4))
+			tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
+		return;
+	default:
+		pr_warn("Received unknown GROUP_PROTO message\n");
+	}
+}
+
+/* tipc_group_member_evt() - receive and handle a member up/down event;
+ * skb is queued on inputq, stored in m->event_msg, or freed at drop */
+void tipc_group_member_evt(struct tipc_group *grp,
+			   bool *usr_wakeup,
+			   int *sk_rcvbuf,
+			   struct sk_buff *skb,
+			   struct sk_buff_head *inputq,
+			   struct sk_buff_head *xmitq)
+{
+	struct tipc_msg *hdr = buf_msg(skb);
+	struct tipc_event *evt = (void *)msg_data(hdr);	/* payload is a tipc_event */
+	u32 instance = evt->found_lower;
+	u32 node = evt->port.node;
+	u32 port = evt->port.ref;
+	int event = evt->event;
+	struct tipc_member *m;
+	struct net *net;
+	bool node_up;
+	u32 self;
+
+	if (!grp)
+		goto drop;
+
+	net = grp->net;
+	self = tipc_own_addr(net);
+	if (!grp->loopback && node == self && port == grp->portid)	/* our own event */
+		goto drop;
+
+	/* Convert message before delivery to user */
+	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+	msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
+	msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
+	msg_set_origport(hdr, port);
+	msg_set_orignode(hdr, node);
+	msg_set_nametype(hdr, grp->type);
+	msg_set_grp_evt(hdr, event);
+
+	m = tipc_group_find_member(grp, node, port);
+
+	if (event == TIPC_PUBLISHED) {
+		if (!m)
+			m = tipc_group_create_member(grp, node, port,
+						     MBR_DISCOVERED);
+		if (!m)
+			goto drop;
+
+		/* Hold back event if JOIN message not yet received */
+		if (m->state == MBR_DISCOVERED) {
+			m->event_msg = skb;	/* released to inputq by tipc_group_proto_rcv() */
+			m->state = MBR_PUBLISHED;
+		} else {
+			msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+			__skb_queue_tail(inputq, skb);
+			m->state = MBR_JOINED;
+			*usr_wakeup = true;
+			m->usr_pending = false;
+		}
+		m->instance = instance;
+		TIPC_SKB_CB(skb)->orig_member = m->instance;
+		tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);	/* answer with our JOIN */
+		if (m->window < ADV_IDLE)
+			tipc_group_update_member(m, 0);	/* len 0: only sorts into congested */
+		else
+			list_del_init(&m->congested);
+	} else if (event == TIPC_WITHDRAWN) {
+		if (!m)
+			goto drop;
+
+		TIPC_SKB_CB(skb)->orig_member = m->instance;
+
+		*usr_wakeup = true;
+		m->usr_pending = false;
+		node_up = tipc_node_is_up(net, node);
+
+		/* Hold back event if more messages might be expected */
+		if (m->state != MBR_LEAVING && node_up) {
+			m->event_msg = skb;
+			tipc_group_decr_active(grp, m);
+			m->state = MBR_LEAVING;
+		} else {
+			if (node_up)
+				msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+			else
+				msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);	/* node down: no sync point used */
+			__skb_queue_tail(inputq, skb);
+		}
+		list_del_init(&m->congested);
+	}
+	*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);	/* refreshed on every non-dropped event */
+	return;
+drop:
+	kfree_skb(skb);
+}
diff --git a/net/tipc/group.h b/net/tipc/group.h
new file mode 100644
index 000000000000..d525e1cd7de5
--- /dev/null
+++ b/net/tipc/group.h
@@ -0,0 +1,73 @@
+/*
+ * net/tipc/group.h: Include file for TIPC group unicast/multicast functions
+ *
+ * Copyright (c) 2017, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_GROUP_H
+#define _TIPC_GROUP_H
+
+#include "core.h"
+
+struct tipc_group;	/* forward declarations: only pointers are used below */
+struct tipc_member;
+struct tipc_msg;
+
+struct tipc_group *tipc_group_create(struct net *net, u32 portid,
+				     struct tipc_group_req *mreq);
+void tipc_group_delete(struct net *net, struct tipc_group *grp);
+void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port);
+struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
+void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
+		     int *scope);
+u32 tipc_group_exclude(struct tipc_group *grp);
+void tipc_group_filter_msg(struct tipc_group *grp,
+			   struct sk_buff_head *inputq,
+			   struct sk_buff_head *xmitq);
+void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup,
+			   int *sk_rcvbuf, struct sk_buff *skb,
+			   struct sk_buff_head *inputq,
+			   struct sk_buff_head *xmitq);	/* "wakeup" is "usr_wakeup" in group.c */
+void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
+			  struct tipc_msg *hdr,
+			  struct sk_buff_head *inputq,
+			  struct sk_buff_head *xmitq);	/* "wakeup" is "usr_wakeup" in group.c */
+void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
+bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
+		     int len, struct tipc_member **m);	/* "m" is "mbr" in group.c */
+bool tipc_group_bc_cong(struct tipc_group *grp, int len);
+void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
+			       u32 port, struct sk_buff_head *xmitq);
+u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
+void tipc_group_update_member(struct tipc_member *m, int len);
+int tipc_group_size(struct tipc_group *grp);
+#endif /* _TIPC_GROUP_H */
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 60820dc35a08..6bce0b1117bd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -239,7 +239,8 @@ static int link_is_up(struct tipc_link *l)
 static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 			       struct sk_buff_head *xmitq);
 static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
-				      u16 rcvgap, int tolerance, int priority,
+				      bool probe_reply, u16 rcvgap,
+				      int tolerance, int priority,
 				      struct sk_buff_head *xmitq);
 static void link_print(struct tipc_link *l, const char *str);
 static int tipc_link_build_nack_msg(struct tipc_link *l,
@@ -773,7 +774,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
 	}
 
 	if (state || probe || setup)
-		tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq);
+		tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq);
 
 	return rc;
 }
@@ -978,15 +979,15 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
 	struct tipc_msg *hdr = buf_msg(skb);
 
 	pr_warn("Retransmission failure on link <%s>\n", l->name);
-	link_print(l, "Resetting link ");
+	link_print(l, "State of link ");
 	pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
 		msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
 	pr_info("sqno %u, prev: %x, src: %x\n",
 		msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
 }
 
-int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
-		      struct sk_buff_head *xmitq)
+int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
+		      u16 from, u16 to, struct sk_buff_head *xmitq)
 {
 	struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
 	struct tipc_msg *hdr;
@@ -997,11 +998,14 @@ int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
 		return 0;
 
 	/* Detect repeated retransmit failures on same packet */
-	if (likely(l->last_retransm != buf_seqno(skb))) {
-		l->last_retransm = buf_seqno(skb);
-		l->stale_count = 1;
-	} else if (++l->stale_count > 100) {
+	if (nacker->last_retransm != buf_seqno(skb)) {
+		nacker->last_retransm = buf_seqno(skb);
+		nacker->stale_count = 1;
+	} else if (++nacker->stale_count > 100) {
 		link_retransmit_failure(l, skb);
+		nacker->stale_count = 0;
+		if (link_is_bc_sndlink(l))
+			return TIPC_LINK_DOWN_EVT;
 		return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
 	}
 
@@ -1036,6 +1040,7 @@ int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
 static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
 			    struct sk_buff_head *inputq)
 {
+	struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq;
 	struct tipc_msg *hdr = buf_msg(skb);
 
 	switch (msg_user(hdr)) {
@@ -1043,13 +1048,16 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
 	case TIPC_MEDIUM_IMPORTANCE:
 	case TIPC_HIGH_IMPORTANCE:
 	case TIPC_CRITICAL_IMPORTANCE:
-		if (unlikely(msg_type(hdr) == TIPC_MCAST_MSG)) {
-			skb_queue_tail(l->bc_rcvlink->inputq, skb);
+		if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) {
+			skb_queue_tail(mc_inputq, skb);
 			return true;
 		}
 	case CONN_MANAGER:
 		skb_queue_tail(inputq, skb);
 		return true;
+	case GROUP_PROTOCOL:
+		skb_queue_tail(mc_inputq, skb);
+		return true;
 	case NAME_DISTRIBUTOR:
 		l->bc_rcvlink->state = LINK_ESTABLISHED;
 		skb_queue_tail(l->namedq, skb);
@@ -1167,7 +1175,7 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
 	/* Unicast ACK */
 	l->rcv_unacked = 0;
 	l->stats.sent_acks++;
-	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
 	return 0;
 }
 
@@ -1181,7 +1189,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
 	if (l->state == LINK_ESTABLISHING)
 		mtyp = ACTIVATE_MSG;
 
-	tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq);
+	tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq);
 
 	/* Inform peer that this endpoint is going down if applicable */
 	skb = skb_peek_tail(xmitq);
@@ -1208,7 +1216,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
 	}
 
 	if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
-		tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+		tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
 	return 0;
 }
 
@@ -1282,7 +1290,8 @@ drop:
 }
 
 static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
-				      u16 rcvgap, int tolerance, int priority,
+				      bool probe_reply, u16 rcvgap,
+				      int tolerance, int priority,
 				      struct sk_buff_head *xmitq)
 {
 	struct tipc_link *bcl = l->bc_rcvlink;
@@ -1330,6 +1339,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
 		msg_set_seq_gap(hdr, rcvgap);
 		msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
 		msg_set_probe(hdr, probe);
+		msg_set_is_keepalive(hdr, probe || probe_reply);
 		tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
 		msg_set_size(hdr, INT_H_SIZE + dlen);
 		skb_trim(skb, INT_H_SIZE + dlen);
@@ -1435,6 +1445,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 	u16 rcv_nxt = l->rcv_nxt;
 	u16 dlen = msg_data_sz(hdr);
 	int mtyp = msg_type(hdr);
+	bool reply = msg_probe(hdr);
 	void *data;
 	char *if_name;
 	int rc = 0;
@@ -1521,14 +1532,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 		/* Send NACK if peer has sent pkts we haven't received yet */
 		if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
 			rcvgap = peers_snd_nxt - l->rcv_nxt;
-		if (rcvgap || (msg_probe(hdr)))
-			tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
-						  0, 0, xmitq);
+		if (rcvgap || reply)
+			tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
+						  rcvgap, 0, 0, xmitq);
 		tipc_link_release_pkts(l, ack);
 
 		/* If NACK, retransmit will now start at right position */
 		if (gap) {
-			rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq);
+			rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq);
 			l->stats.recv_nacks++;
 		}
 
@@ -1680,7 +1691,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
 		return rc;
 
 	if (link_bc_retr_eval(snd_l, &from, &to))
-		rc = tipc_link_retrans(snd_l, from, to, xmitq);
+		rc = tipc_link_retrans(snd_l, l, from, to, xmitq);
 
 	l->snd_nxt = peers_snd_nxt;
 	if (link_bc_rcv_gap(l))
@@ -1775,7 +1786,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 
 	if (dnode == tipc_own_addr(l->net)) {
 		tipc_link_bc_ack_rcv(l, acked, xmitq);
-		rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
+		rc = tipc_link_retrans(l->bc_sndlink, l, from, to, xmitq);
 		l->stats.recv_nacks++;
 		return rc;
 	}
@@ -2115,14 +2126,14 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
 			     struct sk_buff_head *xmitq)
 {
 	l->tolerance = tol;
-	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq);
+	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
 }
 
 void tipc_link_set_prio(struct tipc_link *l, u32 prio,
 			struct sk_buff_head *xmitq)
 {
 	l->priority = prio;
-	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq);
+	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq);
 }
 
 void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 9e109bb1a207..8e884ed06d4b 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -530,8 +530,11 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen,
 	u16 gen = mon->dom_gen;
 	u16 len;
 
-	if (!tipc_mon_is_active(net, mon))
+	/* Send invalid record if not active */
+	if (!tipc_mon_is_active(net, mon)) {
+		dom->len = 0;
 		return;
+	}
 
 	/* Send only a dummy record with ack if peer has acked our last sent */
 	if (likely(state->acked_gen == gen)) {
@@ -559,6 +562,12 @@ void tipc_mon_get_state(struct net *net, u32 addr,
 	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
 	struct tipc_peer *peer;
 
+	if (!tipc_mon_is_active(net, mon)) {
+		state->probing = false;
+		state->monitoring = true;
+		return;
+	}
+
 	/* Used cached state if table has not changed */
 	if (!state->probing &&
 	    (state->list_gen == mon->list_gen) &&
@@ -578,9 +587,9 @@ void tipc_mon_get_state(struct net *net, u32 addr,
 	read_unlock_bh(&mon->lock);
 }
 
-static void mon_timeout(unsigned long m)
+static void mon_timeout(struct timer_list *t)
 {
-	struct tipc_monitor *mon = (void *)m;
+	struct tipc_monitor *mon = from_timer(mon, t, timer);
 	struct tipc_peer *self;
 	int best_member_cnt = dom_size(mon->peer_cnt) - 1;
 
@@ -623,7 +632,7 @@ int tipc_mon_create(struct net *net, int bearer_id)
 	self->is_up = true;
 	self->is_head = true;
 	INIT_LIST_HEAD(&self->list);
-	setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
+	timer_setup(&mon->timer, mon_timeout, 0);
 	mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
 	mod_timer(&mon->timer, jiffies + mon->timer_intv);
 	return 0;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index dcd90e6fa7c3..b0d07b35909d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -174,7 +174,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
 
 	if (fragid == LAST_FRAGMENT) {
 		TIPC_SKB_CB(head)->validated = false;
-		if (unlikely(!tipc_msg_validate(head)))
+		if (unlikely(!tipc_msg_validate(&head)))
 			goto err;
 		*buf = head;
 		TIPC_SKB_CB(head)->tail = NULL;
@@ -201,11 +201,21 @@ err:
  * TIPC will ignore the excess, under the assumption that it is optional info
  * introduced by a later release of the protocol.
  */
-bool tipc_msg_validate(struct sk_buff *skb)
+bool tipc_msg_validate(struct sk_buff **_skb)
 {
-	struct tipc_msg *msg;
+	struct sk_buff *skb = *_skb;
+	struct tipc_msg *hdr;
 	int msz, hsz;
 
+	/* Ensure that flow control ratio condition is satisfied */
+	if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) {
+		skb = skb_copy(skb, GFP_ATOMIC);
+		if (!skb)
+			return false;
+		kfree_skb(*_skb);
+		*_skb = skb;
+	}
+
 	if (unlikely(TIPC_SKB_CB(skb)->validated))
 		return true;
 	if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
@@ -217,11 +227,11 @@ bool tipc_msg_validate(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, hsz)))
 		return false;
 
-	msg = buf_msg(skb);
-	if (unlikely(msg_version(msg) != TIPC_VERSION))
+	hdr = buf_msg(skb);
+	if (unlikely(msg_version(hdr) != TIPC_VERSION))
 		return false;
 
-	msz = msg_size(msg);
+	msz = msg_size(hdr);
 	if (unlikely(msz < hsz))
 		return false;
 	if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE))
@@ -411,7 +421,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
 	skb_pull(*iskb, offset);
 	imsz = msg_size(buf_msg(*iskb));
 	skb_trim(*iskb, imsz);
-	if (unlikely(!tipc_msg_validate(*iskb)))
+	if (unlikely(!tipc_msg_validate(iskb)))
 		goto none;
 	*pos += align(imsz);
 	return true;
@@ -479,13 +489,14 @@ bool tipc_msg_make_bundle(struct sk_buff **skb,  struct tipc_msg *msg,
 bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
 {
 	struct sk_buff *_skb = *skb;
-	struct tipc_msg *hdr = buf_msg(_skb);
+	struct tipc_msg *hdr;
 	struct tipc_msg ohdr;
-	int dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE);
+	int dlen;
 
 	if (skb_linearize(_skb))
 		goto exit;
 	hdr = buf_msg(_skb);
+	dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE);
 	if (msg_dest_droppable(hdr))
 		goto exit;
 	if (msg_errcode(hdr))
@@ -511,6 +522,8 @@ bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
 	    pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
 		goto exit;
 
+	/* reassign after skb header modifications */
+	hdr = buf_msg(_skb);
 	/* Now reverse the concerned fields */
 	msg_set_errcode(hdr, err);
 	msg_set_non_seq(hdr, 0);
@@ -548,7 +561,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 		return false;
 	if (msg_errcode(msg))
 		return false;
-	*err = -TIPC_ERR_NO_NAME;
+	*err = TIPC_ERR_NO_NAME;
 	if (skb_linearize(skb))
 		return false;
 	msg = buf_msg(skb);
@@ -565,6 +578,14 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 	msg_set_destnode(msg, dnode);
 	msg_set_destport(msg, dport);
 	*err = TIPC_OK;
+
+	if (!skb_cloned(skb))
+		return true;
+
+	/* Unclone buffer in case it was bundled */
+	if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
+		return false;
+
 	return true;
 }
 
@@ -655,3 +676,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
 	}
 	kfree_skb(skb);
 }
+
+void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
+		     struct sk_buff_head *xmitq)
+{
+	if (tipc_msg_reverse(tipc_own_addr(net), &skb, err))	/* &skb: callee may update it */
+		__skb_queue_tail(xmitq, skb);	/* queued only when the reversal reports success */
+}	/* NOTE(review): on failure skb is left alone here; presumably consumed by callee */
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c843fd2bc48d..3e4384c222f7 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,7 +1,7 @@
 /*
  * net/tipc/msg.h: Include file for TIPC message header routines
  *
- * Copyright (c) 2000-2007, 2014-2015 Ericsson AB
+ * Copyright (c) 2000-2007, 2014-2017 Ericsson AB
  * Copyright (c) 2005-2008, 2010-2011, Wind River Systems
  * All rights reserved.
  *
@@ -61,10 +61,14 @@ struct plist;
 /*
  * Payload message types
  */
-#define TIPC_CONN_MSG		0
-#define TIPC_MCAST_MSG		1
-#define TIPC_NAMED_MSG		2
-#define TIPC_DIRECT_MSG		3
+#define TIPC_CONN_MSG           0
+#define TIPC_MCAST_MSG          1
+#define TIPC_NAMED_MSG          2
+#define TIPC_DIRECT_MSG         3
+#define TIPC_GRP_MEMBER_EVT     4
+#define TIPC_GRP_BCAST_MSG      5
+#define TIPC_GRP_MCAST_MSG      6
+#define TIPC_GRP_UCAST_MSG      7
 
 /*
  * Internal message users
@@ -73,11 +77,13 @@ struct plist;
 #define  MSG_BUNDLER          6
 #define  LINK_PROTOCOL        7
 #define  CONN_MANAGER         8
+#define  GROUP_PROTOCOL       9
 #define  TUNNEL_PROTOCOL      10
 #define  NAME_DISTRIBUTOR     11
 #define  MSG_FRAGMENTER       12
 #define  LINK_CONFIG          13
 #define  SOCK_WAKEUP          14       /* pseudo user */
+#define  TOP_SRV              15       /* pseudo user */
 
 /*
  * Message header sizes
@@ -86,6 +92,7 @@ struct plist;
 #define BASIC_H_SIZE              32	/* Basic payload message */
 #define NAMED_H_SIZE              40	/* Named payload message */
 #define MCAST_H_SIZE              44	/* Multicast payload message */
+#define GROUP_H_SIZE              44	/* Group payload message */
 #define INT_H_SIZE                40	/* Internal messages */
 #define MIN_H_SIZE                24	/* Smallest legal TIPC header size */
 #define MAX_H_SIZE                60	/* Largest possible TIPC header size */
@@ -96,6 +103,7 @@ struct plist;
 
 struct tipc_skb_cb {
 	u32 bytes_read;
+	u32 orig_member;
 	struct sk_buff *tail;
 	bool validated;
 	u16 chain_imp;
@@ -188,6 +196,11 @@ static inline u32 msg_size(struct tipc_msg *m)
 	return msg_bits(m, 0, 0, 0x1ffff);
 }
 
+static inline u32 msg_blocks(struct tipc_msg *m)
+{
+	return (msg_size(m) / 1024) + 1;	/* whole 1024-byte units plus one; never 0 */
+}
+
 static inline u32 msg_data_sz(struct tipc_msg *m)
 {
 	return msg_size(m) - msg_hdr_sz(m);
@@ -213,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d)
 	msg_set_bits(m, 0, 19, 1, d);
 }
 
+static inline int msg_is_keepalive(struct tipc_msg *m)
+{
+	return msg_bits(m, 0, 19, 1);	/* word 0, bit 19: same bit as dest_droppable */
+}
+
+static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d)
+{
+	msg_set_bits(m, 0, 19, 1, d);	/* shares the bit written by msg_set_dest_droppable() */
+}
+
 static inline int msg_src_droppable(struct tipc_msg *m)
 {
 	return msg_bits(m, 0, 18, 1);
@@ -251,6 +274,18 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n)
 	msg_set_bits(m, 1, 29, 0x7, n);
 }
 
+static inline int msg_in_group(struct tipc_msg *m)
+{
+	int mtyp = msg_type(m);
+
+	return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG;	/* types 4..7 */
+}
+
+static inline bool msg_is_grp_evt(struct tipc_msg *m)
+{
+	return msg_type(m) == TIPC_GRP_MEMBER_EVT;	/* true only for member events */
+}
+
 static inline u32 msg_named(struct tipc_msg *m)
 {
 	return msg_type(m) == TIPC_NAMED_MSG;
@@ -258,7 +293,10 @@ static inline u32 msg_named(struct tipc_msg *m)
 
 static inline u32 msg_mcast(struct tipc_msg *m)
 {
-	return msg_type(m) == TIPC_MCAST_MSG;
+	int mtyp = msg_type(m);
+
+	return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) ||
+		(mtyp == TIPC_GRP_MCAST_MSG));
 }
 
 static inline u32 msg_connected(struct tipc_msg *m)
@@ -514,6 +552,16 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
 #define DSC_RESP_MSG		1
 
 /*
+ * Group protocol message types
+ */
+#define GRP_JOIN_MSG         0
+#define GRP_LEAVE_MSG        1
+#define GRP_ADV_MSG          2
+#define GRP_ACK_MSG          3
+#define GRP_RECLAIM_MSG      4
+#define GRP_REMIT_MSG        5
+
+/*
  * Word 1
  */
 static inline u32 msg_seq_gap(struct tipc_msg *m)
@@ -764,12 +812,12 @@ static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n)
 	msg_set_bits(m, 9, 16, 0xffff, n);
 }
 
-static inline u32 msg_adv_win(struct tipc_msg *m)
+static inline u16 msg_adv_win(struct tipc_msg *m)
 {
 	return msg_bits(m, 9, 0, 0xffff);
 }
 
-static inline void msg_set_adv_win(struct tipc_msg *m, u32 n)
+static inline void msg_set_adv_win(struct tipc_msg *m, u16 n)
 {
 	msg_set_bits(m, 9, 0, 0xffff, n);
 }
@@ -794,6 +842,68 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
 	msg_set_bits(m, 9, 0, 0xffff, n);
 }
 
+static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m)
+{
+	return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n)
+{
+	msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_grp_bc_acked(struct tipc_msg *m)
+{
+	return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n)
+{
+	msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_grp_remitted(struct tipc_msg *m)
+{
+	return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n)
+{
+	msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+/* Word 10
+ */
+static inline u16 msg_grp_evt(struct tipc_msg *m)
+{
+	return msg_bits(m, 10, 0, 0x3);
+}
+
+static inline void msg_set_grp_evt(struct tipc_msg *m, int n)
+{
+	msg_set_bits(m, 10, 0, 0x3, n);
+}
+
+static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m)
+{
+	return msg_bits(m, 10, 0, 0x1);
+}
+
+static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n)
+{
+	msg_set_bits(m, 10, 0, 0x1, n);
+}
+
+static inline u16 msg_grp_bc_seqno(struct tipc_msg *m)
+{
+	return msg_bits(m, 10, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 10, 16, 0xffff, n);
+}
+
 static inline bool msg_peer_link_is_up(struct tipc_msg *m)
 {
 	if (likely(msg_user(m) != LINK_PROTOCOL))
@@ -816,8 +926,10 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
 }
 
 struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
-bool tipc_msg_validate(struct sk_buff *skb);
+bool tipc_msg_validate(struct sk_buff **_skb);
 bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
+void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
+		     struct sk_buff_head *xmitq);
 void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
 		   u32 hsize, u32 destnode);
 struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
@@ -842,6 +954,11 @@ static inline u16 buf_seqno(struct sk_buff *skb)
 	return msg_seqno(buf_msg(skb));
 }
 
+static inline int buf_roundup_len(struct sk_buff *skb)
+{
+	return (skb->len / 1024 + 1) * 1024;
+}
+
 /* tipc_skb_peek(): peek and reserve first buffer in list
  * @list: list to be peeked in
  * Returns pointer to first buffer in list, if any
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index bd0aac87b41a..b3829bcf63c7 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -43,6 +43,7 @@
 #include "bcast.h"
 #include "addr.h"
 #include "node.h"
+#include "group.h"
 #include <net/genetlink.h>
 
 #define TIPC_NAMETBL_SIZE 1024		/* must be a power of 2 */
@@ -596,18 +597,47 @@ not_found:
 	return ref;
 }
 
-/**
- * tipc_nametbl_mc_translate - find multicast destinations
- *
- * Creates list of all local ports that overlap the given multicast address;
- * also determines if any off-node ports overlap.
- *
- * Note: Publications with a scope narrower than 'limit' are ignored.
- * (i.e. local node-scope publications mustn't receive messages arriving
- * from another node, even if the multcast link brought it here)
- *
- * Returns non-zero if any off-node ports overlap
- */
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+			 struct list_head *dsts, int *dstcnt, u32 exclude,
+			 bool all)
+{
+	u32 self = tipc_own_addr(net);
+	struct publication *publ;
+	struct name_info *info;
+	struct name_seq *seq;
+	struct sub_seq *sseq;
+
+	if (!tipc_in_scope(domain, self))
+		return false;
+
+	*dstcnt = 0;
+	rcu_read_lock();
+	seq = nametbl_find_seq(net, type);
+	if (unlikely(!seq))
+		goto exit;
+	spin_lock_bh(&seq->lock);
+	sseq = nameseq_find_subseq(seq, instance);
+	if (likely(sseq)) {
+		info = sseq->info;
+		list_for_each_entry(publ, &info->zone_list, zone_list) {
+			if (!tipc_in_scope(domain, publ->node))
+				continue;
+			if (publ->ref == exclude && publ->node == self)
+				continue;
+			tipc_dest_push(dsts, publ->node, publ->ref);
+			(*dstcnt)++;
+			if (all)
+				continue;
+			list_move_tail(&publ->zone_list, &info->zone_list);
+			break;
+		}
+	}
+	spin_unlock_bh(&seq->lock);
+exit:
+	rcu_read_unlock();
+	return !list_empty(dsts);
+}
+
 int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
 			      u32 limit, struct list_head *dports)
 {
@@ -634,7 +664,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
 		info = sseq->info;
 		list_for_each_entry(publ, &info->node_list, node_list) {
 			if (publ->scope <= limit)
-				u32_push(dports, publ->ref);
+				tipc_dest_push(dports, 0, publ->ref);
 		}
 
 		if (info->cluster_list_size != info->node_list_size)
@@ -667,7 +697,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
 	spin_lock_bh(&seq->lock);
 	sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
 	stop = seq->sseqs + seq->first_free;
-	for (; sseq->lower <= upper && sseq != stop; sseq++) {
+	for (; sseq != stop && sseq->lower <= upper; sseq++) {
 		info = sseq->info;
 		list_for_each_entry(publ, &info->zone_list, zone_list) {
 			if (tipc_in_scope(domain, publ->node))
@@ -679,6 +709,37 @@ exit:
 	rcu_read_unlock();
 }
 
+/* tipc_nametbl_build_group - build list of communication group members
+ */
+void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
+			      u32 type, u32 domain)
+{
+	struct sub_seq *sseq, *stop;
+	struct name_info *info;
+	struct publication *p;
+	struct name_seq *seq;
+
+	rcu_read_lock();
+	seq = nametbl_find_seq(net, type);
+	if (!seq)
+		goto exit;
+
+	spin_lock_bh(&seq->lock);
+	sseq = seq->sseqs;
+	stop = seq->sseqs + seq->first_free;
+	for (; sseq != stop; sseq++) {
+		info = sseq->info;
+		list_for_each_entry(p, &info->zone_list, zone_list) {
+			if (!tipc_in_scope(domain, p->node))
+				continue;
+			tipc_group_add_member(grp, p->node, p->ref);
+		}
+	}
+	spin_unlock_bh(&seq->lock);
+exit:
+	rcu_read_unlock();
+}
+
 /*
  * tipc_nametbl_publish - add name publication to network name tables
  */
@@ -1057,78 +1118,79 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-bool u32_find(struct list_head *l, u32 value)
+struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port)
 {
-	struct u32_item *item;
+	u64 value = (u64)node << 32 | port;
+	struct tipc_dest *dst;
 
-	list_for_each_entry(item, l, list) {
-		if (item->value == value)
-			return true;
+	list_for_each_entry(dst, l, list) {
+		if (dst->value != value)
+			continue;
+		return dst;
 	}
-	return false;
+	return NULL;
 }
 
-bool u32_push(struct list_head *l, u32 value)
+bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
 {
-	struct u32_item *item;
+	u64 value = (u64)node << 32 | port;
+	struct tipc_dest *dst;
 
-	list_for_each_entry(item, l, list) {
-		if (item->value == value)
-			return false;
-	}
-	item = kmalloc(sizeof(*item), GFP_ATOMIC);
-	if (unlikely(!item))
+	if (tipc_dest_find(l, node, port))
 		return false;
 
-	item->value = value;
-	list_add(&item->list, l);
+	dst = kmalloc(sizeof(*dst), GFP_ATOMIC);
+	if (unlikely(!dst))
+		return false;
+	dst->value = value;
+	list_add(&dst->list, l);
 	return true;
 }
 
-u32 u32_pop(struct list_head *l)
+bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port)
 {
-	struct u32_item *item;
-	u32 value = 0;
+	struct tipc_dest *dst;
 
 	if (list_empty(l))
-		return 0;
-	item = list_first_entry(l, typeof(*item), list);
-	value = item->value;
-	list_del(&item->list);
-	kfree(item);
-	return value;
+		return false;
+	dst = list_first_entry(l, typeof(*dst), list);
+	if (port)
+		*port = dst->port;
+	if (node)
+		*node = dst->node;
+	list_del(&dst->list);
+	kfree(dst);
+	return true;
 }
 
-bool u32_del(struct list_head *l, u32 value)
+bool tipc_dest_del(struct list_head *l, u32 node, u32 port)
 {
-	struct u32_item *item, *tmp;
+	struct tipc_dest *dst;
 
-	list_for_each_entry_safe(item, tmp, l, list) {
-		if (item->value != value)
-			continue;
-		list_del(&item->list);
-		kfree(item);
-		return true;
-	}
-	return false;
+	dst = tipc_dest_find(l, node, port);
+	if (!dst)
+		return false;
+	list_del(&dst->list);
+	kfree(dst);
+	return true;
 }
 
-void u32_list_purge(struct list_head *l)
+void tipc_dest_list_purge(struct list_head *l)
 {
-	struct u32_item *item, *tmp;
+	struct tipc_dest *dst, *tmp;
 
-	list_for_each_entry_safe(item, tmp, l, list) {
-		list_del(&item->list);
-		kfree(item);
+	list_for_each_entry_safe(dst, tmp, l, list) {
+		list_del(&dst->list);
+		kfree(dst);
 	}
 }
 
-int u32_list_len(struct list_head *l)
+int tipc_dest_list_len(struct list_head *l)
 {
-	struct u32_item *item;
+	struct tipc_dest *dst;
 	int i = 0;
 
-	list_for_each_entry(item, l, list) {
+	list_for_each_entry(dst, l, list) {
 		i++;
 	}
 	return i;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 6ebdeb1d84a5..71926e429446 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -40,6 +40,7 @@
 struct tipc_subscription;
 struct tipc_plist;
 struct tipc_nlist;
+struct tipc_group;
 
 /*
  * TIPC name types reserved for internal TIPC use (both current and planned)
@@ -101,9 +102,14 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
 u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
 int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
 			      u32 limit, struct list_head *dports);
+void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
+			      u32 type, u32 domain);
 void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
 				   u32 upper, u32 domain,
 				   struct tipc_nlist *nodes);
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+			 struct list_head *dsts, int *dstcnt, u32 exclude,
+			 bool all);
 struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
 					 u32 upper, u32 scope, u32 port_ref,
 					 u32 key);
@@ -120,16 +126,22 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
 int tipc_nametbl_init(struct net *net);
 void tipc_nametbl_stop(struct net *net);
 
-struct u32_item {
+struct tipc_dest {
 	struct list_head list;
-	u32 value;
+	union {
+		struct {
+			u32 port;
+			u32 node;
+		};
+		u64 value;
+	};
 };
 
-bool u32_push(struct list_head *l, u32 value);
-u32 u32_pop(struct list_head *l);
-bool u32_find(struct list_head *l, u32 value);
-bool u32_del(struct list_head *l, u32 value);
-void u32_list_purge(struct list_head *l);
-int u32_list_len(struct list_head *l);
+struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port);
+bool tipc_dest_push(struct list_head *l, u32 node, u32 port);
+bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
+bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
+void tipc_dest_list_purge(struct list_head *l);
+int tipc_dest_list_len(struct list_head *l);
 
 #endif
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 9bfe886ab330..e48f0b2c01b9 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -258,13 +258,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
 	arg = nlmsg_new(0, GFP_KERNEL);
 	if (!arg) {
 		kfree_skb(msg->rep);
+		msg->rep = NULL;
 		return -ENOMEM;
 	}
 
 	err = __tipc_nl_compat_dumpit(cmd, msg, arg);
-	if (err)
+	if (err) {
 		kfree_skb(msg->rep);
-
+		msg->rep = NULL;
+	}
 	kfree_skb(arg);
 
 	return err;
@@ -1215,7 +1217,7 @@ send:
 	return err;
 }
 
-static struct genl_ops tipc_genl_compat_ops[] = {
+static const struct genl_ops tipc_genl_compat_ops[] = {
 	{
 		.cmd		= TIPC_GENL_CMD,
 		.doit		= tipc_nl_compat_recv,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9b4dcb6a16b5..507017fe0f1b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -153,11 +153,11 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id,
 				bool delete);
 static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq);
 static void tipc_node_delete(struct tipc_node *node);
-static void tipc_node_timeout(unsigned long data);
+static void tipc_node_timeout(struct timer_list *t);
 static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
 static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
 static void tipc_node_put(struct tipc_node *node);
-static bool tipc_node_is_up(struct tipc_node *n);
+static bool node_is_up(struct tipc_node *n);
 
 struct tipc_sock_conn {
 	u32 port;
@@ -361,7 +361,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
 		goto exit;
 	}
 	tipc_node_get(n);
-	setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n);
+	timer_setup(&n->timer, tipc_node_timeout, 0);
 	n->keepalive_intv = U32_MAX;
 	hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
 	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
@@ -500,9 +500,9 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
 
 /* tipc_node_timeout - handle expiration of node timer
  */
-static void tipc_node_timeout(unsigned long data)
+static void tipc_node_timeout(struct timer_list *t)
 {
-	struct tipc_node *n = (struct tipc_node *)data;
+	struct tipc_node *n = from_timer(n, t, timer);
 	struct tipc_link_entry *le;
 	struct sk_buff_head xmitq;
 	int bearer_id;
@@ -657,7 +657,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
 		*slot1 = i;
 	}
 
-	if (!tipc_node_is_up(n)) {
+	if (!node_is_up(n)) {
 		if (tipc_link_peer_is_down(l))
 			tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
 		tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
@@ -717,11 +717,27 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
 	tipc_sk_rcv(n->net, &le->inputq);
 }
 
-static bool tipc_node_is_up(struct tipc_node *n)
+static bool node_is_up(struct tipc_node *n)
 {
 	return n->active_links[0] != INVALID_BEARER_ID;
 }
 
+bool tipc_node_is_up(struct net *net, u32 addr)
+{
+	struct tipc_node *n;
+	bool retval = false;
+
+	if (in_own_node(net, addr))
+		return true;
+
+	n = tipc_node_find(net, addr);
+	if (!n)
+		return false;
+	retval = node_is_up(n);
+	tipc_node_put(n);
+	return retval;
+}
+
 void tipc_node_check_dest(struct net *net, u32 onode,
 			  struct tipc_bearer *b,
 			  u16 capabilities, u32 signature,
@@ -1126,8 +1142,8 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
 		strncpy(linkname, tipc_link_name(link), len);
 		err = 0;
 	}
-exit:
 	tipc_node_read_unlock(node);
+exit:
 	tipc_node_put(node);
 	return err;
 }
@@ -1149,7 +1165,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
 
 	if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr))
 		goto attr_msg_full;
-	if (tipc_node_is_up(node))
+	if (node_is_up(node))
 		if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP))
 			goto attr_msg_full;
 
@@ -1238,6 +1254,22 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
 	return 0;
 }
 
+/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations
+ * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected
+ */
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
+{
+	struct sk_buff *skb;
+	u32 selector, dnode;
+
+	while ((skb = __skb_dequeue(xmitq))) {
+		selector = msg_origport(buf_msg(skb));
+		dnode = msg_destnode(buf_msg(skb));
+		tipc_node_xmit_skb(net, skb, dnode, selector);
+	}
+	return 0;
+}
+
 void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
 {
 	struct sk_buff *txskb;
@@ -1249,7 +1281,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
 		dst = n->addr;
 		if (in_own_node(net, dst))
 			continue;
-		if (!tipc_node_is_up(n))
+		if (!node_is_up(n))
 			continue;
 		txskb = pskb_copy(skb, GFP_ATOMIC);
 		if (!txskb)
@@ -1284,7 +1316,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
 	rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
 
 	if (rc & TIPC_LINK_DOWN_EVT) {
-		tipc_bearer_reset_all(n->net);
+		tipc_node_reset_links(n);
 		return;
 	}
 
@@ -1351,15 +1383,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
 	if (!skb_queue_empty(&be->inputq1))
 		tipc_node_mcast_rcv(n);
 
-	if (rc & TIPC_LINK_DOWN_EVT) {
-		/* Reception reassembly failure => reset all links to peer */
-		if (!tipc_link_is_up(be->link))
-			tipc_node_reset_links(n);
-
-		/* Retransmission failure => reset all links to all peers */
-		if (!tipc_link_is_up(tipc_bc_sndlink(net)))
-			tipc_bearer_reset_all(net);
-	}
+	/* If reassembly or retransmission failure => reset all links to peer */
+	if (rc & TIPC_LINK_DOWN_EVT)
+		tipc_node_reset_links(n);
 
 	tipc_node_put(n);
 }
@@ -1513,7 +1539,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
 	__skb_queue_head_init(&xmitq);
 
 	/* Ensure message is well-formed before touching the header */
-	if (unlikely(!tipc_msg_validate(skb)))
+	if (unlikely(!tipc_msg_validate(&skb)))
 		goto discard;
 	hdr = buf_msg(skb);
 	usr = msg_user(hdr);
@@ -1557,6 +1583,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
 
 	/* Check/update node state before receiving */
 	if (unlikely(skb)) {
+		if (unlikely(skb_linearize(skb)))
+			goto discard;
 		tipc_node_write_lock(n);
 		if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) {
 			if (le->link) {
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 898c22916984..acd58d23a70e 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -48,7 +48,8 @@ enum {
 	TIPC_BCAST_SYNCH      = (1 << 1),
 	TIPC_BCAST_STATE_NACK = (1 << 2),
 	TIPC_BLOCK_FLOWCTL    = (1 << 3),
-	TIPC_BCAST_RCAST      = (1 << 4)
+	TIPC_BCAST_RCAST      = (1 << 4),
+	TIPC_MCAST_GROUPS     = (1 << 5)
 };
 
 #define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
@@ -68,6 +69,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
 			   char *linkname, size_t len);
 int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
 		   int selector);
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list);
 int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
 		       u32 selector);
 void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
@@ -76,6 +78,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
 int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
 void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
 int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+bool tipc_node_is_up(struct net *net, u32 addr);
 u16 tipc_node_get_capabilities(struct net *net, u32 addr);
 int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 3cd6402e812c..acaef80fb88c 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -36,6 +36,8 @@
 #include "server.h"
 #include "core.h"
 #include "socket.h"
+#include "addr.h"
+#include "msg.h"
 #include <net/sock.h>
 #include <linux/module.h>
 
@@ -105,13 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref)
 		kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
 		sock_release(sock);
 		con->sock = NULL;
-
-		spin_lock_bh(&s->idr_lock);
-		idr_remove(&s->conn_idr, con->conid);
-		s->idr_in_use--;
-		spin_unlock_bh(&s->idr_lock);
 	}
-
+	spin_lock_bh(&s->idr_lock);
+	idr_remove(&s->conn_idr, con->conid);
+	s->idr_in_use--;
+	spin_unlock_bh(&s->idr_lock);
 	tipc_clean_outqueues(con);
 	kfree(con);
 }
@@ -197,7 +197,8 @@ static void tipc_close_conn(struct tipc_conn *con)
 	struct tipc_server *s = con->server;
 
 	if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
-		tipc_unregister_callbacks(con);
+		if (con->sock)
+			tipc_unregister_callbacks(con);
 
 		if (con->conid)
 			s->tipc_conn_release(con->conid, con->usr_data);
@@ -207,8 +208,8 @@ static void tipc_close_conn(struct tipc_conn *con)
 		 * are harmless for us here as we have already deleted this
 		 * connection from server connection list.
 		 */
-		kernel_sock_shutdown(con->sock, SHUT_RDWR);
-
+		if (con->sock)
+			kernel_sock_shutdown(con->sock, SHUT_RDWR);
 		conn_put(con);
 	}
 }
@@ -487,38 +488,104 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
 	}
 }
 
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
+			     u32 lower, u32 upper, int *conid)
+{
+	struct tipc_subscriber *scbr;
+	struct tipc_subscr sub;
+	struct tipc_server *s;
+	struct tipc_conn *con;
+
+	sub.seq.type = type;
+	sub.seq.lower = lower;
+	sub.seq.upper = upper;
+	sub.timeout = TIPC_WAIT_FOREVER;
+	sub.filter = TIPC_SUB_PORTS;
+	*(u32 *)&sub.usr_handle = port;
+
+	con = tipc_alloc_conn(tipc_topsrv(net));
+	if (IS_ERR(con))
+		return false;
+
+	*conid = con->conid;
+	s = con->server;
+	scbr = s->tipc_conn_new(*conid);
+	if (!scbr) {
+		tipc_close_conn(con);
+		return false;
+	}
+
+	con->usr_data = scbr;
+	con->sock = NULL;
+	s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
+	return true;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+	struct tipc_conn *con;
+
+	con = tipc_conn_lookup(tipc_topsrv(net), conid);
+	if (!con)
+		return;
+	tipc_close_conn(con);
+	conn_put(con);
+}
+
+static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
+{
+	u32 port = *(u32 *)&evt->s.usr_handle;
+	u32 self = tipc_own_addr(net);
+	struct sk_buff_head evtq;
+	struct sk_buff *skb;
+
+	skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+			      self, self, port, port, 0);
+	if (!skb)
+		return;
+	msg_set_dest_droppable(buf_msg(skb), true);
+	memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+	skb_queue_head_init(&evtq);
+	__skb_queue_tail(&evtq, skb);
+	tipc_sk_rcv(net, &evtq);
+}
+
 static void tipc_send_to_sock(struct tipc_conn *con)
 {
-	int count = 0;
 	struct tipc_server *s = con->server;
 	struct outqueue_entry *e;
+	struct tipc_event *evt;
 	struct msghdr msg;
+	int count = 0;
 	int ret;
 
 	spin_lock_bh(&con->outqueue_lock);
 	while (test_bit(CF_CONNECTED, &con->flags)) {
-		e = list_entry(con->outqueue.next, struct outqueue_entry,
-			       list);
+		e = list_entry(con->outqueue.next, struct outqueue_entry, list);
 		if ((struct list_head *) e == &con->outqueue)
 			break;
-		spin_unlock_bh(&con->outqueue_lock);
 
-		memset(&msg, 0, sizeof(msg));
-		msg.msg_flags = MSG_DONTWAIT;
+		spin_unlock_bh(&con->outqueue_lock);
 
-		if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
-			msg.msg_name = &e->dest;
-			msg.msg_namelen = sizeof(struct sockaddr_tipc);
-		}
-		ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
-				     e->iov.iov_len);
-		if (ret == -EWOULDBLOCK || ret == 0) {
-			cond_resched();
-			goto out;
-		} else if (ret < 0) {
-			goto send_err;
+		if (con->sock) {
+			memset(&msg, 0, sizeof(msg));
+			msg.msg_flags = MSG_DONTWAIT;
+			if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
+				msg.msg_name = &e->dest;
+				msg.msg_namelen = sizeof(struct sockaddr_tipc);
+			}
+			ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
+					     e->iov.iov_len);
+			if (ret == -EWOULDBLOCK || ret == 0) {
+				cond_resched();
+				goto out;
+			} else if (ret < 0) {
+				goto send_err;
+			}
+		} else {
+			evt = e->iov.iov_base;
+			tipc_send_kern_top_evt(s->net, evt);
 		}
-
 		/* Don't starve users filling buffers */
 		if (++count >= MAX_SEND_MSG_COUNT) {
 			cond_resched();
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 34f8055afa3b..2113c9192633 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -83,13 +83,16 @@ struct tipc_server {
 int tipc_conn_sendmsg(struct tipc_server *s, int conid,
 		      struct sockaddr_tipc *addr, void *data, size_t len);
 
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
+			     u32 lower, u32 upper, int *conid);
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
+
 /**
  * tipc_conn_terminate - terminate connection with server
  *
  * Note: Must call it in process context since it might sleep
  */
 void tipc_conn_terminate(struct tipc_server *s, int conid);
-
 int tipc_server_start(struct tipc_server *s);
 
 void tipc_server_stop(struct tipc_server *s);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 101e3597338f..5d18c0caa92b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,7 +1,7 @@
 /*
  * net/tipc/socket.c: TIPC socket API
  *
- * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
+ * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
  * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
  * All rights reserved.
  *
@@ -45,9 +45,10 @@
 #include "socket.h"
 #include "bcast.h"
 #include "netlink.h"
+#include "group.h"
 
 #define CONN_TIMEOUT_DEFAULT	8000	/* default connect timeout = 8s */
-#define CONN_PROBING_INTERVAL	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
+#define CONN_PROBING_INTV	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
 #define TIPC_FWD_MSG		1
 #define TIPC_MAX_PORT		0xffffffff
 #define TIPC_MIN_PORT		1
@@ -61,6 +62,11 @@ enum {
 	TIPC_CONNECTING = TCP_SYN_SENT,
 };
 
+struct sockaddr_pair {
+	struct sockaddr_tipc sock;
+	struct sockaddr_tipc member;
+};
+
 /**
  * struct tipc_sock - TIPC socket structure
  * @sk: socket - interacts with 'port' and with user via the socket API
@@ -78,7 +84,7 @@ enum {
  * @conn_timeout: the time we can wait for an unresponded setup request
  * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
  * @cong_link_cnt: number of congested links
- * @sent_unacked: # messages sent by socket, and not yet acked by peer
+ * @snt_unacked: # messages sent by socket, and not yet acked by peer
  * @rcv_unacked: # messages read by user, but not yet acked back to peer
  * @peer: 'connected' peer for dgram/rdm
  * @node: hash table node
@@ -109,20 +115,22 @@ struct tipc_sock {
 	struct rhash_head node;
 	struct tipc_mc_method mc_method;
 	struct rcu_head rcu;
+	struct tipc_group *group;
 };
 
-static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
 static void tipc_data_ready(struct sock *sk);
 static void tipc_write_space(struct sock *sk);
 static void tipc_sock_destruct(struct sock *sk);
 static int tipc_release(struct socket *sock);
 static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
 		       bool kern);
-static void tipc_sk_timeout(unsigned long data);
+static void tipc_sk_timeout(struct timer_list *t);
 static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
 			   struct tipc_name_seq const *seq);
 static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 			    struct tipc_name_seq const *seq);
+static int tipc_sk_leave(struct tipc_sock *tsk);
 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
 static int tipc_sk_insert(struct tipc_sock *tsk);
 static void tipc_sk_remove(struct tipc_sock *tsk);
@@ -193,6 +201,11 @@ static bool tsk_conn_cong(struct tipc_sock *tsk)
 	return tsk->snt_unacked > tsk->snd_win;
 }
 
+static u16 tsk_blocks(int len)
+{
+	return ((len / FLOWCTL_BLK_SZ) + 1);
+}
+
 /* tsk_blocks(): translate a buffer size in bytes to number of
  * advertisable blocks, taking into account the ratio truesize(len)/len
  * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
@@ -451,9 +464,9 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 		      NAMED_H_SIZE, 0);
 
 	msg_set_origport(msg, tsk->portid);
-	setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
+	timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
 	sk->sk_shutdown = 0;
-	sk->sk_backlog_rcv = tipc_backlog_rcv;
+	sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
 	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
 	sk->sk_data_ready = tipc_data_ready;
 	sk->sk_write_space = tipc_write_space;
@@ -559,13 +572,14 @@ static int tipc_release(struct socket *sock)
 
 	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
 	sk->sk_shutdown = SHUTDOWN_MASK;
+	tipc_sk_leave(tsk);
 	tipc_sk_withdraw(tsk, 0, NULL);
 	sk_stop_timer(sk, &sk->sk_timer);
 	tipc_sk_remove(tsk);
 
 	/* Reject any messages that accumulated in backlog queue */
 	release_sock(sk);
-	u32_list_purge(&tsk->cong_links);
+	tipc_dest_list_purge(&tsk->cong_links);
 	tsk->cong_link_cnt = 0;
 	call_rcu(&tsk->rcu, tipc_sk_callback);
 	sock->sk = NULL;
@@ -601,7 +615,10 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
 		res = tipc_sk_withdraw(tsk, 0, NULL);
 		goto exit;
 	}
-
+	if (tsk->group) {
+		res = -EACCES;
+		goto exit;
+	}
 	if (uaddr_len < sizeof(struct sockaddr_tipc)) {
 		res = -EINVAL;
 		goto exit;
@@ -698,38 +715,41 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
-	u32 mask = 0;
+	struct tipc_group *grp = tsk->group;
+	u32 revents = 0;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+		revents |= POLLRDHUP | POLLIN | POLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
-		mask |= POLLHUP;
+		revents |= POLLHUP;
 
 	switch (sk->sk_state) {
 	case TIPC_ESTABLISHED:
 		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
-			mask |= POLLOUT;
+			revents |= POLLOUT;
 		/* fall thru' */
 	case TIPC_LISTEN:
 	case TIPC_CONNECTING:
 		if (!skb_queue_empty(&sk->sk_receive_queue))
-			mask |= (POLLIN | POLLRDNORM);
+			revents |= POLLIN | POLLRDNORM;
 		break;
 	case TIPC_OPEN:
-		if (!tsk->cong_link_cnt)
-			mask |= POLLOUT;
-		if (tipc_sk_type_connectionless(sk) &&
-		    (!skb_queue_empty(&sk->sk_receive_queue)))
-			mask |= (POLLIN | POLLRDNORM);
+		if (!grp || tipc_group_size(grp))
+			if (!tsk->cong_link_cnt)
+				revents |= POLLOUT;
+		if (!tipc_sk_type_connectionless(sk))
+			break;
+		if (skb_queue_empty(&sk->sk_receive_queue))
+			break;
+		revents |= POLLIN | POLLRDNORM;
 		break;
 	case TIPC_DISCONNECTING:
-		mask = (POLLIN | POLLRDNORM | POLLHUP);
+		revents = POLLIN | POLLRDNORM | POLLHUP;
 		break;
 	}
-
-	return mask;
+	return revents;
 }
 
 /**
@@ -757,6 +777,9 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
 	struct tipc_nlist dsts;
 	int rc;
 
+	if (tsk->group)
+		return -EACCES;
+
 	/* Block or return if any destination link is congested */
 	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
 	if (unlikely(rc))
@@ -794,6 +817,296 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
 }
 
 /**
+ * tipc_send_group_msg - send a message to a member in the group
+ * @net: network namespace
+ * @m: message to send
+ * @mb: group member
+ * @dnode: destination node
+ * @dport: destination port
+ * @dlen: total length of message data
+ */
+static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
+			       struct msghdr *m, struct tipc_member *mb,
+			       u32 dnode, u32 dport, int dlen)
+{
+	u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
+	struct tipc_mc_method *method = &tsk->mc_method;
+	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+	struct tipc_msg *hdr = &tsk->phdr;
+	struct sk_buff_head pkts;
+	int mtu, rc;
+
+	/* Complete message header */
+	msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
+	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+	msg_set_destport(hdr, dport);
+	msg_set_destnode(hdr, dnode);
+	msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
+
+	/* Build message as chain of buffers */
+	skb_queue_head_init(&pkts);
+	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+	if (unlikely(rc != dlen))
+		return rc;
+
+	/* Send message */
+	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+	if (unlikely(rc == -ELINKCONG)) {
+		tipc_dest_push(&tsk->cong_links, dnode, 0);
+		tsk->cong_link_cnt++;
+	}
+
+	/* Update send window */
+	tipc_group_update_member(mb, blks);
+
+	/* A broadcast sent within next EXPIRE period must follow same path */
+	method->rcast = true;
+	method->mandatory = true;
+	return dlen;
+}
+
+/**
+ * tipc_send_group_unicast - send message to a member in the group
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
+				   int dlen, long timeout)
+{
+	struct sock *sk = sock->sk;
+	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+	struct tipc_sock *tsk = tipc_sk(sk);
+	struct tipc_group *grp = tsk->group;
+	struct net *net = sock_net(sk);
+	struct tipc_member *mb = NULL;
+	u32 node, port;
+	int rc;
+
+	node = dest->addr.id.node;
+	port = dest->addr.id.ref;
+	if (!port && !node)
+		return -EHOSTUNREACH;
+
+	/* Block or return if destination link or member is congested */
+	rc = tipc_wait_for_cond(sock, &timeout,
+				!tipc_dest_find(&tsk->cong_links, node, 0) &&
+				!tipc_group_cong(grp, node, port, blks, &mb));
+	if (unlikely(rc))
+		return rc;
+
+	if (unlikely(!mb))
+		return -EHOSTUNREACH;
+
+	rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);
+
+	return rc ? rc : dlen;
+}
+
+/**
+ * tipc_send_group_anycast - send message to any member with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
+				   int dlen, long timeout)
+{
+	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct sock *sk = sock->sk;
+	struct tipc_sock *tsk = tipc_sk(sk);
+	struct list_head *cong_links = &tsk->cong_links;
+	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+	struct tipc_group *grp = tsk->group;
+	struct tipc_member *first = NULL;
+	struct tipc_member *mbr = NULL;
+	struct net *net = sock_net(sk);
+	u32 node, port, exclude;
+	u32 type, inst, domain;
+	struct list_head dsts;
+	int lookups = 0;
+	int dstcnt, rc;
+	bool cong;
+
+	INIT_LIST_HEAD(&dsts);
+
+	type = dest->addr.name.name.type;
+	inst = dest->addr.name.name.instance;
+	domain = addr_domain(net, dest->scope);
+	exclude = tipc_group_exclude(grp);
+
+	while (++lookups < 4) {
+		first = NULL;
+
+		/* Look for a non-congested destination member, if any */
+		while (1) {
+			if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
+						 &dstcnt, exclude, false))
+				return -EHOSTUNREACH;
+			tipc_dest_pop(&dsts, &node, &port);
+			cong = tipc_group_cong(grp, node, port, blks, &mbr);
+			if (!cong)
+				break;
+			if (mbr == first)
+				break;
+			if (!first)
+				first = mbr;
+		}
+
+		/* Start over if destination was not in member list */
+		if (unlikely(!mbr))
+			continue;
+
+		if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
+			break;
+
+		/* Block or return if destination link or member is congested */
+		rc = tipc_wait_for_cond(sock, &timeout,
+					!tipc_dest_find(cong_links, node, 0) &&
+					!tipc_group_cong(grp, node, port,
+							 blks, &mbr));
+		if (unlikely(rc))
+			return rc;
+
+		/* Send, unless destination disappeared while waiting */
+		if (likely(mbr))
+			break;
+	}
+
+	if (unlikely(lookups >= 4))
+		return -EHOSTUNREACH;
+
+	rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);
+
+	return rc ? rc : dlen;
+}
+
+/**
+ * tipc_send_group_bcast - send message to all members in communication group
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
+				 int dlen, long timeout)
+{
+	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct sock *sk = sock->sk;
+	struct net *net = sock_net(sk);
+	struct tipc_sock *tsk = tipc_sk(sk);
+	struct tipc_group *grp = tsk->group;
+	struct tipc_nlist *dsts = tipc_group_dests(grp);
+	struct tipc_mc_method *method = &tsk->mc_method;
+	bool ack = method->mandatory && method->rcast;
+	int blks = tsk_blocks(MCAST_H_SIZE + dlen);
+	struct tipc_msg *hdr = &tsk->phdr;
+	int mtu = tipc_bcast_get_mtu(net);
+	struct sk_buff_head pkts;
+	int rc = -EHOSTUNREACH;
+
+	if (!dsts->local && !dsts->remote)
+		return -EHOSTUNREACH;
+
+	/* Block or return if any destination link or member is congested */
+	rc = tipc_wait_for_cond(sock, &timeout,	!tsk->cong_link_cnt &&
+				!tipc_group_bc_cong(grp, blks));
+	if (unlikely(rc))
+		return rc;
+
+	/* Complete message header */
+	if (dest) {
+		msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
+		msg_set_nameinst(hdr, dest->addr.name.name.instance);
+	} else {
+		msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
+		msg_set_nameinst(hdr, 0);
+	}
+	msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+	msg_set_destport(hdr, 0);
+	msg_set_destnode(hdr, 0);
+	msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
+
+	/* Avoid getting stuck with repeated forced replicasts */
+	msg_set_grp_bc_ack_req(hdr, ack);
+
+	/* Build message as chain of buffers */
+	skb_queue_head_init(&pkts);
+	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+	if (unlikely(rc != dlen))
+		return rc;
+
+	/* Send message */
+	rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
+	if (unlikely(rc))
+		return rc;
+
+	/* Update broadcast sequence number and send windows */
+	tipc_group_update_bc_members(tsk->group, blks, ack);
+
+	/* Broadcast link is now free to choose method for next broadcast */
+	method->mandatory = false;
+	method->expires = jiffies;
+
+	return dlen;
+}
+
+/**
+ * tipc_send_group_mcast - send message to all members with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
+				 int dlen, long timeout)
+{
+	struct sock *sk = sock->sk;
+	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct tipc_name_seq *seq = &dest->addr.nameseq;
+	struct tipc_sock *tsk = tipc_sk(sk);
+	struct tipc_group *grp = tsk->group;
+	struct net *net = sock_net(sk);
+	u32 domain, exclude, dstcnt;
+	struct list_head dsts;
+
+	INIT_LIST_HEAD(&dsts);
+
+	if (seq->lower != seq->upper)
+		return -ENOTSUPP;
+
+	domain = addr_domain(net, dest->scope);
+	exclude = tipc_group_exclude(grp);
+	if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
+				 &dsts, &dstcnt, exclude, true))
+		return -EHOSTUNREACH;
+
+	if (dstcnt == 1) {
+		tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
+		return tipc_send_group_unicast(sock, m, dlen, timeout);
+	}
+
+	tipc_dest_list_purge(&dsts);
+	return tipc_send_group_bcast(sock, m, dlen, timeout);
+}
+
+/**
  * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
  * @arrvq: queue with arriving messages, to be cloned after destination lookup
  * @inputq: queue with cloned messages, delivered to socket after dest lookup
@@ -803,13 +1116,15 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
 void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		       struct sk_buff_head *inputq)
 {
-	struct tipc_msg *msg;
-	struct list_head dports;
-	u32 portid;
 	u32 scope = TIPC_CLUSTER_SCOPE;
-	struct sk_buff_head tmpq;
-	uint hsz;
+	u32 self = tipc_own_addr(net);
 	struct sk_buff *skb, *_skb;
+	u32 lower = 0, upper = ~0;
+	struct sk_buff_head tmpq;
+	u32 portid, oport, onode;
+	struct list_head dports;
+	struct tipc_msg *msg;
+	int user, mtyp, hsz;
 
 	__skb_queue_head_init(&tmpq);
 	INIT_LIST_HEAD(&dports);
@@ -817,17 +1132,32 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 	skb = tipc_skb_peek(arrvq, &inputq->lock);
 	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
 		msg = buf_msg(skb);
+		user = msg_user(msg);
+		mtyp = msg_type(msg);
+		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
+			spin_lock_bh(&inputq->lock);
+			if (skb_peek(arrvq) == skb) {
+				__skb_dequeue(arrvq);
+				__skb_queue_tail(inputq, skb);
+			}
+			refcount_dec(&skb->users);
+			spin_unlock_bh(&inputq->lock);
+			continue;
+		}
 		hsz = skb_headroom(skb) + msg_hdr_sz(msg);
-
-		if (in_own_node(net, msg_orignode(msg)))
+		oport = msg_origport(msg);
+		onode = msg_orignode(msg);
+		if (onode == self)
 			scope = TIPC_NODE_SCOPE;
 
 		/* Create destination port list and message clones: */
-		tipc_nametbl_mc_translate(net,
-					  msg_nametype(msg), msg_namelower(msg),
-					  msg_nameupper(msg), scope, &dports);
-		portid = u32_pop(&dports);
-		for (; portid; portid = u32_pop(&dports)) {
+		if (!msg_in_group(msg)) {
+			lower = msg_namelower(msg);
+			upper = msg_nameupper(msg);
+		}
+		tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
+					  scope, &dports);
+		while (tipc_dest_pop(&dports, NULL, &portid)) {
 			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
 			if (_skb) {
 				msg_set_destport(buf_msg(_skb), portid);
@@ -850,16 +1180,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 }
 
 /**
- * tipc_sk_proto_rcv - receive a connection mng protocol message
+ * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
  * @tsk: receiving socket
  * @skb: pointer to message buffer.
  */
-static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
-			      struct sk_buff_head *xmitq)
+static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
+				   struct sk_buff_head *xmitq)
 {
-	struct sock *sk = &tsk->sk;
-	u32 onode = tsk_own_node(tsk);
 	struct tipc_msg *hdr = buf_msg(skb);
+	u32 onode = tsk_own_node(tsk);
+	struct sock *sk = &tsk->sk;
 	int mtyp = msg_type(hdr);
 	bool conn_cong;
 
@@ -931,6 +1261,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 	struct list_head *clinks = &tsk->cong_links;
 	bool syn = !tipc_sk_type_connectionless(sk);
+	struct tipc_group *grp = tsk->group;
 	struct tipc_msg *hdr = &tsk->phdr;
 	struct tipc_name_seq *seq;
 	struct sk_buff_head pkts;
@@ -941,18 +1272,31 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
 		return -EMSGSIZE;
 
+	if (likely(dest)) {
+		if (unlikely(m->msg_namelen < sizeof(*dest)))
+			return -EINVAL;
+		if (unlikely(dest->family != AF_TIPC))
+			return -EINVAL;
+	}
+
+	if (grp) {
+		if (!dest)
+			return tipc_send_group_bcast(sock, m, dlen, timeout);
+		if (dest->addrtype == TIPC_ADDR_NAME)
+			return tipc_send_group_anycast(sock, m, dlen, timeout);
+		if (dest->addrtype == TIPC_ADDR_ID)
+			return tipc_send_group_unicast(sock, m, dlen, timeout);
+		if (dest->addrtype == TIPC_ADDR_MCAST)
+			return tipc_send_group_mcast(sock, m, dlen, timeout);
+		return -EINVAL;
+	}
+
 	if (unlikely(!dest)) {
 		dest = &tsk->peer;
 		if (!syn || dest->family != AF_TIPC)
 			return -EDESTADDRREQ;
 	}
 
-	if (unlikely(m->msg_namelen < sizeof(*dest)))
-		return -EINVAL;
-
-	if (unlikely(dest->family != AF_TIPC))
-		return -EINVAL;
-
 	if (unlikely(syn)) {
 		if (sk->sk_state == TIPC_LISTEN)
 			return -EPIPE;
@@ -985,7 +1329,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 		msg_set_destport(hdr, dport);
 		if (unlikely(!dport && !dnode))
 			return -EHOSTUNREACH;
-
 	} else if (dest->addrtype == TIPC_ADDR_ID) {
 		dnode = dest->addr.id.node;
 		msg_set_type(hdr, TIPC_DIRECT_MSG);
@@ -996,7 +1339,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 	}
 
 	/* Block or return if destination link is congested */
-	rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode));
+	rc = tipc_wait_for_cond(sock, &timeout,
+				!tipc_dest_find(clinks, dnode, 0));
 	if (unlikely(rc))
 		return rc;
 
@@ -1008,7 +1352,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 
 	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
 	if (unlikely(rc == -ELINKCONG)) {
-		u32_push(clinks, dnode);
+		tipc_dest_push(clinks, dnode, 0);
 		tsk->cong_link_cnt++;
 		rc = 0;
 	}
@@ -1128,7 +1472,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
 	msg_set_lookup_scope(msg, 0);
 	msg_set_hdr_sz(msg, SHORT_H_SIZE);
 
-	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
 	tipc_set_sk_state(sk, TIPC_ESTABLISHED);
 	tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
 	tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
@@ -1142,26 +1486,38 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
 }
 
 /**
- * set_orig_addr - capture sender's address for received message
+ * tipc_sk_set_orig_addr - capture sender's address for received message
  * @m: descriptor for message info
- * @msg: received message header
+ * @skb: received message buffer
  *
  * Note: Address is not captured if not requested by receiver.
  */
-static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
+static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
 {
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
+	DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
+	struct tipc_msg *hdr = buf_msg(skb);
 
-	if (addr) {
-		addr->family = AF_TIPC;
-		addr->addrtype = TIPC_ADDR_ID;
-		memset(&addr->addr, 0, sizeof(addr->addr));
-		addr->addr.id.ref = msg_origport(msg);
-		addr->addr.id.node = msg_orignode(msg);
-		addr->addr.name.domain = 0;	/* could leave uninitialized */
-		addr->scope = 0;		/* could leave uninitialized */
-		m->msg_namelen = sizeof(struct sockaddr_tipc);
-	}
+	if (!srcaddr)
+		return;
+
+	srcaddr->sock.family = AF_TIPC;
+	srcaddr->sock.addrtype = TIPC_ADDR_ID;
+	srcaddr->sock.addr.id.ref = msg_origport(hdr);
+	srcaddr->sock.addr.id.node = msg_orignode(hdr);
+	srcaddr->sock.addr.name.domain = 0;
+	srcaddr->sock.scope = 0;
+	m->msg_namelen = sizeof(struct sockaddr_tipc);
+
+	if (!msg_in_group(hdr))
+		return;
+
+	/* Group message users may also want to know sending member's id */
+	srcaddr->member.family = AF_TIPC;
+	srcaddr->member.addrtype = TIPC_ADDR_NAME;
+	srcaddr->member.addr.name.name.type = msg_nametype(hdr);
+	srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
+	srcaddr->member.addr.name.domain = 0;
+	m->msg_namelen = sizeof(*srcaddr);
 }
 
 /**
@@ -1318,11 +1674,13 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
 			size_t buflen,	int flags)
 {
 	struct sock *sk = sock->sk;
-	struct tipc_sock *tsk = tipc_sk(sk);
-	struct sk_buff *skb;
-	struct tipc_msg *hdr;
 	bool connected = !tipc_sk_type_connectionless(sk);
+	struct tipc_sock *tsk = tipc_sk(sk);
 	int rc, err, hlen, dlen, copy;
+	struct sk_buff_head xmitq;
+	struct tipc_msg *hdr;
+	struct sk_buff *skb;
+	bool grp_evt;
 	long timeout;
 
 	/* Catch invalid receive requests */
@@ -1336,8 +1694,8 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
 	}
 	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
+	/* Step rcv queue to first msg with data or error; wait if necessary */
 	do {
-		/* Look at first msg in receive queue; wait if necessary */
 		rc = tipc_wait_for_rcvmsg(sock, &timeout);
 		if (unlikely(rc))
 			goto exit;
@@ -1346,13 +1704,14 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
 		dlen = msg_data_sz(hdr);
 		hlen = msg_hdr_sz(hdr);
 		err = msg_errcode(hdr);
+		grp_evt = msg_is_grp_evt(hdr);
 		if (likely(dlen || err))
 			break;
 		tsk_advance_rx_queue(sk);
 	} while (1);
 
 	/* Collect msg meta data, including error code and rejected data */
-	set_orig_addr(m, hdr);
+	tipc_sk_set_orig_addr(m, skb);
 	rc = tipc_sk_anc_data_recv(m, hdr, tsk);
 	if (unlikely(rc))
 		goto exit;
@@ -1372,15 +1731,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
 	if (unlikely(rc))
 		goto exit;
 
+	/* Mark message as group event if applicable */
+	if (unlikely(grp_evt)) {
+		if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
+			m->msg_flags |= MSG_EOR;
+		m->msg_flags |= MSG_OOB;
+		copy = 0;
+	}
+
 	/* Caption of data or error code/rejected data was successful */
 	if (unlikely(flags & MSG_PEEK))
 		goto exit;
 
+	/* Send group flow control advertisement when applicable */
+	if (tsk->group && msg_in_group(hdr) && !grp_evt) {
+		skb_queue_head_init(&xmitq);
+		tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
+					  msg_orignode(hdr), msg_origport(hdr),
+					  &xmitq);
+		tipc_node_distr_xmit(sock_net(sk), &xmitq);
+	}
+
 	tsk_advance_rx_queue(sk);
+
 	if (likely(!connected))
 		goto exit;
 
-	/* Send connection flow control ack when applicable */
+	/* Send connection flow control advertisement when applicable */
 	tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
 	if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
 		tipc_sk_send_ack(tsk);
@@ -1446,7 +1823,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
 
 		/* Collect msg meta data, incl. error code and rejected data */
 		if (!copied) {
-			set_orig_addr(m, hdr);
+			tipc_sk_set_orig_addr(m, skb);
 			rc = tipc_sk_anc_data_recv(m, hdr, tsk);
 			if (rc)
 				break;
@@ -1532,14 +1909,51 @@ static void tipc_sock_destruct(struct sock *sk)
 	__skb_queue_purge(&sk->sk_receive_queue);
 }
 
+static void tipc_sk_proto_rcv(struct sock *sk,
+			      struct sk_buff_head *inputq,
+			      struct sk_buff_head *xmitq)
+{
+	struct sk_buff *skb = __skb_dequeue(inputq);
+	struct tipc_sock *tsk = tipc_sk(sk);
+	struct tipc_msg *hdr = buf_msg(skb);
+	struct tipc_group *grp = tsk->group;
+	bool wakeup = false;
+
+	switch (msg_user(hdr)) {
+	case CONN_MANAGER:
+		tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
+		return;
+	case SOCK_WAKEUP:
+		tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
+		tsk->cong_link_cnt--;
+		wakeup = true;
+		break;
+	case GROUP_PROTOCOL:
+		tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
+		break;
+	case TOP_SRV:
+		tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
+				      skb, inputq, xmitq);
+		skb = NULL;
+		break;
+	default:
+		break;
+	}
+
+	if (wakeup)
+		sk->sk_write_space(sk);
+
+	kfree_skb(skb);
+}
+
 /**
- * filter_connect - Handle all incoming messages for a connection-based socket
+ * tipc_sk_filter_connect - Handle incoming message for connection-based socket
  * @tsk: TIPC socket
  * @skb: pointer to message buffer. Set to NULL if buffer is consumed
  *
  * Returns true if everything ok, false otherwise
  */
-static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
+static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
 {
 	struct sock *sk = &tsk->sk;
 	struct net *net = sock_net(sk);
@@ -1643,6 +2057,9 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct tipc_msg *hdr = buf_msg(skb);
 
+	if (unlikely(msg_in_group(hdr)))
+		return sk->sk_rcvbuf;
+
 	if (unlikely(!msg_connected(hdr)))
 		return sk->sk_rcvbuf << msg_importance(hdr);
 
@@ -1653,7 +2070,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
 }
 
 /**
- * filter_rcv - validate incoming message
+ * tipc_sk_filter_rcv - validate incoming message
  * @sk: socket
  * @skb: pointer to message.
  *
@@ -1662,99 +2079,71 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
  *
  * Called with socket lock already taken
  *
- * Returns true if message was added to socket receive queue, otherwise false
  */
-static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
-		       struct sk_buff_head *xmitq)
+static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
+			       struct sk_buff_head *xmitq)
 {
+	bool sk_conn = !tipc_sk_type_connectionless(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
+	struct tipc_group *grp = tsk->group;
 	struct tipc_msg *hdr = buf_msg(skb);
-	unsigned int limit = rcvbuf_limit(sk, skb);
-	int err = TIPC_OK;
-	int usr = msg_user(hdr);
-	u32 onode;
+	struct net *net = sock_net(sk);
+	struct sk_buff_head inputq;
+	int limit, err = TIPC_OK;
 
-	if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
-		tipc_sk_proto_rcv(tsk, skb, xmitq);
-		return false;
-	}
+	TIPC_SKB_CB(skb)->bytes_read = 0;
+	__skb_queue_head_init(&inputq);
+	__skb_queue_tail(&inputq, skb);
 
-	if (unlikely(usr == SOCK_WAKEUP)) {
-		onode = msg_orignode(hdr);
-		kfree_skb(skb);
-		u32_del(&tsk->cong_links, onode);
-		tsk->cong_link_cnt--;
-		sk->sk_write_space(sk);
-		return false;
-	}
+	if (unlikely(!msg_isdata(hdr)))
+		tipc_sk_proto_rcv(sk, &inputq, xmitq);
 
-	/* Drop if illegal message type */
-	if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
-		kfree_skb(skb);
-		return false;
-	}
+	if (unlikely(grp))
+		tipc_group_filter_msg(grp, &inputq, xmitq);
 
-	/* Reject if wrong message type for current socket state */
-	if (tipc_sk_type_connectionless(sk)) {
-		if (msg_connected(hdr)) {
+	/* Validate and add to receive buffer if there is space */
+	while ((skb = __skb_dequeue(&inputq))) {
+		hdr = buf_msg(skb);
+		limit = rcvbuf_limit(sk, skb);
+		if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
+		    (!sk_conn && msg_connected(hdr)) ||
+		    (!grp && msg_in_group(hdr)))
 			err = TIPC_ERR_NO_PORT;
-			goto reject;
-		}
-	} else if (unlikely(!filter_connect(tsk, skb))) {
-		err = TIPC_ERR_NO_PORT;
-		goto reject;
-	}
+		else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
+			err = TIPC_ERR_OVERLOAD;
 
-	/* Reject message if there isn't room to queue it */
-	if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
-		err = TIPC_ERR_OVERLOAD;
-		goto reject;
+		if (unlikely(err)) {
+			tipc_skb_reject(net, err, skb, xmitq);
+			err = TIPC_OK;
+			continue;
+		}
+		__skb_queue_tail(&sk->sk_receive_queue, skb);
+		skb_set_owner_r(skb, sk);
+		sk->sk_data_ready(sk);
 	}
-
-	/* Enqueue message */
-	TIPC_SKB_CB(skb)->bytes_read = 0;
-	__skb_queue_tail(&sk->sk_receive_queue, skb);
-	skb_set_owner_r(skb, sk);
-
-	sk->sk_data_ready(sk);
-	return true;
-
-reject:
-	if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
-		__skb_queue_tail(xmitq, skb);
-	return false;
 }
 
 /**
- * tipc_backlog_rcv - handle incoming message from backlog queue
+ * tipc_sk_backlog_rcv - handle incoming message from backlog queue
  * @sk: socket
  * @skb: message
  *
  * Caller must hold socket lock
- *
- * Returns 0
  */
-static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
-	unsigned int truesize = skb->truesize;
+	unsigned int before = sk_rmem_alloc_get(sk);
 	struct sk_buff_head xmitq;
-	u32 dnode, selector;
+	unsigned int added;
 
 	__skb_queue_head_init(&xmitq);
 
-	if (likely(filter_rcv(sk, skb, &xmitq))) {
-		atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
-		return 0;
-	}
-
-	if (skb_queue_empty(&xmitq))
-		return 0;
+	tipc_sk_filter_rcv(sk, skb, &xmitq);
+	added = sk_rmem_alloc_get(sk) - before;
+	atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);
 
-	/* Send response/rejected message */
-	skb = __skb_dequeue(&xmitq);
-	dnode = msg_destnode(buf_msg(skb));
-	selector = msg_origport(buf_msg(skb));
-	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
+	/* Send pending response/rejected messages, if any */
+	tipc_node_distr_xmit(sock_net(sk), &xmitq);
 	return 0;
 }
 
@@ -1786,7 +2175,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
 
 		/* Add message directly to receive queue if possible */
 		if (!sock_owned_by_user(sk)) {
-			filter_rcv(sk, skb, xmitq);
+			tipc_sk_filter_rcv(sk, skb, xmitq);
 			continue;
 		}
 
@@ -1833,14 +2222,10 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
 				spin_unlock_bh(&sk->sk_lock.slock);
 			}
 			/* Send pending response/rejected messages, if any */
-			while ((skb = __skb_dequeue(&xmitq))) {
-				dnode = msg_destnode(buf_msg(skb));
-				tipc_node_xmit_skb(net, skb, dnode, dport);
-			}
+			tipc_node_distr_xmit(sock_net(sk), &xmitq);
 			sock_put(sk);
 			continue;
 		}
-
 		/* No destination socket => dequeue skb if still there */
 		skb = tipc_skb_dequeue(inputq, dport);
 		if (!skb)
@@ -1903,28 +2288,32 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
 	int previous;
 	int res = 0;
 
+	if (destlen != sizeof(struct sockaddr_tipc))
+		return -EINVAL;
+
 	lock_sock(sk);
 
-	/* DGRAM/RDM connect(), just save the destaddr */
-	if (tipc_sk_type_connectionless(sk)) {
-		if (dst->family == AF_UNSPEC) {
-			memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
-		} else if (destlen != sizeof(struct sockaddr_tipc)) {
-			res = -EINVAL;
-		} else {
-			memcpy(&tsk->peer, dest, destlen);
-		}
+	if (tsk->group) {
+		res = -EINVAL;
 		goto exit;
 	}
 
-	/*
-	 * Reject connection attempt using multicast address
-	 *
-	 * Note: send_msg() validates the rest of the address fields,
-	 *       so there's no need to do it here
-	 */
-	if (dst->addrtype == TIPC_ADDR_MCAST) {
+	if (dst->family == AF_UNSPEC) {
+		memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
+		if (!tipc_sk_type_connectionless(sk))
+			res = -EINVAL;
+		goto exit;
+	} else if (dst->family != AF_TIPC) {
+		res = -EINVAL;
+	}
+	if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
 		res = -EINVAL;
+	if (res)
+		goto exit;
+
+	/* DGRAM/RDM connect(), just save the destaddr */
+	if (tipc_sk_type_connectionless(sk)) {
+		memcpy(&tsk->peer, dest, destlen);
 		goto exit;
 	}
 
@@ -2141,46 +2530,43 @@ static int tipc_shutdown(struct socket *sock, int how)
 	return res;
 }
 
-static void tipc_sk_timeout(unsigned long data)
+static void tipc_sk_timeout(struct timer_list *t)
 {
-	struct tipc_sock *tsk = (struct tipc_sock *)data;
-	struct sock *sk = &tsk->sk;
-	struct sk_buff *skb = NULL;
-	u32 peer_port, peer_node;
+	struct sock *sk = from_timer(sk, t, sk_timer);
+	struct tipc_sock *tsk = tipc_sk(sk);
+	u32 peer_port = tsk_peer_port(tsk);
+	u32 peer_node = tsk_peer_node(tsk);
 	u32 own_node = tsk_own_node(tsk);
+	u32 own_port = tsk->portid;
+	struct net *net = sock_net(sk);
+	struct sk_buff *skb = NULL;
 
 	bh_lock_sock(sk);
-	if (!tipc_sk_connected(sk)) {
-		bh_unlock_sock(sk);
+	if (!tipc_sk_connected(sk))
+		goto exit;
+
+	/* Try again later if socket is busy */
+	if (sock_owned_by_user(sk)) {
+		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
 		goto exit;
 	}
-	peer_port = tsk_peer_port(tsk);
-	peer_node = tsk_peer_node(tsk);
 
 	if (tsk->probe_unacked) {
-		if (!sock_owned_by_user(sk)) {
-			tipc_set_sk_state(sk, TIPC_DISCONNECTING);
-			tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
-					      tsk_peer_port(tsk));
-			sk->sk_state_change(sk);
-		} else {
-			/* Try again later */
-			sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
-		}
-
-		bh_unlock_sock(sk);
+		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+		tipc_node_remove_conn(net, peer_node, peer_port);
+		sk->sk_state_change(sk);
 		goto exit;
 	}
-
-	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
-			      INT_H_SIZE, 0, peer_node, own_node,
-			      peer_port, tsk->portid, TIPC_OK);
+	/* Send new probe */
+	skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
+			      peer_node, own_node, peer_port, own_port,
+			      TIPC_OK);
 	tsk->probe_unacked = true;
-	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
+exit:
 	bh_unlock_sock(sk);
 	if (skb)
-		tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
-exit:
+		tipc_node_xmit_skb(net, skb, peer_node, own_port);
 	sock_put(sk);
 }
 
@@ -2255,8 +2641,8 @@ void tipc_sk_reinit(struct net *net)
 
 	do {
 		tsk = ERR_PTR(rhashtable_walk_start(&iter));
-		if (tsk)
-			continue;
+		if (IS_ERR(tsk))
+			goto walk_stop;
 
 		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
 			spin_lock_bh(&tsk->sk.sk_lock.slock);
@@ -2265,7 +2651,7 @@ void tipc_sk_reinit(struct net *net)
 			msg_set_orignode(msg, tn->own_addr);
 			spin_unlock_bh(&tsk->sk.sk_lock.slock);
 		}
-
+walk_stop:
 		rhashtable_walk_stop(&iter);
 	} while (tsk == ERR_PTR(-EAGAIN));
 }
@@ -2345,6 +2731,58 @@ void tipc_sk_rht_destroy(struct net *net)
 	rhashtable_destroy(&tn->sk_rht);
 }
 
+static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
+{
+	struct net *net = sock_net(&tsk->sk);
+	u32 domain = addr_domain(net, mreq->scope);
+	struct tipc_group *grp = tsk->group;
+	struct tipc_msg *hdr = &tsk->phdr;
+	struct tipc_name_seq seq;
+	int rc;
+
+	if (mreq->type < TIPC_RESERVED_TYPES)
+		return -EACCES;
+	if (grp)
+		return -EACCES;
+	grp = tipc_group_create(net, tsk->portid, mreq);
+	if (!grp)
+		return -ENOMEM;
+	tsk->group = grp;
+	msg_set_lookup_scope(hdr, mreq->scope);
+	msg_set_nametype(hdr, mreq->type);
+	msg_set_dest_droppable(hdr, true);
+	seq.type = mreq->type;
+	seq.lower = mreq->instance;
+	seq.upper = seq.lower;
+	tipc_nametbl_build_group(net, grp, mreq->type, domain);
+	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
+	if (rc) {
+		tipc_group_delete(net, grp);
+		tsk->group = NULL;
+	}
+
+	/* Eliminate any risk that a broadcast overtakes the sent JOIN */
+	tsk->mc_method.rcast = true;
+	tsk->mc_method.mandatory = true;
+	return rc;
+}
+
+static int tipc_sk_leave(struct tipc_sock *tsk)
+{
+	struct net *net = sock_net(&tsk->sk);
+	struct tipc_group *grp = tsk->group;
+	struct tipc_name_seq seq;
+	int scope;
+
+	if (!grp)
+		return -EINVAL;
+	tipc_group_self(grp, &seq, &scope);
+	tipc_group_delete(net, grp);
+	tsk->group = NULL;
+	tipc_sk_withdraw(tsk, scope, &seq);
+	return 0;
+}
+
 /**
  * tipc_setsockopt - set socket option
  * @sock: socket structure
@@ -2363,6 +2801,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
+	struct tipc_group_req mreq;
 	u32 value = 0;
 	int res = 0;
 
@@ -2378,9 +2817,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
 	case TIPC_CONN_TIMEOUT:
 		if (ol < sizeof(value))
 			return -EINVAL;
-		res = get_user(value, (u32 __user *)ov);
-		if (res)
-			return res;
+		if (get_user(value, (u32 __user *)ov))
+			return -EFAULT;
+		break;
+	case TIPC_GROUP_JOIN:
+		if (ol < sizeof(mreq))
+			return -EINVAL;
+		if (copy_from_user(&mreq, ov, sizeof(mreq)))
+			return -EFAULT;
 		break;
 	default:
 		if (ov || ol)
@@ -2413,6 +2857,12 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
 		tsk->mc_method.rcast = true;
 		tsk->mc_method.mandatory = true;
 		break;
+	case TIPC_GROUP_JOIN:
+		res = tipc_sk_join(tsk, &mreq);
+		break;
+	case TIPC_GROUP_LEAVE:
+		res = tipc_sk_leave(tsk);
+		break;
 	default:
 		res = -EINVAL;
 	}
@@ -2440,7 +2890,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
-	int len;
+	struct tipc_name_seq seq;
+	int len, scope;
 	u32 value;
 	int res;
 
@@ -2474,6 +2925,12 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
 	case TIPC_SOCK_RECVQ_DEPTH:
 		value = skb_queue_len(&sk->sk_receive_queue);
 		break;
+	case TIPC_GROUP_JOIN:
+		seq.type = 0;
+		if (tsk->group)
+			tipc_group_self(tsk->group, &seq, &scope);
+		value = seq.type;
+		break;
 	default:
 		res = -EINVAL;
 	}
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 0bf91cd3733c..251065dfd8df 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -52,7 +52,6 @@ struct tipc_subscriber {
 	struct list_head subscrp_list;
 };
 
-static void tipc_subscrp_delete(struct tipc_subscription *sub);
 static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
 
 /**
@@ -134,9 +133,9 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
 				node);
 }
 
-static void tipc_subscrp_timeout(unsigned long data)
+static void tipc_subscrp_timeout(struct timer_list *t)
 {
-	struct tipc_subscription *sub = (struct tipc_subscription *)data;
+	struct tipc_subscription *sub = from_timer(sub, t, timer);
 	struct tipc_subscriber *subscriber = sub->subscriber;
 
 	spin_lock_bh(&subscriber->lock);
@@ -197,15 +196,19 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
 {
 	struct list_head *subscription_list = &subscriber->subscrp_list;
 	struct tipc_subscription *sub, *temp;
+	u32 timeout;
 
 	spin_lock_bh(&subscriber->lock);
 	list_for_each_entry_safe(sub, temp, subscription_list,  subscrp_list) {
 		if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
 			continue;
 
-		tipc_nametbl_unsubscribe(sub);
-		list_del(&sub->subscrp_list);
-		tipc_subscrp_delete(sub);
+		timeout = htohl(sub->evt.s.timeout, sub->swap);
+		if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) {
+			tipc_nametbl_unsubscribe(sub);
+			list_del(&sub->subscrp_list);
+			tipc_subscrp_put(sub);
+		}
 
 		if (s)
 			break;
@@ -236,18 +239,12 @@ static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
 	tipc_subscrb_put(subscriber);
 }
 
-static void tipc_subscrp_delete(struct tipc_subscription *sub)
-{
-	u32 timeout = htohl(sub->evt.s.timeout, sub->swap);
-
-	if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer))
-		tipc_subscrp_put(sub);
-}
-
 static void tipc_subscrp_cancel(struct tipc_subscr *s,
 				struct tipc_subscriber *subscriber)
 {
+	tipc_subscrb_get(subscriber);
 	tipc_subscrb_subscrp_delete(subscriber, s);
+	tipc_subscrb_put(subscriber);
 }
 
 static struct tipc_subscription *tipc_subscrp_create(struct net *net,
@@ -306,7 +303,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
 	tipc_subscrb_get(subscriber);
 	spin_unlock_bh(&subscriber->lock);
 
-	setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
+	timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
 	timeout = htohl(sub->evt.s.timeout, swap);
 
 	if (timeout != TIPC_WAIT_FOREVER)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 60aff60e30ad..e07ee3ae0023 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -45,8 +45,18 @@ MODULE_AUTHOR("Mellanox Technologies");
 MODULE_DESCRIPTION("Transport Layer Security Support");
 MODULE_LICENSE("Dual BSD/GPL");
 
-static struct proto tls_base_prot;
-static struct proto tls_sw_prot;
+enum {
+	TLS_BASE_TX,	/* tls_prots[] slot: TLS sockopts and close only */
+	TLS_SW_TX,	/* tls_prots[] slot: adds tls_sw_sendmsg/sendpage */
+	TLS_NUM_CONFIG,	/* number of slots in tls_prots[] */
+};
+
+static struct proto tls_prots[TLS_NUM_CONFIG];
+
+static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+{
+	sk->sk_prot = &tls_prots[ctx->tx_conf]; /* index by TX config */
+}
 
 int wait_on_pending_writer(struct sock *sk, long *timeo)
 {
@@ -216,6 +226,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	void (*sk_proto_close)(struct sock *sk, long timeout);
 
 	lock_sock(sk);
+	sk_proto_close = ctx->sk_proto_close;
+
+	if (ctx->tx_conf == TLS_BASE_TX) {
+		kfree(ctx);
+		goto skip_tx_cleanup;
+	}
 
 	if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
 		tls_handle_open_record(sk, 0);
@@ -232,13 +248,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 			sg++;
 		}
 	}
-	ctx->free_resources(sk);
+
 	kfree(ctx->rec_seq);
 	kfree(ctx->iv);
 
-	sk_proto_close = ctx->sk_proto_close;
-	kfree(ctx);
+	if (ctx->tx_conf == TLS_SW_TX)
+		tls_sw_free_tx_resources(sk);
 
+skip_tx_cleanup:
 	release_sock(sk);
 	sk_proto_close(sk, timeout);
 }
@@ -338,46 +355,41 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
 static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
 				unsigned int optlen)
 {
-	struct tls_crypto_info *crypto_info, tmp_crypto_info;
+	struct tls_crypto_info *crypto_info;
 	struct tls_context *ctx = tls_get_ctx(sk);
-	struct proto *prot = NULL;
 	int rc = 0;
+	int tx_conf;
 
 	if (!optval || (optlen < sizeof(*crypto_info))) {
 		rc = -EINVAL;
 		goto out;
 	}
 
-	rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info));
+	crypto_info = &ctx->crypto_send;
+	/* Currently we don't support set crypto info more than one time */
+	if (TLS_CRYPTO_INFO_READY(crypto_info))
+		goto out;
+
+	rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
 	if (rc) {
 		rc = -EFAULT;
 		goto out;
 	}
 
 	/* check version */
-	if (tmp_crypto_info.version != TLS_1_2_VERSION) {
+	if (crypto_info->version != TLS_1_2_VERSION) {
 		rc = -ENOTSUPP;
-		goto out;
+		goto err_crypto_info;
 	}
 
-	/* get user crypto info */
-	crypto_info = &ctx->crypto_send;
-
-	/* Currently we don't support set crypto info more than one time */
-	if (TLS_CRYPTO_INFO_READY(crypto_info))
-		goto out;
-
-	switch (tmp_crypto_info.cipher_type) {
+	switch (crypto_info->cipher_type) {
 	case TLS_CIPHER_AES_GCM_128: {
 		if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
 			rc = -EINVAL;
 			goto out;
 		}
-		rc = copy_from_user(
-		  crypto_info,
-		  optval,
-		  sizeof(struct tls12_crypto_info_aes_gcm_128));
-
+		rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
+				    optlen - sizeof(*crypto_info));
 		if (rc) {
 			rc = -EFAULT;
 			goto err_crypto_info;
@@ -389,18 +401,16 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
 		goto out;
 	}
 
-	ctx->sk_write_space = sk->sk_write_space;
-	sk->sk_write_space = tls_write_space;
-
-	ctx->sk_proto_close = sk->sk_prot->close;
-
 	/* currently SW is default, we will have ethtool in future */
 	rc = tls_set_sw_offload(sk, ctx);
-	prot = &tls_sw_prot;
+	tx_conf = TLS_SW_TX;
 	if (rc)
 		goto err_crypto_info;
 
-	sk->sk_prot = prot;
+	ctx->tx_conf = tx_conf;
+	update_sk_prot(sk, ctx);
+	ctx->sk_write_space = sk->sk_write_space;
+	sk->sk_write_space = tls_write_space;
 	goto out;
 
 err_crypto_info:
@@ -453,7 +463,10 @@ static int tls_init(struct sock *sk)
 	icsk->icsk_ulp_data = ctx;
 	ctx->setsockopt = sk->sk_prot->setsockopt;
 	ctx->getsockopt = sk->sk_prot->getsockopt;
-	sk->sk_prot = &tls_base_prot;
+	ctx->sk_proto_close = sk->sk_prot->close;
+
+	ctx->tx_conf = TLS_BASE_TX;
+	update_sk_prot(sk, ctx);
 out:
 	return rc;
 }
@@ -464,16 +477,21 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
 	.init			= tls_init,
 };
 
+static void build_protos(struct proto *prot, struct proto *base)
+{
+	prot[TLS_BASE_TX] = *base;	/* start from a copy of @base */
+	prot[TLS_BASE_TX].setsockopt	= tls_setsockopt;
+	prot[TLS_BASE_TX].getsockopt	= tls_getsockopt;
+	prot[TLS_BASE_TX].close		= tls_sk_proto_close;
+
+	prot[TLS_SW_TX] = prot[TLS_BASE_TX];	/* keep the hooks set above */
+	prot[TLS_SW_TX].sendmsg		= tls_sw_sendmsg;
+	prot[TLS_SW_TX].sendpage	= tls_sw_sendpage;
+}
+
 static int __init tls_register(void)
 {
-	tls_base_prot			= tcp_prot;
-	tls_base_prot.setsockopt	= tls_setsockopt;
-	tls_base_prot.getsockopt	= tls_getsockopt;
-
-	tls_sw_prot			= tls_base_prot;
-	tls_sw_prot.sendmsg		= tls_sw_sendmsg;
-	tls_sw_prot.sendpage            = tls_sw_sendpage;
-	tls_sw_prot.close               = tls_sk_proto_close;
+	build_protos(tls_prots, &tcp_prot);
 
 	tcp_register_ulp(&tcp_tls_ulp_ops);
 
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index fa596fa71ba7..73d19210dd49 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -39,22 +39,6 @@
 
 #include <net/tls.h>
 
-static inline void tls_make_aad(int recv,
-				char *buf,
-				size_t size,
-				char *record_sequence,
-				int record_sequence_size,
-				unsigned char record_type)
-{
-	memcpy(buf, record_sequence, record_sequence_size);
-
-	buf[8] = record_type;
-	buf[9] = TLS_1_2_VERSION_MAJOR;
-	buf[10] = TLS_1_2_VERSION_MINOR;
-	buf[11] = size >> 8;
-	buf[12] = size & 0xFF;
-}
-
 static void trim_sg(struct sock *sk, struct scatterlist *sg,
 		    int *sg_num_elem, unsigned int *sg_size, int target_size)
 {
@@ -219,7 +203,7 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
 	struct aead_request *aead_req;
 	int rc;
 
-	aead_req = kmalloc(req_size, flags);
+	aead_req = kzalloc(req_size, flags);
 	if (!aead_req)
 		return -ENOMEM;
 
@@ -249,7 +233,7 @@ static int tls_push_record(struct sock *sk, int flags,
 	sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
 	sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
 
-	tls_make_aad(0, ctx->aad_space, ctx->sg_plaintext_size,
+	tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
 		     tls_ctx->rec_seq, tls_ctx->rec_seq_size,
 		     record_type);
 
@@ -639,7 +623,7 @@ sendpage_end:
 	return ret;
 }
 
-void tls_sw_free_resources(struct sock *sk)
+void tls_sw_free_tx_resources(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
@@ -650,6 +634,7 @@ void tls_sw_free_resources(struct sock *sk)
 	tls_free_both_sg(sk);
 
 	kfree(ctx);
+	kfree(tls_ctx);
 }
 
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
@@ -679,7 +664,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
 	}
 
 	ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
-	ctx->free_resources = tls_sw_free_resources;
 
 	crypto_info = &ctx->crypto_send;
 	switch (crypto_info->cipher_type) {
diff --git a/net/unix/Makefile b/net/unix/Makefile
index b663c607b1c6..ffd0a275c3a7 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux unix domain socket layer.
 #
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7b52a380d710..a9ee634f3c42 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -814,6 +814,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
 		 */
 	case SOCK_RAW:
 		sock->type = SOCK_DGRAM;
+		/* fall through */
 	case SOCK_DGRAM:
 		sock->ops = &unix_dgram_ops;
 		break;
@@ -1528,26 +1529,13 @@ static inline bool too_many_unix_fds(struct task_struct *p)
 	return false;
 }
 
-#define MAX_RECURSION_LEVEL 4
-
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
-	unsigned char max_level = 0;
 
 	if (too_many_unix_fds(current))
 		return -ETOOMANYREFS;
 
-	for (i = scm->fp->count - 1; i >= 0; i--) {
-		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
-
-		if (sk)
-			max_level = max(max_level,
-					unix_sk(sk)->recursion_level);
-	}
-	if (unlikely(max_level > MAX_RECURSION_LEVEL))
-		return -ETOOMANYREFS;
-
 	/*
 	 * Need to duplicate file references for the sake of garbage
 	 * collection.  Otherwise a socket in the fps might become a
@@ -1559,7 +1547,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 
 	for (i = scm->fp->count - 1; i >= 0; i--)
 		unix_inflight(scm->fp->user, scm->fp->fp[i]);
-	return max_level;
+	return 0;
 }
 
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1649,7 +1637,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct sk_buff *skb;
 	long timeo;
 	struct scm_cookie scm;
-	int max_level;
 	int data_len = 0;
 	int sk_locked;
 
@@ -1701,7 +1688,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	err = unix_scm_to_skb(&scm, skb, true);
 	if (err < 0)
 		goto out_free;
-	max_level = err + 1;
 
 	skb_put(skb, len - data_len);
 	skb->data_len = data_len;
@@ -1819,8 +1805,6 @@ restart_locked:
 		__net_timestamp(skb);
 	maybe_add_creds(skb, sock, other);
 	skb_queue_tail(&other->sk_receive_queue, skb);
-	if (max_level > unix_sk(other)->recursion_level)
-		unix_sk(other)->recursion_level = max_level;
 	unix_state_unlock(other);
 	other->sk_data_ready(other);
 	sock_put(other);
@@ -1855,7 +1839,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	int sent = 0;
 	struct scm_cookie scm;
 	bool fds_sent = false;
-	int max_level;
 	int data_len;
 
 	wait_for_unix_gc();
@@ -1905,7 +1888,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 			kfree_skb(skb);
 			goto out_err;
 		}
-		max_level = err + 1;
 		fds_sent = true;
 
 		skb_put(skb, size - data_len);
@@ -1925,8 +1907,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 
 		maybe_add_creds(skb, sock, other);
 		skb_queue_tail(&other->sk_receive_queue, skb);
-		if (max_level > unix_sk(other)->recursion_level)
-			unix_sk(other)->recursion_level = max_level;
 		unix_state_unlock(other);
 		other->sk_data_ready(other);
 		sent += size;
@@ -2304,10 +2284,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
 	 */
 	mutex_lock(&u->iolock);
 
-	if (flags & MSG_PEEK)
-		skip = sk_peek_offset(sk, flags);
-	else
-		skip = 0;
+	skip = max(sk_peek_offset(sk, flags), 0);
 
 	do {
 		int chunk;
@@ -2324,7 +2301,6 @@ redo:
 		last_len = last ? last->len : 0;
 again:
 		if (skb == NULL) {
-			unix_sk(sk)->recursion_level = 0;
 			if (copied >= target)
 				goto unlock;
 
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 4d9679701a6d..384c84e83462 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
 	err = -ENOENT;
 	if (sk == NULL)
 		goto out_nosk;
+	if (!net_eq(sock_net(sk), net))
+		goto out;
 
 	err = sock_diag_check_cookie(sk, req->udiag_cookie);
 	if (err)
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index 8831e7c42167..970f96489fe7 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -15,6 +15,16 @@ config VSOCKETS
 	  To compile this driver as a module, choose M here: the module
 	  will be called vsock. If unsure, say N.
 
+config VSOCKETS_DIAG
+	tristate "Virtual Sockets monitoring interface"
+	depends on VSOCKETS
+	default y
+	help
+	  Support for PF_VSOCK sockets monitoring interface used by the ss tool.
+	  If unsure, say Y.
+
+	  Enable this module so userspace applications can query open sockets.
+
 config VMWARE_VMCI_VSOCKETS
 	tristate "VMware VMCI transport for Virtual Sockets"
 	depends on VSOCKETS && VMWARE_VMCI
@@ -46,3 +56,15 @@ config VIRTIO_VSOCKETS_COMMON
 	  This option is selected by any driver which needs to access
 	  the virtio_vsock.  The module will be called
 	  vmw_vsock_virtio_transport_common.
+
+config HYPERV_VSOCKETS
+	tristate "Hyper-V transport for Virtual Sockets"
+	depends on VSOCKETS && HYPERV
+	help
+	  This module implements a Hyper-V transport for Virtual Sockets.
+
+	  Enable this transport if your Virtual Machine host supports Virtual
+	  Sockets over Hyper-V VMBus.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called hv_sock. If unsure, say N.
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index 09fc2eb29dc8..7c6f9a0b67b0 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -1,13 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_VSOCKETS) += vsock.o
+obj-$(CONFIG_VSOCKETS_DIAG) += vsock_diag.o
 obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
 obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
 obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
+obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
 
 vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
 
+vsock_diag-y += diag.o
+
 vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
 	vmci_transport_notify_qstate.o
 
 vmw_vsock_virtio_transport-y += virtio_transport.o
 
 vmw_vsock_virtio_transport_common-y += virtio_transport_common.o
+
+hv_sock-y += hyperv_transport.o
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index dfc8c51e4d74..5d28abf87fbf 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -36,7 +36,7 @@
  * not support simultaneous connects (two "client" sockets connecting).
  *
  * - "Server" sockets are referred to as listener sockets throughout this
- * implementation because they are in the VSOCK_SS_LISTEN state.  When a
+ * implementation because they are in the TCP_LISTEN state.  When a
  * connection request is received (the second kind of socket mentioned above),
  * we create a new socket and refer to it as a pending socket.  These pending
  * sockets are placed on the pending connection list of the listener socket.
@@ -82,6 +82,15 @@
  * argument, we must ensure the reference count is increased to ensure the
  * socket isn't freed before the function is run; the deferred function will
  * then drop the reference.
+ *
+ * - sk->sk_state uses the TCP state constants because they are widely used by
+ * other address families and exposed to userspace tools like ss(8):
+ *
+ *   TCP_CLOSE - unconnected
+ *   TCP_SYN_SENT - connecting
+ *   TCP_ESTABLISHED - connected
+ *   TCP_CLOSING - disconnecting
+ *   TCP_LISTEN - listening
  */
 
 #include <linux/types.h>
@@ -153,7 +162,6 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
  * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.  The hash function
  * mods with VSOCK_HASH_SIZE to ensure this.
  */
-#define VSOCK_HASH_SIZE         251
 #define MAX_PORT_RETRIES        24
 
 #define VSOCK_HASH(addr)        ((addr)->svm_port % VSOCK_HASH_SIZE)
@@ -168,9 +176,12 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
 #define vsock_connected_sockets_vsk(vsk)				\
 	vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
 
-static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
-static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
-static DEFINE_SPINLOCK(vsock_table_lock);
+struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+EXPORT_SYMBOL_GPL(vsock_bind_table);
+struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+EXPORT_SYMBOL_GPL(vsock_connected_table);
+DEFINE_SPINLOCK(vsock_table_lock);
+EXPORT_SYMBOL_GPL(vsock_table_lock);
 
 /* Autobind this socket to the local address if necessary. */
 static int vsock_auto_bind(struct vsock_sock *vsk)
@@ -184,7 +195,7 @@ static int vsock_auto_bind(struct vsock_sock *vsk)
 	return __vsock_bind(sk, &local_addr);
 }
 
-static void vsock_init_tables(void)
+static int __init vsock_init_tables(void)
 {
 	int i;
 
@@ -193,6 +204,7 @@ static void vsock_init_tables(void)
 
 	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
 		INIT_LIST_HEAD(&vsock_connected_table[i]);
+	return 0;
 }
 
 static void __vsock_insert_bound(struct list_head *list,
@@ -248,16 +260,6 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
 	return NULL;
 }
 
-static bool __vsock_in_bound_table(struct vsock_sock *vsk)
-{
-	return !list_empty(&vsk->bound_table);
-}
-
-static bool __vsock_in_connected_table(struct vsock_sock *vsk)
-{
-	return !list_empty(&vsk->connected_table);
-}
-
 static void vsock_insert_unbound(struct vsock_sock *vsk)
 {
 	spin_lock_bh(&vsock_table_lock);
@@ -485,7 +487,7 @@ void vsock_pending_work(struct work_struct *work)
 	if (vsock_in_connected_table(vsk))
 		vsock_remove_connected(vsk);
 
-	sk->sk_state = SS_FREE;
+	sk->sk_state = TCP_CLOSE;
 
 out:
 	release_sock(sk);
@@ -625,7 +627,6 @@ struct sock *__vsock_create(struct net *net,
 
 	sk->sk_destruct = vsock_sk_destruct;
 	sk->sk_backlog_rcv = vsock_queue_rcv_skb;
-	sk->sk_state = 0;
 	sock_reset_flag(sk, SOCK_DONE);
 
 	INIT_LIST_HEAD(&vsk->bound_table);
@@ -899,7 +900,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
 		/* Listening sockets that have connections in their accept
 		 * queue can be read.
 		 */
-		if (sk->sk_state == VSOCK_SS_LISTEN
+		if (sk->sk_state == TCP_LISTEN
 		    && !vsock_is_accept_queue_empty(sk))
 			mask |= POLLIN | POLLRDNORM;
 
@@ -928,7 +929,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
 		}
 
 		/* Connected sockets that can produce data can be written. */
-		if (sk->sk_state == SS_CONNECTED) {
+		if (sk->sk_state == TCP_ESTABLISHED) {
 			if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
 				bool space_avail_now = false;
 				int ret = transport->notify_poll_out(
@@ -950,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
 		 * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
 		 * but local send is not shutdown.
 		 */
-		if (sk->sk_state == SS_UNCONNECTED) {
+		if (sk->sk_state == TCP_CLOSE) {
 			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
 				mask |= POLLOUT | POLLWRNORM;
 
@@ -1120,9 +1121,9 @@ static void vsock_connect_timeout(struct work_struct *work)
 	sk = sk_vsock(vsk);
 
 	lock_sock(sk);
-	if (sk->sk_state == SS_CONNECTING &&
+	if (sk->sk_state == TCP_SYN_SENT &&
 	    (sk->sk_shutdown != SHUTDOWN_MASK)) {
-		sk->sk_state = SS_UNCONNECTED;
+		sk->sk_state = TCP_CLOSE;
 		sk->sk_err = ETIMEDOUT;
 		sk->sk_error_report(sk);
 		cancel = 1;
@@ -1168,7 +1169,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 		err = -EALREADY;
 		break;
 	default:
-		if ((sk->sk_state == VSOCK_SS_LISTEN) ||
+		if ((sk->sk_state == TCP_LISTEN) ||
 		    vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
 			err = -EINVAL;
 			goto out;
@@ -1191,7 +1192,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 		if (err)
 			goto out;
 
-		sk->sk_state = SS_CONNECTING;
+		sk->sk_state = TCP_SYN_SENT;
 
 		err = transport->connect(vsk);
 		if (err < 0)
@@ -1211,7 +1212,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 	timeout = vsk->connect_timeout;
 	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
-	while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
+	while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
 		if (flags & O_NONBLOCK) {
 			/* If we're not going to block, we schedule a timeout
 			 * function to generate a timeout on the connection
@@ -1234,13 +1235,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 
 		if (signal_pending(current)) {
 			err = sock_intr_errno(timeout);
-			sk->sk_state = SS_UNCONNECTED;
+			sk->sk_state = TCP_CLOSE;
 			sock->state = SS_UNCONNECTED;
 			vsock_transport_cancel_pkt(vsk);
 			goto out_wait;
 		} else if (timeout == 0) {
 			err = -ETIMEDOUT;
-			sk->sk_state = SS_UNCONNECTED;
+			sk->sk_state = TCP_CLOSE;
 			sock->state = SS_UNCONNECTED;
 			vsock_transport_cancel_pkt(vsk);
 			goto out_wait;
@@ -1251,7 +1252,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 
 	if (sk->sk_err) {
 		err = -sk->sk_err;
-		sk->sk_state = SS_UNCONNECTED;
+		sk->sk_state = TCP_CLOSE;
 		sock->state = SS_UNCONNECTED;
 	} else {
 		err = 0;
@@ -1284,7 +1285,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
 		goto out;
 	}
 
-	if (listener->sk_state != VSOCK_SS_LISTEN) {
+	if (listener->sk_state != TCP_LISTEN) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -1374,7 +1375,7 @@ static int vsock_listen(struct socket *sock, int backlog)
 	}
 
 	sk->sk_max_ack_backlog = backlog;
-	sk->sk_state = VSOCK_SS_LISTEN;
+	sk->sk_state = TCP_LISTEN;
 
 	err = 0;
 
@@ -1554,7 +1555,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 
 	/* Callers should not provide a destination with stream sockets. */
 	if (msg->msg_namelen) {
-		err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
+		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
 		goto out;
 	}
 
@@ -1565,7 +1566,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 		goto out;
 	}
 
-	if (sk->sk_state != SS_CONNECTED ||
+	if (sk->sk_state != TCP_ESTABLISHED ||
 	    !vsock_addr_bound(&vsk->local_addr)) {
 		err = -ENOTCONN;
 		goto out;
@@ -1689,7 +1690,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
 	lock_sock(sk);
 
-	if (sk->sk_state != SS_CONNECTED) {
+	if (sk->sk_state != TCP_ESTABLISHED) {
 		/* Recvmsg is supposed to return 0 if a peer performs an
 		 * orderly shutdown. Differentiate between that case and when a
 		 * peer has not connected or a local shutdown occured with the
@@ -1957,8 +1958,6 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
 	vsock_proto.owner = owner;
 	transport = t;
 
-	vsock_init_tables();
-
 	vsock_device.minor = MISC_DYNAMIC_MINOR;
 	err = misc_register(&vsock_device);
 	if (err) {
@@ -2019,6 +2018,8 @@ const struct vsock_transport *vsock_core_get_transport(void)
 }
 EXPORT_SYMBOL_GPL(vsock_core_get_transport);
 
+module_init(vsock_init_tables);
+
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Socket Family");
 MODULE_VERSION("1.0.2.0-k");
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
new file mode 100644
index 000000000000..31b567652250
--- /dev/null
+++ b/net/vmw_vsock/diag.c
@@ -0,0 +1,186 @@
+/*
+ * vsock sock_diag(7) module
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sock_diag.h>
+#include <linux/vm_sockets_diag.h>
+#include <net/af_vsock.h>
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+			u32 portid, u32 seq, u32 flags)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	struct vsock_diag_msg *rep;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
+			flags);
+	if (!nlh)
+		return -EMSGSIZE;	/* no message was added to skb */
+
+	rep = nlmsg_data(nlh);	/* payload of the new message */
+	rep->vdiag_family = AF_VSOCK;
+
+	/* Lock order dictates that sk_lock is acquired before
+	 * vsock_table_lock, so we cannot lock here.  Simply don't take
+	 * sk_lock; sk is guaranteed to stay alive since vsock_table_lock is
+	 * held.
+	 */
+	rep->vdiag_type = sk->sk_type;
+	rep->vdiag_state = sk->sk_state;
+	rep->vdiag_shutdown = sk->sk_shutdown;
+	rep->vdiag_src_cid = vsk->local_addr.svm_cid;
+	rep->vdiag_src_port = vsk->local_addr.svm_port;
+	rep->vdiag_dst_cid = vsk->remote_addr.svm_cid;
+	rep->vdiag_dst_port = vsk->remote_addr.svm_port;
+	rep->vdiag_ino = sock_i_ino(sk);
+
+	sock_diag_save_cookie(sk, rep->vdiag_cookie);
+
+	return 0;
+}
+
+static int vsock_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct vsock_diag_req *req;
+	struct vsock_sock *vsk;
+	unsigned int bucket;
+	unsigned int last_i;
+	unsigned int table;
+	struct net *net;
+	unsigned int i;
+
+	req = nlmsg_data(cb->nlh);
+	net = sock_net(skb->sk);
+
+	/* State saved between calls: table, bucket, index within bucket */
+	table = cb->args[0];	/* 0 = bind table, else connected table */
+	bucket = cb->args[1];
+	i = last_i = cb->args[2];
+
+	/* TODO VMCI pending sockets? */
+
+	spin_lock_bh(&vsock_table_lock);
+
+	/* Bind table (locally created sockets) */
+	if (table == 0) {
+		while (bucket < ARRAY_SIZE(vsock_bind_table)) {
+			struct list_head *head = &vsock_bind_table[bucket];
+
+			i = 0;	/* counts this netns' entries in bucket */
+			list_for_each_entry(vsk, head, bound_table) {
+				struct sock *sk = sk_vsock(vsk);
+
+				if (!net_eq(sock_net(sk), net))
+					continue;
+				if (i < last_i)	/* sent in an earlier call */
+					goto next_bind;
+				if (!(req->vdiag_states & (1 << sk->sk_state)))
+					goto next_bind;
+				if (sk_diag_fill(sk, skb,
+						 NETLINK_CB(cb->skb).portid,
+						 cb->nlh->nlmsg_seq,
+						 NLM_F_MULTI) < 0)
+					goto done;
+next_bind:
+				i++;
+			}
+			last_i = 0;	/* later buckets start from the top */
+			bucket++;
+		}
+
+		table++;
+		bucket = 0;
+	}
+
+	/* Connected table (accepted connections) */
+	while (bucket < ARRAY_SIZE(vsock_connected_table)) {
+		struct list_head *head = &vsock_connected_table[bucket];
+
+		i = 0;	/* counts this netns' unbound entries in bucket */
+		list_for_each_entry(vsk, head, connected_table) {
+			struct sock *sk = sk_vsock(vsk);
+
+			/* Skip sockets we've already seen above */
+			if (__vsock_in_bound_table(vsk))
+				continue;
+
+			if (!net_eq(sock_net(sk), net))
+				continue;
+			if (i < last_i)	/* sent in an earlier call */
+				goto next_connected;
+			if (!(req->vdiag_states & (1 << sk->sk_state)))
+				goto next_connected;
+			if (sk_diag_fill(sk, skb,
+					 NETLINK_CB(cb->skb).portid,
+					 cb->nlh->nlmsg_seq,
+					 NLM_F_MULTI) < 0)
+				goto done;
+next_connected:
+			i++;
+		}
+		last_i = 0;	/* later buckets start from the top */
+		bucket++;
+	}
+
+done:
+	spin_unlock_bh(&vsock_table_lock);
+
+	cb->args[0] = table;
+	cb->args[1] = bucket;
+	cb->args[2] = i;
+
+	return skb->len;
+}
+
+/* Handle a sock_diag request for AF_VSOCK.  Requests shorter than a struct
+ * vsock_diag_req get -EINVAL, requests without NLM_F_DUMP get -EOPNOTSUPP;
+ * everything else starts a netlink dump driven by vsock_diag_dump().
+ */
+static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+	struct netlink_dump_control ctl = { .dump = vsock_diag_dump };
+	int min_len = sizeof(struct vsock_diag_req);
+
+	if (nlmsg_len(h) < min_len)
+		return -EINVAL;
+
+	if (!(h->nlmsg_flags & NLM_F_DUMP))
+		return -EOPNOTSUPP;
+
+	return netlink_dump_start(sock_net(skb->sk)->diag_nlsk, skb, h, &ctl);
+}
+
+static const struct sock_diag_handler vsock_diag_handler = {
+	.family = AF_VSOCK,
+	.dump = vsock_diag_handler_dump,
+};
+
+static int __init vsock_diag_init(void)
+{
+	return sock_diag_register(&vsock_diag_handler);	/* AF_VSOCK */
+}
+
+static void __exit vsock_diag_exit(void)
+{
+	sock_diag_unregister(&vsock_diag_handler); /* undo vsock_diag_init() */
+}
+
+module_init(vsock_diag_init);
+module_exit(vsock_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
+			       40 /* AF_VSOCK */);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
new file mode 100644
index 000000000000..5583df708b8c
--- /dev/null
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -0,0 +1,917 @@
+/*
+ * Hyper-V transport for vsock
+ *
+ * Hyper-V Sockets supplies a byte-stream based communication mechanism
+ * between the host and the VM. This driver implements the necessary
+ * support in the VM by introducing the new vsock transport.
+ *
+ * Copyright (c) 2017, Microsoft Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/hyperv.h>
+#include <net/sock.h>
+#include <net/af_vsock.h>
+
+/* The host side's design of the feature requires 6 exact 4KB pages for
+ * recv/send rings respectively -- this is suboptimal considering memory
+ * consumption, however unluckily we have to live with it, before the
+ * host comes up with a better design in the future.
+ */
+#define PAGE_SIZE_4K		4096
+#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
+#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
+
+/* The MTU is 16KB per the host side's design */
+#define HVS_MTU_SIZE		(1024 * 16)
+
+struct vmpipe_proto_header {
+	u32 pkt_type;
+	u32 data_size;
+};
+
+/* For recv, we use the VMBus in-place packet iterator APIs to directly copy
+ * data from the ringbuffer into the userspace buffer.
+ */
+struct hvs_recv_buf {
+	/* The header before the payload data */
+	struct vmpipe_proto_header hdr;
+
+	/* The payload */
+	u8 data[HVS_MTU_SIZE];
+};
+
+/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
+ * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
+ * buffer, because tests show there is no significant performance difference.
+ *
+ * Note: the buffer can be eliminated in the future when we add new VMBus
+ * ringbuffer APIs that allow us to directly copy data from userspace buffer
+ * to VMBus ringbuffer.
+ */
+#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))
+
+struct hvs_send_buf {
+	/* The header before the payload data */
+	struct vmpipe_proto_header hdr;
+
+	/* The payload */
+	u8 data[HVS_SEND_BUF_SIZE];
+};
+
+#define HVS_HEADER_LEN	(sizeof(struct vmpacket_descriptor) + \
+			 sizeof(struct vmpipe_proto_header))
+
+/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and
+ * __hv_pkt_iter_next().
+ */
+#define VMBUS_PKT_TRAILER_SIZE	(sizeof(u64))
+
+#define HVS_PKT_LEN(payload_len)	(HVS_HEADER_LEN + \
+					 ALIGN((payload_len), 8) + \
+					 VMBUS_PKT_TRAILER_SIZE)
+
+union hvs_service_id {
+	uuid_le	srv_id;
+
+	struct {
+		unsigned int svm_port;
+		unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)];
+	};
+};
+
+/* Per-socket state (accessed via vsk->trans) */
+struct hvsock {
+	struct vsock_sock *vsk;
+
+	uuid_le vm_srv_id;
+	uuid_le host_srv_id;
+
+	struct vmbus_channel *chan;
+	struct vmpacket_descriptor *recv_desc;
+
+	/* The length of the payload not delivered to userland yet */
+	u32 recv_data_len;
+	/* The offset of the payload */
+	u32 recv_data_off;
+
+	/* Have we sent the zero-length packet (FIN)? */
+	bool fin_sent;
+};
+
+/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is
+ * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here:
+ * when we write apps to connect to the host, we can only use VMADDR_CID_ANY
+ * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we
+ * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY
+ * as the local cid.
+ *
+ * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
+ * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
+ * the below sockaddr:
+ *
+ * struct SOCKADDR_HV
+ * {
+ *    ADDRESS_FAMILY Family;
+ *    USHORT Reserved;
+ *    GUID VmId;
+ *    GUID ServiceId;
+ * };
+ * Note: VmID is not used by Linux VM and actually it isn't transmitted via
+ * VMBus, because here it's obvious the host and the VM can easily identify
+ * each other. Though the VmID is useful on the host, especially in the case
+ * of Windows container, Linux VM doesn't need it at all.
+ *
+ * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit
+ * the available GUID space of SOCKADDR_HV so that we can create a mapping
+ * between AF_VSOCK port and SOCKADDR_HV Service GUID. The rule of writing
+ * Hyper-V Sockets apps on the host and in Linux VM is:
+ *
+ ****************************************************************************
+ * The only valid Service GUIDs, from the perspectives of both the host and *
+ * Linux VM, that can be connected by the other end, must conform to this   *
+ * format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in    *
+ * this range [0, 0x7FFFFFFF].                                              *
+ ****************************************************************************
+ *
+ * When we write apps on the host to connect(), the GUID ServiceID is used.
+ * When we write apps in Linux VM to connect(), we only need to specify the
+ * port and the driver will form the GUID and use that to request the host.
+ *
+ * From the perspective of Linux VM:
+ * 1. the local ephemeral port (i.e. the local auto-bound port when we call
+ * connect() without explicit bind()) is generated by __vsock_bind_stream(),
+ * and the range is [1024, 0xFFFFFFFF).
+ * 2. the remote ephemeral port (i.e. the auto-generated remote port for
+ * a connect request initiated by the host's connect()) is generated by
+ * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF).
+ */
+
+#define MAX_LISTEN_PORT			((u32)0x7FFFFFFF)
+#define MAX_VM_LISTEN_PORT		MAX_LISTEN_PORT
+#define MAX_HOST_LISTEN_PORT		MAX_LISTEN_PORT
+#define MIN_HOST_EPHEMERAL_PORT		(MAX_HOST_LISTEN_PORT + 1)
+
+/* 00000000-facb-11e6-bd58-64006a7986d3 */
+static const uuid_le srv_id_template =
+	UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
+		0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
+
+static bool is_valid_srv_id(const uuid_le *id)
+{
+	return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4);
+}
+
+static unsigned int get_port_by_srv_id(const uuid_le *svr_id)
+{
+	return *((unsigned int *)svr_id);
+}
+
+static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id)
+{
+	unsigned int port = get_port_by_srv_id(svr_id);
+
+	vsock_addr_init(addr, VMADDR_CID_ANY, port);
+}
+
+static void hvs_remote_addr_init(struct sockaddr_vm *remote,
+				 struct sockaddr_vm *local)
+{
+	/* Next candidate port; the value persists across calls */
+	static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
+	struct sock *in_use;
+
+	vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+	for (;;) {
+		/* Restart from the bottom of the range after wrapping */
+		if (host_ephemeral_port == VMADDR_PORT_ANY ||
+		    host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT)
+			host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
+
+		remote->svm_port = host_ephemeral_port;
+		host_ephemeral_port++;
+
+		in_use = vsock_find_connected_socket(remote, local);
+		if (!in_use)
+			return;	/* Found an available ephemeral port */
+
+		/* Release refcnt got in vsock_find_connected_socket */
+		sock_put(in_use);
+	}
+}
+
+static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
+{
+	set_channel_pending_send_size(chan,
+				      HVS_PKT_LEN(HVS_SEND_BUF_SIZE));
+
+	/* See hvs_stream_has_space(): we must make sure the host has seen
+	 * the new pending send size, before we can re-check the writable
+	 * bytes.
+	 */
+	virt_mb();
+}
+
+static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan)
+{
+	set_channel_pending_send_size(chan, 0);
+
+	/* Ditto */
+	virt_mb();
+}
+
+static bool hvs_channel_readable(struct vmbus_channel *chan)
+{
+	u32 readable = hv_get_bytes_to_read(&chan->inbound);
+
+	/* 0-size payload means FIN */
+	return readable >= HVS_PKT_LEN(0);
+}
+
+static int hvs_channel_readable_payload(struct vmbus_channel *chan)
+{
+	u32 avail = hv_get_bytes_to_read(&chan->inbound);
+
+	/* Fewer bytes than a zero-payload packet occupies: there is
+	 * neither payload nor FIN to consume.
+	 */
+	if (avail < HVS_PKT_LEN(0))
+		return -1;
+
+	/* Exactly a zero-payload packet: 0-size payload means FIN */
+	if (avail == HVS_PKT_LEN(0))
+		return 0;
+
+	/* At least we have 1 byte to read. We don't need to return the
+	 * exact readable bytes: see vsock_stream_recvmsg() ->
+	 * vsock_stream_has_data().
+	 */
+	return 1;
+}
+
+static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan)
+{
+	const size_t reserved = HVS_PKT_LEN(1) + HVS_PKT_LEN(0);
+	size_t avail = hv_get_bytes_to_write(&chan->outbound);
+
+	/* The ringbuffer mustn't be 100% full, and we should reserve a
+	 * zero-length-payload packet for the FIN, so 'reserved' bytes
+	 * are never offered to the caller: see hv_ringbuffer_write()
+	 * and hvs_shutdown().
+	 */
+	if (avail <= reserved)
+		return 0;
+
+	/* Report the remaining room rounded down to a multiple of 8 */
+	return round_down(avail - reserved, 8);
+}
+
+static int hvs_send_data(struct vmbus_channel *chan,
+			 struct hvs_send_buf *send_buf, size_t to_write)
+{
+	send_buf->hdr.pkt_type = 1;
+	send_buf->hdr.data_size = to_write;
+	return vmbus_sendpacket(chan, &send_buf->hdr,
+				sizeof(send_buf->hdr) + to_write,
+				0, VM_PKT_DATA_INBAND, 0);
+}
+
+static void hvs_channel_cb(void *ctx)
+{
+	struct sock *sk = (struct sock *)ctx;
+	struct vsock_sock *vsk = vsock_sk(sk);
+	struct hvsock *hvs = vsk->trans;
+	struct vmbus_channel *chan = hvs->chan;
+
+	if (hvs_channel_readable(chan))
+		sk->sk_data_ready(sk);
+
+	/* See hvs_stream_has_space(): when we reach here, the writable bytes
+	 * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE).
+	 */
+	if (hv_get_bytes_to_write(&chan->outbound) > 0)
+		sk->sk_write_space(sk);
+}
+
+static void hvs_close_connection(struct vmbus_channel *chan)
+{
+	struct sock *sk = get_per_channel_state(chan);
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+
+	sk->sk_state = TCP_CLOSE;
+	sock_set_flag(sk, SOCK_DONE);
+	vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
+
+	sk->sk_state_change(sk);
+
+	release_sock(sk);
+}
+
+/* Pair a newly offered channel with its bound socket and open the channel */
+static void hvs_open_connection(struct vmbus_channel *chan)
+{
+	uuid_le *if_instance, *if_type;
+	unsigned char conn_from_host;
+	struct sockaddr_vm addr;
+	struct sock *sk, *new = NULL;
+	struct vsock_sock *vnew;
+	struct hvsock *hvs, *hvs_new;
+	int ret;
+
+	if_type = &chan->offermsg.offer.if_type;
+	if_instance = &chan->offermsg.offer.if_instance;
+	conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];
+
+	/* The host or the VM should only listen on a port in
+	 * [0, MAX_LISTEN_PORT]
+	 */
+	if (!is_valid_srv_id(if_type) ||
+	    get_port_by_srv_id(if_type) > MAX_LISTEN_PORT)
+		return;
+
+	hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
+	sk = vsock_find_bound_socket(&addr);
+	if (!sk)
+		return;
+
+	lock_sock(sk);
+	if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
+	    (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
+		goto out;
+
+	/* From the host: the channel goes to a new child socket, not to sk */
+	if (conn_from_host) {
+		if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
+			goto out;
+
+		new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
+				     sk->sk_type, 0);
+		if (!new)
+			goto out;
+
+		new->sk_state = TCP_SYN_SENT;
+		vnew = vsock_sk(new);
+		hvs_new = vnew->trans;
+		hvs_new->chan = chan;
+	} else {
+		hvs = vsock_sk(sk)->trans;
+		hvs->chan = chan;
+	}
+
+	set_channel_read_mode(chan, HV_CALL_DIRECT);
+	ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
+			 RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
+			 hvs_channel_cb, conn_from_host ? new : sk);
+	if (ret != 0) {
+		if (conn_from_host) {
+			hvs_new->chan = NULL;
+			sock_put(new);
+		} else {
+			hvs->chan = NULL;
+		}
+		goto out;
+	}
+
+	set_per_channel_state(chan, conn_from_host ? new : sk);
+	vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
+
+	if (conn_from_host) {
+		new->sk_state = TCP_ESTABLISHED;
+		sk->sk_ack_backlog++;
+
+		hvs_addr_init(&vnew->local_addr, if_type);
+		hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
+
+		hvs_new->vm_srv_id = *if_type;
+		hvs_new->host_srv_id = *if_instance;
+
+		vsock_insert_connected(vnew);
+
+		vsock_enqueue_accept(sk, new);
+	} else {
+		sk->sk_state = TCP_ESTABLISHED;
+		sk->sk_socket->state = SS_CONNECTED;
+
+		vsock_insert_connected(vsock_sk(sk));
+	}
+
+	sk->sk_state_change(sk);
+
+out:
+	/* Unlock before dropping the vsock_find_bound_socket() reference */
+	release_sock(sk);
+	sock_put(sk);
+}
+
+static u32 hvs_get_local_cid(void)
+{
+	return VMADDR_CID_ANY;
+}
+
+static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
+{
+	struct hvsock *hvs;
+
+	hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
+	if (!hvs)
+		return -ENOMEM;
+
+	vsk->trans = hvs;
+	hvs->vsk = vsk;
+
+	return 0;
+}
+
+static int hvs_connect(struct vsock_sock *vsk)
+{
+	union hvs_service_id vm, host;
+	struct hvsock *h = vsk->trans;
+
+	vm.srv_id = srv_id_template;
+	vm.svm_port = vsk->local_addr.svm_port;
+	h->vm_srv_id = vm.srv_id;
+
+	host.srv_id = srv_id_template;
+	host.svm_port = vsk->remote_addr.svm_port;
+	h->host_srv_id = host.srv_id;
+
+	return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
+}
+
+static int hvs_shutdown(struct vsock_sock *vsk, int mode)
+{
+	struct sock *sk = sk_vsock(vsk);
+	struct vmpipe_proto_header hdr;
+	struct hvs_send_buf *send_buf;
+	struct hvsock *hvs;
+
+	if (!(mode & SEND_SHUTDOWN))
+		return 0;
+
+	lock_sock(sk);
+
+	hvs = vsk->trans;
+	if (hvs->fin_sent)
+		goto out;
+
+	send_buf = (struct hvs_send_buf *)&hdr;
+
+	/* It can't fail: see hvs_channel_writable_bytes(). */
+	(void)hvs_send_data(hvs->chan, send_buf, 0);
+
+	hvs->fin_sent = true;
+out:
+	release_sock(sk);
+	return 0;
+}
+
+/* .release callback: mark the socket closing; send FIN if a channel exists */
+static void hvs_release(struct vsock_sock *vsk)
+{
+	struct sock *sk = sk_vsock(vsk);
+	struct hvsock *hvs = vsk->trans;
+
+	lock_sock(sk);
+
+	/* sk_state uses TCP_* constants; SS_* is for sk_socket->state */
+	sk->sk_state = TCP_CLOSING;
+	vsock_remove_sock(vsk);
+
+	release_sock(sk);
+
+	/* hvs_shutdown() takes the socket lock itself and sends the FIN once */
+	if (hvs->chan)
+		hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
+}
+
+static void hvs_destruct(struct vsock_sock *vsk)
+{
+	struct hvsock *hvs = vsk->trans;
+	struct vmbus_channel *chan = hvs->chan;
+
+	if (chan)
+		vmbus_hvsock_device_unregister(chan);
+
+	kfree(hvs);
+}
+
+static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
+			     size_t len, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hvs_dgram_enqueue(struct vsock_sock *vsk,
+			     struct sockaddr_vm *remote, struct msghdr *msg,
+			     size_t dgram_len)
+{
+	return -EOPNOTSUPP;
+}
+
+static bool hvs_dgram_allow(u32 cid, u32 port)
+{
+	return false;
+}
+
+static int hvs_update_recv_data(struct hvsock *hvs)
+{
+	struct hvs_recv_buf *recv_buf;
+	u32 payload_len;
+
+	recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
+	payload_len = recv_buf->hdr.data_size;
+
+	if (payload_len > HVS_MTU_SIZE)
+		return -EIO;
+
+	if (payload_len == 0)
+		hvs->vsk->peer_shutdown |= SEND_SHUTDOWN;
+
+	hvs->recv_data_len = payload_len;
+	hvs->recv_data_off = 0;
+
+	return 0;
+}
+
+static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
+				  size_t len, int flags)
+{
+	struct hvsock *hvs = vsk->trans;
+	struct hvs_recv_buf *recv_buf;
+	u32 to_read;
+	int ret;
+
+	if (flags & MSG_PEEK)
+		return -EOPNOTSUPP;
+
+	if (!hvs->recv_desc) {
+		/* No packet in progress; fail instead of using a NULL desc */
+		hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
+		ret = hvs->recv_desc ? hvs_update_recv_data(hvs) : -ENOBUFS;
+		if (ret)
+			return ret;
+	}
+
+	recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
+	to_read = min_t(u32, len, hvs->recv_data_len);
+	ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read);
+	if (ret != 0)
+		return ret;
+
+	hvs->recv_data_len -= to_read;
+	if (hvs->recv_data_len == 0) {
+		hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
+		if (hvs->recv_desc) {
+			ret = hvs_update_recv_data(hvs);
+			if (ret)
+				return ret;
+		}
+	} else {
+		hvs->recv_data_off += to_read;
+	}
+
+	return to_read;
+}
+
+/* Send one packet's worth of @msg; returns the bytes sent or -errno */
+static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
+				  size_t len)
+{
+	struct hvsock *hvs = vsk->trans;
+	struct hvs_send_buf *buf;
+	ssize_t chunk, err;
+
+	/* struct hvs_send_buf must be exactly PAGE_SIZE_4K bytes */
+	BUILD_BUG_ON(sizeof(*buf) != PAGE_SIZE_4K);
+
+	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Clamp the request to what the ringbuffer can take right now
+	 * and to the capacity of the temporary send buffer.
+	 */
+	chunk = hvs_channel_writable_bytes(hvs->chan);
+	chunk = min_t(ssize_t, len, chunk);
+	chunk = min_t(ssize_t, chunk, HVS_SEND_BUF_SIZE);
+
+	err = memcpy_from_msg(buf->data, msg, chunk);
+	if (err >= 0)
+		err = hvs_send_data(hvs->chan, buf, chunk);
+
+	kfree(buf);
+
+	/* On success report the number of payload bytes handed over */
+	return err < 0 ? err : chunk;
+}
+
+/* Report whether payload is pending for this socket: 1 if so, otherwise 0 */
+static s64 hvs_stream_has_data(struct vsock_sock *vsk)
+{
+	struct hvsock *hvs = vsk->trans;
+	int state;
+
+	/* Part of an already fetched packet is still undelivered */
+	if (hvs->recv_data_len > 0)
+		return 1;
+
+	state = hvs_channel_readable_payload(hvs->chan);
+	if (state == 1)
+		return 1;
+
+	/* 0 means a zero-length packet (FIN) is waiting, so record that
+	 * the peer has stopped sending. Any other value (-1) means that
+	 * neither payload nor FIN is available.
+	 */
+	if (state == 0)
+		vsk->peer_shutdown |= SEND_SHUTDOWN;
+
+	return 0;
+}
+
+static s64 hvs_stream_has_space(struct vsock_sock *vsk)
+{
+	struct hvsock *hvs = vsk->trans;
+	struct vmbus_channel *chan = hvs->chan;
+	s64 space = hvs_channel_writable_bytes(chan);
+
+	if (space <= 0) {
+		/* No room: set the pending send size first, see
+		 * hvs_channel_cb().
+		 */
+		hvs_set_channel_pending_send_size(chan);
+
+		/* Re-check the writable bytes to avoid race */
+		space = hvs_channel_writable_bytes(chan);
+	}
+
+	if (space > 0)
+		hvs_clear_channel_pending_send_size(chan);
+
+	return space;
+}
+
+static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
+{
+	return HVS_MTU_SIZE + 1;
+}
+
+static bool hvs_stream_is_active(struct vsock_sock *vsk)
+{
+	struct hvsock *hvs = vsk->trans;
+
+	return hvs->chan != NULL;
+}
+
+static bool hvs_stream_allow(u32 cid, u32 port)
+{
+	/* Streams may only target VMADDR_CID_HOST; every other cid
+	 * is refused.
+	 */
+	if (cid != VMADDR_CID_HOST)
+		return false;
+
+	/* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is
+	 * reserved as ephemeral ports, which are used as the host's ports
+	 * when the host initiates connections.
+	 *
+	 * Perform this check in the guest so an immediate error is produced
+	 * instead of a timeout.
+	 */
+	return port <= MAX_HOST_LISTEN_PORT;
+}
+
+static
+int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable)
+{
+	struct hvsock *hvs = vsk->trans;
+
+	*readable = hvs_channel_readable(hvs->chan);
+	return 0;
+}
+
+static
+int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable)
+{
+	*writable = hvs_stream_has_space(vsk) > 0;
+
+	return 0;
+}
+
+static
+int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target,
+			 struct vsock_transport_recv_notify_data *d)
+{
+	return 0;
+}
+
+static
+int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target,
+			      struct vsock_transport_recv_notify_data *d)
+{
+	return 0;
+}
+
+static
+int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target,
+				struct vsock_transport_recv_notify_data *d)
+{
+	return 0;
+}
+
+static
+int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target,
+				 ssize_t copied, bool data_read,
+				 struct vsock_transport_recv_notify_data *d)
+{
+	return 0;
+}
+
+static
+int hvs_notify_send_init(struct vsock_sock *vsk,
+			 struct vsock_transport_send_notify_data *d)
+{
+	return 0;
+}
+
+static
+int hvs_notify_send_pre_block(struct vsock_sock *vsk,
+			      struct vsock_transport_send_notify_data *d)
+{
+	return 0;
+}
+
+static
+int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk,
+				struct vsock_transport_send_notify_data *d)
+{
+	return 0;
+}
+
+static
+int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
+				 struct vsock_transport_send_notify_data *d)
+{
+	return 0;
+}
+
+static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+	/* Ignored. */
+}
+
+static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+	/* Ignored. */
+}
+
+static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+	/* Ignored. */
+}
+
+static u64 hvs_get_buffer_size(struct vsock_sock *vsk)
+{
+	return -ENOPROTOOPT;
+}
+
+static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk)
+{
+	return -ENOPROTOOPT;
+}
+
+static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk)
+{
+	return -ENOPROTOOPT;
+}
+
+static struct vsock_transport hvs_transport = {
+	.get_local_cid            = hvs_get_local_cid,
+
+	.init                     = hvs_sock_init,
+	.destruct                 = hvs_destruct,
+	.release                  = hvs_release,
+	.connect                  = hvs_connect,
+	.shutdown                 = hvs_shutdown,
+
+	.dgram_bind               = hvs_dgram_bind,
+	.dgram_dequeue            = hvs_dgram_dequeue,
+	.dgram_enqueue            = hvs_dgram_enqueue,
+	.dgram_allow              = hvs_dgram_allow,
+
+	.stream_dequeue           = hvs_stream_dequeue,
+	.stream_enqueue           = hvs_stream_enqueue,
+	.stream_has_data          = hvs_stream_has_data,
+	.stream_has_space         = hvs_stream_has_space,
+	.stream_rcvhiwat          = hvs_stream_rcvhiwat,
+	.stream_is_active         = hvs_stream_is_active,
+	.stream_allow             = hvs_stream_allow,
+
+	.notify_poll_in           = hvs_notify_poll_in,
+	.notify_poll_out          = hvs_notify_poll_out,
+	.notify_recv_init         = hvs_notify_recv_init,
+	.notify_recv_pre_block    = hvs_notify_recv_pre_block,
+	.notify_recv_pre_dequeue  = hvs_notify_recv_pre_dequeue,
+	.notify_recv_post_dequeue = hvs_notify_recv_post_dequeue,
+	.notify_send_init         = hvs_notify_send_init,
+	.notify_send_pre_block    = hvs_notify_send_pre_block,
+	.notify_send_pre_enqueue  = hvs_notify_send_pre_enqueue,
+	.notify_send_post_enqueue = hvs_notify_send_post_enqueue,
+
+	.set_buffer_size          = hvs_set_buffer_size,
+	.set_min_buffer_size      = hvs_set_min_buffer_size,
+	.set_max_buffer_size      = hvs_set_max_buffer_size,
+	.get_buffer_size          = hvs_get_buffer_size,
+	.get_min_buffer_size      = hvs_get_min_buffer_size,
+	.get_max_buffer_size      = hvs_get_max_buffer_size,
+};
+
+static int hvs_probe(struct hv_device *hdev,
+		     const struct hv_vmbus_device_id *dev_id)
+{
+	struct vmbus_channel *chan = hdev->channel;
+
+	hvs_open_connection(chan);
+
+	/* Always return success to suppress the unnecessary error message
+	 * in vmbus_probe(): on error the host will rescind the device in
+	 * 30 seconds and we can do cleanup at that time in
+	 * vmbus_onoffer_rescind().
+	 */
+	return 0;
+}
+
+static int hvs_remove(struct hv_device *hdev)
+{
+	struct vmbus_channel *chan = hdev->channel;
+
+	vmbus_close(chan);
+
+	return 0;
+}
+
+/* This isn't really used. See vmbus_match() and vmbus_probe() */
+static const struct hv_vmbus_device_id id_table[] = {
+	{},
+};
+
+static struct hv_driver hvs_drv = {
+	.name		= "hv_sock",
+	.hvsock		= true,
+	.id_table	= id_table,
+	.probe		= hvs_probe,
+	.remove		= hvs_remove,
+};
+
+static int __init hvs_init(void)
+{
+	int err;
+
+	if (vmbus_proto_version < VERSION_WIN10)
+		return -ENODEV;
+
+	err = vmbus_driver_register(&hvs_drv);
+	if (err)
+		return err;
+
+	err = vsock_core_init(&hvs_transport);
+	if (!err)
+		return 0;
+
+	/* The transport could not be added: undo the driver registration */
+	vmbus_driver_unregister(&hvs_drv);
+	return err;
+}
+
+static void __exit hvs_exit(void)
+{
+	vsock_core_exit();
+	vmbus_driver_unregister(&hvs_drv);
+}
+
+module_init(hvs_init);
+module_exit(hvs_exit);
+
+MODULE_DESCRIPTION("Hyper-V Sockets");
+MODULE_VERSION("1.0.0");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 403d86e80162..8e03bd3f3668 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -414,7 +414,7 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
 static void virtio_vsock_reset_sock(struct sock *sk)
 {
 	lock_sock(sk);
-	sk->sk_state = SS_UNCONNECTED;
+	sk->sk_state = TCP_CLOSE;
 	sk->sk_err = ECONNRESET;
 	sk->sk_error_report(sk);
 	release_sock(sk);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index edba7ab97563..3ae3a33da70b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -708,7 +708,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
 	sock_set_flag(sk, SOCK_DONE);
 	vsk->peer_shutdown = SHUTDOWN_MASK;
 	if (vsock_stream_has_data(vsk) <= 0)
-		sk->sk_state = SS_DISCONNECTING;
+		sk->sk_state = TCP_CLOSING;
 	sk->sk_state_change(sk);
 
 	if (vsk->close_work_scheduled &&
@@ -748,8 +748,8 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
 {
 	struct sock *sk = &vsk->sk;
 
-	if (!(sk->sk_state == SS_CONNECTED ||
-	      sk->sk_state == SS_DISCONNECTING))
+	if (!(sk->sk_state == TCP_ESTABLISHED ||
+	      sk->sk_state == TCP_CLOSING))
 		return true;
 
 	/* Already received SHUTDOWN from peer, reply with RST */
@@ -801,7 +801,7 @@ virtio_transport_recv_connecting(struct sock *sk,
 
 	switch (le16_to_cpu(pkt->hdr.op)) {
 	case VIRTIO_VSOCK_OP_RESPONSE:
-		sk->sk_state = SS_CONNECTED;
+		sk->sk_state = TCP_ESTABLISHED;
 		sk->sk_socket->state = SS_CONNECTED;
 		vsock_insert_connected(vsk);
 		sk->sk_state_change(sk);
@@ -821,7 +821,7 @@ virtio_transport_recv_connecting(struct sock *sk,
 
 destroy:
 	virtio_transport_reset(vsk, pkt);
-	sk->sk_state = SS_UNCONNECTED;
+	sk->sk_state = TCP_CLOSE;
 	sk->sk_err = skerr;
 	sk->sk_error_report(sk);
 	return err;
@@ -857,7 +857,7 @@ virtio_transport_recv_connected(struct sock *sk,
 			vsk->peer_shutdown |= SEND_SHUTDOWN;
 		if (vsk->peer_shutdown == SHUTDOWN_MASK &&
 		    vsock_stream_has_data(vsk) <= 0)
-			sk->sk_state = SS_DISCONNECTING;
+			sk->sk_state = TCP_CLOSING;
 		if (le32_to_cpu(pkt->hdr.flags))
 			sk->sk_state_change(sk);
 		break;
@@ -928,7 +928,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
 
 	lock_sock_nested(child, SINGLE_DEPTH_NESTING);
 
-	child->sk_state = SS_CONNECTED;
+	child->sk_state = TCP_ESTABLISHED;
 
 	vchild = vsock_sk(child);
 	vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
@@ -1016,18 +1016,18 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
 		sk->sk_write_space(sk);
 
 	switch (sk->sk_state) {
-	case VSOCK_SS_LISTEN:
+	case TCP_LISTEN:
 		virtio_transport_recv_listen(sk, pkt);
 		virtio_transport_free_pkt(pkt);
 		break;
-	case SS_CONNECTING:
+	case TCP_SYN_SENT:
 		virtio_transport_recv_connecting(sk, pkt);
 		virtio_transport_free_pkt(pkt);
 		break;
-	case SS_CONNECTED:
+	case TCP_ESTABLISHED:
 		virtio_transport_recv_connected(sk, pkt);
 		break;
-	case SS_DISCONNECTING:
+	case TCP_CLOSING:
 		virtio_transport_recv_disconnecting(sk, pkt);
 		virtio_transport_free_pkt(pkt);
 		break;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 10ae7823a19d..391775e3575c 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -21,7 +21,6 @@
 #include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <linux/list.h>
-#include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/net.h>
@@ -743,7 +742,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
 		/* The local context ID may be out of date, update it. */
 		vsk->local_addr.svm_cid = dst.svm_cid;
 
-		if (sk->sk_state == SS_CONNECTED)
+		if (sk->sk_state == TCP_ESTABLISHED)
 			vmci_trans(vsk)->notify_ops->handle_notify_pkt(
 					sk, pkt, true, &dst, &src,
 					&bh_process_pkt);
@@ -801,7 +800,9 @@ static void vmci_transport_handle_detach(struct sock *sk)
 		 * left in our consume queue.
 		 */
 		if (vsock_stream_has_data(vsk) <= 0) {
-			if (sk->sk_state == SS_CONNECTING) {
+			sk->sk_state = TCP_CLOSE;
+
+			if (sk->sk_state == TCP_SYN_SENT) {
 				/* The peer may detach from a queue pair while
 				 * we are still in the connecting state, i.e.,
 				 * if the peer VM is killed after attaching to
@@ -810,12 +811,10 @@ static void vmci_transport_handle_detach(struct sock *sk)
 				 * event like a reset.
 				 */
 
-				sk->sk_state = SS_UNCONNECTED;
 				sk->sk_err = ECONNRESET;
 				sk->sk_error_report(sk);
 				return;
 			}
-			sk->sk_state = SS_UNCONNECTED;
 		}
 		sk->sk_state_change(sk);
 	}
@@ -883,17 +882,17 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
 	vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
 
 	switch (sk->sk_state) {
-	case VSOCK_SS_LISTEN:
+	case TCP_LISTEN:
 		vmci_transport_recv_listen(sk, pkt);
 		break;
-	case SS_CONNECTING:
+	case TCP_SYN_SENT:
 		/* Processing of pending connections for servers goes through
 		 * the listening socket, so see vmci_transport_recv_listen()
 		 * for that path.
 		 */
 		vmci_transport_recv_connecting_client(sk, pkt);
 		break;
-	case SS_CONNECTED:
+	case TCP_ESTABLISHED:
 		vmci_transport_recv_connected(sk, pkt);
 		break;
 	default:
@@ -942,7 +941,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
 		vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
 
 		switch (pending->sk_state) {
-		case SS_CONNECTING:
+		case TCP_SYN_SENT:
 			err = vmci_transport_recv_connecting_server(sk,
 								    pending,
 								    pkt);
@@ -1072,7 +1071,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
 	vsock_add_pending(sk, pending);
 	sk->sk_ack_backlog++;
 
-	pending->sk_state = SS_CONNECTING;
+	pending->sk_state = TCP_SYN_SENT;
 	vmci_trans(vpending)->produce_size =
 		vmci_trans(vpending)->consume_size = qp_size;
 	vmci_trans(vpending)->queue_pair_size = qp_size;
@@ -1197,11 +1196,11 @@ vmci_transport_recv_connecting_server(struct sock *listener,
 	 * the socket will be valid until it is removed from the queue.
 	 *
 	 * If we fail sending the attach below, we remove the socket from the
-	 * connected list and move the socket to SS_UNCONNECTED before
+	 * connected list and move the socket to TCP_CLOSE before
 	 * releasing the lock, so a pending slow path processing of an incoming
 	 * packet will not see the socket in the connected state in that case.
 	 */
-	pending->sk_state = SS_CONNECTED;
+	pending->sk_state = TCP_ESTABLISHED;
 
 	vsock_insert_connected(vpending);
 
@@ -1232,7 +1231,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
 
 destroy:
 	pending->sk_err = skerr;
-	pending->sk_state = SS_UNCONNECTED;
+	pending->sk_state = TCP_CLOSE;
 	/* As long as we drop our reference, all necessary cleanup will handle
 	 * when the cleanup function drops its reference and our destruct
 	 * implementation is called.  Note that since the listen handler will
@@ -1270,7 +1269,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
 		 * accounting (it can already be found since it's in the bound
 		 * table).
 		 */
-		sk->sk_state = SS_CONNECTED;
+		sk->sk_state = TCP_ESTABLISHED;
 		sk->sk_socket->state = SS_CONNECTED;
 		vsock_insert_connected(vsk);
 		sk->sk_state_change(sk);
@@ -1338,7 +1337,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
 destroy:
 	vmci_transport_send_reset(sk, pkt);
 
-	sk->sk_state = SS_UNCONNECTED;
+	sk->sk_state = TCP_CLOSE;
 	sk->sk_err = skerr;
 	sk->sk_error_report(sk);
 	return err;
@@ -1526,7 +1525,7 @@ static int vmci_transport_recv_connected(struct sock *sk,
 		sock_set_flag(sk, SOCK_DONE);
 		vsk->peer_shutdown = SHUTDOWN_MASK;
 		if (vsock_stream_has_data(vsk) <= 0)
-			sk->sk_state = SS_DISCONNECTING;
+			sk->sk_state = TCP_CLOSING;
 
 		sk->sk_state_change(sk);
 		break;
@@ -1790,7 +1789,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
 		err = vmci_transport_send_conn_request(
 			sk, vmci_trans(vsk)->queue_pair_size);
 		if (err < 0) {
-			sk->sk_state = SS_UNCONNECTED;
+			sk->sk_state = TCP_CLOSE;
 			return err;
 		}
 	} else {
@@ -1800,7 +1799,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
 				sk, vmci_trans(vsk)->queue_pair_size,
 				supported_proto_versions);
 		if (err < 0) {
-			sk->sk_state = SS_UNCONNECTED;
+			sk->sk_state = TCP_CLOSE;
 			return err;
 		}
 
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 1406db4d97d1..41fb427f150a 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -355,7 +355,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
 		 * queue. Ask for notifications when there is something to
 		 * read.
 		 */
-		if (sk->sk_state == SS_CONNECTED) {
+		if (sk->sk_state == TCP_ESTABLISHED) {
 			if (!send_waiting_read(sk, 1))
 				return -1;
 
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index f3a0afc46208..0cc84f2bb05e 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -176,7 +176,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
 		 * queue. Ask for notifications when there is something to
 		 * read.
 		 */
-		if (sk->sk_state == SS_CONNECTED)
+		if (sk->sk_state == TCP_ESTABLISHED)
 			vsock_block_update_write_window(sk);
 		*data_ready_now = false;
 	}
diff --git a/net/wimax/Makefile b/net/wimax/Makefile
index 8f1510d0cc2b..eb2db0d3b880 100644
--- a/net/wimax/Makefile
+++ b/net/wimax/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_WIMAX)		+= wimax.o
 
diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore
index c33451b896d9..61cbc304a3d3 100644
--- a/net/wireless/.gitignore
+++ b/net/wireless/.gitignore
@@ -1 +1,2 @@
-regdb.c
+shipped-certs.c
+extra-certs.c
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 6c606120abfe..da91bb547db3 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -19,6 +19,7 @@ config WEXT_PRIV
 config CFG80211
 	tristate "cfg80211 - wireless configuration API"
 	depends on RFKILL || !RFKILL
+	select FW_LOADER
 	---help---
 	  cfg80211 is the Linux wireless LAN (802.11) configuration API.
 	  Enable this if you have a wireless device.
@@ -82,6 +83,36 @@ config CFG80211_CERTIFICATION_ONUS
 	  you are a wireless researcher and are working in a controlled
 	  and approved environment by your local regulatory agency.
 
+config CFG80211_REQUIRE_SIGNED_REGDB
+	bool "require regdb signature" if CFG80211_CERTIFICATION_ONUS
+	default y
+	select SYSTEM_DATA_VERIFICATION
+	help
+	  Require that in addition to the "regulatory.db" file a
+	  "regulatory.db.p7s" can be loaded with a valid PKCS#7
+	  signature for the regulatory.db file made by one of the
+	  keys in the certs/ directory.
+
+config CFG80211_USE_KERNEL_REGDB_KEYS
+	bool "allow regdb keys shipped with the kernel" if CFG80211_CERTIFICATION_ONUS
+	default y
+	depends on CFG80211_REQUIRE_SIGNED_REGDB
+	help
+	  Allow the regulatory database to be signed by one of the keys for
+	  which certificates are part of the kernel sources
+	  (in net/wireless/certs/).
+
+	  Currently this is only the key of Seth Forshee, the regulatory
+	  database maintainer.
+
+config CFG80211_EXTRA_REGDB_KEYDIR
+	string "additional regdb key directory" if CFG80211_CERTIFICATION_ONUS
+	depends on CFG80211_REQUIRE_SIGNED_REGDB
+	help
+	  If selected, point to a directory with DER-encoded X.509
+	  certificates like in the kernel sources (net/wireless/certs/)
+	  that shall be accepted for a signed regulatory database.
+
 config CFG80211_REG_CELLULAR_HINTS
 	bool "cfg80211 regulatory support for cellular base station hints"
 	depends on CFG80211_CERTIFICATION_ONUS
@@ -139,35 +170,14 @@ config CFG80211_DEBUGFS
 
 	  If unsure, say N.
 
-config CFG80211_INTERNAL_REGDB
-	bool "use statically compiled regulatory rules database" if EXPERT
-	default n
-	depends on CFG80211
-	---help---
-	  This option generates an internal data structure representing
-	  the wireless regulatory rules described in net/wireless/db.txt
-	  and includes code to query that database. This is an alternative
-	  to using CRDA for defining regulatory rules for the kernel.
-
-	  Using this option requires some parsing of the db.txt at build time,
-	  the parser will be upkept with the latest wireless-regdb updates but
-	  older wireless-regdb formats will be ignored. The parser may later
-	  be replaced to avoid issues with conflicts on versions of
-	  wireless-regdb.
-
-	  For details see:
-
-	  http://wireless.kernel.org/en/developers/Regulatory
-
-	  Most distributions have a CRDA package. So if unsure, say N.
-
 config CFG80211_CRDA_SUPPORT
-	bool "support CRDA" if CFG80211_INTERNAL_REGDB
+	bool "support CRDA" if EXPERT
 	default y
 	depends on CFG80211
 	help
 	  You should enable this option unless you know for sure you have no
-	  need for it, for example when using internal regdb (above.)
+	  need for it, for example when the regulatory database is loaded
+	  as a firmware file.
 
 	  If unsure, say Y.
 
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index d06e5015751a..278d979c211a 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_CFG80211) += cfg80211.o
 obj-$(CONFIG_LIB80211) += lib80211.o
 obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
@@ -14,11 +15,27 @@ cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o
 cfg80211-$(CONFIG_OF) += of.o
 cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o
 cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o
-cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o
 
 CFLAGS_trace.o := -I$(src)
 
-$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk
-	@$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@
+cfg80211-$(CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS) += shipped-certs.o
+ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
+cfg80211-y += extra-certs.o
+endif
 
-clean-files := regdb.c
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+	@$(kecho) "  GEN     $@"
+	@echo '#include "reg.h"' > $@
+	@echo 'const u8 shipped_regdb_certs[] = {' >> $@
+	@for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
+	@echo '};' >> $@
+	@echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
+
+$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
+		      $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
+	@$(kecho) "  GEN     $@"
+	@echo '#include "reg.h"' > $@
+	@echo 'const u8 extra_regdb_certs[] = {' >> $@
+	@for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
+	@echo '};' >> $@
+	@echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 25666d3009be..63682176c96c 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ieee80211.h>
 #include <linux/export.h>
 #include <net/cfg80211.h>
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
new file mode 100644
index 000000000000..c6f8f9d6b988
--- /dev/null
+++ b/net/wireless/certs/sforshee.x509
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index b8aa5a7d5c77..a48859982a32 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * This file contains helper code to handle channel
  * settings and keeping track of what is possible at
@@ -464,7 +465,7 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
 			  struct ieee80211_channel *chan)
 {
 	int width;
-	u32 cf_offset, freq;
+	u32 freq;
 
 	if (chandef->chan->center_freq == chan->center_freq)
 		return true;
@@ -473,8 +474,6 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
 	if (width <= 20)
 		return false;
 
-	cf_offset = width / 2 - 10;
-
 	for (freq = chandef->center_freq1 - width / 2 + 10;
 	     freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) {
 		if (chan->center_freq == freq)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7b33e8c366bc..fdde0d98fde1 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1384,7 +1384,7 @@ out_fail_sysfs:
 out_fail_pernet:
 	return err;
 }
-subsys_initcall(cfg80211_init);
+fs_initcall(cfg80211_init);
 
 static void __exit cfg80211_exit(void)
 {
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 6e809325af3b..d2f7e8b8a097 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Wireless configuration interface internals.
  *
@@ -216,6 +217,7 @@ enum cfg80211_event_type {
 	EVENT_DISCONNECTED,
 	EVENT_IBSS_JOINED,
 	EVENT_STOPPED,
+	EVENT_PORT_AUTHORIZED,
 };
 
 struct cfg80211_event {
@@ -235,6 +237,9 @@ struct cfg80211_event {
 			u8 bssid[ETH_ALEN];
 			struct ieee80211_channel *channel;
 		} ij;
+		struct {
+			u8 bssid[ETH_ALEN];
+		} pa;
 	};
 };
 
@@ -385,6 +390,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
 			bool wextev);
 void __cfg80211_roamed(struct wireless_dev *wdev,
 		       struct cfg80211_roam_info *info);
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid);
 int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
 			      struct wireless_dev *wdev);
 void cfg80211_autodisconnect_wk(struct work_struct *work);
diff --git a/net/wireless/db.txt b/net/wireless/db.txt
deleted file mode 100644
index a2fc3a09ccdc..000000000000
--- a/net/wireless/db.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# This file is a placeholder to prevent accidental build breakage if someone
-# enables CONFIG_CFG80211_INTERNAL_REGDB.  Almost no one actually needs to
-# enable that build option.
-#
-# You should be using CRDA instead.  It is even better if you use the CRDA
-# package provided by your distribution, since they will probably keep it
-# up-to-date on your behalf.
-#
-# If you _really_ intend to use CONFIG_CFG80211_INTERNAL_REGDB then you will
-# need to replace this file with one containing appropriately formatted
-# regulatory rules that cover the regulatory domains you will be using.  Your
-# best option is to extract the db.txt file from the wireless-regdb git
-# repository:
-#
-#   git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-regdb.git
-#
diff --git a/net/wireless/debugfs.h b/net/wireless/debugfs.h
index 74fdd3811427..a8a135d94ab5 100644
--- a/net/wireless/debugfs.h
+++ b/net/wireless/debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __CFG80211_DEBUGFS_H
 #define __CFG80211_DEBUGFS_H
 
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index e9e91298c70d..a9c0f368db5d 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/utsname.h>
 #include <net/cfg80211.h>
 #include "core.h"
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
deleted file mode 100644
index baf2426b555a..000000000000
--- a/net/wireless/genregdb.awk
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/awk -f
-#
-# genregdb.awk -- generate regdb.c from db.txt
-#
-# Actually, it reads from stdin (presumed to be db.txt) and writes
-# to stdout (presumed to be regdb.c), but close enough...
-#
-# Copyright 2009 John W. Linville <linville@tuxdriver.com>
-#
-# Permission to use, copy, modify, and/or distribute this software for any
-# purpose with or without fee is hereby granted, provided that the above
-# copyright notice and this permission notice appear in all copies.
-#
-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-BEGIN {
-	active = 0
-	rules = 0;
-	print "/*"
-	print " * DO NOT EDIT -- file generated from data in db.txt"
-	print " */"
-	print ""
-	print "#include <linux/nl80211.h>"
-	print "#include <net/cfg80211.h>"
-	print "#include \"regdb.h\""
-	print ""
-	regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n"
-}
-
-function parse_country_head() {
-	country=$2
-	sub(/:/, "", country)
-	printf "static const struct ieee80211_regdomain regdom_%s = {\n", country
-	printf "\t.alpha2 = \"%s\",\n", country
-	if ($NF ~ /DFS-ETSI/)
-		printf "\t.dfs_region = NL80211_DFS_ETSI,\n"
-	else if ($NF ~ /DFS-FCC/)
-		printf "\t.dfs_region = NL80211_DFS_FCC,\n"
-	else if ($NF ~ /DFS-JP/)
-		printf "\t.dfs_region = NL80211_DFS_JP,\n"
-	printf "\t.reg_rules = {\n"
-	active = 1
-	regdb = regdb "\t&regdom_" country ",\n"
-}
-
-function parse_reg_rule()
-{
-	flag_starts_at = 7
-
-	start = $1
-	sub(/\(/, "", start)
-	end = $3
-	bw = $5
-	sub(/\),/, "", bw)
-	gain = 0
-	power = $6
-	# power might be in mW...
-	units = $7
-	dfs_cac = 0
-
-	sub(/\(/, "", power)
-	sub(/\),/, "", power)
-	sub(/\),/, "", units)
-	sub(/\)/, "", units)
-
-	if (units == "mW") {
-		flag_starts_at = 8
-		power = 10 * log(power)/log(10)
-		if ($8 ~ /[[:digit:]]/) {
-			flag_starts_at = 9
-			dfs_cac = $8
-		}
-	} else {
-		if ($7 ~ /[[:digit:]]/) {
-			flag_starts_at = 8
-			dfs_cac = $7
-		}
-	}
-	sub(/\(/, "", dfs_cac)
-	sub(/\),/, "", dfs_cac)
-	flagstr = ""
-	for (i=flag_starts_at; i<=NF; i++)
-		flagstr = flagstr $i
-	split(flagstr, flagarray, ",")
-	flags = ""
-	for (arg in flagarray) {
-		if (flagarray[arg] == "NO-OFDM") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_OFDM | "
-		} else if (flagarray[arg] == "NO-CCK") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_CCK | "
-		} else if (flagarray[arg] == "NO-INDOOR") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_INDOOR | "
-		} else if (flagarray[arg] == "NO-OUTDOOR") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_OUTDOOR | "
-		} else if (flagarray[arg] == "DFS") {
-			flags = flags "\n\t\t\tNL80211_RRF_DFS | "
-		} else if (flagarray[arg] == "PTP-ONLY") {
-			flags = flags "\n\t\t\tNL80211_RRF_PTP_ONLY | "
-		} else if (flagarray[arg] == "PTMP-ONLY") {
-			flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | "
-		} else if (flagarray[arg] == "PASSIVE-SCAN") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
-		} else if (flagarray[arg] == "NO-IBSS") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
-		} else if (flagarray[arg] == "NO-IR") {
-			flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
-		} else if (flagarray[arg] == "AUTO-BW") {
-			flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | "
-		}
-
-	}
-	flags = flags "0"
-	printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
-	rules++
-}
-
-function print_tail_country()
-{
-	active = 0
-	printf "\t},\n"
-	printf "\t.n_reg_rules = %d\n", rules
-	printf "};\n\n"
-	rules = 0;
-}
-
-/^[ \t]*#/ {
-	# Ignore
-}
-
-!active && /^[ \t]*$/ {
-	# Ignore
-}
-
-!active && /country/ {
-	parse_country_head()
-}
-
-active && /^[ \t]*\(/ {
-	parse_reg_rule()
-}
-
-active && /^[ \t]*$/ {
-	print_tail_country()
-}
-
-END {
-	if (active)
-		print_tail_country()
-	print regdb "};"
-	print ""
-	print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);"
-}
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 10bf040a0982..413d4f4e6334 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Some IBSS support code for cfg80211.
  *
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 421a6b80ec62..51aa55618ef7 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/ieee80211.h>
 #include <linux/export.h>
 #include <net/cfg80211.h>
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d8df7a5180a0..e7c64a8dce54 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * cfg80211 MLME SAP interface
  *
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8ce85420ecb0..a0e1951227fa 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -549,6 +549,14 @@ nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = {
 	[NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED },
 };
 
+/* policy for packet pattern attributes */
+static const struct nla_policy
+nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = {
+	[NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, },
+	[NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, },
+	[NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
+};
+
 static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
 				     struct netlink_callback *cb,
 				     struct cfg80211_registered_device **rdev,
@@ -2122,6 +2130,15 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
 		case NL80211_CHAN_HT40MINUS:
 			cfg80211_chandef_create(chandef, chandef->chan,
 						chantype);
+			/* user input for center_freq is incorrect */
+			if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
+			    chandef->center_freq1 != nla_get_u32(
+					info->attrs[NL80211_ATTR_CENTER_FREQ1]))
+				return -EINVAL;
+			/* center_freq2 must be zero */
+			if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
+			    nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
+				return -EINVAL;
 			break;
 		default:
 			return -EINVAL;
@@ -3791,8 +3808,8 @@ static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
 static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
 {
 	const struct cfg80211_beacon_data *bcn = &params->beacon;
-	size_t ies_len = bcn->beacon_ies_len;
-	const u8 *ies = bcn->beacon_ies;
+	size_t ies_len = bcn->tail_len;
+	const u8 *ies = bcn->tail;
 	const u8 *rates;
 	const u8 *cap;
 
@@ -5669,6 +5686,11 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 	}
 }
 
+static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info)
+{
+	return reg_reload_regdb();
+}
+
 static int nl80211_get_mesh_config(struct sk_buff *skb,
 				   struct genl_info *info)
 {
@@ -6269,7 +6291,7 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb,
 	if (!hdr)
 		return -1;
 
-	genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+	genl_dump_check_consistent(cb, hdr);
 
 	if (nl80211_put_regdom(regdom, msg))
 		goto nla_put_failure;
@@ -6610,6 +6632,77 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
 	return regulatory_pre_cac_allowed(wdev->wiphy);
 }
 
+static int
+nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
+			 void *request, struct nlattr **attrs,
+			 bool is_sched_scan)
+{
+	u8 *mac_addr, *mac_addr_mask;
+	u32 *flags;
+	enum nl80211_feature_flags randomness_flag;
+
+	if (!attrs[NL80211_ATTR_SCAN_FLAGS])
+		return 0;
+
+	if (is_sched_scan) {
+		struct cfg80211_sched_scan_request *req = request;
+
+		randomness_flag = wdev ?
+				  NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
+				  NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
+		flags = &req->flags;
+		mac_addr = req->mac_addr;
+		mac_addr_mask = req->mac_addr_mask;
+	} else {
+		struct cfg80211_scan_request *req = request;
+
+		randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+		flags = &req->flags;
+		mac_addr = req->mac_addr;
+		mac_addr_mask = req->mac_addr_mask;
+	}
+
+	*flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
+
+	if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+	    !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+		return -EOPNOTSUPP;
+
+	if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+		int err;
+
+		if (!(wiphy->features & randomness_flag) ||
+		    (wdev && wdev->current_bss))
+			return -EOPNOTSUPP;
+
+		err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask);
+		if (err)
+			return err;
+	}
+
+	if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
+	    !wiphy_ext_feature_isset(wiphy,
+				     NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
+		return -EOPNOTSUPP;
+
+	if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
+	   !wiphy_ext_feature_isset(wiphy,
+				    NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
+		return -EOPNOTSUPP;
+
+	if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
+	    !wiphy_ext_feature_isset(wiphy,
+				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
+		return -EOPNOTSUPP;
+
+	if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
+	    !wiphy_ext_feature_isset(wiphy,
+				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6815,34 +6908,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 			nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
 	}
 
-	if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) {
-		request->flags = nla_get_u32(
-			info->attrs[NL80211_ATTR_SCAN_FLAGS]);
-		if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
-		    !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
-			err = -EOPNOTSUPP;
-			goto out_free;
-		}
-
-		if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
-			if (!(wiphy->features &
-					NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) {
-				err = -EOPNOTSUPP;
-				goto out_free;
-			}
-
-			if (wdev->current_bss) {
-				err = -EOPNOTSUPP;
-				goto out_free;
-			}
-
-			err = nl80211_parse_random_mac(info->attrs,
-						       request->mac_addr,
-						       request->mac_addr_mask);
-			if (err)
-				goto out_free;
-		}
-	}
+	err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs,
+				       false);
+	if (err)
+		goto out_free;
 
 	request->no_cck =
 		nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
@@ -7290,37 +7359,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
 		       request->ie_len);
 	}
 
-	if (attrs[NL80211_ATTR_SCAN_FLAGS]) {
-		request->flags = nla_get_u32(
-			attrs[NL80211_ATTR_SCAN_FLAGS]);
-		if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
-		    !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
-			err = -EOPNOTSUPP;
-			goto out_free;
-		}
-
-		if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
-			u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR;
-
-			if (!wdev) /* must be net-detect */
-				flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
-
-			if (!(wiphy->features & flg)) {
-				err = -EOPNOTSUPP;
-				goto out_free;
-			}
-
-			if (wdev && wdev->current_bss) {
-				err = -EOPNOTSUPP;
-				goto out_free;
-			}
-
-			err = nl80211_parse_random_mac(attrs, request->mac_addr,
-						       request->mac_addr_mask);
-			if (err)
-				goto out_free;
-		}
-	}
+	err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
+	if (err)
+		goto out_free;
 
 	if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
 		request->delay =
@@ -7681,7 +7722,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
 	if (!hdr)
 		return -1;
 
-	genl_dump_check_consistent(cb, hdr, &nl80211_fam);
+	genl_dump_check_consistent(cb, hdr);
 
 	if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
 		goto nla_put_failure;
@@ -8924,8 +8965,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 
 	if (info->attrs[NL80211_ATTR_USE_MFP]) {
 		connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
+		if (connect.mfp == NL80211_MFP_OPTIONAL &&
+		    !wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_MFP_OPTIONAL))
+			return -EOPNOTSUPP;
+
 		if (connect.mfp != NL80211_MFP_REQUIRED &&
-		    connect.mfp != NL80211_MFP_NO)
+		    connect.mfp != NL80211_MFP_NO &&
+		    connect.mfp != NL80211_MFP_OPTIONAL)
 			return -EINVAL;
 	} else {
 		connect.mfp = NL80211_MFP_NO;
@@ -9987,6 +10034,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 		if (err)
 			return err;
 
+		if (!setup.chandef.chan)
+			return -EINVAL;
+
 		err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
 					      &setup.beacon_rate);
 		if (err)
@@ -10529,7 +10579,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 			u8 *mask_pat;
 
 			nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
-					 NULL, info->extack);
+					 nl80211_packet_pattern_policy,
+					 info->extack);
 			err = -EINVAL;
 			if (!pat_tb[NL80211_PKTPAT_MASK] ||
 			    !pat_tb[NL80211_PKTPAT_PATTERN])
@@ -10778,7 +10829,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
 			    rem) {
 		u8 *mask_pat;
 
-		nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL);
+		nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+				 nl80211_packet_pattern_policy, NULL);
 		if (!pat_tb[NL80211_PKTPAT_MASK] ||
 		    !pat_tb[NL80211_PKTPAT_PATTERN])
 			return -EINVAL;
@@ -10903,6 +10955,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		return err;
 
+	if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] ||
+	    !tb[NL80211_REKEY_DATA_KCK])
+		return -EINVAL;
 	if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
 		return -ERANGE;
 	if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN)
@@ -12669,6 +12724,12 @@ static const struct genl_ops nl80211_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
+		.cmd = NL80211_CMD_RELOAD_REGDB,
+		.doit = nl80211_reload_regdb,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
 		.cmd = NL80211_CMD_GET_MESH_CONFIG,
 		.doit = nl80211_get_mesh_config,
 		.policy = nl80211_policy,
@@ -13796,9 +13857,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
 		     info->req_ie)) ||
 	    (info->resp_ie &&
 	     nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
-		     info->resp_ie)) ||
-	    (info->authorized &&
-	     nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
+		     info->resp_ie)))
 		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
@@ -13812,6 +13871,36 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+				  struct net_device *netdev, const u8 *bssid)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+				NL80211_MCGRP_MLME, GFP_KERNEL);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
 			       struct net_device *netdev, u16 reason,
 			       const u8 *ie, size_t ie_len, bool from_ap)
@@ -14185,7 +14274,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
 	struct sk_buff *msg;
 	void *hdr;
-	u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
+	u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid);
 
 	if (!nlportid)
 		return false;
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b96933322077..79e47fe60c35 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __NET_WIRELESS_NL80211_H
 #define __NET_WIRELESS_NL80211_H
 
@@ -58,6 +59,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
 void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
 			 struct net_device *netdev,
 			 struct cfg80211_roam_info *info, gfp_t gfp);
+void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+				  struct net_device *netdev, const u8 *bssid);
 void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
 			       struct net_device *netdev, u16 reason,
 			       const u8 *ie, size_t ie_len, bool from_ap);
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index ce23d7d49960..0c06240d25af 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __CFG80211_RDEV_OPS
 #define __CFG80211_RDEV_OPS
 
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 5fae296a6a58..3871998059de 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -4,6 +4,7 @@
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2008-2011	Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright      2017  Intel Deutschland GmbH
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -52,12 +53,13 @@
 #include <linux/ctype.h>
 #include <linux/nl80211.h>
 #include <linux/platform_device.h>
+#include <linux/verification.h>
 #include <linux/moduleparam.h>
+#include <linux/firmware.h>
 #include <net/cfg80211.h>
 #include "core.h"
 #include "reg.h"
 #include "rdev-ops.h"
-#include "regdb.h"
 #include "nl80211.h"
 
 /*
@@ -99,7 +101,7 @@ static struct regulatory_request core_request_world = {
 static struct regulatory_request __rcu *last_request =
 	(void __force __rcu *)&core_request_world;
 
-/* To trigger userspace events */
+/* To trigger userspace events and load firmware */
 static struct platform_device *reg_pdev;
 
 /*
@@ -442,7 +444,6 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd)
 	return regd;
 }
 
-#ifdef CONFIG_CFG80211_INTERNAL_REGDB
 struct reg_regdb_apply_request {
 	struct list_head list;
 	const struct ieee80211_regdomain *regdom;
@@ -474,55 +475,26 @@ static void reg_regdb_apply(struct work_struct *work)
 
 static DECLARE_WORK(reg_regdb_work, reg_regdb_apply);
 
-static int reg_query_builtin(const char *alpha2)
+static int reg_schedule_apply(const struct ieee80211_regdomain *regdom)
 {
-	const struct ieee80211_regdomain *regdom = NULL;
 	struct reg_regdb_apply_request *request;
-	unsigned int i;
-
-	for (i = 0; i < reg_regdb_size; i++) {
-		if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) {
-			regdom = reg_regdb[i];
-			break;
-		}
-	}
-
-	if (!regdom)
-		return -ENODATA;
 
 	request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL);
-	if (!request)
-		return -ENOMEM;
-
-	request->regdom = reg_copy_regd(regdom);
-	if (IS_ERR_OR_NULL(request->regdom)) {
-		kfree(request);
+	if (!request) {
+		kfree(regdom);
 		return -ENOMEM;
 	}
 
+	request->regdom = regdom;
+
 	mutex_lock(&reg_regdb_apply_mutex);
 	list_add_tail(&request->list, &reg_regdb_apply_list);
 	mutex_unlock(&reg_regdb_apply_mutex);
 
 	schedule_work(&reg_regdb_work);
-
 	return 0;
 }
 
-/* Feel free to add any other sanity checks here */
-static void reg_regdb_size_check(void)
-{
-	/* We should ideally BUILD_BUG_ON() but then random builds would fail */
-	WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it...");
-}
-#else
-static inline void reg_regdb_size_check(void) {}
-static inline int reg_query_builtin(const char *alpha2)
-{
-	return -ENODATA;
-}
-#endif /* CONFIG_CFG80211_INTERNAL_REGDB */
-
 #ifdef CONFIG_CFG80211_CRDA_SUPPORT
 /* Max number of consecutive attempts to communicate with CRDA  */
 #define REG_MAX_CRDA_TIMEOUTS 10
@@ -598,10 +570,402 @@ static inline int call_crda(const char *alpha2)
 }
 #endif /* CONFIG_CFG80211_CRDA_SUPPORT */
 
+/* code to directly load a firmware database through request_firmware */
+static const struct fwdb_header *regdb;
+
+struct fwdb_country {
+	u8 alpha2[2];
+	__be16 coll_ptr;
+	/* this struct cannot be extended */
+} __packed __aligned(4);
+
+struct fwdb_collection {
+	u8 len;
+	u8 n_rules;
+	u8 dfs_region;
+	/* no optional data yet */
+	/* aligned to 2, then followed by __be16 array of rule pointers */
+} __packed __aligned(4);
+
+enum fwdb_flags {
+	FWDB_FLAG_NO_OFDM	= BIT(0),
+	FWDB_FLAG_NO_OUTDOOR	= BIT(1),
+	FWDB_FLAG_DFS		= BIT(2),
+	FWDB_FLAG_NO_IR		= BIT(3),
+	FWDB_FLAG_AUTO_BW	= BIT(4),
+};
+
+struct fwdb_rule {
+	u8 len;
+	u8 flags;
+	__be16 max_eirp;
+	__be32 start, end, max_bw;
+	/* start of optional data */
+	__be16 cac_timeout;
+} __packed __aligned(4);
+
+#define FWDB_MAGIC 0x52474442
+#define FWDB_VERSION 20
+
+struct fwdb_header {
+	__be32 magic;
+	__be32 version;
+	struct fwdb_country country[];
+} __packed __aligned(4);
+
+static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr)
+{
+	struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2));
+
+	/* the length byte must be readable before it can be trusted */
+	if ((u8 *)rule + sizeof(rule->len) > data + size)
+		return false;
+	/* mandatory fields */
+	if (rule->len < offsetofend(struct fwdb_rule, max_bw))
+		return false;
+	/* the rule must not claim bytes beyond the end of the data */
+	return (u8 *)rule + rule->len <= data + size;
+}
+
+static bool valid_country(const u8 *data, unsigned int size,
+			  const struct fwdb_country *country)
+{
+	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+	struct fwdb_collection *coll = (void *)(data + ptr);
+	__be16 *rules_ptr;
+	unsigned int i;
+
+	/* make sure we can read len/n_rules */
+	if ((u8 *)coll + offsetofend(typeof(*coll), n_rules) > data + size)
+		return false;
+
+	/* make sure base struct and all rules fit */
+	if ((u8 *)coll + ALIGN(coll->len, 2) +
+	    (coll->n_rules * 2) > data + size)
+		return false;
+
+	/* mandatory fields must exist */
+	if (coll->len < offsetofend(struct fwdb_collection, dfs_region))
+		return false;
+
+	rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+
+	for (i = 0; i < coll->n_rules; i++) {
+		u16 rule_ptr = be16_to_cpu(rules_ptr[i]);
+
+		if (!valid_rule(data, size, rule_ptr))
+			return false;
+	}
+
+	return true;
+}
+
+#ifdef CONFIG_CFG80211_REQUIRE_SIGNED_REGDB
+static struct key *builtin_regdb_keys;
+
+static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen)
+{
+	const u8 *end = p + buflen;
+	size_t plen;
+	key_ref_t key;
+
+	while (p < end) {
+		/* Each cert must begin with an ASN.1 SEQUENCE tag (0x30) and a
+		 * two-byte length marker (0x82), i.e. be more than 256 bytes.
+		 */
+		if (end - p < 4)
+			goto dodgy_cert;
+		if (p[0] != 0x30 ||
+		    p[1] != 0x82)
+			goto dodgy_cert;
+		plen = (p[2] << 8) | p[3];
+		plen += 4;
+		if (plen > end - p)
+			goto dodgy_cert;
+
+		key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1),
+					   "asymmetric", NULL, p, plen,
+					   ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+					    KEY_USR_VIEW | KEY_USR_READ),
+					   KEY_ALLOC_NOT_IN_QUOTA |
+					   KEY_ALLOC_BUILT_IN |
+					   KEY_ALLOC_BYPASS_RESTRICTION);
+		if (IS_ERR(key)) {
+			pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+			       PTR_ERR(key));
+		} else {
+			pr_notice("Loaded X.509 cert '%s'\n",
+				  key_ref_to_ptr(key)->description);
+			key_ref_put(key);
+		}
+		p += plen;
+	}
+
+	return;
+
+dodgy_cert:
+	pr_err("Problem parsing in-kernel X.509 certificate list\n");
+}
+
+static int __init load_builtin_regdb_keys(void)
+{
+	builtin_regdb_keys =
+		keyring_alloc(".builtin_regdb_keys",
+			      KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+			      ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+			      KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+			      KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+	if (IS_ERR(builtin_regdb_keys))
+		return PTR_ERR(builtin_regdb_keys);
+
+	pr_notice("Loading compiled-in X.509 certificates for regulatory database\n");
+
+#ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS
+	load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len);
+#endif
+#ifdef CONFIG_CFG80211_EXTRA_REGDB_KEYDIR
+	if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0')
+		load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len);
+#endif
+
+	return 0;
+}
+
+static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+{
+	const struct firmware *sig;
+	bool result;
+
+	if (request_firmware(&sig, "regulatory.db.p7s", &reg_pdev->dev))
+		return false;
+
+	result = verify_pkcs7_signature(data, size, sig->data, sig->size,
+					builtin_regdb_keys,
+					VERIFYING_UNSPECIFIED_SIGNATURE,
+					NULL, NULL) == 0;
+
+	release_firmware(sig);
+
+	return result;
+}
+
+static void free_regdb_keyring(void)
+{
+	key_put(builtin_regdb_keys);
+}
+#else
+static int load_builtin_regdb_keys(void)
+{
+	return 0;
+}
+
+static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+{
+	return true;
+}
+
+static void free_regdb_keyring(void)
+{
+}
+#endif /* CONFIG_CFG80211_REQUIRE_SIGNED_REGDB */
+
+static bool valid_regdb(const u8 *data, unsigned int size)
+{
+	const struct fwdb_header *hdr = (void *)data;
+	const struct fwdb_country *country;
+
+	if (size < sizeof(*hdr))
+		return false;
+
+	if (hdr->magic != cpu_to_be32(FWDB_MAGIC))
+		return false;
+
+	if (hdr->version != cpu_to_be32(FWDB_VERSION))
+		return false;
+
+	if (!regdb_has_valid_signature(data, size))
+		return false;
+
+	country = &hdr->country[0];
+	while ((u8 *)(country + 1) <= data + size) {
+		if (!country->coll_ptr)
+			return true;	/* terminating entry found */
+		if (!valid_country(data, size, country))
+			return false;
+		country++;
+	}
+	/* no terminating entry: lookups would run off the end of the data */
+	return false;
+}
+
+static int regdb_query_country(const struct fwdb_header *db,
+			       const struct fwdb_country *country)
+{
+	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
+	struct ieee80211_regdomain *regdom;
+	unsigned int size_of_regd;
+	unsigned int i;
+
+	size_of_regd =
+		sizeof(struct ieee80211_regdomain) +
+		coll->n_rules * sizeof(struct ieee80211_reg_rule);
+
+	regdom = kzalloc(size_of_regd, GFP_KERNEL);
+	if (!regdom)
+		return -ENOMEM;
+
+	regdom->n_reg_rules = coll->n_rules;
+	regdom->alpha2[0] = country->alpha2[0];
+	regdom->alpha2[1] = country->alpha2[1];
+	regdom->dfs_region = coll->dfs_region;
+
+	for (i = 0; i < regdom->n_reg_rules; i++) {
+		__be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+		unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2;
+		struct fwdb_rule *rule = (void *)((u8 *)db + rule_ptr);
+		struct ieee80211_reg_rule *rrule = &regdom->reg_rules[i];
+
+		rrule->freq_range.start_freq_khz = be32_to_cpu(rule->start);
+		rrule->freq_range.end_freq_khz = be32_to_cpu(rule->end);
+		rrule->freq_range.max_bandwidth_khz = be32_to_cpu(rule->max_bw);
+
+		rrule->power_rule.max_antenna_gain = 0;
+		rrule->power_rule.max_eirp = be16_to_cpu(rule->max_eirp);
+
+		rrule->flags = 0;
+		if (rule->flags & FWDB_FLAG_NO_OFDM)
+			rrule->flags |= NL80211_RRF_NO_OFDM;
+		if (rule->flags & FWDB_FLAG_NO_OUTDOOR)
+			rrule->flags |= NL80211_RRF_NO_OUTDOOR;
+		if (rule->flags & FWDB_FLAG_DFS)
+			rrule->flags |= NL80211_RRF_DFS;
+		if (rule->flags & FWDB_FLAG_NO_IR)
+			rrule->flags |= NL80211_RRF_NO_IR;
+		if (rule->flags & FWDB_FLAG_AUTO_BW)
+			rrule->flags |= NL80211_RRF_AUTO_BW;
+
+		rrule->dfs_cac_ms = 0;
+
+		/* handle optional data */
+		if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout))
+			rrule->dfs_cac_ms =
+				1000 * be16_to_cpu(rule->cac_timeout);
+	}
+
+	return reg_schedule_apply(regdom);
+}
+
+static int query_regdb(const char *alpha2)
+{
+	const struct fwdb_header *hdr = regdb;
+	const struct fwdb_country *country;
+
+	ASSERT_RTNL();
+
+	if (IS_ERR(regdb))
+		return PTR_ERR(regdb);
+
+	country = &hdr->country[0];
+	while (country->coll_ptr) {
+		if (alpha2_equal(alpha2, country->alpha2))
+			return regdb_query_country(regdb, country);
+		country++;
+	}
+
+	return -ENODATA;
+}
+
+static void regdb_fw_cb(const struct firmware *fw, void *context)
+{
+	int set_error = 0;
+	bool restore = true;
+	void *db;
+
+	if (!fw) {
+		pr_info("failed to load regulatory.db\n");
+		set_error = -ENODATA;
+	} else if (!valid_regdb(fw->data, fw->size)) {
+		pr_info("loaded regulatory.db is malformed or signature is missing/invalid\n");
+		set_error = -EINVAL;
+	}
+
+	rtnl_lock();
+	if (WARN_ON(regdb && !IS_ERR(regdb))) {
+		/* just restore and free new db */
+	} else if (set_error) {
+		regdb = ERR_PTR(set_error);
+	} else if (fw) {
+		db = kmemdup(fw->data, fw->size, GFP_KERNEL);
+		if (db) {
+			regdb = db;
+			restore = context && query_regdb(context);
+		} else {
+			restore = true;
+		}
+	}
+
+	if (restore)
+		restore_regulatory_settings(true);
+
+	rtnl_unlock();
+
+	kfree(context);
+
+	release_firmware(fw);
+}
+
+static int query_regdb_file(const char *alpha2)
+{
+	int err;
+
+	ASSERT_RTNL();
+	if (regdb)
+		return query_regdb(alpha2);
+	alpha2 = kmemdup(alpha2, 2, GFP_KERNEL);
+	err = alpha2 ? request_firmware_nowait(THIS_MODULE, true,
+			"regulatory.db", &reg_pdev->dev, GFP_KERNEL,
+			(void *)alpha2, regdb_fw_cb) : -ENOMEM;
+	if (err)
+		kfree(alpha2);	/* callback never runs on failure */
+	return err;
+}
+
+int reg_reload_regdb(void)
+{
+	const struct firmware *fw;
+	void *db;
+	int err;
+
+	err = request_firmware(&fw, "regulatory.db", &reg_pdev->dev);
+	if (err)
+		return err;
+
+	if (!valid_regdb(fw->data, fw->size)) {
+		err = -ENODATA;
+		goto out;
+	}
+
+	db = kmemdup(fw->data, fw->size, GFP_KERNEL);
+	if (!db) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	rtnl_lock();
+	if (!IS_ERR_OR_NULL(regdb))
+		kfree(regdb);
+	regdb = db;
+	rtnl_unlock();
+
+ out:
+	release_firmware(fw);
+	return err;
+}
+
 static bool reg_query_database(struct regulatory_request *request)
 {
-	/* query internal regulatory database (if it exists) */
-	if (reg_query_builtin(request->alpha2) == 0)
+	if (query_regdb_file(request->alpha2) == 0)
 		return true;
 
 	if (call_crda(request->alpha2) == 0)
@@ -1483,7 +1847,9 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy,
 {
 	struct ieee80211_supported_band *sband = wiphy->bands[channel->band];
 	struct ieee80211_channel *channel_before = NULL, *channel_after = NULL;
+	const struct ieee80211_regdomain *regd;
 	unsigned int i;
+	u32 flags;
 
 	if (!is_ht40_allowed(channel)) {
 		channel->flags |= IEEE80211_CHAN_NO_HT40;
@@ -1503,17 +1869,30 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy,
 			channel_after = c;
 	}
 
+	flags = 0;
+	regd = get_wiphy_regdom(wiphy);
+	if (regd) {
+		const struct ieee80211_reg_rule *reg_rule =
+			freq_reg_info_regd(MHZ_TO_KHZ(channel->center_freq),
+					   regd, MHZ_TO_KHZ(20));
+
+		if (!IS_ERR(reg_rule))
+			flags = reg_rule->flags;
+	}
+
 	/*
 	 * Please note that this assumes target bandwidth is 20 MHz,
 	 * if that ever changes we also need to change the below logic
 	 * to include that as well.
 	 */
-	if (!is_ht40_allowed(channel_before))
+	if (!is_ht40_allowed(channel_before) ||
+	    flags & NL80211_RRF_NO_HT40MINUS)
 		channel->flags |= IEEE80211_CHAN_NO_HT40MINUS;
 	else
 		channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS;
 
-	if (!is_ht40_allowed(channel_after))
+	if (!is_ht40_allowed(channel_after) ||
+	    flags & NL80211_RRF_NO_HT40PLUS)
 		channel->flags |= IEEE80211_CHAN_NO_HT40PLUS;
 	else
 		channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS;
@@ -3269,6 +3648,10 @@ int __init regulatory_init(void)
 {
 	int err = 0;
 
+	err = load_builtin_regdb_keys();
+	if (err)
+		return err;
+
 	reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
 	if (IS_ERR(reg_pdev))
 		return PTR_ERR(reg_pdev);
@@ -3277,8 +3660,6 @@ int __init regulatory_init(void)
 	spin_lock_init(&reg_pending_beacons_lock);
 	spin_lock_init(&reg_indoor_lock);
 
-	reg_regdb_size_check();
-
 	rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
 
 	user_alpha2[0] = '9';
@@ -3344,4 +3725,9 @@ void regulatory_exit(void)
 		list_del(&reg_request->list);
 		kfree(reg_request);
 	}
+
+	if (!IS_ERR_OR_NULL(regdb))
+		kfree(regdb);
+
+	free_regdb_keyring();
 }
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index ca7fedf2e7a1..9ceeb5f3a7cb 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -1,5 +1,8 @@
 #ifndef __NET_WIRELESS_REG_H
 #define __NET_WIRELESS_REG_H
+
+#include <net/cfg80211.h>
+
 /*
  * Copyright 2008-2011	Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
  *
@@ -179,4 +182,15 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
  * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1
  */
 bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
+
+/**
+ * reg_reload_regdb - reload the regulatory.db firmware file
+ */
+int reg_reload_regdb(void);
+
+extern const u8 shipped_regdb_certs[];
+extern unsigned int shipped_regdb_certs_len;
+extern const u8 extra_regdb_certs[];
+extern unsigned int extra_regdb_certs_len;
+
 #endif  /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/regdb.h b/net/wireless/regdb.h
deleted file mode 100644
index 3279cfcefb0c..000000000000
--- a/net/wireless/regdb.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __REGDB_H__
-#define __REGDB_H__
-
-/*
- * Copyright 2009 John W. Linville <linville@tuxdriver.com>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-extern const struct ieee80211_regdomain *reg_regdb[];
-extern int reg_regdb_size;
-
-#endif /* __REGDB_H__ */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 9f0901f3e42b..f6c5fe482506 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * cfg80211 scan result handling
  *
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 0a49b88070d0..fdb3646274a5 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * SME code for cfg80211
  * both driver SME event handling and the SME implementation
@@ -522,11 +523,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
 		return -EOPNOTSUPP;
 
 	if (wdev->current_bss) {
-		if (!prev_bssid)
-			return -EALREADY;
-		if (prev_bssid &&
-		    !ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
-			return -ENOTCONN;
 		cfg80211_unhold_bss(wdev->current_bss);
 		cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
 		wdev->current_bss = NULL;
@@ -960,7 +956,6 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
 	ev->rm.resp_ie_len = info->resp_ie_len;
 	memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len);
 	ev->rm.bss = info->bss;
-	ev->rm.authorized = info->authorized;
 
 	spin_lock_irqsave(&wdev->event_lock, flags);
 	list_add_tail(&ev->list, &wdev->event_list);
@@ -969,6 +964,50 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
 }
 EXPORT_SYMBOL(cfg80211_roamed);
 
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid)
+{
+	ASSERT_WDEV_LOCK(wdev);
+
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+		return;
+
+	if (WARN_ON(!wdev->current_bss) ||
+	    WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
+		return;
+
+	nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev,
+				     bssid);
+}
+
+void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
+			      gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct cfg80211_event *ev;
+	unsigned long flags;
+
+	if (WARN_ON(!bssid))
+		return;
+
+	ev = kzalloc(sizeof(*ev), gfp);
+	if (!ev)
+		return;
+
+	ev->type = EVENT_PORT_AUTHORIZED;
+	memcpy(ev->pa.bssid, bssid, ETH_ALEN);
+
+	/*
+	 * Use the wdev event list so that if there are pending
+	 * connected/roamed events, they will be reported first.
+	 */
+	spin_lock_irqsave(&wdev->event_lock, flags);
+	list_add_tail(&ev->list, &wdev->event_list);
+	spin_unlock_irqrestore(&wdev->event_lock, flags);
+	queue_work(cfg80211_wq, &rdev->event_work);
+}
+EXPORT_SYMBOL(cfg80211_port_authorized);
+
 void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 			     size_t ie_len, u16 reason, bool from_ap)
 {
@@ -1063,11 +1102,35 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	if (WARN_ON(wdev->connect_keys)) {
-		kzfree(wdev->connect_keys);
-		wdev->connect_keys = NULL;
+	/*
+	 * If we have an ssid_len, we're trying to connect or are
+	 * already connected, so reject a new SSID unless it's the
+	 * same (which is the case for re-association.)
+	 */
+	if (wdev->ssid_len &&
+	    (wdev->ssid_len != connect->ssid_len ||
+	     memcmp(wdev->ssid, connect->ssid, wdev->ssid_len)))
+		return -EALREADY;
+
+	/*
+	 * If connected, reject (re-)association unless prev_bssid
+	 * matches the current BSSID.
+	 */
+	if (wdev->current_bss) {
+		if (!prev_bssid)
+			return -EALREADY;
+		if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
+			return -ENOTCONN;
 	}
 
+	/*
+	 * Reject if we're in the process of connecting with WEP,
+	 * this case isn't very interesting and trying to handle
+	 * it would make the code much more complex.
+	 */
+	if (wdev->connect_keys)
+		return -EINPROGRESS;
+
 	cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
 				  rdev->wiphy.ht_capa_mod_mask);
 
@@ -1118,7 +1181,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
 
 	if (err) {
 		wdev->connect_keys = NULL;
-		wdev->ssid_len = 0;
+		/*
+		 * This could be reassoc getting refused, don't clear
+		 * ssid_len in that case.
+		 */
+		if (!wdev->current_bss)
+			wdev->ssid_len = 0;
 		return err;
 	}
 
@@ -1145,6 +1213,14 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
 	else if (wdev->ssid_len)
 		err = rdev_disconnect(rdev, dev, reason);
 
+	/*
+	 * Clear ssid_len unless we actually were fully connected,
+	 * in which case cfg80211_disconnected() will take care of
+	 * this later.
+	 */
+	if (!wdev->current_bss)
+		wdev->ssid_len = 0;
+
 	return err;
 }
 
diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h
index b533ed71daff..7b454c2de9b7 100644
--- a/net/wireless/sysfs.h
+++ b/net/wireless/sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __WIRELESS_SYSFS_H
 #define __WIRELESS_SYSFS_H
 
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 0f8db41eaddb..f3353fe5b35b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM cfg80211
 
diff --git a/net/wireless/util.c b/net/wireless/util.c
index bcb1284c3415..c69160694b6c 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Wireless utility functions
  *
@@ -157,32 +158,30 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
 	case NL80211_BAND_2GHZ:
 		want = 7;
 		for (i = 0; i < sband->n_bitrates; i++) {
-			if (sband->bitrates[i].bitrate == 10) {
+			switch (sband->bitrates[i].bitrate) {
+			case 10:
+			case 20:
+			case 55:
+			case 110:
 				sband->bitrates[i].flags |=
 					IEEE80211_RATE_MANDATORY_B |
 					IEEE80211_RATE_MANDATORY_G;
 				want--;
-			}
-
-			if (sband->bitrates[i].bitrate == 20 ||
-			    sband->bitrates[i].bitrate == 55 ||
-			    sband->bitrates[i].bitrate == 110 ||
-			    sband->bitrates[i].bitrate == 60 ||
-			    sband->bitrates[i].bitrate == 120 ||
-			    sband->bitrates[i].bitrate == 240) {
+				break;
+			case 60:
+			case 120:
+			case 240:
 				sband->bitrates[i].flags |=
 					IEEE80211_RATE_MANDATORY_G;
 				want--;
-			}
-
-			if (sband->bitrates[i].bitrate != 10 &&
-			    sband->bitrates[i].bitrate != 20 &&
-			    sband->bitrates[i].bitrate != 55 &&
-			    sband->bitrates[i].bitrate != 110)
+				/* fall through */
+			default:
 				sband->bitrates[i].flags |=
 					IEEE80211_RATE_ERP_G;
+				break;
+			}
 		}
-		WARN_ON(want != 0 && want != 3 && want != 6);
+		WARN_ON(want != 0 && want != 3);
 		break;
 	case NL80211_BAND_60GHZ:
 		/* check for mandatory HT MCS 1..4 */
@@ -529,121 +528,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 }
 EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr);
 
-int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
-			     enum nl80211_iftype iftype,
-			     const u8 *bssid, bool qos)
-{
-	struct ieee80211_hdr hdr;
-	u16 hdrlen, ethertype;
-	__le16 fc;
-	const u8 *encaps_data;
-	int encaps_len, skip_header_bytes;
-	int nh_pos, h_pos;
-	int head_need;
-
-	if (unlikely(skb->len < ETH_HLEN))
-		return -EINVAL;
-
-	nh_pos = skb_network_header(skb) - skb->data;
-	h_pos = skb_transport_header(skb) - skb->data;
-
-	/* convert Ethernet header to proper 802.11 header (based on
-	 * operation mode) */
-	ethertype = (skb->data[12] << 8) | skb->data[13];
-	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
-
-	switch (iftype) {
-	case NL80211_IFTYPE_AP:
-	case NL80211_IFTYPE_AP_VLAN:
-	case NL80211_IFTYPE_P2P_GO:
-		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
-		/* DA BSSID SA */
-		memcpy(hdr.addr1, skb->data, ETH_ALEN);
-		memcpy(hdr.addr2, addr, ETH_ALEN);
-		memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
-		hdrlen = 24;
-		break;
-	case NL80211_IFTYPE_STATION:
-	case NL80211_IFTYPE_P2P_CLIENT:
-		fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
-		/* BSSID SA DA */
-		memcpy(hdr.addr1, bssid, ETH_ALEN);
-		memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
-		memcpy(hdr.addr3, skb->data, ETH_ALEN);
-		hdrlen = 24;
-		break;
-	case NL80211_IFTYPE_OCB:
-	case NL80211_IFTYPE_ADHOC:
-		/* DA SA BSSID */
-		memcpy(hdr.addr1, skb->data, ETH_ALEN);
-		memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
-		memcpy(hdr.addr3, bssid, ETH_ALEN);
-		hdrlen = 24;
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	if (qos) {
-		fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
-		hdrlen += 2;
-	}
-
-	hdr.frame_control = fc;
-	hdr.duration_id = 0;
-	hdr.seq_ctrl = 0;
-
-	skip_header_bytes = ETH_HLEN;
-	if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) {
-		encaps_data = bridge_tunnel_header;
-		encaps_len = sizeof(bridge_tunnel_header);
-		skip_header_bytes -= 2;
-	} else if (ethertype >= ETH_P_802_3_MIN) {
-		encaps_data = rfc1042_header;
-		encaps_len = sizeof(rfc1042_header);
-		skip_header_bytes -= 2;
-	} else {
-		encaps_data = NULL;
-		encaps_len = 0;
-	}
-
-	skb_pull(skb, skip_header_bytes);
-	nh_pos -= skip_header_bytes;
-	h_pos -= skip_header_bytes;
-
-	head_need = hdrlen + encaps_len - skb_headroom(skb);
-
-	if (head_need > 0 || skb_cloned(skb)) {
-		head_need = max(head_need, 0);
-		if (head_need)
-			skb_orphan(skb);
-
-		if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC))
-			return -ENOMEM;
-	}
-
-	if (encaps_data) {
-		memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
-		nh_pos += encaps_len;
-		h_pos += encaps_len;
-	}
-
-	memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
-
-	nh_pos += hdrlen;
-	h_pos += hdrlen;
-
-	/* Update skb pointers to various headers since this modified frame
-	 * is going to go through Linux networking code that may potentially
-	 * need things like pointer to IP header. */
-	skb_reset_mac_header(skb);
-	skb_set_network_header(skb, nh_pos);
-	skb_set_transport_header(skb, h_pos);
-
-	return 0;
-}
-EXPORT_SYMBOL(ieee80211_data_from_8023);
-
 static void
 __frame_add_frag(struct sk_buff *skb, struct page *page,
 		 void *ptr, int len, int size)
@@ -963,6 +847,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
 		case EVENT_STOPPED:
 			__cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
 			break;
+		case EVENT_PORT_AUTHORIZED:
+			__cfg80211_port_authorized(wdev, ev->pa.bssid);
+			break;
 		}
 		wdev_unlock(wdev);
 
@@ -1367,13 +1254,29 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
 }
 EXPORT_SYMBOL(cfg80211_get_p2p_attr);
 
-static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
+static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id, bool id_ext)
 {
 	int i;
 
-	for (i = 0; i < n_ids; i++)
-		if (ids[i] == id)
+	/* An empty list matches nothing; the last value must be legal */
+	if (n_ids <= 0 || WARN_ON(ids[n_ids - 1] == WLAN_EID_EXTENSION))
+		return false;
+
+	i = 0;
+	while (i < n_ids) {
+		if (ids[i] == WLAN_EID_EXTENSION) {
+			if (id_ext && (ids[i + 1] == id))
+				return true;
+
+			i += 2;
+			continue;
+		}
+
+		if (ids[i] == id && !id_ext)
 			return true;
+
+		i++;
+	}
 	return false;
 }
 
@@ -1403,14 +1306,36 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
 {
 	size_t pos = offset;
 
-	while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) {
+	while (pos < ielen) {
+		u8 ext = 0;
+
+		if (ies[pos] == WLAN_EID_EXTENSION)
+			ext = 2;
+		if ((pos + ext) >= ielen)
+			break;
+
+		if (!ieee80211_id_in_list(ids, n_ids, ies[pos + ext],
+					  ies[pos] == WLAN_EID_EXTENSION))
+			break;
+
 		if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
 			pos = skip_ie(ies, ielen, pos);
 
-			while (pos < ielen &&
-			       !ieee80211_id_in_list(after_ric, n_after_ric,
-						     ies[pos]))
-				pos = skip_ie(ies, ielen, pos);
+			while (pos < ielen) {
+				if (ies[pos] == WLAN_EID_EXTENSION)
+					ext = 2;
+				else
+					ext = 0;
+
+				if ((pos + ext) >= ielen)
+					break;
+				/* stop at the first after_ric element */
+				if (ieee80211_id_in_list(after_ric, n_after_ric,
+							 ies[pos + ext],
+							 ext == 2))
+					break;
+				pos = skip_ie(ies, ielen, pos);
+			}
 		} else {
 			pos = skip_ie(ies, ielen, pos);
 		}
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 5d4a02c7979b..7ca04a7de85a 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * cfg80211 - wext compat code
  *
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index c434f193f39a..c67d7a82ab13 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * cfg80211 wext compat for managed mode.
  *
diff --git a/net/x25/Makefile b/net/x25/Makefile
index a2c34ab6f194..5dd544a231f2 100644
--- a/net/x25/Makefile
+++ b/net/x25/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Linux X.25 Packet layer.
 #
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5a1a98df3499..ea87143314f3 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -74,7 +74,7 @@ DEFINE_RWLOCK(x25_list_lock);
 
 static const struct proto_ops x25_proto_ops;
 
-static struct x25_address null_x25_address = {"               "};
+static const struct x25_address null_x25_address = {"               "};
 
 #ifdef CONFIG_COMPAT
 struct compat_x25_subscrip_struct {
@@ -374,9 +374,11 @@ static void __x25_destroy_socket(struct sock *);
 /*
  *	handler for deferred kills.
  */
-static void x25_destroy_timer(unsigned long data)
+static void x25_destroy_timer(struct timer_list *t)
 {
-	x25_destroy_socket_from_timer((struct sock *)data);
+	struct sock *sk = from_timer(sk, t, sk_timer);
+
+	x25_destroy_socket_from_timer(sk);
 }
 
 /*
@@ -413,8 +415,7 @@ static void __x25_destroy_socket(struct sock *sk)
 	if (sk_has_allocations(sk)) {
 		/* Defer: outstanding buffers */
 		sk->sk_timer.expires  = jiffies + 10 * HZ;
-		sk->sk_timer.function = x25_destroy_timer;
-		sk->sk_timer.data = (unsigned long)sk;
+		sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_destroy_timer;
 		add_timer(&sk->sk_timer);
 	} else {
 		/* drop last reference so sock_put will free */
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index ba078c85f0a1..e9802afa43d0 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* -*- linux-c -*-
  * sysctl_net_x25.c: sysctl interface to net X.25 subsystem.
  *
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 997ff7b2509b..ad1734d36ed7 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -103,7 +103,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
 					*vc_fac_mask |= X25_MASK_REVERSE;
 					break;
 				}
-
+				/* fall through */
 			case X25_FAC_THROUGHPUT:
 				facilities->throughput = p[1];
 				*vc_fac_mask |= X25_MASK_THROUGHPUT;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 7ac50098a375..3c12cae32001 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -345,6 +345,7 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 
 		case X25_RESET_REQUEST:
 			x25_write_internal(sk, X25_RESET_CONFIRMATION);
+			/* fall through */
 		case X25_RESET_CONFIRMATION: {
 			x25_stop_timer(sk);
 			x25->condition = 0x00;
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 5c5db1a36399..1dfba3c23459 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -26,18 +26,17 @@
 #include <net/tcp_states.h>
 #include <net/x25.h>
 
-static void x25_heartbeat_expiry(unsigned long);
-static void x25_timer_expiry(unsigned long);
+static void x25_heartbeat_expiry(struct timer_list *t);
+static void x25_timer_expiry(struct timer_list *t);
 
 void x25_init_timers(struct sock *sk)
 {
 	struct x25_sock *x25 = x25_sk(sk);
 
-	setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk);
+	timer_setup(&x25->timer, x25_timer_expiry, 0);
 
 	/* initialized by sock_init_data */
-	sk->sk_timer.data     = (unsigned long)sk;
-	sk->sk_timer.function = &x25_heartbeat_expiry;
+	sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_heartbeat_expiry;
 }
 
 void x25_start_heartbeat(struct sock *sk)
@@ -93,9 +92,9 @@ unsigned long x25_display_timer(struct sock *sk)
 	return x25->timer.expires - jiffies;
 }
 
-static void x25_heartbeat_expiry(unsigned long param)
+static void x25_heartbeat_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) /* can currently only occur in state 3 */
@@ -160,9 +159,10 @@ static inline void x25_do_timer_expiry(struct sock * sk)
 	}
 }
 
-static void x25_timer_expiry(unsigned long param)
+static void x25_timer_expiry(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)param;
+	struct x25_sock *x25 = from_timer(x25, t, timer);
+	struct sock *sk = &x25->sk;
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) { /* can currently only occur in state 3 */
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 55b2ac300995..0bd2465a8c5a 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the XFRM subsystem.
 #
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 5f7e8bfa0c2d..30e5746085b8 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -63,7 +63,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 	xfrm_address_t *daddr;
 
 	if (!x->type_offload)
-		return 0;
+		return -EINVAL;
 
 	/* We don't yet support UDP encapsulation, TFC padding and ESN. */
 	if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
@@ -79,7 +79,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 			daddr = &x->props.saddr;
 		}
 
-		dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family);
+		dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr,
+					x->props.family, x->props.output_mark);
 		if (IS_ERR(dst))
 			return 0;
 
@@ -90,6 +91,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 	}
 
 	if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) {
+		xso->dev = NULL;
 		dev_put(dev);
 		return 0;
 	}
@@ -153,6 +155,7 @@ static int xfrm_dev_register(struct net_device *dev)
 
 static int xfrm_dev_unregister(struct net_device *dev)
 {
+	xfrm_policy_cache_flush();
 	return NOTIFY_DONE;
 }
 
@@ -175,8 +178,7 @@ static int xfrm_dev_down(struct net_device *dev)
 	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
-	xfrm_garbage_collect(dev_net(dev));
-
+	xfrm_policy_cache_flush();
 	return NOTIFY_DONE;
 }
 
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index 1e98bc0fe0a5..2ad33ce1ea17 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* xfrm_hash.c: Common hash table code.
  *
  * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index eaea9c4fb3b0..61be810389d8 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _XFRM_HASH_H
 #define _XFRM_HASH_H
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 923205e279f7..347ab31574d5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * xfrm_input.c
  *
@@ -247,6 +248,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 					goto drop;
 				}
 
+				if (xo->status & CRYPTO_INVALID_PROTOCOL) {
+					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
+					goto drop;
+				}
+
 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
 				goto drop;
 			}
@@ -260,8 +266,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		goto lock;
 	}
 
-	daddr = (xfrm_address_t *)(skb_network_header(skb) +
-				   XFRM_SPI_SKB_CB(skb)->daddroff);
 	family = XFRM_SPI_SKB_CB(skb)->family;
 
 	/* if tunnel is present override skb->mark value with tunnel i_key */
@@ -288,6 +292,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		goto drop;
 	}
 
+	daddr = (xfrm_address_t *)(skb_network_header(skb) +
+				   XFRM_SPI_SKB_CB(skb)->daddroff);
 	do {
 		if (skb->sp->len == XFRM_MAX_DEPTH) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
@@ -424,6 +430,8 @@ resume:
 	nf_reset(skb);
 
 	if (decaps) {
+		if (skb->sp)
+			skb->sp->olen = 0;
 		skb_dst_drop(skb);
 		gro_cells_receive(&gro_cells, skb);
 		return 0;
@@ -434,6 +442,8 @@ resume:
 
 		err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
 		if (xfrm_gro) {
+			if (skb->sp)
+				skb->sp->olen = 0;
 			skb_dst_drop(skb);
 			gro_cells_receive(&gro_cells, skb);
 			return err;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 8c0b6722aaa8..73ad8c8ef344 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -66,6 +66,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 			goto error_nolock;
 		}
 
+		if (x->props.output_mark)
+			skb->mark = x->props.output_mark;
+
 		err = x->outer_mode->output(x, skb);
 		if (err) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
@@ -102,6 +105,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 		if (xfrm_offload(skb)) {
 			x->type_offload->encap(x, skb);
 		} else {
+			/* Inner headers are invalid now. */
+			skb->encapsulation = 0;
+
 			err = x->type->output(x, skb);
 			if (err == -EINPROGRESS)
 				goto out;
@@ -205,7 +211,6 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
 	int err;
 
 	secpath_reset(skb);
-	skb->encapsulation = 0;
 
 	if (xfrm_dev_offload_ok(skb, x)) {
 		struct sec_path *sp;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ff61d8557929..9542975eb2f9 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -24,6 +24,7 @@
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/cache.h>
+#include <linux/cpu.h>
 #include <linux/audit.h>
 #include <net/dst.h>
 #include <net/flow.h>
@@ -44,6 +45,8 @@ struct xfrm_flo {
 	u8 flags;
 };
 
+static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
+static struct work_struct *xfrm_pcpu_work __read_mostly;
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
@@ -54,7 +57,7 @@ static __read_mostly seqcount_t xfrm_policy_hash_generation;
 static void xfrm_init_pmtu(struct dst_entry *dst);
 static int stale_bundle(struct dst_entry *dst);
 static int xfrm_bundle_ok(struct xfrm_dst *xdst);
-static void xfrm_policy_queue_process(unsigned long arg);
+static void xfrm_policy_queue_process(struct timer_list *t);
 
 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
@@ -119,7 +122,7 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
 struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
 				    const xfrm_address_t *saddr,
 				    const xfrm_address_t *daddr,
-				    int family)
+				    int family, u32 mark)
 {
 	const struct xfrm_policy_afinfo *afinfo;
 	struct dst_entry *dst;
@@ -128,7 +131,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
 	if (unlikely(afinfo == NULL))
 		return ERR_PTR(-EAFNOSUPPORT);
 
-	dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
+	dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark);
 
 	rcu_read_unlock();
 
@@ -140,7 +143,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
 						int tos, int oif,
 						xfrm_address_t *prev_saddr,
 						xfrm_address_t *prev_daddr,
-						int family)
+						int family, u32 mark)
 {
 	struct net *net = xs_net(x);
 	xfrm_address_t *saddr = &x->props.saddr;
@@ -156,7 +159,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
 		daddr = x->coaddr;
 	}
 
-	dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);
+	dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark);
 
 	if (!IS_ERR(dst)) {
 		if (prev_saddr != saddr)
@@ -176,9 +179,9 @@ static inline unsigned long make_jiffies(long secs)
 		return secs*HZ;
 }
 
-static void xfrm_policy_timer(unsigned long data)
+static void xfrm_policy_timer(struct timer_list *t)
 {
-	struct xfrm_policy *xp = (struct xfrm_policy *)data;
+	struct xfrm_policy *xp = from_timer(xp, t, timer);
 	unsigned long now = get_seconds();
 	long next = LONG_MAX;
 	int warn = 0;
@@ -246,36 +249,6 @@ expired:
 	xfrm_pol_put(xp);
 }
 
-static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
-{
-	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
-	if (unlikely(pol->walk.dead))
-		flo = NULL;
-	else
-		xfrm_pol_hold(pol);
-
-	return flo;
-}
-
-static int xfrm_policy_flo_check(struct flow_cache_object *flo)
-{
-	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
-	return !pol->walk.dead;
-}
-
-static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
-{
-	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
-}
-
-static const struct flow_cache_ops xfrm_policy_fc_ops = {
-	.get = xfrm_policy_flo_get,
-	.check = xfrm_policy_flo_check,
-	.delete = xfrm_policy_flo_delete,
-};
-
 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
  * SPD calls.
  */
@@ -294,11 +267,9 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 		rwlock_init(&policy->lock);
 		refcount_set(&policy->refcnt, 1);
 		skb_queue_head_init(&policy->polq.hold_queue);
-		setup_timer(&policy->timer, xfrm_policy_timer,
-				(unsigned long)policy);
-		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
-			    (unsigned long)policy);
-		policy->flo.ops = &xfrm_policy_fc_ops;
+		timer_setup(&policy->timer, xfrm_policy_timer, 0);
+		timer_setup(&policy->polq.hold_timer,
+			    xfrm_policy_queue_process, 0);
 	}
 	return policy;
 }
@@ -798,7 +769,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	else
 		hlist_add_head(&policy->bydst, chain);
 	__xfrm_policy_link(policy, dir);
-	atomic_inc(&net->xfrm.flow_cache_genid);
 
 	/* After previous checking, family can either be AF_INET or AF_INET6 */
 	if (policy->family == AF_INET)
@@ -1004,6 +974,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 	}
 	if (!cnt)
 		err = -ESRCH;
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	/* Flush only after dropping xfrm_policy_lock: the flush path calls
+	 * get_online_cpus(), which may sleep.
+	 */
+	if (cnt)
+		xfrm_policy_cache_flush();
 	return err;
@@ -1175,7 +1147,7 @@ fail:
 }
 
 static struct xfrm_policy *
-__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
+xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
 	struct xfrm_policy *pol;
@@ -1187,61 +1159,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
 	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 }
 
-static int flow_to_policy_dir(int dir)
-{
-	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
-		return dir;
-
-	switch (dir) {
-	default:
-	case FLOW_DIR_IN:
-		return XFRM_POLICY_IN;
-	case FLOW_DIR_OUT:
-		return XFRM_POLICY_OUT;
-	case FLOW_DIR_FWD:
-		return XFRM_POLICY_FWD;
-	}
-}
-
-static struct flow_cache_object *
-xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
-		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
-{
-	struct xfrm_policy *pol;
-
-	if (old_obj)
-		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
-
-	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
-	if (IS_ERR_OR_NULL(pol))
-		return ERR_CAST(pol);
-
-	/* Resolver returns two references:
-	 * one for cache and one for caller of flow_cache_lookup() */
-	xfrm_pol_hold(pol);
-
-	return &pol->flo;
-}
-
-static inline int policy_to_flow_dir(int dir)
-{
-	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
-		return dir;
-	switch (dir) {
-	default:
-	case XFRM_POLICY_IN:
-		return FLOW_DIR_IN;
-	case XFRM_POLICY_OUT:
-		return FLOW_DIR_OUT;
-	case XFRM_POLICY_FWD:
-		return FLOW_DIR_FWD;
-	}
-}
-
 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 						 const struct flowi *fl, u16 family)
 {
@@ -1261,7 +1178,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 			}
 			err = security_xfrm_policy_lookup(pol->security,
 						      fl->flowi_secid,
-						      policy_to_flow_dir(dir));
+						      dir);
 			if (!err) {
 				if (!xfrm_pol_hold_rcu(pol))
 					goto again;
@@ -1388,6 +1305,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
 		newp->xfrm_nr = old->xfrm_nr;
 		newp->index = old->index;
 		newp->type = old->type;
+		newp->family = old->family;
 		memcpy(newp->xfrm_vec, old->xfrm_vec,
 		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
 		spin_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -1422,14 +1340,14 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
 
 static int
 xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
-	       xfrm_address_t *remote, unsigned short family)
+	       xfrm_address_t *remote, unsigned short family, u32 mark)
 {
 	int err;
 	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
 	if (unlikely(afinfo == NULL))
 		return -EINVAL;
-	err = afinfo->get_saddr(net, oif, local, remote);
+	err = afinfo->get_saddr(net, oif, local, remote, mark);
 	rcu_read_unlock();
 	return err;
 }
@@ -1460,7 +1378,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
 			if (xfrm_addr_any(local, tmpl->encap_family)) {
 				error = xfrm_get_saddr(net, fl->flowi_oif,
 						       &tmp, remote,
-						       tmpl->encap_family);
+						       tmpl->encap_family, 0);
 				if (error)
 					goto fail;
 				local = &tmp;
@@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family)
 	return tos;
 }
 
-static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	if (xdst->route == NULL) {
-		/* Dummy bundle - if it has xfrms we were not
-		 * able to build bundle as template resolution failed.
-		 * It means we need to try again resolving. */
-		if (xdst->num_xfrms > 0)
-			return NULL;
-	} else if (dst->flags & DST_XFRM_QUEUE) {
-		return NULL;
-	} else {
-		/* Real bundle */
-		if (stale_bundle(dst))
-			return NULL;
-	}
-
-	dst_hold(dst);
-	return flo;
-}
-
-static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	if (!xdst->route)
-		return 0;
-	if (stale_bundle(dst))
-		return 0;
-
-	return 1;
-}
-
-static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
-{
-	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
-	struct dst_entry *dst = &xdst->u.dst;
-
-	/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-	dst->obsolete = DST_OBSOLETE_DEAD;
-	dst_release_immediate(dst);
-}
-
-static const struct flow_cache_ops xfrm_bundle_fc_ops = {
-	.get = xfrm_bundle_flo_get,
-	.check = xfrm_bundle_flo_check,
-	.delete = xfrm_bundle_flo_delete,
-};
-
 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 {
 	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 		struct dst_entry *dst = &xdst->u.dst;
 
 		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
-		xdst->flo.ops = &xfrm_bundle_fc_ops;
 	} else
 		xdst = ERR_PTR(-ENOBUFS);
 
@@ -1708,6 +1573,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 			goto put_states;
 		}
 
+		if (!dst_prev)
+			dst0 = dst1;
+		else
+			/* Ref count is taken during xfrm_alloc_dst()
+			 * No need to do dst_clone() on dst1
+			 */
+			dst_prev->child = dst1;
+
 		if (xfrm[i]->sel.family == AF_UNSPEC) {
 			inner_mode = xfrm_ip2inner_mode(xfrm[i],
 							xfrm_af2proto(family));
@@ -1719,21 +1592,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 		} else
 			inner_mode = xfrm[i]->inner_mode;
 
-		if (!dst_prev)
-			dst0 = dst1;
-		else
-			/* Ref count is taken during xfrm_alloc_dst()
-			 * No need to do dst_clone() on dst1
-			 */
-			dst_prev->child = dst1;
-
 		xdst->route = dst;
 		dst_copy_metrics(dst1, dst);
 
 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			family = xfrm[i]->props.family;
 			dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
-					      &saddr, &daddr, family);
+					      &saddr, &daddr, family,
+					      xfrm[i]->props.output_mark);
 			err = PTR_ERR(dst);
 			if (IS_ERR(dst))
 				goto put_states;
@@ -1840,6 +1706,127 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 
 }
 
+/* Publish @xdst as this CPU's cached bundle and release the reference
+ * held on the previously cached entry @old, if any.
+ */
+static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
+{
+	this_cpu_write(xfrm_last_dst, xdst);
+	if (old)
+		dst_release(&old->u.dst);
+}
+
+/* Drop this CPU's cached bundle if xfrm_bundle_ok() rejects it. */
+static void __xfrm_pcpu_work_fn(void)
+{
+	struct xfrm_dst *old;
+
+	old = this_cpu_read(xfrm_last_dst);
+	if (old && !xfrm_bundle_ok(old))
+		xfrm_last_dst_update(NULL, old);
+}
+
+/* Work callback: run the local eviction check with BHs disabled and
+ * inside an RCU read-side section.
+ */
+static void xfrm_pcpu_work_fn(struct work_struct *work)
+{
+	local_bh_disable();
+	rcu_read_lock();
+	__xfrm_pcpu_work_fn();
+	rcu_read_unlock();
+	local_bh_enable();
+}
+
+/* Evict every per-cpu cached bundle that xfrm_bundle_ok() rejects.
+ *
+ * The first pass evicts the local CPU's entry in place when it reaches it
+ * and stops at the first stale entry found on another CPU.  Only then does
+ * the second pass run: it schedules the per-cpu work item on online CPUs
+ * and clears the slot directly for CPUs that are not online.
+ *
+ * NOTE(review): get_online_cpus() may sleep, so this presumably must be
+ * called without spinlocks held - confirm against all callers.
+ */
+void xfrm_policy_cache_flush(void)
+{
+	struct xfrm_dst *old;
+	bool found = false;
+	int cpu;
+
+	local_bh_disable();
+	rcu_read_lock();
+	for_each_possible_cpu(cpu) {
+		old = per_cpu(xfrm_last_dst, cpu);
+		if (old && !xfrm_bundle_ok(old)) {
+			if (smp_processor_id() == cpu) {
+				__xfrm_pcpu_work_fn();
+				continue;
+			}
+			found = true;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+	local_bh_enable();
+
+	if (!found)
+		return;
+
+	get_online_cpus();
+
+	for_each_possible_cpu(cpu) {
+		bool bundle_release;
+
+		rcu_read_lock();
+		old = per_cpu(xfrm_last_dst, cpu);
+		bundle_release = old && !xfrm_bundle_ok(old);
+		rcu_read_unlock();
+
+		if (!bundle_release)
+			continue;
+
+		if (cpu_online(cpu)) {
+			schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
+			continue;
+		}
+
+		rcu_read_lock();
+		old = per_cpu(xfrm_last_dst, cpu);
+		if (old && !xfrm_bundle_ok(old)) {
+			per_cpu(xfrm_last_dst, cpu) = NULL;
+			dst_release(&old->u.dst);
+		}
+		rcu_read_unlock();
+	}
+
+	put_online_cpus();
+}
+
+/* Return true when @xdst was built from exactly the @num states in @xfrm
+ * (compared in order along the dst->child chain) and xfrm_bundle_ok()
+ * still accepts it.
+ */
+static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
+				struct xfrm_state * const xfrm[],
+				int num)
+{
+	const struct dst_entry *dst = &xdst->u.dst;
+	int i;
+
+	if (xdst->num_xfrms != num)
+		return false;
+
+	for (i = 0; i < num; i++) {
+		if (!dst || dst->xfrm != xfrm[i])
+			return false;
+		dst = dst->child;
+	}
+
+	return xfrm_bundle_ok(xdst);
+}
+
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 			       const struct flowi *fl, u16 family,
@@ -1847,8 +1813,8 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 {
 	struct net *net = xp_net(pols[0]);
 	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+	struct xfrm_dst *xdst, *old;
 	struct dst_entry *dst;
-	struct xfrm_dst *xdst;
 	int err;
 
 	/* Try to instantiate a bundle */
@@ -1859,6 +1825,21 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 		return ERR_PTR(err);
 	}
 
+	xdst = this_cpu_read(xfrm_last_dst);
+	if (xdst &&
+	    xdst->u.dst.dev == dst_orig->dev &&
+	    xdst->num_pols == num_pols &&
+	    memcmp(xdst->pols, pols,
+		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
+	    xfrm_xdst_can_reuse(xdst, xfrm, err)) {
+		dst_hold(&xdst->u.dst);
+		while (err > 0)
+			xfrm_state_put(xfrm[--err]);
+		return xdst;
+	}
+
+	old = xdst;
+
 	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
 	if (IS_ERR(dst)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
@@ -1871,15 +1852,18 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 	xdst->policy_genid = atomic_read(&pols[0]->genid);
 
+	atomic_set(&xdst->u.dst.__refcnt, 2);
+	xfrm_last_dst_update(xdst, old);
+
 	return xdst;
 }
 
-static void xfrm_policy_queue_process(unsigned long arg)
+static void xfrm_policy_queue_process(struct timer_list *t)
 {
 	struct sk_buff *skb;
 	struct sock *sk;
 	struct dst_entry *dst;
-	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
+	struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer);
 	struct net *net = xp_net(pol);
 	struct xfrm_policy_queue *pq = &pol->polq;
 	struct flowi fl;
@@ -2051,86 +2035,39 @@ free_dst:
 	goto out;
 }
 
-static struct flow_cache_object *
-xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
-		   struct flow_cache_object *oldflo, void *ctx)
+static struct xfrm_dst *
+xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
 {
-	struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-	struct xfrm_dst *xdst, *new_xdst;
-	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
-
-	/* Check if the policies from old bundle are usable */
-	xdst = NULL;
-	if (oldflo) {
-		xdst = container_of(oldflo, struct xfrm_dst, flo);
-		num_pols = xdst->num_pols;
-		num_xfrms = xdst->num_xfrms;
-		pol_dead = 0;
-		for (i = 0; i < num_pols; i++) {
-			pols[i] = xdst->pols[i];
-			pol_dead |= pols[i]->walk.dead;
-		}
-		if (pol_dead) {
-			/* Mark DST_OBSOLETE_DEAD to fail the next
-			 * xfrm_dst_check()
-			 */
-			xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-			dst_release_immediate(&xdst->u.dst);
-			xdst = NULL;
-			num_pols = 0;
-			num_xfrms = 0;
-			oldflo = NULL;
-		}
-	}
+	int num_pols = 0, num_xfrms = 0, err;
+	struct xfrm_dst *xdst;
 
 	/* Resolve policies to use if we couldn't get them from
 	 * previous cache entry */
-	if (xdst == NULL) {
-		num_pols = 1;
-		pols[0] = __xfrm_policy_lookup(net, fl, family,
-					       flow_to_policy_dir(dir));
-		err = xfrm_expand_policies(fl, family, pols,
+	num_pols = 1;
+	pols[0] = xfrm_policy_lookup(net, fl, family, dir);
+	err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
-		if (err < 0)
-			goto inc_error;
-		if (num_pols == 0)
-			return NULL;
-		if (num_xfrms <= 0)
-			goto make_dummy_bundle;
-	}
+	if (err < 0)
+		goto inc_error;
+	if (num_pols == 0)
+		return NULL;
+	if (num_xfrms <= 0)
+		goto make_dummy_bundle;
 
-	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+	xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
 						  xflo->dst_orig);
-	if (IS_ERR(new_xdst)) {
-		err = PTR_ERR(new_xdst);
+	if (IS_ERR(xdst)) {
+		err = PTR_ERR(xdst);
 		if (err != -EAGAIN)
 			goto error;
-		if (oldflo == NULL)
-			goto make_dummy_bundle;
-		dst_hold(&xdst->u.dst);
-		return oldflo;
-	} else if (new_xdst == NULL) {
+		goto make_dummy_bundle;
+	} else if (xdst == NULL) {
 		num_xfrms = 0;
-		if (oldflo == NULL)
-			goto make_dummy_bundle;
-		xdst->num_xfrms = 0;
-		dst_hold(&xdst->u.dst);
-		return oldflo;
-	}
-
-	/* Kill the previous bundle */
-	if (xdst) {
-		/* The policies were stolen for newly generated bundle */
-		xdst->num_pols = 0;
-		/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-		xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-		dst_release_immediate(&xdst->u.dst);
+		goto make_dummy_bundle;
 	}
 
-	/* We do need to return one reference for original caller */
-	dst_hold(&new_xdst->u.dst);
-	return &new_xdst->flo;
+	return xdst;
 
 make_dummy_bundle:
 	/* We found policies, but there's no bundles to instantiate:
@@ -2145,18 +2082,12 @@ make_dummy_bundle:
 	xdst->num_xfrms = num_xfrms;
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 
-	dst_hold(&xdst->u.dst);
-	return &xdst->flo;
+	return xdst;
 
 inc_error:
 	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
 error:
-	if (xdst != NULL) {
-		/* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
-		xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
-		dst_release_immediate(&xdst->u.dst);
-	} else
-		xfrm_pols_put(pols, num_pols);
+	xfrm_pols_put(pols, num_pols);
 	return ERR_PTR(err);
 }
 
@@ -2187,11 +2118,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			      const struct sock *sk, int flags)
 {
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-	struct flow_cache_object *flo;
 	struct xfrm_dst *xdst;
 	struct dst_entry *dst, *route;
 	u16 family = dst_orig->ops->family;
-	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+	u8 dir = XFRM_POLICY_OUT;
 	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
 
 	dst = NULL;
@@ -2226,7 +2156,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 				goto no_transform;
 			}
 
-			dst_hold(&xdst->u.dst);
 			route = xdst->route;
 		}
 	}
@@ -2242,15 +2171,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		flo = flow_cache_lookup(net, fl, family, dir,
-					xfrm_bundle_lookup, &xflo);
-		if (flo == NULL)
+		xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
+		if (xdst == NULL)
 			goto nopol;
-		if (IS_ERR(flo)) {
-			err = PTR_ERR(flo);
+		if (IS_ERR(xdst)) {
+			err = PTR_ERR(xdst);
 			goto dropdst;
 		}
-		xdst = container_of(flo, struct xfrm_dst, flo);
 
 		num_pols = xdst->num_pols;
 		num_xfrms = xdst->num_xfrms;
@@ -2449,12 +2376,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	int pi;
 	int reverse;
 	struct flowi fl;
-	u8 fl_dir;
 	int xerr_idx = -1;
 
 	reverse = dir & ~XFRM_POLICY_MASK;
 	dir &= XFRM_POLICY_MASK;
-	fl_dir = policy_to_flow_dir(dir);
 
 	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
@@ -2486,16 +2411,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		}
 	}
 
-	if (!pol) {
-		struct flow_cache_object *flo;
-
-		flo = flow_cache_lookup(net, &fl, family, fl_dir,
-					xfrm_policy_lookup, NULL);
-		if (IS_ERR_OR_NULL(flo))
-			pol = ERR_CAST(flo);
-		else
-			pol = container_of(flo, struct xfrm_policy, flo);
-	}
+	if (!pol)
+		pol = xfrm_policy_lookup(net, &fl, family, dir);
 
 	if (IS_ERR(pol)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@ -2641,11 +2558,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 	 * notice.  That's what we are validating here via the
 	 * stale_bundle() check.
 	 *
-	 * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will
-	 * be marked on it.
 	 * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will
 	 * be marked on it.
-	 * Both will force stable_bundle() to fail on any xdst bundle with
+	 * This will force stale_bundle() to fail on any xdst bundle with
 	 * this dst linked in it.
 	 */
 	if (dst->obsolete < 0 && !stale_bundle(dst))
@@ -2685,18 +2600,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
-void xfrm_garbage_collect(struct net *net)
-{
-	flow_cache_flush(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect);
-
-void xfrm_garbage_collect_deferred(struct net *net)
-{
-	flow_cache_flush_deferred(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect_deferred);
-
 static void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -3034,14 +2937,9 @@ static int __net_init xfrm_net_init(struct net *net)
 	rv = xfrm_sysctl_init(net);
 	if (rv < 0)
 		goto out_sysctl;
-	rv = flow_cache_init(net);
-	if (rv < 0)
-		goto out;
 
 	return 0;
 
-out:
-	xfrm_sysctl_fini(net);
 out_sysctl:
 	xfrm_policy_fini(net);
 out_policy:
@@ -3054,7 +2952,6 @@ out_statistics:
 
 static void __net_exit xfrm_net_exit(struct net *net)
 {
-	flow_cache_fini(net);
 	xfrm_sysctl_fini(net);
 	xfrm_policy_fini(net);
 	xfrm_state_fini(net);
@@ -3068,7 +2965,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
-	flow_cache_hp_init();
+	int i;
+
+	xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
+				       GFP_KERNEL);
+	BUG_ON(!xfrm_pcpu_work);
+
+	for (i = 0; i < NR_CPUS; i++)
+		INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
+
 	register_pernet_subsys(&xfrm_net_ops);
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
@@ -3308,9 +3213,15 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
 	struct xfrm_migrate *mp;
 
+	/* Stage 0 - sanity checks */
 	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
 		goto out;
 
+	if (dir >= XFRM_POLICY_MAX) {
+		err = -EINVAL;
+		goto out;
+	}
+
 	/* Stage 1 - find policy */
 	if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
 		err = -ENOENT;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 6c0956d10db6..1f5cee2269af 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -296,12 +296,14 @@ int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
 }
 EXPORT_SYMBOL(xfrm_unregister_type_offload);
 
-static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family)
+static const struct xfrm_type_offload *
+xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
 {
 	struct xfrm_state_afinfo *afinfo;
 	const struct xfrm_type_offload **typemap;
 	const struct xfrm_type_offload *type;
 
+retry:
 	afinfo = xfrm_state_get_afinfo(family);
 	if (unlikely(afinfo == NULL))
 		return NULL;
@@ -311,6 +313,17 @@ static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned
 	if ((type && !try_module_get(type->owner)))
 		type = NULL;
 
 	rcu_read_unlock();
+
+	/* Load the offload module only after leaving the RCU read-side
+	 * section (presumably entered by xfrm_state_get_afinfo()):
+	 * request_module() may sleep, and the retry re-enters the section.
+	 */
+	if (!type && try_load) {
+		request_module("xfrm-offload-%d-%d", family, proto);
+		try_load = false;
+		goto retry;
+	}
+
 	return type;
 }
@@ -724,11 +732,12 @@ restart:
 			}
 		}
 	}
-	if (cnt)
-		err = 0;
-
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	if (cnt) {
+		err = 0;
+		xfrm_policy_cache_flush();
+	}
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_flush);
@@ -1620,6 +1629,7 @@ int
 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
 	       unsigned short family, struct net *net)
 {
+	int i;
 	int err = 0;
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 	if (!afinfo)
@@ -1628,6 +1638,9 @@ xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
 	spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
 	if (afinfo->tmpl_sort)
 		err = afinfo->tmpl_sort(dst, src, n);
+	else
+		for (i = 0; i < n; i++)
+			dst[i] = src[i];
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	rcu_read_unlock();
 	return err;
@@ -1638,6 +1651,7 @@ int
 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
 		unsigned short family)
 {
+	int i;
 	int err = 0;
 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 	struct net *net = xs_net(*src);
@@ -1648,6 +1662,9 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 	if (afinfo->state_sort)
 		err = afinfo->state_sort(dst, src, n);
+	else
+		for (i = 0; i < n; i++)
+			dst[i] = src[i];
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 	rcu_read_unlock();
 	return err;
@@ -2052,6 +2069,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
 	if (err >= 0) {
 		xfrm_sk_policy_insert(sk, err, pol);
 		xfrm_pol_put(pol);
+		__sk_dst_reset(sk);
 		err = 0;
 	}
 
@@ -2164,7 +2182,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 	return mtu - x->props.header_len;
 }
 
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 {
 	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_mode *inner_mode;
@@ -2229,7 +2247,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
 	if (x->type == NULL)
 		goto error;
 
-	x->type_offload = xfrm_get_type_offload(x->id.proto, family);
+	x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
 
 	err = x->type->init_state(x);
 	if (err)
@@ -2257,7 +2275,7 @@ EXPORT_SYMBOL(__xfrm_init_state);
 
 int xfrm_init_state(struct xfrm_state *x)
 {
-	return __xfrm_init_state(x, true);
+	return __xfrm_init_state(x, true, false);
 }
 
 EXPORT_SYMBOL(xfrm_init_state);
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 35a7e794ad04..0c6c5ef65f9d 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/sysctl.h>
 #include <linux/slab.h>
 #include <net/net_namespace.h>
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2be4c6af008a..983b0233767b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -42,7 +42,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
 		return 0;
 
 	algp = nla_data(rt);
-	if (nla_len(rt) < xfrm_alg_len(algp))
+	if (nla_len(rt) < (int)xfrm_alg_len(algp))
 		return -EINVAL;
 
 	switch (type) {
@@ -68,7 +68,7 @@ static int verify_auth_trunc(struct nlattr **attrs)
 		return 0;
 
 	algp = nla_data(rt);
-	if (nla_len(rt) < xfrm_alg_auth_len(algp))
+	if (nla_len(rt) < (int)xfrm_alg_auth_len(algp))
 		return -EINVAL;
 
 	algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
@@ -84,7 +84,7 @@ static int verify_aead(struct nlattr **attrs)
 		return 0;
 
 	algp = nla_data(rt);
-	if (nla_len(rt) < aead_len(algp))
+	if (nla_len(rt) < (int)aead_len(algp))
 		return -EINVAL;
 
 	algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
@@ -130,7 +130,7 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
 		if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
 			return -EINVAL;
 
-		if (nla_len(rt) < xfrm_replay_state_esn_len(rs) &&
+		if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
 		    nla_len(rt) != sizeof(*rs))
 			return -EINVAL;
 	}
@@ -404,7 +404,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
 					 struct nlattr *rp)
 {
 	struct xfrm_replay_state_esn *up;
-	int ulen;
+	unsigned int ulen;
 
 	if (!replay_esn || !rp)
 		return 0;
@@ -414,7 +414,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
 
 	/* Check the overall length and the internal bitmap length to avoid
 	 * potential overflow. */
-	if (nla_len(rp) < ulen ||
+	if (nla_len(rp) < (int)ulen ||
 	    xfrm_replay_state_esn_len(replay_esn) != ulen ||
 	    replay_esn->bmp_len != up->bmp_len)
 		return -EINVAL;
@@ -430,14 +430,14 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
 				       struct nlattr *rta)
 {
 	struct xfrm_replay_state_esn *p, *pp, *up;
-	int klen, ulen;
+	unsigned int klen, ulen;
 
 	if (!rta)
 		return 0;
 
 	up = nla_data(rta);
 	klen = xfrm_replay_state_esn_len(up);
-	ulen = nla_len(rta) >= klen ? klen : sizeof(*up);
+	ulen = nla_len(rta) >= (int)klen ? klen : sizeof(*up);
 
 	p = kzalloc(klen, GFP_KERNEL);
 	if (!p)
@@ -458,9 +458,9 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
 	return 0;
 }
 
-static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
+static inline unsigned int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
 {
-	int len = 0;
+	unsigned int len = 0;
 
 	if (xfrm_ctx) {
 		len += sizeof(struct xfrm_user_sec_ctx);
@@ -584,7 +584,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_mark_get(attrs, &x->mark);
 
-	err = __xfrm_init_state(x, false);
+	if (attrs[XFRMA_OUTPUT_MARK])
+		x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
+
+	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
 	if (err)
 		goto error;
 
@@ -654,6 +657,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
+		xfrm_dev_state_delete(x);
 		__xfrm_state_put(x);
 		goto out;
 	}
@@ -796,7 +800,7 @@ static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb
 		return -EMSGSIZE;
 
 	xuo = nla_data(attr);
-
+	memset(xuo, 0, sizeof(*xuo));
 	xuo->ifindex = xso->dev->ifindex;
 	xuo->flags = xso->flags;
 
@@ -897,6 +901,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 		ret = copy_user_offload(&x->xso, skb);
 	if (ret)
 		goto out;
+	if (x->props.output_mark) {
+		ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark);
+		if (ret)
+			goto out;
+	}
 	if (x->security)
 		ret = copy_sec_ctx(x->security, skb);
 out:
@@ -1023,7 +1032,7 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
 		return -1;
 }
 
-static inline size_t xfrm_spdinfo_msgsize(void)
+static inline unsigned int xfrm_spdinfo_msgsize(void)
 {
 	return NLMSG_ALIGN(4)
 	       + nla_total_size(sizeof(struct xfrmu_spdinfo))
@@ -1138,18 +1147,19 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 	u32 *flags = nlmsg_data(nlh);
 	u32 sportid = NETLINK_CB(skb).portid;
 	u32 seq = nlh->nlmsg_seq;
+	int err;
 
 	r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC);
 	if (r_skb == NULL)
 		return -ENOMEM;
 
-	if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0)
-		BUG();
+	err = build_spdinfo(r_skb, net, sportid, seq, *flags);
+	BUG_ON(err < 0);
 
 	return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
 }
 
-static inline size_t xfrm_sadinfo_msgsize(void)
+static inline unsigned int xfrm_sadinfo_msgsize(void)
 {
 	return NLMSG_ALIGN(4)
 	       + nla_total_size(sizeof(struct xfrmu_sadhinfo))
@@ -1196,13 +1206,14 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 	u32 *flags = nlmsg_data(nlh);
 	u32 sportid = NETLINK_CB(skb).portid;
 	u32 seq = nlh->nlmsg_seq;
+	int err;
 
 	r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC);
 	if (r_skb == NULL)
 		return -ENOMEM;
 
-	if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0)
-		BUG();
+	err = build_sadinfo(r_skb, net, sportid, seq, *flags);
+	BUG_ON(err < 0);
 
 	return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
 }
@@ -1625,7 +1636,7 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s
 		return copy_sec_ctx(xp->security, skb);
 	return 0;
 }
-static inline size_t userpolicy_type_attrsize(void)
+static inline unsigned int userpolicy_type_attrsize(void)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
 	return nla_total_size(sizeof(struct xfrm_userpolicy_type));
@@ -1684,32 +1695,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 
 static int xfrm_dump_policy_done(struct netlink_callback *cb)
 {
-	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
 	struct net *net = sock_net(cb->skb->sk);
 
 	xfrm_policy_walk_done(walk, net);
 	return 0;
 }
 
+static int xfrm_dump_policy_start(struct netlink_callback *cb)
+{
+	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
+
+	BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args));
+
+	xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
+	return 0;
+}
+
 static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+	struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
 	struct xfrm_dump_info info;
 
-	BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) >
-		     sizeof(cb->args) - sizeof(cb->args[0]));
-
 	info.in_skb = cb->skb;
 	info.out_skb = skb;
 	info.nlmsg_seq = cb->nlh->nlmsg_seq;
 	info.nlmsg_flags = NLM_F_MULTI;
 
-	if (!cb->args[0]) {
-		cb->args[0] = 1;
-		xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
-	}
-
 	(void) xfrm_policy_walk(net, walk, dump_one_policy, &info);
 
 	return skb->len;
@@ -1815,8 +1828,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 out:
 	xfrm_pol_put(xp);
-	if (delete && err == 0)
-		xfrm_garbage_collect(net);
 	return err;
 }
 
@@ -1844,9 +1855,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 }
 
-static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x)
+static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
 {
-	size_t replay_size = x->replay_esn ?
+	unsigned int replay_size = x->replay_esn ?
 			      xfrm_replay_state_esn_len(x->replay_esn) :
 			      sizeof(struct xfrm_replay_state);
 
@@ -1869,6 +1880,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
 		return -EMSGSIZE;
 
 	id = nlmsg_data(nlh);
+	memset(&id->sa_id, 0, sizeof(id->sa_id));
 	memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr));
 	id->sa_id.spi = x->id.spi;
 	id->sa_id.family = x->props.family;
@@ -1950,8 +1962,9 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	c.seq = nlh->nlmsg_seq;
 	c.portid = nlh->nlmsg_pid;
 
-	if (build_aevent(r_skb, x, &c) < 0)
-		BUG();
+	err = build_aevent(r_skb, x, &c);
+	BUG_ON(err < 0);
+
 	err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
 	spin_unlock_bh(&x->lock);
 	xfrm_state_put(x);
@@ -2027,7 +2040,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			return 0;
 		return err;
 	}
-	xfrm_garbage_collect(net);
 
 	c.data.type = type;
 	c.event = nlh->nlmsg_type;
@@ -2315,8 +2327,8 @@ static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff
 	return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk);
 }
 
-static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma,
-					  int with_encp)
+static inline unsigned int xfrm_migrate_msgsize(int num_migrate, int with_kma,
+						int with_encp)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id))
 	      + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0)
@@ -2379,6 +2391,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 {
 	struct net *net = &init_net;
 	struct sk_buff *skb;
+	int err;
 
 	skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap),
 			GFP_ATOMIC);
@@ -2386,8 +2399,8 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 		return -ENOMEM;
 
 	/* build migrate */
-	if (build_migrate(skb, m, num_migrate, k, sel, encap, dir, type) < 0)
-		BUG();
+	err = build_migrate(skb, m, num_migrate, k, sel, encap, dir, type);
+	BUG_ON(err < 0);
 
 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);
 }
@@ -2457,6 +2470,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_PROTO]		= { .type = NLA_U8 },
 	[XFRMA_ADDRESS_FILTER]	= { .len = sizeof(struct xfrm_address_filter) },
 	[XFRMA_OFFLOAD_DEV]	= { .len = sizeof(struct xfrm_user_offload) },
+	[XFRMA_OUTPUT_MARK]	= { .type = NLA_U32 },
 };
 
 static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@ -2466,6 +2480,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
 
 static const struct xfrm_link {
 	int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
+	int (*start)(struct netlink_callback *);
 	int (*dump)(struct sk_buff *, struct netlink_callback *);
 	int (*done)(struct netlink_callback *);
 	const struct nla_policy *nla_pol;
@@ -2479,6 +2494,7 @@ static const struct xfrm_link {
 	[XFRM_MSG_NEWPOLICY   - XFRM_MSG_BASE] = { .doit = xfrm_add_policy    },
 	[XFRM_MSG_DELPOLICY   - XFRM_MSG_BASE] = { .doit = xfrm_get_policy    },
 	[XFRM_MSG_GETPOLICY   - XFRM_MSG_BASE] = { .doit = xfrm_get_policy,
+						   .start = xfrm_dump_policy_start,
 						   .dump = xfrm_dump_policy,
 						   .done = xfrm_dump_policy_done },
 	[XFRM_MSG_ALLOCSPI    - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi },
@@ -2531,6 +2547,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 		{
 			struct netlink_dump_control c = {
+				.start = link->start,
 				.dump = link->dump,
 				.done = link->done,
 			};
@@ -2559,7 +2576,7 @@ static void xfrm_netlink_rcv(struct sk_buff *skb)
 	mutex_unlock(&net->xfrm.xfrm_cfg_mutex);
 }
 
-static inline size_t xfrm_expire_msgsize(void)
+static inline unsigned int xfrm_expire_msgsize(void)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_expire))
 	       + nla_total_size(sizeof(struct xfrm_mark));
@@ -2578,6 +2595,8 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
 	ue = nlmsg_data(nlh);
 	copy_to_user_state(x, &ue->state);
 	ue->hard = (c->data.hard != 0) ? 1 : 0;
+	/* clear the padding bytes */
+	memset(&ue->hard + 1, 0, sizeof(*ue) - offsetofend(typeof(*ue), hard));
 
 	err = xfrm_mark_put(skb, &x->mark);
 	if (err)
@@ -2608,13 +2627,14 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
+	int err;
 
 	skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
 
-	if (build_aevent(skb, x, c) < 0)
-		BUG();
+	err = build_aevent(skb, x, c);
+	BUG_ON(err < 0);
 
 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);
 }
@@ -2645,9 +2665,9 @@ static int xfrm_notify_sa_flush(const struct km_event *c)
 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
 }
 
-static inline size_t xfrm_sa_len(struct xfrm_state *x)
+static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
 {
-	size_t l = 0;
+	unsigned int l = 0;
 	if (x->aead)
 		l += nla_total_size(aead_len(x->aead));
 	if (x->aalg) {
@@ -2676,6 +2696,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 		l += nla_total_size(sizeof(x->props.extra_flags));
 	if (x->xso.dev)
 		 l += nla_total_size(sizeof(x->xso));
+	if (x->props.output_mark)
+		l += nla_total_size(sizeof(x->props.output_mark));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size_64bit(sizeof(u64));
@@ -2690,8 +2712,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
 	struct xfrm_usersa_id *id;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	int len = xfrm_sa_len(x);
-	int headlen, err;
+	unsigned int len = xfrm_sa_len(x);
+	unsigned int headlen;
+	int err;
 
 	headlen = sizeof(*p);
 	if (c->event == XFRM_MSG_DELSA) {
@@ -2715,6 +2738,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
 		struct nlattr *attr;
 
 		id = nlmsg_data(nlh);
+		memset(id, 0, sizeof(*id));
 		memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr));
 		id->spi = x->id.spi;
 		id->family = x->props.family;
@@ -2764,8 +2788,8 @@ static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c
 
 }
 
-static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x,
-					  struct xfrm_policy *xp)
+static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x,
+						struct xfrm_policy *xp)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire))
 	       + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
@@ -2817,13 +2841,14 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
+	int err;
 
 	skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
 
-	if (build_acquire(skb, x, xt, xp) < 0)
-		BUG();
+	err = build_acquire(skb, x, xt, xp);
+	BUG_ON(err < 0);
 
 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);
 }
@@ -2888,7 +2913,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 	return xp;
 }
 
-static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp)
+static inline unsigned int xfrm_polexpire_msgsize(struct xfrm_policy *xp)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire))
 	       + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
@@ -2932,26 +2957,28 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct
 {
 	struct net *net = xp_net(xp);
 	struct sk_buff *skb;
+	int err;
 
 	skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
 
-	if (build_polexpire(skb, xp, dir, c) < 0)
-		BUG();
+	err = build_polexpire(skb, xp, dir, c);
+	BUG_ON(err < 0);
 
 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
 }
 
 static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
 {
-	int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
+	unsigned int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
 	struct net *net = xp_net(xp);
 	struct xfrm_userpolicy_info *p;
 	struct xfrm_userpolicy_id *id;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	int headlen, err;
+	unsigned int headlen;
+	int err;
 
 	headlen = sizeof(*p);
 	if (c->event == XFRM_MSG_DELPOLICY) {
@@ -3058,7 +3085,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct
 
 }
 
-static inline size_t xfrm_report_msgsize(void)
+static inline unsigned int xfrm_report_msgsize(void)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_report));
 }
@@ -3092,18 +3119,19 @@ static int xfrm_send_report(struct net *net, u8 proto,
 			    struct xfrm_selector *sel, xfrm_address_t *addr)
 {
 	struct sk_buff *skb;
+	int err;
 
 	skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
 
-	if (build_report(skb, proto, sel, addr) < 0)
-		BUG();
+	err = build_report(skb, proto, sel, addr);
+	BUG_ON(err < 0);
 
 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);
 }
 
-static inline size_t xfrm_mapping_msgsize(void)
+static inline unsigned int xfrm_mapping_msgsize(void)
 {
 	return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping));
 }
@@ -3139,6 +3167,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
+	int err;
 
 	if (x->id.proto != IPPROTO_ESP)
 		return -EINVAL;
@@ -3150,8 +3179,8 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	if (skb == NULL)
 		return -ENOMEM;
 
-	if (build_mapping(skb, x, ipaddr, sport) < 0)
-		BUG();
+	err = build_mapping(skb, x, ipaddr, sport);
+	BUG_ON(err < 0);
 
 	return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING);
 }